Merge "Revert "Revert "Revert "Add intrinsic for Reference.get()""""
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index eb897f0..d1d5ad9 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -71,26 +71,28 @@
};
/**
- * @class CallInlining
- * @brief Perform method inlining pass.
+ * @class SpecialMethodInliner
+ * @brief Performs method inlining pass on special kinds of methods.
+ * @details Special methods are methods that fall in one of the following categories:
+ * empty, instance getter, instance setter, argument return, and constant return.
*/
-class CallInlining : public PassME {
+class SpecialMethodInliner : public PassME {
public:
- CallInlining() : PassME("CallInlining") {
+ SpecialMethodInliner() : PassME("SpecialMethodInliner") {
}
bool Gate(const PassDataHolder* data) const {
DCHECK(data != nullptr);
CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
DCHECK(cUnit != nullptr);
- return cUnit->mir_graph->InlineCallsGate();
+ return cUnit->mir_graph->InlineSpecialMethodsGate();
}
void Start(PassDataHolder* data) const {
DCHECK(data != nullptr);
CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
DCHECK(cUnit != nullptr);
- cUnit->mir_graph->InlineCallsStart();
+ cUnit->mir_graph->InlineSpecialMethodsStart();
}
bool Worker(const PassDataHolder* data) const {
@@ -100,7 +102,7 @@
DCHECK(cUnit != nullptr);
BasicBlock* bb = pass_me_data_holder->bb;
DCHECK(bb != nullptr);
- cUnit->mir_graph->InlineCalls(bb);
+ cUnit->mir_graph->InlineSpecialMethods(bb);
// No need of repeating, so just return false.
return false;
}
@@ -109,7 +111,7 @@
DCHECK(data != nullptr);
CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
DCHECK(cUnit != nullptr);
- cUnit->mir_graph->InlineCallsEnd();
+ cUnit->mir_graph->InlineSpecialMethodsEnd();
}
};
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index f3ef796..711743d 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -97,7 +97,6 @@
// 2 = kArm64. TODO(Arm64): enable optimizations once backend is mature enough.
(1 << kLoadStoreElimination) |
(1 << kLoadHoisting) |
- (1 << kBBOpt) |
0,
// 3 = kThumb2.
0,
@@ -575,7 +574,7 @@
// Check if we support the byte code.
if (std::find(unsupport_list, unsupport_list + unsupport_list_size,
opcode) != unsupport_list + unsupport_list_size) {
- if (!cu.mir_graph->IsPseudoMirOp(opcode)) {
+ if (!MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
VLOG(compiler) << "Unsupported dalvik byte code : "
<< mir->dalvikInsn.opcode;
} else {
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index e372206..3de4483 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -902,7 +902,7 @@
while (!done) {
tbb->visited = true;
for (MIR* mir = tbb->first_mir_insn; mir != NULL; mir = mir->next) {
- if (IsPseudoMirOp(mir->dalvikInsn.opcode)) {
+ if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
// Skip any MIR pseudo-op.
continue;
}
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index bc99a27..b82c5c7 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -909,6 +909,16 @@
def_v->SetBit(dalvik_reg_id);
}
+void MIRGraph::HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v,
+ ArenaBitVector* live_in_v,
+ const MIR::DecodedInstruction& d_insn) {  // Invoked for MIRs whose attributes carry DF_FORMAT_EXTENDED.
+ switch (static_cast<int>(d_insn.opcode)) {  // Dispatch on the opcode as a plain int.
+ default:  // No extended opcode has a case yet: every call only logs; the bit vectors are untouched.
+ LOG(ERROR) << "Unexpected Extended Opcode " << d_insn.opcode;
+ break;
+ }
+}
+
/*
* Find out live-in variables for natural loops. Variables that are live-in in
* the main loop body are considered to be defined in the entry block.
@@ -966,6 +976,9 @@
HandleDef(def_v, d_insn->vA+1);
}
}
+ if (df_attributes & DF_FORMAT_EXTENDED) {
+ HandleExtended(use_v, def_v, live_in_v, mir->dalvikInsn);
+ }
}
return true;
}
@@ -1048,6 +1061,14 @@
}
}
+void MIRGraph::DataFlowSSAFormatExtended(MIR* mir) {  // Called by DoSSAConversion for DF_FORMAT_EXTENDED MIRs.
+ switch (static_cast<int>(mir->dalvikInsn.opcode)) {  // Dispatch on the opcode as a plain int.
+ default:  // No extended MIR has a case yet: the opcode is only reported.
+ LOG(ERROR) << "Missing case for extended MIR: " << mir->dalvikInsn.opcode;
+ break;
+ }
+}
+
/* Entry function to convert a block into SSA representation */
bool MIRGraph::DoSSAConversion(BasicBlock* bb) {
MIR* mir;
@@ -1063,7 +1084,7 @@
uint64_t df_attributes = GetDataFlowAttributes(mir);
// If not a pseudo-op, note non-leaf or can throw
- if (!IsPseudoMirOp(mir->dalvikInsn.opcode)) {
+ if (!MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
int flags = Instruction::FlagsOf(mir->dalvikInsn.opcode);
if ((flags & Instruction::kInvoke) != 0 && (mir->optimization_flags & MIR_INLINED) == 0) {
@@ -1083,6 +1104,11 @@
continue;
}
+ if (df_attributes & DF_FORMAT_EXTENDED) {
+ DataFlowSSAFormatExtended(mir);
+ continue;
+ }
+
if (df_attributes & DF_HAS_USES) {
if (df_attributes & DF_UA) {
num_uses++;
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 4fbace2..1c8a9b5 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -193,14 +193,16 @@
bottom_block->successor_block_list_type = orig_block->successor_block_list_type;
bottom_block->successor_blocks = orig_block->successor_blocks;
orig_block->successor_block_list_type = kNotUsed;
- orig_block->successor_blocks = NULL;
+ orig_block->successor_blocks = nullptr;
GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bottom_block->successor_blocks);
while (true) {
SuccessorBlockInfo* successor_block_info = iterator.Next();
- if (successor_block_info == NULL) break;
+ if (successor_block_info == nullptr) break;
BasicBlock* bb = GetBasicBlock(successor_block_info->block);
- bb->predecessors->Delete(orig_block->id);
- bb->predecessors->Insert(bottom_block->id);
+ if (bb != nullptr) {
+ bb->predecessors->Delete(orig_block->id);
+ bb->predecessors->Insert(bottom_block->id);
+ }
}
}
@@ -222,7 +224,7 @@
DCHECK(insn == bottom_block->first_mir_insn);
DCHECK_EQ(insn->offset, bottom_block->start_offset);
DCHECK(static_cast<int>(insn->dalvikInsn.opcode) == kMirOpCheck ||
- !IsPseudoMirOp(insn->dalvikInsn.opcode));
+ !MIR::DecodedInstruction::IsPseudoMirOp(insn->dalvikInsn.opcode));
DCHECK_EQ(dex_pc_to_block_map_.Get(insn->offset), orig_block->id);
MIR* p = insn;
dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
@@ -237,7 +239,7 @@
* CHECK and work portions. Since the 2nd half of a split operation is always
* the first in a BasicBlock, we can't hit it here.
*/
- if ((opcode == kMirOpCheck) || !IsPseudoMirOp(opcode)) {
+ if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
DCHECK_EQ(dex_pc_to_block_map_.Get(p->offset), orig_block->id);
dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
}
@@ -861,11 +863,17 @@
/* Dump the CFG into a DOT graph */
void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suffix) {
FILE* file;
+ static AtomicInteger cnt(0);
+
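+ // Increment counter to get a unique file number. NOTE(review): the value is re-read below via LoadRelaxed(), so concurrent callers may observe the same number - confirm whether that matters.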
+ cnt++;
+
std::string fname(PrettyMethod(cu_->method_idx, *cu_->dex_file));
ReplaceSpecialChars(fname);
- fname = StringPrintf("%s%s%x%s.dot", dir_prefix, fname.c_str(),
+ fname = StringPrintf("%s%s%x%s_%d.dot", dir_prefix, fname.c_str(),
GetBasicBlock(GetEntryBlock()->fall_through)->start_offset,
- suffix == nullptr ? "" : suffix);
+ suffix == nullptr ? "" : suffix,
+ cnt.LoadRelaxed());
file = fopen(fname.c_str(), "w");
if (file == NULL) {
return;
@@ -882,6 +890,7 @@
BasicBlock* bb = GetBasicBlock(block_idx);
if (bb == NULL) continue;
if (bb->block_type == kDead) continue;
+ if (bb->hidden) continue;
if (bb->block_type == kEntryBlock) {
fprintf(file, " entry_%d [shape=Mdiamond];\n", bb->id);
} else if (bb->block_type == kExitBlock) {
@@ -916,7 +925,8 @@
} else {
fprintf(file, " {%04x %s %s %s %s\\l}%s\\\n", mir->offset,
mir->ssa_rep ? GetDalvikDisassembly(mir) :
- !IsPseudoMirOp(opcode) ? Instruction::Name(mir->dalvikInsn.opcode) :
+ !MIR::DecodedInstruction::IsPseudoMirOp(opcode) ?
+ Instruction::Name(mir->dalvikInsn.opcode) :
extended_mir_op_names_[opcode - kMirOpFirst],
(mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ",
(mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ",
@@ -1222,7 +1232,7 @@
nop = true;
}
- if (IsPseudoMirOp(opcode)) {
+ if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
str.append(extended_mir_op_names_[opcode - kMirOpFirst]);
} else {
dalvik_format = Instruction::FormatOf(insn.opcode);
@@ -1693,11 +1703,13 @@
// We visited both taken and fallthrough. Now check if we have successors we need to visit.
if (have_successors_ == true) {
// Get information about next successor block.
- SuccessorBlockInfo* successor_block_info = successor_iter_.Next();
-
- // If we don't have anymore successors, return nullptr.
- if (successor_block_info != nullptr) {
- return mir_graph_->GetBasicBlock(successor_block_info->block);
+ for (SuccessorBlockInfo* successor_block_info = successor_iter_.Next();
+ successor_block_info != nullptr;
+ successor_block_info = successor_iter_.Next()) {
+ // If the successor was replaced by the null block (NullBasicBlockId), skip it and take the next one.
+ if (successor_block_info->block != NullBasicBlockId) {
+ return mir_graph_->GetBasicBlock(successor_block_info->block);
+ }
}
}
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index d097328..1556a19 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -80,6 +80,7 @@
kSetsConst,
kFormat35c,
kFormat3rc,
+ kFormatExtended, // Extended format for extended MIRs.
kNullCheckSrc0, // Null check of uses[0].
kNullCheckSrc1, // Null check of uses[1].
kNullCheckSrc2, // Null check of uses[2].
@@ -118,6 +119,7 @@
#define DF_SETS_CONST (UINT64_C(1) << kSetsConst)
#define DF_FORMAT_35C (UINT64_C(1) << kFormat35c)
#define DF_FORMAT_3RC (UINT64_C(1) << kFormat3rc)
+#define DF_FORMAT_EXTENDED (UINT64_C(1) << kFormatExtended)
#define DF_NULL_CHK_0 (UINT64_C(1) << kNullCheckSrc0)
#define DF_NULL_CHK_1 (UINT64_C(1) << kNullCheckSrc1)
#define DF_NULL_CHK_2 (UINT64_C(1) << kNullCheckSrc2)
@@ -284,34 +286,46 @@
*/
bool GetConstant(int64_t* ptr_value, bool* wide) const;
+ static bool IsPseudoMirOp(Instruction::Code opcode) {  // True when opcode is at or above kMirOpFirst.
+ return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst);
+ }
+
+ static bool IsPseudoMirOp(int opcode) {  // Overload for opcodes already held as int; same kMirOpFirst threshold.
+ return opcode >= static_cast<int>(kMirOpFirst);
+ }
+
+ bool IsInvoke() const {  // True only for non-pseudo opcodes whose flags include Instruction::kInvoke.
+ return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kInvoke) == Instruction::kInvoke);  // Pseudo ops short-circuit before FlagsOf().
+ }
+
bool IsStore() const {
- return ((Instruction::FlagsOf(opcode) & Instruction::kStore) == Instruction::kStore);
+ return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kStore) == Instruction::kStore);
}
bool IsLoad() const {
- return ((Instruction::FlagsOf(opcode) & Instruction::kLoad) == Instruction::kLoad);
+ return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kLoad) == Instruction::kLoad);
}
bool IsConditionalBranch() const {
- return (Instruction::FlagsOf(opcode) == (Instruction::kContinue | Instruction::kBranch));
+ return !IsPseudoMirOp(opcode) && (Instruction::FlagsOf(opcode) == (Instruction::kContinue | Instruction::kBranch));
}
/**
* @brief Is the register C component of the decoded instruction a constant?
*/
bool IsCFieldOrConstant() const {
- return ((Instruction::FlagsOf(opcode) & Instruction::kRegCFieldOrConstant) == Instruction::kRegCFieldOrConstant);
+ return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kRegCFieldOrConstant) == Instruction::kRegCFieldOrConstant);
}
/**
* @brief Is the register B component of the decoded instruction a constant?
*/
bool IsBFieldOrConstant() const {
- return ((Instruction::FlagsOf(opcode) & Instruction::kRegBFieldOrConstant) == Instruction::kRegBFieldOrConstant);
+ return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kRegBFieldOrConstant) == Instruction::kRegBFieldOrConstant);
}
bool IsCast() const {
- return ((Instruction::FlagsOf(opcode) & Instruction::kCast) == Instruction::kCast);
+ return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kCast) == Instruction::kCast);
}
/**
@@ -321,11 +335,11 @@
* when crossing such an instruction.
*/
bool Clobbers() const {
- return ((Instruction::FlagsOf(opcode) & Instruction::kClobber) == Instruction::kClobber);
+ return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kClobber) == Instruction::kClobber);
}
bool IsLinear() const {
- return (Instruction::FlagsOf(opcode) & (Instruction::kAdd | Instruction::kSubtract)) != 0;
+ return !IsPseudoMirOp(opcode) && (Instruction::FlagsOf(opcode) & (Instruction::kAdd | Instruction::kSubtract)) != 0;
}
} dalvikInsn;
@@ -877,14 +891,6 @@
return backward_branches_ + forward_branches_;
}
- static bool IsPseudoMirOp(Instruction::Code opcode) {
- return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst);
- }
-
- static bool IsPseudoMirOp(int opcode) {
- return opcode >= static_cast<int>(kMirOpFirst);
- }
-
// Is this vreg in the in set?
bool IsInVReg(int vreg) {
return (vreg >= cu_->num_regs);
@@ -956,10 +962,10 @@
void ComputeTopologicalSortOrder();
BasicBlock* CreateNewBB(BBType block_type);
- bool InlineCallsGate();
- void InlineCallsStart();
- void InlineCalls(BasicBlock* bb);
- void InlineCallsEnd();
+ bool InlineSpecialMethodsGate();
+ void InlineSpecialMethodsStart();
+ void InlineSpecialMethods(BasicBlock* bb);
+ void InlineSpecialMethodsEnd();
/**
* @brief Perform the initial preparation for the Method Uses.
@@ -1059,6 +1065,9 @@
void HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v,
ArenaBitVector* live_in_v, int dalvik_reg_id);
void HandleDef(ArenaBitVector* def_v, int dalvik_reg_id);
+ void HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v,
+ ArenaBitVector* live_in_v,
+ const MIR::DecodedInstruction& d_insn);
bool DoSSAConversion(BasicBlock* bb);
bool InvokeUsesMethodStar(MIR* mir);
int ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction);
@@ -1080,6 +1089,7 @@
void HandleSSAUse(int* uses, int dalvik_reg, int reg_index);
void DataFlowSSAFormat35C(MIR* mir);
void DataFlowSSAFormat3RC(MIR* mir);
+ void DataFlowSSAFormatExtended(MIR* mir);
bool FindLocalLiveIn(BasicBlock* bb);
bool VerifyPredInfo(BasicBlock* bb);
BasicBlock* NeedsVisit(BasicBlock* bb);
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index dc1057f..869c48f 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -137,7 +137,7 @@
break;
}
// Keep going if pseudo op, otherwise terminate
- if (IsPseudoMirOp(mir->dalvikInsn.opcode)) {
+ if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
mir = AdvanceMIR(&tbb, mir);
} else {
mir = NULL;
@@ -877,7 +877,7 @@
struct BasicBlock* next_bb = GetBasicBlock(bb->fall_through);
for (MIR* tmir = next_bb->first_mir_insn; tmir != NULL;
tmir =tmir->next) {
- if (IsPseudoMirOp(tmir->dalvikInsn.opcode)) {
+ if (MIR::DecodedInstruction::IsPseudoMirOp(tmir->dalvikInsn.opcode)) {
continue;
}
// First non-pseudo should be MOVE_RESULT_OBJECT
@@ -1220,7 +1220,7 @@
iget_or_iput->meta.ifield_lowering_info = field_info_index;
}
-bool MIRGraph::InlineCallsGate() {
+bool MIRGraph::InlineSpecialMethodsGate() {
if ((cu_->disable_opt & (1 << kSuppressMethodInlining)) != 0 ||
method_lowering_infos_.Size() == 0u) {
return false;
@@ -1232,7 +1232,7 @@
return true;
}
-void MIRGraph::InlineCallsStart() {
+void MIRGraph::InlineSpecialMethodsStart() {
// Prepare for inlining getters/setters. Since we're inlining at most 1 IGET/IPUT from
// each INVOKE, we can index the data by the MIR::meta::method_lowering_info index.
@@ -1246,12 +1246,12 @@
temp_bit_vector_size_ * sizeof(*temp_insn_data_), kArenaAllocGrowableArray));
}
-void MIRGraph::InlineCalls(BasicBlock* bb) {
+void MIRGraph::InlineSpecialMethods(BasicBlock* bb) {
if (bb->block_type != kDalvikByteCode) {
return;
}
for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
- if (IsPseudoMirOp(mir->dalvikInsn.opcode)) {
+ if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
continue;
}
if (!(Instruction::FlagsOf(mir->dalvikInsn.opcode) & Instruction::kInvoke)) {
@@ -1270,17 +1270,17 @@
MethodReference target = method_info.GetTargetMethod();
if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(target.dex_file)
->GenInline(this, bb, mir, target.dex_method_index)) {
- if (cu_->verbose) {
- LOG(INFO) << "In \"" << PrettyMethod(cu_->method_idx, *cu_->dex_file)
- << "\" @0x" << std::hex << mir->offset
- << " inlined " << method_info.GetInvokeType() << " (" << sharp_type << ") call to \""
- << PrettyMethod(target.dex_method_index, *target.dex_file) << "\"";
+ if (cu_->verbose || cu_->print_pass) {
+ LOG(INFO) << "SpecialMethodInliner: Inlined " << method_info.GetInvokeType() << " ("
+ << sharp_type << ") call to \"" << PrettyMethod(target.dex_method_index, *target.dex_file)
+ << "\" from \"" << PrettyMethod(cu_->method_idx, *cu_->dex_file)
+ << "\" @0x" << std::hex << mir->offset;
}
}
}
}
-void MIRGraph::InlineCallsEnd() {
+void MIRGraph::InlineSpecialMethodsEnd() {
DCHECK(temp_insn_data_ != nullptr);
temp_insn_data_ = nullptr;
DCHECK(temp_bit_vector_ != nullptr);
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
index 4c9bed6..c72a4a6 100644
--- a/compiler/dex/pass_driver_me_opts.cc
+++ b/compiler/dex/pass_driver_me_opts.cc
@@ -35,7 +35,7 @@
const Pass* const PassDriver<PassDriverMEOpts>::g_passes[] = {
GetPassInstance<CacheFieldLoweringInfo>(),
GetPassInstance<CacheMethodLoweringInfo>(),
- GetPassInstance<CallInlining>(),
+ GetPassInstance<SpecialMethodInliner>(),
GetPassInstance<CodeLayout>(),
GetPassInstance<NullCheckEliminationAndTypeInference>(),
GetPassInstance<ClassInitCheckElimination>(),
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index e8f5cb9..3ee3e2e 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -91,17 +91,121 @@
RegLocation rl_dest = mir_graph_->GetDest(mir);
RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+
rl_src = LoadValue(rl_src, src_reg_class);
+ // rl_src may be aliased with rl_result/rl_dest, so do compare early.
+ OpRegImm(kOpCmp, rl_src.reg, 0);
+
ArmConditionCode code = ArmConditionEncoding(mir->meta.ccode);
- RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
- RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
- rl_true = LoadValue(rl_true, result_reg_class);
- rl_false = LoadValue(rl_false, result_reg_class);
- rl_result = EvalLoc(rl_dest, result_reg_class, true);
- OpRegImm(kOpCmp, rl_src.reg, 0);
- NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_true.reg.GetReg(),
- rl_false.reg.GetReg(), code);
+ // The kMirOpSelect has two variants, one for constants and one for moves.
+ bool is_wide = rl_dest.ref || rl_dest.wide;
+
+ if (mir->ssa_rep->num_uses == 1) {
+ uint32_t true_val = mir->dalvikInsn.vB;
+ uint32_t false_val = mir->dalvikInsn.vC;
+
+ int opcode; // The opcode.
+ int left_op, right_op; // The operands.
+ bool rl_result_evaled = false;
+
+ // Check some simple cases.
+ // TODO: Improve this.
+ int zero_reg = (is_wide ? rs_xzr : rs_wzr).GetReg();
+
+ if ((true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0)) {
+ // CSInc cheap based on wzr.
+ if (true_val == 1) {
+ // Negate.
+ code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+ }
+
+ left_op = right_op = zero_reg;
+ opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc;
+ } else if ((true_val == 0 && false_val == 0xFFFFFFFF) ||
+ (true_val == 0xFFFFFFFF && false_val == 0)) {
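+ // CSneg cheap based on wzr. NOTE(review): csneg with the zero register as the negated operand yields -0 == 0, not 0xFFFFFFFF; csinv looks intended - confirm.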
+ if (true_val == 0xFFFFFFFF) {
+ // Negate.
+ code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+ }
+
+ left_op = right_op = zero_reg;
+ opcode = is_wide ? WIDE(kA64Csneg4rrrc) : kA64Csneg4rrrc;
+ } else if (true_val == 0 || false_val == 0) {
+ // Csel half cheap based on wzr.
+ rl_result = EvalLoc(rl_dest, result_reg_class, true);
+ rl_result_evaled = true;
+ if (false_val == 0) {
+ // Negate.
+ code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+ }
+ LoadConstantNoClobber(rl_result.reg, true_val == 0 ? false_val : true_val);
+ left_op = zero_reg;
+ right_op = rl_result.reg.GetReg();
+ opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
+ } else if (true_val == 1 || false_val == 1) {
+ // CSInc half cheap based on wzr.
+ rl_result = EvalLoc(rl_dest, result_reg_class, true);
+ rl_result_evaled = true;
+ if (true_val == 1) {
+ // Negate.
+ code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+ }
+ LoadConstantNoClobber(rl_result.reg, true_val == 1 ? false_val : true_val);
+ left_op = rl_result.reg.GetReg();
+ right_op = zero_reg;
+ opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc;
+ } else if (true_val == 0xFFFFFFFF || false_val == 0xFFFFFFFF) {
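+ // CSneg half cheap based on wzr. NOTE(review): csneg with the zero register as right_op yields 0 on the else path, not 0xFFFFFFFF; csinv looks intended - confirm.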
+ rl_result = EvalLoc(rl_dest, result_reg_class, true);
+ rl_result_evaled = true;
+ if (true_val == 0xFFFFFFFF) {
+ // Negate.
+ code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+ }
+ LoadConstantNoClobber(rl_result.reg, true_val == 0xFFFFFFFF ? false_val : true_val);
+ left_op = rl_result.reg.GetReg();
+ right_op = zero_reg;
+ opcode = is_wide ? WIDE(kA64Csneg4rrrc) : kA64Csneg4rrrc;
+ } else {
+ // Csel. The rest. Use rl_result and a temp.
+ // TODO: To minimize the constants being loaded, check whether one can be inexpensively
+ // loaded as n - 1 or ~n.
+ rl_result = EvalLoc(rl_dest, result_reg_class, true);
+ rl_result_evaled = true;
+ LoadConstantNoClobber(rl_result.reg, true_val);
+ RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
+ if (rl_dest.wide) {
+ if (t_reg2.Is32Bit()) {
+ t_reg2 = As64BitReg(t_reg2);
+ }
+ }
+ LoadConstantNoClobber(t_reg2, false_val);
+
+ // Use csel.
+ left_op = rl_result.reg.GetReg();
+ right_op = t_reg2.GetReg();
+ opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
+ }
+
+ if (!rl_result_evaled) {
+ rl_result = EvalLoc(rl_dest, result_reg_class, true);
+ }
+
+ NewLIR4(opcode, rl_result.reg.GetReg(), left_op, right_op, code);
+ } else {
+ RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
+ RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
+
+ rl_true = LoadValue(rl_true, result_reg_class);
+ rl_false = LoadValue(rl_false, result_reg_class);
+ rl_result = EvalLoc(rl_dest, result_reg_class, true);
+
+ int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
+ NewLIR4(opcode, rl_result.reg.GetReg(),
+ rl_true.reg.GetReg(), rl_false.reg.GetReg(), code);
+ }
StoreValue(rl_dest, rl_result);
}
@@ -110,7 +214,6 @@
RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
LIR* taken = &block_label_list_[bb->taken];
LIR* not_taken = &block_label_list_[bb->fall_through];
- rl_src1 = LoadValueWide(rl_src1, kCoreReg);
// Normalize such that if either operand is constant, src2 will be constant.
ConditionCode ccode = mir->meta.ccode;
if (rl_src1.is_const) {
@@ -118,16 +221,22 @@
ccode = FlipComparisonOrder(ccode);
}
+ rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+
if (rl_src2.is_const) {
- rl_src2 = UpdateLocWide(rl_src2);
+ // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.)
+
int64_t val = mir_graph_->ConstantValueWide(rl_src2);
// Special handling using cbz & cbnz.
if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
OpCmpImmBranch(ccode, rl_src1.reg, 0, taken);
OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken);
return;
+ }
+
// Only handle Imm if src2 is not already in a register.
- } else if (rl_src2.location != kLocPhysReg) {
+ rl_src2 = UpdateLocWide(rl_src2);
+ if (rl_src2.location != kLocPhysReg) {
OpRegImm64(kOpCmp, rl_src1.reg, val);
OpCondBranch(ccode, taken);
OpCondBranch(NegateComparison(ccode), not_taken);
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 5870d22..048aca3 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1046,9 +1046,19 @@
}
// Push a marker to take place of lr.
vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
- // fp regs already sorted.
- for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) {
- vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
+ if (cu_->instruction_set == kThumb2) {
+ // fp regs already sorted.
+ for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) {
+ vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
+ }
+ } else {
+ // For other platforms regs may have been inserted out of order - sort first.
+ std::sort(fp_vmap_table_.begin(), fp_vmap_table_.end());
+ for (size_t i = 0; i < fp_vmap_table_.size(); ++i) {
+ // Copy, keeping only the low VREG_NUM_WIDTH bits to strip the phys register sort key.
+ // The mask is built from -1u: left-shifting a negative int is undefined behavior.
+ vmap_encoder.PushBackUnsigned(
+ ~(-1u << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment));
+ }
}
} else {
DCHECK_EQ(POPCOUNT(core_spill_mask_), 0);
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 6191e4b..45dd7f0 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -96,7 +96,7 @@
uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) {
DCHECK_LT(arg, invoke->dalvikInsn.vA);
- DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode));
+ DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode));
if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc) {
return invoke->dalvikInsn.vC + arg; // Non-range invoke.
} else {
@@ -107,7 +107,7 @@
bool WideArgIsInConsecutiveDalvikRegs(MIR* invoke, uint32_t arg) {
DCHECK_LT(arg + 1, invoke->dalvikInsn.vA);
- DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode));
+ DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode));
return Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc ||
invoke->dalvikInsn.arg[arg + 1u] == invoke->dalvikInsn.arg[arg] + 1u;
}
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index caadc0a..07c615f 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1185,7 +1185,7 @@
work_half->meta.throw_insn = mir;
}
- if (MIRGraph::IsPseudoMirOp(opcode)) {
+ if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
HandleExtendedMethodMIR(bb, mir);
continue;
}
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4885501..87509b6 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -531,7 +531,7 @@
LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast,
LIR* cont = nullptr) :
m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
- m2l->StartSlowPath(cont);
+ m2l->StartSlowPath(this);
}
virtual ~LIRSlowPath() {}
virtual void Compile() = 0;
@@ -705,17 +705,17 @@
int AssignLiteralOffset(CodeOffset offset);
int AssignSwitchTablesOffset(CodeOffset offset);
int AssignFillArrayDataOffset(CodeOffset offset);
- LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
+ virtual LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec);
void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec);
- virtual void StartSlowPath(LIR *label) {}
+ virtual void StartSlowPath(LIRSlowPath* slowpath) {}
virtual void BeginInvoke(CallInfo* info) {}
virtual void EndInvoke(CallInfo* info) {}
// Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation. No code generated.
- RegLocation NarrowRegLoc(RegLocation loc);
+ virtual RegLocation NarrowRegLoc(RegLocation loc);
// Shared by all targets - implemented in local_optimizations.cc
void ConvertMemOpIntoMove(LIR* orig_lir, RegStorage dest, RegStorage src);
@@ -763,7 +763,7 @@
virtual bool IsTemp(RegStorage reg);
bool IsPromoted(RegStorage reg);
bool IsDirty(RegStorage reg);
- void LockTemp(RegStorage reg);
+ virtual void LockTemp(RegStorage reg);
void ResetDef(RegStorage reg);
void NullifyRange(RegStorage reg, int s_reg);
void MarkDef(RegLocation rl, LIR *start, LIR *finish);
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 9000514..8e2a1e3 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -234,8 +234,7 @@
NewLIR0(kPseudoMethodEntry);
/* Spill core callee saves */
SpillCoreRegs();
- /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */
- DCHECK_EQ(num_fp_spills_, 0);
+ SpillFPRegs();
if (!skip_overflow_check) {
class StackOverflowSlowPath : public LIRSlowPath {
public:
@@ -309,6 +308,7 @@
NewLIR0(kPseudoMethodExit);
UnSpillCoreRegs();
+ UnSpillFPRegs();
/* Remove frame except for return address */
stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
NewLIR0(kX86Ret);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index ff7b30e..b0c54e8 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -319,6 +319,8 @@
void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
void SpillCoreRegs();
void UnSpillCoreRegs();
+ void UnSpillFPRegs();
+ void SpillFPRegs();
static const X86EncodingMap EncodingMap[kX86Last];
bool InexpensiveConstantInt(int32_t value);
bool InexpensiveConstantFloat(int32_t value);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e81f505..1ebbbbd 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -52,6 +52,13 @@
rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
};
+static constexpr RegStorage xp_regs_arr_32[] = {
+ rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+};
+static constexpr RegStorage xp_regs_arr_64[] = {
+ rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+ rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
+};
static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32};
static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
@@ -60,6 +67,24 @@
rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
rs_r8, rs_r9, rs_r10, rs_r11
};
+
+// How to make a register available for promotion:
+// 1) Remove the register from the array defining temps
+// 2) Update ClobberCallerSave
+// 3) Update the JNI compiler ABI:
+// 3.1) add the register in the JniCallingConvention method
+// 3.2) update CoreSpillMask/FpSpillMask
+// 4) Update entrypoints
+// 4.1) Update constants in asm_support_x86_64.h for the new frame size
+// 4.2) Remove the entry in SmashCallerSaves
+// 4.3) Update jni_entrypoints to spill/unspill the new callee-save register
+// 4.4) Update quick_entrypoints to spill/unspill the new callee-save register
+// 5) Update the runtime ABI
+// 5.1) Update quick_method_frame_info with the new required spills
+// 5.2) Update QuickArgumentVisitor with the new offsets to GPRs and XMMs
+// Note that you cannot use a register that the ABI assigns to incoming
+// arguments, and that QCG needs one additional XMM temp for the bulk
+// copy performed in preparation for a call.
static constexpr RegStorage core_temps_arr_64q[] = {
rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
rs_r8q, rs_r9q, rs_r10q, rs_r11q
@@ -69,14 +94,14 @@
};
static constexpr RegStorage sp_temps_arr_64[] = {
rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
- rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
+ rs_fr8, rs_fr9, rs_fr10, rs_fr11
};
static constexpr RegStorage dp_temps_arr_32[] = {
rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
};
static constexpr RegStorage dp_temps_arr_64[] = {
rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
- rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
+ rs_dr8, rs_dr9, rs_dr10, rs_dr11
};
static constexpr RegStorage xp_temps_arr_32[] = {
@@ -84,7 +109,7 @@
};
static constexpr RegStorage xp_temps_arr_64[] = {
rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
- rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
+ rs_xr8, rs_xr9, rs_xr10, rs_xr11
};
static constexpr ArrayRef<const RegStorage> empty_pool;
@@ -95,6 +120,8 @@
static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64);
static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32);
static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64);
+static constexpr ArrayRef<const RegStorage> xp_regs_32(xp_regs_arr_32);
+static constexpr ArrayRef<const RegStorage> xp_regs_64(xp_regs_arr_64);
static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32);
static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64);
static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q);
@@ -437,21 +464,13 @@
/* Clobber all regs that might be used by an external C call */
void X86Mir2Lir::ClobberCallerSave() {
- Clobber(rs_rAX);
- Clobber(rs_rCX);
- Clobber(rs_rDX);
- Clobber(rs_rBX);
-
- Clobber(rs_fr0);
- Clobber(rs_fr1);
- Clobber(rs_fr2);
- Clobber(rs_fr3);
- Clobber(rs_fr4);
- Clobber(rs_fr5);
- Clobber(rs_fr6);
- Clobber(rs_fr7);
-
if (cu_->target64) {
+ Clobber(rs_rAX);
+ Clobber(rs_rCX);
+ Clobber(rs_rDX);
+ Clobber(rs_rSI);
+ Clobber(rs_rDI);
+
Clobber(rs_r8);
Clobber(rs_r9);
Clobber(rs_r10);
@@ -461,11 +480,21 @@
Clobber(rs_fr9);
Clobber(rs_fr10);
Clobber(rs_fr11);
- Clobber(rs_fr12);
- Clobber(rs_fr13);
- Clobber(rs_fr14);
- Clobber(rs_fr15);
+ } else {
+ Clobber(rs_rAX);
+ Clobber(rs_rCX);
+ Clobber(rs_rDX);
+ Clobber(rs_rBX);
}
+
+ Clobber(rs_fr0);
+ Clobber(rs_fr1);
+ Clobber(rs_fr2);
+ Clobber(rs_fr3);
+ Clobber(rs_fr4);
+ Clobber(rs_fr5);
+ Clobber(rs_fr6);
+ Clobber(rs_fr7);
}
RegLocation X86Mir2Lir::GetReturnWideAlt() {
@@ -599,11 +628,15 @@
// Target-specific adjustments.
// Add in XMM registers.
- const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
- for (RegStorage reg : *xp_temps) {
+ const ArrayRef<const RegStorage> *xp_regs = cu_->target64 ? &xp_regs_64 : &xp_regs_32;
+ for (RegStorage reg : *xp_regs) {
RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
reginfo_map_.Put(reg.GetReg(), info);
- info->SetIsTemp(true);
+ }
+ const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
+ for (RegStorage reg : *xp_temps) {
+ RegisterInfo* xp_reg_info = GetRegInfo(reg);
+ xp_reg_info->SetIsTemp(true);
}
// Alias single precision xmm to double xmms.
@@ -665,9 +698,11 @@
// Spill mask not including fake return address register
uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
+ OpSize size = cu_->target64 ? k64 : k32;
for (int reg = 0; mask; mask >>= 1, reg++) {
if (mask & 0x1) {
- StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
+ StoreBaseDisp(rs_rX86_SP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg),
+ size, kNotVolatile);
offset += GetInstructionSetPointerSize(cu_->instruction_set);
}
}
@@ -680,14 +715,46 @@
// Spill mask not including fake return address register
uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
+ OpSize size = cu_->target64 ? k64 : k32;
for (int reg = 0; mask; mask >>= 1, reg++) {
if (mask & 0x1) {
- LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
+ LoadBaseDisp(rs_rX86_SP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg),
+ size, kNotVolatile);
offset += GetInstructionSetPointerSize(cu_->instruction_set);
}
}
}
+void X86Mir2Lir::SpillFPRegs() {  // Stores each FP register selected by fp_spill_mask_ to an SP-relative slot.
+ if (num_fp_spills_ == 0) {
+ return;  // Nothing to spill.
+ }
+ uint32_t mask = fp_spill_mask_;  // Bit N set => FP register number N is stored.
+ int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));  // First FP slot.
+ for (int reg = 0; mask; mask >>= 1, reg++) {  // reg tracks the index of the bit being tested.
+ if (mask & 0x1) {
+ StoreBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg),
+ k64, kNotVolatile);  // 64-bit store at SP + offset.
+ offset += sizeof(double);  // Advance to the next 8-byte slot.
+ }
+ }
+}
+void X86Mir2Lir::UnSpillFPRegs() {  // Reloads each FP register selected by fp_spill_mask_ from its SP-relative slot.
+ if (num_fp_spills_ == 0) {
+ return;  // Nothing was spilled.
+ }
+ uint32_t mask = fp_spill_mask_;  // Bit N set => FP register number N is reloaded.
+ int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));  // First FP slot.
+ for (int reg = 0; mask; mask >>= 1, reg++) {  // Same bit order as SpillFPRegs, so slots match.
+ if (mask & 0x1) {
+ LoadBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg),
+ k64, kNotVolatile);  // 64-bit load from SP + offset.
+ offset += sizeof(double);  // Advance to the next 8-byte slot.
+ }
+ }
+}
+
+
bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 657160f..5c7c91b 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -917,7 +917,7 @@
for (MIR *mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
int opcode = mir->dalvikInsn.opcode;
- if (MIRGraph::IsPseudoMirOp(opcode)) {
+ if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
AnalyzeExtendedMIR(opcode, bb, mir);
} else {
AnalyzeMIR(opcode, bb, mir);
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 2789923..5657381 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -66,7 +66,9 @@
* XMM6: caller | caller, arg7 | caller, scratch | caller, arg7, scratch
* XMM7: caller | caller, arg8 | caller, scratch | caller, arg8, scratch
* --- x86-64/x32 registers
- * XMM8 .. 15: caller save available as scratch registers for ART.
+ * XMM8 .. 11: caller save available as scratch registers for ART.
+ * XMM12 .. 15: callee save available as promoted registers for ART.
+ * This change (XMM12..15) is for QCG only, for others they are caller save.
*
* X87 is a necessary evil outside of ART code for x86:
* ST0: x86 float/double native return value, caller save
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index db383c4..892b302 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -251,7 +251,8 @@
// Special-case handling for format 35c/3rc invokes
Instruction::Code opcode = mir->dalvikInsn.opcode;
- int flags = IsPseudoMirOp(opcode) ? 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode);
+ int flags = MIR::DecodedInstruction::IsPseudoMirOp(opcode) ?
+ 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode);
if ((flags & Instruction::kInvoke) &&
(attrs & (DF_FORMAT_35C | DF_FORMAT_3RC))) {
DCHECK_EQ(next, 0);
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 5febed2..525f05c 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -130,6 +130,10 @@
callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R13));
callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R14));
callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R15));
+ callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM12));
+ callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM13));
+ callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM14));
+ callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM15));
}
uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
@@ -137,6 +141,10 @@
1 << kNumberOfCpuRegisters;
}
+uint32_t X86_64JniCallingConvention::FpSpillMask() const {
+ return 1 << XMM12 | 1 << XMM13 | 1 << XMM14 | 1 << XMM15;  // One bit per XMM register number: XMM12-15 only.
+}
+
size_t X86_64JniCallingConvention::FrameSize() {
// Method*, return address and callee save area size, local reference segment state
size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) +
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 1ba5353..7a90c6e 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -61,9 +61,7 @@
}
ManagedRegister ReturnScratchRegister() const OVERRIDE;
uint32_t CoreSpillMask() const OVERRIDE;
- uint32_t FpSpillMask() const OVERRIDE {
- return 0;
- }
+ uint32_t FpSpillMask() const OVERRIDE;
bool IsCurrentParamInRegister() OVERRIDE;
bool IsCurrentParamOnStack() OVERRIDE;
ManagedRegister CurrentParamRegister() OVERRIDE;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 4d5d613..78738d8 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1671,16 +1671,31 @@
const std::vector<ManagedRegister>& spill_regs,
const ManagedRegisterEntrySpills& entry_spills) {
CHECK_ALIGNED(frame_size, kStackAlignment);
+ int gpr_count = 0;
for (int i = spill_regs.size() - 1; i >= 0; --i) {
- pushq(spill_regs.at(i).AsX86_64().AsCpuRegister());
+ x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+ if (spill.IsCpuRegister()) {
+ pushq(spill.AsCpuRegister());
+ gpr_count++;
+ }
}
// return address then method on stack
- addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(frame_size) + (spill_regs.size() * kFramePointerSize) +
- sizeof(StackReference<mirror::ArtMethod>) /*method*/ +
- kFramePointerSize /*return address*/));
+ int64_t rest_of_frame = static_cast<int64_t>(frame_size)
+ - (gpr_count * kFramePointerSize)
+ - kFramePointerSize /*return address*/;
+ subq(CpuRegister(RSP), Immediate(rest_of_frame));
+ // spill xmms
+ int64_t offset = rest_of_frame;
+ for (int i = spill_regs.size() - 1; i >= 0; --i) {
+ x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+ if (spill.IsXmmRegister()) {
+ offset -= sizeof(double);
+ movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
+ }
+ }
DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>));
- subq(CpuRegister(RSP), Immediate(4));
+
movl(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
for (size_t i = 0; i < entry_spills.size(); ++i) {
@@ -1707,9 +1722,24 @@
void X86_64Assembler::RemoveFrame(size_t frame_size,
const std::vector<ManagedRegister>& spill_regs) {
CHECK_ALIGNED(frame_size, kStackAlignment);
- addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - kFramePointerSize));
+ int gpr_count = 0;
+ // unspill xmms
+ int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
for (size_t i = 0; i < spill_regs.size(); ++i) {
- popq(spill_regs.at(i).AsX86_64().AsCpuRegister());
+ x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+ if (spill.IsXmmRegister()) {
+ offset += sizeof(double);
+ movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
+ } else {
+ gpr_count++;
+ }
+ }
+ addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize));
+ for (size_t i = 0; i < spill_regs.size(); ++i) {
+ x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+ if (spill.IsCpuRegister()) {
+ popq(spill.AsCpuRegister());
+ }
}
ret();
}
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index f7bad8b..dc1758f 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -246,11 +246,9 @@
str << "pushq %rsi\n";
str << "pushq %r10\n";
// 2) Move down the stack pointer.
- ssize_t displacement = -static_cast<ssize_t>(frame_size) + spill_regs.size() * 8 +
- sizeof(StackReference<mirror::ArtMethod>) + 8;
- str << "addq $" << displacement << ", %rsp\n";
- // 3) Make space for method reference, and store it.
- str << "subq $4, %rsp\n";
+ ssize_t displacement = static_cast<ssize_t>(frame_size) - (spill_regs.size() * 8 + 8);
+ str << "subq $" << displacement << ", %rsp\n";
+ // 3) Store method reference.
str << "movl %edi, (%rsp)\n";
// 4) Entry spills.
str << "movq %rax, " << frame_size + 0 << "(%rsp)\n";
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index bff8501..05d0ef8 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -35,9 +35,9 @@
// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
#define THREAD_ID_OFFSET 12
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE (64 + 4*8)  // Parenthesized for safe expansion; 4*8 = XMM12-15 slots.
+#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE (64 + 4*8)  // Parenthesized for safe expansion; 4*8 = XMM12-15 slots.
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (176 + 4*8)  // Parenthesized for safe expansion; 4*8 = XMM12-15 slots.
// Expected size of a heap reference
#define HEAP_REFERENCE_SIZE 4
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index e1f47ee..7699eaf 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -78,6 +78,18 @@
gprs_[R9] = nullptr;
gprs_[R10] = nullptr;
gprs_[R11] = nullptr;
+ fprs_[XMM0] = nullptr;
+ fprs_[XMM1] = nullptr;
+ fprs_[XMM2] = nullptr;
+ fprs_[XMM3] = nullptr;
+ fprs_[XMM4] = nullptr;
+ fprs_[XMM5] = nullptr;
+ fprs_[XMM6] = nullptr;
+ fprs_[XMM7] = nullptr;
+ fprs_[XMM8] = nullptr;
+ fprs_[XMM9] = nullptr;
+ fprs_[XMM10] = nullptr;
+ fprs_[XMM11] = nullptr;
}
bool X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
@@ -102,41 +114,26 @@
}
}
+extern "C" void art_quick_do_long_jump(uintptr_t*, uintptr_t*);
+
void X86_64Context::DoLongJump() {
#if defined(__x86_64__)
- // Array of GPR values, filled from the context backward for the long jump pop. We add a slot at
- // the top for the stack pointer that doesn't get popped in a pop-all.
- volatile uintptr_t gprs[kNumberOfCpuRegisters + 1];
+ uintptr_t gprs[kNumberOfCpuRegisters + 1];
+ uintptr_t fprs[kNumberOfFloatRegisters];
+
for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86_64Context::kBadGprBase + i;
}
+ for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) {
+ fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : X86_64Context::kBadFprBase + i;
+ }
+
// We want to load the stack pointer one slot below so that the ret will pop eip.
uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize;
gprs[kNumberOfCpuRegisters] = rsp;
*(reinterpret_cast<uintptr_t*>(rsp)) = rip_;
- __asm__ __volatile__(
- "movq %0, %%rsp\n\t" // RSP points to gprs.
- "popq %%r15\n\t" // Load all registers except RSP and RIP with values in gprs.
- "popq %%r14\n\t"
- "popq %%r13\n\t"
- "popq %%r12\n\t"
- "popq %%r11\n\t"
- "popq %%r10\n\t"
- "popq %%r9\n\t"
- "popq %%r8\n\t"
- "popq %%rdi\n\t"
- "popq %%rsi\n\t"
- "popq %%rbp\n\t"
- "addq $8, %%rsp\n\t"
- "popq %%rbx\n\t"
- "popq %%rdx\n\t"
- "popq %%rcx\n\t"
- "popq %%rax\n\t"
- "popq %%rsp\n\t" // Load stack pointer.
- "ret\n\t" // From higher in the stack pop rip.
- : // output.
- : "g"(&gprs[0]) // input.
- :); // clobber.
+
+ art_quick_do_long_jump(gprs, fprs);
#else
UNIMPLEMENTED(FATAL);
#endif
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 609d1c6..204d52c 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -35,7 +35,7 @@
extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
// Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
+extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
const mirror::Class* ref_class);
extern "C" void art_quick_check_cast(void*, void*);
@@ -129,7 +129,7 @@
ResetQuickAllocEntryPoints(qpoints);
// Cast
- qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
+ qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
qpoints->pCheckCast = art_quick_check_cast;
// DexCache
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
index d668797..f6736df 100644
--- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -28,8 +28,8 @@
PUSH rdx // Arg.
PUSH rcx // Arg.
// Create space for FPR args, plus padding for alignment
- subq LITERAL(72), %rsp
- CFI_ADJUST_CFA_OFFSET(72)
+ subq LITERAL(72 + 4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(72 + 4 * 8)
// Save FPRs.
movq %xmm0, 0(%rsp)
movq %xmm1, 8(%rsp)
@@ -39,6 +39,10 @@
movq %xmm5, 40(%rsp)
movq %xmm6, 48(%rsp)
movq %xmm7, 56(%rsp)
+ movq %xmm12, 64(%rsp)
+ movq %xmm13, 72(%rsp)
+ movq %xmm14, 80(%rsp)
+ movq %xmm15, 88(%rsp)
// prepare call
movq %gs:THREAD_SELF_OFFSET, %rdi // RDI := Thread::Current()
// call
@@ -52,8 +56,12 @@
movq 40(%rsp), %xmm5
movq 48(%rsp), %xmm6
movq 56(%rsp), %xmm7
- addq LITERAL(72), %rsp
- CFI_ADJUST_CFA_OFFSET(-72)
+ movq 64(%rsp), %xmm12
+ movq 72(%rsp), %xmm13
+ movq 80(%rsp), %xmm14
+ movq 88(%rsp), %xmm15
+ addq LITERAL(72 + 4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-72 - 4 * 8)
POP rcx // Arg.
POP rdx // Arg.
POP rsi // Arg.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 8fa947c..7f7226c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -16,6 +16,26 @@
#include "asm_support_x86_64.S"
+MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
+ // Create space for ART FP callee-saved registers (XMM12-15, 8 bytes each).
+ subq MACRO_LITERAL(4 * 8), %rsp // MACRO_LITERAL: immediate inside a macro body.
+ CFI_ADJUST_CFA_OFFSET(4 * 8)
+ movq %xmm12, 0(%rsp)
+ movq %xmm13, 8(%rsp)
+ movq %xmm14, 16(%rsp)
+ movq %xmm15, 24(%rsp)
+END_MACRO
+
+MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
+ // Restore ART FP callee-saved registers (XMM12-15) and release their 4 * 8 bytes.
+ movq 0(%rsp), %xmm12
+ movq 8(%rsp), %xmm13
+ movq 16(%rsp), %xmm14
+ movq 24(%rsp), %xmm15
+ addq MACRO_LITERAL(4 * 8), %rsp // MACRO_LITERAL: immediate inside a macro body.
+ CFI_ADJUST_CFA_OFFSET(- 4 * 8)
+END_MACRO
+
// For x86, the CFA is esp+4, the address above the pushed return address on the stack.
/*
@@ -37,6 +57,14 @@
PUSH r12 // Callee save.
PUSH rbp // Callee save.
PUSH rbx // Callee save.
+ // Create space for the FP callee-save registers (XMM12-15).
+ subq LITERAL(4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(4 * 8)
+ // Save FPRs.
+ movq %xmm12, 0(%rsp)
+ movq %xmm13, 8(%rsp)
+ movq %xmm14, 16(%rsp)
+ movq %xmm15, 24(%rsp)
subq MACRO_LITERAL(8), %rsp // Space for Method* (also aligns the frame).
CFI_ADJUST_CFA_OFFSET(8)
// R10 := ArtMethod* for save all callee save frame method.
@@ -46,7 +74,7 @@
// Ugly compile-time check, but we only have the preprocessor.
// Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 8 + 8)
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif // __APPLE__
@@ -71,8 +99,14 @@
PUSH r12 // Callee save.
PUSH rbp // Callee save.
PUSH rbx // Callee save.
- subq MACRO_LITERAL(8), %rsp // Space for Method* (also aligns the frame).
- CFI_ADJUST_CFA_OFFSET(8)
+ // Create space for the ArtMethod* slot plus the FP callee-save registers (XMM12-15).
+ subq LITERAL(8 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(8 + 4*8)
+ // Save FPRs.
+ movq %xmm12, 8(%rsp)
+ movq %xmm13, 16(%rsp)
+ movq %xmm14, 24(%rsp)
+ movq %xmm15, 32(%rsp)
// R10 := ArtMethod* for refs only callee save frame method.
movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
// Store ArtMethod* to bottom of stack.
@@ -80,15 +114,19 @@
// Ugly compile-time check, but we only have the preprocessor.
// Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 8 + 8)
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif // __APPLE__
END_MACRO
MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
- addq MACRO_LITERAL(8), %rsp
- CFI_ADJUST_CFA_OFFSET(-8)
+ movq 8(%rsp), %xmm12
+ movq 16(%rsp), %xmm13
+ movq 24(%rsp), %xmm14
+ movq 32(%rsp), %xmm15
+ addq LITERAL(8 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
// TODO: optimize by not restoring callee-saves restored by the ABI
POP rbx
POP rbp
@@ -123,8 +161,8 @@
PUSH rdx // Quick arg 2.
PUSH rcx // Quick arg 3.
// Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
- subq MACRO_LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(80)
+ subq MACRO_LITERAL(80 + 4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
// R10 := ArtMethod* for ref and args callee save frame method.
movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
// Save FPRs.
@@ -136,12 +174,16 @@
movq %xmm5, 56(%rsp)
movq %xmm6, 64(%rsp)
movq %xmm7, 72(%rsp)
+ movq %xmm12, 80(%rsp)
+ movq %xmm13, 88(%rsp)
+ movq %xmm14, 96(%rsp)
+ movq %xmm15, 104(%rsp)
// Store ArtMethod* to bottom of stack.
movq %r10, 0(%rsp)
// Ugly compile-time check, but we only have the preprocessor.
// Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 80 + 8)
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 4*8 + 80 + 8)
#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif // __APPLE__
@@ -157,8 +199,12 @@
movq 56(%rsp), %xmm5
movq 64(%rsp), %xmm6
movq 72(%rsp), %xmm7
- addq MACRO_LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(-80)
+ movq 80(%rsp), %xmm12
+ movq 88(%rsp), %xmm13
+ movq 96(%rsp), %xmm14
+ movq 104(%rsp), %xmm15
+ addq MACRO_LITERAL(80 + 4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
// Restore callee and GPR args, mixed together to agree with core spills bitmap.
POP rcx
POP rdx
@@ -536,6 +582,58 @@
#endif // __APPLE__
END_FUNCTION art_quick_invoke_static_stub
+ /*
+ * Long jump stub.
+ * On entry:
+ * rdi = gprs
+ * rsi = fprs
+ */
+DEFINE_FUNCTION art_quick_do_long_jump
+#if defined(__APPLE__)
+ int3 // Breakpoint trap: no long-jump sequence is provided in the __APPLE__ build.
+ int3
+#else
+ // Restore FPRs: xmmN is loaded from the 8-byte slot at fprs + 8 * N.
+ movq 0(%rsi), %xmm0
+ movq 8(%rsi), %xmm1
+ movq 16(%rsi), %xmm2
+ movq 24(%rsi), %xmm3
+ movq 32(%rsi), %xmm4
+ movq 40(%rsi), %xmm5
+ movq 48(%rsi), %xmm6
+ movq 56(%rsi), %xmm7
+ movq 64(%rsi), %xmm8
+ movq 72(%rsi), %xmm9
+ movq 80(%rsi), %xmm10
+ movq 88(%rsi), %xmm11
+ movq 96(%rsi), %xmm12
+ movq 104(%rsi), %xmm13
+ movq 112(%rsi), %xmm14
+ movq 120(%rsi), %xmm15
+ // Restore GPRs.
+ movq %rdi, %rsp // RSP points to gprs.
+ // Load all registers except RSP and RIP with values in gprs.
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rdi
+ popq %rsi
+ popq %rbp
+ addq LITERAL(8), %rsp // Skip rsp
+ popq %rbx
+ popq %rdx
+ popq %rcx
+ popq %rax
+ popq %rsp // Load stack pointer.
+ ret // From higher in the stack pop rip.
+#endif // __APPLE__
+END_FUNCTION art_quick_do_long_jump
+
MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
DEFINE_FUNCTION VAR(c_name, 0)
SETUP_REF_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC
@@ -820,13 +918,17 @@
DEFINE_FUNCTION art_quick_check_cast
PUSH rdi // Save args for exc
PUSH rsi
+ SETUP_FP_CALLEE_SAVE_FRAME
call PLT_SYMBOL(artIsAssignableFromCode) // (Class* klass, Class* ref_klass)
testq %rax, %rax
jz 1f // jump forward if not assignable
+ RESTORE_FP_CALLEE_SAVE_FRAME
addq LITERAL(16), %rsp // pop arguments
CFI_ADJUST_CFA_OFFSET(-16)
+
ret
1:
+ RESTORE_FP_CALLEE_SAVE_FRAME
POP rsi // Pop arguments
POP rdi
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context
@@ -907,6 +1009,7 @@
PUSH rdx
subq LITERAL(8), %rsp // Alignment padding.
CFI_ADJUST_CFA_OFFSET(8)
+ SETUP_FP_CALLEE_SAVE_FRAME
// "Uncompress" = do nothing, as already zero-extended on load.
movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
@@ -918,6 +1021,7 @@
testq %rax, %rax
jz .Lthrow_array_store_exception
+ RESTORE_FP_CALLEE_SAVE_FRAME
// Restore arguments.
addq LITERAL(8), %rsp
CFI_ADJUST_CFA_OFFSET(-8)
@@ -934,6 +1038,7 @@
// movb %dl, (%rdx, %rdi)
ret
.Lthrow_array_store_exception:
+ RESTORE_FP_CALLEE_SAVE_FRAME
// Restore arguments.
addq LITERAL(8), %rsp
CFI_ADJUST_CFA_OFFSET(-8)
@@ -1012,8 +1117,8 @@
PUSH rdx // Quick arg 2.
PUSH rcx // Quick arg 3.
// Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
- subq LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(80)
+ subq LITERAL(80 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(80 + 4*8)
// Save FPRs.
movq %xmm0, 16(%rsp)
movq %xmm1, 24(%rsp)
@@ -1023,14 +1128,18 @@
movq %xmm5, 56(%rsp)
movq %xmm6, 64(%rsp)
movq %xmm7, 72(%rsp)
+ movq %xmm12, 80(%rsp)
+ movq %xmm13, 88(%rsp)
+ movq %xmm14, 96(%rsp)
+ movq %xmm15, 104(%rsp)
// Store proxy method to bottom of stack.
movq %rdi, 0(%rsp)
movq %gs:THREAD_SELF_OFFSET, %rdx // Pass Thread::Current().
movq %rsp, %rcx // Pass SP.
call PLT_SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
movq %rax, %xmm0 // Copy return value in case of float returns.
- addq LITERAL(168), %rsp // Pop arguments.
- CFI_ADJUST_CFA_OFFSET(-168)
+ addq LITERAL(168 + 4*8), %rsp // Pop arguments.
+ CFI_ADJUST_CFA_OFFSET(-168 - 4*8)
RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
@@ -1156,8 +1265,8 @@
PUSH rdx // Quick arg 2.
PUSH rcx // Quick arg 3.
// Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
- subq LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(80)
+ subq LITERAL(80 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(80 + 4*8)
// Save FPRs.
movq %xmm0, 16(%rsp)
movq %xmm1, 24(%rsp)
@@ -1167,6 +1276,10 @@
movq %xmm5, 56(%rsp)
movq %xmm6, 64(%rsp)
movq %xmm7, 72(%rsp)
+ movq %xmm12, 80(%rsp)
+ movq %xmm13, 88(%rsp)
+ movq %xmm14, 96(%rsp)
+ movq %xmm15, 104(%rsp)
movq %rdi, 0(%rsp) // Store native ArtMethod* to bottom of stack.
movq %rsp, %rbp // save SP at (old) callee-save frame
CFI_DEF_CFA_REGISTER(rbp)
@@ -1260,9 +1373,13 @@
movq 56(%rsp), %xmm5
movq 64(%rsp), %xmm6
movq 72(%rsp), %xmm7
+ movq 80(%rsp), %xmm12
+ movq 88(%rsp), %xmm13
+ movq 96(%rsp), %xmm14
+ movq 104(%rsp), %xmm15
// was 80 bytes
- addq LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(-80)
+ addq LITERAL(80 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
// Save callee and GPR args, mixed together to agree with core spills bitmap.
POP rcx // Arg.
POP rdx // Arg.
@@ -1292,9 +1409,13 @@
movq 56(%rsp), %xmm5
movq 64(%rsp), %xmm6
movq 72(%rsp), %xmm7
- // was 80 bytes
- addq LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(-80)
+ movq 80(%rsp), %xmm12
+ movq 88(%rsp), %xmm13
+ movq 96(%rsp), %xmm14
+ movq 104(%rsp), %xmm15
+ // was 80 + 32 bytes
+ addq LITERAL(80 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
// Save callee and GPR args, mixed together to agree with core spills bitmap.
POP rcx // Arg.
POP rdx // Arg.
@@ -1450,3 +1571,10 @@
END_FUNCTION art_quick_string_compareto
UNIMPLEMENTED art_quick_memcmp16
+
+DEFINE_FUNCTION art_quick_assignable_from_code
+ // Preserve XMM12-15 around the C++ call below.
+ SETUP_FP_CALLEE_SAVE_FRAME
+ // The return address (8 bytes) plus the FP save area (32 bytes) leaves RSP 8 bytes off a
+ // 16-byte boundary, assuming the caller's RSP was 16-byte aligned at its call. Pad so the
+ // C++ callee is entered with the stack alignment the native ABI requires.
+ subq LITERAL(8), %rsp // Alignment padding.
+ CFI_ADJUST_CFA_OFFSET(8)
+ call PLT_SYMBOL(artIsAssignableFromCode) // (const mirror::Class*, const mirror::Class*)
+ addq LITERAL(8), %rsp // Drop the alignment padding.
+ CFI_ADJUST_CFA_OFFSET(-8)
+ RESTORE_FP_CALLEE_SAVE_FRAME
+ ret
+END_FUNCTION art_quick_assignable_from_code
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
index 6183909..53aa212 100644
--- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -34,6 +34,9 @@
(1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
(1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
(1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7);
+static constexpr uint32_t kX86_64CalleeSaveFpSpills =
+ (1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) |
+ (1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15);
constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
return kX86_64CalleeSaveRefSpills |
@@ -42,7 +45,8 @@
}
constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
- return (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
+ return kX86_64CalleeSaveFpSpills |
+ (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
}
constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/x86_64/registers_x86_64.cc b/runtime/arch/x86_64/registers_x86_64.cc
index 38f3494..f29c426 100644
--- a/runtime/arch/x86_64/registers_x86_64.cc
+++ b/runtime/arch/x86_64/registers_x86_64.cc
@@ -34,5 +34,14 @@
return os;
}
+std::ostream& operator<<(std::ostream& os, const FloatRegister& rhs) {  // Streams a readable name for rhs.
+ if (rhs >= XMM0 && rhs <= XMM15) {
+ os << "xmm" << static_cast<int>(rhs);  // In-range values print as "xmm" followed by the register number.
+ } else {
+ os << "Register[" << static_cast<int>(rhs) << "]";  // Out-of-range values print their raw number.
+ }
+ return os;  // Returned to allow chaining.
+}
+
} // namespace x86_64
} // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 95cb85e..2a66f2f 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -175,8 +175,8 @@
static constexpr size_t kNumQuickGprArgs = 5; // 5 arguments passed in GPRs.
static constexpr size_t kNumQuickFprArgs = 8; // 8 arguments passed in FPRs.
static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg.
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80; // Offset of first GPR arg.
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168; // Offset of return address.
+ static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80 + 4*8; // Offset of first GPR arg.
+ static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168 + 4*8; // Offset of return address.
static size_t GprIndexToGprOffset(uint32_t gpr_index) {
switch (gpr_index) {
case 0: return (4 * GetBytesPerGprSpillLocation(kRuntimeISA));
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 722576f..c66e80d 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -2112,30 +2112,40 @@
// result in occasionally not releasing pages which we could release.
byte pm = page_map_[i];
switch (pm) {
+ case kPageMapReleased:
+ // Fall through.
case kPageMapEmpty: {
- // Only lock if we have an empty page since we want to prevent other threads racing in.
+ // This is currently the start of a free page run.
+ // Acquire the lock to prevent other threads racing in and modifying the page map.
MutexLock mu(self, lock_);
// Check that it's still empty after we acquired the lock since another thread could have
// raced in and placed an allocation here.
- pm = page_map_[i];
- if (LIKELY(pm == kPageMapEmpty)) {
- // The start of a free page run. Release pages.
+ if (IsFreePage(i)) {
+ // Free page runs can start with a released page if we coalesced a released page free
+ // page run with an empty page run.
FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
- DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
- size_t fpr_size = fpr->ByteSize(this);
- DCHECK(IsAligned<kPageSize>(fpr_size));
- byte* start = reinterpret_cast<byte*>(fpr);
- reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
- i += fpr_size / kPageSize;
- DCHECK_LE(i, page_map_size_);
+ // There is a race condition where FreePage can coalesce fpr with the previous
+ // free page run before we acquire lock_. In that case free_page_runs_.find will not find
+ // a run starting at fpr. To handle this race, we skip reclaiming the page range and go
+ // to the next page.
+ if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
+ size_t fpr_size = fpr->ByteSize(this);
+ DCHECK(IsAligned<kPageSize>(fpr_size));
+ byte* start = reinterpret_cast<byte*>(fpr);
+ reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
+ size_t pages = fpr_size / kPageSize;
+ CHECK_GT(pages, 0U) << "Infinite loop probable";
+ i += pages;
+ DCHECK_LE(i, page_map_size_);
+ break;
+ }
}
- break;
+ // Fall through.
}
case kPageMapLargeObject: // Fall through.
case kPageMapLargeObjectPart: // Fall through.
case kPageMapRun: // Fall through.
case kPageMapRunPart: // Fall through.
- case kPageMapReleased: // Fall through since it is already released.
++i;
break; // Skip.
default:
@@ -2175,6 +2185,38 @@
return reclaimed_bytes;
}
+// Appends a fragmentation note to |os| when the largest free page run is too small to
+// satisfy an allocation of |failed_alloc_bytes|; writes nothing to |os| otherwise.
+// Holds bulk_free_lock_ (as writer) and lock_ while scanning free_page_runs_.
+void RosAlloc::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) {
+  Thread* self = Thread::Current();
+  // Byte size of the largest free page run seen (a size in bytes, not a page count).
+  size_t largest_contiguous_free_bytes = 0;
+  WriterMutexLock wmu(self, bulk_free_lock_);
+  MutexLock mu(self, lock_);
+  for (FreePageRun* fpr : free_page_runs_) {
+    largest_contiguous_free_bytes = std::max(largest_contiguous_free_bytes,
+                                             fpr->ByteSize(this));
+  }
+  if (failed_alloc_bytes > kLargeSizeThreshold) {
+    // Large allocation: the requirement is the request rounded up to whole pages.
+    size_t required_bytes = RoundUp(failed_alloc_bytes, kPageSize);
+    if (required_bytes > largest_contiguous_free_bytes) {
+      os << "; failed due to fragmentation (required contiguous free "
+         << required_bytes << " bytes where largest contiguous free "
+         << largest_contiguous_free_bytes << " bytes)";
+    }
+  } else {
+    // Non-large allocation: the requirement is the page count looked up for this size's index.
+    size_t required_bytes = numOfPages[SizeToIndex(failed_alloc_bytes)] * kPageSize;
+    if (required_bytes > largest_contiguous_free_bytes) {
+      os << "; failed due to fragmentation (required contiguous free "
+         << required_bytes << " bytes for a new buffer where largest contiguous free "
+         << largest_contiguous_free_bytes << " bytes)";
+    }
+  }
+}
+
} // namespace allocator
} // namespace gc
} // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index fad0dc8..85a8225 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -590,6 +590,8 @@
// Verify for debugging.
void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes);
};
} // namespace allocator
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index e9adca0..19715e9 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -805,37 +805,23 @@
return NULL;
}
-static void MSpaceChunkCallback(void* start, void* end, size_t used_bytes, void* arg) {
- size_t chunk_size = reinterpret_cast<uint8_t*>(end) - reinterpret_cast<uint8_t*>(start);
- if (used_bytes < chunk_size) {
- size_t chunk_free_bytes = chunk_size - used_bytes;
- size_t& max_contiguous_allocation = *reinterpret_cast<size_t*>(arg);
- max_contiguous_allocation = std::max(max_contiguous_allocation, chunk_free_bytes);
- }
-}
-
-void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation) {
+void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) {
std::ostringstream oss;
size_t total_bytes_free = GetFreeMemory();
oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
<< " free bytes";
// If the allocation failed due to fragmentation, print out the largest continuous allocation.
- if (!large_object_allocation && total_bytes_free >= byte_count) {
- size_t max_contiguous_allocation = 0;
- for (const auto& space : continuous_spaces_) {
- if (space->IsMallocSpace()) {
- // To allow the Walk/InspectAll() to exclusively-lock the mutator
- // lock, temporarily release the shared access to the mutator
- // lock here by transitioning to the suspended state.
- Locks::mutator_lock_->AssertSharedHeld(self);
- self->TransitionFromRunnableToSuspended(kSuspended);
- space->AsMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
- self->TransitionFromSuspendedToRunnable();
- Locks::mutator_lock_->AssertSharedHeld(self);
- }
+ if (allocator_type != kAllocatorTypeLOS && total_bytes_free >= byte_count) {
+ space::MallocSpace* space = nullptr;
+ if (allocator_type == kAllocatorTypeNonMoving) {
+ space = non_moving_space_;
+ } else if (allocator_type == kAllocatorTypeRosAlloc ||
+ allocator_type == kAllocatorTypeDlMalloc) {
+ space = main_space_;
}
- oss << "; failed due to fragmentation (largest possible contiguous allocation "
- << max_contiguous_allocation << " bytes)";
+ if (space != nullptr) {
+ space->LogFragmentationAllocFailure(oss, byte_count);
+ }
}
self->ThrowOutOfMemoryError(oss.str().c_str());
}
@@ -1188,7 +1174,7 @@
}
ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size);
if (ptr == nullptr) {
- ThrowOutOfMemoryError(self, alloc_size, allocator == kAllocatorTypeLOS);
+ ThrowOutOfMemoryError(self, alloc_size, allocator);
}
return ptr;
}
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index c9ea03e..86dab21 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -120,7 +120,7 @@
static constexpr size_t kDefaultStartingSize = kPageSize;
static constexpr size_t kDefaultInitialSize = 2 * MB;
- static constexpr size_t kDefaultMaximumSize = 32 * MB;
+ static constexpr size_t kDefaultMaximumSize = 256 * MB;
static constexpr size_t kDefaultMaxFree = 2 * MB;
static constexpr size_t kDefaultMinFree = kDefaultMaxFree / 4;
static constexpr size_t kDefaultLongPauseLogThreshold = MsToNs(5);
@@ -194,7 +194,6 @@
void CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- void ThrowOutOfMemoryError(size_t byte_count, bool large_object_allocation);
void RegisterNativeAllocation(JNIEnv* env, int bytes);
void RegisterNativeFree(JNIEnv* env, int bytes);
@@ -628,7 +627,7 @@
size_t* usable_size)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- void ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation)
+ void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
template <bool kGrow>
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 5123e47..456d1b3 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -304,6 +304,30 @@
}
#endif
+static void MSpaceChunkCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  // |arg| points at a size_t holding the largest free remainder seen so far.
+  const size_t chunk_bytes = static_cast<uint8_t*>(end) - static_cast<uint8_t*>(start);
+  if (used_bytes >= chunk_bytes) {
+    return;  // No free bytes in this chunk.
+  }
+  size_t* largest_free = static_cast<size_t*>(arg);
+  *largest_free = std::max(*largest_free, chunk_bytes - used_bytes);
+}
+
+void DlMallocSpace::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) {
+  // Walk()/InspectAll() must be able to take the mutator lock exclusively, so give up our
+  // shared hold for the duration of the walk by moving to the suspended state.
+  Thread* const current = Thread::Current();
+  size_t largest_free_chunk = 0;
+  Locks::mutator_lock_->AssertSharedHeld(current);
+  current->TransitionFromRunnableToSuspended(kSuspended);
+  Walk(MSpaceChunkCallback, &largest_free_chunk);
+  current->TransitionFromSuspendedToRunnable();
+  Locks::mutator_lock_->AssertSharedHeld(current);
+  os << "; failed due to fragmentation (largest possible contiguous allocation ";
+  os << largest_free_chunk << " bytes)";
+}
+
} // namespace space
} // namespace gc
} // namespace art
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index accd26b..7aff14b 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -124,6 +124,9 @@
return this;
}
+ void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
protected:
DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin, byte* end,
byte* limit, size_t growth_limit, bool can_move_objects, size_t starting_size,
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index d24016c..6f49fbf 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -19,6 +19,7 @@
#include "space.h"
+#include <iostream>
#include <valgrind.h>
#include <memcheck/memcheck.h>
@@ -132,6 +133,8 @@
return can_move_objects_;
}
+ virtual void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) = 0;
+
protected:
MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
byte* limit, size_t growth_limit, bool create_bitmaps, bool can_move_objects,
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 2934af8..f505305 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -120,6 +120,10 @@
virtual ~RosAllocSpace();
+ void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE {
+ rosalloc_->LogFragmentationAllocFailure(os, failed_alloc_bytes);
+ }
+
protected:
RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
byte* begin, byte* end, byte* limit, size_t growth_limit, bool can_move_objects,