Merge "Migrated dexlist from Dalvik (libdex) into Art (libart)"
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index d993d93..d1fe167 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1336,9 +1336,24 @@
}
OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
+ // Free up at least one input register if it was a temp. Otherwise we may be in the bad
+ // situation of not having a temp available for SwapBits. Make sure it's not overlapping
+ // with the output, though.
if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
+ // There's definitely a free temp after this.
FreeTemp(r_i_low);
+ } else {
+ // We opportunistically release both here. That saves duplication of the register state
+ // lookup (to see if it's actually a temp).
+ if (rl_i.reg.GetLowReg() != rl_result.reg.GetHighReg()) {
+ FreeTemp(rl_i.reg.GetLow());
+ }
+ if (rl_i.reg.GetHighReg() != rl_result.reg.GetLowReg() &&
+ rl_i.reg.GetHighReg() != rl_result.reg.GetHighReg()) {
+ FreeTemp(rl_i.reg.GetHigh());
+ }
}
+
SwapBits(rl_result.reg.GetLow(), 1, 0x55555555);
SwapBits(rl_result.reg.GetLow(), 2, 0x33333333);
SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f);
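The SwapBits masks above implement the standard logarithmic bit-reversal. A minimal C++ sketch of the same technique (helper names are illustrative, not ART APIs); the generated code performs the byte swap (kOpRev) first, which commutes with the within-byte steps:

    #include <cstdint>

    // Swap adjacent groups of `bits` bits selected by `mask`, as SwapBits does.
    static uint32_t Swap(uint32_t x, int bits, uint32_t mask) {
      return ((x >> bits) & mask) | ((x & mask) << bits);
    }

    static uint32_t ReverseBits32(uint32_t x) {
      x = Swap(x, 1, 0x55555555);   // swap neighboring bits
      x = Swap(x, 2, 0x33333333);   // swap bit pairs
      x = Swap(x, 4, 0x0f0f0f0f);   // swap nibbles within each byte
      return __builtin_bswap32(x);  // reverse the byte order
    }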
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 73e121f..fdfeb48 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -989,6 +989,8 @@
CHECK_EQ(image_objects_offset_begin_ + bin_slot_previous_sizes_[kBinArtMethodClean],
methods_section->Offset());
cur_pos = methods_section->End();
+ // Round up to the alignment the string table expects. See HashSet::WriteToMemory.
+ cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
// Calculate the size of the interned strings.
auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
*interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
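The rounding is the usual power-of-two trick; a minimal sketch, assuming the alignment is a power of two (RoundUpTo is an illustrative stand-in for art's RoundUp):

    #include <cstdint>

    // Round x up to the next multiple of a power-of-two alignment,
    // here 8 bytes (sizeof(uint64_t)), as the intern table expects.
    static uint64_t RoundUpTo(uint64_t x, uint64_t alignment) {
      return (x + alignment - 1) & ~(alignment - 1);
    }
    // RoundUpTo(13, 8) == 16; RoundUpTo(16, 8) == 16.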
@@ -1417,9 +1419,6 @@
if (UNLIKELY(orig->IsAbstract())) {
copy->SetEntryPointFromQuickCompiledCodePtrSize(
GetOatAddress(quick_to_interpreter_bridge_offset_), target_ptr_size_);
- copy->SetEntryPointFromInterpreterPtrSize(
- reinterpret_cast<EntryPointFromInterpreter*>(const_cast<uint8_t*>(
- GetOatAddress(interpreter_to_interpreter_bridge_offset_))), target_ptr_size_);
} else {
bool quick_is_interpreted;
const uint8_t* quick_code = GetQuickCode(orig, &quick_is_interpreted);
@@ -1432,16 +1431,6 @@
copy->SetEntryPointFromJniPtrSize(
GetOatAddress(jni_dlsym_lookup_offset_), target_ptr_size_);
}
-
- // Interpreter entrypoint:
- // Set the interpreter entrypoint depending on whether there is compiled code or not.
- uint32_t interpreter_code = (quick_is_interpreted)
- ? interpreter_to_interpreter_bridge_offset_
- : interpreter_to_compiled_code_bridge_offset_;
- EntryPointFromInterpreter* interpreter_entrypoint =
- reinterpret_cast<EntryPointFromInterpreter*>(
- const_cast<uint8_t*>(GetOatAddress(interpreter_code)));
- copy->SetEntryPointFromInterpreterPtrSize(interpreter_entrypoint, target_ptr_size_);
}
}
}
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index daf7d67..329112a 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -119,6 +119,14 @@
// Check if the selection negates/preserves the value of the condition and
// if so, generate a suitable replacement instruction.
HInstruction* if_condition = if_instruction->InputAt(0);
+
+ // Don't change FP compares. The definition of compares involving NaNs forces
+ // the compares to be done as written by the user.
+ if (if_condition->IsCondition() &&
+ Primitive::IsFloatingPointType(if_condition->InputAt(0)->GetType())) {
+ return;
+ }
+
HInstruction* replacement;
if (NegatesCondition(true_value, false_value)) {
replacement = GetOppositeCondition(if_condition);
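The reason FP conditions can't be rewritten here is IEEE-754 unordered semantics: with a NaN operand, a comparison and its apparent negation are both false, so swapping one for the other changes the program. A small self-contained illustration:

    #include <cmath>

    static void NanComparisonExample() {
      double a = std::nan(""), b = 0.0;
      bool lt = (a < b);   // false: NaN compares unordered
      bool ge = (a >= b);  // also false, so ge is NOT the negation of lt
      (void)lt; (void)ge;
    }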
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 732630d..8551382 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -657,7 +657,7 @@
void HGraphBuilder::Binop_23x_cmp(const Instruction& instruction,
Primitive::Type type,
- HCompare::Bias bias,
+ ComparisonBias bias,
uint32_t dex_pc) {
HInstruction* first = LoadLocal(instruction.VRegB(), type);
HInstruction* second = LoadLocal(instruction.VRegC(), type);
@@ -2311,27 +2311,27 @@
}
case Instruction::CMP_LONG: {
- Binop_23x_cmp(instruction, Primitive::kPrimLong, HCompare::kNoBias, dex_pc);
+ Binop_23x_cmp(instruction, Primitive::kPrimLong, kNoBias, dex_pc);
break;
}
case Instruction::CMPG_FLOAT: {
- Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kGtBias, dex_pc);
+ Binop_23x_cmp(instruction, Primitive::kPrimFloat, kGtBias, dex_pc);
break;
}
case Instruction::CMPG_DOUBLE: {
- Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kGtBias, dex_pc);
+ Binop_23x_cmp(instruction, Primitive::kPrimDouble, kGtBias, dex_pc);
break;
}
case Instruction::CMPL_FLOAT: {
- Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kLtBias, dex_pc);
+ Binop_23x_cmp(instruction, Primitive::kPrimFloat, kLtBias, dex_pc);
break;
}
case Instruction::CMPL_DOUBLE: {
- Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kLtBias, dex_pc);
+ Binop_23x_cmp(instruction, Primitive::kPrimDouble, kLtBias, dex_pc);
break;
}
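The bias values mirror the dex cmpg/cmpl semantics: cmpg returns 1 when either operand is NaN, cmpl returns -1. A sketch of the float case (plain C++, illustrative only):

    static int CompareWithBias(float a, float b, bool gt_bias) {
      if (a < b) return -1;
      if (a > b) return 1;
      if (a == b) return 0;
      return gt_bias ? 1 : -1;  // unordered: at least one operand is NaN
    }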
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index cae762b..e487255 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -139,7 +139,7 @@
void Binop_23x_cmp(const Instruction& instruction,
Primitive::Type type,
- HCompare::Bias bias,
+ ComparisonBias bias,
uint32_t dex_pc);
template<typename T>
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 262b234..be71443 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -889,6 +889,180 @@
UNUSED(exit);
}
+void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
+ Label* true_label,
+ Label* false_label) {
+ bool gt_bias = cond->IsGtBias();
+ IfCondition if_cond = cond->GetCondition();
+ Condition ccode = X86Condition(if_cond);
+ switch (if_cond) {
+ case kCondEQ:
+ if (!gt_bias) {
+ __ j(kParityEven, false_label);
+ }
+ break;
+ case kCondNE:
+ if (!gt_bias) {
+ __ j(kParityEven, true_label);
+ }
+ break;
+ case kCondLT:
+ if (gt_bias) {
+ __ j(kParityEven, false_label);
+ }
+ ccode = kBelow;
+ break;
+ case kCondLE:
+ if (gt_bias) {
+ __ j(kParityEven, false_label);
+ }
+ ccode = kBelowEqual;
+ break;
+ case kCondGT:
+ if (gt_bias) {
+ __ j(kParityEven, true_label);
+ }
+ ccode = kAbove;
+ break;
+ case kCondGE:
+ if (gt_bias) {
+ __ j(kParityEven, true_label);
+ }
+ ccode = kAboveEqual;
+ break;
+ }
+ __ j(ccode, true_label);
+}
+
+void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
+ Label* true_label,
+ Label* false_label) {
+ LocationSummary* locations = cond->GetLocations();
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+ IfCondition if_cond = cond->GetCondition();
+
+ Register left_low = left.AsRegisterPairLow<Register>();
+ Register left_high = left.AsRegisterPairHigh<Register>();
+ IfCondition true_high_cond = if_cond;
+ IfCondition false_high_cond = cond->GetOppositeCondition();
+ Condition final_condition = X86Condition(if_cond);
+
+ // Set the conditions for the test, remembering that == needs to be
+ // decided using the low words.
+ switch (if_cond) {
+ case kCondEQ:
+ false_high_cond = kCondNE;
+ break;
+ case kCondNE:
+ false_high_cond = kCondEQ;
+ break;
+ case kCondLT:
+ false_high_cond = kCondGT;
+ final_condition = kBelow;
+ break;
+ case kCondLE:
+ true_high_cond = kCondLT;
+ final_condition = kBelowEqual;
+ break;
+ case kCondGT:
+ false_high_cond = kCondLT;
+ final_condition = kAbove;
+ break;
+ case kCondGE:
+ true_high_cond = kCondGT;
+ final_condition = kAboveEqual;
+ break;
+ }
+
+ if (right.IsConstant()) {
+ int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+ int32_t val_low = Low32Bits(value);
+ int32_t val_high = High32Bits(value);
+
+ if (val_high == 0) {
+ __ testl(left_high, left_high);
+ } else {
+ __ cmpl(left_high, Immediate(val_high));
+ }
+ if (if_cond == kCondNE) {
+ __ j(X86Condition(true_high_cond), true_label);
+ } else if (if_cond == kCondEQ) {
+ __ j(X86Condition(false_high_cond), false_label);
+ } else {
+ __ j(X86Condition(true_high_cond), true_label);
+ __ j(X86Condition(false_high_cond), false_label);
+ }
+ // High words are equal, so the low words decide.
+ if (val_low == 0) {
+ __ testl(left_low, left_low);
+ } else {
+ __ cmpl(left_low, Immediate(val_low));
+ }
+ } else {
+ Register right_low = right.AsRegisterPairLow<Register>();
+ Register right_high = right.AsRegisterPairHigh<Register>();
+
+ __ cmpl(left_high, right_high);
+ if (if_cond == kCondNE) {
+ __ j(X86Condition(true_high_cond), true_label);
+ } else if (if_cond == kCondEQ) {
+ __ j(X86Condition(false_high_cond), false_label);
+ } else {
+ __ j(X86Condition(true_high_cond), true_label);
+ __ j(X86Condition(false_high_cond), false_label);
+ }
+ // High words are equal, so the low words decide.
+ __ cmpl(left_low, right_low);
+ }
+ // The last comparison might be unsigned.
+ __ j(final_condition, true_label);
+}
+
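The split-word scheme above compares the high words signed and lets the unsigned low words break ties, which is why final_condition switches to kBelow/kBelowEqual/kAbove/kAboveEqual. A minimal sketch of the same idea:

    #include <cstdint>

    static bool LessThan64(int64_t a, int64_t b) {
      int32_t a_hi = static_cast<int32_t>(a >> 32);
      int32_t b_hi = static_cast<int32_t>(b >> 32);
      if (a_hi != b_hi) {
        return a_hi < b_hi;  // signed comparison on the high words
      }
      // High words equal: the unsigned low words decide.
      return static_cast<uint32_t>(a) < static_cast<uint32_t>(b);
    }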
+void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr,
+ HCondition* condition,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target) {
+ LocationSummary* locations = condition->GetLocations();
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+
+ // We don't want true_target to be nullptr.
+ if (true_target == nullptr) {
+ true_target = always_true_target;
+ }
+ bool falls_through = (false_target == nullptr);
+
+ // FP compares need a non-null false_target to branch to.
+ if (false_target == nullptr) {
+ false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+ }
+
+ Primitive::Type type = condition->InputAt(0)->GetType();
+ switch (type) {
+ case Primitive::kPrimLong:
+ GenerateLongComparesAndJumps(condition, true_target, false_target);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK(right.IsFpuRegister());
+ __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+ GenerateFPJumps(condition, true_target, false_target);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK(right.IsFpuRegister());
+ __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+ GenerateFPJumps(condition, true_target, false_target);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected compare type " << type;
+ }
+
+ if (!falls_through) {
+ __ jmp(false_target);
+ }
+}
+
void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
Label* true_target,
Label* false_target,
@@ -910,9 +1084,12 @@
!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
// Moves do not affect the eflags register, so if the condition is
// evaluated just before the if, we don't need to evaluate it
- // again.
+ // again. We can't reuse the eflags for long/FP conditions, because
+ // materializing them requires complex branching.
+ Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
bool eflags_set = cond->IsCondition()
- && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction);
+ && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
+ && type == Primitive::kPrimInt;
if (materialized) {
if (!eflags_set) {
// Materialized condition, compare against 0.
@@ -927,6 +1104,16 @@
__ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
}
} else {
+ // Is this a long or FP comparison that has been folded into the HCondition?
+ if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+ // Generate the comparison directly.
+ GenerateCompareTestAndBranch(instruction->AsIf(),
+ cond->AsCondition(),
+ true_target,
+ false_target,
+ always_true_target);
+ return;
+ }
Location lhs = cond->GetLocations()->InAt(0);
Location rhs = cond->GetLocations()->InAt(1);
// LHS is guaranteed to be in a register (see
@@ -1041,36 +1228,94 @@
void LocationsBuilderX86::VisitCondition(HCondition* cond) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
- if (cond->NeedsMaterialization()) {
- // We need a byte register.
- locations->SetOut(Location::RegisterLocation(ECX));
+ // Handle the long/FP comparisons made in instruction simplification.
+ switch (cond->InputAt(0)->GetType()) {
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
+ if (cond->NeedsMaterialization()) {
+ locations->SetOut(Location::RequiresRegister());
+ }
+ break;
+ }
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ if (cond->NeedsMaterialization()) {
+ locations->SetOut(Location::RequiresRegister());
+ }
+ break;
+ }
+ default:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::Any());
+ if (cond->NeedsMaterialization()) {
+ // We need a byte register.
+ locations->SetOut(Location::RegisterLocation(ECX));
+ }
+ break;
}
}
void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) {
- if (cond->NeedsMaterialization()) {
- LocationSummary* locations = cond->GetLocations();
- Register reg = locations->Out().AsRegister<Register>();
- // Clear register: setcc only sets the low byte.
- __ xorl(reg, reg);
- Location lhs = locations->InAt(0);
- Location rhs = locations->InAt(1);
- if (rhs.IsRegister()) {
- __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
- } else if (rhs.IsConstant()) {
- int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- if (constant == 0) {
- __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
- } else {
- __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
- }
- } else {
- __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
- }
- __ setb(X86Condition(cond->GetCondition()), reg);
+ if (!cond->NeedsMaterialization()) {
+ return;
}
+
+ LocationSummary* locations = cond->GetLocations();
+ Location lhs = locations->InAt(0);
+ Location rhs = locations->InAt(1);
+ Register reg = locations->Out().AsRegister<Register>();
+ Label true_label, false_label;
+
+ switch (cond->InputAt(0)->GetType()) {
+ default: {
+ // Integer case.
+
+ // Clear output register: setcc only sets the low byte.
+ __ xorl(reg, reg);
+
+ if (rhs.IsRegister()) {
+ __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
+ } else if (rhs.IsConstant()) {
+ int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+ if (constant == 0) {
+ __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
+ } else {
+ __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
+ }
+ } else {
+ __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
+ }
+ __ setb(X86Condition(cond->GetCondition()), reg);
+ return;
+ }
+ case Primitive::kPrimLong:
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+ break;
+ case Primitive::kPrimFloat:
+ __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+ GenerateFPJumps(cond, &true_label, &false_label);
+ break;
+ case Primitive::kPrimDouble:
+ __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+ GenerateFPJumps(cond, &true_label, &false_label);
+ break;
+ }
+
+ // Convert the jumps into the result.
+ Label done_label;
+
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ xorl(reg, reg);
+ __ jmp(&done_label);
+
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ movl(reg, Immediate(1));
+ __ Bind(&done_label);
}
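Materialization follows the setcc contract: the instruction writes only the low byte, so the full register is zeroed before the compare (xor would clobber the flags if done after). A sketch of the contract being relied on:

    #include <cstdint>

    // The boolean must occupy a full register; setcc writes one byte.
    static uint32_t MaterializeCondition(bool condition) {
      uint32_t reg = 0;                        // xorl reg, reg (placed before
                                               // the compare: xor clobbers eflags)
      reg |= static_cast<uint8_t>(condition);  // setcc: low byte only
      return reg;
    }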
void LocationsBuilderX86::VisitEqual(HEqual* comp) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 623e832..65d6e0a 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -177,7 +177,7 @@
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivByPowerOfTwo(HDiv* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
- void GenerateRemFP(HRem *rem);
+ void GenerateRemFP(HRem* rem);
void HandleShift(HBinaryOperation* instruction);
void GenerateShlLong(const Location& loc, Register shifter);
void GenerateShrLong(const Location& loc, Register shifter);
@@ -201,6 +201,13 @@
Label* true_target,
Label* false_target,
Label* always_true_target);
+ void GenerateCompareTestAndBranch(HIf* if_inst,
+ HCondition* condition,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target);
+ void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
+ void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
X86Assembler* const assembler_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index c9d19c8..ddaa60d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -833,6 +833,134 @@
UNUSED(exit);
}
+void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
+ Label* true_label,
+ Label* false_label) {
+ bool gt_bias = cond->IsGtBias();
+ IfCondition if_cond = cond->GetCondition();
+ Condition ccode = X86_64Condition(if_cond);
+ switch (if_cond) {
+ case kCondEQ:
+ if (!gt_bias) {
+ __ j(kParityEven, false_label);
+ }
+ break;
+ case kCondNE:
+ if (!gt_bias) {
+ __ j(kParityEven, true_label);
+ }
+ break;
+ case kCondLT:
+ if (gt_bias) {
+ __ j(kParityEven, false_label);
+ }
+ ccode = kBelow;
+ break;
+ case kCondLE:
+ if (gt_bias) {
+ __ j(kParityEven, false_label);
+ }
+ ccode = kBelowEqual;
+ break;
+ case kCondGT:
+ if (gt_bias) {
+ __ j(kParityEven, true_label);
+ }
+ ccode = kAbove;
+ break;
+ case kCondGE:
+ if (gt_bias) {
+ __ j(kParityEven, true_label);
+ }
+ ccode = kAboveEqual;
+ break;
+ }
+ __ j(ccode, true_label);
+}
+
+void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr,
+ HCondition* condition,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target) {
+ LocationSummary* locations = condition->GetLocations();
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+
+ // We don't want true_target to be nullptr.
+ if (true_target == nullptr) {
+ true_target = always_true_target;
+ }
+ bool falls_through = (false_target == nullptr);
+
+ // FP compares need a non-null false_target to branch to.
+ if (false_target == nullptr) {
+ false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+ }
+
+ Primitive::Type type = condition->InputAt(0)->GetType();
+ switch (type) {
+ case Primitive::kPrimLong: {
+ CpuRegister left_reg = left.AsRegister<CpuRegister>();
+ if (right.IsConstant()) {
+ int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+ if (IsInt<32>(value)) {
+ if (value == 0) {
+ __ testq(left_reg, left_reg);
+ } else {
+ __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+ }
+ } else {
+ // Value won't fit in a 32-bit integer.
+ __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
+ }
+ } else if (right.IsDoubleStackSlot()) {
+ __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+ } else {
+ __ cmpq(left_reg, right.AsRegister<CpuRegister>());
+ }
+ __ j(X86_64Condition(condition->GetCondition()), true_target);
+ break;
+ }
+ case Primitive::kPrimFloat: {
+ if (right.IsFpuRegister()) {
+ __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+ } else if (right.IsConstant()) {
+ __ ucomiss(left.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(
+ right.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(right.IsStackSlot());
+ __ ucomiss(left.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), right.GetStackIndex()));
+ }
+ GenerateFPJumps(condition, true_target, false_target);
+ break;
+ }
+ case Primitive::kPrimDouble: {
+ if (right.IsFpuRegister()) {
+ __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+ } else if (right.IsConstant()) {
+ __ ucomisd(left.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(
+ right.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(right.IsDoubleStackSlot());
+ __ ucomisd(left.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), right.GetStackIndex()));
+ }
+ GenerateFPJumps(condition, true_target, false_target);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected condition type " << type;
+ }
+
+ if (!falls_through) {
+ __ jmp(false_target);
+ }
+}
+
void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
Label* true_target,
Label* false_target,
@@ -854,9 +982,13 @@
!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
// Moves do not affect the eflags register, so if the condition is
// evaluated just before the if, we don't need to evaluate it
- // again.
+ // again. We can't reuse the eflags for FP conditions, because
+ // materializing them requires complex branching.
+ Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
bool eflags_set = cond->IsCondition()
- && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction);
+ && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
+ && !Primitive::IsFloatingPointType(type);
+
if (materialized) {
if (!eflags_set) {
// Materialized condition, compare against 0.
@@ -872,6 +1004,13 @@
__ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target);
}
} else {
+ // Is this a long or FP comparison that has been folded into the HCondition?
+ if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+ // Generate the comparison directly.
+ GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(),
+ true_target, false_target, always_true_target);
+ return;
+ }
Location lhs = cond->GetLocations()->InAt(0);
Location rhs = cond->GetLocations()->InAt(1);
if (rhs.IsRegister()) {
@@ -985,35 +1124,122 @@
void LocationsBuilderX86_64::VisitCondition(HCondition* cond) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
+ // Handle the long/FP comparisons made in instruction simplification.
+ switch (cond->InputAt(0)->GetType()) {
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::Any());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
+ break;
+ default:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::Any());
+ break;
+ }
if (cond->NeedsMaterialization()) {
locations->SetOut(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) {
- if (cond->NeedsMaterialization()) {
- LocationSummary* locations = cond->GetLocations();
- CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
- // Clear register: setcc only sets the low byte.
- __ xorl(reg, reg);
- Location lhs = locations->InAt(0);
- Location rhs = locations->InAt(1);
- if (rhs.IsRegister()) {
- __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
- } else if (rhs.IsConstant()) {
- int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- if (constant == 0) {
- __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
- } else {
- __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
- }
- } else {
- __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
- }
- __ setcc(X86_64Condition(cond->GetCondition()), reg);
+ if (!cond->NeedsMaterialization()) {
+ return;
}
+
+ LocationSummary* locations = cond->GetLocations();
+ Location lhs = locations->InAt(0);
+ Location rhs = locations->InAt(1);
+ CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
+ Label true_label, false_label;
+
+ switch (cond->InputAt(0)->GetType()) {
+ default:
+ // Integer case.
+
+ // Clear output register: setcc only sets the low byte.
+ __ xorl(reg, reg);
+
+ if (rhs.IsRegister()) {
+ __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
+ } else if (rhs.IsConstant()) {
+ int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+ if (constant == 0) {
+ __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
+ } else {
+ __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
+ }
+ } else {
+ __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
+ }
+ __ setcc(X86_64Condition(cond->GetCondition()), reg);
+ return;
+ case Primitive::kPrimLong:
+ // Clear output register: setcc only sets the low byte.
+ __ xorl(reg, reg);
+
+ if (rhs.IsRegister()) {
+ __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
+ } else if (rhs.IsConstant()) {
+ int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
+ if (IsInt<32>(value)) {
+ if (value == 0) {
+ __ testq(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
+ } else {
+ __ cmpq(lhs.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
+ }
+ } else {
+ // Value won't fit in a 32-bit integer.
+ __ cmpq(lhs.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
+ }
+ } else {
+ __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
+ }
+ __ setcc(X86_64Condition(cond->GetCondition()), reg);
+ return;
+ case Primitive::kPrimFloat: {
+ XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
+ if (rhs.IsConstant()) {
+ float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
+ __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
+ } else if (rhs.IsStackSlot()) {
+ __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
+ } else {
+ __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
+ }
+ GenerateFPJumps(cond, &true_label, &false_label);
+ break;
+ }
+ case Primitive::kPrimDouble: {
+ XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
+ if (rhs.IsConstant()) {
+ double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
+ __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
+ } else if (rhs.IsDoubleStackSlot()) {
+ __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
+ } else {
+ __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
+ }
+ GenerateFPJumps(cond, &true_label, &false_label);
+ break;
+ }
+ }
+
+ // Convert the jumps into the result.
+ Label done_label;
+
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ xorl(reg, reg);
+ __ jmp(&done_label);
+
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ movl(reg, Immediate(1));
+ __ Bind(&done_label);
}
void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index c2aa56b..4b90381 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -183,7 +183,7 @@
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
void HandleBitwiseOperation(HBinaryOperation* operation);
- void GenerateRemFP(HRem *rem);
+ void GenerateRemFP(HRem* rem);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivByPowerOfTwo(HDiv* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -202,6 +202,12 @@
Label* true_target,
Label* false_target,
Label* always_true_target);
+ void GenerateCompareTestAndBranch(HIf* if_inst,
+ HCondition* condition,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target);
+ void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
X86_64Assembler* const assembler_;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 62f90c2..337cf5b 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -54,6 +54,11 @@
void VisitCheckCast(HCheckCast* instruction) OVERRIDE;
void VisitAdd(HAdd* instruction) OVERRIDE;
void VisitAnd(HAnd* instruction) OVERRIDE;
+ void VisitCondition(HCondition* instruction) OVERRIDE;
+ void VisitGreaterThan(HGreaterThan* condition) OVERRIDE;
+ void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) OVERRIDE;
+ void VisitLessThan(HLessThan* condition) OVERRIDE;
+ void VisitLessThanOrEqual(HLessThanOrEqual* condition) OVERRIDE;
void VisitDiv(HDiv* instruction) OVERRIDE;
void VisitMul(HMul* instruction) OVERRIDE;
void VisitNeg(HNeg* instruction) OVERRIDE;
@@ -330,7 +335,11 @@
block->RemoveInstruction(equal);
RecordSimplification();
}
+ } else {
+ VisitCondition(equal);
}
+ } else {
+ VisitCondition(equal);
}
}
@@ -358,7 +367,11 @@
block->RemoveInstruction(not_equal);
RecordSimplification();
}
+ } else {
+ VisitCondition(not_equal);
}
+ } else {
+ VisitCondition(not_equal);
}
}
@@ -485,6 +498,76 @@
}
}
+void InstructionSimplifierVisitor::VisitGreaterThan(HGreaterThan* condition) {
+ VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) {
+ VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitLessThan(HLessThan* condition) {
+ VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitLessThanOrEqual(HLessThanOrEqual* condition) {
+ VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) {
+ // Try to fold an HCompare into this HCondition.
+
+ // This simplification is currently only supported on x86 and x86_64.
+ // TODO: Implement it for ARM, ARM64 and MIPS64.
+ InstructionSet instruction_set = GetGraph()->GetInstructionSet();
+ if (instruction_set != kX86 && instruction_set != kX86_64) {
+ return;
+ }
+
+ HInstruction* left = condition->GetLeft();
+ HInstruction* right = condition->GetRight();
+ // We can only replace an HCondition which compares a Compare to 0.
+ // Both 'dx' and 'jack' generate a compare to 0 when compiling a
+ // condition with a long, float or double comparison as input.
+ if (!left->IsCompare() || !right->IsConstant() || right->AsIntConstant()->GetValue() != 0) {
+ // Conversion is not possible.
+ return;
+ }
+
+ // Is the Compare only used for this purpose?
+ if (!left->GetUses().HasOnlyOneUse()) {
+ // Someone else also wants the result of the compare.
+ return;
+ }
+
+ if (!left->GetEnvUses().IsEmpty()) {
+ // There is a reference to the compare result in an environment. Do we really need it?
+ if (GetGraph()->IsDebuggable()) {
+ return;
+ }
+
+ // We have to ensure that there are no deopt points in the sequence.
+ if (left->HasAnyEnvironmentUseBefore(condition)) {
+ return;
+ }
+ }
+
+ // Clean up environment uses of the HCompare, if there are any.
+ left->RemoveEnvironmentUsers();
+
+ // We have decided to fold the HCompare into the HCondition. Transfer the information.
+ condition->SetBias(left->AsCompare()->GetBias());
+
+ // Replace the operands of the HCondition.
+ condition->ReplaceInput(left->InputAt(0), 0);
+ condition->ReplaceInput(left->InputAt(1), 1);
+
+ // Remove the HCompare.
+ left->GetBlock()->RemoveInstruction(left);
+
+ RecordSimplification();
+}
+
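The fold is sound because testing the HCompare result against zero is equivalent to applying the condition directly, provided the bias travels with it. A sketch for '>' on doubles (assumption: gt_bias models the cmpg variant):

    #include <cmath>

    static bool ViaCompare(double a, double b, bool gt_bias) {
      int cmp = (a < b) ? -1 : (a > b) ? 1 : (a == b) ? 0 : (gt_bias ? 1 : -1);
      return cmp > 0;  // the pre-fold shape: Compare(a, b) > 0
    }

    static bool FoldedCondition(double a, double b, bool gt_bias) {
      if (std::isnan(a) || std::isnan(b)) {
        return gt_bias;  // the recorded bias decides unordered inputs
      }
      return a > b;      // the post-fold shape: GreaterThan(a, b) with bias
    }
    // ViaCompare and FoldedCondition agree on every input, NaN included.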
void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) {
HConstant* input_cst = instruction->GetConstantRight();
HInstruction* input_other = instruction->GetLeastConstantLeft();
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index b82e37c..588ab70 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -940,6 +940,9 @@
GetRight()->AsLongConstant()->GetValue());
if (GetResultType() == Primitive::kPrimLong) {
return GetBlock()->GetGraph()->GetLongConstant(value);
+ } else if (GetResultType() == Primitive::kPrimBoolean) {
+ // This can be the result of an HCondition evaluation.
+ return GetBlock()->GetGraph()->GetIntConstant(static_cast<int32_t>(value));
} else {
DCHECK_EQ(GetResultType(), Primitive::kPrimInt);
return GetBlock()->GetGraph()->GetIntConstant(static_cast<int32_t>(value));
@@ -1647,4 +1650,38 @@
return os;
}
+bool HInstruction::HasAnyEnvironmentUseBefore(HInstruction* other) {
+ // For now, assume that instructions in different blocks may use the
+ // environment.
+ // TODO: Use the control flow to decide if this is true.
+ if (GetBlock() != other->GetBlock()) {
+ return true;
+ }
+
+ // We know that we are in the same block. Walk from 'this' to 'other',
+ // checking to see if there is any instruction with an environment.
+ HInstruction* current = this;
+ for (; current != other && current != nullptr; current = current->GetNext()) {
+ // This is a conservative check, as the instruction result may not be in
+ // the referenced environment.
+ if (current->HasEnvironment()) {
+ return true;
+ }
+ }
+
+ // We should have been called with 'this' before 'other' in the block.
+ // Just confirm this.
+ DCHECK(current != nullptr);
+ return false;
+}
+
+void HInstruction::RemoveEnvironmentUsers() {
+ for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) {
+ HUseListNode<HEnvironment*>* user_node = use_it.Current();
+ HEnvironment* user = user_node->GetUser();
+ user->SetRawEnvAt(user_node->GetIndex(), nullptr);
+ }
+ env_uses_.Clear();
+}
+
} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2cffe02..e4a7aa6 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -325,6 +325,10 @@
return invoke_type_;
}
+ InstructionSet GetInstructionSet() const {
+ return instruction_set_;
+ }
+
private:
void VisitBlockForDominatorTree(HBasicBlock* block,
HBasicBlock* predecessor,
@@ -1659,6 +1663,14 @@
virtual bool NeedsDexCache() const { return false; }
+ // Does this instruction have any use in an environment before
+ // control flow hits 'other'?
+ bool HasAnyEnvironmentUseBefore(HInstruction* other);
+
+ // Remove all references to environment uses of this instruction.
+ // The caller must ensure that this is safe to do.
+ void RemoveEnvironmentUsers();
+
protected:
virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
@@ -2135,11 +2147,20 @@
DISALLOW_COPY_AND_ASSIGN(HBinaryOperation);
};
+// The comparison bias applies for floating point operations and indicates how NaN
+// comparisons are treated:
+enum ComparisonBias {
+ kNoBias, // bias is not applicable (e.g. for a long operation)
+ kGtBias, // return 1 for NaN comparisons
+ kLtBias, // return -1 for NaN comparisons
+};
+
class HCondition : public HBinaryOperation {
public:
HCondition(HInstruction* first, HInstruction* second)
: HBinaryOperation(Primitive::kPrimBoolean, first, second),
- needs_materialization_(true) {}
+ needs_materialization_(true),
+ bias_(kNoBias) {}
bool NeedsMaterialization() const { return needs_materialization_; }
void ClearNeedsMaterialization() { needs_materialization_ = false; }
@@ -2152,11 +2173,24 @@
virtual IfCondition GetCondition() const = 0;
+ virtual IfCondition GetOppositeCondition() const = 0;
+
+ bool IsGtBias() { return bias_ == kGtBias; }
+
+ void SetBias(ComparisonBias bias) { bias_ = bias; }
+
+ bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+ return bias_ == other->AsCondition()->bias_;
+ }
+
private:
// For register allocation purposes, returns whether this instruction needs to be
// materialized (that is, not just be in the processor flags).
bool needs_materialization_;
+ // Needed if we merge an HCompare into an HCondition.
+ ComparisonBias bias_;
+
DISALLOW_COPY_AND_ASSIGN(HCondition);
};
@@ -2181,6 +2215,10 @@
return kCondEQ;
}
+ IfCondition GetOppositeCondition() const OVERRIDE {
+ return kCondNE;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HEqual);
};
@@ -2205,6 +2243,10 @@
return kCondNE;
}
+ IfCondition GetOppositeCondition() const OVERRIDE {
+ return kCondEQ;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HNotEqual);
};
@@ -2227,6 +2269,10 @@
return kCondLT;
}
+ IfCondition GetOppositeCondition() const OVERRIDE {
+ return kCondGE;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HLessThan);
};
@@ -2249,6 +2295,10 @@
return kCondLE;
}
+ IfCondition GetOppositeCondition() const OVERRIDE {
+ return kCondGT;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual);
};
@@ -2271,6 +2321,10 @@
return kCondGT;
}
+ IfCondition GetOppositeCondition() const OVERRIDE {
+ return kCondLE;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HGreaterThan);
};
@@ -2293,6 +2347,10 @@
return kCondGE;
}
+ IfCondition GetOppositeCondition() const OVERRIDE {
+ return kCondLT;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual);
};
@@ -2302,18 +2360,10 @@
// Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
class HCompare : public HBinaryOperation {
public:
- // The bias applies for floating point operations and indicates how NaN
- // comparisons are treated:
- enum Bias {
- kNoBias, // bias is not applicable (i.e. for long operation)
- kGtBias, // return 1 for NaN comparisons
- kLtBias, // return -1 for NaN comparisons
- };
-
HCompare(Primitive::Type type,
HInstruction* first,
HInstruction* second,
- Bias bias,
+ ComparisonBias bias,
uint32_t dex_pc)
: HBinaryOperation(Primitive::kPrimInt, first, second), bias_(bias), dex_pc_(dex_pc) {
DCHECK_EQ(type, first->GetType());
@@ -2338,6 +2388,8 @@
return bias_ == other->AsCompare()->bias_;
}
+ ComparisonBias GetBias() const { return bias_; }
+
bool IsGtBias() { return bias_ == kGtBias; }
uint32_t GetDexPc() const { return dex_pc_; }
@@ -2345,7 +2397,7 @@
DECLARE_INSTRUCTION(Compare);
private:
- const Bias bias_;
+ const ComparisonBias bias_;
const uint32_t dex_pc_;
DISALLOW_COPY_AND_ASSIGN(HCompare);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index b86bc85..6da5c35 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -953,6 +953,48 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd");
}
+TEST_F(AssemblerX86_64Test, UcomissAddress) {
+ GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+ GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM1), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+ GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM2), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+ GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM3), x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), 0));
+ GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM4), x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0));
+ const char* expected =
+ "ucomiss 0xc(%RDI,%RBX,4), %xmm0\n"
+ "ucomiss 0xc(%RDI,%R9,4), %xmm1\n"
+ "ucomiss 0xc(%RDI,%R9,4), %xmm2\n"
+ "ucomiss (%R13), %xmm3\n"
+ "ucomiss (%R13,%R9,1), %xmm4\n";
+
+ DriverStr(expected, "ucomiss_address");
+}
+
+TEST_F(AssemblerX86_64Test, UcomisdAddress) {
+ GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+ GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM1), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+ GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM2), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+ GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM3), x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), 0));
+ GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM4), x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0));
+ const char* expected =
+ "ucomisd 0xc(%RDI,%RBX,4), %xmm0\n"
+ "ucomisd 0xc(%RDI,%R9,4), %xmm1\n"
+ "ucomisd 0xc(%RDI,%R9,4), %xmm2\n"
+ "ucomisd (%R13), %xmm3\n"
+ "ucomisd (%R13,%R9,1), %xmm4\n";
+
+ DriverStr(expected, "ucomisd_address");
+}
+
// X87
std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index f324881..dce5206 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -548,10 +548,6 @@
os << " entryPointFromJni: "
<< reinterpret_cast<const void*>(
art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
- os << " entryPointFromInterpreter: "
- << reinterpret_cast<const void*>(
- art_method->GetEntryPointFromInterpreterPtrSize(pointer_size))
- << ", ";
os << " entryPointFromQuickCompiledCode: "
<< reinterpret_cast<const void*>(
art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
@@ -631,10 +627,6 @@
os << " entryPointFromJni: "
<< reinterpret_cast<const void*>(
art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
- os << " entryPointFromInterpreter: "
- << reinterpret_cast<const void*>(
- art_method->GetEntryPointFromInterpreterPtrSize(pointer_size))
- << ", ";
os << " entryPointFromQuickCompiledCode: "
<< reinterpret_cast<const void*>(
art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index cf4f822..8dde547 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1618,7 +1618,8 @@
stats_.alignment_bytes += bitmap_section.Offset() - image_header_.GetImageSize();
stats_.bitmap_bytes += bitmap_section.Size();
stats_.art_field_bytes += field_section.Size();
- stats_.art_method_bytes += method_section.Size();
+ // Round up to 8 bytes to match the intern table's alignment expectation.
+ stats_.art_method_bytes += RoundUp(method_section.Size(), sizeof(uint64_t));
stats_.interned_strings_bytes += intern_section.Size();
stats_.Dump(os);
os << "\n";
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 3a155be..dbd1d23 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -651,8 +651,6 @@
copy->SetDexCacheResolvedTypes(RelocatedAddressOfPointer(object->GetDexCacheResolvedTypes()));
copy->SetEntryPointFromQuickCompiledCodePtrSize(RelocatedAddressOfPointer(
object->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)), pointer_size);
- copy->SetEntryPointFromInterpreterPtrSize(RelocatedAddressOfPointer(
- object->GetEntryPointFromInterpreterPtrSize(pointer_size)), pointer_size);
copy->SetEntryPointFromJniPtrSize(RelocatedAddressOfPointer(
object->GetEntryPointFromJniPtrSize(pointer_size)), pointer_size);
}
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 4a1e2c4..e8c47d9 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -43,9 +43,6 @@
class PointerArray;
} // namespace mirror
-typedef void (EntryPointFromInterpreter)(Thread* self, const DexFile::CodeItem* code_item,
- ShadowFrame* shadow_frame, JValue* result);
-
class ArtMethod FINAL {
public:
ArtMethod() : access_flags_(0), dex_code_item_offset_(0), dex_method_index_(0),
@@ -272,23 +269,6 @@
void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result, const char* shorty)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- EntryPointFromInterpreter* GetEntryPointFromInterpreter() {
- return GetEntryPointFromInterpreterPtrSize(sizeof(void*));
- }
- EntryPointFromInterpreter* GetEntryPointFromInterpreterPtrSize(size_t pointer_size) {
- return GetEntryPoint<EntryPointFromInterpreter*>(
- EntryPointFromInterpreterOffset(pointer_size), pointer_size);
- }
-
- void SetEntryPointFromInterpreter(EntryPointFromInterpreter* entry_point_from_interpreter) {
- SetEntryPointFromInterpreterPtrSize(entry_point_from_interpreter, sizeof(void*));
- }
- void SetEntryPointFromInterpreterPtrSize(EntryPointFromInterpreter* entry_point_from_interpreter,
- size_t pointer_size) {
- SetEntryPoint(EntryPointFromInterpreterOffset(pointer_size), entry_point_from_interpreter,
- pointer_size);
- }
-
const void* GetEntryPointFromQuickCompiledCode() {
return GetEntryPointFromQuickCompiledCodePtrSize(sizeof(void*));
}
@@ -398,11 +378,6 @@
void UnregisterNative() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- static MemberOffset EntryPointFromInterpreterOffset(size_t pointer_size) {
- return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
- PtrSizedFields, entry_point_from_interpreter_) / sizeof(void*) * pointer_size);
- }
-
static MemberOffset EntryPointFromJniOffset(size_t pointer_size) {
return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
PtrSizedFields, entry_point_from_jni_) / sizeof(void*) * pointer_size);
@@ -573,10 +548,6 @@
// PACKED(4) is necessary for the correctness of
// RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size).
struct PACKED(4) PtrSizedFields {
- // Method dispatch from the interpreter invokes this pointer which may cause a bridge into
- // compiled code.
- void* entry_point_from_interpreter_;
-
// Pointer to JNI function registered to this method, or a function to resolve the JNI function.
void* entry_point_from_jni_;
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 10ed0f4..20d75f3 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -189,11 +189,11 @@
ADD_TEST_EQ(ART_METHOD_DEX_CACHE_TYPES_OFFSET,
art::ArtMethod::DexCacheResolvedTypesOffset().Int32Value())
-#define ART_METHOD_QUICK_CODE_OFFSET_32 36
+#define ART_METHOD_QUICK_CODE_OFFSET_32 32
ADD_TEST_EQ(ART_METHOD_QUICK_CODE_OFFSET_32,
art::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())
-#define ART_METHOD_QUICK_CODE_OFFSET_64 48
+#define ART_METHOD_QUICK_CODE_OFFSET_64 40
ADD_TEST_EQ(ART_METHOD_QUICK_CODE_OFFSET_64,
art::ArtMethod::EntryPointFromQuickCompiledCodeOffset(8).Int32Value())
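The new constants follow directly from the field removal: entry_point_from_interpreter_ was the first pointer-sized slot in PtrSizedFields, so the quick-code entrypoint moves up by one pointer on each architecture.

    // Sanity arithmetic for the updated offsets (one slot removed).
    static_assert(36 - 4 == 32, "32-bit: quick code offset shrinks by one 4-byte slot");
    static_assert(48 - 8 == 40, "64-bit: quick code offset shrinks by one 8-byte slot");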
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 7972158..6f45dc8 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -137,7 +137,7 @@
}
template<int n, typename T>
-static inline bool IsAligned(T x) {
+static constexpr bool IsAligned(T x) {
static_assert((n & (n - 1)) == 0, "n is not a power of two");
return (x & (n - 1)) == 0;
}
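Making IsAligned constexpr lets alignment facts be checked in constant expressions as well as at run time, e.g. (illustrative, assuming bit_utils.h is included and the art namespace is open):

    static_assert(IsAligned<8>(64), "64 is 8-byte aligned");
    static_assert(!IsAligned<8>(12), "12 is not 8-byte aligned");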
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 23c5942..0694227 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1195,13 +1195,9 @@
if (kIsDebugBuild && !method->IsRuntimeMethod()) {
CHECK(method->GetDeclaringClass() != nullptr);
}
- if (!method->IsNative()) {
- method->SetEntryPointFromInterpreterPtrSize(
- artInterpreterToInterpreterBridge, image_pointer_size_);
- if (!method->IsRuntimeMethod() && method != runtime->GetResolutionMethod()) {
- method->SetEntryPointFromQuickCompiledCodePtrSize(GetQuickToInterpreterBridge(),
- image_pointer_size_);
- }
+ if (!method->IsNative() && !method->IsRuntimeMethod() && !method->IsResolutionMethod()) {
+ method->SetEntryPointFromQuickCompiledCodePtrSize(GetQuickToInterpreterBridge(),
+ image_pointer_size_);
}
}
}
@@ -2206,11 +2202,6 @@
// Install entry point from interpreter.
bool enter_interpreter = NeedsInterpreter(method, method->GetEntryPointFromQuickCompiledCode());
- if (enter_interpreter && !method->IsNative()) {
- method->SetEntryPointFromInterpreter(artInterpreterToInterpreterBridge);
- } else {
- method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
- }
if (method->IsAbstract()) {
method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
@@ -3516,7 +3507,6 @@
// At runtime the method looks like a reference and argument saving method, clone the code
// related parameters from this method.
out->SetEntryPointFromQuickCompiledCode(GetQuickProxyInvokeHandler());
- out->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
}
void ClassLinker::CheckProxyMethod(ArtMethod* method, ArtMethod* prototype) const {
@@ -5647,18 +5637,15 @@
const void* method_code) const {
OatFile::OatMethod oat_method = CreateOatMethod(method_code);
oat_method.LinkMethod(method);
- method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
}
void ClassLinker::SetEntryPointsToInterpreter(ArtMethod* method) const {
if (!method->IsNative()) {
- method->SetEntryPointFromInterpreter(artInterpreterToInterpreterBridge);
method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
} else {
const void* quick_method_code = GetQuickGenericJniStub();
OatFile::OatMethod oat_method = CreateOatMethod(quick_method_code);
oat_method.LinkMethod(method);
- method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
}
}
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 7ac264a..3a15f1a 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -786,7 +786,10 @@
// Get the pointer to the start of the debugging data
const uint8_t* GetDebugInfoStream(const CodeItem* code_item) const {
- if (code_item->debug_info_off_ == 0) {
+ // Check that the offset is in bounds.
+ // Note that although the specification says that 0 should be used if there
+ // is no debug information, some applications incorrectly use 0xFFFFFFFF.
+ if (code_item->debug_info_off_ == 0 || code_item->debug_info_off_ >= size_) {
return nullptr;
} else {
return begin_ + code_item->debug_info_off_;
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 762f061..5f91566 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -320,7 +320,7 @@
return false;
}
- ArtMethod* method_obj = 0;
+ ArtMethod* method_obj = nullptr;
uintptr_t return_pc = 0;
uintptr_t sp = 0;
@@ -331,7 +331,9 @@
// If we don't have a potential method, we're outta here.
VLOG(signals) << "potential method: " << method_obj;
// TODO: Check linear alloc and image.
- if (method_obj == 0 || !IsAligned<kObjectAlignment>(method_obj)) {
+ DCHECK(IsAligned<sizeof(void*)>(ArtMethod::ObjectSize(sizeof(void*))))
+ << "ArtMethod is not pointer aligned";
+ if (method_obj == nullptr || !IsAligned<sizeof(void*)>(method_obj)) {
VLOG(signals) << "no method";
return false;
}
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index d37ddcb..abe9dc2 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -99,19 +99,6 @@
}
}
method->SetEntryPointFromQuickCompiledCode(quick_code);
- if (!method->IsResolutionMethod()) {
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- if (class_linker->IsQuickToInterpreterBridge(quick_code) ||
- (class_linker->IsQuickResolutionStub(quick_code) &&
- Runtime::Current()->GetInstrumentation()->IsForcedInterpretOnly() &&
- !method->IsNative() && !method->IsProxyMethod())) {
- DCHECK(!method->IsNative()) << PrettyMethod(method);
- DCHECK(!method->IsProxyMethod()) << PrettyMethod(method);
- method->SetEntryPointFromInterpreter(art::artInterpreterToInterpreterBridge);
- } else {
- method->SetEntryPointFromInterpreter(art::artInterpreterToCompiledCodeBridge);
- }
- }
}
void Instrumentation::InstallStubsForMethod(ArtMethod* method) {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index fa103b1..0980ea1 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -19,6 +19,7 @@
#include <cmath>
#include "debugger.h"
+#include "entrypoints/runtime_asm_entrypoints.h"
#include "mirror/array-inl.h"
#include "unstarted_runtime.h"
#include "verifier/method_verifier.h"
@@ -490,6 +491,23 @@
uint32_t arg[Instruction::kMaxVarArgRegs],
uint32_t vregC) ALWAYS_INLINE;
+SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+static inline bool NeedsInterpreter(Thread* self, ShadowFrame* new_shadow_frame) ALWAYS_INLINE;
+
+static inline bool NeedsInterpreter(Thread* self, ShadowFrame* new_shadow_frame) {
+ ArtMethod* target = new_shadow_frame->GetMethod();
+ if (UNLIKELY(target->IsNative() || target->IsProxyMethod())) {
+ return false;
+ }
+ Runtime* runtime = Runtime::Current();
+ ClassLinker* class_linker = runtime->GetClassLinker();
+ return runtime->GetInstrumentation()->IsForcedInterpretOnly() ||
+ // This check avoids compiled/interpreter transitions.
+ class_linker->IsQuickToInterpreterBridge(target->GetEntryPointFromQuickCompiledCode()) ||
+ // Force use of the interpreter when the debugger requires it.
+ Dbg::IsForcedInterpreterNeededForCalling(self, target);
+}
+
template<bool is_range, bool do_assignability_check>
static inline bool DoCallCommon(ArtMethod* called_method,
Thread* self,
@@ -660,28 +678,11 @@
// Do the call now.
if (LIKELY(Runtime::Current()->IsStarted())) {
- if (kIsDebugBuild && new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter() == nullptr) {
- LOG(FATAL) << "Attempt to invoke non-executable method: "
- << PrettyMethod(new_shadow_frame->GetMethod());
- UNREACHABLE();
- }
- if (kIsDebugBuild && Runtime::Current()->GetInstrumentation()->IsForcedInterpretOnly() &&
- !new_shadow_frame->GetMethod()->IsNative() &&
- !new_shadow_frame->GetMethod()->IsProxyMethod() &&
- new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter()
- == artInterpreterToCompiledCodeBridge) {
- LOG(FATAL) << "Attempt to call compiled code when -Xint: "
- << PrettyMethod(new_shadow_frame->GetMethod());
- UNREACHABLE();
- }
- // Force the use of interpreter when it is required by the debugger.
- EntryPointFromInterpreter* entry;
- if (UNLIKELY(Dbg::IsForcedInterpreterNeededForCalling(self, new_shadow_frame->GetMethod()))) {
- entry = &art::artInterpreterToInterpreterBridge;
+ if (NeedsInterpreter(self, new_shadow_frame)) {
+ artInterpreterToInterpreterBridge(self, code_item, new_shadow_frame, result);
} else {
- entry = new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter();
+ artInterpreterToCompiledCodeBridge(self, code_item, new_shadow_frame, result);
}
- entry(self, code_item, new_shadow_frame, result);
} else {
UnstartedRuntime::Invoke(self, code_item, new_shadow_frame, result, first_dest_reg);
}
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index bc9545b..fda97db 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -132,11 +132,7 @@
VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to breakpoint";
return false;
}
- const bool result = jit_compile_method_(jit_compiler_handle_, method, self);
- if (result) {
- method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
- }
- return result;
+ return jit_compile_method_(jit_compiler_handle_, method, self);
}
void Jit::CreateThreadPool() {
diff --git a/runtime/oat.h b/runtime/oat.h
index 000ae8e..5706c4e 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '6', '4', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '6', '5', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 11c94db..6f3b0a3 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -857,7 +857,6 @@
<< " native=" << method->IsNative()
<< " entrypoints=" << method->GetEntryPointFromQuickCompiledCode()
<< "," << method->GetEntryPointFromJni()
- << "," << method->GetEntryPointFromInterpreter()
<< " next=" << *cur_quick_frame_;
}
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 4dfa73c..177c5a4 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -1000,6 +1000,45 @@
Assert.assertEquals(Long.reverse(0x8765432187654321L), 0x84c2a6e184c2a6e1L);
Assert.assertEquals(Long.reverse(Long.MAX_VALUE), 0xfffffffffffffffeL);
Assert.assertEquals(Long.reverse(Long.MIN_VALUE), 1L);
+
+ Assert.assertEquals(test_Long_reverse_b22324327(0xaaaaaaaaaaaaaaaaL, 0x5555555555555555L),
+ 157472205507277347L);
+ }
+
+ // A bit more complicated than the above. Use local variables to stress register allocation.
+ private static long test_Long_reverse_b22324327(long l1, long l2) {
+ // Several local integers. Use them in a loop so they get promoted.
+ int i1 = 0, i2 = 1, i3 = 2, i4 = 3, i5 = 4, i6 = 5, i7 = 6, i8 = 7;
+ for (int k = 0; k < 10; k++) {
+ i1 += 1;
+ i2 += 2;
+ i3 += 3;
+ i4 += 4;
+ i5 += 5;
+ i6 += 6;
+ i7 += 7;
+ i8 += 8;
+ }
+
+ // Do the Long.reverse() calls, save the results.
+ long r1 = Long.reverse(l1);
+ long r2 = Long.reverse(l2);
+
+ // Some more looping with the ints.
+ for (int k = 0; k < 10; k++) {
+ i1 += 1;
+ i2 += 2;
+ i3 += 3;
+ i4 += 4;
+ i5 += 5;
+ i6 += 6;
+ i7 += 7;
+ i8 += 8;
+ }
+
+ // Include everything in the result so all the values stay live, and combine them in a
+ // way that can't be constant-folded.
+ return (r1 / i1) + (r2 / i2) + i3 + i4 + i5 + i6 + i7 + i8;
}
static Object runtime;
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
index a4280de..18125fa 100644
--- a/test/482-checker-loop-back-edge-use/src/Main.java
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -36,11 +36,11 @@
}
/// CHECK-START: void Main.loop3(boolean) liveness (after)
- /// CHECK: ParameterValue liveness:4 ranges:{[4,64)} uses:[60,64]
- /// CHECK: Goto liveness:62
+ /// CHECK: ParameterValue liveness:4 ranges:{[4,60)} uses:[56,60]
+ /// CHECK: Goto liveness:58
- /// CHECK-START: void Main.loop3(boolean) liveness (after)
- /// CHECK-NOT: Goto liveness:56
+ // CHECK-START: void Main.loop3(boolean) liveness (after)
+ // CHECK-NOT: Goto liveness:50
public static void loop3(boolean incoming) {
// 'incoming' only needs a use at the outer loop's back edge.
while (System.currentTimeMillis() != 42) {
@@ -49,11 +49,11 @@
}
}
- /// CHECK-START: void Main.loop4(boolean) liveness (after)
- /// CHECK: ParameterValue liveness:4 ranges:{[4,24)} uses:[24]
+ // CHECK-START: void Main.loop4(boolean) liveness (after)
+ // CHECK: ParameterValue liveness:4 ranges:{[4,22)} uses:[22]
- /// CHECK-START: void Main.loop4(boolean) liveness (after)
- /// CHECK-NOT: Goto liveness:22
+ // CHECK-START: void Main.loop4(boolean) liveness (after)
+ // CHECK-NOT: Goto liveness:18
public static void loop4(boolean incoming) {
// 'incoming' has no loop use, so should not have back edge uses.
System.out.println(incoming);