Merge "ART: Change run-*-tests to ANDROID_{HOST|PRODUCT}_OUT"
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index bd13d16..a679ac2 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -158,6 +158,10 @@
# Enable warning for unreachable break & return.
art_clang_cflags += -Wunreachable-code-break -Wunreachable-code-return
+# Bug: http://b/29823425 Disable -Wconstant-conversion and
+# -Wundefined-var-template for Clang update to r271374
+art_clang_cflags += -Wno-constant-conversion -Wno-undefined-var-template
+
# Enable missing-noreturn only on non-Mac. As lots of things are not implemented for Apple, it's
# a pain.
ifneq ($(HOST_OS),darwin)
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index b1644df..e213dc4 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -38,7 +38,7 @@
ifneq ($(TMPDIR),)
ART_HOST_TEST_DIR := $(TMPDIR)/test-art-$(shell echo $$PPID)
else
-ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo $$PPID)
+ART_HOST_TEST_DIR := /tmp/$(USER)/test-art-$(shell echo $$PPID)
endif
# core.oat location on the device.
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index cc96cf0..7f8fa8e 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -71,7 +71,7 @@
ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
ART_GTEST_dex_cache_test_DEX_DEPS := Main
ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
-ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS)
+ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics
ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 9b4042c..f05648c 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -462,7 +462,7 @@
struct XGcOption {
// These defaults are used when the command line arguments for -Xgc:
// are either omitted completely or partially.
- gc::CollectorType collector_type_ = kUseReadBarrier ?
+ gc::CollectorType collector_type_ = kUseReadBarrier ?
// If RB is enabled (currently a build-time decision),
// use CC as the default GC.
gc::kCollectorTypeCC :
@@ -473,6 +473,7 @@
bool verify_pre_gc_rosalloc_ = kIsDebugBuild;
bool verify_pre_sweeping_rosalloc_ = false;
bool verify_post_gc_rosalloc_ = false;
+ bool measure_ = kIsDebugBuild;
bool gcstress_ = false;
};
@@ -515,6 +516,8 @@
xgc.gcstress_ = true;
} else if (gc_option == "nogcstress") {
xgc.gcstress_ = false;
+ } else if (gc_option == "measure") {
+ xgc.measure_ = true;
} else if ((gc_option == "precise") ||
(gc_option == "noprecise") ||
(gc_option == "verifycardtable") ||
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 689f2d0..e3f8a5c 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -68,6 +68,8 @@
optimizing/prepare_for_register_allocation.cc \
optimizing/reference_type_propagation.cc \
optimizing/register_allocator.cc \
+ optimizing/register_allocation_resolver.cc \
+ optimizing/register_allocator_linear_scan.cc \
optimizing/select_generator.cc \
optimizing/sharpening.cc \
optimizing/side_effects_analysis.cc \
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 1fc247f..8aefd9e 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -533,9 +533,6 @@
first_index_bounds_check_map_(
std::less<int>(),
graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
- dynamic_bce_standby_(
- graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
- record_dynamic_bce_standby_(true),
early_exit_loop_(
std::less<uint32_t>(),
graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
@@ -560,14 +557,6 @@
}
void Finish() {
- // Retry dynamic bce candidates on standby that are still in the graph.
- record_dynamic_bce_standby_ = false;
- for (HBoundsCheck* bounds_check : dynamic_bce_standby_) {
- if (bounds_check->IsInBlock()) {
- TryDynamicBCE(bounds_check);
- }
- }
-
// Preserve SSA structure which may have been broken by adding one or more
// new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
InsertPhiNodes();
@@ -576,7 +565,6 @@
early_exit_loop_.clear();
taken_test_loop_.clear();
finite_loop_.clear();
- dynamic_bce_standby_.clear();
}
private:
@@ -832,7 +820,6 @@
array_length->IsArrayLength() ||
array_length->IsPhi());
bool try_dynamic_bce = true;
-
// Analyze index range.
if (!index->IsIntConstant()) {
// Non-constant index.
@@ -896,10 +883,20 @@
// If static analysis fails, and OOB is not certain, try dynamic elimination.
if (try_dynamic_bce) {
// Try loop-based dynamic elimination.
- if (TryDynamicBCE(bounds_check)) {
+ HLoopInformation* loop = bounds_check->GetBlock()->GetLoopInformation();
+ bool needs_finite_test = false;
+ bool needs_taken_test = false;
+ if (DynamicBCESeemsProfitable(loop, bounds_check->GetBlock()) &&
+ induction_range_.CanGenerateCode(
+ bounds_check, index, &needs_finite_test, &needs_taken_test) &&
+ CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+ // Do this test last, since it may generate code.
+ CanHandleLength(loop, array_length, needs_taken_test)) {
+ TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ TransformLoopForDynamicBCE(loop, bounds_check);
return;
}
- // Prepare dominator-based dynamic elimination.
+ // Otherwise, prepare dominator-based dynamic elimination.
if (first_index_bounds_check_map_.find(array_length->GetId()) ==
first_index_bounds_check_map_.end()) {
// Remember the first bounds check against each array_length. That bounds check
@@ -1180,7 +1177,7 @@
}
}
- // Perform dominator-based dynamic elimination on suitable set of bounds checks.
+ /** Performs dominator-based dynamic elimination on a suitable set of bounds checks. */
void AddCompareWithDeoptimization(HBasicBlock* block,
HInstruction* array_length,
HInstruction* base,
@@ -1190,6 +1187,12 @@
// Construct deoptimization on single or double bounds on range [base-min_c,base+max_c],
// for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1
// and base+3, since we made the assumption any in between value may occur too.
+ // In code, using unsigned comparisons:
+ // (1) constants only
+ // if (max_c >= a.length) deoptimize;
+ // (2) general case
+ // if (base-min_c > base+max_c) deoptimize;
+ // if (base+max_c >= a.length ) deoptimize;
static_assert(kMaxLengthForAddingDeoptimize < std::numeric_limits<int32_t>::max(),
"Incorrect max length may be subject to arithmetic wrap-around");
HInstruction* upper = GetGraph()->GetIntConstant(max_c);
@@ -1208,7 +1211,7 @@
has_dom_based_dynamic_bce_ = true;
}
- // Attempt dominator-based dynamic elimination on remaining candidates.
+ /** Attempts dominator-based dynamic elimination on remaining candidates. */
void AddComparesWithDeoptimization(HBasicBlock* block) {
for (const auto& entry : first_index_bounds_check_map_) {
HBoundsCheck* bounds_check = entry.second;
@@ -1272,17 +1275,19 @@
candidates.push_back(other_bounds_check);
}
}
- // Perform dominator-based deoptimization if it seems profitable. Note that we reject cases
- // where the distance min_c:max_c range gets close to the maximum possible array length,
- // since those cases are likely to always deopt (such situations do not necessarily go
- // OOB, though, since the programmer could rely on wrap-around from max to min).
+ // Perform dominator-based deoptimization if it seems profitable, where we eliminate
+ // bounds checks and replace these with deopt checks that guard against any possible
+ // OOB. Note that we reject cases where the distance min_c:max_c range gets close to
+ // the maximum possible array length, since those cases are likely to always deopt
+ // (such situations do not necessarily go OOB, though, since the array could be really
+ // large, or the programmer could rely on arithmetic wrap-around from max to min).
size_t threshold = kThresholdForAddingDeoptimize + (base == nullptr ? 0 : 1); // extra test?
uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
if (candidates.size() >= threshold &&
(base != nullptr || min_c >= 0) && // reject certain OOB
distance <= kMaxLengthForAddingDeoptimize) { // reject likely/certain deopt
AddCompareWithDeoptimization(block, array_length, base, min_c, max_c);
- for (HInstruction* other_bounds_check : candidates) {
+ for (HBoundsCheck* other_bounds_check : candidates) {
// Only replace if still in the graph. This avoids visiting the same
// bounds check twice if it occurred multiple times in the use list.
if (other_bounds_check->IsInBlock()) {
@@ -1328,45 +1333,127 @@
}
/**
- * When the compiler fails to remove a bounds check statically, we try to remove the bounds
- * check dynamically by adding runtime tests that trigger a deoptimization in case bounds
- * will go out of range (we want to be rather certain of that given the slowdown of
- * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
- * bounds checks and related null checks removed.
+ * Performs loop-based dynamic elimination on a bounds check. In order to minimize the
+ * number of eventually generated tests, related bounds checks with tests that can be
+ * combined with tests for the given bounds check are collected first.
*/
- bool TryDynamicBCE(HBoundsCheck* instruction) {
- HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
- HInstruction* index = instruction->InputAt(0);
- HInstruction* length = instruction->InputAt(1);
- // If dynamic bounds check elimination seems profitable and is possible, then proceed.
- bool needs_finite_test = false;
- bool needs_taken_test = false;
- if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
- induction_range_.CanGenerateCode(
- instruction, index, &needs_finite_test, &needs_taken_test) &&
- CanHandleInfiniteLoop(loop, instruction, index, needs_finite_test) &&
- CanHandleLength(loop, length, needs_taken_test)) { // do this test last (may code gen)
- HInstruction* lower = nullptr;
- HInstruction* upper = nullptr;
- // Generate the following unsigned comparisons
- // if (lower > upper) deoptimize;
- // if (upper >= length) deoptimize;
- // or, for a non-induction index, just the unsigned comparison on its 'upper' value
- // if (upper >= length) deoptimize;
- // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
- // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests
- // correctly guard against any possible OOB (including arithmetic wrap-around cases).
- TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
- HBasicBlock* block = GetPreHeader(loop, instruction);
- induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
- if (lower != nullptr) {
- InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+ void TransformLoopForDynamicBCE(HLoopInformation* loop, HBoundsCheck* bounds_check) {
+ HInstruction* index = bounds_check->InputAt(0);
+ HInstruction* array_length = bounds_check->InputAt(1);
+ DCHECK(loop->IsDefinedOutOfTheLoop(array_length)); // pre-checked
+ DCHECK(loop->DominatesAllBackEdges(bounds_check->GetBlock()));
+ // Collect all bounds checks in the same loop that are related as "a[base + constant]"
+ // for a base instruction (possibly absent) and various constants.
+ ValueBound value = ValueBound::AsValueBound(index);
+ HInstruction* base = value.GetInstruction();
+ int32_t min_c = base == nullptr ? 0 : value.GetConstant();
+ int32_t max_c = value.GetConstant();
+ ArenaVector<HBoundsCheck*> candidates(
+ GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+ ArenaVector<HBoundsCheck*> standby(
+ GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+ for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (user->IsBoundsCheck() && loop == user->GetBlock()->GetLoopInformation()) {
+ HBoundsCheck* other_bounds_check = user->AsBoundsCheck();
+ HInstruction* other_index = other_bounds_check->InputAt(0);
+ HInstruction* other_array_length = other_bounds_check->InputAt(1);
+ ValueBound other_value = ValueBound::AsValueBound(other_index);
+ int32_t other_c = other_value.GetConstant();
+ if (array_length == other_array_length && base == other_value.GetInstruction()) {
+ // Does the current basic block dominate all back edges? If not,
+ // add this candidate later only if it falls into the range.
+ if (!loop->DominatesAllBackEdges(user->GetBlock())) {
+ standby.push_back(other_bounds_check);
+ continue;
+ }
+ min_c = std::min(min_c, other_c);
+ max_c = std::max(max_c, other_c);
+ candidates.push_back(other_bounds_check);
+ }
}
- InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
- ReplaceInstruction(instruction, index);
- return true;
}
- return false;
+ // Add standby candidates that fall in selected range.
+ for (HBoundsCheck* other_bounds_check : standby) {
+ HInstruction* other_index = other_bounds_check->InputAt(0);
+ int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+ if (min_c <= other_c && other_c <= max_c) {
+ candidates.push_back(other_bounds_check);
+ }
+ }
+ // Perform loop-based deoptimization if it seems profitable, where we eliminate bounds
+ // checks and replace these with deopt checks that guard against any possible OOB.
+ DCHECK_LT(0u, candidates.size());
+ uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
+ if ((base != nullptr || min_c >= 0) && // reject certain OOB
+ distance <= kMaxLengthForAddingDeoptimize) { // reject likely/certain deopt
+ HBasicBlock* block = GetPreHeader(loop, bounds_check);
+ HInstruction* min_lower = nullptr;
+ HInstruction* min_upper = nullptr;
+ HInstruction* max_lower = nullptr;
+ HInstruction* max_upper = nullptr;
+ // Iterate over all bounds checks.
+ for (HBoundsCheck* other_bounds_check : candidates) {
+ // Only handle if still in the graph. This avoids visiting the same
+ // bounds check twice if it occurred multiple times in the use list.
+ if (other_bounds_check->IsInBlock()) {
+ HInstruction* other_index = other_bounds_check->InputAt(0);
+ int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+ // Generate code for either the maximum or minimum. Range analysis was already queried as to
+ // whether code generation on the original, and thus the related, bounds check was possible.
+ // It handles either loop invariants (lower is not set) or unit strides.
+ if (other_c == max_c) {
+ induction_range_.GenerateRangeCode(
+ other_bounds_check, other_index, GetGraph(), block, &max_lower, &max_upper);
+ } else if (other_c == min_c && base != nullptr) {
+ induction_range_.GenerateRangeCode(
+ other_bounds_check, other_index, GetGraph(), block, &min_lower, &min_upper);
+ }
+ ReplaceInstruction(other_bounds_check, other_index);
+ }
+ }
+ // In code, using unsigned comparisons:
+ // (1) constants only
+ // if (max_upper >= a.length ) deoptimize;
+ // (2) two symbolic invariants
+ // if (min_upper > max_upper) deoptimize; unless min_c == max_c
+ // if (max_upper >= a.length ) deoptimize;
+ // (3) general case, unit strides (where lower would exceed upper for arithmetic wrap-around)
+ // if (min_lower > max_lower) deoptimize; unless min_c == max_c
+ // if (max_lower > max_upper) deoptimize;
+ // if (max_upper >= a.length ) deoptimize;
+ if (base == nullptr) {
+ // Constants only.
+ DCHECK_GE(min_c, 0);
+ DCHECK(min_lower == nullptr && min_upper == nullptr &&
+ max_lower == nullptr && max_upper != nullptr);
+ } else if (max_lower == nullptr) {
+ // Two symbolic invariants.
+ if (min_c != max_c) {
+ DCHECK(min_lower == nullptr && min_upper != nullptr &&
+ max_lower == nullptr && max_upper != nullptr);
+ InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_upper, max_upper));
+ } else {
+ DCHECK(min_lower == nullptr && min_upper == nullptr &&
+ max_lower == nullptr && max_upper != nullptr);
+ }
+ } else {
+ // General case, unit strides.
+ if (min_c != max_c) {
+ DCHECK(min_lower != nullptr && min_upper != nullptr &&
+ max_lower != nullptr && max_upper != nullptr);
+ InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_lower, max_lower));
+ } else {
+ DCHECK(min_lower == nullptr && min_upper == nullptr &&
+ max_lower != nullptr && max_upper != nullptr);
+ }
+ InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(max_lower, max_upper));
+ }
+ InsertDeoptInLoop(
+ loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(max_upper, array_length));
+ } else {
+ // TODO: if rejected, avoid doing this again for subsequent instructions in this set?
+ }
}
/**
@@ -1474,8 +1561,7 @@
* of the loop to use, dynamic bce in such cases is only allowed if other tests
* ensure the loop is finite.
*/
- bool CanHandleInfiniteLoop(
- HLoopInformation* loop, HBoundsCheck* check, HInstruction* index, bool needs_infinite_test) {
+ bool CanHandleInfiniteLoop(HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
if (needs_infinite_test) {
// If we already forced the loop to be finite, allow directly.
const uint32_t loop_id = loop->GetHeader()->GetBlockId();
@@ -1497,11 +1583,6 @@
}
}
}
- // If bounds check made it this far, it is worthwhile to check later if
- // the loop was forced finite by another candidate.
- if (record_dynamic_bce_standby_) {
- dynamic_bce_standby_.push_back(check);
- }
return false;
}
return true;
@@ -1727,10 +1808,6 @@
// in a block that checks an index against that HArrayLength.
ArenaSafeMap<int, HBoundsCheck*> first_index_bounds_check_map_;
- // Stand by list for dynamic bce.
- ArenaVector<HBoundsCheck*> dynamic_bce_standby_;
- bool record_dynamic_bce_standby_;
-
// Early-exit loop bookkeeping.
ArenaSafeMap<uint32_t, bool> early_exit_loop_;
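[Aside, not part of the patch] The deoptimization guards described in the comments above (cases (1)-(3)) rely on unsigned comparisons, so a single "upper >= a.length" test also rejects wrapped-around, i.e. effectively negative, values. A minimal C++ sketch of that shape, with hypothetical names:

  #include <cstdint>

  // Returns true when every index in [lower, upper] is safe for an array of the given
  // length; false corresponds to "deoptimize". All comparisons are unsigned, so a
  // negative bound shows up as a huge value and fails the relevant test.
  bool GuardsPass(uint32_t lower, uint32_t upper, uint32_t length) {
    if (lower > upper) {      // the range itself wrapped around
      return false;
    }
    return upper < length;    // one comparison covers 0 <= index < length for the whole range
  }

For the general case the caller would pass lower and upper computed as uint32_t from the base plus the minimum and maximum constants, mirroring the HAbove/HAboveOrEqual nodes that TransformLoopForDynamicBCE and AddCompareWithDeoptimization insert.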
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 47e6625..5e6e175 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -80,7 +80,11 @@
virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
+ // Save live core and floating-point caller-save registers and
+ // update the stack mask in `locations` for registers holding object
+ // references.
virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+ // Restore live core and floating-point caller-save registers.
virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
bool IsCoreRegisterSaved(int reg) const {
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1aa7b54..236ed20 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -316,7 +316,7 @@
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<
- kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+ kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
} else {
DCHECK(instruction_->IsCheckCast());
@@ -437,11 +437,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // R0 (if it is live), as it is clobbered by functions
- // art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
DCHECK_NE(reg, SP);
@@ -469,8 +467,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ b(GetExitLabel());
}
@@ -1937,7 +1933,7 @@
__ LoadFromOffset(kLoadWord, temp, temp,
mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kArmPointerSize));
+ invoke->GetImtIndex(), kArmPointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
uint32_t entry_point =
@@ -4286,6 +4282,122 @@
codegen_->GenerateNullCheck(instruction);
}
+static LoadOperandType GetLoadOperandType(Primitive::Type type) {
+ switch (type) {
+ case Primitive::kPrimNot:
+ return kLoadWord;
+ case Primitive::kPrimBoolean:
+ return kLoadUnsignedByte;
+ case Primitive::kPrimByte:
+ return kLoadSignedByte;
+ case Primitive::kPrimChar:
+ return kLoadUnsignedHalfword;
+ case Primitive::kPrimShort:
+ return kLoadSignedHalfword;
+ case Primitive::kPrimInt:
+ return kLoadWord;
+ case Primitive::kPrimLong:
+ return kLoadWordPair;
+ case Primitive::kPrimFloat:
+ return kLoadSWord;
+ case Primitive::kPrimDouble:
+ return kLoadDWord;
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+static StoreOperandType GetStoreOperandType(Primitive::Type type) {
+ switch (type) {
+ case Primitive::kPrimNot:
+ return kStoreWord;
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ return kStoreByte;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ return kStoreHalfword;
+ case Primitive::kPrimInt:
+ return kStoreWord;
+ case Primitive::kPrimLong:
+ return kStoreWordPair;
+ case Primitive::kPrimFloat:
+ return kStoreSWord;
+ case Primitive::kPrimDouble:
+ return kStoreDWord;
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+void CodeGeneratorARM::LoadFromShiftedRegOffset(Primitive::Type type,
+ Location out_loc,
+ Register base,
+ Register reg_offset,
+ Condition cond) {
+ uint32_t shift_count = Primitive::ComponentSizeShift(type);
+ Address mem_address(base, reg_offset, Shift::LSL, shift_count);
+
+ switch (type) {
+ case Primitive::kPrimByte:
+ __ ldrsb(out_loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimBoolean:
+ __ ldrb(out_loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimShort:
+ __ ldrsh(out_loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimChar:
+ __ ldrh(out_loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ __ ldr(out_loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+void CodeGeneratorARM::StoreToShiftedRegOffset(Primitive::Type type,
+ Location loc,
+ Register base,
+ Register reg_offset,
+ Condition cond) {
+ uint32_t shift_count = Primitive::ComponentSizeShift(type);
+ Address mem_address(base, reg_offset, Shift::LSL, shift_count);
+
+ switch (type) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ __ strb(loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ __ strh(loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ __ str(loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
@@ -4320,70 +4432,40 @@
Location index = locations->InAt(1);
Location out_loc = locations->Out();
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
-
Primitive::Type type = instruction->GetType();
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
switch (type) {
- case Primitive::kPrimBoolean: {
- Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
- } else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
- __ LoadFromOffset(kLoadUnsignedByte, out, IP, data_offset);
- }
- break;
- }
-
- case Primitive::kPrimByte: {
- Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
- } else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
- __ LoadFromOffset(kLoadSignedByte, out, IP, data_offset);
- }
- break;
- }
-
- case Primitive::kPrimShort: {
- Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
- } else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
- __ LoadFromOffset(kLoadSignedHalfword, out, IP, data_offset);
- }
- break;
- }
-
- case Primitive::kPrimChar: {
- Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
- } else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
- __ LoadFromOffset(kLoadUnsignedHalfword, out, IP, data_offset);
- }
- break;
- }
-
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
case Primitive::kPrimInt: {
- Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadFromOffset(kLoadWord, out, obj, offset);
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
+
+ LoadOperandType load_type = GetLoadOperandType(type);
+ __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
} else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+ Register temp = IP;
+
+ if (has_intermediate_address) {
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+ DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+ }
+ temp = obj;
+ } else {
+ __ add(temp, obj, ShifterOperand(data_offset));
+ }
+ codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
}
break;
}
@@ -4412,8 +4494,22 @@
// reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
} else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+ Register temp = IP;
+
+ if (has_intermediate_address) {
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+ DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+ }
+ temp = obj;
+ } else {
+ __ add(temp, obj, ShifterOperand(data_offset));
+ }
+ codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+
codegen_->MaybeRecordImplicitNullCheck(instruction);
// If read barriers are enabled, emit read barriers other than
// Baker's using a slow path (and also unpoison the loaded
@@ -4512,54 +4608,68 @@
bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ uint32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
+ Location value_loc = locations->InAt(2);
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
switch (value_type) {
case Primitive::kPrimBoolean:
- case Primitive::kPrimByte: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ StoreToOffset(kStoreByte, value, array, offset);
- } else {
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>()));
- __ StoreToOffset(kStoreByte, value, IP, data_offset);
- }
- break;
- }
-
+ case Primitive::kPrimByte:
case Primitive::kPrimShort:
- case Primitive::kPrimChar: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegister<Register>();
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt: {
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ StoreToOffset(kStoreHalfword, value, array, offset);
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ uint32_t full_offset =
+ data_offset + (const_index << Primitive::ComponentSizeShift(value_type));
+ StoreOperandType store_type = GetStoreOperandType(value_type);
+ __ StoreToOffset(store_type, value_loc.AsRegister<Register>(), array, full_offset);
} else {
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
- __ StoreToOffset(kStoreHalfword, value, IP, data_offset);
+ Register temp = IP;
+
+ if (has_intermediate_address) {
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+ DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == data_offset);
+ }
+ temp = array;
+ } else {
+ __ add(temp, array, ShifterOperand(data_offset));
+ }
+ codegen_->StoreToShiftedRegOffset(value_type,
+ value_loc,
+ temp,
+ index.AsRegister<Register>());
}
break;
}
case Primitive::kPrimNot: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Location value_loc = locations->InAt(2);
Register value = value_loc.AsRegister<Register>();
- Register source = value;
+ // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
+ // See the comment in instruction_simplifier_shared.cc.
+ DCHECK(!has_intermediate_address);
if (instruction->InputAt(2)->IsNullConstant()) {
// Just setting null.
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreToOffset(kStoreWord, source, array, offset);
+ __ StoreToOffset(kStoreWord, value, array, offset);
} else {
DCHECK(index.IsRegister()) << index;
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ StoreToOffset(kStoreWord, source, IP, data_offset);
+ __ add(IP, array, ShifterOperand(data_offset));
+ codegen_->StoreToShiftedRegOffset(value_type,
+ value_loc,
+ IP,
+ index.AsRegister<Register>());
}
codegen_->MaybeRecordImplicitNullCheck(instruction);
DCHECK(!needs_write_barrier);
@@ -4588,8 +4698,11 @@
__ StoreToOffset(kStoreWord, value, array, offset);
} else {
DCHECK(index.IsRegister()) << index;
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ StoreToOffset(kStoreWord, value, IP, data_offset);
+ __ add(IP, array, ShifterOperand(data_offset));
+ codegen_->StoreToShiftedRegOffset(value_type,
+ value_loc,
+ IP,
+ index.AsRegister<Register>());
}
codegen_->MaybeRecordImplicitNullCheck(instruction);
__ b(&done);
@@ -4656,6 +4769,7 @@
}
}
+ Register source = value;
if (kPoisonHeapReferences) {
// Note that in the case where `value` is a null reference,
// we do not enter this block, as a null reference does not
@@ -4672,8 +4786,12 @@
__ StoreToOffset(kStoreWord, source, array, offset);
} else {
DCHECK(index.IsRegister()) << index;
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ StoreToOffset(kStoreWord, source, IP, data_offset);
+
+ __ add(IP, array, ShifterOperand(data_offset));
+ codegen_->StoreToShiftedRegOffset(value_type,
+ Location::RegisterLocation(source),
+ IP,
+ index.AsRegister<Register>());
}
if (!may_need_runtime_call_for_type_check) {
@@ -4693,23 +4811,7 @@
break;
}
- case Primitive::kPrimInt: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreToOffset(kStoreWord, value, array, offset);
- } else {
- DCHECK(index.IsRegister()) << index;
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ StoreToOffset(kStoreWord, value, IP, data_offset);
- }
- break;
- }
-
case Primitive::kPrimLong: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
Location value = locations->InAt(2);
if (index.IsConstant()) {
size_t offset =
@@ -4723,7 +4825,6 @@
}
case Primitive::kPrimFloat: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
Location value = locations->InAt(2);
DCHECK(value.IsFpuRegister());
if (index.IsConstant()) {
@@ -4737,7 +4838,6 @@
}
case Primitive::kPrimDouble: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
Location value = locations->InAt(2);
DCHECK(value.IsFpuRegisterPair());
if (index.IsConstant()) {
@@ -4778,6 +4878,37 @@
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
+void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location out = locations->Out();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
+
+ if (second.IsRegister()) {
+ __ add(out.AsRegister<Register>(),
+ first.AsRegister<Register>(),
+ ShifterOperand(second.AsRegister<Register>()));
+ } else {
+ __ AddConstant(out.AsRegister<Register>(),
+ first.AsRegister<Register>(),
+ second.GetConstant()->AsIntConstant()->GetValue());
+ }
+}
+
void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
? LocationSummary::kCallOnSlowPath
@@ -6979,7 +7110,7 @@
method_offset);
} else {
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- instruction->GetIndex() % ImTable::kSize, kArmPointerSize));
+ instruction->GetIndex(), kArmPointerSize));
__ LoadFromOffset(kLoadWord,
locations->Out().AsRegister<Register>(),
locations->InAt(0).AsRegister<Register>(),
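[Illustrative note, not part of the patch] The new LoadFromShiftedRegOffset/StoreToShiftedRegOffset helpers emit a single load or store whose register offset is scaled by LSL #ComponentSizeShift(type) (0 for bytes, 1 for halfwords, 2 for words), once the data_offset add has been hoisted into IP or taken over by an HIntermediateAddress input. The address arithmetic they implement, as a small sketch:

  #include <cstddef>
  #include <cstdint>

  // address = (array + data_offset) + (index << component_size_shift)
  // 'data_offset' is the offset of the first element inside the array object; the
  // base add is performed once and can be shared by several accesses to the array.
  uintptr_t ElementAddress(uintptr_t array,
                           size_t data_offset,
                           uint32_t index,
                           uint32_t component_size_shift) {
    uintptr_t base = array + data_offset;
    return base + (static_cast<uintptr_t>(index) << component_size_shift);
  }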
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index cc38f3e..ef7913b 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -365,6 +365,24 @@
// Helper method to move a 64bits value between two locations.
void Move64(Location destination, Location source);
+ void LoadOrStoreToOffset(Primitive::Type type,
+ Location loc,
+ Register base,
+ int32_t offset,
+ bool is_load,
+ Condition cond = AL);
+
+ void LoadFromShiftedRegOffset(Primitive::Type type,
+ Location out_loc,
+ Register base,
+ Register reg_offset,
+ Condition cond = AL);
+ void StoreToShiftedRegOffset(Primitive::Type type,
+ Location out_loc,
+ Register base,
+ Register reg_offset,
+ Condition cond = AL);
+
// Generate code to invoke a runtime entry point.
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 07b7823..76b0797 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -462,7 +462,7 @@
if (instruction_->IsInstanceOf()) {
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
- CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
+ CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t,
const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
@@ -603,11 +603,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // W0 (if it is live), as it is clobbered by functions
- // art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
DCHECK_NE(obj_.reg(), LR);
@@ -635,8 +633,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
}
@@ -690,10 +686,9 @@
instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));
+ instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
__ Bind(GetEntryLabel());
@@ -1983,9 +1978,8 @@
}
}
-void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1994,10 +1988,9 @@
locations->SetOut(Location::RequiresRegister());
}
-void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
- HArm64IntermediateAddress* instruction) {
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(
+ HIntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
__ Add(OutputRegister(instruction),
InputRegisterAt(instruction, 0),
@@ -2097,9 +2090,8 @@
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Object ArrayGet with Baker's read barrier case.
Register temp = temps.AcquireW();
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
- DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!instruction->GetArray()->IsIntermediateAddress());
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -2112,15 +2104,15 @@
source = HeapOperand(obj, offset);
} else {
Register temp = temps.AcquireSameSizeAs(obj);
- if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+ if (instruction->GetArray()->IsIntermediateAddress()) {
// The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+ // HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
// We do not need to compute the intermediate address from the array: the
// input instruction has done it already. See the comment in
- // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+ // `TryExtractArrayAccessAddress()`.
if (kIsDebugBuild) {
- HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+ HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
}
temp = obj;
@@ -2204,15 +2196,15 @@
} else {
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireSameSizeAs(array);
- if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+ if (instruction->GetArray()->IsIntermediateAddress()) {
// The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+ // HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
// We do not need to compute the intermediate address from the array: the
// input instruction has done it already. See the comment in
- // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+ // `TryExtractArrayAccessAddress()`.
if (kIsDebugBuild) {
- HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+ HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
}
temp = array;
@@ -2228,7 +2220,7 @@
codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
DCHECK(needs_write_barrier);
- DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+ DCHECK(!instruction->GetArray()->IsIntermediateAddress());
vixl::aarch64::Label done;
SlowPathCodeARM64* slow_path = nullptr;
{
@@ -3561,7 +3553,7 @@
__ Ldr(temp,
MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kArm64PointerSize));
+ invoke->GetImtIndex(), kArm64PointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ Ldr(temp, MemOperand(temp, method_offset));
// lr = temp->GetEntryPoint();
@@ -5382,7 +5374,7 @@
MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
} else {
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- instruction->GetIndex() % ImTable::kSize, kArm64PointerSize));
+ instruction->GetIndex(), kArm64PointerSize));
__ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
__ Ldr(XRegisterFrom(locations->Out()),
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 2b71da0..39248aa 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -415,7 +415,7 @@
this,
IsDirectEntrypoint(kQuickInstanceofNonTrivial));
CheckEntrypointTypes<
- kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+ kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -3791,7 +3791,7 @@
__ LoadFromOffset(kLoadWord, temp, temp,
mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kMipsPointerSize));
+ invoke->GetImtIndex(), kMipsPointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
@@ -5389,7 +5389,7 @@
method_offset);
} else {
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- instruction->GetIndex() % ImTable::kSize, kMipsPointerSize));
+ instruction->GetIndex(), kMipsPointerSize));
__ LoadFromOffset(kLoadWord,
locations->Out().AsRegister<Register>(),
locations->InAt(0).AsRegister<Register>(),
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index aa1ba84..29b8c20 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -362,7 +362,7 @@
dex_pc,
this);
CheckEntrypointTypes<
- kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+ kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -2951,7 +2951,7 @@
__ LoadFromOffset(kLoadDoubleword, temp, temp,
mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kMips64PointerSize));
+ invoke->GetImtIndex(), kMips64PointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1cc6060..82baaa0 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -349,7 +349,7 @@
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<
- kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+ kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -472,11 +472,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // EAX (if it is live), as it is clobbered by functions
- // art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
DCHECK_NE(reg, ESP);
@@ -502,8 +500,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -2093,7 +2089,7 @@
Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
// temp = temp->GetImtEntryAt(method_offset);
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kX86PointerSize));
+ invoke->GetImtIndex(), kX86PointerSize));
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
__ call(Address(temp,
@@ -4115,7 +4111,7 @@
Address(locations->InAt(0).AsRegister<Register>(), method_offset));
} else {
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- instruction->GetIndex() % ImTable::kSize, kX86PointerSize));
+ instruction->GetIndex(), kX86PointerSize));
__ movl(locations->Out().AsRegister<Register>(),
Address(locations->InAt(0).AsRegister<Register>(),
mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a015893..b6ba30e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -369,7 +369,7 @@
dex_pc,
this);
CheckEntrypointTypes<
- kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+ kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -493,11 +493,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // RDI and/or RAX (if they are live), as they are clobbered by
- // functions art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
DCHECK_NE(reg, RSP);
@@ -523,8 +521,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -2322,7 +2318,7 @@
Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
// temp = temp->GetImtEntryAt(method_offset);
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize));
+ invoke->GetImtIndex(), kX86_64PointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ movq(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
@@ -4048,7 +4044,7 @@
Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
} else {
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
+ instruction->GetIndex(), kX86_64PointerSize));
__ movq(locations->Out().AsRegister<CpuRegister>(),
Address(locations->InAt(0).AsRegister<CpuRegister>(),
mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 6be79fa..fe9a7af 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -44,7 +44,7 @@
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
#include "utils.h"
#include "utils/arm/managed_register_arm.h"
@@ -219,7 +219,7 @@
PrepareForRegisterAllocation(graph).Run();
liveness.Analyze();
- RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+ RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
hook_before_codegen(graph);
InternalCodeAllocator allocator;
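[Sketch under assumed names, not the actual ART API] The call-site change above suggests RegisterAllocator now acts as a factory over allocation strategies, with the linear-scan implementation moved into register_allocator_linear_scan.cc (added to compiler/Android.mk earlier in this diff). A minimal C++ sketch of that shape, with hypothetical class names:

  #include <memory>

  class RegisterAllocatorBase {
   public:
    virtual ~RegisterAllocatorBase() = default;
    virtual void AllocateRegisters() = 0;
  };

  class LinearScanAllocator : public RegisterAllocatorBase {
   public:
    void AllocateRegisters() override {
      // Linear-scan allocation over the liveness intervals would go here.
    }
  };

  // Call sites ask the factory for a strategy instead of constructing one directly.
  std::unique_ptr<RegisterAllocatorBase> CreateAllocator() {
    return std::make_unique<LinearScanAllocator>();
  }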
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index d2afa5b..af0ee4e 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -227,7 +227,7 @@
return vixl::aarch64::Assembler::IsImmMovn(value, vixl::aarch64::kXRegSize);
} else {
DCHECK(instr->IsAdd() ||
- instr->IsArm64IntermediateAddress() ||
+ instr->IsIntermediateAddress() ||
instr->IsBoundsCheck() ||
instr->IsCompare() ||
instr->IsCondition() ||
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index e14f603..0b4c569 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -31,7 +31,7 @@
#include "nodes.h"
#include "optimization.h"
#include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
#include "utils/assembler.h"
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 6c1292c..a592162 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -35,7 +35,7 @@
#include "nodes.h"
#include "optimizing_compiler.h"
#include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
#include "quick/inline_method_analyser.h"
#include "sharpening.h"
#include "ssa_builder.h"
@@ -208,12 +208,8 @@
DCHECK(cls->IsProxyClass()) << PrettyClass(cls);
// TODO: deal with proxy classes.
} else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+ DCHECK_EQ(cls->GetDexCache(), dex_cache.Get());
index = cls->GetDexTypeIndex();
- } else {
- index = cls->FindTypeIndexInOtherDexFile(dex_file);
- }
-
- if (index != DexFile::kDexNoIndex) {
// Update the dex cache to ensure the class is in. The generated code will
// consider it is. We make it safe by updating the dex cache, as other
// dex files might also load the class, and there is no guarantee the dex
@@ -221,6 +217,14 @@
if (dex_cache->GetResolvedType(index) == nullptr) {
dex_cache->SetResolvedType(index, cls);
}
+ } else {
+ index = cls->FindTypeIndexInOtherDexFile(dex_file);
+ // We cannot guarantee the entry in the dex cache will resolve to the same class,
+ // as there may be different class loaders. So only return the index if it's
+ // the right class in the dex cache already.
+ if (index != DexFile::kDexNoIndex && dex_cache->GetResolvedType(index) != cls) {
+ index = DexFile::kDexNoIndex;
+ }
}
return index;
@@ -273,7 +277,7 @@
return false;
}
MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
- mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file)
+ mirror::DexCache* const dex_cache = IsSameDexFile(caller_dex_file, *ref.dex_file)
? caller_compilation_unit_.GetDexCache().Get()
: class_linker->FindDexCache(soa.Self(), *ref.dex_file);
resolved_method = dex_cache->GetResolvedMethod(
@@ -657,7 +661,7 @@
ArtMethod* new_method = nullptr;
if (invoke_instruction->IsInvokeInterface()) {
new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
- method_index % ImTable::kSize, pointer_size);
+ method_index, pointer_size);
if (new_method->IsRuntimeMethod()) {
// Bail out as soon as we see a conflict trampoline in one of the target's
// interface table.
@@ -804,8 +808,6 @@
bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* method,
HInstruction** return_replacement) {
- const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-
if (method->IsProxyMethod()) {
VLOG(compiler) << "Method " << PrettyMethod(method)
<< " is not inlined because of unimplemented inline support for proxy methods.";
@@ -828,15 +830,6 @@
return false;
}
- uint32_t method_index = FindMethodIndexIn(
- method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
- if (method_index == DexFile::kDexNoIndex) {
- VLOG(compiler) << "Call to "
- << PrettyMethod(method)
- << " cannot be inlined because unaccessible to caller";
- return false;
- }
-
bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
const DexFile::CodeItem* code_item = method->GetCodeItem();
@@ -873,7 +866,7 @@
if (Runtime::Current()->UseJitCompilation() ||
!compiler_driver_->IsMethodVerifiedWithoutFailures(
method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+ VLOG(compiler) << "Method " << PrettyMethod(method)
<< " couldn't be verified, so it cannot be inlined";
return false;
}
@@ -883,7 +876,7 @@
invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
// Case of a static method that cannot be inlined because it implicitly
// requires an initialization check of its declaring class.
- VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+ VLOG(compiler) << "Method " << PrettyMethod(method)
<< " is not inlined because it is static and requires a clinit"
<< " check that cannot be emitted due to Dex cache limitations";
return false;
@@ -893,7 +886,7 @@
return false;
}
- VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, caller_dex_file);
+ VLOG(compiler) << "Successfully inlined " << PrettyMethod(method);
MaybeRecordStat(kInlinedInvoke);
return true;
}
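For context, the cross-dex-file policy in the FindClassIndexIn hunk above can be restated as a standalone sketch. This is only an illustration built from the calls visible in the hunk (the helper name is hypothetical, not something added by this change):

    // Only trust a type index from another dex file when the caller's dex cache
    // already resolves that index to the exact same class; different class
    // loaders may bind the same descriptor to different classes.
    uint32_t SafeTypeIndexFor(mirror::Class* cls,
                              const DexFile& dex_file,
                              mirror::DexCache* dex_cache) {
      uint32_t index = cls->FindTypeIndexInOtherDexFile(dex_file);
      if (index != DexFile::kDexNoIndex && dex_cache->GetResolvedType(index) != cls) {
        index = DexFile::kDexNoIndex;
      }
      return index;
    }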
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index b412529..afac5f9 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -16,6 +16,7 @@
#include "instruction_builder.h"
+#include "art_method-inl.h"
#include "bytecode_utils.h"
#include "class_linker.h"
#include "driver/compiler_options.h"
@@ -890,7 +891,7 @@
return_type,
dex_pc,
method_idx,
- resolved_method->GetDexMethodIndex());
+ resolved_method->GetImtIndex());
}
return HandleInvoke(invoke,
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index cd026b8..495f3fd 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -14,8 +14,10 @@
* limitations under the License.
*/
+#include "code_generator.h"
#include "instruction_simplifier_arm.h"
#include "instruction_simplifier_shared.h"
+#include "mirror/array-inl.h"
namespace art {
namespace arm {
@@ -38,6 +40,46 @@
}
}
+void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) {
+ size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ Primitive::Type type = instruction->GetType();
+
+ if (type == Primitive::kPrimLong
+ || type == Primitive::kPrimFloat
+ || type == Primitive::kPrimDouble) {
+    // T32 doesn't support the ShiftedRegOffset memory addressing mode for these
+    // types, so this optimization cannot be applied.
+ return;
+ }
+
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) {
+ size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
+ size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
+ Primitive::Type type = instruction->GetComponentType();
+
+ if (type == Primitive::kPrimLong
+ || type == Primitive::kPrimFloat
+ || type == Primitive::kPrimDouble) {
+    // T32 doesn't support the ShiftedRegOffset memory addressing mode for these
+    // types, so this optimization cannot be applied.
+ return;
+ }
+
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
+}
} // namespace arm
} // namespace art
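The new ARM visitors reuse the shared TryExtractArrayAccessAddress() helper (added in instruction_simplifier_shared.cc further down) to split an array access into an explicit base-address computation. A minimal plain-C++ sketch of the resulting address arithmetic for an int[] element, with data_offset standing in for mirror::Array::DataOffset(); illustrative only, not code from this change:

    #include <cstddef>
    #include <cstdint>

    // Mirrors the split performed by TryExtractArrayAccessAddress().
    int32_t LoadElement(const uint8_t* array, size_t index, size_t data_offset) {
      // HIntermediateAddress: array + data_offset, computed once and kept
      // GC-dependent so it is never live across anything that can trigger GC.
      const uint8_t* base = array + data_offset;
      // HArrayGet on the split base; the backend can now use a
      // shifted-register offset (base + (index << 2)).
      return reinterpret_cast<const int32_t*>(base)[index];
    }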
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 14c940e..3d297da 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -38,6 +38,8 @@
void VisitMul(HMul* instruction) OVERRIDE;
void VisitOr(HOr* instruction) OVERRIDE;
void VisitAnd(HAnd* instruction) OVERRIDE;
+ void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
+ void VisitArraySet(HArraySet* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
};
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 983d31d..6d107d5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -28,56 +28,6 @@
using helpers::HasShifterOperand;
using helpers::ShifterOperandSupportsExtension;
-void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
- HInstruction* array,
- HInstruction* index,
- size_t data_offset) {
- if (kEmitCompilerReadBarrier) {
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
- //
- // TODO: Handle this case properly in the ARM64 code generator and
- // re-enable this optimization; otherwise, remove this TODO.
- // b/26601270
- return;
- }
- if (index->IsConstant() ||
- (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
- // When the index is a constant all the addressing can be fitted in the
- // memory access instruction, so do not split the access.
- return;
- }
- if (access->IsArraySet() &&
- access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
- // The access may require a runtime call or the original array pointer.
- return;
- }
-
- // Proceed to extract the base address computation.
- ArenaAllocator* arena = GetGraph()->GetArena();
-
- HIntConstant* offset = GetGraph()->GetIntConstant(data_offset);
- HArm64IntermediateAddress* address =
- new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
- address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
- access->GetBlock()->InsertInstructionBefore(address, access);
- access->ReplaceInput(address, 0);
- // Both instructions must depend on GC to prevent any instruction that can
- // trigger GC to be inserted between the two.
- access->AddSideEffects(SideEffects::DependsOnGC());
- DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
- DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
- // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
- // is an HArm64IntermediateAddress and generate appropriate code.
- // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
- // `HArm64Load` and `HArm64Store`). We defer these changes because these new instructions would
- // not bring any advantages yet.
- // Also see the comments in
- // `InstructionCodeGeneratorARM64::VisitArrayGet()` and
- // `InstructionCodeGeneratorARM64::VisitArraySet()`.
- RecordSimplification();
-}
-
bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
HInstruction* bitfield_op,
bool do_merge) {
@@ -190,19 +140,23 @@
void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
- TryExtractArrayAccessAddress(instruction,
- instruction->GetArray(),
- instruction->GetIndex(),
- data_offset);
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
}
void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
- TryExtractArrayAccessAddress(instruction,
- instruction->GetArray(),
- instruction->GetIndex(),
- data_offset);
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
}
void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 4735f85..28648b3 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -35,10 +35,6 @@
}
}
- void TryExtractArrayAccessAddress(HInstruction* access,
- HInstruction* array,
- HInstruction* index,
- size_t data_offset);
bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
bool TryMergeIntoShifterOperand(HInstruction* use,
HInstruction* bitfield_op,
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index dab1ebc..8f7778f 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -226,4 +226,59 @@
return false;
}
+
+bool TryExtractArrayAccessAddress(HInstruction* access,
+ HInstruction* array,
+ HInstruction* index,
+ size_t data_offset) {
+ if (kEmitCompilerReadBarrier) {
+ // The read barrier instrumentation does not support the
+ // HIntermediateAddress instruction yet.
+ //
+    // TODO: Handle this case properly in the ARM64 and ARM code generators and
+ // re-enable this optimization; otherwise, remove this TODO.
+ // b/26601270
+ return false;
+ }
+ if (index->IsConstant() ||
+ (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
+ // When the index is a constant all the addressing can be fitted in the
+ // memory access instruction, so do not split the access.
+ return false;
+ }
+ if (access->IsArraySet() &&
+ access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
+ // The access may require a runtime call or the original array pointer.
+ return false;
+ }
+
+ // Proceed to extract the base address computation.
+ HGraph* graph = access->GetBlock()->GetGraph();
+ ArenaAllocator* arena = graph->GetArena();
+
+ HIntConstant* offset = graph->GetIntConstant(data_offset);
+ HIntermediateAddress* address =
+ new (arena) HIntermediateAddress(array, offset, kNoDexPc);
+ address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
+ access->GetBlock()->InsertInstructionBefore(address, access);
+ access->ReplaceInput(address, 0);
+ // Both instructions must depend on GC to prevent any instruction that can
+ // trigger GC to be inserted between the two.
+ access->AddSideEffects(SideEffects::DependsOnGC());
+ DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+ DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+ // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
+ // is an HIntermediateAddress and generate appropriate code.
+ // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
+  // `HArm64Load` and `HArm64Store`, `HArmLoad` and `HArmStore`). We defer these changes
+ // because these new instructions would not bring any advantages yet.
+ // Also see the comments in
+ // `InstructionCodeGeneratorARM::VisitArrayGet()`
+ // `InstructionCodeGeneratorARM::VisitArraySet()`
+ // `InstructionCodeGeneratorARM64::VisitArrayGet()`
+ // `InstructionCodeGeneratorARM64::VisitArraySet()`.
+ return true;
+}
+
+
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index b1fe8f4..56804f5 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -26,6 +26,11 @@
// a negated bitwise instruction.
bool TryMergeNegatedInput(HBinaryOperation* op);
+bool TryExtractArrayAccessAddress(HInstruction* access,
+ HInstruction* array,
+ HInstruction* index,
+ size_t data_offset);
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0f0ef26..23ac457 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1289,7 +1289,8 @@
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \
M(BitwiseNegatedRight, Instruction) \
- M(MultiplyAccumulate, Instruction)
+ M(MultiplyAccumulate, Instruction) \
+ M(IntermediateAddress, Instruction)
#endif
#ifndef ART_ENABLE_CODEGEN_arm
@@ -1303,8 +1304,7 @@
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \
- M(Arm64DataProcWithShifterOp, Instruction) \
- M(Arm64IntermediateAddress, Instruction)
+ M(Arm64DataProcWithShifterOp, Instruction)
#endif
#ifndef ART_ENABLE_CODEGEN_mips
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 06b073c..3f88717 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -94,32 +94,6 @@
std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
-// This instruction computes an intermediate address pointing in the 'middle' of an object. The
-// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
-// never used across anything that can trigger GC.
-class HArm64IntermediateAddress FINAL : public HExpression<2> {
- public:
- HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
- : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
- SetRawInputAt(0, base_address);
- SetRawInputAt(1, offset);
- }
-
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
- }
- bool IsActualObject() const OVERRIDE { return false; }
-
- HInstruction* GetBaseAddress() const { return InputAt(0); }
- HInstruction* GetOffset() const { return InputAt(1); }
-
- DECLARE_INSTRUCTION(Arm64IntermediateAddress);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
-};
-
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index f2d5cf3..8bd8667 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -113,6 +113,34 @@
DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight);
};
+
+// This instruction computes an intermediate address pointing in the 'middle' of an object. The
+// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
+// never used across anything that can trigger GC.
+class HIntermediateAddress FINAL : public HExpression<2> {
+ public:
+ HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
+ : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
+ SetRawInputAt(0, base_address);
+ SetRawInputAt(1, offset);
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ return true;
+ }
+ bool IsActualObject() const OVERRIDE { return false; }
+
+ HInstruction* GetBaseAddress() const { return InputAt(0); }
+ HInstruction* GetOffset() const { return InputAt(1); }
+
+ DECLARE_INSTRUCTION(IntermediateAddress);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
+};
+
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d6e09d7..0bca186 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -81,7 +81,7 @@
#include "oat_quick_method_header.h"
#include "prepare_for_register_allocation.h"
#include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
#include "select_generator.h"
#include "sharpening.h"
#include "side_effects_analysis.h"
@@ -448,8 +448,12 @@
arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
arm::InstructionSimplifierArm* simplifier =
new (arena) arm::InstructionSimplifierArm(graph, stats);
+ SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
+ GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch");
HOptimization* arm_optimizations[] = {
simplifier,
+ side_effects,
+ gvn,
fixups
};
RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
@@ -531,7 +535,7 @@
}
{
PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
- RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+ RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
}
}
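The RegisterAllocator::Create call above relies on a defaulted strategy argument. Spelled out against the factory added in register_allocator.cc below, and assuming kRegisterAllocatorLinearScan is an enumerator of the RegisterAllocator Strategy type, the call is equivalent to:

    RegisterAllocator::Create(graph->GetArena(),
                              codegen,
                              liveness,
                              RegisterAllocator::kRegisterAllocatorLinearScan)
        ->AllocateRegisters();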
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
new file mode 100644
index 0000000..3450286
--- /dev/null
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -0,0 +1,653 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocation_resolver.h"
+
+#include "code_generator.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+RegisterAllocationResolver::RegisterAllocationResolver(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness)
+ : allocator_(allocator),
+ codegen_(codegen),
+ liveness_(liveness) {}
+
+void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
+ size_t max_safepoint_live_fp_regs,
+ size_t reserved_out_slots,
+ size_t int_spill_slots,
+ size_t long_spill_slots,
+ size_t float_spill_slots,
+ size_t double_spill_slots,
+ size_t catch_phi_spill_slots,
+ const ArenaVector<LiveInterval*>& temp_intervals) {
+ size_t spill_slots = int_spill_slots
+ + long_spill_slots
+ + float_spill_slots
+ + double_spill_slots
+ + catch_phi_spill_slots;
+
+ // Computes frame size and spill mask.
+ codegen_->InitializeCodeGeneration(spill_slots,
+ max_safepoint_live_core_regs,
+ max_safepoint_live_fp_regs,
+ reserved_out_slots, // Includes slot(s) for the art method.
+ codegen_->GetGraph()->GetLinearOrder());
+
+ // Resolve outputs, including stack locations.
+  // TODO: Use pointers to Location inside LiveInterval to avoid doing another iteration.
+ for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ LiveInterval* current = instruction->GetLiveInterval();
+ LocationSummary* locations = instruction->GetLocations();
+ Location location = locations->Out();
+ if (instruction->IsParameterValue()) {
+ // Now that we know the frame size, adjust the parameter's location.
+ if (location.IsStackSlot()) {
+ location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+ current->SetSpillSlot(location.GetStackIndex());
+ locations->UpdateOut(location);
+ } else if (location.IsDoubleStackSlot()) {
+ location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+ current->SetSpillSlot(location.GetStackIndex());
+ locations->UpdateOut(location);
+ } else if (current->HasSpillSlot()) {
+ current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
+ }
+ } else if (instruction->IsCurrentMethod()) {
+ // The current method is always at offset 0.
+ DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
+ } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+ DCHECK(current->HasSpillSlot());
+ size_t slot = current->GetSpillSlot()
+ + spill_slots
+ + reserved_out_slots
+ - catch_phi_spill_slots;
+ current->SetSpillSlot(slot * kVRegSize);
+ } else if (current->HasSpillSlot()) {
+ // Adjust the stack slot, now that we know the number of them for each type.
+ // The way this implementation lays out the stack is the following:
+ // [parameter slots ]
+ // [catch phi spill slots ]
+ // [double spill slots ]
+ // [long spill slots ]
+ // [float spill slots ]
+ // [int/ref values ]
+ // [maximum out values ] (number of arguments for calls)
+ // [art method ].
+ size_t slot = current->GetSpillSlot();
+ switch (current->GetType()) {
+ case Primitive::kPrimDouble:
+ slot += long_spill_slots;
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimLong:
+ slot += float_spill_slots;
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimFloat:
+ slot += int_spill_slots;
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ slot += reserved_out_slots;
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << current->GetType();
+ }
+ current->SetSpillSlot(slot * kVRegSize);
+ }
+
+ Location source = current->ToLocation();
+
+ if (location.IsUnallocated()) {
+ if (location.GetPolicy() == Location::kSameAsFirstInput) {
+ if (locations->InAt(0).IsUnallocated()) {
+ locations->SetInAt(0, source);
+ } else {
+ DCHECK(locations->InAt(0).Equals(source));
+ }
+ }
+ locations->UpdateOut(source);
+ } else {
+ DCHECK(source.Equals(location));
+ }
+ }
+
+ // Connect siblings and resolve inputs.
+ for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ ConnectSiblings(instruction->GetLiveInterval(),
+ max_safepoint_live_core_regs + max_safepoint_live_fp_regs);
+ }
+
+ // Resolve non-linear control flow across branches. Order does not matter.
+ for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ if (block->IsCatchBlock() ||
+ (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+      // Instructions live at the top of catch blocks or irreducible loop headers
+      // were forced to spill.
+ if (kIsDebugBuild) {
+ BitVector* live = liveness_.GetLiveInSet(*block);
+ for (uint32_t idx : live->Indexes()) {
+ LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+ LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart());
+ // `GetSiblingAt` returns the sibling that contains a position, but there could be
+ // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that
+ // position.
+ if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) {
+ DCHECK(!sibling->HasRegister());
+ }
+ }
+ }
+ } else {
+ BitVector* live = liveness_.GetLiveInSet(*block);
+ for (uint32_t idx : live->Indexes()) {
+ LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+ for (HBasicBlock* predecessor : block->GetPredecessors()) {
+ ConnectSplitSiblings(interval, predecessor, block);
+ }
+ }
+ }
+ }
+
+ // Resolve phi inputs. Order does not matter.
+ for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* current = it.Current();
+ if (current->IsCatchBlock()) {
+ // Catch phi values are set at runtime by the exception delivery mechanism.
+ } else {
+ for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ HInstruction* phi = inst_it.Current();
+ for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
+ HBasicBlock* predecessor = current->GetPredecessors()[i];
+ DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
+ HInstruction* input = phi->InputAt(i);
+ Location source = input->GetLiveInterval()->GetLocationAt(
+ predecessor->GetLifetimeEnd() - 1);
+ Location destination = phi->GetLiveInterval()->ToLocation();
+ InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
+ }
+ }
+ }
+ }
+
+ // Resolve temp locations.
+ for (LiveInterval* temp : temp_intervals) {
+ if (temp->IsHighInterval()) {
+ // High intervals can be skipped, they are already handled by the low interval.
+ continue;
+ }
+ HInstruction* at = liveness_.GetTempUser(temp);
+ size_t temp_index = liveness_.GetTempIndex(temp);
+ LocationSummary* locations = at->GetLocations();
+ switch (temp->GetType()) {
+ case Primitive::kPrimInt:
+ locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister()));
+ break;
+
+ case Primitive::kPrimDouble:
+ if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+ Location location = Location::FpuRegisterPairLocation(
+ temp->GetRegister(), temp->GetHighInterval()->GetRegister());
+ locations->SetTempAt(temp_index, location);
+ } else {
+ locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister()));
+ }
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type for temporary location "
+ << temp->GetType();
+ }
+ }
+}
+
+void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval,
+ size_t max_safepoint_live_regs) {
+ LiveInterval* current = interval;
+ if (current->HasSpillSlot()
+ && current->HasRegister()
+      // Currently, the code generators unconditionally spill the current method.
+ && !interval->GetDefinedBy()->IsCurrentMethod()) {
+ // We spill eagerly, so move must be at definition.
+ InsertMoveAfter(interval->GetDefinedBy(),
+ interval->ToLocation(),
+ interval->NeedsTwoSpillSlots()
+ ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
+ : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+ }
+ UsePosition* use = current->GetFirstUse();
+ UsePosition* env_use = current->GetFirstEnvironmentUse();
+
+ // Walk over all siblings, updating locations of use positions, and
+ // connecting them when they are adjacent.
+ do {
+ Location source = current->ToLocation();
+
+ // Walk over all uses covered by this interval, and update the location
+ // information.
+
+ LiveRange* range = current->GetFirstRange();
+ while (range != nullptr) {
+ while (use != nullptr && use->GetPosition() < range->GetStart()) {
+ DCHECK(use->IsSynthesized());
+ use = use->GetNext();
+ }
+ while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+ DCHECK(!use->GetIsEnvironment());
+ DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
+ if (!use->IsSynthesized()) {
+ LocationSummary* locations = use->GetUser()->GetLocations();
+ Location expected_location = locations->InAt(use->GetInputIndex());
+ // The expected (actual) location may be invalid in case the input is unused. Currently
+ // this only happens for intrinsics.
+ if (expected_location.IsValid()) {
+ if (expected_location.IsUnallocated()) {
+ locations->SetInAt(use->GetInputIndex(), source);
+ } else if (!expected_location.IsConstant()) {
+ AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+ }
+ } else {
+ DCHECK(use->GetUser()->IsInvoke());
+ DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+ }
+ }
+ use = use->GetNext();
+ }
+
+ // Walk over the environment uses, and update their locations.
+ while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
+ env_use = env_use->GetNext();
+ }
+
+ while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
+ DCHECK(current->CoversSlow(env_use->GetPosition())
+ || (env_use->GetPosition() == range->GetEnd()));
+ HEnvironment* environment = env_use->GetEnvironment();
+ environment->SetLocationAt(env_use->GetInputIndex(), source);
+ env_use = env_use->GetNext();
+ }
+
+ range = range->GetNext();
+ }
+
+ // If the next interval starts just after this one, and has a register,
+ // insert a move.
+ LiveInterval* next_sibling = current->GetNextSibling();
+ if (next_sibling != nullptr
+ && next_sibling->HasRegister()
+ && current->GetEnd() == next_sibling->GetStart()) {
+ Location destination = next_sibling->ToLocation();
+ InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
+ }
+
+ for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
+ safepoint_position != nullptr;
+ safepoint_position = safepoint_position->GetNext()) {
+ DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
+
+ LocationSummary* locations = safepoint_position->GetLocations();
+ if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+ DCHECK(interval->GetDefinedBy()->IsActualObject())
+ << interval->GetDefinedBy()->DebugName()
+ << "@" << safepoint_position->GetInstruction()->DebugName();
+ locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
+ }
+
+ switch (source.GetKind()) {
+ case Location::kRegister: {
+ locations->AddLiveRegister(source);
+ if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
+ DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+ max_safepoint_live_regs);
+ }
+ if (current->GetType() == Primitive::kPrimNot) {
+ DCHECK(interval->GetDefinedBy()->IsActualObject())
+ << interval->GetDefinedBy()->DebugName()
+ << "@" << safepoint_position->GetInstruction()->DebugName();
+ locations->SetRegisterBit(source.reg());
+ }
+ break;
+ }
+ case Location::kFpuRegister: {
+ locations->AddLiveRegister(source);
+ break;
+ }
+
+ case Location::kRegisterPair:
+ case Location::kFpuRegisterPair: {
+ locations->AddLiveRegister(source.ToLow());
+ locations->AddLiveRegister(source.ToHigh());
+ break;
+ }
+ case Location::kStackSlot: // Fall-through
+ case Location::kDoubleStackSlot: // Fall-through
+ case Location::kConstant: {
+ // Nothing to do.
+ break;
+ }
+ default: {
+ LOG(FATAL) << "Unexpected location for object";
+ }
+ }
+ }
+ current = next_sibling;
+ } while (current != nullptr);
+
+ if (kIsDebugBuild) {
+ // Following uses can only be synthesized uses.
+ while (use != nullptr) {
+ DCHECK(use->IsSynthesized());
+ use = use->GetNext();
+ }
+ }
+}
+
+static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
+ HInstruction* instruction) {
+ return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
+ (instruction->IsConstant() || instruction->IsCurrentMethod());
+}
+
+void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
+ HBasicBlock* from,
+ HBasicBlock* to) const {
+ if (interval->GetNextSibling() == nullptr) {
+ // Nothing to connect. The whole range was allocated to the same location.
+ return;
+ }
+
+ // Find the intervals that cover `from` and `to`.
+ size_t destination_position = to->GetLifetimeStart();
+ size_t source_position = from->GetLifetimeEnd() - 1;
+ LiveInterval* destination = interval->GetSiblingAt(destination_position);
+ LiveInterval* source = interval->GetSiblingAt(source_position);
+
+ if (destination == source) {
+ // Interval was not split.
+ return;
+ }
+
+ LiveInterval* parent = interval->GetParent();
+ HInstruction* defined_by = parent->GetDefinedBy();
+ if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+ (destination == nullptr || !destination->CoversSlow(destination_position))) {
+ // Our live_in fixed point calculation has found that the instruction is live
+ // in the `to` block because it will eventually enter an irreducible loop. Our
+    // live interval computation, however, does not compute a fixed point, and
+    // therefore will not have a location for that instruction in `to`.
+ // Because the instruction is a constant or the ArtMethod, we don't need to
+ // do anything: it will be materialized in the irreducible loop.
+ DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
+ << defined_by->DebugName() << ":" << defined_by->GetId()
+ << " " << from->GetBlockId() << " -> " << to->GetBlockId();
+ return;
+ }
+
+ if (!destination->HasRegister()) {
+    // Values are eagerly spilled, so the spill slot already contains the appropriate value.
+ return;
+ }
+
+ Location location_source;
+ // `GetSiblingAt` returns the interval whose start and end cover `position`,
+ // but does not check whether the interval is inactive at that position.
+ // The only situation where the interval is inactive at that position is in the
+ // presence of irreducible loops for constants and ArtMethod.
+ if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+ (source == nullptr || !source->CoversSlow(source_position))) {
+ DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+ if (defined_by->IsConstant()) {
+ location_source = defined_by->GetLocations()->Out();
+ } else {
+ DCHECK(defined_by->IsCurrentMethod());
+ location_source = parent->NeedsTwoSpillSlots()
+ ? Location::DoubleStackSlot(parent->GetSpillSlot())
+ : Location::StackSlot(parent->GetSpillSlot());
+ }
+ } else {
+ DCHECK(source != nullptr);
+ DCHECK(source->CoversSlow(source_position));
+ DCHECK(destination->CoversSlow(destination_position));
+ location_source = source->ToLocation();
+ }
+
+ // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
+ // we need to put the moves at the entry of `to`.
+ if (from->GetNormalSuccessors().size() == 1) {
+ InsertParallelMoveAtExitOf(from,
+ defined_by,
+ location_source,
+ destination->ToLocation());
+ } else {
+ DCHECK_EQ(to->GetPredecessors().size(), 1u);
+ InsertParallelMoveAtEntryOf(to,
+ defined_by,
+ location_source,
+ destination->ToLocation());
+ }
+}
+
+static bool IsValidDestination(Location destination) {
+ return destination.IsRegister()
+ || destination.IsRegisterPair()
+ || destination.IsFpuRegister()
+ || destination.IsFpuRegisterPair()
+ || destination.IsStackSlot()
+ || destination.IsDoubleStackSlot();
+}
+
+void RegisterAllocationResolver::AddMove(HParallelMove* move,
+ Location source,
+ Location destination,
+ HInstruction* instruction,
+ Primitive::Type type) const {
+ if (type == Primitive::kPrimLong
+ && codegen_->ShouldSplitLongMoves()
+ // The parallel move resolver knows how to deal with long constants.
+ && !source.IsConstant()) {
+ move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
+ move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
+ } else {
+ move->AddMove(source, destination, type, instruction);
+ }
+}
+
+void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input,
+ HInstruction* user,
+ Location source,
+ Location destination) const {
+ if (source.Equals(destination)) return;
+
+ DCHECK(!user->IsPhi());
+
+ HInstruction* previous = user->GetPrevious();
+ HParallelMove* move = nullptr;
+ if (previous == nullptr
+ || !previous->IsParallelMove()
+ || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(user->GetLifetimePosition());
+ user->GetBlock()->InsertInstructionBefore(move, user);
+ } else {
+ move = previous->AsParallelMove();
+ }
+ DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
+ AddMove(move, source, destination, nullptr, input->GetType());
+}
+
+static bool IsInstructionStart(size_t position) {
+ return (position & 1) == 0;
+}
+
+static bool IsInstructionEnd(size_t position) {
+ return (position & 1) == 1;
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAt(size_t position,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const {
+ DCHECK(IsValidDestination(destination)) << destination;
+ if (source.Equals(destination)) return;
+
+ HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
+ HParallelMove* move;
+ if (at == nullptr) {
+ if (IsInstructionStart(position)) {
+      // Block boundary: don't do anything; the connection of split siblings will handle it.
+ return;
+ } else {
+ // Move must happen before the first instruction of the block.
+ at = liveness_.GetInstructionFromPosition((position + 1) / 2);
+ // Note that parallel moves may have already been inserted, so we explicitly
+ // ask for the first instruction of the block: `GetInstructionFromPosition` does
+ // not contain the `HParallelMove` instructions.
+ at = at->GetBlock()->GetFirstInstruction();
+
+ if (at->GetLifetimePosition() < position) {
+ // We may insert moves for split siblings and phi spills at the beginning of the block.
+ // Since this is a different lifetime position, we need to go to the next instruction.
+ DCHECK(at->IsParallelMove());
+ at = at->GetNext();
+ }
+
+ if (at->GetLifetimePosition() != position) {
+ DCHECK_GT(at->GetLifetimePosition(), position);
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ at->GetBlock()->InsertInstructionBefore(move, at);
+ } else {
+ DCHECK(at->IsParallelMove());
+ move = at->AsParallelMove();
+ }
+ }
+ } else if (IsInstructionEnd(position)) {
+ // Move must happen after the instruction.
+ DCHECK(!at->IsControlFlow());
+ move = at->GetNext()->AsParallelMove();
+    // This is a parallel move for connecting siblings in the same block. We need to
+    // differentiate it from moves for connecting blocks, and from input moves.
+ if (move == nullptr || move->GetLifetimePosition() > position) {
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
+ }
+ } else {
+ // Move must happen before the instruction.
+ HInstruction* previous = at->GetPrevious();
+ if (previous == nullptr
+ || !previous->IsParallelMove()
+ || previous->GetLifetimePosition() != position) {
+ // If the previous is a parallel move, then its position must be lower
+ // than the given `position`: it was added just after the non-parallel
+ // move instruction that precedes `instruction`.
+ DCHECK(previous == nullptr
+ || !previous->IsParallelMove()
+ || previous->GetLifetimePosition() < position);
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ at->GetBlock()->InsertInstructionBefore(move, at);
+ } else {
+ move = previous->AsParallelMove();
+ }
+ }
+ DCHECK_EQ(move->GetLifetimePosition(), position);
+ AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const {
+ DCHECK(IsValidDestination(destination)) << destination;
+ if (source.Equals(destination)) return;
+
+ DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
+ HInstruction* last = block->GetLastInstruction();
+ // We insert moves at exit for phi predecessors and connecting blocks.
+ // A block ending with an if or a packed switch cannot branch to a block
+ // with phis because we do not allow critical edges. It can also not connect
+ // a split interval between two blocks: the move has to happen in the successor.
+ DCHECK(!last->IsIf() && !last->IsPackedSwitch());
+ HInstruction* previous = last->GetPrevious();
+ HParallelMove* move;
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block, and from output moves.
+ size_t position = last->GetLifetimePosition();
+ if (previous == nullptr || !previous->IsParallelMove()
+ || previous->AsParallelMove()->GetLifetimePosition() != position) {
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ block->InsertInstructionBefore(move, last);
+ } else {
+ move = previous->AsParallelMove();
+ }
+ AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const {
+ DCHECK(IsValidDestination(destination)) << destination;
+ if (source.Equals(destination)) return;
+
+ HInstruction* first = block->GetFirstInstruction();
+ HParallelMove* move = first->AsParallelMove();
+ size_t position = block->GetLifetimeStart();
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block, and from input moves.
+ if (move == nullptr || move->GetLifetimePosition() != position) {
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ block->InsertInstructionBefore(move, first);
+ }
+ AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction,
+ Location source,
+ Location destination) const {
+ DCHECK(IsValidDestination(destination)) << destination;
+ if (source.Equals(destination)) return;
+
+ if (instruction->IsPhi()) {
+ InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination);
+ return;
+ }
+
+ size_t position = instruction->GetLifetimePosition() + 1;
+ HParallelMove* move = instruction->GetNext()->AsParallelMove();
+  // This is a parallel move for moving the output of an instruction. We need
+  // to differentiate it from input moves, moves for connecting siblings in the
+  // same block, and moves for connecting blocks.
+ if (move == nullptr || move->GetLifetimePosition() != position) {
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
+ }
+ AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+} // namespace art
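As a worked example of the spill-slot layout documented in Resolve() above, take hypothetical counts of 3 int, 2 float, 2 long and 1 double spill slots plus 3 reserved out slots; a double interval whose raw slot is 0 falls through the kPrimDouble, kPrimLong and kPrimFloat cases and ends up at byte offset 40. All counts below are made up for illustration; kVRegSize is 4 bytes in ART:

    #include <cstddef>

    constexpr size_t kVRegSize = 4;
    constexpr size_t reserved_out_slots = 3;  // ArtMethod* + max out vregs
    constexpr size_t int_spill_slots = 3;
    constexpr size_t float_spill_slots = 2;
    constexpr size_t long_spill_slots = 2;

    size_t slot = 0;                        // raw spill slot of the double interval
    slot += long_spill_slots;               // kPrimDouble case
    slot += float_spill_slots;              // fall through: kPrimLong case
    slot += int_spill_slots;                // fall through: kPrimFloat case
    slot += reserved_out_slots;             // fall through: int/ref case
    size_t byte_offset = slot * kVRegSize;  // (2 + 2 + 3 + 3) slots -> 40 bytes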
diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h
new file mode 100644
index 0000000..6ceb9bc
--- /dev/null
+++ b/compiler/optimizing/register_allocation_resolver.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
+
+#include "base/arena_containers.h"
+#include "base/value_object.h"
+#include "primitive.h"
+
+namespace art {
+
+class ArenaAllocator;
+class CodeGenerator;
+class HBasicBlock;
+class HInstruction;
+class HParallelMove;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * Reconciles the locations assigned to live intervals with the location
+ * summary of each instruction, and inserts moves to resolve split intervals,
+ * nonlinear control flow, and phi inputs.
+ */
+class RegisterAllocationResolver : ValueObject {
+ public:
+ RegisterAllocationResolver(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness);
+
+ void Resolve(size_t max_safepoint_live_core_regs,
+ size_t max_safepoint_live_fp_regs,
+ size_t reserved_out_slots, // Includes slot(s) for the art method.
+ size_t int_spill_slots,
+ size_t long_spill_slots,
+ size_t float_spill_slots,
+ size_t double_spill_slots,
+ size_t catch_phi_spill_slots,
+ const ArenaVector<LiveInterval*>& temp_intervals);
+
+ private:
+ // Connect adjacent siblings within blocks, and resolve inputs along the way.
+ // Uses max_safepoint_live_regs to check that we did not underestimate the
+ // number of live registers at safepoints.
+ void ConnectSiblings(LiveInterval* interval, size_t max_safepoint_live_regs);
+
+ // Connect siblings between block entries and exits.
+ void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
+
+ // Helper methods for inserting parallel moves in the graph.
+ void InsertParallelMoveAtExitOf(HBasicBlock* block,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const;
+ void InsertParallelMoveAtEntryOf(HBasicBlock* block,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const;
+ void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
+ void AddInputMoveFor(HInstruction* input,
+ HInstruction* user,
+ Location source,
+ Location destination) const;
+ void InsertParallelMoveAt(size_t position,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const;
+ void AddMove(HParallelMove* move,
+ Location source,
+ Location destination,
+ HInstruction* instruction,
+ Primitive::Type type) const;
+
+ ArenaAllocator* const allocator_;
+ CodeGenerator* const codegen_;
+ const SsaLivenessAnalysis& liveness_;
+
+ DISALLOW_COPY_AND_ASSIGN(RegisterAllocationResolver);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
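A plausible call site for the resolver inside the linear-scan allocator; register_allocator_linear_scan.cc is not part of this excerpt, and the field names below are assumed to carry over from the removed RegisterAllocator members shown further down, so treat this as a sketch rather than the committed code:

    RegisterAllocationResolver(allocator_, codegen_, liveness_)
        .Resolve(maximum_number_of_live_core_registers_,
                 maximum_number_of_live_fp_registers_,
                 reserved_out_slots_,
                 int_spill_slots_.size(),
                 long_spill_slots_.size(),
                 float_spill_slots_.size(),
                 double_spill_slots_.size(),
                 catch_phi_spill_slots_,
                 temp_intervals_);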
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1b33408..2367ce1 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,65 +21,30 @@
#include "base/bit_vector-inl.h"
#include "code_generator.h"
+#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
+
namespace art {
-static constexpr size_t kMaxLifetimePosition = -1;
-static constexpr size_t kDefaultNumberOfSpillSlots = 4;
-
-// For simplicity, we implement register pairs as (reg, reg + 1).
-// Note that this is a requirement for double registers on ARM, since we
-// allocate SRegister.
-static int GetHighForLowRegister(int reg) { return reg + 1; }
-static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
-static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
- return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
-}
-
RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
CodeGenerator* codegen,
const SsaLivenessAnalysis& liveness)
- : allocator_(allocator),
- codegen_(codegen),
- liveness_(liveness),
- unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- unhandled_(nullptr),
- handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- catch_phi_spill_slots_(0),
- safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- processing_core_registers_(false),
- number_of_registers_(-1),
- registers_array_(nullptr),
- blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
- blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
- reserved_out_slots_(0),
- maximum_number_of_live_core_registers_(0),
- maximum_number_of_live_fp_registers_(0) {
- temp_intervals_.reserve(4);
- int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
- long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
- float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
- double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+ : allocator_(allocator),
+ codegen_(codegen),
+ liveness_(liveness) {}
- codegen->SetupBlockedRegisters();
- physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
- physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
- // Always reserve for the current method and the graph's max out registers.
- // TODO: compute it instead.
- // ArtMethod* takes 2 vregs for 64 bits.
- reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize +
- codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
+RegisterAllocator* RegisterAllocator::Create(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis,
+ Strategy strategy) {
+ switch (strategy) {
+ case kRegisterAllocatorLinearScan:
+ return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis);
+ default:
+ LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
+ UNREACHABLE();
+ }
}
bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
@@ -93,328 +58,6 @@
|| instruction_set == kX86_64;
}
-static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
- if (interval == nullptr) return false;
- bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
- && (interval->GetType() != Primitive::kPrimFloat);
- return processing_core_registers == is_core_register;
-}
-
-void RegisterAllocator::AllocateRegisters() {
- AllocateRegistersInternal();
- Resolve();
-
- if (kIsDebugBuild) {
- processing_core_registers_ = true;
- ValidateInternal(true);
- processing_core_registers_ = false;
- ValidateInternal(true);
- // Check that the linear order is still correct with regards to lifetime positions.
- // Since only parallel moves have been inserted during the register allocation,
- // these checks are mostly for making sure these moves have been added correctly.
- size_t current_liveness = 0;
- for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
- for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
- HInstruction* instruction = inst_it.Current();
- DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
- current_liveness = instruction->GetLifetimePosition();
- }
- for (HInstructionIterator inst_it(block->GetInstructions());
- !inst_it.Done();
- inst_it.Advance()) {
- HInstruction* instruction = inst_it.Current();
- DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
- current_liveness = instruction->GetLifetimePosition();
- }
- }
- }
-}
-
-void RegisterAllocator::BlockRegister(Location location, size_t start, size_t end) {
- int reg = location.reg();
- DCHECK(location.IsRegister() || location.IsFpuRegister());
- LiveInterval* interval = location.IsRegister()
- ? physical_core_register_intervals_[reg]
- : physical_fp_register_intervals_[reg];
- Primitive::Type type = location.IsRegister()
- ? Primitive::kPrimInt
- : Primitive::kPrimFloat;
- if (interval == nullptr) {
- interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
- if (location.IsRegister()) {
- physical_core_register_intervals_[reg] = interval;
- } else {
- physical_fp_register_intervals_[reg] = interval;
- }
- }
- DCHECK(interval->GetRegister() == reg);
- interval->AddRange(start, end);
-}
-
-void RegisterAllocator::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
- for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
- if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
- BlockRegister(Location::RegisterLocation(i), start, end);
- }
- }
- for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
- if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
- BlockRegister(Location::FpuRegisterLocation(i), start, end);
- }
- }
-}
-
-void RegisterAllocator::AllocateRegistersInternal() {
- // Iterate post-order, to ensure the list is sorted, and the last added interval
- // is the one with the lowest start position.
- for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
- for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
- back_it.Advance()) {
- ProcessInstruction(back_it.Current());
- }
- for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
- ProcessInstruction(inst_it.Current());
- }
-
- if (block->IsCatchBlock() ||
- (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
- // By blocking all registers at the top of each catch block or irreducible loop, we force
- // intervals belonging to the live-in set of the catch/header block to be spilled.
- // TODO(ngeoffray): Phis in this block could be allocated in register.
- size_t position = block->GetLifetimeStart();
- BlockRegisters(position, position + 1);
- }
- }
-
- number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
- registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
- kArenaAllocRegisterAllocator);
- processing_core_registers_ = true;
- unhandled_ = &unhandled_core_intervals_;
- for (LiveInterval* fixed : physical_core_register_intervals_) {
- if (fixed != nullptr) {
- // Fixed interval is added to inactive_ instead of unhandled_.
- // It's also the only type of inactive interval whose start position
- // can be after the current interval during linear scan.
- // Fixed interval is never split and never moves to unhandled_.
- inactive_.push_back(fixed);
- }
- }
- LinearScan();
-
- inactive_.clear();
- active_.clear();
- handled_.clear();
-
- number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
- registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
- kArenaAllocRegisterAllocator);
- processing_core_registers_ = false;
- unhandled_ = &unhandled_fp_intervals_;
- for (LiveInterval* fixed : physical_fp_register_intervals_) {
- if (fixed != nullptr) {
- // Fixed interval is added to inactive_ instead of unhandled_.
- // It's also the only type of inactive interval whose start position
- // can be after the current interval during linear scan.
- // Fixed interval is never split and never moves to unhandled_.
- inactive_.push_back(fixed);
- }
- }
- LinearScan();
-}
-
-void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- size_t position = instruction->GetLifetimePosition();
-
- if (locations == nullptr) return;
-
- // Create synthesized intervals for temporaries.
- for (size_t i = 0; i < locations->GetTempCount(); ++i) {
- Location temp = locations->GetTemp(i);
- if (temp.IsRegister() || temp.IsFpuRegister()) {
- BlockRegister(temp, position, position + 1);
- // Ensure that an explicit temporary register is marked as being allocated.
- codegen_->AddAllocatedRegister(temp);
- } else {
- DCHECK(temp.IsUnallocated());
- switch (temp.GetPolicy()) {
- case Location::kRequiresRegister: {
- LiveInterval* interval =
- LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
- temp_intervals_.push_back(interval);
- interval->AddTempUse(instruction, i);
- unhandled_core_intervals_.push_back(interval);
- break;
- }
-
- case Location::kRequiresFpuRegister: {
- LiveInterval* interval =
- LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
- temp_intervals_.push_back(interval);
- interval->AddTempUse(instruction, i);
- if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
- interval->AddHighInterval(/* is_temp */ true);
- LiveInterval* high = interval->GetHighInterval();
- temp_intervals_.push_back(high);
- unhandled_fp_intervals_.push_back(high);
- }
- unhandled_fp_intervals_.push_back(interval);
- break;
- }
-
- default:
- LOG(FATAL) << "Unexpected policy for temporary location "
- << temp.GetPolicy();
- }
- }
- }
-
- bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
- && (instruction->GetType() != Primitive::kPrimFloat);
-
- if (locations->NeedsSafepoint()) {
- if (codegen_->IsLeafMethod()) {
- // TODO: We do this here because we do not want the suspend check to artificially
- // create live registers. We should find another place, but this is currently the
- // simplest.
- DCHECK(instruction->IsSuspendCheckEntry());
- instruction->GetBlock()->RemoveInstruction(instruction);
- return;
- }
- safepoints_.push_back(instruction);
- if (locations->OnlyCallsOnSlowPath()) {
- // We add a synthesized range at this position to record the live registers
- // at this position. Ideally, we could just update the safepoints when locations
- // are updated, but we currently need to know the full stack size before updating
- // locations (because of parameters and the fact that we don't have a frame pointer).
-      // And knowing the full stack size requires knowing the maximum number of live
- // registers at calls in slow paths.
- // By adding the following interval in the algorithm, we can compute this
- // maximum before updating locations.
- LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
- interval->AddRange(position, position + 1);
- AddSorted(&unhandled_core_intervals_, interval);
- AddSorted(&unhandled_fp_intervals_, interval);
- }
- }
-
- if (locations->WillCall()) {
- BlockRegisters(position, position + 1, /* caller_save_only */ true);
- }
-
- for (size_t i = 0; i < locations->GetInputCount(); ++i) {
- Location input = locations->InAt(i);
- if (input.IsRegister() || input.IsFpuRegister()) {
- BlockRegister(input, position, position + 1);
- } else if (input.IsPair()) {
- BlockRegister(input.ToLow(), position, position + 1);
- BlockRegister(input.ToHigh(), position, position + 1);
- }
- }
-
- LiveInterval* current = instruction->GetLiveInterval();
- if (current == nullptr) return;
-
- ArenaVector<LiveInterval*>& unhandled = core_register
- ? unhandled_core_intervals_
- : unhandled_fp_intervals_;
-
- DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
-
- if (codegen_->NeedsTwoRegisters(current->GetType())) {
- current->AddHighInterval();
- }
-
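-  // Attach to `current` the safepoints covered by its live ranges, skipping
-  // the safepoint generated for the defining instruction itself.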
- for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
- HInstruction* safepoint = safepoints_[safepoint_index - 1u];
- size_t safepoint_position = safepoint->GetLifetimePosition();
-
-    // Test that safepoints are ordered by decreasing lifetime position.
- DCHECK(safepoint_index == safepoints_.size() ||
- safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
-
- if (safepoint_position == current->GetStart()) {
- // The safepoint is for this instruction, so the location of the instruction
- // does not need to be saved.
- DCHECK_EQ(safepoint_index, safepoints_.size());
- DCHECK_EQ(safepoint, instruction);
- continue;
- } else if (current->IsDeadAt(safepoint_position)) {
- break;
- } else if (!current->Covers(safepoint_position)) {
- // Hole in the interval.
- continue;
- }
- current->AddSafepoint(safepoint);
- }
- current->ResetSearchCache();
-
-  // Some instructions define their output in a fixed register/stack slot. We need
- // to ensure we know these locations before doing register allocation. For a
- // given register, we create an interval that covers these locations. The register
- // will be unavailable at these locations when trying to allocate one for an
- // interval.
- //
- // The backwards walking ensures the ranges are ordered on increasing start positions.
- Location output = locations->Out();
- if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
- Location first = locations->InAt(0);
- if (first.IsRegister() || first.IsFpuRegister()) {
- current->SetFrom(position + 1);
- current->SetRegister(first.reg());
- } else if (first.IsPair()) {
- current->SetFrom(position + 1);
- current->SetRegister(first.low());
- LiveInterval* high = current->GetHighInterval();
- high->SetRegister(first.high());
- high->SetFrom(position + 1);
- }
- } else if (output.IsRegister() || output.IsFpuRegister()) {
- // Shift the interval's start by one to account for the blocked register.
- current->SetFrom(position + 1);
- current->SetRegister(output.reg());
- BlockRegister(output, position, position + 1);
- } else if (output.IsPair()) {
- current->SetFrom(position + 1);
- current->SetRegister(output.low());
- LiveInterval* high = current->GetHighInterval();
- high->SetRegister(output.high());
- high->SetFrom(position + 1);
- BlockRegister(output.ToLow(), position, position + 1);
- BlockRegister(output.ToHigh(), position, position + 1);
- } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
- current->SetSpillSlot(output.GetStackIndex());
- } else {
- DCHECK(output.IsUnallocated() || output.IsConstant());
- }
-
- if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
- AllocateSpillSlotForCatchPhi(instruction->AsPhi());
- }
-
- // If needed, add interval to the list of unhandled intervals.
- if (current->HasSpillSlot() || instruction->IsConstant()) {
- // Split just before first register use.
- size_t first_register_use = current->FirstRegisterUse();
- if (first_register_use != kNoLifetime) {
- LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
-      // Don't add directly to `unhandled`: it needs to stay sorted and the start
- // of this new interval might be after intervals already in the list.
- AddSorted(&unhandled, split);
- } else {
- // Nothing to do, we won't allocate a register for this value.
- }
- } else {
-    // Don't add directly to `unhandled`: temp or safepoint intervals
- // for this instruction may have been added, and those can be
- // processed first.
- AddSorted(&unhandled, current);
- }
-}
-
class AllRangesIterator : public ValueObject {
public:
explicit AllRangesIterator(LiveInterval* interval)
@@ -442,36 +85,6 @@
DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
};
-bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
- // To simplify unit testing, we eagerly create the array of intervals, and
- // call the helper method.
- ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
- for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
- HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
- intervals.push_back(instruction->GetLiveInterval());
- }
- }
-
- const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
- ? &physical_core_register_intervals_
- : &physical_fp_register_intervals_;
- for (LiveInterval* fixed : *physical_register_intervals) {
- if (fixed != nullptr) {
- intervals.push_back(fixed);
- }
- }
-
- for (LiveInterval* temp : temp_intervals_) {
- if (ShouldProcess(processing_core_registers_, temp)) {
- intervals.push_back(temp);
- }
- }
-
- return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
- allocator_, processing_core_registers_, log_fatal_on_failure);
-}
-
bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
size_t number_of_spill_slots,
size_t number_of_out_slots,
@@ -564,638 +177,30 @@
return true;
}
-void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
- interval->Dump(stream);
- stream << ": ";
- if (interval->HasRegister()) {
- if (interval->IsFloatingPoint()) {
- codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
- } else {
- codegen_->DumpCoreRegister(stream, interval->GetRegister());
- }
- } else {
- stream << "spilled";
- }
- stream << std::endl;
-}
-
-void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const {
- stream << "inactive: " << std::endl;
- for (LiveInterval* inactive_interval : inactive_) {
- DumpInterval(stream, inactive_interval);
- }
- stream << "active: " << std::endl;
- for (LiveInterval* active_interval : active_) {
- DumpInterval(stream, active_interval);
- }
- stream << "unhandled: " << std::endl;
- auto unhandled = (unhandled_ != nullptr) ?
- unhandled_ : &unhandled_core_intervals_;
- for (LiveInterval* unhandled_interval : *unhandled) {
- DumpInterval(stream, unhandled_interval);
- }
- stream << "handled: " << std::endl;
- for (LiveInterval* handled_interval : handled_) {
- DumpInterval(stream, handled_interval);
- }
-}
-
-// By the book implementation of a linear scan register allocator.
-void RegisterAllocator::LinearScan() {
- while (!unhandled_->empty()) {
- // (1) Remove interval with the lowest start position from unhandled.
- LiveInterval* current = unhandled_->back();
- unhandled_->pop_back();
-
-    // Make sure the interval is in an expected state.
- DCHECK(!current->IsFixed() && !current->HasSpillSlot());
- // Make sure we are going in the right order.
- DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
- // Make sure a low interval is always with a high.
- DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
- // Make sure a high interval is always with a low.
- DCHECK(current->IsLowInterval() ||
- unhandled_->empty() ||
- !unhandled_->back()->IsHighInterval());
-
- size_t position = current->GetStart();
-
- // Remember the inactive_ size here since the ones moved to inactive_ from
- // active_ below shouldn't need to be re-checked.
- size_t inactive_intervals_to_handle = inactive_.size();
-
- // (2) Remove currently active intervals that are dead at this position.
- // Move active intervals that have a lifetime hole at this position
- // to inactive.
- auto active_kept_end = std::remove_if(
- active_.begin(),
- active_.end(),
- [this, position](LiveInterval* interval) {
- if (interval->IsDeadAt(position)) {
- handled_.push_back(interval);
- return true;
- } else if (!interval->Covers(position)) {
- inactive_.push_back(interval);
- return true;
- } else {
- return false; // Keep this interval.
- }
- });
- active_.erase(active_kept_end, active_.end());
-
- // (3) Remove currently inactive intervals that are dead at this position.
- // Move inactive intervals that cover this position to active.
- auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
- auto inactive_kept_end = std::remove_if(
- inactive_.begin(),
- inactive_to_handle_end,
- [this, position](LiveInterval* interval) {
- DCHECK(interval->GetStart() < position || interval->IsFixed());
- if (interval->IsDeadAt(position)) {
- handled_.push_back(interval);
- return true;
- } else if (interval->Covers(position)) {
- active_.push_back(interval);
- return true;
- } else {
- return false; // Keep this interval.
- }
- });
- inactive_.erase(inactive_kept_end, inactive_to_handle_end);
-
- if (current->IsSlowPathSafepoint()) {
- // Synthesized interval to record the maximum number of live registers
- // at safepoints. No need to allocate a register for it.
- if (processing_core_registers_) {
- maximum_number_of_live_core_registers_ =
- std::max(maximum_number_of_live_core_registers_, active_.size());
- } else {
- maximum_number_of_live_fp_registers_ =
- std::max(maximum_number_of_live_fp_registers_, active_.size());
- }
- DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
- continue;
- }
-
- if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
- DCHECK(!current->HasRegister());
-      // Allocating the low part was unsuccessful. The split interval for the high part
- // will be handled next (it is in the `unhandled_` list).
- continue;
- }
-
- // (4) Try to find an available register.
- bool success = TryAllocateFreeReg(current);
-
- // (5) If no register could be found, we need to spill.
- if (!success) {
- success = AllocateBlockedReg(current);
- }
-
- // (6) If the interval had a register allocated, add it to the list of active
- // intervals.
- if (success) {
- codegen_->AddAllocatedRegister(processing_core_registers_
- ? Location::RegisterLocation(current->GetRegister())
- : Location::FpuRegisterLocation(current->GetRegister()));
- active_.push_back(current);
- if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
- current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
- }
- }
- }
-}
-
-static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
- DCHECK(!interval->IsHighInterval());
- // Note that the same instruction may occur multiple times in the input list,
- // so `free_until` may have changed already.
- // Since `position` is not the current scan position, we need to use CoversSlow.
- if (interval->IsDeadAt(position)) {
- // Set the register to be free. Note that inactive intervals might later
- // update this.
- free_until[interval->GetRegister()] = kMaxLifetimePosition;
+LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
+ DCHECK_GE(position, interval->GetStart());
+ DCHECK(!interval->IsDeadAt(position));
+ if (position == interval->GetStart()) {
+ // Spill slot will be allocated when handling `interval` again.
+ interval->ClearRegister();
if (interval->HasHighInterval()) {
- DCHECK(interval->GetHighInterval()->IsDeadAt(position));
- free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
+ interval->GetHighInterval()->ClearRegister();
+ } else if (interval->HasLowInterval()) {
+ interval->GetLowInterval()->ClearRegister();
}
- } else if (!interval->CoversSlow(position)) {
- // The interval becomes inactive at `defined_by`. We make its register
- // available only until the next use strictly after `defined_by`.
- free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+ return interval;
+ } else {
+ LiveInterval* new_interval = interval->SplitAt(position);
if (interval->HasHighInterval()) {
- DCHECK(!interval->GetHighInterval()->CoversSlow(position));
- free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
+ LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
+ new_interval->SetHighInterval(high);
+ high->SetLowInterval(new_interval);
+ } else if (interval->HasLowInterval()) {
+ LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
+ new_interval->SetLowInterval(low);
+ low->SetHighInterval(new_interval);
}
- }
-}
-
-// Find a free register. If multiple are found, pick the register that
-// is free the longest.
-bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
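-  // `free_until[i]` will hold the lifetime position up to which register `i`
-  // is free for `current`; 0 means the register is held by an active interval.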
- size_t* free_until = registers_array_;
-
- // First set all registers to be free.
- for (size_t i = 0; i < number_of_registers_; ++i) {
- free_until[i] = kMaxLifetimePosition;
- }
-
- // For each active interval, set its register to not free.
- for (LiveInterval* interval : active_) {
- DCHECK(interval->HasRegister());
- free_until[interval->GetRegister()] = 0;
- }
-
-  // An interval that starts at an instruction (that is, one that is not split) may
-  // re-use the registers used by the inputs of that instruction, based on the
- // location summary.
- HInstruction* defined_by = current->GetDefinedBy();
- if (defined_by != nullptr && !current->IsSplit()) {
- LocationSummary* locations = defined_by->GetLocations();
- if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
- HInputsRef inputs = defined_by->GetInputs();
- for (size_t i = 0; i < inputs.size(); ++i) {
- // Take the last interval of the input. It is the location of that interval
- // that will be used at `defined_by`.
- LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
-        // Note that the interval may not have been processed yet.
- // TODO: Handle non-split intervals last in the work list.
- if (locations->InAt(i).IsValid()
- && interval->HasRegister()
- && interval->SameRegisterKind(*current)) {
-          // The input must be live until the end of `defined_by`, to comply with
- // the linear scan algorithm. So we use `defined_by`'s end lifetime
- // position to check whether the input is dead or is inactive after
- // `defined_by`.
- DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
- size_t position = defined_by->GetLifetimePosition() + 1;
- FreeIfNotCoverAt(interval, position, free_until);
- }
- }
- }
- }
-
- // For each inactive interval, set its register to be free until
- // the next intersection with `current`.
- for (LiveInterval* inactive : inactive_) {
- // Temp/Slow-path-safepoint interval has no holes.
- DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
- if (!current->IsSplit() && !inactive->IsFixed()) {
- // Neither current nor inactive are fixed.
- // Thanks to SSA, a non-split interval starting in a hole of an
- // inactive interval should never intersect with that inactive interval.
- // Only if it's not fixed though, because fixed intervals don't come from SSA.
- DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
- continue;
- }
-
- DCHECK(inactive->HasRegister());
- if (free_until[inactive->GetRegister()] == 0) {
- // Already used by some active interval. No need to intersect.
- continue;
- }
- size_t next_intersection = inactive->FirstIntersectionWith(current);
- if (next_intersection != kNoLifetime) {
- free_until[inactive->GetRegister()] =
- std::min(free_until[inactive->GetRegister()], next_intersection);
- }
- }
-
- int reg = kNoRegister;
- if (current->HasRegister()) {
- // Some instructions have a fixed register output.
- reg = current->GetRegister();
- if (free_until[reg] == 0) {
- DCHECK(current->IsHighInterval());
- // AllocateBlockedReg will spill the holder of the register.
- return false;
- }
- } else {
- DCHECK(!current->IsHighInterval());
- int hint = current->FindFirstRegisterHint(free_until, liveness_);
- if ((hint != kNoRegister)
- // For simplicity, if the hint we are getting for a pair cannot be used,
- // we are just going to allocate a new pair.
- && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) {
- DCHECK(!IsBlocked(hint));
- reg = hint;
- } else if (current->IsLowInterval()) {
- reg = FindAvailableRegisterPair(free_until, current->GetStart());
- } else {
- reg = FindAvailableRegister(free_until, current);
- }
- }
-
- DCHECK_NE(reg, kNoRegister);
- // If we could not find a register, we need to spill.
- if (free_until[reg] == 0) {
- return false;
- }
-
- if (current->IsLowInterval()) {
- // If the high register of this interval is not available, we need to spill.
- int high_reg = current->GetHighInterval()->GetRegister();
- if (high_reg == kNoRegister) {
- high_reg = GetHighForLowRegister(reg);
- }
- if (free_until[high_reg] == 0) {
- return false;
- }
- }
-
- current->SetRegister(reg);
- if (!current->IsDeadAt(free_until[reg])) {
- // If the register is only available for a subset of live ranges
- // covered by `current`, split `current` before the position where
- // the register is not available anymore.
- LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]);
- DCHECK(split != nullptr);
- AddSorted(unhandled_, split);
- }
- return true;
-}
-
-bool RegisterAllocator::IsBlocked(int reg) const {
- return processing_core_registers_
- ? blocked_core_registers_[reg]
- : blocked_fp_registers_[reg];
-}
-
-int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
- int reg = kNoRegister;
-  // Pick the register pair that is used last.
- for (size_t i = 0; i < number_of_registers_; ++i) {
- if (IsBlocked(i)) continue;
- if (!IsLowRegister(i)) continue;
- int high_register = GetHighForLowRegister(i);
- if (IsBlocked(high_register)) continue;
- int existing_high_register = GetHighForLowRegister(reg);
- if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
- && next_use[high_register] >= next_use[existing_high_register])) {
- reg = i;
- if (next_use[i] == kMaxLifetimePosition
- && next_use[high_register] == kMaxLifetimePosition) {
- break;
- }
- } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
-      // If one of the current registers is known to be unavailable, just unconditionally
- // try a new one.
- reg = i;
- }
- }
- return reg;
-}
-
-bool RegisterAllocator::IsCallerSaveRegister(int reg) const {
- return processing_core_registers_
- ? !codegen_->IsCoreCalleeSaveRegister(reg)
- : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
-}
-
-int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
- // We special case intervals that do not span a safepoint to try to find a caller-save
- // register if one is available. We iterate from 0 to the number of registers,
- // so if there are caller-save registers available at the end, we continue the iteration.
- bool prefers_caller_save = !current->HasWillCallSafepoint();
- int reg = kNoRegister;
- for (size_t i = 0; i < number_of_registers_; ++i) {
- if (IsBlocked(i)) {
- // Register cannot be used. Continue.
- continue;
- }
-
- // Best case: we found a register fully available.
- if (next_use[i] == kMaxLifetimePosition) {
- if (prefers_caller_save && !IsCallerSaveRegister(i)) {
- // We can get shorter encodings on some platforms by using
- // small register numbers. So only update the candidate if the previous
- // one was not available for the whole method.
- if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
- reg = i;
- }
- // Continue the iteration in the hope of finding a caller save register.
- continue;
- } else {
- reg = i;
- // We know the register is good enough. Return it.
- break;
- }
- }
-
- // If we had no register before, take this one as a reference.
- if (reg == kNoRegister) {
- reg = i;
- continue;
- }
-
-    // Pick the register that is used last.
- if (next_use[i] > next_use[reg]) {
- reg = i;
- continue;
- }
- }
- return reg;
-}
-
-// Remove interval and its other half if any. Return iterator to the following element.
-static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
- ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
- DCHECK(intervals->begin() <= pos && pos < intervals->end());
- LiveInterval* interval = *pos;
- if (interval->IsLowInterval()) {
- DCHECK(pos + 1 < intervals->end());
- DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
- return intervals->erase(pos, pos + 2);
- } else if (interval->IsHighInterval()) {
- DCHECK(intervals->begin() < pos);
- DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
- return intervals->erase(pos - 1, pos + 1);
- } else {
- return intervals->erase(pos);
- }
-}
-
-bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
- size_t first_register_use,
- size_t* next_use) {
- for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
- LiveInterval* active = *it;
- DCHECK(active->HasRegister());
- if (active->IsFixed()) continue;
- if (active->IsHighInterval()) continue;
- if (first_register_use > next_use[active->GetRegister()]) continue;
-
- // Split the first interval found that is either:
- // 1) A non-pair interval.
- // 2) A pair interval whose high is not low + 1.
- // 3) A pair interval whose low is not even.
- if (!active->IsLowInterval() ||
- IsLowOfUnalignedPairInterval(active) ||
- !IsLowRegister(active->GetRegister())) {
- LiveInterval* split = Split(active, position);
- if (split != active) {
- handled_.push_back(active);
- }
- RemoveIntervalAndPotentialOtherHalf(&active_, it);
- AddSorted(unhandled_, split);
- return true;
- }
- }
- return false;
-}
-
-// Find the register that is used last, and spill the interval
-// that holds it. If the first use of `current` is after the next use of that
-// register, we spill `current` instead.
-bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
- size_t first_register_use = current->FirstRegisterUse();
- if (current->HasRegister()) {
- DCHECK(current->IsHighInterval());
- // The low interval has allocated the register for the high interval. In
- // case the low interval had to split both intervals, we may end up in a
- // situation where the high interval does not have a register use anymore.
- // We must still proceed in order to split currently active and inactive
- // uses of the high interval's register, and put the high interval in the
- // active set.
- DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr));
- } else if (first_register_use == kNoLifetime) {
- AllocateSpillSlotFor(current);
- return false;
- }
-
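-  // `next_use[i]` will record how soon register `i` is needed again relative
-  // to the start of `current` (kMaxLifetimePosition means never).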
- // First set all registers as not being used.
- size_t* next_use = registers_array_;
- for (size_t i = 0; i < number_of_registers_; ++i) {
- next_use[i] = kMaxLifetimePosition;
- }
-
- // For each active interval, find the next use of its register after the
- // start of current.
- for (LiveInterval* active : active_) {
- DCHECK(active->HasRegister());
- if (active->IsFixed()) {
- next_use[active->GetRegister()] = current->GetStart();
- } else {
- size_t use = active->FirstRegisterUseAfter(current->GetStart());
- if (use != kNoLifetime) {
- next_use[active->GetRegister()] = use;
- }
- }
- }
-
- // For each inactive interval, find the next use of its register after the
- // start of current.
- for (LiveInterval* inactive : inactive_) {
- // Temp/Slow-path-safepoint interval has no holes.
- DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
- if (!current->IsSplit() && !inactive->IsFixed()) {
- // Neither current nor inactive are fixed.
- // Thanks to SSA, a non-split interval starting in a hole of an
- // inactive interval should never intersect with that inactive interval.
- // Only if it's not fixed though, because fixed intervals don't come from SSA.
- DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
- continue;
- }
- DCHECK(inactive->HasRegister());
- size_t next_intersection = inactive->FirstIntersectionWith(current);
- if (next_intersection != kNoLifetime) {
- if (inactive->IsFixed()) {
- next_use[inactive->GetRegister()] =
- std::min(next_intersection, next_use[inactive->GetRegister()]);
- } else {
- size_t use = inactive->FirstUseAfter(current->GetStart());
- if (use != kNoLifetime) {
- next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
- }
- }
- }
- }
-
- int reg = kNoRegister;
- bool should_spill = false;
- if (current->HasRegister()) {
- DCHECK(current->IsHighInterval());
- reg = current->GetRegister();
- // When allocating the low part, we made sure the high register was available.
- DCHECK_LT(first_register_use, next_use[reg]);
- } else if (current->IsLowInterval()) {
- reg = FindAvailableRegisterPair(next_use, first_register_use);
-    // We should spill if either register of the pair is not available.
- should_spill = (first_register_use >= next_use[reg])
- || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
- } else {
- DCHECK(!current->IsHighInterval());
- reg = FindAvailableRegister(next_use, current);
- should_spill = (first_register_use >= next_use[reg]);
- }
-
- DCHECK_NE(reg, kNoRegister);
- if (should_spill) {
- DCHECK(!current->IsHighInterval());
- bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
- if (is_allocation_at_use_site) {
- if (!current->IsLowInterval()) {
- DumpInterval(std::cerr, current);
- DumpAllIntervals(std::cerr);
-      // This situation could loop forever, so we make it a non-debug CHECK.
- HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
-        CHECK(false) << "There are not enough registers available for "
- << current->GetParent()->GetDefinedBy()->DebugName() << " "
- << current->GetParent()->GetDefinedBy()->GetId()
- << " at " << first_register_use - 1 << " "
- << (at == nullptr ? "" : at->DebugName());
- }
-
- // If we're allocating a register for `current` because the instruction at
- // that position requires it, but we think we should spill, then there are
- // non-pair intervals or unaligned pair intervals blocking the allocation.
- // We split the first interval found, and put ourselves first in the
- // `unhandled_` list.
- bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
- first_register_use,
- next_use);
- DCHECK(success);
- LiveInterval* existing = unhandled_->back();
- DCHECK(existing->IsHighInterval());
- DCHECK_EQ(existing->GetLowInterval(), current);
- unhandled_->push_back(current);
- } else {
-      // If the first use of `current` is after the next use of the found
- // register, we split this interval just before its first register use.
- AllocateSpillSlotFor(current);
- LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
- DCHECK(current != split);
- AddSorted(unhandled_, split);
- }
- return false;
- } else {
-    // Use this register and spill the active and inactive intervals that
- // have that register.
- current->SetRegister(reg);
-
- for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
- LiveInterval* active = *it;
- if (active->GetRegister() == reg) {
- DCHECK(!active->IsFixed());
- LiveInterval* split = Split(active, current->GetStart());
- if (split != active) {
- handled_.push_back(active);
- }
- RemoveIntervalAndPotentialOtherHalf(&active_, it);
- AddSorted(unhandled_, split);
- break;
- }
- }
-
- // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
- for (auto it = inactive_.begin(); it != inactive_.end(); ) {
- LiveInterval* inactive = *it;
- bool erased = false;
- if (inactive->GetRegister() == reg) {
- if (!current->IsSplit() && !inactive->IsFixed()) {
- // Neither current nor inactive are fixed.
- // Thanks to SSA, a non-split interval starting in a hole of an
- // inactive interval should never intersect with that inactive interval.
- // Only if it's not fixed though, because fixed intervals don't come from SSA.
- DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
- } else {
- size_t next_intersection = inactive->FirstIntersectionWith(current);
- if (next_intersection != kNoLifetime) {
- if (inactive->IsFixed()) {
- LiveInterval* split = Split(current, next_intersection);
- DCHECK_NE(split, current);
- AddSorted(unhandled_, split);
- } else {
- // Split at the start of `current`, which will lead to splitting
- // at the end of the lifetime hole of `inactive`.
- LiveInterval* split = Split(inactive, current->GetStart());
- // If it's inactive, it must start before the current interval.
- DCHECK_NE(split, inactive);
- it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
- erased = true;
- handled_.push_back(inactive);
- AddSorted(unhandled_, split);
- }
- }
- }
- }
- // If we have erased the element, `it` already points to the next element.
- // Otherwise we need to move to the next element.
- if (!erased) {
- ++it;
- }
- }
-
- return true;
- }
-}
-
-void RegisterAllocator::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
- DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
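-  // `array` is sorted by decreasing start position: the interval with the
-  // smallest start is at the back, so it is the next one popped by LinearScan.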
- size_t insert_at = 0;
- for (size_t i = array->size(); i > 0; --i) {
- LiveInterval* current = (*array)[i - 1u];
- // High intervals must be processed right after their low equivalent.
- if (current->StartsAfter(interval) && !current->IsHighInterval()) {
- insert_at = i;
- break;
- } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
- // Ensure the slow path interval is the last to be processed at its location: we want the
- // interval to know all live registers at this location.
- DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
- insert_at = i;
- break;
- }
- }
-
-  // Insert the high interval before the low, to ensure the low is processed first.
- auto insert_pos = array->begin() + insert_at;
- if (interval->HasHighInterval()) {
- array->insert(insert_pos, { interval->GetHighInterval(), interval });
- } else if (interval->HasLowInterval()) {
- array->insert(insert_pos, { interval, interval->GetLowInterval() });
- } else {
- array->insert(insert_pos, interval);
+ return new_interval;
}
}
@@ -1258,754 +263,4 @@
return Split(interval, block_to->GetLifetimeStart());
}
-LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
- DCHECK_GE(position, interval->GetStart());
- DCHECK(!interval->IsDeadAt(position));
- if (position == interval->GetStart()) {
- // Spill slot will be allocated when handling `interval` again.
- interval->ClearRegister();
- if (interval->HasHighInterval()) {
- interval->GetHighInterval()->ClearRegister();
- } else if (interval->HasLowInterval()) {
- interval->GetLowInterval()->ClearRegister();
- }
- return interval;
- } else {
- LiveInterval* new_interval = interval->SplitAt(position);
- if (interval->HasHighInterval()) {
- LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
- new_interval->SetHighInterval(high);
- high->SetLowInterval(new_interval);
- } else if (interval->HasLowInterval()) {
- LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
- new_interval->SetLowInterval(low);
- low->SetHighInterval(new_interval);
- }
- return new_interval;
- }
-}
-
-void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
- if (interval->IsHighInterval()) {
- // The low interval already took care of allocating the spill slot.
- DCHECK(!interval->GetLowInterval()->HasRegister());
- DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot());
- return;
- }
-
- LiveInterval* parent = interval->GetParent();
-
- // An instruction gets a spill slot for its entire lifetime. If the parent
- // of this interval already has a spill slot, there is nothing to do.
- if (parent->HasSpillSlot()) {
- return;
- }
-
- HInstruction* defined_by = parent->GetDefinedBy();
- DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
-
- if (defined_by->IsParameterValue()) {
- // Parameters have their own stack slot.
- parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
- return;
- }
-
- if (defined_by->IsCurrentMethod()) {
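-    // The current method (ArtMethod*) is always stored at stack offset 0 by
-    // the code generators.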
- parent->SetSpillSlot(0);
- return;
- }
-
- if (defined_by->IsConstant()) {
- // Constants don't need a spill slot.
- return;
- }
-
- ArenaVector<size_t>* spill_slots = nullptr;
- switch (interval->GetType()) {
- case Primitive::kPrimDouble:
- spill_slots = &double_spill_slots_;
- break;
- case Primitive::kPrimLong:
- spill_slots = &long_spill_slots_;
- break;
- case Primitive::kPrimFloat:
- spill_slots = &float_spill_slots_;
- break;
- case Primitive::kPrimNot:
- case Primitive::kPrimInt:
- case Primitive::kPrimChar:
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimShort:
- spill_slots = &int_spill_slots_;
- break;
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
- }
-
- // Find an available spill slot.
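-  // Each entry of `spill_slots` records the end position of the interval that
-  // currently occupies the slot; the slot can be reused once that position is
-  // no later than the start of `parent`.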
- size_t slot = 0;
- for (size_t e = spill_slots->size(); slot < e; ++slot) {
- if ((*spill_slots)[slot] <= parent->GetStart()) {
- if (!parent->NeedsTwoSpillSlots()) {
- // One spill slot is sufficient.
- break;
- }
- if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
- // Two spill slots are available.
- break;
- }
- }
- }
-
- size_t end = interval->GetLastSibling()->GetEnd();
- if (parent->NeedsTwoSpillSlots()) {
- if (slot + 2u > spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->resize(slot + 2u, end);
- }
- (*spill_slots)[slot] = end;
- (*spill_slots)[slot + 1] = end;
- } else {
- if (slot == spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->push_back(end);
- } else {
- (*spill_slots)[slot] = end;
- }
- }
-
- // Note that the exact spill slot location will be computed when we resolve,
- // that is when we know the number of spill slots for each type.
- parent->SetSpillSlot(slot);
-}
-
-static bool IsValidDestination(Location destination) {
- return destination.IsRegister()
- || destination.IsRegisterPair()
- || destination.IsFpuRegister()
- || destination.IsFpuRegisterPair()
- || destination.IsStackSlot()
- || destination.IsDoubleStackSlot();
-}
-
-void RegisterAllocator::AllocateSpillSlotForCatchPhi(HPhi* phi) {
- LiveInterval* interval = phi->GetLiveInterval();
-
- HInstruction* previous_phi = phi->GetPrevious();
- DCHECK(previous_phi == nullptr ||
- previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
- << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
-
- if (phi->IsVRegEquivalentOf(previous_phi)) {
- // This is an equivalent of the previous phi. We need to assign the same
- // catch phi slot.
- DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
- interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
- } else {
- // Allocate a new spill slot for this catch phi.
- // TODO: Reuse spill slots when intervals of phis from different catch
- // blocks do not overlap.
- interval->SetSpillSlot(catch_phi_spill_slots_);
- catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
- }
-}
-
-void RegisterAllocator::AddMove(HParallelMove* move,
- Location source,
- Location destination,
- HInstruction* instruction,
- Primitive::Type type) const {
- if (type == Primitive::kPrimLong
- && codegen_->ShouldSplitLongMoves()
- // The parallel move resolver knows how to deal with long constants.
- && !source.IsConstant()) {
- move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
- move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
- } else {
- move->AddMove(source, destination, type, instruction);
- }
-}
-
-void RegisterAllocator::AddInputMoveFor(HInstruction* input,
- HInstruction* user,
- Location source,
- Location destination) const {
- if (source.Equals(destination)) return;
-
- DCHECK(!user->IsPhi());
-
- HInstruction* previous = user->GetPrevious();
- HParallelMove* move = nullptr;
- if (previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(user->GetLifetimePosition());
- user->GetBlock()->InsertInstructionBefore(move, user);
- } else {
- move = previous->AsParallelMove();
- }
- DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
- AddMove(move, source, destination, nullptr, input->GetType());
-}
-
-static bool IsInstructionStart(size_t position) {
- return (position & 1) == 0;
-}
-
-static bool IsInstructionEnd(size_t position) {
- return (position & 1) == 1;
-}
-
-void RegisterAllocator::InsertParallelMoveAt(size_t position,
- HInstruction* instruction,
- Location source,
- Location destination) const {
- DCHECK(IsValidDestination(destination)) << destination;
- if (source.Equals(destination)) return;
-
- HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
- HParallelMove* move;
- if (at == nullptr) {
- if (IsInstructionStart(position)) {
-      // Block boundary, don't do anything: the connection of split siblings will handle it.
- return;
- } else {
- // Move must happen before the first instruction of the block.
- at = liveness_.GetInstructionFromPosition((position + 1) / 2);
- // Note that parallel moves may have already been inserted, so we explicitly
- // ask for the first instruction of the block: `GetInstructionFromPosition` does
- // not contain the `HParallelMove` instructions.
- at = at->GetBlock()->GetFirstInstruction();
-
- if (at->GetLifetimePosition() < position) {
- // We may insert moves for split siblings and phi spills at the beginning of the block.
- // Since this is a different lifetime position, we need to go to the next instruction.
- DCHECK(at->IsParallelMove());
- at = at->GetNext();
- }
-
- if (at->GetLifetimePosition() != position) {
- DCHECK_GT(at->GetLifetimePosition(), position);
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- at->GetBlock()->InsertInstructionBefore(move, at);
- } else {
- DCHECK(at->IsParallelMove());
- move = at->AsParallelMove();
- }
- }
- } else if (IsInstructionEnd(position)) {
- // Move must happen after the instruction.
- DCHECK(!at->IsControlFlow());
- move = at->GetNext()->AsParallelMove();
-    // This is a parallel move for connecting siblings in the same block. We need to
-    // differentiate it from moves for connecting blocks, and from input moves.
- if (move == nullptr || move->GetLifetimePosition() > position) {
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
- }
- } else {
- // Move must happen before the instruction.
- HInstruction* previous = at->GetPrevious();
- if (previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() != position) {
- // If the previous is a parallel move, then its position must be lower
- // than the given `position`: it was added just after the non-parallel
- // move instruction that precedes `instruction`.
- DCHECK(previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() < position);
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- at->GetBlock()->InsertInstructionBefore(move, at);
- } else {
- move = previous->AsParallelMove();
- }
- }
- DCHECK_EQ(move->GetLifetimePosition(), position);
- AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
- HInstruction* instruction,
- Location source,
- Location destination) const {
- DCHECK(IsValidDestination(destination)) << destination;
- if (source.Equals(destination)) return;
-
- DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
- HInstruction* last = block->GetLastInstruction();
- // We insert moves at exit for phi predecessors and connecting blocks.
- // A block ending with an if or a packed switch cannot branch to a block
- // with phis because we do not allow critical edges. It can also not connect
- // a split interval between two blocks: the move has to happen in the successor.
- DCHECK(!last->IsIf() && !last->IsPackedSwitch());
- HInstruction* previous = last->GetPrevious();
- HParallelMove* move;
- // This is a parallel move for connecting blocks. We need to differentiate
-  // it from moves for connecting siblings in the same block, and from output moves.
- size_t position = last->GetLifetimePosition();
- if (previous == nullptr || !previous->IsParallelMove()
- || previous->AsParallelMove()->GetLifetimePosition() != position) {
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- block->InsertInstructionBefore(move, last);
- } else {
- move = previous->AsParallelMove();
- }
- AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
- HInstruction* instruction,
- Location source,
- Location destination) const {
- DCHECK(IsValidDestination(destination)) << destination;
- if (source.Equals(destination)) return;
-
- HInstruction* first = block->GetFirstInstruction();
- HParallelMove* move = first->AsParallelMove();
- size_t position = block->GetLifetimeStart();
- // This is a parallel move for connecting blocks. We need to differentiate
-  // it from moves for connecting siblings in the same block, and from input moves.
- if (move == nullptr || move->GetLifetimePosition() != position) {
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- block->InsertInstructionBefore(move, first);
- }
- AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
- Location source,
- Location destination) const {
- DCHECK(IsValidDestination(destination)) << destination;
- if (source.Equals(destination)) return;
-
- if (instruction->IsPhi()) {
- InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination);
- return;
- }
-
- size_t position = instruction->GetLifetimePosition() + 1;
- HParallelMove* move = instruction->GetNext()->AsParallelMove();
- // This is a parallel move for moving the output of an instruction. We need
-  // to differentiate it from input moves, from moves for connecting siblings
-  // in the same block, and from moves for connecting blocks.
- if (move == nullptr || move->GetLifetimePosition() != position) {
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
- }
- AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
- LiveInterval* current = interval;
- if (current->HasSpillSlot()
- && current->HasRegister()
-      // Currently, we unconditionally spill the current method in the code generators.
- && !interval->GetDefinedBy()->IsCurrentMethod()) {
- // We spill eagerly, so move must be at definition.
- InsertMoveAfter(interval->GetDefinedBy(),
- interval->ToLocation(),
- interval->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
- : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
- }
- UsePosition* use = current->GetFirstUse();
- UsePosition* env_use = current->GetFirstEnvironmentUse();
-
- // Walk over all siblings, updating locations of use positions, and
- // connecting them when they are adjacent.
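-  // Uses are ordered by increasing position, so a single forward pass over the
-  // ranges of all siblings is enough to update every use location.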
- do {
- Location source = current->ToLocation();
-
- // Walk over all uses covered by this interval, and update the location
- // information.
-
- LiveRange* range = current->GetFirstRange();
- while (range != nullptr) {
- while (use != nullptr && use->GetPosition() < range->GetStart()) {
- DCHECK(use->IsSynthesized());
- use = use->GetNext();
- }
- while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
- DCHECK(!use->GetIsEnvironment());
- DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
- if (!use->IsSynthesized()) {
- LocationSummary* locations = use->GetUser()->GetLocations();
- Location expected_location = locations->InAt(use->GetInputIndex());
- // The expected (actual) location may be invalid in case the input is unused. Currently
- // this only happens for intrinsics.
- if (expected_location.IsValid()) {
- if (expected_location.IsUnallocated()) {
- locations->SetInAt(use->GetInputIndex(), source);
- } else if (!expected_location.IsConstant()) {
- AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
- }
- } else {
- DCHECK(use->GetUser()->IsInvoke());
- DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
- }
- }
- use = use->GetNext();
- }
-
- // Walk over the environment uses, and update their locations.
- while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
- env_use = env_use->GetNext();
- }
-
- while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
- DCHECK(current->CoversSlow(env_use->GetPosition())
- || (env_use->GetPosition() == range->GetEnd()));
- HEnvironment* environment = env_use->GetEnvironment();
- environment->SetLocationAt(env_use->GetInputIndex(), source);
- env_use = env_use->GetNext();
- }
-
- range = range->GetNext();
- }
-
- // If the next interval starts just after this one, and has a register,
- // insert a move.
- LiveInterval* next_sibling = current->GetNextSibling();
- if (next_sibling != nullptr
- && next_sibling->HasRegister()
- && current->GetEnd() == next_sibling->GetStart()) {
- Location destination = next_sibling->ToLocation();
- InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
- }
-
- for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
- safepoint_position != nullptr;
- safepoint_position = safepoint_position->GetNext()) {
- DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
-
- LocationSummary* locations = safepoint_position->GetLocations();
- if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
- DCHECK(interval->GetDefinedBy()->IsActualObject())
- << interval->GetDefinedBy()->DebugName()
- << "@" << safepoint_position->GetInstruction()->DebugName();
- locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
- }
-
- switch (source.GetKind()) {
- case Location::kRegister: {
- locations->AddLiveRegister(source);
- if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
- DCHECK_LE(locations->GetNumberOfLiveRegisters(),
- maximum_number_of_live_core_registers_ +
- maximum_number_of_live_fp_registers_);
- }
- if (current->GetType() == Primitive::kPrimNot) {
- DCHECK(interval->GetDefinedBy()->IsActualObject())
- << interval->GetDefinedBy()->DebugName()
- << "@" << safepoint_position->GetInstruction()->DebugName();
- locations->SetRegisterBit(source.reg());
- }
- break;
- }
- case Location::kFpuRegister: {
- locations->AddLiveRegister(source);
- break;
- }
-
- case Location::kRegisterPair:
- case Location::kFpuRegisterPair: {
- locations->AddLiveRegister(source.ToLow());
- locations->AddLiveRegister(source.ToHigh());
- break;
- }
- case Location::kStackSlot: // Fall-through
- case Location::kDoubleStackSlot: // Fall-through
- case Location::kConstant: {
- // Nothing to do.
- break;
- }
- default: {
- LOG(FATAL) << "Unexpected location for object";
- }
- }
- }
- current = next_sibling;
- } while (current != nullptr);
-
- if (kIsDebugBuild) {
- // Following uses can only be synthesized uses.
- while (use != nullptr) {
- DCHECK(use->IsSynthesized());
- use = use->GetNext();
- }
- }
-}
-
-static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
- HInstruction* instruction) {
- return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
- (instruction->IsConstant() || instruction->IsCurrentMethod());
-}
-
-void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
- HBasicBlock* from,
- HBasicBlock* to) const {
- if (interval->GetNextSibling() == nullptr) {
- // Nothing to connect. The whole range was allocated to the same location.
- return;
- }
-
- // Find the intervals that cover `from` and `to`.
- size_t destination_position = to->GetLifetimeStart();
- size_t source_position = from->GetLifetimeEnd() - 1;
- LiveInterval* destination = interval->GetSiblingAt(destination_position);
- LiveInterval* source = interval->GetSiblingAt(source_position);
-
- if (destination == source) {
- // Interval was not split.
- return;
- }
-
- LiveInterval* parent = interval->GetParent();
- HInstruction* defined_by = parent->GetDefinedBy();
- if (codegen_->GetGraph()->HasIrreducibleLoops() &&
- (destination == nullptr || !destination->CoversSlow(destination_position))) {
- // Our live_in fixed point calculation has found that the instruction is live
- // in the `to` block because it will eventually enter an irreducible loop. Our
- // live interval computation however does not compute a fixed point, and
- // therefore will not have a location for that instruction for `to`.
- // Because the instruction is a constant or the ArtMethod, we don't need to
- // do anything: it will be materialized in the irreducible loop.
- DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
- << defined_by->DebugName() << ":" << defined_by->GetId()
- << " " << from->GetBlockId() << " -> " << to->GetBlockId();
- return;
- }
-
- if (!destination->HasRegister()) {
- // Values are eagerly spilled. Spill slot already contains appropriate value.
- return;
- }
-
- Location location_source;
- // `GetSiblingAt` returns the interval whose start and end cover `position`,
- // but does not check whether the interval is inactive at that position.
- // The only situation where the interval is inactive at that position is in the
- // presence of irreducible loops for constants and ArtMethod.
- if (codegen_->GetGraph()->HasIrreducibleLoops() &&
- (source == nullptr || !source->CoversSlow(source_position))) {
- DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
- if (defined_by->IsConstant()) {
- location_source = defined_by->GetLocations()->Out();
- } else {
- DCHECK(defined_by->IsCurrentMethod());
- location_source = parent->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(parent->GetSpillSlot())
- : Location::StackSlot(parent->GetSpillSlot());
- }
- } else {
- DCHECK(source != nullptr);
- DCHECK(source->CoversSlow(source_position));
- DCHECK(destination->CoversSlow(destination_position));
- location_source = source->ToLocation();
- }
-
- // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
- // we need to put the moves at the entry of `to`.
- if (from->GetNormalSuccessors().size() == 1) {
- InsertParallelMoveAtExitOf(from,
- defined_by,
- location_source,
- destination->ToLocation());
- } else {
- DCHECK_EQ(to->GetPredecessors().size(), 1u);
- InsertParallelMoveAtEntryOf(to,
- defined_by,
- location_source,
- destination->ToLocation());
- }
-}
-
-void RegisterAllocator::Resolve() {
- codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(),
- maximum_number_of_live_core_registers_,
- maximum_number_of_live_fp_registers_,
- reserved_out_slots_,
- codegen_->GetGraph()->GetLinearOrder());
-
- // Adjust the Out Location of instructions.
- // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
- for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
- HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- LiveInterval* current = instruction->GetLiveInterval();
- LocationSummary* locations = instruction->GetLocations();
- Location location = locations->Out();
- if (instruction->IsParameterValue()) {
- // Now that we know the frame size, adjust the parameter's location.
- if (location.IsStackSlot()) {
- location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
- current->SetSpillSlot(location.GetStackIndex());
- locations->UpdateOut(location);
- } else if (location.IsDoubleStackSlot()) {
- location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
- current->SetSpillSlot(location.GetStackIndex());
- locations->UpdateOut(location);
- } else if (current->HasSpillSlot()) {
- current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
- }
- } else if (instruction->IsCurrentMethod()) {
- // The current method is always at offset 0.
- DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
- } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
- DCHECK(current->HasSpillSlot());
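-      // Catch phi spill slots sit above the typed spill slots (see the layout
-      // described below); re-base the slot index accordingly.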
- size_t slot = current->GetSpillSlot()
- + GetNumberOfSpillSlots()
- + reserved_out_slots_
- - catch_phi_spill_slots_;
- current->SetSpillSlot(slot * kVRegSize);
- } else if (current->HasSpillSlot()) {
- // Adjust the stack slot, now that we know the number of them for each type.
- // The way this implementation lays out the stack is the following:
- // [parameter slots ]
- // [catch phi spill slots ]
- // [double spill slots ]
- // [long spill slots ]
- // [float spill slots ]
- // [int/ref values ]
- // [maximum out values ] (number of arguments for calls)
- // [art method ].
- size_t slot = current->GetSpillSlot();
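-        // The cases fall through on purpose: each type adds the sizes of all
-        // the slot categories laid out below it in the frame.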
- switch (current->GetType()) {
- case Primitive::kPrimDouble:
- slot += long_spill_slots_.size();
- FALLTHROUGH_INTENDED;
- case Primitive::kPrimLong:
- slot += float_spill_slots_.size();
- FALLTHROUGH_INTENDED;
- case Primitive::kPrimFloat:
- slot += int_spill_slots_.size();
- FALLTHROUGH_INTENDED;
- case Primitive::kPrimNot:
- case Primitive::kPrimInt:
- case Primitive::kPrimChar:
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimShort:
- slot += reserved_out_slots_;
- break;
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unexpected type for interval " << current->GetType();
- }
- current->SetSpillSlot(slot * kVRegSize);
- }
-
- Location source = current->ToLocation();
-
- if (location.IsUnallocated()) {
- if (location.GetPolicy() == Location::kSameAsFirstInput) {
- if (locations->InAt(0).IsUnallocated()) {
- locations->SetInAt(0, source);
- } else {
- DCHECK(locations->InAt(0).Equals(source));
- }
- }
- locations->UpdateOut(source);
- } else {
- DCHECK(source.Equals(location));
- }
- }
-
- // Connect siblings.
- for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
- HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- ConnectSiblings(instruction->GetLiveInterval());
- }
-
- // Resolve non-linear control flow across branches. Order does not matter.
- for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
- if (block->IsCatchBlock() ||
- (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
-      // Instructions live at the top of catch blocks or irreducible loop headers
- // were forced to spill.
- if (kIsDebugBuild) {
- BitVector* live = liveness_.GetLiveInSet(*block);
- for (uint32_t idx : live->Indexes()) {
- LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
- LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart());
- // `GetSiblingAt` returns the sibling that contains a position, but there could be
- // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that
- // position.
- if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) {
- DCHECK(!sibling->HasRegister());
- }
- }
- }
- } else {
- BitVector* live = liveness_.GetLiveInSet(*block);
- for (uint32_t idx : live->Indexes()) {
- LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
- for (HBasicBlock* predecessor : block->GetPredecessors()) {
- ConnectSplitSiblings(interval, predecessor, block);
- }
- }
- }
- }
-
- // Resolve phi inputs. Order does not matter.
- for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
- if (current->IsCatchBlock()) {
- // Catch phi values are set at runtime by the exception delivery mechanism.
- } else {
- for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
- HInstruction* phi = inst_it.Current();
- for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
- HBasicBlock* predecessor = current->GetPredecessors()[i];
- DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
- HInstruction* input = phi->InputAt(i);
- Location source = input->GetLiveInterval()->GetLocationAt(
- predecessor->GetLifetimeEnd() - 1);
- Location destination = phi->GetLiveInterval()->ToLocation();
- InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
- }
- }
- }
- }
-
- // Assign temp locations.
- for (LiveInterval* temp : temp_intervals_) {
- if (temp->IsHighInterval()) {
- // High intervals can be skipped, they are already handled by the low interval.
- continue;
- }
- HInstruction* at = liveness_.GetTempUser(temp);
- size_t temp_index = liveness_.GetTempIndex(temp);
- LocationSummary* locations = at->GetLocations();
- switch (temp->GetType()) {
- case Primitive::kPrimInt:
- locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister()));
- break;
-
- case Primitive::kPrimDouble:
- if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
- Location location = Location::FpuRegisterPairLocation(
- temp->GetRegister(), temp->GetHighInterval()->GetRegister());
- locations->SetTempAt(temp_index, location);
- } else {
- locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister()));
- }
- break;
-
- default:
- LOG(FATAL) << "Unexpected type for temporary location "
- << temp->GetType();
- }
- }
-}
-
} // namespace art
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 58600b7..729eede 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
#include "arch/instruction_set.h"
#include "base/arena_containers.h"
+#include "base/arena_object.h"
#include "base/macros.h"
#include "primitive.h"
@@ -29,36 +30,40 @@
class HGraph;
class HInstruction;
class HParallelMove;
-class HPhi;
class LiveInterval;
class Location;
class SsaLivenessAnalysis;
/**
- * An implementation of a linear scan register allocator on an `HGraph` with SSA form.
+ * Base class for any register allocator.
*/
-class RegisterAllocator {
+class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> {
public:
- RegisterAllocator(ArenaAllocator* allocator,
- CodeGenerator* codegen,
- const SsaLivenessAnalysis& analysis);
+ enum Strategy {
+ kRegisterAllocatorLinearScan
+ };
+
+ static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan;
+
+ static RegisterAllocator* Create(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis,
+ Strategy strategy = kRegisterAllocatorDefault);
+
+ virtual ~RegisterAllocator() = default;
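+
+  // Typical usage (sketch; `ra` is illustrative): obtain a concrete allocator
+  // through the factory and run it, e.g.
+  //   RegisterAllocator* ra = RegisterAllocator::Create(allocator, codegen, analysis);
+  //   ra->AllocateRegisters();
+  // The default `strategy` selects the linear scan implementation.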
// Main entry point for the register allocator. Given the liveness analysis,
// allocates registers to live intervals.
- void AllocateRegisters();
+ virtual void AllocateRegisters() = 0;
// Validate that the register allocator did not allocate the same register to
- // intervals that intersect each other. Returns false if it did not.
- bool Validate(bool log_fatal_on_failure) {
- processing_core_registers_ = true;
- if (!ValidateInternal(log_fatal_on_failure)) {
- return false;
- }
- processing_core_registers_ = false;
- return ValidateInternal(log_fatal_on_failure);
- }
+ // intervals that intersect each other. Returns false if it failed.
+ virtual bool Validate(bool log_fatal_on_failure) = 0;
- // Helper method for validation. Used by unit testing.
+ static bool CanAllocateRegistersFor(const HGraph& graph,
+ InstructionSet instruction_set);
+
+ // Verifies that live intervals do not conflict. Used by unit testing.
static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
size_t number_of_spill_slots,
size_t number_of_out_slots,
@@ -67,178 +72,25 @@
bool processing_core_registers,
bool log_fatal_on_failure);
- static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
-
- size_t GetNumberOfSpillSlots() const {
- return int_spill_slots_.size()
- + long_spill_slots_.size()
- + float_spill_slots_.size()
- + double_spill_slots_.size()
- + catch_phi_spill_slots_;
- }
-
static constexpr const char* kRegisterAllocatorPassName = "register";
- private:
- // Main methods of the allocator.
- void LinearScan();
- bool TryAllocateFreeReg(LiveInterval* interval);
- bool AllocateBlockedReg(LiveInterval* interval);
- void Resolve();
-
- // Add `interval` in the given sorted list.
- static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
+ protected:
+ RegisterAllocator(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis);
// Split `interval` at the position `position`. The new interval starts at `position`.
- LiveInterval* Split(LiveInterval* interval, size_t position);
+ // If `position` is at the start of `interval`, returns `interval` with its
+ // register location(s) cleared.
+ static LiveInterval* Split(LiveInterval* interval, size_t position);
// Split `interval` at a position between `from` and `to`. The method will try
// to find an optimal split position.
LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to);
- // Returns whether `reg` is blocked by the code generator.
- bool IsBlocked(int reg) const;
-
- // Update the interval for the register in `location` to cover [start, end).
- void BlockRegister(Location location, size_t start, size_t end);
- void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
-
- // Allocate a spill slot for the given interval. Should be called in linear
- // order of interval starting positions.
- void AllocateSpillSlotFor(LiveInterval* interval);
-
- // Allocate a spill slot for the given catch phi. Will allocate the same slot
- // for phis which share the same vreg. Must be called in reverse linear order
- // of lifetime positions and ascending vreg numbers for correctness.
- void AllocateSpillSlotForCatchPhi(HPhi* phi);
-
- // Connect adjacent siblings within blocks.
- void ConnectSiblings(LiveInterval* interval);
-
- // Connect siblings between block entries and exits.
- void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
-
- // Helper methods to insert parallel moves in the graph.
- void InsertParallelMoveAtExitOf(HBasicBlock* block,
- HInstruction* instruction,
- Location source,
- Location destination) const;
- void InsertParallelMoveAtEntryOf(HBasicBlock* block,
- HInstruction* instruction,
- Location source,
- Location destination) const;
- void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
- void AddInputMoveFor(HInstruction* input,
- HInstruction* user,
- Location source,
- Location destination) const;
- void InsertParallelMoveAt(size_t position,
- HInstruction* instruction,
- Location source,
- Location destination) const;
-
- void AddMove(HParallelMove* move,
- Location source,
- Location destination,
- HInstruction* instruction,
- Primitive::Type type) const;
-
- // Helper methods.
- void AllocateRegistersInternal();
- void ProcessInstruction(HInstruction* instruction);
- bool ValidateInternal(bool log_fatal_on_failure) const;
- void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
- void DumpAllIntervals(std::ostream& stream) const;
- int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
- int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
- bool IsCallerSaveRegister(int reg) const;
-
- // Try splitting an active non-pair or unaligned pair interval at the given `position`.
- // Returns whether it was successful at finding such an interval.
- bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
- size_t first_register_use,
- size_t* next_use);
-
ArenaAllocator* const allocator_;
CodeGenerator* const codegen_;
const SsaLivenessAnalysis& liveness_;
-
- // List of intervals for core registers that must be processed, ordered by start
- // position. Last entry is the interval that has the lowest start position.
- // This list is initially populated before doing the linear scan.
- ArenaVector<LiveInterval*> unhandled_core_intervals_;
-
- // List of intervals for floating-point registers. Same comments as above.
- ArenaVector<LiveInterval*> unhandled_fp_intervals_;
-
- // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
- // or `unhandled_fp_intervals_`.
- ArenaVector<LiveInterval*>* unhandled_;
-
- // List of intervals that have been processed.
- ArenaVector<LiveInterval*> handled_;
-
- // List of intervals that are currently active when processing a new live interval.
- // That is, they have a live range that spans the start of the new interval.
- ArenaVector<LiveInterval*> active_;
-
- // List of intervals that are currently inactive when processing a new live interval.
- // That is, they have a lifetime hole that spans the start of the new interval.
- ArenaVector<LiveInterval*> inactive_;
-
- // Fixed intervals for physical registers. Such intervals cover the positions
- // where an instruction requires a specific register.
- ArenaVector<LiveInterval*> physical_core_register_intervals_;
- ArenaVector<LiveInterval*> physical_fp_register_intervals_;
-
- // Intervals for temporaries. Such intervals cover the positions
- // where an instruction requires a temporary.
- ArenaVector<LiveInterval*> temp_intervals_;
-
- // The spill slots allocated for live intervals. We ensure spill slots
- // are typed to avoid (1) doing moves and swaps between two different kinds
- // of registers, and (2) swapping between a single stack slot and a double
- // stack slot. This simplifies the parallel move resolver.
- ArenaVector<size_t> int_spill_slots_;
- ArenaVector<size_t> long_spill_slots_;
- ArenaVector<size_t> float_spill_slots_;
- ArenaVector<size_t> double_spill_slots_;
-
- // Spill slots allocated to catch phis. This category is special-cased because
- // (1) slots are allocated prior to linear scan and in reverse linear order,
- // (2) equivalent phis need to share slots despite having different types.
- size_t catch_phi_spill_slots_;
-
- // Instructions that need a safepoint.
- ArenaVector<HInstruction*> safepoints_;
-
- // True if processing core registers. False if processing floating
- // point registers.
- bool processing_core_registers_;
-
- // Number of registers for the current register kind (core or floating point).
- size_t number_of_registers_;
-
- // Temporary array, allocated ahead of time for simplicity.
- size_t* registers_array_;
-
- // Blocked registers, as decided by the code generator.
- bool* const blocked_core_registers_;
- bool* const blocked_fp_registers_;
-
- // Slots reserved for out arguments.
- size_t reserved_out_slots_;
-
- // The maximum live core registers at safepoints.
- size_t maximum_number_of_live_core_registers_;
-
- // The maximum live FP registers at safepoints.
- size_t maximum_number_of_live_fp_registers_;
-
- ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
- ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
-
- DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
};
} // namespace art
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
new file mode 100644
index 0000000..a9151ba
--- /dev/null
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -0,0 +1,1224 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator_linear_scan.h"
+
+#include <iostream>
+#include <sstream>
+
+#include "base/bit_vector-inl.h"
+#include "code_generator.h"
+#include "register_allocation_resolver.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+static constexpr size_t kMaxLifetimePosition = -1;
+static constexpr size_t kDefaultNumberOfSpillSlots = 4;
+
+// For simplicity, we implement register pairs as (reg, reg + 1).
+// Note that this is a requirement for double registers on ARM, since we
+// allocate SRegister.
+static int GetHighForLowRegister(int reg) { return reg + 1; }
+static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
+static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
+ return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
+}
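+
+// For example (sketch): a double allocated to the pair (S4, S5) has an even low
+// register (4) and a high register equal to GetHighForLowRegister(4) == 5, so the
+// pair is aligned. If the high interval ends up in S7 instead, the pair is
+// considered unaligned and may later be split by the allocator.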
+
+RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness)
+ : RegisterAllocator(allocator, codegen, liveness),
+ unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ unhandled_(nullptr),
+ handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ catch_phi_spill_slots_(0),
+ safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ processing_core_registers_(false),
+ number_of_registers_(-1),
+ registers_array_(nullptr),
+ blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
+ blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
+ reserved_out_slots_(0),
+ maximum_number_of_live_core_registers_(0),
+ maximum_number_of_live_fp_registers_(0) {
+ temp_intervals_.reserve(4);
+ int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+ long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+ float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+ double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+
+ codegen->SetupBlockedRegisters();
+ physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
+ physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
+ // Always reserve for the current method and the graph's max out registers.
+ // TODO: compute it instead.
+ // ArtMethod* takes 2 vregs for 64 bits.
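+  // For example (sketch, assuming kVRegSize == 4): on a 64-bit target the 8-byte
+  // method pointer takes 8 / 4 = 2 vregs, so a graph whose calls need at most
+  // 3 out vregs reserves 2 + 3 = 5 out slots.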
+ reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize +
+ codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
+}
+
+static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
+ if (interval == nullptr) return false;
+ bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
+ && (interval->GetType() != Primitive::kPrimFloat);
+ return processing_core_registers == is_core_register;
+}
+
+void RegisterAllocatorLinearScan::AllocateRegisters() {
+ AllocateRegistersInternal();
+ RegisterAllocationResolver(allocator_, codegen_, liveness_)
+ .Resolve(maximum_number_of_live_core_registers_,
+ maximum_number_of_live_fp_registers_,
+ reserved_out_slots_,
+ int_spill_slots_.size(),
+ long_spill_slots_.size(),
+ float_spill_slots_.size(),
+ double_spill_slots_.size(),
+ catch_phi_spill_slots_,
+ temp_intervals_);
+
+ if (kIsDebugBuild) {
+ processing_core_registers_ = true;
+ ValidateInternal(true);
+ processing_core_registers_ = false;
+ ValidateInternal(true);
+ // Check that the linear order is still correct with regards to lifetime positions.
+ // Since only parallel moves have been inserted during the register allocation,
+ // these checks are mostly for making sure these moves have been added correctly.
+ size_t current_liveness = 0;
+ for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ HInstruction* instruction = inst_it.Current();
+ DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
+ current_liveness = instruction->GetLifetimePosition();
+ }
+ for (HInstructionIterator inst_it(block->GetInstructions());
+ !inst_it.Done();
+ inst_it.Advance()) {
+ HInstruction* instruction = inst_it.Current();
+ DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
+ current_liveness = instruction->GetLifetimePosition();
+ }
+ }
+ }
+}
+
+void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, size_t end) {
+ int reg = location.reg();
+ DCHECK(location.IsRegister() || location.IsFpuRegister());
+ LiveInterval* interval = location.IsRegister()
+ ? physical_core_register_intervals_[reg]
+ : physical_fp_register_intervals_[reg];
+ Primitive::Type type = location.IsRegister()
+ ? Primitive::kPrimInt
+ : Primitive::kPrimFloat;
+ if (interval == nullptr) {
+ interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
+ if (location.IsRegister()) {
+ physical_core_register_intervals_[reg] = interval;
+ } else {
+ physical_fp_register_intervals_[reg] = interval;
+ }
+ }
+ DCHECK(interval->GetRegister() == reg);
+ interval->AddRange(start, end);
+}
+
+void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+ for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+ if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+ BlockRegister(Location::RegisterLocation(i), start, end);
+ }
+ }
+ for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+ if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+ BlockRegister(Location::FpuRegisterLocation(i), start, end);
+ }
+ }
+}
+
+void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
+ // Iterate post-order, to ensure the list is sorted, and the last added interval
+ // is the one with the lowest start position.
+ for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
+ back_it.Advance()) {
+ ProcessInstruction(back_it.Current());
+ }
+ for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ ProcessInstruction(inst_it.Current());
+ }
+
+ if (block->IsCatchBlock() ||
+ (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+ // By blocking all registers at the top of each catch block or irreducible loop, we force
+ // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in a register.
+ size_t position = block->GetLifetimeStart();
+ BlockRegisters(position, position + 1);
+ }
+ }
+
+ number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
+ registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+ kArenaAllocRegisterAllocator);
+ processing_core_registers_ = true;
+ unhandled_ = &unhandled_core_intervals_;
+ for (LiveInterval* fixed : physical_core_register_intervals_) {
+ if (fixed != nullptr) {
+ // Fixed interval is added to inactive_ instead of unhandled_.
+ // It's also the only type of inactive interval whose start position
+ // can be after the current interval during linear scan.
+ // Fixed interval is never split and never moves to unhandled_.
+ inactive_.push_back(fixed);
+ }
+ }
+ LinearScan();
+
+ inactive_.clear();
+ active_.clear();
+ handled_.clear();
+
+ number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
+ registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+ kArenaAllocRegisterAllocator);
+ processing_core_registers_ = false;
+ unhandled_ = &unhandled_fp_intervals_;
+ for (LiveInterval* fixed : physical_fp_register_intervals_) {
+ if (fixed != nullptr) {
+ // Fixed interval is added to inactive_ instead of unhandled_.
+ // It's also the only type of inactive interval whose start position
+ // can be after the current interval during linear scan.
+ // Fixed interval is never split and never moves to unhandled_.
+ inactive_.push_back(fixed);
+ }
+ }
+ LinearScan();
+}
+
+void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ size_t position = instruction->GetLifetimePosition();
+
+ if (locations == nullptr) return;
+
+ // Create synthesized intervals for temporaries.
+ for (size_t i = 0; i < locations->GetTempCount(); ++i) {
+ Location temp = locations->GetTemp(i);
+ if (temp.IsRegister() || temp.IsFpuRegister()) {
+ BlockRegister(temp, position, position + 1);
+ // Ensure that an explicit temporary register is marked as being allocated.
+ codegen_->AddAllocatedRegister(temp);
+ } else {
+ DCHECK(temp.IsUnallocated());
+ switch (temp.GetPolicy()) {
+ case Location::kRequiresRegister: {
+ LiveInterval* interval =
+ LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+ temp_intervals_.push_back(interval);
+ interval->AddTempUse(instruction, i);
+ unhandled_core_intervals_.push_back(interval);
+ break;
+ }
+
+ case Location::kRequiresFpuRegister: {
+ LiveInterval* interval =
+ LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+ temp_intervals_.push_back(interval);
+ interval->AddTempUse(instruction, i);
+ if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+ interval->AddHighInterval(/* is_temp */ true);
+ LiveInterval* high = interval->GetHighInterval();
+ temp_intervals_.push_back(high);
+ unhandled_fp_intervals_.push_back(high);
+ }
+ unhandled_fp_intervals_.push_back(interval);
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected policy for temporary location "
+ << temp.GetPolicy();
+ }
+ }
+ }
+
+ bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
+ && (instruction->GetType() != Primitive::kPrimFloat);
+
+ if (locations->NeedsSafepoint()) {
+ if (codegen_->IsLeafMethod()) {
+ // TODO: We do this here because we do not want the suspend check to artificially
+ // create live registers. We should find another place, but this is currently the
+ // simplest.
+ DCHECK(instruction->IsSuspendCheckEntry());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+ safepoints_.push_back(instruction);
+ if (locations->OnlyCallsOnSlowPath()) {
+ // We add a synthesized range at this position to record the live registers
+ // at this position. Ideally, we could just update the safepoints when locations
+ // are updated, but we currently need to know the full stack size before updating
+ // locations (because of parameters and the fact that we don't have a frame pointer).
+ // And knowing the full stack size requires to know the maximum number of live
+ // registers at calls in slow paths.
+ // By adding the following interval in the algorithm, we can compute this
+ // maximum before updating locations.
+ LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+ interval->AddRange(position, position + 1);
+ AddSorted(&unhandled_core_intervals_, interval);
+ AddSorted(&unhandled_fp_intervals_, interval);
+ }
+ }
+
+ if (locations->WillCall()) {
+ BlockRegisters(position, position + 1, /* caller_save_only */ true);
+ }
+
+ for (size_t i = 0; i < locations->GetInputCount(); ++i) {
+ Location input = locations->InAt(i);
+ if (input.IsRegister() || input.IsFpuRegister()) {
+ BlockRegister(input, position, position + 1);
+ } else if (input.IsPair()) {
+ BlockRegister(input.ToLow(), position, position + 1);
+ BlockRegister(input.ToHigh(), position, position + 1);
+ }
+ }
+
+ LiveInterval* current = instruction->GetLiveInterval();
+ if (current == nullptr) return;
+
+ ArenaVector<LiveInterval*>& unhandled = core_register
+ ? unhandled_core_intervals_
+ : unhandled_fp_intervals_;
+
+ DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
+
+ if (codegen_->NeedsTwoRegisters(current->GetType())) {
+ current->AddHighInterval();
+ }
+
+ for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+ HInstruction* safepoint = safepoints_[safepoint_index - 1u];
+ size_t safepoint_position = safepoint->GetLifetimePosition();
+
+ // Test that safepoints are ordered in the optimal way.
+ DCHECK(safepoint_index == safepoints_.size() ||
+ safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
+
+ if (safepoint_position == current->GetStart()) {
+ // The safepoint is for this instruction, so the location of the instruction
+ // does not need to be saved.
+ DCHECK_EQ(safepoint_index, safepoints_.size());
+ DCHECK_EQ(safepoint, instruction);
+ continue;
+ } else if (current->IsDeadAt(safepoint_position)) {
+ break;
+ } else if (!current->Covers(safepoint_position)) {
+ // Hole in the interval.
+ continue;
+ }
+ current->AddSafepoint(safepoint);
+ }
+ current->ResetSearchCache();
+
+  // Some instructions define their output in a fixed register/stack slot. We need
+ // to ensure we know these locations before doing register allocation. For a
+ // given register, we create an interval that covers these locations. The register
+ // will be unavailable at these locations when trying to allocate one for an
+ // interval.
+ //
+ // The backwards walking ensures the ranges are ordered on increasing start positions.
+ Location output = locations->Out();
+ if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
+ Location first = locations->InAt(0);
+ if (first.IsRegister() || first.IsFpuRegister()) {
+ current->SetFrom(position + 1);
+ current->SetRegister(first.reg());
+ } else if (first.IsPair()) {
+ current->SetFrom(position + 1);
+ current->SetRegister(first.low());
+ LiveInterval* high = current->GetHighInterval();
+ high->SetRegister(first.high());
+ high->SetFrom(position + 1);
+ }
+ } else if (output.IsRegister() || output.IsFpuRegister()) {
+ // Shift the interval's start by one to account for the blocked register.
+ current->SetFrom(position + 1);
+ current->SetRegister(output.reg());
+ BlockRegister(output, position, position + 1);
+ } else if (output.IsPair()) {
+ current->SetFrom(position + 1);
+ current->SetRegister(output.low());
+ LiveInterval* high = current->GetHighInterval();
+ high->SetRegister(output.high());
+ high->SetFrom(position + 1);
+ BlockRegister(output.ToLow(), position, position + 1);
+ BlockRegister(output.ToHigh(), position, position + 1);
+ } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
+ current->SetSpillSlot(output.GetStackIndex());
+ } else {
+ DCHECK(output.IsUnallocated() || output.IsConstant());
+ }
+
+ if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+ AllocateSpillSlotForCatchPhi(instruction->AsPhi());
+ }
+
+ // If needed, add interval to the list of unhandled intervals.
+ if (current->HasSpillSlot() || instruction->IsConstant()) {
+ // Split just before first register use.
+ size_t first_register_use = current->FirstRegisterUse();
+ if (first_register_use != kNoLifetime) {
+ LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+ // Don't add directly to `unhandled`, it needs to be sorted and the start
+ // of this new interval might be after intervals already in the list.
+ AddSorted(&unhandled, split);
+ } else {
+ // Nothing to do, we won't allocate a register for this value.
+ }
+ } else {
+ // Don't add directly to `unhandled`, temp or safepoint intervals
+ // for this instruction may have been added, and those can be
+ // processed first.
+ AddSorted(&unhandled, current);
+ }
+}
+
+class AllRangesIterator : public ValueObject {
+ public:
+ explicit AllRangesIterator(LiveInterval* interval)
+ : current_interval_(interval),
+ current_range_(interval->GetFirstRange()) {}
+
+ bool Done() const { return current_interval_ == nullptr; }
+ LiveRange* CurrentRange() const { return current_range_; }
+ LiveInterval* CurrentInterval() const { return current_interval_; }
+
+ void Advance() {
+ current_range_ = current_range_->GetNext();
+ if (current_range_ == nullptr) {
+ current_interval_ = current_interval_->GetNextSibling();
+ if (current_interval_ != nullptr) {
+ current_range_ = current_interval_->GetFirstRange();
+ }
+ }
+ }
+
+ private:
+ LiveInterval* current_interval_;
+ LiveRange* current_range_;
+
+ DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
+};
+
+bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) const {
+ // To simplify unit testing, we eagerly create the array of intervals, and
+ // call the helper method.
+ ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+ for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
+ intervals.push_back(instruction->GetLiveInterval());
+ }
+ }
+
+ const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
+ ? &physical_core_register_intervals_
+ : &physical_fp_register_intervals_;
+ for (LiveInterval* fixed : *physical_register_intervals) {
+ if (fixed != nullptr) {
+ intervals.push_back(fixed);
+ }
+ }
+
+ for (LiveInterval* temp : temp_intervals_) {
+ if (ShouldProcess(processing_core_registers_, temp)) {
+ intervals.push_back(temp);
+ }
+ }
+
+ return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
+ allocator_, processing_core_registers_, log_fatal_on_failure);
+}
+
+void RegisterAllocatorLinearScan::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
+ interval->Dump(stream);
+ stream << ": ";
+ if (interval->HasRegister()) {
+ if (interval->IsFloatingPoint()) {
+ codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
+ } else {
+ codegen_->DumpCoreRegister(stream, interval->GetRegister());
+ }
+ } else {
+ stream << "spilled";
+ }
+ stream << std::endl;
+}
+
+void RegisterAllocatorLinearScan::DumpAllIntervals(std::ostream& stream) const {
+ stream << "inactive: " << std::endl;
+ for (LiveInterval* inactive_interval : inactive_) {
+ DumpInterval(stream, inactive_interval);
+ }
+ stream << "active: " << std::endl;
+ for (LiveInterval* active_interval : active_) {
+ DumpInterval(stream, active_interval);
+ }
+ stream << "unhandled: " << std::endl;
+ auto unhandled = (unhandled_ != nullptr) ?
+ unhandled_ : &unhandled_core_intervals_;
+ for (LiveInterval* unhandled_interval : *unhandled) {
+ DumpInterval(stream, unhandled_interval);
+ }
+ stream << "handled: " << std::endl;
+ for (LiveInterval* handled_interval : handled_) {
+ DumpInterval(stream, handled_interval);
+ }
+}
+
+// By the book implementation of a linear scan register allocator.
+void RegisterAllocatorLinearScan::LinearScan() {
+ while (!unhandled_->empty()) {
+ // (1) Remove interval with the lowest start position from unhandled.
+ LiveInterval* current = unhandled_->back();
+ unhandled_->pop_back();
+
+    // Make sure the interval is in an expected state.
+ DCHECK(!current->IsFixed() && !current->HasSpillSlot());
+ // Make sure we are going in the right order.
+ DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
+ // Make sure a low interval is always with a high.
+ DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
+ // Make sure a high interval is always with a low.
+ DCHECK(current->IsLowInterval() ||
+ unhandled_->empty() ||
+ !unhandled_->back()->IsHighInterval());
+
+ size_t position = current->GetStart();
+
+ // Remember the inactive_ size here since the ones moved to inactive_ from
+ // active_ below shouldn't need to be re-checked.
+ size_t inactive_intervals_to_handle = inactive_.size();
+
+ // (2) Remove currently active intervals that are dead at this position.
+ // Move active intervals that have a lifetime hole at this position
+ // to inactive.
+ auto active_kept_end = std::remove_if(
+ active_.begin(),
+ active_.end(),
+ [this, position](LiveInterval* interval) {
+ if (interval->IsDeadAt(position)) {
+ handled_.push_back(interval);
+ return true;
+ } else if (!interval->Covers(position)) {
+ inactive_.push_back(interval);
+ return true;
+ } else {
+ return false; // Keep this interval.
+ }
+ });
+ active_.erase(active_kept_end, active_.end());
+
+ // (3) Remove currently inactive intervals that are dead at this position.
+ // Move inactive intervals that cover this position to active.
+ auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
+ auto inactive_kept_end = std::remove_if(
+ inactive_.begin(),
+ inactive_to_handle_end,
+ [this, position](LiveInterval* interval) {
+ DCHECK(interval->GetStart() < position || interval->IsFixed());
+ if (interval->IsDeadAt(position)) {
+ handled_.push_back(interval);
+ return true;
+ } else if (interval->Covers(position)) {
+ active_.push_back(interval);
+ return true;
+ } else {
+ return false; // Keep this interval.
+ }
+ });
+ inactive_.erase(inactive_kept_end, inactive_to_handle_end);
+
+ if (current->IsSlowPathSafepoint()) {
+ // Synthesized interval to record the maximum number of live registers
+ // at safepoints. No need to allocate a register for it.
+ if (processing_core_registers_) {
+ maximum_number_of_live_core_registers_ =
+ std::max(maximum_number_of_live_core_registers_, active_.size());
+ } else {
+ maximum_number_of_live_fp_registers_ =
+ std::max(maximum_number_of_live_fp_registers_, active_.size());
+ }
+ DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
+ continue;
+ }
+
+ if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
+ DCHECK(!current->HasRegister());
+      // Allocating the low part was unsuccessful. The split interval for the high part
+ // will be handled next (it is in the `unhandled_` list).
+ continue;
+ }
+
+ // (4) Try to find an available register.
+ bool success = TryAllocateFreeReg(current);
+
+ // (5) If no register could be found, we need to spill.
+ if (!success) {
+ success = AllocateBlockedReg(current);
+ }
+
+ // (6) If the interval had a register allocated, add it to the list of active
+ // intervals.
+ if (success) {
+ codegen_->AddAllocatedRegister(processing_core_registers_
+ ? Location::RegisterLocation(current->GetRegister())
+ : Location::FpuRegisterLocation(current->GetRegister()));
+ active_.push_back(current);
+ if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
+ current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
+ }
+ }
+ }
+}
+
+static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
+ DCHECK(!interval->IsHighInterval());
+ // Note that the same instruction may occur multiple times in the input list,
+ // so `free_until` may have changed already.
+ // Since `position` is not the current scan position, we need to use CoversSlow.
+ if (interval->IsDeadAt(position)) {
+ // Set the register to be free. Note that inactive intervals might later
+ // update this.
+ free_until[interval->GetRegister()] = kMaxLifetimePosition;
+ if (interval->HasHighInterval()) {
+ DCHECK(interval->GetHighInterval()->IsDeadAt(position));
+ free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
+ }
+ } else if (!interval->CoversSlow(position)) {
+ // The interval becomes inactive at `defined_by`. We make its register
+ // available only until the next use strictly after `defined_by`.
+ free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+ if (interval->HasHighInterval()) {
+ DCHECK(!interval->GetHighInterval()->CoversSlow(position));
+ free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
+ }
+ }
+}
+
+// Find a free register. If multiple are found, pick the register that
+// is free the longest.
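+// For example (sketch): if r1 is free until position 26 and r2 until position 40,
+// and `current` ends at position 30, picking r2 avoids the split at 26 that
+// picking r1 would force.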
+bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
+ size_t* free_until = registers_array_;
+
+ // First set all registers to be free.
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ free_until[i] = kMaxLifetimePosition;
+ }
+
+ // For each active interval, set its register to not free.
+ for (LiveInterval* interval : active_) {
+ DCHECK(interval->HasRegister());
+ free_until[interval->GetRegister()] = 0;
+ }
+
+  // An interval that starts at an instruction (that is, it is not split) may
+  // re-use the registers used by the inputs of that instruction, based on the
+ // location summary.
+ HInstruction* defined_by = current->GetDefinedBy();
+ if (defined_by != nullptr && !current->IsSplit()) {
+ LocationSummary* locations = defined_by->GetLocations();
+ if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
+ HInputsRef inputs = defined_by->GetInputs();
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ // Take the last interval of the input. It is the location of that interval
+ // that will be used at `defined_by`.
+ LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
+        // Note that the interval may not have been processed yet.
+ // TODO: Handle non-split intervals last in the work list.
+ if (locations->InAt(i).IsValid()
+ && interval->HasRegister()
+ && interval->SameRegisterKind(*current)) {
+ // The input must be live until the end of `defined_by`, to comply to
+ // the linear scan algorithm. So we use `defined_by`'s end lifetime
+ // position to check whether the input is dead or is inactive after
+ // `defined_by`.
+ DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
+ size_t position = defined_by->GetLifetimePosition() + 1;
+ FreeIfNotCoverAt(interval, position, free_until);
+ }
+ }
+ }
+ }
+
+ // For each inactive interval, set its register to be free until
+ // the next intersection with `current`.
+ for (LiveInterval* inactive : inactive_) {
+ // Temp/Slow-path-safepoint interval has no holes.
+ DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+ if (!current->IsSplit() && !inactive->IsFixed()) {
+ // Neither current nor inactive are fixed.
+ // Thanks to SSA, a non-split interval starting in a hole of an
+ // inactive interval should never intersect with that inactive interval.
+ // Only if it's not fixed though, because fixed intervals don't come from SSA.
+ DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+ continue;
+ }
+
+ DCHECK(inactive->HasRegister());
+ if (free_until[inactive->GetRegister()] == 0) {
+ // Already used by some active interval. No need to intersect.
+ continue;
+ }
+ size_t next_intersection = inactive->FirstIntersectionWith(current);
+ if (next_intersection != kNoLifetime) {
+ free_until[inactive->GetRegister()] =
+ std::min(free_until[inactive->GetRegister()], next_intersection);
+ }
+ }
+
+ int reg = kNoRegister;
+ if (current->HasRegister()) {
+ // Some instructions have a fixed register output.
+ reg = current->GetRegister();
+ if (free_until[reg] == 0) {
+ DCHECK(current->IsHighInterval());
+ // AllocateBlockedReg will spill the holder of the register.
+ return false;
+ }
+ } else {
+ DCHECK(!current->IsHighInterval());
+ int hint = current->FindFirstRegisterHint(free_until, liveness_);
+ if ((hint != kNoRegister)
+ // For simplicity, if the hint we are getting for a pair cannot be used,
+ // we are just going to allocate a new pair.
+ && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) {
+ DCHECK(!IsBlocked(hint));
+ reg = hint;
+ } else if (current->IsLowInterval()) {
+ reg = FindAvailableRegisterPair(free_until, current->GetStart());
+ } else {
+ reg = FindAvailableRegister(free_until, current);
+ }
+ }
+
+ DCHECK_NE(reg, kNoRegister);
+ // If we could not find a register, we need to spill.
+ if (free_until[reg] == 0) {
+ return false;
+ }
+
+ if (current->IsLowInterval()) {
+ // If the high register of this interval is not available, we need to spill.
+ int high_reg = current->GetHighInterval()->GetRegister();
+ if (high_reg == kNoRegister) {
+ high_reg = GetHighForLowRegister(reg);
+ }
+ if (free_until[high_reg] == 0) {
+ return false;
+ }
+ }
+
+ current->SetRegister(reg);
+ if (!current->IsDeadAt(free_until[reg])) {
+ // If the register is only available for a subset of live ranges
+ // covered by `current`, split `current` before the position where
+ // the register is not available anymore.
+ LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]);
+ DCHECK(split != nullptr);
+ AddSorted(unhandled_, split);
+ }
+ return true;
+}
+
+bool RegisterAllocatorLinearScan::IsBlocked(int reg) const {
+ return processing_core_registers_
+ ? blocked_core_registers_[reg]
+ : blocked_fp_registers_[reg];
+}
+
+int RegisterAllocatorLinearScan::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
+ int reg = kNoRegister;
+ // Pick the register pair that is used the last.
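+  // For example (sketch): if registers (r0, r1) are next needed at position 18 and
+  // (r2, r3) at position 44, the latter pair is preferred since it stays usable longer.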
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ if (IsBlocked(i)) continue;
+ if (!IsLowRegister(i)) continue;
+ int high_register = GetHighForLowRegister(i);
+ if (IsBlocked(high_register)) continue;
+ int existing_high_register = GetHighForLowRegister(reg);
+ if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
+ && next_use[high_register] >= next_use[existing_high_register])) {
+ reg = i;
+ if (next_use[i] == kMaxLifetimePosition
+ && next_use[high_register] == kMaxLifetimePosition) {
+ break;
+ }
+ } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
+      // If one of the current registers is known to be unavailable, just unconditionally
+ // try a new one.
+ reg = i;
+ }
+ }
+ return reg;
+}
+
+bool RegisterAllocatorLinearScan::IsCallerSaveRegister(int reg) const {
+ return processing_core_registers_
+ ? !codegen_->IsCoreCalleeSaveRegister(reg)
+ : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+int RegisterAllocatorLinearScan::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
+ // We special case intervals that do not span a safepoint to try to find a caller-save
+ // register if one is available. We iterate from 0 to the number of registers,
+ // so if there are caller-save registers available at the end, we continue the iteration.
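+  // For example (sketch): an interval that spans no call can sit in a caller-save
+  // register at no cost, whereas giving it a callee-save register would force the
+  // method to save and restore that register in its prologue/epilogue.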
+ bool prefers_caller_save = !current->HasWillCallSafepoint();
+ int reg = kNoRegister;
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ if (IsBlocked(i)) {
+ // Register cannot be used. Continue.
+ continue;
+ }
+
+ // Best case: we found a register fully available.
+ if (next_use[i] == kMaxLifetimePosition) {
+ if (prefers_caller_save && !IsCallerSaveRegister(i)) {
+ // We can get shorter encodings on some platforms by using
+ // small register numbers. So only update the candidate if the previous
+ // one was not available for the whole method.
+ if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
+ reg = i;
+ }
+ // Continue the iteration in the hope of finding a caller save register.
+ continue;
+ } else {
+ reg = i;
+ // We know the register is good enough. Return it.
+ break;
+ }
+ }
+
+ // If we had no register before, take this one as a reference.
+ if (reg == kNoRegister) {
+ reg = i;
+ continue;
+ }
+
+ // Pick the register that is used the last.
+ if (next_use[i] > next_use[reg]) {
+ reg = i;
+ continue;
+ }
+ }
+ return reg;
+}
+
+// Remove interval and its other half if any. Return iterator to the following element.
+static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
+ ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
+ DCHECK(intervals->begin() <= pos && pos < intervals->end());
+ LiveInterval* interval = *pos;
+ if (interval->IsLowInterval()) {
+ DCHECK(pos + 1 < intervals->end());
+ DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
+ return intervals->erase(pos, pos + 2);
+ } else if (interval->IsHighInterval()) {
+ DCHECK(intervals->begin() < pos);
+ DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
+ return intervals->erase(pos - 1, pos + 1);
+ } else {
+ return intervals->erase(pos);
+ }
+}
+
+bool RegisterAllocatorLinearScan::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+ size_t first_register_use,
+ size_t* next_use) {
+ for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+ LiveInterval* active = *it;
+ DCHECK(active->HasRegister());
+ if (active->IsFixed()) continue;
+ if (active->IsHighInterval()) continue;
+ if (first_register_use > next_use[active->GetRegister()]) continue;
+
+ // Split the first interval found that is either:
+ // 1) A non-pair interval.
+ // 2) A pair interval whose high is not low + 1.
+ // 3) A pair interval whose low is not even.
+ if (!active->IsLowInterval() ||
+ IsLowOfUnalignedPairInterval(active) ||
+ !IsLowRegister(active->GetRegister())) {
+ LiveInterval* split = Split(active, position);
+ if (split != active) {
+ handled_.push_back(active);
+ }
+ RemoveIntervalAndPotentialOtherHalf(&active_, it);
+ AddSorted(unhandled_, split);
+ return true;
+ }
+ }
+ return false;
+}
+
+// Find the register that is used the last, and spill the interval
+// that holds it. If the first use of `current` is after the next use of that
+// register, we spill `current` instead.
+bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
+ size_t first_register_use = current->FirstRegisterUse();
+ if (current->HasRegister()) {
+ DCHECK(current->IsHighInterval());
+ // The low interval has allocated the register for the high interval. In
+ // case the low interval had to split both intervals, we may end up in a
+ // situation where the high interval does not have a register use anymore.
+ // We must still proceed in order to split currently active and inactive
+ // uses of the high interval's register, and put the high interval in the
+ // active set.
+ DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr));
+ } else if (first_register_use == kNoLifetime) {
+ AllocateSpillSlotFor(current);
+ return false;
+ }
+
+ // First set all registers as not being used.
+ size_t* next_use = registers_array_;
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ next_use[i] = kMaxLifetimePosition;
+ }
+
+ // For each active interval, find the next use of its register after the
+ // start of current.
+ for (LiveInterval* active : active_) {
+ DCHECK(active->HasRegister());
+ if (active->IsFixed()) {
+ next_use[active->GetRegister()] = current->GetStart();
+ } else {
+ size_t use = active->FirstRegisterUseAfter(current->GetStart());
+ if (use != kNoLifetime) {
+ next_use[active->GetRegister()] = use;
+ }
+ }
+ }
+
+ // For each inactive interval, find the next use of its register after the
+ // start of current.
+ for (LiveInterval* inactive : inactive_) {
+ // Temp/Slow-path-safepoint interval has no holes.
+ DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+ if (!current->IsSplit() && !inactive->IsFixed()) {
+ // Neither current nor inactive are fixed.
+ // Thanks to SSA, a non-split interval starting in a hole of an
+ // inactive interval should never intersect with that inactive interval.
+ // Only if it's not fixed though, because fixed intervals don't come from SSA.
+ DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+ continue;
+ }
+ DCHECK(inactive->HasRegister());
+ size_t next_intersection = inactive->FirstIntersectionWith(current);
+ if (next_intersection != kNoLifetime) {
+ if (inactive->IsFixed()) {
+ next_use[inactive->GetRegister()] =
+ std::min(next_intersection, next_use[inactive->GetRegister()]);
+ } else {
+ size_t use = inactive->FirstUseAfter(current->GetStart());
+ if (use != kNoLifetime) {
+ next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
+ }
+ }
+ }
+ }
+
+ int reg = kNoRegister;
+ bool should_spill = false;
+ if (current->HasRegister()) {
+ DCHECK(current->IsHighInterval());
+ reg = current->GetRegister();
+ // When allocating the low part, we made sure the high register was available.
+ DCHECK_LT(first_register_use, next_use[reg]);
+ } else if (current->IsLowInterval()) {
+ reg = FindAvailableRegisterPair(next_use, first_register_use);
+ // We should spill if both registers are not available.
+ should_spill = (first_register_use >= next_use[reg])
+ || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
+ } else {
+ DCHECK(!current->IsHighInterval());
+ reg = FindAvailableRegister(next_use, current);
+ should_spill = (first_register_use >= next_use[reg]);
+ }
+
+ DCHECK_NE(reg, kNoRegister);
+ if (should_spill) {
+ DCHECK(!current->IsHighInterval());
+ bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
+ if (is_allocation_at_use_site) {
+ if (!current->IsLowInterval()) {
+ DumpInterval(std::cerr, current);
+ DumpAllIntervals(std::cerr);
+ // This situation has the potential to infinite loop, so we make it a non-debug CHECK.
+ HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
+        CHECK(false) << "There are not enough registers available for "
+ << current->GetParent()->GetDefinedBy()->DebugName() << " "
+ << current->GetParent()->GetDefinedBy()->GetId()
+ << " at " << first_register_use - 1 << " "
+ << (at == nullptr ? "" : at->DebugName());
+ }
+
+ // If we're allocating a register for `current` because the instruction at
+ // that position requires it, but we think we should spill, then there are
+ // non-pair intervals or unaligned pair intervals blocking the allocation.
+ // We split the first interval found, and put ourselves first in the
+ // `unhandled_` list.
+ bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
+ first_register_use,
+ next_use);
+ DCHECK(success);
+ LiveInterval* existing = unhandled_->back();
+ DCHECK(existing->IsHighInterval());
+ DCHECK_EQ(existing->GetLowInterval(), current);
+ unhandled_->push_back(current);
+ } else {
+ // If the first use of that instruction is after the last use of the found
+ // register, we split this interval just before its first register use.
+ AllocateSpillSlotFor(current);
+ LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+ DCHECK(current != split);
+ AddSorted(unhandled_, split);
+ }
+ return false;
+ } else {
+    // Use this register and spill the active and inactive intervals that
+ // have that register.
+ current->SetRegister(reg);
+
+ for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+ LiveInterval* active = *it;
+ if (active->GetRegister() == reg) {
+ DCHECK(!active->IsFixed());
+ LiveInterval* split = Split(active, current->GetStart());
+ if (split != active) {
+ handled_.push_back(active);
+ }
+ RemoveIntervalAndPotentialOtherHalf(&active_, it);
+ AddSorted(unhandled_, split);
+ break;
+ }
+ }
+
+ // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
+ for (auto it = inactive_.begin(); it != inactive_.end(); ) {
+ LiveInterval* inactive = *it;
+ bool erased = false;
+ if (inactive->GetRegister() == reg) {
+ if (!current->IsSplit() && !inactive->IsFixed()) {
+ // Neither current nor inactive are fixed.
+ // Thanks to SSA, a non-split interval starting in a hole of an
+ // inactive interval should never intersect with that inactive interval.
+ // Only if it's not fixed though, because fixed intervals don't come from SSA.
+ DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+ } else {
+ size_t next_intersection = inactive->FirstIntersectionWith(current);
+ if (next_intersection != kNoLifetime) {
+ if (inactive->IsFixed()) {
+ LiveInterval* split = Split(current, next_intersection);
+ DCHECK_NE(split, current);
+ AddSorted(unhandled_, split);
+ } else {
+ // Split at the start of `current`, which will lead to splitting
+ // at the end of the lifetime hole of `inactive`.
+ LiveInterval* split = Split(inactive, current->GetStart());
+ // If it's inactive, it must start before the current interval.
+ DCHECK_NE(split, inactive);
+ it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
+ erased = true;
+ handled_.push_back(inactive);
+ AddSorted(unhandled_, split);
+ }
+ }
+ }
+ }
+ // If we have erased the element, `it` already points to the next element.
+ // Otherwise we need to move to the next element.
+ if (!erased) {
+ ++it;
+ }
+ }
+
+ return true;
+ }
+}
+
+void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
+ DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
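+  // The array is kept sorted by decreasing start position from front to back, so
+  // that LinearScan() can pop the interval with the lowest start off the back.
+  // For example (sketch): given starts [40, 30, 10], an interval starting at 20
+  // is inserted between 30 and 10.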
+ size_t insert_at = 0;
+ for (size_t i = array->size(); i > 0; --i) {
+ LiveInterval* current = (*array)[i - 1u];
+ // High intervals must be processed right after their low equivalent.
+ if (current->StartsAfter(interval) && !current->IsHighInterval()) {
+ insert_at = i;
+ break;
+ } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
+ // Ensure the slow path interval is the last to be processed at its location: we want the
+ // interval to know all live registers at this location.
+ DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
+ insert_at = i;
+ break;
+ }
+ }
+
+ // Insert the high interval before the low, to ensure the low is processed before.
+ auto insert_pos = array->begin() + insert_at;
+ if (interval->HasHighInterval()) {
+ array->insert(insert_pos, { interval->GetHighInterval(), interval });
+ } else if (interval->HasLowInterval()) {
+ array->insert(insert_pos, { interval, interval->GetLowInterval() });
+ } else {
+ array->insert(insert_pos, interval);
+ }
+}
+
+void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
+ if (interval->IsHighInterval()) {
+ // The low interval already took care of allocating the spill slot.
+ DCHECK(!interval->GetLowInterval()->HasRegister());
+ DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot());
+ return;
+ }
+
+ LiveInterval* parent = interval->GetParent();
+
+ // An instruction gets a spill slot for its entire lifetime. If the parent
+ // of this interval already has a spill slot, there is nothing to do.
+ if (parent->HasSpillSlot()) {
+ return;
+ }
+
+ HInstruction* defined_by = parent->GetDefinedBy();
+ DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
+
+ if (defined_by->IsParameterValue()) {
+ // Parameters have their own stack slot.
+ parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+ return;
+ }
+
+ if (defined_by->IsCurrentMethod()) {
+ parent->SetSpillSlot(0);
+ return;
+ }
+
+ if (defined_by->IsConstant()) {
+ // Constants don't need a spill slot.
+ return;
+ }
+
+ ArenaVector<size_t>* spill_slots = nullptr;
+ switch (interval->GetType()) {
+ case Primitive::kPrimDouble:
+ spill_slots = &double_spill_slots_;
+ break;
+ case Primitive::kPrimLong:
+ spill_slots = &long_spill_slots_;
+ break;
+ case Primitive::kPrimFloat:
+ spill_slots = &float_spill_slots_;
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ spill_slots = &int_spill_slots_;
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
+ }
+
+ // Find an available spill slot.
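+  // Each entry in `spill_slots` records the last lifetime position at which that
+  // slot is needed, so a slot whose entry is <= the start of `parent` can be reused.
+  // For example (sketch): with entries {12, 30} and a parent starting at 20, slot 0
+  // is reused and its entry is updated to the end of the last sibling.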
+ size_t slot = 0;
+ for (size_t e = spill_slots->size(); slot < e; ++slot) {
+ if ((*spill_slots)[slot] <= parent->GetStart()) {
+ if (!parent->NeedsTwoSpillSlots()) {
+ // One spill slot is sufficient.
+ break;
+ }
+ if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
+ // Two spill slots are available.
+ break;
+ }
+ }
+ }
+
+ size_t end = interval->GetLastSibling()->GetEnd();
+ if (parent->NeedsTwoSpillSlots()) {
+ if (slot + 2u > spill_slots->size()) {
+ // We need a new spill slot.
+ spill_slots->resize(slot + 2u, end);
+ }
+ (*spill_slots)[slot] = end;
+ (*spill_slots)[slot + 1] = end;
+ } else {
+ if (slot == spill_slots->size()) {
+ // We need a new spill slot.
+ spill_slots->push_back(end);
+ } else {
+ (*spill_slots)[slot] = end;
+ }
+ }
+
+ // Note that the exact spill slot location will be computed when we resolve,
+ // that is, when we know the number of spill slots for each type.
+ parent->SetSpillSlot(slot);
+}
+
+void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
+ LiveInterval* interval = phi->GetLiveInterval();
+
+ HInstruction* previous_phi = phi->GetPrevious();
+ DCHECK(previous_phi == nullptr ||
+ previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+ << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
+
+ if (phi->IsVRegEquivalentOf(previous_phi)) {
+ // This is an equivalent of the previous phi. We need to assign the same
+ // catch phi slot.
+ DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+ interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+ } else {
+ // Allocate a new spill slot for this catch phi.
+ // TODO: Reuse spill slots when intervals of phis from different catch
+ // blocks do not overlap.
+ interval->SetSpillSlot(catch_phi_spill_slots_);
+ catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ }
+}
+
+} // namespace art
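Editor's note: the spill-slot loop in AllocateSpillSlotFor above packs intervals into typed slot pools by recording, per slot, the end position of its last occupant. A minimal standalone sketch of that reuse rule, using std::vector in place of ART's ArenaVector; names here are illustrative only.

#include <cstddef>
#include <vector>

// Each entry of `slots` records the lifetime end of the interval that last
// occupied that slot; a slot is free for an interval starting at `start`
// once its recorded end is <= start. Returns the (first) slot assigned to
// an interval covering [start, end).
size_t AssignSpillSlot(std::vector<size_t>* slots,
                       size_t start,
                       size_t end,
                       bool needs_two_slots) {
  size_t slot = 0;
  for (size_t e = slots->size(); slot < e; ++slot) {
    if ((*slots)[slot] <= start) {
      if (!needs_two_slots) {
        break;  // A single free slot is enough.
      }
      if (slot == e - 1 || (*slots)[slot + 1] <= start) {
        break;  // Found two usable adjacent slots.
      }
    }
  }
  if (needs_two_slots) {
    if (slot + 2u > slots->size()) {
      slots->resize(slot + 2u, end);  // Grow the pool for a wide value.
    }
    (*slots)[slot] = end;
    (*slots)[slot + 1] = end;
  } else {
    if (slot == slots->size()) {
      slots->push_back(end);  // No reusable slot; append a fresh one.
    } else {
      (*slots)[slot] = end;
    }
  }
  return slot;
}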
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
new file mode 100644
index 0000000..b6e4f92
--- /dev/null
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/macros.h"
+#include "primitive.h"
+#include "register_allocator.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class HPhi;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * An implementation of a linear scan register allocator on an `HGraph` with SSA form.
+ */
+class RegisterAllocatorLinearScan : public RegisterAllocator {
+ public:
+ RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis);
+
+ void AllocateRegisters() OVERRIDE;
+
+ bool Validate(bool log_fatal_on_failure) OVERRIDE {
+ processing_core_registers_ = true;
+ if (!ValidateInternal(log_fatal_on_failure)) {
+ return false;
+ }
+ processing_core_registers_ = false;
+ return ValidateInternal(log_fatal_on_failure);
+ }
+
+ size_t GetNumberOfSpillSlots() const {
+ return int_spill_slots_.size()
+ + long_spill_slots_.size()
+ + float_spill_slots_.size()
+ + double_spill_slots_.size()
+ + catch_phi_spill_slots_;
+ }
+
+ private:
+ // Main methods of the allocator.
+ void LinearScan();
+ bool TryAllocateFreeReg(LiveInterval* interval);
+ bool AllocateBlockedReg(LiveInterval* interval);
+
+ // Add `interval` in the given sorted list.
+ static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
+
+ // Returns whether `reg` is blocked by the code generator.
+ bool IsBlocked(int reg) const;
+
+ // Update the interval for the register in `location` to cover [start, end).
+ void BlockRegister(Location location, size_t start, size_t end);
+ void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+
+ // Allocate a spill slot for the given interval. Should be called in linear
+ // order of interval starting positions.
+ void AllocateSpillSlotFor(LiveInterval* interval);
+
+ // Allocate a spill slot for the given catch phi. Will allocate the same slot
+ // for phis which share the same vreg. Must be called in reverse linear order
+ // of lifetime positions and ascending vreg numbers for correctness.
+ void AllocateSpillSlotForCatchPhi(HPhi* phi);
+
+ // Helper methods.
+ void AllocateRegistersInternal();
+ void ProcessInstruction(HInstruction* instruction);
+ bool ValidateInternal(bool log_fatal_on_failure) const;
+ void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
+ void DumpAllIntervals(std::ostream& stream) const;
+ int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
+ int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
+ bool IsCallerSaveRegister(int reg) const;
+
+ // Try splitting an active non-pair or unaligned pair interval at the given `position`.
+ // Returns whether it was successful at finding such an interval.
+ bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+ size_t first_register_use,
+ size_t* next_use);
+
+ // List of intervals for core registers that must be processed, ordered by start
+ // position. Last entry is the interval that has the lowest start position.
+ // This list is initially populated before doing the linear scan.
+ ArenaVector<LiveInterval*> unhandled_core_intervals_;
+
+ // List of intervals for floating-point registers. Same comments as above.
+ ArenaVector<LiveInterval*> unhandled_fp_intervals_;
+
+ // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
+ // or `unhandled_fp_intervals_`.
+ ArenaVector<LiveInterval*>* unhandled_;
+
+ // List of intervals that have been processed.
+ ArenaVector<LiveInterval*> handled_;
+
+ // List of intervals that are currently active when processing a new live interval.
+ // That is, they have a live range that spans the start of the new interval.
+ ArenaVector<LiveInterval*> active_;
+
+ // List of intervals that are currently inactive when processing a new live interval.
+ // That is, they have a lifetime hole that spans the start of the new interval.
+ ArenaVector<LiveInterval*> inactive_;
+
+ // Fixed intervals for physical registers. Such intervals cover the positions
+ // where an instruction requires a specific register.
+ ArenaVector<LiveInterval*> physical_core_register_intervals_;
+ ArenaVector<LiveInterval*> physical_fp_register_intervals_;
+
+ // Intervals for temporaries. Such intervals cover the positions
+ // where an instruction requires a temporary.
+ ArenaVector<LiveInterval*> temp_intervals_;
+
+ // The spill slots allocated for live intervals. We ensure spill slots
+ // are typed to avoid (1) doing moves and swaps between two different kinds
+ // of registers, and (2) swapping between a single stack slot and a double
+ // stack slot. This simplifies the parallel move resolver.
+ ArenaVector<size_t> int_spill_slots_;
+ ArenaVector<size_t> long_spill_slots_;
+ ArenaVector<size_t> float_spill_slots_;
+ ArenaVector<size_t> double_spill_slots_;
+
+ // Spill slots allocated to catch phis. This category is special-cased because
+ // (1) slots are allocated prior to linear scan and in reverse linear order,
+ // (2) equivalent phis need to share slots despite having different types.
+ size_t catch_phi_spill_slots_;
+
+ // Instructions that need a safepoint.
+ ArenaVector<HInstruction*> safepoints_;
+
+ // True if processing core registers. False if processing floating
+ // point registers.
+ bool processing_core_registers_;
+
+ // Number of registers for the current register kind (core or floating point).
+ size_t number_of_registers_;
+
+ // Temporary array, allocated ahead of time for simplicity.
+ size_t* registers_array_;
+
+ // Blocked registers, as decided by the code generator.
+ bool* const blocked_core_registers_;
+ bool* const blocked_fp_registers_;
+
+ // Slots reserved for out arguments.
+ size_t reserved_out_slots_;
+
+ // The maximum live core registers at safepoints.
+ size_t maximum_number_of_live_core_registers_;
+
+ // The maximum live FP registers at safepoints.
+ size_t maximum_number_of_live_fp_registers_;
+
+ ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
+ ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
+
+ DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorLinearScan);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
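Editor's note: for orientation, the four worklists declared above (unhandled_, active_, inactive_, handled_) follow the textbook linear-scan shape sketched below. This is a generic illustration, not ART's exact control flow; Interval and the container choice are placeholders.

#include <cstddef>
#include <vector>

struct Interval {
  size_t start;
  size_t end;
  bool Covers(size_t position) const { return start <= position && position < end; }
};

// One step of the scan: take the unhandled interval with the lowest start
// (kept last in the sorted vector), retire or deactivate intervals that no
// longer cover that position, then hand `current` to register assignment.
void ScanStep(std::vector<Interval*>* unhandled,
              std::vector<Interval*>* active,
              std::vector<Interval*>* inactive,
              std::vector<Interval*>* handled) {
  Interval* current = unhandled->back();
  unhandled->pop_back();
  size_t position = current->start;

  for (auto it = active->begin(); it != active->end();) {
    Interval* interval = *it;
    if (interval->end <= position) {
      handled->push_back(interval);   // Lifetime over: retire.
      it = active->erase(it);
    } else if (!interval->Covers(position)) {
      inactive->push_back(interval);  // In a lifetime hole: deactivate.
      it = active->erase(it);
    } else {
      ++it;
    }
  }
  // A symmetric pass moves inactive intervals back to active or handled,
  // then TryAllocateFreeReg/AllocateBlockedReg pick a register for
  // `current` before it joins `active` (omitted here).
  active->push_back(current);
}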
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index a9de7c3..cbb7b2f 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -25,6 +25,7 @@
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
@@ -44,9 +45,9 @@
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
- return register_allocator.Validate(false);
+ RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
+ return register_allocator->Validate(false);
}
/**
@@ -295,9 +296,9 @@
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
- ASSERT_TRUE(register_allocator.Validate(false));
+ RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
+ ASSERT_TRUE(register_allocator->Validate(false));
HBasicBlock* loop_header = graph->GetBlocks()[2];
HPhi* phi = loop_header->GetFirstPhi()->AsPhi();
@@ -384,9 +385,9 @@
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
- ASSERT_TRUE(register_allocator.Validate(false));
+ RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
+ ASSERT_TRUE(register_allocator->Validate(false));
}
/**
@@ -408,7 +409,7 @@
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+ RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
// Add an artificial range to cover the temps that will be put in the unhandled list.
LiveInterval* unhandled = graph->GetEntryBlock()->GetFirstInstruction()->GetLiveInterval();
@@ -541,8 +542,9 @@
liveness.Analyze();
// Check that the register allocator is deterministic.
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 0);
@@ -560,8 +562,9 @@
// Set the phi to a specific register, and check that the inputs get allocated
// the same register.
phi->GetLocations()->UpdateOut(Location::RegisterLocation(2));
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -579,8 +582,9 @@
// Set input1 to a specific register, and check that the phi and other input get allocated
// the same register.
input1->GetLocations()->UpdateOut(Location::RegisterLocation(2));
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -598,8 +602,9 @@
// Set input2 to a specific register, and check that the phi and other input get allocated
// the same register.
input2->GetLocations()->UpdateOut(Location::RegisterLocation(2));
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -658,8 +663,9 @@
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
// Sanity check that in normal conditions, the register should be hinted to 0 (EAX).
ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 0);
@@ -677,8 +683,9 @@
// Don't use SetInAt because we are overriding an already allocated location.
ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2);
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2);
}
@@ -726,8 +733,9 @@
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
// Sanity check that in normal conditions, the registers are the same.
ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 1);
@@ -748,8 +756,9 @@
ASSERT_EQ(first_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 2);
@@ -795,8 +804,9 @@
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
// div on x86 requires its first input in eax and the output be the same as the first input.
ASSERT_EQ(div->GetLiveInterval()->GetRegister(), 0);
@@ -892,7 +902,7 @@
liveness.instructions_from_lifetime_position_.push_back(user);
}
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+ RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
register_allocator.unhandled_core_intervals_.push_back(fourth);
register_allocator.unhandled_core_intervals_.push_back(third);
register_allocator.unhandled_core_intervals_.push_back(second);
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 8747dad..353c729 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2456,6 +2456,9 @@
} else if (!byte) {
encoding |= B22;
}
+ if (load && is_signed && (byte || half)) {
+ encoding |= B24;
+ }
Emit32(encoding);
} else {
// 16 bit register offset.
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index f3fa72c..abb09f7 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -1450,4 +1450,23 @@
DriverStr(expected, "vpaddl");
}
+TEST_F(AssemblerThumb2Test, LoadFromShiftedRegOffset) {
+ arm::Address mem_address(arm::R0, arm::R1, arm::Shift::LSL, 2);
+
+ __ ldrsb(arm::R2, mem_address);
+ __ ldrb(arm::R2, mem_address);
+ __ ldrsh(arm::R2, mem_address);
+ __ ldrh(arm::R2, mem_address);
+ __ ldr(arm::R2, mem_address);
+
+ std::string expected =
+ "ldrsb r2, [r0, r1, LSL #2]\n"
+ "ldrb r2, [r0, r1, LSL #2]\n"
+ "ldrsh r2, [r0, r1, LSL #2]\n"
+ "ldrh r2, [r0, r1, LSL #2]\n"
+ "ldr r2, [r0, r1, LSL #2]\n";
+
+ DriverStr(expected, "LoadFromShiftedRegOffset");
+}
+
} // namespace art
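Editor's note: the assembler fix above only sets the sign-extension bit for signed byte and halfword loads in the 32-bit register-offset encoding; the new test pins that down with ldrsb/ldrsh next to their unsigned counterparts. A small mirror of the added condition, with B24 written out explicitly (illustrative only).

#include <cstdint>

constexpr uint32_t kB24 = 1u << 24;  // Sign-extension bit in the 32-bit encoding.

// Mirrors the condition added in assembler_thumb2.cc: only signed byte or
// halfword *loads* get the sign bit; stores and word loads never do.
inline uint32_t MaybeSetSignExtensionBit(uint32_t encoding,
                                         bool load,
                                         bool is_signed,
                                         bool byte,
                                         bool half) {
  if (load && is_signed && (byte || half)) {
    encoding |= kB24;
  }
  return encoding;
}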
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 93351e9..a8f15d0 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -14,9 +14,10 @@
* limitations under the License.
*/
+#include <regex>
+#include <sstream>
#include <string>
#include <vector>
-#include <sstream>
#include "common_runtime_test.h"
@@ -207,7 +208,7 @@
std::string dex_location = GetScratchDir() + "/Dex2OatSwapTest.jar";
std::string odex_location = GetOdexDir() + "/Dex2OatSwapTest.odex";
- Copy(GetDexSrc1(), dex_location);
+ Copy(GetTestDexFileName(), dex_location);
std::vector<std::string> copy(extra_args);
@@ -226,7 +227,11 @@
CheckResult(expect_use);
}
- void CheckResult(bool expect_use) {
+ virtual std::string GetTestDexFileName() {
+ return GetDexSrc1();
+ }
+
+ virtual void CheckResult(bool expect_use) {
if (kIsTargetBuild) {
CheckTargetResult(expect_use);
} else {
@@ -234,13 +239,13 @@
}
}
- void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) {
+ virtual void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) {
// TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
// something for variants with file descriptor where we can control the lifetime of
// the swap file and thus take a look at it.
}
- void CheckHostResult(bool expect_use) {
+ virtual void CheckHostResult(bool expect_use) {
if (!kIsTargetBuild) {
if (expect_use) {
EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
@@ -253,7 +258,7 @@
}
// Check whether the dex2oat run was really successful.
- void CheckValidity() {
+ virtual void CheckValidity() {
if (kIsTargetBuild) {
CheckTargetValidity();
} else {
@@ -261,14 +266,14 @@
}
}
- void CheckTargetValidity() {
+ virtual void CheckTargetValidity() {
// TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
// something for variants with file descriptor where we can control the lifetime of
// the swap file and thus take a look at it.
}
// On the host, we can get the dex2oat output. Here, look for "dex2oat took."
- void CheckHostValidity() {
+ virtual void CheckHostValidity() {
EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
}
};
@@ -297,6 +302,96 @@
{ "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
}
+class Dex2oatSwapUseTest : public Dex2oatSwapTest {
+ protected:
+ void CheckHostResult(bool expect_use) OVERRIDE {
+ if (!kIsTargetBuild) {
+ if (expect_use) {
+ EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
+ << output_;
+ } else {
+ EXPECT_EQ(output_.find("Large app, accepted running with swap."), std::string::npos)
+ << output_;
+ }
+ }
+ }
+
+ std::string GetTestDexFileName() OVERRIDE {
+ // Use Statics as it has a handful of functions.
+ return CommonRuntimeTest::GetTestDexFileName("Statics");
+ }
+
+ size_t ParseNativeAlloc() {
+ std::regex native_alloc_regex("dex2oat took.*native alloc=[^ ]+ \\(([0-9]+)B\\)");
+ std::smatch native_alloc_match;
+ bool found = std::regex_search(output_, native_alloc_match, native_alloc_regex);
+ if (!found) {
+ EXPECT_TRUE(found);
+ return 0;
+ }
+ if (native_alloc_match.size() != 2U) {
+ EXPECT_EQ(native_alloc_match.size(), 2U);
+ return 0;
+ }
+
+ std::istringstream stream(native_alloc_match[1].str());
+ size_t value;
+ stream >> value;
+
+ return value;
+ }
+
+ size_t ParseSwap(bool expected) {
+ std::regex swap_regex("dex2oat took[^\\n]+swap=[^ ]+ \\(([0-9]+)B\\)");
+ std::smatch swap_match;
+ bool found = std::regex_search(output_, swap_match, swap_regex);
+ if (found != expected) {
+ EXPECT_EQ(expected, found);
+ return 0;
+ }
+
+ if (!found) {
+ return 0;
+ }
+
+ if (swap_match.size() != 2U) {
+ EXPECT_EQ(swap_match.size(), 2U);
+ return 0;
+ }
+
+ std::istringstream stream(swap_match[1].str());
+ size_t value;
+ stream >> value;
+
+ return value;
+ }
+};
+
+TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) {
+ RunTest(false /* use_fd */,
+ false /* expect_use */);
+ size_t native_without = ParseNativeAlloc();
+ size_t swap_without = ParseSwap(false /* expected */);
+ std::string output_without = output_;
+
+ output_ = "";
+
+ RunTest(false /* use_fd */,
+ true /* expect_use */,
+ { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
+ size_t native_with = ParseNativeAlloc();
+ size_t swap_with = ParseSwap(true /* expected */);
+ std::string output_with = output_;
+
+ if (native_with >= native_without || swap_without >= swap_with) {
+ EXPECT_LT(native_with, native_without);
+ EXPECT_LT(swap_without, swap_with);
+
+ LOG(ERROR) << output_without;
+ LOG(ERROR) << output_with;
+ }
+}
+
class Dex2oatVeryLargeTest : public Dex2oatTest {
protected:
void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED,
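Editor's note: the new Dex2oatSwapUseTest above pulls its numbers out of dex2oat's timing line with std::regex capture groups. A self-contained illustration of that extraction; the sample string below is made up, and only its "(<digits>B)" shape matters to the regex.

#include <iostream>
#include <regex>
#include <sstream>
#include <string>

int main() {
  // Hypothetical output_ contents, shaped like the lines the test matches.
  std::string output =
      "dex2oat took 1.2s ... native alloc=3MB (3145728B) ... swap=4MB (4194304B)";
  std::regex native_alloc_regex("dex2oat took.*native alloc=[^ ]+ \\(([0-9]+)B\\)");
  std::smatch match;
  if (std::regex_search(output, match, native_alloc_regex) && match.size() == 2u) {
    std::istringstream stream(match[1].str());
    size_t value = 0;
    stream >> value;
    std::cout << "native alloc bytes: " << value << std::endl;  // Prints 3145728.
  }
  return 0;
}

The test then compares the two runs: enabling swap is expected to lower the native allocation figure and raise the swap figure, which is exactly what the EXPECT_LT pair in CheckSwapUsage asserts.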
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 214222d..f5669d7 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -729,7 +729,7 @@
os << " " << reinterpret_cast<void*>(obj) << " ";
os << " entryPointFromJni: "
<< reinterpret_cast<const void*>(
- art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
+ art_method->GetDataPtrSize(pointer_size)) << ", ";
os << " entryPointFromQuickCompiledCode: "
<< reinterpret_cast<const void*>(
art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
@@ -810,7 +810,7 @@
os << " " << reinterpret_cast<void*>(obj) << " ";
os << " entryPointFromJni: "
<< reinterpret_cast<const void*>(
- art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
+ art_method->GetDataPtrSize(pointer_size)) << ", ";
os << " entryPointFromQuickCompiledCode: "
<< reinterpret_cast<const void*>(
art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 5bb61bb..569c5e9 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -748,8 +748,8 @@
copy->SetEntryPointFromQuickCompiledCodePtrSize(RelocatedAddressOfPointer(
object->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)), pointer_size);
// No special handling for IMT conflict table since all pointers are moved by the same offset.
- copy->SetEntryPointFromJniPtrSize(RelocatedAddressOfPointer(
- object->GetEntryPointFromJniPtrSize(pointer_size)), pointer_size);
+ copy->SetDataPtrSize(RelocatedAddressOfPointer(
+ object->GetDataPtrSize(pointer_size)), pointer_size);
}
bool PatchOat::Patch(File* input_oat, off_t delta, File* output_oat, TimingLogger* timings,
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 8f18ff3..966587d 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -27,13 +27,15 @@
namespace art {
// Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
- const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+ const mirror::Class* ref_class);
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses a non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
@@ -119,7 +121,7 @@
// Read barrier.
qpoints->pReadBarrierJni = ReadBarrierJni;
- qpoints->pReadBarrierMarkReg00 = artReadBarrierMark;
+ qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 82d64b9..34d3158 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -672,6 +672,12 @@
.endif
.endm
+// Save rReg's value to [sp, #offset].
+.macro PUSH_REG rReg, offset
+ str \rReg, [sp, #\offset] @ save rReg
+ .cfi_rel_offset \rReg, \offset
+.endm
+
/*
* Macro to insert read barrier, only used in art_quick_aput_obj.
* rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
@@ -1042,6 +1048,18 @@
#endif
POISON_HEAP_REF r2
str r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
+ // Fence. This is "ish" not "ishst" so
+ // that it also ensures ordering of
+ // the class status load with respect
+ // to later accesses to the class
+ // object. Alternatively we could use
+ // "ishst" if we use load-acquire for
+ // the class status load.
+ // Needs to be done before pushing onto
+ // the allocation stack since Heap::VisitObjects
+ // relies on seeing the class pointer.
+ // b/28790624
+ dmb ish
// Push the new object onto the thread
// local allocation stack and
// increment the thread local
@@ -1056,14 +1074,7 @@
// and the list head store above using
// strd.
str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
- // Fence. This is "ish" not "ishst" so
- // that the code after this allocation
- // site will see the right values in
- // the fields of the class.
- // Alternatively we could use "ishst"
- // if we use load-acquire for the
- // class status load.)
- dmb ish
+
mov r0, r3 // Set the return value and return.
bx lr
@@ -1747,30 +1758,83 @@
/*
* Create a function `name` calling the ReadBarrier::Mark routine,
* getting its argument and returning its result through register
- * `reg`, thus following a non-standard runtime calling convention:
- * - `reg` is used to pass the (sole) argument of this function
+ * `reg`, saving and restoring all caller-save registers.
+ *
+ * If `reg` is different from `r0`, the generated function follows a
+ * non-standard runtime calling convention:
+ * - register `reg` is used to pass the (sole) argument of this
+ * function (instead of R0);
+ * - register `reg` is used to return the result of this function
* (instead of R0);
- * - `reg` is used to return the result of this function (instead of R0);
* - R0 is treated like a normal (non-argument) caller-save register;
* - everything else is the same as in the standard runtime calling
- * convention (e.g. same callee-save registers).
+ * convention (e.g. standard callee-save registers are preserved).
*/
.macro READ_BARRIER_MARK_REG name, reg
ENTRY \name
- push {lr} @ save return address
- .cfi_adjust_cfa_offset 4
- .cfi_rel_offset lr, 0
- sub sp, #4 @ push padding (native calling convention 8-byte alignment)
- .cfi_adjust_cfa_offset 4
- mov r0, \reg @ pass arg1 - obj from `reg`
- bl artReadBarrierMark @ artReadBarrierMark(obj)
- mov \reg, r0 @ return result into `reg`
- add sp, #4 @ pop padding
- .cfi_adjust_cfa_offset -4
- pop {pc} @ return
+ push {r0-r4, r9, r12, lr} @ save return address and core caller-save registers
+ .cfi_adjust_cfa_offset 32
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset r1, 4
+ .cfi_rel_offset r2, 8
+ .cfi_rel_offset r3, 12
+ .cfi_rel_offset r4, 16
+ .cfi_rel_offset r9, 20
+ .cfi_rel_offset r12, 24
+ .cfi_rel_offset lr, 28
+ vpush {s0-s15} @ save floating-point caller-save registers
+ .cfi_adjust_cfa_offset 64
+
+ .ifnc \reg, r0
+ mov r0, \reg @ pass arg1 - obj from `reg`
+ .endif
+ bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj)
+
+ vpop {s0-s15} @ restore floating-point registers
+ .cfi_adjust_cfa_offset -64
+ @ If `reg` is a caller-save register, save the result to its
+ @ corresponding stack slot; it will be restored by the "pop"
+ @ instruction below. Otherwise, move result into `reg`.
+ @
+ @ (Note that saving `reg` to its stack slot will overwrite the value
+ @ previously stored by the "push" instruction above. That is
+ @ alright, as in that case we know that `reg` is not a live
+ @ register, as it is used to pass the argument and return the result
+ @ of this function.)
+ .ifc \reg, r0
+ PUSH_REG r0, 0 @ copy result to r0's stack location
+ .else
+ .ifc \reg, r1
+ PUSH_REG r0, 4 @ copy result to r1's stack location
+ .else
+ .ifc \reg, r2
+ PUSH_REG r0, 8 @ copy result to r2's stack location
+ .else
+ .ifc \reg, r3
+ PUSH_REG r0, 12 @ copy result to r3's stack location
+ .else
+ .ifc \reg, r4
+ PUSH_REG r0, 16 @ copy result to r4's stack location
+ .else
+ .ifc \reg, r9
+ PUSH_REG r0, 20 @ copy result to r9's stack location
+ .else
+ .ifc \reg, r12
+ PUSH_REG r0, 24 @ copy result to r12's stack location
+ .else
+ mov \reg, r0 @ return result into `reg`
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ pop {r0-r4, r9, r12, pc} @ restore caller-save registers and return
END \name
.endm
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
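Editor's note: the .ifc chain in the ARM macro above works because push {r0-r4, r9, r12, lr} fixes each register's stack offset, so writing artReadBarrierMark's result into `reg`'s slot lets the closing pop restore every other caller-save register while loading the result into `reg`. The offsets, restated from the .cfi_rel_offset directives as a small table; purely illustrative, nothing here is used by the runtime.

#include <cstddef>

struct SavedSlot {
  const char* reg;
  size_t sp_offset;  // Offset from SP after the core-register push.
};

// Matches push {r0-r4, r9, r12, lr}: lowest-numbered register at the lowest
// address. PUSH_REG r0, <offset> overwrites exactly one of these slots.
constexpr SavedSlot kArmMarkRegFrame[] = {
    {"r0", 0u},  {"r1", 4u},  {"r2", 8u},   {"r3", 12u},
    {"r4", 16u}, {"r9", 20u}, {"r12", 24u}, {"lr", 28u},
};

static_assert(sizeof(kArmMarkRegFrame) / sizeof(kArmMarkRegFrame[0]) == 8u,
              "Eight 4-byte slots: the 32 bytes in .cfi_adjust_cfa_offset 32 above.");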
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index c3188b6..2e5f5ad 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -27,13 +27,15 @@
namespace art {
// Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
- const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+ const mirror::Class* ref_class);
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses a non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
@@ -122,7 +124,7 @@
// Read barrier.
qpoints->pReadBarrierJni = ReadBarrierJni;
- qpoints->pReadBarrierMarkReg00 = artReadBarrierMark;
+ qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index e9ad1f4..6173ae7 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1253,6 +1253,22 @@
.endif
.endm
+// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
+// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
+.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
+ .ifc \xReg1, \xExclude
+ ldr \xReg2, [sp, #(\offset + 8)] // restore xReg2
+ .else
+ .ifc \xReg2, \xExclude
+ ldr \xReg1, [sp, #\offset] // restore xReg1
+ .else
+ ldp \xReg1, \xReg2, [sp, #\offset] // restore xReg1 and xReg2
+ .endif
+ .endif
+ .cfi_restore \xReg1
+ .cfi_restore \xReg2
+.endm
+
/*
* Macro to insert read barrier, only used in art_quick_aput_obj.
* xDest, wDest and xObj are registers, offset is a defined literal such as
@@ -1633,6 +1649,18 @@
#endif
POISON_HEAP_REF w2
str w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
+ // Fence. This is "ish" not "ishst" so
+ // that it also ensures ordering of
+ // the class status load with respect
+ // to later accesses to the class
+ // object. Alternatively we could use
+ // "ishst" if we use load-acquire for
+ // the class status load.
+ // Needs to be done before pushing onto
+ // the allocation stack since Heap::VisitObjects
+ // relies on seeing the class pointer.
+ // b/28790624
+ dmb ish
// Push the new object onto the thread
// local allocation stack and
// increment the thread local
@@ -1647,14 +1675,7 @@
// and the list head store above using
// strd.
str w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
- // Fence. This is "ish" not "ishst" so
- // that the code after this allocation
- // site will see the right values in
- // the fields of the class.
- // Alternatively we could use "ishst"
- // if we use load-acquire for the
- // class status load.)
- dmb ish
+
mov x0, x3 // Set the return value and return.
ret
.Lart_quick_alloc_object_rosalloc_slow_path:
@@ -2217,56 +2238,148 @@
/*
* Create a function `name` calling the ReadBarrier::Mark routine,
- * getting its argument and returning its result through register
- * `reg`, thus following a non-standard runtime calling convention:
- * - `reg` is used to pass the (sole) argument of this function
+ * getting its argument and returning its result through W register
+ * `wreg` (corresponding to X register `xreg`), saving and restoring
+ * all caller-save registers.
+ *
+ * If `wreg` is different from `w0`, the generated function follows a
+ * non-standard runtime calling convention:
+ * - register `wreg` is used to pass the (sole) argument of this
+ * function (instead of W0);
+ * - register `wreg` is used to return the result of this function
* (instead of W0);
- * - `reg` is used to return the result of this function (instead of W0);
* - W0 is treated like a normal (non-argument) caller-save register;
* - everything else is the same as in the standard runtime calling
- * convention (e.g. same callee-save registers).
+ * convention (e.g. standard callee-save registers are preserved).
*/
-.macro READ_BARRIER_MARK_REG name, reg
+.macro READ_BARRIER_MARK_REG name, wreg, xreg
ENTRY \name
- str xLR, [sp, #-16]! // Save return address and add padding (16B align stack).
- .cfi_adjust_cfa_offset 16
- .cfi_rel_offset x30, 0
- mov w0, \reg // Pass arg1 - obj from `reg`
+ /*
+ * Allocate 46 stack slots * 8 = 368 bytes:
+ * - 20 slots for core registers X0-X19
+ * - 24 slots for floating-point registers D0-D7 and D16-D31
+ * - 1 slot for return address register XLR
+ * - 1 padding slot for 16-byte stack alignment
+ */
+ // Save all potentially live caller-save core registers.
+ stp x0, x1, [sp, #-368]!
+ .cfi_adjust_cfa_offset 368
+ .cfi_rel_offset x0, 0
+ .cfi_rel_offset x1, 8
+ stp x2, x3, [sp, #16]
+ .cfi_rel_offset x2, 16
+ .cfi_rel_offset x3, 24
+ stp x4, x5, [sp, #32]
+ .cfi_rel_offset x4, 32
+ .cfi_rel_offset x5, 40
+ stp x6, x7, [sp, #48]
+ .cfi_rel_offset x6, 48
+ .cfi_rel_offset x7, 56
+ stp x8, x9, [sp, #64]
+ .cfi_rel_offset x8, 64
+ .cfi_rel_offset x9, 72
+ stp x10, x11, [sp, #80]
+ .cfi_rel_offset x10, 80
+ .cfi_rel_offset x11, 88
+ stp x12, x13, [sp, #96]
+ .cfi_rel_offset x12, 96
+ .cfi_rel_offset x13, 104
+ stp x14, x15, [sp, #112]
+ .cfi_rel_offset x14, 112
+ .cfi_rel_offset x15, 120
+ stp x16, x17, [sp, #128]
+ .cfi_rel_offset x16, 128
+ .cfi_rel_offset x17, 136
+ stp x18, x19, [sp, #144]
+ .cfi_rel_offset x18, 144
+ .cfi_rel_offset x19, 152
+ // Save all potentially live caller-save floating-point registers.
+ stp d0, d1, [sp, #160]
+ stp d2, d3, [sp, #176]
+ stp d4, d5, [sp, #192]
+ stp d6, d7, [sp, #208]
+ stp d16, d17, [sp, #224]
+ stp d18, d19, [sp, #240]
+ stp d20, d21, [sp, #256]
+ stp d22, d23, [sp, #272]
+ stp d24, d25, [sp, #288]
+ stp d26, d27, [sp, #304]
+ stp d28, d29, [sp, #320]
+ stp d30, d31, [sp, #336]
+ // Save return address.
+ str xLR, [sp, #352]
+ .cfi_rel_offset x30, 352
+ // (sp + #360 is a padding slot)
+
+ .ifnc \wreg, w0
+ mov w0, \wreg // Pass arg1 - obj from `wreg`
+ .endif
bl artReadBarrierMark // artReadBarrierMark(obj)
- mov \reg, w0 // Return result into `reg`
- ldr xLR, [sp], #16 // Restore return address and remove padding.
+ .ifnc \wreg, w0
+ mov \wreg, w0 // Return result into `wreg`
+ .endif
+
+ // Restore core regs, except `xreg`, as `wreg` is used to return the
+ // result of this function (simply remove it from the stack instead).
+ POP_REGS_NE x0, x1, 0, \xreg
+ POP_REGS_NE x2, x3, 16, \xreg
+ POP_REGS_NE x4, x5, 32, \xreg
+ POP_REGS_NE x6, x7, 48, \xreg
+ POP_REGS_NE x8, x9, 64, \xreg
+ POP_REGS_NE x10, x11, 80, \xreg
+ POP_REGS_NE x12, x13, 96, \xreg
+ POP_REGS_NE x14, x15, 112, \xreg
+ POP_REGS_NE x16, x17, 128, \xreg
+ POP_REGS_NE x18, x19, 144, \xreg
+ // Restore floating-point registers.
+ ldp d0, d1, [sp, #160]
+ ldp d2, d3, [sp, #176]
+ ldp d4, d5, [sp, #192]
+ ldp d6, d7, [sp, #208]
+ ldp d16, d17, [sp, #224]
+ ldp d18, d19, [sp, #240]
+ ldp d20, d21, [sp, #256]
+ ldp d22, d23, [sp, #272]
+ ldp d24, d25, [sp, #288]
+ ldp d26, d27, [sp, #304]
+ ldp d28, d29, [sp, #320]
+ ldp d30, d31, [sp, #336]
+ // Restore return address and remove padding.
+ ldr xLR, [sp, #352]
.cfi_restore x30
- .cfi_adjust_cfa_offset -16
+ add sp, sp, #368
+ .cfi_adjust_cfa_offset -368
ret
END \name
.endm
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0, x0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1, x1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2, x2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3, x3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4, x4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5, x5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6, x6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7, x7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8, x8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9, x9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
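Editor's note: the AArch64 variant reserves a single 368-byte frame up front. The byte count in the comment above checks out as simple arithmetic, reproduced here as compile-time constants; this is a sketch mirroring that comment, not a definition the runtime uses.

#include <cstddef>

constexpr size_t kSlotSize = 8;
constexpr size_t kCoreSlots = 20;     // x0 .. x19
constexpr size_t kFpSlots = 8 + 16;   // d0-d7 and d16-d31
constexpr size_t kLrAndPadding = 2;   // xLR plus one alignment slot
constexpr size_t kMarkRegFrameSize =
    (kCoreSlots + kFpSlots + kLrAndPadding) * kSlotSize;

static_assert(kMarkRegFrameSize == 368, "matches the stp/ldr offsets above");
static_assert(kMarkRegFrameSize % 16 == 0, "AArch64 requires 16-byte SP alignment");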
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index e3cc0e0..22efd19 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -28,8 +28,8 @@
namespace art {
// Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
- const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+ const mirror::Class* ref_class);
// Math entrypoints.
extern int32_t CmpgDouble(double a, double b);
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index b19aa01..b02edb6 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -28,8 +28,8 @@
namespace art {
// Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
- const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+ const mirror::Class* ref_class);
// Math entrypoints.
extern int32_t CmpgDouble(double a, double b);
extern int32_t CmplDouble(double a, double b);
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 69c939e..4e9756c 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -25,13 +25,14 @@
namespace art {
// Cast entrypoints.
-extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
- const mirror::Class* ref_class);
+extern "C" size_t art_quick_is_assignable(const mirror::Class* klass,
+ const mirror::Class* ref_class);
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses a non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index e75fecb..77e04e7 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1908,41 +1908,73 @@
UNREACHABLE
END_FUNCTION art_nested_signal_return
-// Call the ReadBarrierMark entry point, getting input and returning
-// result through EAX (register 0), following the standard runtime
-// calling convention.
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg00
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
- PUSH eax // pass arg1 - obj
- call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- addl LITERAL(12), %esp // pop argument and remove padding
- CFI_ADJUST_CFA_OFFSET(-12)
- ret
-END_FUNCTION art_quick_read_barrier_mark_reg00
-
// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
-// `reg`, thus following a non-standard runtime calling convention:
-// - `reg` is used to pass the (sole) argument of this function
+// `reg`, saving and restoring all caller-save registers.
+//
+// If `reg` is different from `eax`, the generated function follows a
+// non-standard runtime calling convention:
+// - register `reg` is used to pass the (sole) argument of this function
// (instead of EAX);
-// - `reg` is used to return the result of this function (instead of EAX);
+// - register `reg` is used to return the result of this function
+// (instead of EAX);
// - EAX is treated like a normal (non-argument) caller-save register;
// - everything else is the same as in the standard runtime calling
-// convention (e.g. same callee-save registers).
+// convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
+ // Save all potentially live caller-save core registers.
+ PUSH eax
+ PUSH ecx
+ PUSH edx
+ PUSH ebx
+ // 8-byte align the stack to improve (8-byte) XMM register saving and restoring,
+ // and create space for caller-save floating-point registers.
+ subl MACRO_LITERAL(4 + 8 * 8), %esp
+ CFI_ADJUST_CFA_OFFSET(4 + 8 * 8)
+ // Save all potentially live caller-save floating-point registers.
+ movsd %xmm0, 0(%esp)
+ movsd %xmm1, 8(%esp)
+ movsd %xmm2, 16(%esp)
+ movsd %xmm3, 24(%esp)
+ movsd %xmm4, 32(%esp)
+ movsd %xmm5, 40(%esp)
+ movsd %xmm6, 48(%esp)
+ movsd %xmm7, 56(%esp)
+
+ subl LITERAL(4), %esp // alignment padding
+ CFI_ADJUST_CFA_OFFSET(4)
PUSH RAW_VAR(reg) // pass arg1 - obj from `reg`
call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- movl %eax, REG_VAR(reg) // return result into `reg`
- addl LITERAL(12), %esp // pop argument and remove padding
- CFI_ADJUST_CFA_OFFSET(-12)
+ .ifnc RAW_VAR(reg), eax
+ movl %eax, REG_VAR(reg) // return result into `reg`
+ .endif
+ addl LITERAL(8), %esp // pop argument and remove padding
+ CFI_ADJUST_CFA_OFFSET(-8)
+
+ // Restore floating-point registers.
+ movsd 0(%esp), %xmm0
+ movsd 8(%esp), %xmm1
+ movsd 16(%esp), %xmm2
+ movsd 24(%esp), %xmm3
+ movsd 32(%esp), %xmm4
+ movsd 40(%esp), %xmm5
+ movsd 48(%esp), %xmm6
+ movsd 56(%esp), %xmm7
+ // Remove floating-point registers and padding.
+ addl MACRO_LITERAL(8 * 8 + 4), %esp
+ CFI_ADJUST_CFA_OFFSET(-(8 * 8 + 4))
+ // Restore core regs, except `reg`, as it is used to return the
+ // result of this function (simply remove it from the stack instead).
+ POP_REG_NE ebx, RAW_VAR(reg)
+ POP_REG_NE edx, RAW_VAR(reg)
+ POP_REG_NE ecx, RAW_VAR(reg)
+ POP_REG_NE eax, RAW_VAR(reg)
ret
END_FUNCTION VAR(name)
END_MACRO
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, eax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, ecx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, edx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, ebx
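Editor's note: on x86 the same pattern is built from four core pushes, an 8-byte-aligned save area for xmm0-xmm7, and a small padding-plus-argument region before the call. The literals in the macro above add up as follows; compile-time bookkeeping only, nothing here is used by the runtime.

#include <cstddef>

constexpr size_t kCorePushBytes = 4u * 4u;      // eax, ecx, edx, ebx
constexpr size_t kXmmAreaBytes = 4u + 8u * 8u;  // alignment word + xmm0-xmm7
constexpr size_t kCallSetupBytes = 4u + 4u;     // padding + pushed argument

static_assert(kXmmAreaBytes == 68u, "matches the subl/addl literals in the macro");
static_assert(kCorePushBytes + kXmmAreaBytes + kCallSetupBytes == 92u,
              "bytes the macro adds to the frame before calling artReadBarrierMark");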
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index cf0039c..c4e723c 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -52,7 +52,7 @@
#define LITERAL(value) $value
#if defined(__APPLE__)
- #define MACRO_LITERAL(value) $$(value)
+ #define MACRO_LITERAL(value) $(value)
#else
#define MACRO_LITERAL(value) $value
#endif
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 2bea3db..c2e3023 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -28,13 +28,14 @@
namespace art {
// Cast entrypoints.
-extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
- const mirror::Class* ref_class);
+extern "C" size_t art_quick_assignable_from_code(const mirror::Class* klass,
+ const mirror::Class* ref_class);
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses a non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 496e6a8..784ec39 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1815,73 +1815,93 @@
UNREACHABLE
END_FUNCTION art_nested_signal_return
-// Call the ReadBarrier::Mark routine, getting argument and returning
-// result through RAX (register 0), thus following a non-standard
-// runtime calling convention:
-// - RAX is used to pass the (sole) argument of this function (instead
-// of RDI);
-// - RDI is treated like a normal (non-argument) caller-save register;
-// - everything else is the same as in the standard runtime calling
-// convention; in particular, RAX is still used to return the result
-// of this function.
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg00
- SETUP_FP_CALLEE_SAVE_FRAME
- subq LITERAL(8), %rsp // Alignment padding.
- CFI_ADJUST_CFA_OFFSET(8)
- movq %rax, %rdi // Pass arg1 - obj from RAX.
- call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- addq LITERAL(8), %rsp // Remove padding.
- CFI_ADJUST_CFA_OFFSET(-8)
- RESTORE_FP_CALLEE_SAVE_FRAME
- ret
-END_FUNCTION art_quick_read_barrier_mark_reg00
-
-// Call the ReadBarrier::Mark routine, getting argument and returning
-// result through RDI (register 7), thus following a non-standard
-// runtime calling convention:
-// - RDI is used to return the result of this function (instead of RAX);
-// - RAX is treated like a normal (non-result) caller-save register;
-// - everything else is the same as in the standard runtime calling
-// convention; in particular, RDI is still used to pass the (sole)
-// argument of this function.
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg07
- SETUP_FP_CALLEE_SAVE_FRAME
- subq LITERAL(8), %rsp // Alignment padding.
- CFI_ADJUST_CFA_OFFSET(8)
- call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- movq %rax, %rdi // Return result into RDI.
- addq LITERAL(8), %rsp // Remove padding.
- CFI_ADJUST_CFA_OFFSET(-8)
- RESTORE_FP_CALLEE_SAVE_FRAME
- ret
-END_FUNCTION art_quick_read_barrier_mark_reg07
-
// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
-// `reg`, thus following a non-standard runtime calling convention:
-// - `reg` is used to pass the (sole) argument of this function (instead
-// of RDI);
-// - `reg` is used to return the result of this function (instead of RAX);
-// - RDI is treated like a normal (non-argument) caller-save register;
-// - RAX is treated like a normal (non-result) caller-save register;
+// `reg`, saving and restoring all caller-save registers.
+//
+// The generated function follows a non-standard runtime calling
+// convention:
+// - register `reg` (which may be different from RDI) is used to pass
+// the (sole) argument of this function;
+// - register `reg` (which may be different from RAX) is used to return
+// the result of this function (instead of RAX);
+// - if `reg` is different from `rdi`, RDI is treated like a normal
+// (non-argument) caller-save register;
+// - if `reg` is different from `rax`, RAX is treated like a normal
+// (non-result) caller-save register;
// - everything else is the same as in the standard runtime calling
-// convention (e.g. same callee-save registers).
+// convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
+ // Save all potentially live caller-save core registers.
+ PUSH rax
+ PUSH rcx
+ PUSH rdx
+ PUSH rsi
+ PUSH rdi
+ PUSH r8
+ PUSH r9
+ PUSH r10
+ PUSH r11
+ // Create space for caller-save floating-point registers.
+ subq MACRO_LITERAL(12 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(12 * 8)
+ // Save all potentially live caller-save floating-point registers.
+ movq %xmm0, 0(%rsp)
+ movq %xmm1, 8(%rsp)
+ movq %xmm2, 16(%rsp)
+ movq %xmm3, 24(%rsp)
+ movq %xmm4, 32(%rsp)
+ movq %xmm5, 40(%rsp)
+ movq %xmm6, 48(%rsp)
+ movq %xmm7, 56(%rsp)
+ movq %xmm8, 64(%rsp)
+ movq %xmm9, 72(%rsp)
+ movq %xmm10, 80(%rsp)
+ movq %xmm11, 88(%rsp)
SETUP_FP_CALLEE_SAVE_FRAME
- subq LITERAL(8), %rsp // Alignment padding.
- CFI_ADJUST_CFA_OFFSET(8)
- movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`.
+
+ .ifnc RAW_VAR(reg), rdi
+ movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`.
+ .endif
call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- movq %rax, REG_VAR(reg) // Return result into `reg`.
- addq LITERAL(8), %rsp // Remove padding.
- CFI_ADJUST_CFA_OFFSET(-8)
+ .ifnc RAW_VAR(reg), rax
+ movq %rax, REG_VAR(reg) // Return result into `reg`.
+ .endif
+
RESTORE_FP_CALLEE_SAVE_FRAME
+ // Restore floating-point registers.
+ movq 0(%rsp), %xmm0
+ movq 8(%rsp), %xmm1
+ movq 16(%rsp), %xmm2
+ movq 24(%rsp), %xmm3
+ movq 32(%rsp), %xmm4
+ movq 40(%rsp), %xmm5
+ movq 48(%rsp), %xmm6
+ movq 56(%rsp), %xmm7
+ movq 64(%rsp), %xmm8
+ movq 72(%rsp), %xmm9
+ movq 80(%rsp), %xmm10
+ movq 88(%rsp), %xmm11
+ // Remove floating-point registers.
+ addq MACRO_LITERAL(12 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-(12 * 8))
+ // Restore core regs, except `reg`, as it is used to return the
+ // result of this function (simply remove it from the stack instead).
+ POP_REG_NE r11, RAW_VAR(reg)
+ POP_REG_NE r10, RAW_VAR(reg)
+ POP_REG_NE r9, RAW_VAR(reg)
+ POP_REG_NE r8, RAW_VAR(reg)
+ POP_REG_NE rdi, RAW_VAR(reg)
+ POP_REG_NE rsi, RAW_VAR(reg)
+ POP_REG_NE rdx, RAW_VAR(reg)
+ POP_REG_NE rcx, RAW_VAR(reg)
+ POP_REG_NE rax, RAW_VAR(reg)
ret
END_FUNCTION VAR(name)
END_MACRO
-// Note: art_quick_read_barrier_mark_reg00 is implemented above.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
@@ -1889,7 +1909,7 @@
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
-// Note: art_quick_read_barrier_mark_reg07 is implemented above.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 26450c4..32425d8 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -120,6 +120,10 @@
return dex_method_index_;
}
+inline uint32_t ArtMethod::GetImtIndex() {
+ return GetDexMethodIndex() % ImTable::kSize;
+}
+
inline ArtMethod** ArtMethod::GetDexCacheResolvedMethods(size_t pointer_size) {
return GetNativePointer<ArtMethod**>(DexCacheResolvedMethodsOffset(pointer_size),
pointer_size);
@@ -503,7 +507,7 @@
SetEntryPointFromJniPtrSize(new_native_code, pointer_size);
}
} else {
- DCHECK(GetEntryPointFromJniPtrSize(pointer_size) == nullptr);
+ DCHECK(GetDataPtrSize(pointer_size) == nullptr);
}
const void* old_code = GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
const void* new_code = visitor(old_code);
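The new GetImtIndex() helper above centralizes the dex-method-index-modulo-table-size computation that was previously duplicated at the call sites. A hedged, self-contained illustration, assuming an IMT size of 64 purely for the example (the real constant is ImTable::kSize):

#include <cstdint>

constexpr uint32_t kSketchImtSize = 64;   // stand-in for ImTable::kSize

constexpr uint32_t ImtIndexSketch(uint32_t dex_method_index) {
  // Same computation as ArtMethod::GetImtIndex(): hash by plain modulo.
  return dex_method_index % kSketchImtSize;
}
static_assert(ImtIndexSketch(150) == 22, "dex index 150 maps to IMT slot 22");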
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f86cb13..113827a 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -16,6 +16,8 @@
#include "art_method.h"
+#include <cstddef>
+
#include "arch/context.h"
#include "art_field-inl.h"
#include "art_method-inl.h"
@@ -497,4 +499,24 @@
hotness_count_ = 0;
}
+bool ArtMethod::IsImagePointerSize(size_t pointer_size) {
+ // Hijack this function to get access to PtrSizedFieldsOffset.
+ //
+ // Ensure that PtrSizedFieldsOffset is correct. We rely here on usually having both 32-bit and
+ // 64-bit builds.
+ static_assert(std::is_standard_layout<ArtMethod>::value, "ArtMethod is not standard layout.");
+ static_assert((sizeof(void*) != 4) ||
+ (offsetof(ArtMethod, ptr_sized_fields_) == PtrSizedFieldsOffset(4)),
+ "Unexpected 32-bit class layout.");
+ static_assert((sizeof(void*) != 8) ||
+ (offsetof(ArtMethod, ptr_sized_fields_) == PtrSizedFieldsOffset(8)),
+ "Unexpected 64-bit class layout.");
+
+ Runtime* runtime = Runtime::Current();
+ if (runtime == nullptr) {
+ return true;
+ }
+ return runtime->GetClassLinker()->GetImagePointerSize() == pointer_size;
+}
+
} // namespace art
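The static_asserts above verify PtrSizedFieldsOffset() against offsetof for whichever pointer size the current build uses. The same guard pattern is shown below on a hypothetical struct (SketchMethod is not an ART type):

#include <cstddef>
#include <cstdint>

struct SketchMethod {      // hypothetical stand-in for ArtMethod
  uint32_t index_;
  uint16_t hotness_;
  void* data_;             // stand-in for ptr_sized_fields_
};

// The (sizeof(void*) != 8) guard makes the check a no-op on 32-bit builds,
// mirroring the pair of asserts in IsImagePointerSize() above; the 32-bit
// variant would check its own expected offset instead.
static_assert((sizeof(void*) != 8) || (offsetof(SketchMethod, data_) == 8),
              "Unexpected 64-bit layout of SketchMethod.");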
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 90b2406..1d14203 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -17,6 +17,8 @@
#ifndef ART_RUNTIME_ART_METHOD_H_
#define ART_RUNTIME_ART_METHOD_H_
+#include <cstddef>
+
#include "base/bit_utils.h"
#include "base/casts.h"
#include "dex_file.h"
@@ -219,7 +221,7 @@
class ArtMethod FINAL {
public:
ArtMethod() : access_flags_(0), dex_code_item_offset_(0), dex_method_index_(0),
- method_index_(0) { }
+ method_index_(0), hotness_count_(0) { }
ArtMethod(ArtMethod* src, size_t image_pointer_size) {
CopyFrom(src, image_pointer_size);
@@ -419,6 +421,8 @@
ALWAYS_INLINE uint32_t GetDexMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_);
+ ALWAYS_INLINE uint32_t GetImtIndex() SHARED_REQUIRES(Locks::mutator_lock_);
+
void SetDexMethodIndex(uint32_t new_idx) {
// Not called within a transaction.
dex_method_index_ = new_idx;
@@ -506,9 +510,13 @@
PtrSizedFields, dex_cache_resolved_types_) / sizeof(void*) * pointer_size);
}
- static MemberOffset EntryPointFromJniOffset(size_t pointer_size) {
+ static MemberOffset DataOffset(size_t pointer_size) {
return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
- PtrSizedFields, entry_point_from_jni_) / sizeof(void*) * pointer_size);
+ PtrSizedFields, data_) / sizeof(void*) * pointer_size);
+ }
+
+ static MemberOffset EntryPointFromJniOffset(size_t pointer_size) {
+ return DataOffset(pointer_size);
}
static MemberOffset EntryPointFromQuickCompiledCodeOffset(size_t pointer_size) {
@@ -516,37 +524,40 @@
PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
}
- ProfilingInfo* GetProfilingInfo(size_t pointer_size) {
- return reinterpret_cast<ProfilingInfo*>(GetEntryPointFromJniPtrSize(pointer_size));
- }
-
ImtConflictTable* GetImtConflictTable(size_t pointer_size) {
DCHECK(IsRuntimeMethod());
- return reinterpret_cast<ImtConflictTable*>(GetEntryPointFromJniPtrSize(pointer_size));
+ return reinterpret_cast<ImtConflictTable*>(GetDataPtrSize(pointer_size));
}
ALWAYS_INLINE void SetImtConflictTable(ImtConflictTable* table, size_t pointer_size) {
- SetEntryPointFromJniPtrSize(table, pointer_size);
+ DCHECK(IsRuntimeMethod());
+ SetDataPtrSize(table, pointer_size);
+ }
+
+ ProfilingInfo* GetProfilingInfo(size_t pointer_size) {
+ return reinterpret_cast<ProfilingInfo*>(GetDataPtrSize(pointer_size));
}
ALWAYS_INLINE void SetProfilingInfo(ProfilingInfo* info) {
- SetEntryPointFromJniPtrSize(info, sizeof(void*));
+ SetDataPtrSize(info, sizeof(void*));
}
ALWAYS_INLINE void SetProfilingInfoPtrSize(ProfilingInfo* info, size_t pointer_size) {
- SetEntryPointFromJniPtrSize(info, pointer_size);
+ SetDataPtrSize(info, pointer_size);
}
static MemberOffset ProfilingInfoOffset() {
- return EntryPointFromJniOffset(sizeof(void*));
+ DCHECK(IsImagePointerSize(sizeof(void*)));
+ return DataOffset(sizeof(void*));
}
void* GetEntryPointFromJni() {
+ DCHECK(IsNative());
return GetEntryPointFromJniPtrSize(sizeof(void*));
}
ALWAYS_INLINE void* GetEntryPointFromJniPtrSize(size_t pointer_size) {
- return GetNativePointer<void*>(EntryPointFromJniOffset(pointer_size), pointer_size);
+ return GetDataPtrSize(pointer_size);
}
void SetEntryPointFromJni(const void* entrypoint) {
@@ -555,7 +566,17 @@
}
ALWAYS_INLINE void SetEntryPointFromJniPtrSize(const void* entrypoint, size_t pointer_size) {
- SetNativePointer(EntryPointFromJniOffset(pointer_size), entrypoint, pointer_size);
+ SetDataPtrSize(entrypoint, pointer_size);
+ }
+
+ ALWAYS_INLINE void* GetDataPtrSize(size_t pointer_size) {
+ DCHECK(IsImagePointerSize(pointer_size));
+ return GetNativePointer<void*>(DataOffset(pointer_size), pointer_size);
+ }
+
+ ALWAYS_INLINE void SetDataPtrSize(const void* data, size_t pointer_size) {
+ DCHECK(IsImagePointerSize(pointer_size));
+ SetNativePointer(DataOffset(pointer_size), data, pointer_size);
}
// Is this a CalleeSaveMethod or ResolutionMethod and therefore doesn't adhere to normal
@@ -640,7 +661,7 @@
// Size of an instance of this native class.
static size_t Size(size_t pointer_size) {
- return RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size) +
+ return PtrSizedFieldsOffset(pointer_size) +
(sizeof(PtrSizedFields) / sizeof(void*)) * pointer_size;
}
@@ -727,9 +748,7 @@
// Fake padding field gets inserted here.
// Must be the last fields in the method.
- // PACKED(4) is necessary for the correctness of
- // RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size).
- struct PACKED(4) PtrSizedFields {
+ struct PtrSizedFields {
// Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
ArtMethod** dex_cache_resolved_methods_;
@@ -738,7 +757,7 @@
// Pointer to JNI function registered to this method, or a function to resolve the JNI function,
// or the profiling data for non-native methods, or an ImtConflictTable.
- void* entry_point_from_jni_;
+ void* data_;
// Method dispatch from quick compiled code invokes this pointer which may cause bridging into
// the interpreter.
@@ -746,11 +765,14 @@
} ptr_sized_fields_;
private:
- static size_t PtrSizedFieldsOffset(size_t pointer_size) {
- // Round up to pointer size for padding field.
- return RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size);
+ static constexpr size_t PtrSizedFieldsOffset(size_t pointer_size) {
+ // Round up to pointer size for padding field. Tested in art_method.cc.
+ return RoundUp(offsetof(ArtMethod, hotness_count_) + sizeof(hotness_count_), pointer_size);
}
+ // Compare given pointer size to the image pointer size.
+ static bool IsImagePointerSize(size_t pointer_size);
+
template<typename T>
ALWAYS_INLINE T GetNativePointer(MemberOffset offset, size_t pointer_size) const {
static_assert(std::is_pointer<T>::value, "T must be a pointer type");
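Renaming entry_point_from_jni_ to data_ above reflects that the slot is already multiplexed: the JNI stub for native methods, an ImtConflictTable for runtime methods, and a ProfilingInfo for JIT-profiled methods. A rough sketch of that idea with illustrative names (this is not ART's API, just the shape of the accessors added above):

// Sketch: one pointer-sized slot reused according to the kind of method.
struct MethodDataSlotSketch {
  void* data_ = nullptr;

  void SetJniStub(const void* stub) { data_ = const_cast<void*>(stub); }  // native methods
  void* GetJniStub() const { return data_; }

  void SetConflictTable(void* table) { data_ = table; }                   // runtime methods
  void* GetConflictTable() const { return data_; }

  void SetProfilingInfo(void* info) { data_ = info; }                     // JIT-profiled methods
  void* GetProfilingInfo() const { return data_; }
};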
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index cb97faa..d0dad64 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -6159,11 +6159,6 @@
}
}
-static inline uint32_t GetIMTIndex(ArtMethod* interface_method)
- SHARED_REQUIRES(Locks::mutator_lock_) {
- return interface_method->GetDexMethodIndex() % ImTable::kSize;
-}
-
ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count,
LinearAlloc* linear_alloc,
size_t image_pointer_size) {
@@ -6215,7 +6210,7 @@
// or interface methods in the IMT here they will not create extra conflicts since we compare
// names and signatures in SetIMTRef.
ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
- const uint32_t imt_index = GetIMTIndex(interface_method);
+ const uint32_t imt_index = interface_method->GetImtIndex();
// There are conflicts only if the interface methods for an IMT slot do not all have the
// same implementation method; keep track of this to avoid creating a conflict table in
@@ -6269,7 +6264,7 @@
}
DCHECK(implementation_method != nullptr);
ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
- const uint32_t imt_index = GetIMTIndex(interface_method);
+ const uint32_t imt_index = interface_method->GetImtIndex();
if (!imt[imt_index]->IsRuntimeMethod() ||
imt[imt_index] == unimplemented_method ||
imt[imt_index] == imt_conflict_method) {
@@ -6675,7 +6670,7 @@
auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
MethodNameAndSignatureComparator interface_name_comparator(
interface_method->GetInterfaceMethodIfProxy(image_pointer_size_));
- uint32_t imt_index = GetIMTIndex(interface_method);
+ uint32_t imt_index = interface_method->GetImtIndex();
ArtMethod** imt_ptr = &out_imt[imt_index];
// For each method listed in the interface's method list, find the
// matching method in our class's method list. We want to favor the
@@ -7700,7 +7695,7 @@
}
if (is_static) {
- resolved = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
+ resolved = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx);
} else {
resolved = klass->FindInstanceField(dex_cache.Get(), field_idx);
}
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index ab14655..7ecd595 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -19,7 +19,7 @@
#include "entrypoint_utils.h"
-#include "art_method.h"
+#include "art_method-inl.h"
#include "class_linker-inl.h"
#include "common_throws.h"
#include "dex_file.h"
@@ -600,7 +600,7 @@
}
}
case kInterface: {
- uint32_t imt_index = resolved_method->GetDexMethodIndex() % ImTable::kSize;
+ uint32_t imt_index = resolved_method->GetImtIndex();
size_t pointer_size = class_linker->GetImagePointerSize();
ArtMethod* imt_method = (*this_object)->GetClass()->GetImt(pointer_size)->
Get(imt_index, pointer_size);
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index 968ac53..8db69a3 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -20,7 +20,7 @@
namespace art {
// Assignable test for code, won't throw. Null and equality tests already performed
-extern "C" uint32_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class)
+extern "C" size_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class)
SHARED_REQUIRES(Locks::mutator_lock_) {
DCHECK(klass != nullptr);
DCHECK(ref_class != nullptr);
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index d0dad34..86fb881 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -50,16 +50,16 @@
extern "C" int art_quick_set64_static(uint32_t, int64_t);
extern "C" int art_quick_set_obj_instance(uint32_t, void*, void*);
extern "C" int art_quick_set_obj_static(uint32_t, void*);
-extern "C" int8_t art_quick_get_byte_instance(uint32_t, void*);
-extern "C" uint8_t art_quick_get_boolean_instance(uint32_t, void*);
-extern "C" int8_t art_quick_get_byte_static(uint32_t);
-extern "C" uint8_t art_quick_get_boolean_static(uint32_t);
-extern "C" int16_t art_quick_get_short_instance(uint32_t, void*);
-extern "C" uint16_t art_quick_get_char_instance(uint32_t, void*);
-extern "C" int16_t art_quick_get_short_static(uint32_t);
-extern "C" uint16_t art_quick_get_char_static(uint32_t);
-extern "C" int32_t art_quick_get32_instance(uint32_t, void*);
-extern "C" int32_t art_quick_get32_static(uint32_t);
+extern "C" ssize_t art_quick_get_byte_instance(uint32_t, void*);
+extern "C" size_t art_quick_get_boolean_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get_byte_static(uint32_t);
+extern "C" size_t art_quick_get_boolean_static(uint32_t);
+extern "C" ssize_t art_quick_get_short_instance(uint32_t, void*);
+extern "C" size_t art_quick_get_char_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get_short_static(uint32_t);
+extern "C" size_t art_quick_get_char_static(uint32_t);
+extern "C" ssize_t art_quick_get32_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get32_static(uint32_t);
extern "C" int64_t art_quick_get64_instance(uint32_t, void*);
extern "C" int64_t art_quick_get64_static(uint32_t);
extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index d6b7d9e..e0ec68e 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -33,7 +33,7 @@
V(AllocStringFromChars, void*, int32_t, int32_t, void*) \
V(AllocStringFromString, void*, void*) \
\
- V(InstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*) \
+ V(InstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*) \
V(CheckCast, void, const mirror::Class*, const mirror::Class*) \
\
V(InitializeStaticStorage, void*, uint32_t) \
@@ -51,16 +51,16 @@
V(Set64Static, int, uint32_t, int64_t) \
V(SetObjInstance, int, uint32_t, void*, void*) \
V(SetObjStatic, int, uint32_t, void*) \
- V(GetByteInstance, int8_t, uint32_t, void*) \
- V(GetBooleanInstance, uint8_t, uint32_t, void*) \
- V(GetByteStatic, int8_t, uint32_t) \
- V(GetBooleanStatic, uint8_t, uint32_t) \
- V(GetShortInstance, int16_t, uint32_t, void*) \
- V(GetCharInstance, uint16_t, uint32_t, void*) \
- V(GetShortStatic, int16_t, uint32_t) \
- V(GetCharStatic, uint16_t, uint32_t) \
- V(Get32Instance, int32_t, uint32_t, void*) \
- V(Get32Static, int32_t, uint32_t) \
+ V(GetByteInstance, ssize_t, uint32_t, void*) \
+ V(GetBooleanInstance, size_t, uint32_t, void*) \
+ V(GetByteStatic, ssize_t, uint32_t) \
+ V(GetBooleanStatic, size_t, uint32_t) \
+ V(GetShortInstance, ssize_t, uint32_t, void*) \
+ V(GetCharInstance, size_t, uint32_t, void*) \
+ V(GetShortStatic, ssize_t, uint32_t) \
+ V(GetCharStatic, size_t, uint32_t) \
+ V(Get32Instance, ssize_t, uint32_t, void*) \
+ V(Get32Static, ssize_t, uint32_t) \
V(Get64Instance, int64_t, uint32_t, void*) \
V(Get64Static, int64_t, uint32_t) \
V(GetObjInstance, void*, uint32_t, void*) \
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index a245f18..1a12bd4 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -55,9 +55,7 @@
return field;
}
-extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx,
- ArtMethod* referrer,
- Thread* self)
+extern "C" ssize_t artGetByteStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t));
@@ -71,9 +69,7 @@
return 0; // Will throw exception by checking with Thread::Current.
}
-extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx,
- ArtMethod* referrer,
- Thread* self)
+extern "C" size_t artGetBooleanStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t));
@@ -87,9 +83,7 @@
return 0; // Will throw exception by checking with Thread::Current.
}
-extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx,
- ArtMethod* referrer,
- Thread* self)
+extern "C" ssize_t artGetShortStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t));
@@ -103,9 +97,7 @@
return 0; // Will throw exception by checking with Thread::Current.
}
-extern "C" uint16_t artGetCharStaticFromCode(uint32_t field_idx,
- ArtMethod* referrer,
- Thread* self)
+extern "C" size_t artGetCharStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t));
@@ -119,9 +111,7 @@
return 0; // Will throw exception by checking with Thread::Current.
}
-extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx,
- ArtMethod* referrer,
- Thread* self)
+extern "C" size_t artGet32StaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int32_t));
@@ -173,10 +163,10 @@
return nullptr; // Will throw exception by checking with Thread::Current.
}
-extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx,
- mirror::Object* obj,
- ArtMethod* referrer,
- Thread* self)
+extern "C" ssize_t artGetByteInstanceFromCode(uint32_t field_idx,
+ mirror::Object* obj,
+ ArtMethod* referrer,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
@@ -194,10 +184,10 @@
return 0; // Will throw exception by checking with Thread::Current.
}
-extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx,
- mirror::Object* obj,
- ArtMethod* referrer,
- Thread* self)
+extern "C" size_t artGetBooleanInstanceFromCode(uint32_t field_idx,
+ mirror::Object* obj,
+ ArtMethod* referrer,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
@@ -214,7 +204,7 @@
}
return 0; // Will throw exception by checking with Thread::Current.
}
-extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx,
+extern "C" ssize_t artGetShortInstanceFromCode(uint32_t field_idx,
mirror::Object* obj,
ArtMethod* referrer,
Thread* self)
@@ -235,10 +225,10 @@
return 0; // Will throw exception by checking with Thread::Current.
}
-extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx,
- mirror::Object* obj,
- ArtMethod* referrer,
- Thread* self)
+extern "C" size_t artGetCharInstanceFromCode(uint32_t field_idx,
+ mirror::Object* obj,
+ ArtMethod* referrer,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
@@ -256,10 +246,10 @@
return 0; // Will throw exception by checking with Thread::Current.
}
-extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx,
- mirror::Object* obj,
- ArtMethod* referrer,
- Thread* self)
+extern "C" size_t artGet32InstanceFromCode(uint32_t field_idx,
+ mirror::Object* obj,
+ ArtMethod* referrer,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int32_t));
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1152b94..49043f6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2170,8 +2170,7 @@
if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
// If the dex cache already resolved the interface method, look whether we have
// a match in the ImtConflictTable.
- uint32_t imt_index = interface_method->GetDexMethodIndex();
- ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
+ ArtMethod* conflict_method = imt->Get(interface_method->GetImtIndex(), sizeof(void*));
if (LIKELY(conflict_method->IsRuntimeMethod())) {
ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
DCHECK(current_table != nullptr);
@@ -2222,8 +2221,8 @@
// We arrive here if we have found an implementation, and it is not in the ImtConflictTable.
// We create a new table with the new pair { interface_method, method }.
- uint32_t imt_index = interface_method->GetDexMethodIndex();
- ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
+ uint32_t imt_index = interface_method->GetImtIndex();
+ ArtMethod* conflict_method = imt->Get(imt_index, sizeof(void*));
if (conflict_method->IsRuntimeMethod()) {
ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable(
cls.Get(),
@@ -2234,7 +2233,7 @@
if (new_conflict_method != conflict_method) {
// Update the IMT if we create a new conflict method. No fence needed here, as the
// data is consistent.
- imt->Set(imt_index % ImTable::kSize,
+ imt->Set(imt_index,
new_conflict_method,
sizeof(void*));
}
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 3011112..4019a5b 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -153,6 +153,14 @@
}
}
+inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
+ // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
+ if (UNLIKELY(mark_from_read_barrier_measurements_)) {
+ return MarkFromReadBarrierWithMeasurements(from_ref);
+ }
+ return Mark(from_ref);
+}
+
inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
DCHECK(region_space_->IsInFromSpace(from_ref));
LockWord lw = from_ref->GetLockWord(false);
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index b7b5aa0..d2d2f23 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -17,7 +17,9 @@
#include "concurrent_copying.h"
#include "art_field-inl.h"
+#include "base/histogram-inl.h"
#include "base/stl_util.h"
+#include "base/systrace.h"
#include "debugger.h"
#include "gc/accounting/heap_bitmap-inl.h"
#include "gc/accounting/space_bitmap-inl.h"
@@ -39,7 +41,9 @@
static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
-ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix)
+ConcurrentCopying::ConcurrentCopying(Heap* heap,
+ const std::string& name_prefix,
+ bool measure_read_barrier_slow_path)
: GarbageCollector(heap,
name_prefix + (name_prefix.empty() ? "" : " ") +
"concurrent copying + mark sweep"),
@@ -54,6 +58,14 @@
heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff),
weak_ref_access_enabled_(true),
skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
+ measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
+ rb_slow_path_ns_(0),
+ rb_slow_path_count_(0),
+ rb_slow_path_count_gc_(0),
+ rb_slow_path_histogram_lock_("Read barrier histogram lock"),
+ rb_slow_path_time_histogram_("Mutator time in read barrier slow path", 500, 32),
+ rb_slow_path_count_total_(0),
+ rb_slow_path_count_gc_total_(0),
rb_table_(heap_->GetReadBarrierTable()),
force_evacuate_all_(false),
immune_gray_stack_lock_("concurrent copying immune gray stack lock",
@@ -162,6 +174,14 @@
MutexLock mu(Thread::Current(), mark_stack_lock_);
CHECK(false_gray_stack_.empty());
}
+
+ mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_;
+ if (measure_read_barrier_slow_path_) {
+ rb_slow_path_ns_.StoreRelaxed(0);
+ rb_slow_path_count_.StoreRelaxed(0);
+ rb_slow_path_count_gc_.StoreRelaxed(0);
+ }
+
immune_spaces_.Reset();
bytes_moved_.StoreRelaxed(0);
objects_moved_.StoreRelaxed(0);
@@ -194,7 +214,7 @@
}
// Used to switch the thread roots of a thread from from-space refs to to-space refs.
-class ConcurrentCopying::ThreadFlipVisitor : public Closure {
+class ConcurrentCopying::ThreadFlipVisitor : public Closure, public RootVisitor {
public:
ThreadFlipVisitor(ConcurrentCopying* concurrent_copying, bool use_tlab)
: concurrent_copying_(concurrent_copying), use_tlab_(use_tlab) {
@@ -221,10 +241,44 @@
thread->RevokeThreadLocalAllocationStack();
}
ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
- thread->VisitRoots(concurrent_copying_);
+ // We can use the non-CAS VisitRoots functions below because we update thread-local GC roots
+ // only.
+ thread->VisitRoots(this);
concurrent_copying_->GetBarrier().Pass(self);
}
+ void VisitRoots(mirror::Object*** roots,
+ size_t count,
+ const RootInfo& info ATTRIBUTE_UNUSED)
+ SHARED_REQUIRES(Locks::mutator_lock_) {
+ for (size_t i = 0; i < count; ++i) {
+ mirror::Object** root = roots[i];
+ mirror::Object* ref = *root;
+ if (ref != nullptr) {
+ mirror::Object* to_ref = concurrent_copying_->Mark(ref);
+ if (to_ref != ref) {
+ *root = to_ref;
+ }
+ }
+ }
+ }
+
+ void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
+ size_t count,
+ const RootInfo& info ATTRIBUTE_UNUSED)
+ SHARED_REQUIRES(Locks::mutator_lock_) {
+ for (size_t i = 0; i < count; ++i) {
+ mirror::CompressedReference<mirror::Object>* const root = roots[i];
+ if (!root->IsNull()) {
+ mirror::Object* ref = root->AsMirrorPtr();
+ mirror::Object* to_ref = concurrent_copying_->Mark(ref);
+ if (to_ref != ref) {
+ root->Assign(to_ref);
+ }
+ }
+ }
+ }
+
private:
ConcurrentCopying* const concurrent_copying_;
const bool use_tlab_;
@@ -1996,9 +2050,17 @@
MutexLock mu(Thread::Current(), skipped_blocks_lock_);
skipped_blocks_map_.clear();
}
- ReaderMutexLock mu(self, *Locks::mutator_lock_);
- WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
- heap_->ClearMarkedObjects();
+ {
+ ReaderMutexLock mu(self, *Locks::mutator_lock_);
+ WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+ heap_->ClearMarkedObjects();
+ }
+ if (measure_read_barrier_slow_path_) {
+ MutexLock mu(self, rb_slow_path_histogram_lock_);
+ rb_slow_path_time_histogram_.AdjustAndAddValue(rb_slow_path_ns_.LoadRelaxed());
+ rb_slow_path_count_total_ += rb_slow_path_count_.LoadRelaxed();
+ rb_slow_path_count_gc_total_ += rb_slow_path_count_gc_.LoadRelaxed();
+ }
}
bool ConcurrentCopying::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) {
@@ -2036,6 +2098,37 @@
region_space_->RevokeAllThreadLocalBuffers();
}
+mirror::Object* ConcurrentCopying::MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) {
+ if (Thread::Current() != thread_running_gc_) {
+ rb_slow_path_count_.FetchAndAddRelaxed(1u);
+ } else {
+ rb_slow_path_count_gc_.FetchAndAddRelaxed(1u);
+ }
+ ScopedTrace tr(__FUNCTION__);
+ const uint64_t start_time = measure_read_barrier_slow_path_ ? NanoTime() : 0u;
+ mirror::Object* ret = Mark(from_ref);
+ if (measure_read_barrier_slow_path_) {
+ rb_slow_path_ns_.FetchAndAddRelaxed(NanoTime() - start_time);
+ }
+ return ret;
+}
+
+void ConcurrentCopying::DumpPerformanceInfo(std::ostream& os) {
+ GarbageCollector::DumpPerformanceInfo(os);
+ MutexLock mu(Thread::Current(), rb_slow_path_histogram_lock_);
+ if (rb_slow_path_time_histogram_.SampleSize() > 0) {
+ Histogram<uint64_t>::CumulativeData cumulative_data;
+ rb_slow_path_time_histogram_.CreateHistogram(&cumulative_data);
+ rb_slow_path_time_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data);
+ }
+ if (rb_slow_path_count_total_ > 0) {
+ os << "Slow path count " << rb_slow_path_count_total_ << "\n";
+ }
+ if (rb_slow_path_count_gc_total_ > 0) {
+ os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n";
+ }
+}
+
} // namespace collector
} // namespace gc
} // namespace art
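MarkFromReadBarrierWithMeasurements() above counts slow-path hits and accumulates their duration into relaxed atomics that FinishPhase() later folds into a histogram. Below is a self-contained sketch of that wrapper pattern, using standard-library types instead of ART's Atomic/NanoTime (illustrative only):

#include <atomic>
#include <chrono>
#include <cstdint>

std::atomic<uint64_t> g_slow_path_count{0};
std::atomic<uint64_t> g_slow_path_ns{0};

// Count every slow-path hit; time the wrapped call only when measurement is
// enabled, and accumulate with relaxed atomics (a later phase aggregates).
template <typename Fn>
auto TimedSlowPathSketch(bool measure, Fn&& fn) -> decltype(fn()) {
  g_slow_path_count.fetch_add(1, std::memory_order_relaxed);
  const auto start = measure ? std::chrono::steady_clock::now()
                             : std::chrono::steady_clock::time_point();
  auto result = fn();
  if (measure) {
    const auto elapsed = std::chrono::steady_clock::now() - start;
    g_slow_path_ns.fetch_add(
        static_cast<uint64_t>(
            std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count()),
        std::memory_order_relaxed);
  }
  return result;
}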
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 166a1f0..6a8d052 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -58,17 +58,24 @@
// Enable verbose mode.
static constexpr bool kVerboseMode = false;
- ConcurrentCopying(Heap* heap, const std::string& name_prefix = "");
+ ConcurrentCopying(Heap* heap,
+ const std::string& name_prefix = "",
+ bool measure_read_barrier_slow_path = false);
~ConcurrentCopying();
virtual void RunPhases() OVERRIDE
- REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+ REQUIRES(!immune_gray_stack_lock_,
+ !mark_stack_lock_,
+ !rb_slow_path_histogram_lock_,
+ !skipped_blocks_lock_);
void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_)
REQUIRES(!mark_stack_lock_, !immune_gray_stack_lock_);
void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_)
REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
- void FinishPhase() REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+ void FinishPhase() REQUIRES(!mark_stack_lock_,
+ !rb_slow_path_histogram_lock_,
+ !skipped_blocks_lock_);
void BindBitmaps() SHARED_REQUIRES(Locks::mutator_lock_)
REQUIRES(!Locks::heap_bitmap_lock_);
@@ -95,7 +102,11 @@
return IsMarked(ref) == ref;
}
template<bool kGrayImmuneObject = true>
- ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+ ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref)
+ SHARED_REQUIRES(Locks::mutator_lock_)
+ REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+ ALWAYS_INLINE mirror::Object* MarkFromReadBarrier(mirror::Object* from_ref)
+ SHARED_REQUIRES(Locks::mutator_lock_)
REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
bool IsMarking() const {
return is_marking_;
@@ -203,6 +214,10 @@
REQUIRES(!mark_stack_lock_);
void ScanImmuneObject(mirror::Object* obj)
SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+ mirror::Object* MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref)
+ SHARED_REQUIRES(Locks::mutator_lock_)
+ REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+ void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_);
space::RegionSpace* region_space_; // The underlying region space.
std::unique_ptr<Barrier> gc_barrier_;
@@ -251,6 +266,20 @@
Atomic<size_t> to_space_bytes_skipped_;
Atomic<size_t> to_space_objects_skipped_;
+ // If measure_read_barrier_slow_path_ is true, we measure how much time is spent in
+ // MarkFromReadBarrier and log the totals.
+ bool measure_read_barrier_slow_path_;
+ // mark_from_read_barrier_measurements_ is true if measure_read_barrier_slow_path_ is true.
+ bool mark_from_read_barrier_measurements_;
+ Atomic<uint64_t> rb_slow_path_ns_;
+ Atomic<uint64_t> rb_slow_path_count_;
+ Atomic<uint64_t> rb_slow_path_count_gc_;
+ mutable Mutex rb_slow_path_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+ Histogram<uint64_t> rb_slow_path_time_histogram_ GUARDED_BY(rb_slow_path_histogram_lock_);
+ uint64_t rb_slow_path_count_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+ uint64_t rb_slow_path_count_gc_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+
accounting::ReadBarrierTable* rb_table_;
bool force_evacuate_all_; // True if all regions are evacuated.
Atomic<bool> updated_all_immune_objects_;
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 580486a..e0b71a7 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -181,7 +181,7 @@
void RecordFree(const ObjectBytePair& freed);
// Record a free of large objects.
void RecordFreeLOS(const ObjectBytePair& freed);
- void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
+ virtual void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
// Helper functions for querying if objects are marked. These are used for processing references,
// and will be used for reading system weaks while the GC is running.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 8da1493..6f4767e 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -121,6 +121,10 @@
static constexpr size_t kNativeAllocationHistogramBuckets = 16;
+// Extra amount added to the heap growth multiplier. Used to adjust the GC ergonomics for the
+// read barrier config.
+static constexpr double kExtraHeapGrowthMultiplier = kUseReadBarrier ? 1.0 : 0.0;
+
static inline bool CareAboutPauseTimes() {
return Runtime::Current()->InJankPerceptibleProcessState();
}
@@ -153,6 +157,7 @@
bool verify_pre_sweeping_rosalloc,
bool verify_post_gc_rosalloc,
bool gc_stress_mode,
+ bool measure_gc_performance,
bool use_homogeneous_space_compaction_for_oom,
uint64_t min_interval_homogeneous_space_compaction_by_oom)
: non_moving_space_(nullptr),
@@ -220,7 +225,8 @@
min_free_(min_free),
max_free_(max_free),
target_utilization_(target_utilization),
- foreground_heap_growth_multiplier_(foreground_heap_growth_multiplier),
+ foreground_heap_growth_multiplier_(
+ foreground_heap_growth_multiplier + kExtraHeapGrowthMultiplier),
total_wait_time_(0),
verify_object_mode_(kVerifyObjectModeDisabled),
disable_moving_gc_count_(0),
@@ -594,7 +600,9 @@
garbage_collectors_.push_back(semi_space_collector_);
}
if (MayUseCollector(kCollectorTypeCC)) {
- concurrent_copying_collector_ = new collector::ConcurrentCopying(this);
+ concurrent_copying_collector_ = new collector::ConcurrentCopying(this,
+ "",
+ measure_gc_performance);
garbage_collectors_.push_back(concurrent_copying_collector_);
}
if (MayUseCollector(kCollectorTypeMC)) {
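kExtraHeapGrowthMultiplier above shifts the foreground heap growth multiplier up by 1.0 whenever the read-barrier (CC) configuration is built in. With made-up numbers, the effect looks like this (a sketch, not Heap's code):

constexpr bool kSketchUseReadBarrier = true;                 // build-time flag
constexpr double kSketchExtraGrowth = kSketchUseReadBarrier ? 1.0 : 0.0;

constexpr double EffectiveForegroundMultiplier(double configured) {
  return configured + kSketchExtraGrowth;   // e.g. 2.0 becomes 3.0 under CC
}
static_assert(EffectiveForegroundMultiplier(2.0) == 3.0, "2.0 + 1.0 == 3.0");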
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 6fb048a..bb0d11a 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -182,6 +182,7 @@
bool verify_pre_sweeping_rosalloc,
bool verify_post_gc_rosalloc,
bool gc_stress_mode,
+ bool measure_gc_performance,
bool use_homogeneous_space_compaction,
uint64_t min_interval_homogeneous_space_compaction_by_oom);
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 8c42b3a..f1f7f42 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -324,7 +324,7 @@
} else {
while (true) {
// Mterp does not support all instrumentation/debugging.
- if (MterpShouldSwitchInterpreters()) {
+ if (MterpShouldSwitchInterpreters() != 0) {
return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
false);
}
diff --git a/runtime/interpreter/mterp/arm64/fbinop2addr.S b/runtime/interpreter/mterp/arm64/fbinop2addr.S
index 0d57cbf..04236ad 100644
--- a/runtime/interpreter/mterp/arm64/fbinop2addr.S
+++ b/runtime/interpreter/mterp/arm64/fbinop2addr.S
@@ -7,8 +7,7 @@
*/
/* binop/2addr vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
$instr // s2<- op
diff --git a/runtime/interpreter/mterp/arm64/fcmp.S b/runtime/interpreter/mterp/arm64/fcmp.S
index a45e789..cad6318 100644
--- a/runtime/interpreter/mterp/arm64/fcmp.S
+++ b/runtime/interpreter/mterp/arm64/fcmp.S
@@ -1,4 +1,4 @@
-%default {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1","cond":"le"}
+%default {"wide":"", "r1":"s1", "r2":"s2", "cond":"lt"}
/*
* Compare two floating-point values. Puts 0, 1, or -1 into the
* destination register based on the results of the comparison.
@@ -10,10 +10,9 @@
lsr w3, w0, #8 // w3<- CC
GET_VREG$wide $r1, w2
GET_VREG$wide $r2, w3
- mov w0, #$default_val
fcmp $r1, $r2
- csneg w0, w0, w0, $cond
- csel w0, wzr, w0, eq
+ cset w0, ne
+ cneg w0, w0, $cond
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w4 // vAA<- w0
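The rewritten fcmp sequence computes the -1/0/+1 result in two instructions: cset w0, ne produces 0 or 1, and the conditional negate folds both the "less than" and the NaN case into the per-opcode condition (lt for the cmpl flavours, cc for cmpg). A C++ model of the cmpl-float case, as I read the new sequence:

// Sketch of cmpl-float semantics as produced by "cset w0, ne; cneg w0, w0, lt":
// equal -> 0, greater -> +1, less or unordered (NaN) -> -1.
int CmplFloatSketch(float a, float b) {
  int result = (a != b) ? 1 : 0;   // cset w0, ne: NaN compares as "not equal"
  if (!(a >= b)) {                 // AArch64 "lt" after fcmp: less than, or unordered
    result = -result;              // cneg w0, w0, lt
  }
  return result;
}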
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index 2d3a11e..7628ed3 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -234,7 +234,7 @@
#if MTERP_LOGGING
mov x0, xSELF
add x1, xFP, #OFF_FP_SHADOWFRAME
- sbfm x2, xINST, 0, 31
+ sxtw x2, wINST
bl MterpLogOSR
#endif
mov x0, #1 // Signal normal return
diff --git a/runtime/interpreter/mterp/arm64/funopNarrow.S b/runtime/interpreter/mterp/arm64/funopNarrow.S
index 9f5ad1e..aed830b 100644
--- a/runtime/interpreter/mterp/arm64/funopNarrow.S
+++ b/runtime/interpreter/mterp/arm64/funopNarrow.S
@@ -8,10 +8,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG $srcreg, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
$instr // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG $tgtreg, w4 // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopNarrower.S b/runtime/interpreter/mterp/arm64/funopNarrower.S
index 411396b..6fddfea 100644
--- a/runtime/interpreter/mterp/arm64/funopNarrower.S
+++ b/runtime/interpreter/mterp/arm64/funopNarrower.S
@@ -7,10 +7,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE $srcreg, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
$instr // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG $tgtreg, w4 // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopWide.S b/runtime/interpreter/mterp/arm64/funopWide.S
index d83b39c..409e26b 100644
--- a/runtime/interpreter/mterp/arm64/funopWide.S
+++ b/runtime/interpreter/mterp/arm64/funopWide.S
@@ -7,10 +7,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE $srcreg, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
$instr // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE $tgtreg, w4 // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopWider.S b/runtime/interpreter/mterp/arm64/funopWider.S
index 50a73f1..4c91ebc 100644
--- a/runtime/interpreter/mterp/arm64/funopWider.S
+++ b/runtime/interpreter/mterp/arm64/funopWider.S
@@ -7,10 +7,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG $srcreg, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
$instr // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE $tgtreg, w4 // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/op_cmp_long.S b/runtime/interpreter/mterp/arm64/op_cmp_long.S
index 982e5b1..c4ad984 100644
--- a/runtime/interpreter/mterp/arm64/op_cmp_long.S
+++ b/runtime/interpreter/mterp/arm64/op_cmp_long.S
@@ -5,8 +5,8 @@
GET_VREG_WIDE x1, w2
GET_VREG_WIDE x2, w3
cmp x1, x2
- csinc w0, wzr, wzr, eq
- csneg w0, w0, w0, ge
+ cset w0, ne
+ cneg w0, w0, lt
FETCH_ADVANCE_INST 2 // advance rPC, load wINST
SET_VREG w0, w4
GET_INST_OPCODE ip // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_double.S b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
index 14f9ff8..30cb7eb 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpg_double.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"1", "cond":"pl"}
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "cond":"cc"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_float.S b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
index 3a20cba..ba23f43 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpg_float.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"1", "cond":"pl"}
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "cond":"cc"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_double.S b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
index 06d5917..c739685 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpl_double.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"-1", "cond":"le"}
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "cond":"lt"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_float.S b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
index d87d086..32a9319 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpl_float.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1", "cond":"le"}
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "cond":"lt"}
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_16.S b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
index e43628b..553d481 100644
--- a/runtime/interpreter/mterp/arm64/op_const_wide_16.S
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
@@ -1,8 +1,7 @@
/* const-wide/16 vAA, #+BBBB */
- FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended
+ FETCH_S x0, 1 // x0<- ssssssssssssBBBB (sign-extended)
lsr w3, wINST, #8 // w3<- AA
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- sbfm x0, x0, 0, 31
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3
GOTO_OPCODE ip // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_32.S b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
index 527f7d8..9dc4fc3 100644
--- a/runtime/interpreter/mterp/arm64/op_const_wide_32.S
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
@@ -1,10 +1,9 @@
/* const-wide/32 vAA, #+BBBBbbbb */
- FETCH w0, 1 // w0<- 0000bbbb (low)
+ FETCH w0, 1 // x0<- 000000000000bbbb (low)
lsr w3, wINST, #8 // w3<- AA
- FETCH_S w2, 2 // w2<- ssssBBBB (high)
+ FETCH_S x2, 2 // x2<- ssssssssssssBBBB (high)
FETCH_ADVANCE_INST 3 // advance rPC, load wINST
GET_INST_OPCODE ip // extract opcode from wINST
- orr w0, w0, w2, lsl #16 // w0<- BBBBbbbb
- sbfm x0, x0, 0, 31
+ orr x0, x0, x2, lsl #16 // x0<- ssssssssBBBBbbbb
SET_VREG_WIDE x0, w3
GOTO_OPCODE ip // jump to next instruction
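These const-wide changes rely on FETCH_S loading straight into an X register, so the 16-bit code unit arrives already sign-extended to 64 bits and the follow-up sbfm is unnecessary. A C++ rendering of the const-wide/32 operand assembly (the helper name is made up):

#include <cstdint>

int64_t ConstWide32OperandSketch(uint16_t low_unit, uint16_t high_unit) {
  const uint64_t low = low_unit;                           // FETCH   w0, 1
  const int64_t high = static_cast<int16_t>(high_unit);    // FETCH_S x2, 2 (sign-extended)
  // orr x0, x0, x2, lsl #16: combine into a sign-extended 64-bit constant.
  return static_cast<int64_t>(low | (static_cast<uint64_t>(high) << 16));
}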
diff --git a/runtime/interpreter/mterp/arm64/op_fill_array_data.S b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
index f50d9e4..86fa6db 100644
--- a/runtime/interpreter/mterp/arm64/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
@@ -1,11 +1,11 @@
/* fill-array-data vAA, +BBBBBBBB */
EXPORT_PC
- FETCH w0, 1 // w0<- bbbb (lo)
- FETCH w1, 2 // w1<- BBBB (hi)
+ FETCH w0, 1 // x0<- 000000000000bbbb (lo)
+ FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi)
lsr w3, wINST, #8 // w3<- AA
- orr w1, w0, w1, lsl #16 // w1<- BBBBbbbb
+ orr x1, x0, x1, lsl #16 // x1<- ssssssssBBBBbbbb
GET_VREG w0, w3 // w0<- vAA (array object)
- add x1, xPC, w1, lsl #1 // w1<- PC + BBBBbbbb*2 (array data off.)
+ add x1, xPC, x1, lsl #1 // x1<- PC + ssssssssBBBBbbbb*2 (array data off.)
bl MterpFillArrayData // (obj, payload)
cbz w0, MterpPossibleException // exception?
FETCH_ADVANCE_INST 3 // advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm64/op_iget_quick.S b/runtime/interpreter/mterp/arm64/op_iget_quick.S
index 45c68a3..699b2c4 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_quick.S
@@ -5,8 +5,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
$load w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
$extend
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
index 2480d2d..30b30c2 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
@@ -3,7 +3,7 @@
FETCH w4, 1 // w4<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cbz w3, common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
add x4, x3, x4 // create direct pointer
ldr x0, [x4]
FETCH_ADVANCE_INST 2 // advance rPC, load wINST
diff --git a/runtime/interpreter/mterp/arm64/op_instance_of.S b/runtime/interpreter/mterp/arm64/op_instance_of.S
index 647bc75..a56705a 100644
--- a/runtime/interpreter/mterp/arm64/op_instance_of.S
+++ b/runtime/interpreter/mterp/arm64/op_instance_of.S
@@ -13,8 +13,7 @@
mov x3, xSELF // w3<- self
bl MterpInstanceOf // (index, &obj, method, self)
ldr x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
- lsr w2, wINST, #8 // w2<- A+
- and w2, w2, #15 // w2<- A
+ ubfx w2, wINST, #8, #4 // w2<- A
PREFETCH_INST 2
cbnz x1, MterpException
ADVANCE 2 // advance rPC
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S
index 13d2120..35830f3 100644
--- a/runtime/interpreter/mterp/arm64/op_int_to_long.S
+++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S
@@ -1 +1 @@
-%include "arm64/funopWider.S" {"instr":"sbfm x0, x0, 0, 31", "srcreg":"w0", "tgtreg":"x0"}
+%include "arm64/funopWider.S" {"instr":"sxtw x0, w0", "srcreg":"w0", "tgtreg":"x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
index 27b5dc5..566e2bf 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
@@ -3,8 +3,7 @@
FETCH w3, 1 // w3<- field byte offset
GET_VREG w2, w2 // w2<- fp[B], the object pointer
ubfx w0, wINST, #8, #4 // w0<- A
- cmp w2, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w2, common_errNullObject // object was null
GET_VREG_WIDE x0, w0 // x0-< fp[A]
FETCH_ADVANCE_INST 2 // advance rPC, load wINST
add x1, x2, x3 // create a direct pointer
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index 1456f1a..408e030 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -9,12 +9,12 @@
* for: packed-switch, sparse-switch
*/
/* op vAA, +BBBB */
- FETCH w0, 1 // w0<- bbbb (lo)
- FETCH w1, 2 // w1<- BBBB (hi)
+ FETCH w0, 1 // x0<- 000000000000bbbb (lo)
+ FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi)
lsr w3, wINST, #8 // w3<- AA
- orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb
+ orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb
GET_VREG w1, w3 // w1<- vAA
- add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2
+ add x0, xPC, x0, lsl #1 // x0<- PC + ssssssssBBBBbbbb*2
bl $func // w0<- code-unit branch offset
- sbfm xINST, x0, 0, 31
+ sxtw xINST, w0
b MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
index 0b91891..95f81c5 100644
--- a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
@@ -1,12 +1,10 @@
/* rem vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
bl fmodf
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG s0, w9
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int.S b/runtime/interpreter/mterp/arm64/op_shl_int.S
index bd0f237..3062a3f 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"lsl w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
index b4671d2..9a7e09f 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"lsl w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
index 4dd32e0..17f57f9 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"}
+%include "arm64/binopLit8.S" {"instr":"lsl w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int.S b/runtime/interpreter/mterp/arm64/op_shr_int.S
index c214a18..493b740 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"asr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
index 3c1484b..6efe8ee 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"asr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
index 26d5024..274080c 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"}
+%include "arm64/binopLit8.S" {"instr":"asr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int.S b/runtime/interpreter/mterp/arm64/op_ushr_int.S
index bb8382b..005452b 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"lsr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
index dbccb99..1cb8cb7 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"lsr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
index 35090c4..ff30e1f 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"}
+%include "arm64/binopLit8.S" {"instr":"lsr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/shiftWide.S b/runtime/interpreter/mterp/arm64/shiftWide.S
index 6306fca..dcb2fb7 100644
--- a/runtime/interpreter/mterp/arm64/shiftWide.S
+++ b/runtime/interpreter/mterp/arm64/shiftWide.S
@@ -12,8 +12,7 @@
and w1, w0, #255 // w1<- BB
GET_VREG_WIDE x1, w1 // x1<- vBB
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and x2, x2, #63 // Mask low 6
- $opcode x0, x1, x2 // Do the shift.
+ $opcode x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3 // vAA<- x0
GOTO_OPCODE ip // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/shiftWide2addr.S b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
index 77d104a..b860dfd 100644
--- a/runtime/interpreter/mterp/arm64/shiftWide2addr.S
+++ b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
@@ -8,8 +8,7 @@
GET_VREG w1, w1 // x1<- vB
GET_VREG_WIDE x0, w2 // x0<- vA
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and x1, x1, #63 // Mask low 6 bits.
- $opcode x0, x0, x1
+ $opcode x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w2 // vAA<- result
GOTO_OPCODE ip // jump to next instruction
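Dropping the explicit "and ..., #63" is safe because AArch64 variable shifts on 64-bit registers already use only the low six bits of the shift operand, which is exactly what the dex long-shift semantics require. In C++ terms (a sketch, not interpreter code):

#include <cstdint>

int64_t ShlLongSketch(int64_t value, int32_t shift) {
  // Both the dex semantics and the AArch64 lslv instruction use shift & 63.
  return static_cast<int64_t>(static_cast<uint64_t>(value) << (shift & 63));
}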
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 8aa87b1..c25cd78 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -57,7 +57,7 @@
* Returns 3 if we don't find a match (it's the size of the sparse-switch
* instruction).
*/
-extern "C" int32_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) {
+extern "C" ssize_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) {
const int kInstrLen = 3;
uint16_t size;
const int32_t* keys;
@@ -109,7 +109,7 @@
return kInstrLen;
}
-extern "C" int32_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) {
+extern "C" ssize_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) {
const int kInstrLen = 3;
/*
@@ -142,7 +142,7 @@
return entries[index];
}
-extern "C" bool MterpShouldSwitchInterpreters()
+extern "C" size_t MterpShouldSwitchInterpreters()
SHARED_REQUIRES(Locks::mutator_lock_) {
const instrumentation::Instrumentation* const instrumentation =
Runtime::Current()->GetInstrumentation();
@@ -150,8 +150,10 @@
}
-extern "C" bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtual(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -159,8 +161,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeSuper(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeSuper(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -168,8 +172,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeInterface(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeInterface(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -177,8 +183,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeDirect(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeDirect(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -186,8 +194,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeStatic(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeStatic(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -195,8 +205,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeVirtualRange(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtualRange(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -204,8 +216,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeSuperRange(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeSuperRange(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -213,8 +227,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeInterfaceRange(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeInterfaceRange(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -222,8 +238,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeDirectRange(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeDirectRange(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -231,8 +249,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeStaticRange(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeStaticRange(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -240,8 +260,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeVirtualQuick(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtualQuick(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -249,8 +271,10 @@
self, *shadow_frame, inst, inst_data, result_register);
}
-extern "C" bool MterpInvokeVirtualQuickRange(Thread* self, ShadowFrame* shadow_frame,
- uint16_t* dex_pc_ptr, uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtualQuickRange(Thread* self,
+ ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint16_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
JValue* result_register = shadow_frame->GetResultRegister();
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -262,8 +286,10 @@
QuasiAtomic::ThreadFenceForConstructor();
}
-extern "C" bool MterpConstString(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame,
- Thread* self)
+extern "C" size_t MterpConstString(uint32_t index,
+ uint32_t tgt_vreg,
+ ShadowFrame* shadow_frame,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
String* s = ResolveString(self, *shadow_frame, index);
if (UNLIKELY(s == nullptr)) {
@@ -273,8 +299,10 @@
return false;
}
-extern "C" bool MterpConstClass(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame,
- Thread* self)
+extern "C" size_t MterpConstClass(uint32_t index,
+ uint32_t tgt_vreg,
+ ShadowFrame* shadow_frame,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
Class* c = ResolveVerifyAndClinit(index, shadow_frame->GetMethod(), self, false, false);
if (UNLIKELY(c == nullptr)) {
@@ -284,8 +312,10 @@
return false;
}
-extern "C" bool MterpCheckCast(uint32_t index, StackReference<mirror::Object>* vreg_addr,
- art::ArtMethod* method, Thread* self)
+extern "C" size_t MterpCheckCast(uint32_t index,
+ StackReference<mirror::Object>* vreg_addr,
+ art::ArtMethod* method,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
if (UNLIKELY(c == nullptr)) {
@@ -300,8 +330,10 @@
return false;
}
-extern "C" bool MterpInstanceOf(uint32_t index, StackReference<mirror::Object>* vreg_addr,
- art::ArtMethod* method, Thread* self)
+extern "C" size_t MterpInstanceOf(uint32_t index,
+ StackReference<mirror::Object>* vreg_addr,
+ art::ArtMethod* method,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
if (UNLIKELY(c == nullptr)) {
@@ -312,12 +344,12 @@
return (obj != nullptr) && obj->InstanceOf(c);
}
-extern "C" bool MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload)
+extern "C" size_t MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload)
SHARED_REQUIRES(Locks::mutator_lock_) {
return FillArrayData(obj, payload);
}
-extern "C" bool MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data)
+extern "C" size_t MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
Object* obj = nullptr;
@@ -342,7 +374,7 @@
return true;
}
-extern "C" bool MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+extern "C" size_t MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
uint32_t inst_data, Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -350,23 +382,27 @@
(self, *shadow_frame, inst, inst_data);
}
-extern "C" bool MterpIputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
- uint32_t inst_data, Thread* self)
+extern "C" size_t MterpIputObject(ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint32_t inst_data,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
const Instruction* inst = Instruction::At(dex_pc_ptr);
return DoFieldPut<InstanceObjectWrite, Primitive::kPrimNot, false, false>
(self, *shadow_frame, inst, inst_data);
}
-extern "C" bool MterpIputObjectQuick(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
- uint32_t inst_data)
+extern "C" size_t MterpIputObjectQuick(ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint32_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
const Instruction* inst = Instruction::At(dex_pc_ptr);
return DoIPutQuick<Primitive::kPrimNot, false>(*shadow_frame, inst, inst_data);
}
-extern "C" bool MterpAputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
- uint32_t inst_data)
+extern "C" size_t MterpAputObject(ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint32_t inst_data)
SHARED_REQUIRES(Locks::mutator_lock_) {
const Instruction* inst = Instruction::At(dex_pc_ptr);
Object* a = shadow_frame->GetVRegReference(inst->VRegB_23x());
@@ -383,24 +419,27 @@
return false;
}
-extern "C" bool MterpFilledNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
- Thread* self)
+extern "C" size_t MterpFilledNewArray(ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
const Instruction* inst = Instruction::At(dex_pc_ptr);
return DoFilledNewArray<false, false, false>(inst, *shadow_frame, self,
shadow_frame->GetResultRegister());
}
-extern "C" bool MterpFilledNewArrayRange(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
- Thread* self)
+extern "C" size_t MterpFilledNewArrayRange(ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
const Instruction* inst = Instruction::At(dex_pc_ptr);
return DoFilledNewArray<true, false, false>(inst, *shadow_frame, self,
shadow_frame->GetResultRegister());
}
-extern "C" bool MterpNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
- uint32_t inst_data, Thread* self)
+extern "C" size_t MterpNewArray(ShadowFrame* shadow_frame,
+ uint16_t* dex_pc_ptr,
+ uint32_t inst_data, Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
const Instruction* inst = Instruction::At(dex_pc_ptr);
int32_t length = shadow_frame->GetVReg(inst->VRegB_22c(inst_data));
@@ -414,7 +453,7 @@
return true;
}
-extern "C" bool MterpHandleException(Thread* self, ShadowFrame* shadow_frame)
+extern "C" size_t MterpHandleException(Thread* self, ShadowFrame* shadow_frame)
SHARED_REQUIRES(Locks::mutator_lock_) {
DCHECK(self->IsExceptionPending());
const instrumentation::Instrumentation* const instrumentation =
@@ -526,14 +565,16 @@
}
}
-extern "C" bool MterpSuspendCheck(Thread* self)
+extern "C" size_t MterpSuspendCheck(Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
self->AllowThreadSuspension();
return MterpShouldSwitchInterpreters();
}
-extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer,
- uint64_t* new_value, Thread* self)
+extern "C" ssize_t artSet64IndirectStaticFromMterp(uint32_t field_idx,
+ ArtMethod* referrer,
+ uint64_t* new_value,
+ Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
@@ -551,8 +592,10 @@
return -1; // failure
}
-extern "C" int artSet8InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint8_t new_value,
- ArtMethod* referrer)
+extern "C" ssize_t artSet8InstanceFromMterp(uint32_t field_idx,
+ mirror::Object* obj,
+ uint8_t new_value,
+ ArtMethod* referrer)
SHARED_REQUIRES(Locks::mutator_lock_) {
ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int8_t));
if (LIKELY(field != nullptr && obj != nullptr)) {
@@ -568,8 +611,10 @@
return -1; // failure
}
-extern "C" int artSet16InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint16_t new_value,
- ArtMethod* referrer)
+extern "C" ssize_t artSet16InstanceFromMterp(uint32_t field_idx,
+ mirror::Object* obj,
+ uint16_t new_value,
+ ArtMethod* referrer)
SHARED_REQUIRES(Locks::mutator_lock_) {
ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
sizeof(int16_t));
@@ -586,8 +631,10 @@
return -1; // failure
}
-extern "C" int artSet32InstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
- uint32_t new_value, ArtMethod* referrer)
+extern "C" ssize_t artSet32InstanceFromMterp(uint32_t field_idx,
+ mirror::Object* obj,
+ uint32_t new_value,
+ ArtMethod* referrer)
SHARED_REQUIRES(Locks::mutator_lock_) {
ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
sizeof(int32_t));
@@ -598,8 +645,10 @@
return -1; // failure
}
-extern "C" int artSet64InstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
- uint64_t* new_value, ArtMethod* referrer)
+extern "C" ssize_t artSet64InstanceFromMterp(uint32_t field_idx,
+ mirror::Object* obj,
+ uint64_t* new_value,
+ ArtMethod* referrer)
SHARED_REQUIRES(Locks::mutator_lock_) {
ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
sizeof(int64_t));
@@ -610,8 +659,10 @@
return -1; // failure
}
-extern "C" int artSetObjInstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
- mirror::Object* new_value, ArtMethod* referrer)
+extern "C" ssize_t artSetObjInstanceFromMterp(uint32_t field_idx,
+ mirror::Object* obj,
+ mirror::Object* new_value,
+ ArtMethod* referrer)
SHARED_REQUIRES(Locks::mutator_lock_) {
ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
sizeof(mirror::HeapReference<mirror::Object>));
@@ -651,7 +702,7 @@
* to the full instrumentation via MterpAddHotnessBatch. Called once on entry to the method,
* and regenerated following batch updates.
*/
-extern "C" int MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame)
+extern "C" ssize_t MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame)
SHARED_REQUIRES(Locks::mutator_lock_) {
uint16_t hotness_count = method->GetCounter();
int32_t countdown_value = jit::kJitHotnessDisabled;
@@ -689,7 +740,7 @@
* Report a batch of hotness events to the instrumentation and then return the new
* countdown value to the next time we should report.
*/
-extern "C" int16_t MterpAddHotnessBatch(ArtMethod* method,
+extern "C" ssize_t MterpAddHotnessBatch(ArtMethod* method,
ShadowFrame* shadow_frame,
Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -702,7 +753,7 @@
}
// TUNING: Unused by arm/arm64/x86/x86_64. Remove when mips/mips64 mterps support batch updates.
-extern "C" bool MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+extern "C" size_t MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
SHARED_REQUIRES(Locks::mutator_lock_) {
ArtMethod* method = shadow_frame->GetMethod();
JValue* result = shadow_frame->GetResultRegister();
@@ -719,9 +770,9 @@
}
}
-extern "C" bool MterpMaybeDoOnStackReplacement(Thread* self,
- ShadowFrame* shadow_frame,
- int32_t offset)
+extern "C" size_t MterpMaybeDoOnStackReplacement(Thread* self,
+ ShadowFrame* shadow_frame,
+ int32_t offset)
SHARED_REQUIRES(Locks::mutator_lock_) {
ArtMethod* method = shadow_frame->GetMethod();
JValue* result = shadow_frame->GetResultRegister();
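
The hotness comments above (MterpSetUpHotnessCountdown, MterpAddHotnessBatch) describe a countdown that buffers interpreter events locally and reports them to the instrumentation in batches, recomputing the countdown on method entry and after each batch. Below is a minimal standalone C++ sketch of that countdown-and-batch pattern; the constants (kBatchSize, kHotThreshold), the MethodCounter type, and the helper names are made up for illustration and are not ART's.

#include <cstdint>
#include <cstdio>

// All names and constants here are illustrative stand-ins, not ART's.
constexpr int32_t kBatchSize = 256;       // report to instrumentation every 256 events
constexpr int32_t kHotThreshold = 10000;  // pretend JIT hotness threshold

struct MethodCounter { int32_t total = 0; };

// Recompute how many events may be buffered locally before the next report,
// mirroring "called once on entry to the method, and regenerated following
// batch updates" from the comment above.
int32_t SetUpCountdown(const MethodCounter& m) {
  int32_t remaining = kHotThreshold - m.total;
  if (remaining <= 0) {
    return 0;  // already hot: report immediately
  }
  return remaining < kBatchSize ? remaining : kBatchSize;
}

// Flush a batch into the shared counter and return the next countdown value.
int32_t AddBatch(MethodCounter* m, int32_t batch) {
  m->total += batch;
  if (m->total >= kHotThreshold) {
    std::printf("method is hot after %d events\n", m->total);
  }
  return SetUpCountdown(*m);
}

int main() {
  MethodCounter m;
  int32_t countdown = SetUpCountdown(m);
  while (countdown > 0) {
    countdown = AddBatch(&m, countdown);  // stand-in for the interpreter loop
  }
  return 0;
}
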
diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h
index 88e17bc..45ab98b 100644
--- a/runtime/interpreter/mterp/mterp.h
+++ b/runtime/interpreter/mterp/mterp.h
@@ -30,7 +30,12 @@
void InitMterpTls(Thread* self);
void CheckMterpAsmConstants();
-extern "C" bool MterpShouldSwitchInterpreters();
+
+// The return type should be 'bool' but our assembly stubs expect 'bool'
+// to be zero-extended to the whole register and that's broken on x86-64
+// as a 'bool' is returned in 'al' and the rest of 'rax' is garbage.
+// TODO: Fix mterp and stubs and revert this workaround. http://b/30232671
+extern "C" size_t MterpShouldSwitchInterpreters();
// Poison value for TestExportPC. If we segfault with this value, it means that a mterp
// handler for a recent opcode failed to export the Dalvik PC prior to a possible exit from
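
The comment above explains why these entry points now return size_t rather than bool: the assembly stubs test the whole return register, but on x86-64 a bool return only defines 'al' and leaves the rest of 'rax' as garbage. A minimal sketch of the same workaround pattern follows; ShouldSwitch is a hypothetical helper, not one of the real mterp entry points.

#include <cstddef>
#include <cstdio>

// Hypothetical helper, not an ART entry point. It is logically boolean, but is
// declared to return size_t so the value occupies the full return register:
// with a plain 'bool' on x86-64 only 'al' is defined and the upper bits of
// 'rax' may be garbage, which breaks stubs that test the whole register.
extern "C" size_t ShouldSwitch(int hotness) {
  return hotness > 100 ? 1u : 0u;  // always a full-width 0 or 1
}

int main() {
  // A caller (or an assembly stub doing 'test %rax, %rax') can now rely on the
  // entire register being zero or non-zero.
  if (ShouldSwitch(250) != 0u) {
    std::printf("switching interpreters\n");
  }
  return 0;
}
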
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index df0b686..e318782 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -747,10 +747,9 @@
.L_op_const_wide_16: /* 0x16 */
/* File: arm64/op_const_wide_16.S */
/* const-wide/16 vAA, #+BBBB */
- FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended
+ FETCH_S x0, 1 // x0<- ssssssssssssBBBB (sign-extended)
lsr w3, wINST, #8 // w3<- AA
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- sbfm x0, x0, 0, 31
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3
GOTO_OPCODE ip // jump to next instruction
@@ -760,13 +759,12 @@
.L_op_const_wide_32: /* 0x17 */
/* File: arm64/op_const_wide_32.S */
/* const-wide/32 vAA, #+BBBBbbbb */
- FETCH w0, 1 // w0<- 0000bbbb (low)
+ FETCH w0, 1 // x0<- 000000000000bbbb (low)
lsr w3, wINST, #8 // w3<- AA
- FETCH_S w2, 2 // w2<- ssssBBBB (high)
+ FETCH_S x2, 2 // x2<- ssssssssssssBBBB (high)
FETCH_ADVANCE_INST 3 // advance rPC, load wINST
GET_INST_OPCODE ip // extract opcode from wINST
- orr w0, w0, w2, lsl #16 // w0<- BBBBbbbb
- sbfm x0, x0, 0, 31
+ orr x0, x0, x2, lsl #16 // x0<- ssssssssBBBBbbbb
SET_VREG_WIDE x0, w3
GOTO_OPCODE ip // jump to next instruction
@@ -934,8 +932,7 @@
mov x3, xSELF // w3<- self
bl MterpInstanceOf // (index, &obj, method, self)
ldr x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
- lsr w2, wINST, #8 // w2<- A+
- and w2, w2, #15 // w2<- A
+ ubfx w2, wINST, #8, #4 // w2<- A
PREFETCH_INST 2
cbnz x1, MterpException
ADVANCE 2 // advance rPC
@@ -1053,12 +1050,12 @@
/* File: arm64/op_fill_array_data.S */
/* fill-array-data vAA, +BBBBBBBB */
EXPORT_PC
- FETCH w0, 1 // w0<- bbbb (lo)
- FETCH w1, 2 // w1<- BBBB (hi)
+ FETCH w0, 1 // x0<- 000000000000bbbb (lo)
+ FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi)
lsr w3, wINST, #8 // w3<- AA
- orr w1, w0, w1, lsl #16 // w1<- BBBBbbbb
+ orr x1, x0, x1, lsl #16 // x1<- ssssssssBBBBbbbb
GET_VREG w0, w3 // w0<- vAA (array object)
- add x1, xPC, w1, lsl #1 // w1<- PC + BBBBbbbb*2 (array data off.)
+ add x1, xPC, x1, lsl #1 // x1<- PC + ssssssssBBBBbbbb*2 (array data off.)
bl MterpFillArrayData // (obj, payload)
cbz w0, MterpPossibleException // exception?
FETCH_ADVANCE_INST 3 // advance rPC, load rINST
@@ -1143,14 +1140,14 @@
* for: packed-switch, sparse-switch
*/
/* op vAA, +BBBB */
- FETCH w0, 1 // w0<- bbbb (lo)
- FETCH w1, 2 // w1<- BBBB (hi)
+ FETCH w0, 1 // x0<- 000000000000bbbb (lo)
+ FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi)
lsr w3, wINST, #8 // w3<- AA
- orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb
+ orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb
GET_VREG w1, w3 // w1<- vAA
- add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2
+ add x0, xPC, x0, lsl #1 // x0<- PC + ssssssssBBBBbbbb*2
bl MterpDoPackedSwitch // w0<- code-unit branch offset
- sbfm xINST, x0, 0, 31
+ sxtw xINST, w0
b MterpCommonTakenBranchNoFlags
/* ------------------------------ */
@@ -1168,14 +1165,14 @@
* for: packed-switch, sparse-switch
*/
/* op vAA, +BBBB */
- FETCH w0, 1 // w0<- bbbb (lo)
- FETCH w1, 2 // w1<- BBBB (hi)
+ FETCH w0, 1 // x0<- 000000000000bbbb (lo)
+ FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi)
lsr w3, wINST, #8 // w3<- AA
- orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb
+ orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb
GET_VREG w1, w3 // w1<- vAA
- add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2
+ add x0, xPC, x0, lsl #1 // x0<- PC + ssssssssBBBBbbbb*2
bl MterpDoSparseSwitch // w0<- code-unit branch offset
- sbfm xINST, x0, 0, 31
+ sxtw xINST, w0
b MterpCommonTakenBranchNoFlags
@@ -1195,10 +1192,9 @@
lsr w3, w0, #8 // w3<- CC
GET_VREG s1, w2
GET_VREG s2, w3
- mov w0, #-1
fcmp s1, s2
- csneg w0, w0, w0, le
- csel w0, wzr, w0, eq
+ cset w0, ne
+ cneg w0, w0, lt
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w4 // vAA<- w0
@@ -1221,10 +1217,9 @@
lsr w3, w0, #8 // w3<- CC
GET_VREG s1, w2
GET_VREG s2, w3
- mov w0, #1
fcmp s1, s2
- csneg w0, w0, w0, pl
- csel w0, wzr, w0, eq
+ cset w0, ne
+ cneg w0, w0, cc
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w4 // vAA<- w0
@@ -1247,10 +1242,9 @@
lsr w3, w0, #8 // w3<- CC
GET_VREG_WIDE d1, w2
GET_VREG_WIDE d2, w3
- mov w0, #-1
fcmp d1, d2
- csneg w0, w0, w0, le
- csel w0, wzr, w0, eq
+ cset w0, ne
+ cneg w0, w0, lt
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w4 // vAA<- w0
@@ -1273,10 +1267,9 @@
lsr w3, w0, #8 // w3<- CC
GET_VREG_WIDE d1, w2
GET_VREG_WIDE d2, w3
- mov w0, #1
fcmp d1, d2
- csneg w0, w0, w0, pl
- csel w0, wzr, w0, eq
+ cset w0, ne
+ cneg w0, w0, cc
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w4 // vAA<- w0
@@ -1294,8 +1287,8 @@
GET_VREG_WIDE x1, w2
GET_VREG_WIDE x2, w3
cmp x1, x2
- csinc w0, wzr, wzr, eq
- csneg w0, w0, w0, ge
+ cset w0, ne
+ cneg w0, w0, lt
FETCH_ADVANCE_INST 2 // advance rPC, load wINST
SET_VREG w0, w4
GET_INST_OPCODE ip // extract opcode from wINST
@@ -3345,11 +3338,10 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG w0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
- sbfm x0, x0, 0, 31 // d0<- op
+ sxtw x0, w0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE x0, w4 // vA<- d0
GOTO_OPCODE ip // jump to next instruction
@@ -3369,10 +3361,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG w0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
scvtf s0, w0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG s0, w4 // vA<- d0
@@ -3392,10 +3383,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG w0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
scvtf d0, w0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE d0, w4 // vA<- d0
@@ -3415,10 +3405,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE x0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
// d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG w0, w4 // vA<- d0
@@ -3438,10 +3427,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE x0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
scvtf s0, x0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG s0, w4 // vA<- d0
@@ -3461,10 +3449,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE x0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
scvtf d0, x0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE d0, w4 // vA<- d0
@@ -3485,10 +3472,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG s0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvtzs w0, s0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG w0, w4 // vA<- d0
@@ -3508,10 +3494,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG s0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvtzs x0, s0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE x0, w4 // vA<- d0
@@ -3531,10 +3516,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG s0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvt d0, s0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE d0, w4 // vA<- d0
@@ -3554,10 +3538,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE d0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvtzs w0, d0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG w0, w4 // vA<- d0
@@ -3577,10 +3560,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE d0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvtzs x0, d0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE x0, w4 // vA<- d0
@@ -3600,10 +3582,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE d0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvt s0, d0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG s0, w4 // vA<- d0
@@ -4032,7 +4013,7 @@
cbz w1, common_errDivideByZero // is second operand zero?
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsl w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -4071,7 +4052,7 @@
cbz w1, common_errDivideByZero // is second operand zero?
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
asr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -4110,7 +4091,7 @@
cbz w1, common_errDivideByZero // is second operand zero?
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -4424,8 +4405,7 @@
and w1, w0, #255 // w1<- BB
GET_VREG_WIDE x1, w1 // x1<- vBB
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and x2, x2, #63 // Mask low 6
- lsl x0, x1, x2 // Do the shift.
+ lsl x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3 // vAA<- x0
GOTO_OPCODE ip // jump to next instruction
@@ -4450,8 +4430,7 @@
and w1, w0, #255 // w1<- BB
GET_VREG_WIDE x1, w1 // x1<- vBB
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and x2, x2, #63 // Mask low 6
- asr x0, x1, x2 // Do the shift.
+ asr x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3 // vAA<- x0
GOTO_OPCODE ip // jump to next instruction
@@ -4476,8 +4455,7 @@
and w1, w0, #255 // w1<- BB
GET_VREG_WIDE x1, w1 // x1<- vBB
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and x2, x2, #63 // Mask low 6
- lsr x0, x1, x2 // Do the shift.
+ lsr x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3 // vAA<- x0
GOTO_OPCODE ip // jump to next instruction
@@ -5089,7 +5067,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsl w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -5125,7 +5103,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
asr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -5161,7 +5139,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -5463,8 +5441,7 @@
GET_VREG w1, w1 // x1<- vB
GET_VREG_WIDE x0, w2 // x0<- vA
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and x1, x1, #63 // Mask low 6 bits.
- lsl x0, x0, x1
+ lsl x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w2 // vAA<- result
GOTO_OPCODE ip // jump to next instruction
@@ -5485,8 +5462,7 @@
GET_VREG w1, w1 // x1<- vB
GET_VREG_WIDE x0, w2 // x0<- vA
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and x1, x1, #63 // Mask low 6 bits.
- asr x0, x0, x1
+ asr x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w2 // vAA<- result
GOTO_OPCODE ip // jump to next instruction
@@ -5507,8 +5483,7 @@
GET_VREG w1, w1 // x1<- vB
GET_VREG_WIDE x0, w2 // x0<- vA
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and x1, x1, #63 // Mask low 6 bits.
- lsr x0, x0, x1
+ lsr x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w2 // vAA<- result
GOTO_OPCODE ip // jump to next instruction
@@ -5529,8 +5504,7 @@
*/
/* binop/2addr vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
fadd s2, s0, s1 // s2<- op
@@ -5554,8 +5528,7 @@
*/
/* binop/2addr vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
fsub s2, s0, s1 // s2<- op
@@ -5579,8 +5552,7 @@
*/
/* binop/2addr vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
fmul s2, s0, s1 // s2<- op
@@ -5604,8 +5576,7 @@
*/
/* binop/2addr vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
fdiv s2, s0, s1 // s2<- op
@@ -5621,13 +5592,11 @@
/* File: arm64/op_rem_float_2addr.S */
/* rem vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
bl fmodf
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG s0, w9
@@ -6381,7 +6350,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsl w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -6417,7 +6386,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
asr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -6453,7 +6422,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -6471,8 +6440,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
ldr w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
@@ -6489,7 +6457,7 @@
FETCH w4, 1 // w4<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cbz w3, common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
add x4, x3, x4 // create direct pointer
ldr x0, [x4]
FETCH_ADVANCE_INST 2 // advance rPC, load wINST
@@ -6544,8 +6512,7 @@
FETCH w3, 1 // w3<- field byte offset
GET_VREG w2, w2 // w2<- fp[B], the object pointer
ubfx w0, wINST, #8, #4 // w0<- A
- cmp w2, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w2, common_errNullObject // object was null
GET_VREG_WIDE x0, w0 // x0-< fp[A]
FETCH_ADVANCE_INST 2 // advance rPC, load wINST
add x1, x2, x3 // create a direct pointer
@@ -6710,8 +6677,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
ldrb w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
@@ -6731,8 +6697,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
ldrsb w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
@@ -6752,8 +6717,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
ldrh w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
@@ -6773,8 +6737,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
ldrsh w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
@@ -11521,7 +11484,7 @@
#if MTERP_LOGGING
mov x0, xSELF
add x1, xFP, #OFF_FP_SHADOWFRAME
- sbfm x2, xINST, 0, 31
+ sxtw x2, wINST
bl MterpLogOSR
#endif
mov x0, #1 // Signal normal return
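
The cset/cneg sequences introduced above replace the mov/csneg/csel sequences for the cmp* opcodes, and they must preserve the Dalvik comparison bias for NaN: cmpl-* yields -1 when either operand is NaN, cmpg-* yields +1. The reference semantics are sketched below in plain C++; CmplFloat and CmpgFloat are illustrative helpers, not ART functions.

#include <cmath>
#include <cstdio>

// Illustrative helpers, not ART functions.
// cmpl-float: -1 if a < b or either operand is NaN, 0 if equal, +1 if a > b.
int CmplFloat(float a, float b) {
  if (std::isnan(a) || std::isnan(b)) return -1;  // NaN biases toward -1
  return a == b ? 0 : (a < b ? -1 : 1);
}

// cmpg-float: identical except that NaN biases toward +1.
int CmpgFloat(float a, float b) {
  if (std::isnan(a) || std::isnan(b)) return 1;
  return a == b ? 0 : (a < b ? -1 : 1);
}

int main() {
  std::printf("%d %d\n", CmplFloat(NAN, 1.0f), CmpgFloat(NAN, 1.0f));    // -1 1
  std::printf("%d %d\n", CmplFloat(1.0f, 2.0f), CmpgFloat(2.0f, 1.0f));  // -1 1
  return 0;
}
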
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index 9e2dcea..2f7b854 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -965,8 +965,8 @@
/* File: x86_64/op_fill_array_data.S */
/* fill-array-data vAA, +BBBBBBBB */
EXPORT_PC
- movl 2(rPC), %ecx # ecx <- BBBBbbbb
- leaq (rPC,%rcx,2), OUT_ARG1 # OUT_ARG1 <- PC + BBBBbbbb*2
+ movslq 2(rPC), %rcx # rcx <- ssssssssBBBBbbbb
+ leaq (rPC,%rcx,2), OUT_ARG1 # OUT_ARG1 <- PC + ssssssssBBBBbbbb*2
GET_VREG OUT_32_ARG0, rINSTq # OUT_ARG0 <- vAA (array object)
call SYMBOL(MterpFillArrayData) # (obj, payload)
testb %al, %al # 0 means an exception is thrown
@@ -1051,8 +1051,8 @@
* for: packed-switch, sparse-switch
*/
/* op vAA, +BBBB */
- movslq 2(rPC), OUT_ARG0 # rcx <- BBBBbbbb
- leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + BBBBbbbb*2
+ movslq 2(rPC), OUT_ARG0 # rcx <- ssssssssBBBBbbbb
+ leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + ssssssssBBBBbbbb*2
GET_VREG OUT_32_ARG1, rINSTq # eax <- vAA
call SYMBOL(MterpDoPackedSwitch)
testl %eax, %eax
@@ -1074,8 +1074,8 @@
* for: packed-switch, sparse-switch
*/
/* op vAA, +BBBB */
- movslq 2(rPC), OUT_ARG0 # rcx <- BBBBbbbb
- leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + BBBBbbbb*2
+ movslq 2(rPC), OUT_ARG0 # rcx <- ssssssssBBBBbbbb
+ leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + ssssssssBBBBbbbb*2
GET_VREG OUT_32_ARG1, rINSTq # eax <- vAA
call SYMBOL(MterpDoSparseSwitch)
testl %eax, %eax
diff --git a/runtime/interpreter/mterp/x86_64/op_fill_array_data.S b/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
index 626bad4..7ea36a6 100644
--- a/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
@@ -1,7 +1,7 @@
/* fill-array-data vAA, +BBBBBBBB */
EXPORT_PC
- movl 2(rPC), %ecx # ecx <- BBBBbbbb
- leaq (rPC,%rcx,2), OUT_ARG1 # OUT_ARG1 <- PC + BBBBbbbb*2
+ movslq 2(rPC), %rcx # rcx <- ssssssssBBBBbbbb
+ leaq (rPC,%rcx,2), OUT_ARG1 # OUT_ARG1 <- PC + ssssssssBBBBbbbb*2
GET_VREG OUT_32_ARG0, rINSTq # OUT_ARG0 <- vAA (array object)
call SYMBOL(MterpFillArrayData) # (obj, payload)
testb %al, %al # 0 means an exception is thrown
diff --git a/runtime/interpreter/mterp/x86_64/op_packed_switch.S b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
index fdf5a50..148552f 100644
--- a/runtime/interpreter/mterp/x86_64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
@@ -9,8 +9,8 @@
* for: packed-switch, sparse-switch
*/
/* op vAA, +BBBB */
- movslq 2(rPC), OUT_ARG0 # rcx <- BBBBbbbb
- leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + BBBBbbbb*2
+ movslq 2(rPC), OUT_ARG0 # rcx <- ssssssssBBBBbbbb
+ leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + ssssssssBBBBbbbb*2
GET_VREG OUT_32_ARG1, rINSTq # eax <- vAA
call SYMBOL($func)
testl %eax, %eax
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 9c77d38..1c31c57 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -748,21 +748,24 @@
return nullptr;
}
-ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+ArtField* Class::FindStaticField(Thread* self,
+ Class* klass,
+ const DexCache* dex_cache,
uint32_t dex_field_idx) {
- for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
+ for (Class* k = klass; k != nullptr; k = k->GetSuperClass()) {
// Is the field in this class?
ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
if (f != nullptr) {
return f;
}
- // Wrap k incase it moves during GetDirectInterface.
+ // Though GetDirectInterface() should not cause thread suspension when called
+ // from here, it takes a Handle as an argument, so we need to wrap `k`.
+ ScopedAssertNoThreadSuspension ants(self, __FUNCTION__);
StackHandleScope<1> hs(self);
- HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k));
+ Handle<mirror::Class> h_k(hs.NewHandle(k));
// Is this field in any of this class' interfaces?
for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
- StackHandleScope<1> hs2(self);
- Handle<mirror::Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i)));
+ mirror::Class* interface = GetDirectInterface(self, h_k, i);
f = FindStaticField(self, interface, dex_cache, dex_field_idx);
if (f != nullptr) {
return f;
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index f044b59..9be9f01 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1091,7 +1091,9 @@
// Finds the given static field in this class or superclass, only searches classes that
// have the same dex cache.
- static ArtField* FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+ static ArtField* FindStaticField(Thread* self,
+ Class* klass,
+ const DexCache* dex_cache,
uint32_t dex_field_idx)
SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 79b18aa..d987f65 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -342,7 +342,7 @@
return;
}
if (is_static) {
- field = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
+ field = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx);
} else {
field = klass->FindInstanceField(dex_cache.Get(), field_idx);
}
diff --git a/runtime/oat.h b/runtime/oat.h
index e506e3c..9b8f545 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '8', '3', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '8', '4', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 6728123..8700a90 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -81,11 +81,18 @@
load_executable_ = false;
}
- // If the user gave a target oat location, save that as the cached oat
- // location now so we won't try to construct the default location later.
+ std::string error_msg;
+ if (!DexLocationToOdexFilename(dex_location_, isa_, &odex_file_name_, &error_msg)) {
+ LOG(WARNING) << "Failed to determine odex file name: " << error_msg;
+ }
+
if (oat_location != nullptr) {
- cached_oat_file_name_ = std::string(oat_location);
- cached_oat_file_name_attempted_ = true;
+ oat_file_name_ = std::string(oat_location);
+ } else {
+ if (!DexLocationToOatFilename(dex_location_, isa_, &oat_file_name_, &error_msg)) {
+ LOG(WARNING) << "Failed to determine oat file name for dex location "
+ << dex_location_ << ": " << error_msg;
+ }
}
}
@@ -351,17 +358,7 @@
}
const std::string* OatFileAssistant::OdexFileName() {
- if (!cached_odex_file_name_attempted_) {
- cached_odex_file_name_attempted_ = true;
-
- std::string error_msg;
- if (!DexFilenameToOdexFilename(dex_location_, isa_, &cached_odex_file_name_, &error_msg)) {
- // If we can't figure out the odex file, we treat it as if the odex
- // file was inaccessible.
- LOG(WARNING) << "Failed to determine odex file name: " << error_msg;
- }
- }
- return cached_odex_file_name_.empty() ? nullptr : &cached_odex_file_name_;
+ return odex_file_name_.empty() ? nullptr : &odex_file_name_;
}
bool OatFileAssistant::OdexFileExists() {
@@ -412,25 +409,7 @@
}
const std::string* OatFileAssistant::OatFileName() {
- if (!cached_oat_file_name_attempted_) {
- cached_oat_file_name_attempted_ = true;
-
- // Compute the oat file name from the dex location.
- // TODO: The oat file assistant should be the definitive place for
- // determining the oat file name from the dex location, not
- // GetDalvikCacheFilename.
- std::string cache_dir = StringPrintf("%s%s",
- DalvikCacheDirectory().c_str(), GetInstructionSetString(isa_));
- std::string error_msg;
- if (!GetDalvikCacheFilename(dex_location_.c_str(),
- cache_dir.c_str(), &cached_oat_file_name_, &error_msg)) {
- // If we can't determine the oat file name, we treat the oat file as
- // inaccessible.
- LOG(WARNING) << "Failed to determine oat file name for dex location "
- << dex_location_ << ": " << error_msg;
- }
- }
- return cached_oat_file_name_.empty() ? nullptr : &cached_oat_file_name_;
+ return oat_file_name_.empty() ? nullptr : &oat_file_name_;
}
bool OatFileAssistant::OatFileExists() {
@@ -750,8 +729,10 @@
return Exec(argv, error_msg);
}
-bool OatFileAssistant::DexFilenameToOdexFilename(const std::string& location,
- InstructionSet isa, std::string* odex_filename, std::string* error_msg) {
+bool OatFileAssistant::DexLocationToOdexFilename(const std::string& location,
+ InstructionSet isa,
+ std::string* odex_filename,
+ std::string* error_msg) {
CHECK(odex_filename != nullptr);
CHECK(error_msg != nullptr);
@@ -790,9 +771,12 @@
return true;
}
-std::string OatFileAssistant::DalvikCacheDirectory() {
- // Note: We don't cache this, because it will only be called once by
- // OatFileName.
+bool OatFileAssistant::DexLocationToOatFilename(const std::string& location,
+ InstructionSet isa,
+ std::string* oat_filename,
+ std::string* error_msg) {
+ CHECK(oat_filename != nullptr);
+ CHECK(error_msg != nullptr);
// TODO: The work done in GetDalvikCache is overkill for what we need.
// Ideally a new API for getting the DalvikCacheDirectory the way we want
@@ -800,12 +784,16 @@
// of the GetDalvikCache family of functions. Until such an API is in place,
// we use GetDalvikCache to avoid duplicating the logic for determining the
// dalvik cache directory.
- std::string result;
- bool have_android_data;
- bool dalvik_cache_exists;
- bool is_global_cache;
- GetDalvikCache("", false, &result, &have_android_data, &dalvik_cache_exists, &is_global_cache);
- return result;
+ std::string dalvik_cache_dir;
+ bool ignored;
+ GetDalvikCache("", false, &dalvik_cache_dir, &ignored, &ignored, &ignored);
+
+ // TODO: The oat file assistant should be the definitive place for
+ // determining the oat file name from the dex location, not
+ // GetDalvikCacheFilename.
+ std::string cache_dir = StringPrintf("%s%s",
+ dalvik_cache_dir.c_str(), GetInstructionSetString(isa));
+ return GetDalvikCacheFilename(location.c_str(), cache_dir.c_str(), oat_filename, error_msg);
}
std::string OatFileAssistant::ImageLocation() {
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index e4aba3f..04bd20c 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -280,8 +280,21 @@
// Returns false on error, in which case error_msg describes the error and
// odex_filename is not changed.
// Neither odex_filename nor error_msg may be null.
- static bool DexFilenameToOdexFilename(const std::string& location,
- InstructionSet isa, std::string* odex_filename, std::string* error_msg);
+ static bool DexLocationToOdexFilename(const std::string& location,
+ InstructionSet isa,
+ std::string* odex_filename,
+ std::string* error_msg);
+
+ // Constructs the oat file name for the given dex location.
+ // Returns true on success, in which case oat_filename is set to the oat
+ // file name.
+ // Returns false on error, in which case error_msg describes the error and
+ // oat_filename is not changed.
+ // Neither oat_filename nor error_msg may be null.
+ static bool DexLocationToOatFilename(const std::string& location,
+ InstructionSet isa,
+ std::string* oat_filename,
+ std::string* error_msg);
static uint32_t CalculateCombinedImageChecksum(InstructionSet isa = kRuntimeISA);
@@ -293,11 +306,6 @@
std::string location;
};
- // Returns the path to the dalvik cache directory.
- // Does not check existence of the cache or try to create it.
- // Includes the trailing slash.
- // Returns an empty string if we can't get the dalvik cache directory path.
- std::string DalvikCacheDirectory();
// Returns the current image location.
// Returns an empty string if the image location could not be retrieved.
@@ -383,12 +391,9 @@
bool required_dex_checksum_found_;
bool has_original_dex_files_;
- // Cached value of the odex file name.
- // This should be accessed only by the OdexFileName() method.
// The sentinel value "" is used if the odex file name could not be
// determined.
- bool cached_odex_file_name_attempted_ = false;
- std::string cached_odex_file_name_;
+ std::string odex_file_name_;
// Cached value of the loaded odex file.
// Use the GetOdexFile method rather than accessing this directly, unless you
@@ -400,12 +405,9 @@
bool odex_file_status_attempted_ = false;
OatStatus cached_odex_file_status_;
- // Cached value of the oat file name.
- // This should be accessed only by the OatFileName() method.
// The sentinel value "" is used if the oat file name could not be
// determined.
- bool cached_oat_file_name_attempted_ = false;
- std::string cached_oat_file_name_;
+ std::string oat_file_name_;
// Cached value of the loaded oat file.
// Use the GetOatFile method rather than accessing this directly, unless you
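
The OatFileAssistant change above replaces the lazy cached_*_attempted_ pattern with file names computed once in the constructor, keeping the empty string as the could-not-determine sentinel. A self-contained sketch of that shape follows; FileNameHolder and its placeholder DeriveName rule are hypothetical and do not reproduce ART's actual odex/oat path derivation.

#include <iostream>
#include <string>

// Hypothetical standalone sketch: compute the derived file name once in the
// constructor and keep "" as the "could not determine" sentinel.
class FileNameHolder {
 public:
  explicit FileNameHolder(const std::string& dex_location) {
    std::string error_msg;
    if (!DeriveName(dex_location, &oat_file_name_, &error_msg)) {
      std::cerr << "Failed to determine oat file name: " << error_msg << "\n";
    }
  }

  // Mirrors OatFileName(): nullptr when the name could not be determined.
  const std::string* OatFileName() const {
    return oat_file_name_.empty() ? nullptr : &oat_file_name_;
  }

 private:
  static bool DeriveName(const std::string& location, std::string* out,
                         std::string* error_msg) {
    if (location.find('/') == std::string::npos) {
      *error_msg = "location has no path: " + location;
      return false;
    }
    *out = location + ".oat";  // placeholder derivation, not ART's real rule
    return true;
  }

  std::string oat_file_name_;
};

int main() {
  FileNameHolder ok("/data/app/base.apk");
  FileNameHolder bad("nopath.jar");
  std::cout << (ok.OatFileName() ? *ok.OatFileName() : "<none>") << "\n";
  std::cout << (bad.OatFileName() ? *bad.OatFileName() : "<none>") << "\n";
  return 0;
}
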
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 6bccea6..39848b4 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -213,22 +213,22 @@
// generation of oat files.
static void GenerateOatForTest(const char* dex_location, CompilerFilter::Filter filter) {
// Use an oat file assistant to find the proper oat location.
- OatFileAssistant ofa(dex_location, kRuntimeISA, false);
- const std::string* oat_location = ofa.OatFileName();
- ASSERT_TRUE(oat_location != nullptr);
+ std::string oat_location;
+ std::string error_msg;
+ ASSERT_TRUE(OatFileAssistant::DexLocationToOatFilename(
+ dex_location, kRuntimeISA, &oat_location, &error_msg)) << error_msg;
std::vector<std::string> args;
args.push_back("--dex-file=" + std::string(dex_location));
- args.push_back("--oat-file=" + *oat_location);
+ args.push_back("--oat-file=" + oat_location);
args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
args.push_back("--runtime-arg");
args.push_back("-Xnorelocate");
- std::string error_msg;
ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
// Verify the oat file was generated as expected.
- std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location->c_str(),
- oat_location->c_str(),
+ std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location.c_str(),
+ oat_location.c_str(),
nullptr,
nullptr,
false,
@@ -1212,21 +1212,21 @@
oat_file_assistant.MakeUpToDate(false, &error_msg));
}
-TEST(OatFileAssistantUtilsTest, DexFilenameToOdexFilename) {
+TEST(OatFileAssistantUtilsTest, DexLocationToOdexFilename) {
std::string error_msg;
std::string odex_file;
- EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename(
+ EXPECT_TRUE(OatFileAssistant::DexLocationToOdexFilename(
"/foo/bar/baz.jar", kArm, &odex_file, &error_msg)) << error_msg;
EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file);
- EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename(
+ EXPECT_TRUE(OatFileAssistant::DexLocationToOdexFilename(
"/foo/bar/baz.funnyext", kArm, &odex_file, &error_msg)) << error_msg;
EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file);
- EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename(
+ EXPECT_FALSE(OatFileAssistant::DexLocationToOdexFilename(
"nopath.jar", kArm, &odex_file, &error_msg));
- EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename(
+ EXPECT_FALSE(OatFileAssistant::DexLocationToOdexFilename(
"/foo/bar/baz_noext", kArm, &odex_file, &error_msg));
}
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 0c3eb3b..92efa21 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -220,7 +220,7 @@
}
inline mirror::Object* ReadBarrier::Mark(mirror::Object* obj) {
- return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->Mark(obj);
+ return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj);
}
inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 21cd2aa..079c079 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -989,6 +989,7 @@
xgc_option.verify_pre_sweeping_rosalloc_,
xgc_option.verify_post_gc_rosalloc_,
xgc_option.gcstress_,
+ xgc_option.measure_,
runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM),
runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs));
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 0acc54d..e77a11e 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -869,7 +869,7 @@
bool Trace::RegisterThread(Thread* thread) {
pid_t tid = thread->GetTid();
CHECK_LT(0U, static_cast<uint32_t>(tid));
- CHECK_LT(static_cast<uint32_t>(tid), 65536U);
+ CHECK_LT(static_cast<uint32_t>(tid), kMaxThreadIdNumber);
if (!(*seen_threads_)[tid]) {
seen_threads_->set(tid);
diff --git a/runtime/trace.h b/runtime/trace.h
index 80f1a4c..9b29fb9 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -41,7 +41,9 @@
class Thread;
using DexIndexBitSet = std::bitset<65536>;
-using ThreadIDBitSet = std::bitset<65536>;
+
+constexpr size_t kMaxThreadIdNumber = kIsTargetBuild ? 65536U : 1048576U;
+using ThreadIDBitSet = std::bitset<kMaxThreadIdNumber>;
enum TracingMode {
kTracingInactive,
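
The trace change above sizes the thread-id bitset from a named constant that differs between target and host builds, and the CHECK in RegisterThread now compares against that constant instead of a 65536 literal. A small standalone sketch of the same idea follows; the *Example names and the registration logic are illustrative only, not the Trace class API.

#include <bitset>
#include <cstdint>
#include <cstdio>

// Illustrative constants and names; ART keys the real limit on kIsTargetBuild.
constexpr bool kIsTargetBuildExample = false;  // host build in this sketch
constexpr size_t kMaxThreadIdExample =
    kIsTargetBuildExample ? 65536u : 1048576u;
using ThreadIdBitSetExample = std::bitset<kMaxThreadIdExample>;

bool RegisterThread(ThreadIdBitSetExample* seen, uint32_t tid) {
  if (tid == 0u || tid >= kMaxThreadIdExample) {
    return false;  // mirrors CHECK_LT against the named constant, not a literal
  }
  if ((*seen)[tid]) {
    return false;  // already registered
  }
  seen->set(tid);
  return true;
}

int main() {
  // 1048576 bits is 128 KiB, so keep the bitset off the stack.
  static ThreadIdBitSetExample seen;
  std::printf("%d %d\n",
              RegisterThread(&seen, 70000u),   // fits only in the host-sized set
              RegisterThread(&seen, 70000u));  // duplicate registration
  return 0;
}
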
diff --git a/test/412-new-array/info.txt b/test/412-new-array/info.txt
index cb388b6..b5f834a 100644
--- a/test/412-new-array/info.txt
+++ b/test/412-new-array/info.txt
@@ -1 +1,3 @@
Simple tests for new-array, filled-new-array and fill-array-data.
+Regression test for the arm64 mterp miscalculating the fill-array-data-payload
+address, zero-extending a register instead of sign-extending.
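
The info.txt above names the bug class being regression-tested: the 32-bit code-unit offset to the fill-array-data payload must be sign-extended before it is scaled and added to the 64-bit dex PC. The tiny C++ illustration below uses made-up pc/offset values to show how zero-extension lands far past the PC instead of just before the instruction, which is exactly the layout the new smali test sets up.

#include <cstdint>
#include <cstdio>

int main() {
  // Made-up values: a 64-bit dex PC and a payload placed *before* the
  // fill-array-data instruction, i.e. a negative code-unit offset.
  uint64_t pc = 0x0000007f00000100u;
  int32_t offset_units = -8;

  // Correct: sign-extend the 32-bit offset, then scale by 2 bytes per unit.
  uint64_t good = pc + static_cast<uint64_t>(int64_t{offset_units} * 2);

  // Buggy: zero-extending 0xFFFFFFF8 points ~8 GiB past the PC instead of
  // 16 bytes before it.
  uint64_t bad = pc + static_cast<uint64_t>(uint32_t(offset_units)) * 2;

  std::printf("good=%#llx bad=%#llx\n",
              static_cast<unsigned long long>(good),
              static_cast<unsigned long long>(bad));
  return 0;
}
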
diff --git a/test/412-new-array/smali/fill_array_data.smali b/test/412-new-array/smali/fill_array_data.smali
index 34776db..2b24e56 100644
--- a/test/412-new-array/smali/fill_array_data.smali
+++ b/test/412-new-array/smali/fill_array_data.smali
@@ -15,6 +15,21 @@
.end method
+.method public static intArrayFillInstructionAfterData([I)V
+ .registers 1
+ goto :FillInstruction
+
+:ArrayData
+ .array-data 4
+ 1 2 3 4 5
+ .end array-data
+
+:FillInstruction
+ fill-array-data v0, :ArrayData
+ return-void
+
+.end method
+
.method public static shortArray([S)V
.registers 1
diff --git a/test/412-new-array/src/Main.java b/test/412-new-array/src/Main.java
index b9c2a05..d95d2c5 100644
--- a/test/412-new-array/src/Main.java
+++ b/test/412-new-array/src/Main.java
@@ -259,6 +259,45 @@
}
{
+ Method m = c.getMethod("intArrayFillInstructionAfterData", int[].class);
+ int[] array = new int[7];
+ Object[] args = { array };
+ m.invoke(null, args);
+ assertEquals(7, array.length);
+ assertEquals(1, array[0]);
+ assertEquals(2, array[1]);
+ assertEquals(3, array[2]);
+ assertEquals(4, array[3]);
+ assertEquals(5, array[4]);
+ assertEquals(0, array[5]);
+ assertEquals(0, array[6]);
+
+ array = new int[2];
+ args[0] = array;
+ Throwable exception = null;
+ try {
+ m.invoke(null, args);
+ } catch (InvocationTargetException e) {
+ exception = e.getCause();
+ assertTrue(exception instanceof IndexOutOfBoundsException);
+ }
+ assertNotNull(exception);
+ exception = null;
+ // Test that nothing has been written to the array.
+ assertEquals(0, array[0]);
+ assertEquals(0, array[1]);
+
+ args[0] = null;
+ try {
+ m.invoke(null, args);
+ } catch (InvocationTargetException e) {
+ exception = e.getCause();
+ assertTrue(exception instanceof NullPointerException);
+ }
+ assertNotNull(exception);
+ }
+
+ {
Method m = c.getMethod("shortArray", short[].class);
short[] array = new short[7];
Object[] args = { array };
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 41771b5..c125e33 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -1204,9 +1204,6 @@
/// CHECK: Deoptimize
/// CHECK: Deoptimize
/// CHECK: Deoptimize
- /// CHECK: Deoptimize
- /// CHECK: Deoptimize
- /// CHECK: Deoptimize
/// CHECK-NOT: Deoptimize
/// CHECK: Goto
/// CHECK: Goto
@@ -1217,7 +1214,7 @@
for (int i = array.length - 1 ; i >= 0; i--) {
array[i] = 1;
}
- // Several HDeoptimize will be added. Two for each index.
+ // Three HDeoptimize will be added for the bounds.
// The null check is not necessary.
for (int i = end - 2 ; i > 0; i--) {
if (expectInterpreter) {
@@ -1266,20 +1263,12 @@
/// CHECK: Deoptimize
/// CHECK: Deoptimize
/// CHECK: Deoptimize
- /// CHECK: Deoptimize
- /// CHECK: Deoptimize
- /// CHECK: Deoptimize
- /// CHECK: Deoptimize
- /// CHECK: Deoptimize
- /// CHECK: Deoptimize
- /// CHECK: Deoptimize
/// CHECK-NOT: Deoptimize
/// CHECK: Goto
/// CHECK: Goto
/// CHECK: Goto
void foo6(int[] array, int start, int end, boolean expectInterpreter) {
- // Several HDeoptimize will be added.
for (int i = end; i >= start; i--) {
if (expectInterpreter) {
assertIsInterpreted();
@@ -1398,8 +1387,8 @@
/// CHECK-NOT: Deoptimize
void foo9(int[] array, boolean expectInterpreter) {
- // Two HDeoptimize will be added. Two for the index
- // and one for null check on array.
+ // Three HDeoptimize will be added: two for the index and one for the null check on the array.
+ // Simplification then removes one redundant HDeoptimize.
for (int i = 0 ; i < 10; i++) {
if (expectInterpreter) {
assertIsInterpreted();
diff --git a/test/501-regression-packed-switch/info.txt b/test/501-regression-packed-switch/info.txt
index fbd93fa..988b220 100644
--- a/test/501-regression-packed-switch/info.txt
+++ b/test/501-regression-packed-switch/info.txt
@@ -1,2 +1,4 @@
Regression test for the interpreter and optimizing's builder which used
to trip when compiled code contained a packed switch with no targets.
+Regression test for the arm64 mterp miscalculating the switch table
+address, zero-extending a register instead of sign-extending.
diff --git a/test/501-regression-packed-switch/smali/Test.smali b/test/501-regression-packed-switch/smali/Test.smali
index 8756ed5..5a760c7 100644
--- a/test/501-regression-packed-switch/smali/Test.smali
+++ b/test/501-regression-packed-switch/smali/Test.smali
@@ -27,3 +27,28 @@
.packed-switch 0x0
.end packed-switch
.end method
+
+.method public static PackedSwitchAfterData(I)I
+ .registers 1
+ goto :pswitch_instr
+
+ :case0
+ const/4 v0, 0x1
+ return v0
+
+ :pswitch_data
+ .packed-switch 0x0
+ :case0
+ :case1
+ .end packed-switch
+
+ :pswitch_instr
+ packed-switch v0, :pswitch_data
+ const/4 v0, 0x7
+ return v0
+
+ :case1
+ const/4 v0, 0x4
+ return v0
+
+.end method
diff --git a/test/501-regression-packed-switch/src/Main.java b/test/501-regression-packed-switch/src/Main.java
index b80bc62..12bc1a8 100644
--- a/test/501-regression-packed-switch/src/Main.java
+++ b/test/501-regression-packed-switch/src/Main.java
@@ -29,5 +29,10 @@
if (result != 5) {
throw new Error("Expected 5, got " + result);
}
+ m = c.getMethod("PackedSwitchAfterData", new Class[] { int.class });
+ result = (Integer) m.invoke(null, new Integer(0));
+ if (result != 1) {
+ throw new Error("Expected 1, got " + result);
+ }
}
}
diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java
index ead9446..3366f20 100644
--- a/test/527-checker-array-access-split/src/Main.java
+++ b/test/527-checker-array-access-split/src/Main.java
@@ -34,9 +34,21 @@
/// CHECK-START-ARM64: int Main.constantIndexGet(int[]) instruction_simplifier_arm64 (after)
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK-NOT: Arm64IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
/// CHECK: ArrayGet [<<Array>>,<<Index>>]
+
+ /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (before)
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: ArrayGet [<<Array>>,<<Index>>]
+
+ /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (after)
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK: ArrayGet [<<Array>>,<<Index>>]
+
public static int constantIndexGet(int array[]) {
return array[1];
}
@@ -55,10 +67,23 @@
/// CHECK: <<Const2:i\d+>> IntConstant 2
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK-NOT: Arm64IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
/// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+ /// CHECK-START-ARM: void Main.constantIndexSet(int[]) instruction_simplifier_arm (before)
+ /// CHECK: <<Const2:i\d+>> IntConstant 2
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
+ /// CHECK-START-ARM: void Main.constantIndexSet(int[]) instruction_simplifier_arm (after)
+ /// CHECK: <<Const2:i\d+>> IntConstant 2
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
public static void constantIndexSet(int array[]) {
array[1] = 2;
}
@@ -76,7 +101,20 @@
/// CHECK: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: ArrayGet [<<Address>>,<<Index>>]
+
+
+ /// CHECK-START-ARM: int Main.get(int[], int) instruction_simplifier_arm (before)
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: ArrayGet [<<Array>>,<<Index>>]
+
+ /// CHECK-START-ARM: int Main.get(int[], int) instruction_simplifier_arm (after)
+ /// CHECK: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: ArrayGet [<<Address>>,<<Index>>]
public static int get(int array[], int index) {
@@ -102,7 +140,26 @@
/// CHECK: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: ArraySet [<<Address>>,<<Index>>,<<Arg>>]
+
+
+ /// CHECK-START-ARM: void Main.set(int[], int, int) instruction_simplifier_arm (before)
+ /// CHECK: ParameterValue
+ /// CHECK: ParameterValue
+ /// CHECK: <<Arg:i\d+>> ParameterValue
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Arg>>]
+
+ /// CHECK-START-ARM: void Main.set(int[], int, int) instruction_simplifier_arm (after)
+ /// CHECK: ParameterValue
+ /// CHECK: ParameterValue
+ /// CHECK: <<Arg:i\d+>> ParameterValue
+ /// CHECK: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: ArraySet [<<Address>>,<<Index>>,<<Arg>>]
public static void set(int array[], int index, int value) {
@@ -126,10 +183,10 @@
/// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
- /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
/// CHECK-START-ARM64: void Main.getSet(int[], int) GVN_after_arch (after)
@@ -137,12 +194,42 @@
/// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
- /// CHECK-NOT: Arm64IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
/// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: void Main.getSet(int[], int) instruction_simplifier_arm (before)
+ /// CHECK: <<Const1:i\d+>> IntConstant 1
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: void Main.getSet(int[], int) instruction_simplifier_arm (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: void Main.getSet(int[], int) GVN_after_arch (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>]
public static void getSet(int array[], int index) {
array[index] = array[index] + 1;
}
@@ -166,11 +253,11 @@
/// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
/// CHECK: NewArray
- /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
/// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN_after_arch (after)
@@ -178,11 +265,45 @@
/// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
/// CHECK: NewArray
- /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+
+ /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) instruction_simplifier_arm (before)
+ /// CHECK: <<Const1:i\d+>> IntConstant 1
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: NewArray
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) instruction_simplifier_arm (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: NewArray
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) GVN_after_arch (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: NewArray
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
public static int[] accrossGC(int array[], int index) {
@@ -196,6 +317,14 @@
* Test that the intermediate address is shared between array accesses after
* the bounds check have been removed by BCE.
*/
+ // For checker tests `instruction_simplifier_<arch> (after)` below, by the time we reach
+ // the architecture-specific instruction simplifier, BCE has removed the bounds checks in
+ // the loop.
+
+ // Note that we do not care that the `DataOffset` is `12`. But if we do not
+ // specify it and any other `IntConstant` appears before that instruction,
+ // checker will match the previous `IntConstant`, and we will thus fail the
+ // check.
/// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (before)
/// CHECK: <<Const1:i\d+>> IntConstant 1
@@ -207,14 +336,6 @@
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
/// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>]
- // By the time we reach the architecture-specific instruction simplifier, BCE
- // has removed the bounds checks in the loop.
-
- // Note that we do not care that the `DataOffset` is `12`. But if we do not
- // specify it and any other `IntConstant` appears before that instruction,
- // checker will match the previous `IntConstant`, and we will thus fail the
- // check.
-
/// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (after)
/// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
/// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
@@ -222,10 +343,10 @@
/// CHECK: <<Index:i\d+>> Phi
/// CHECK: If
// -------------- Loop
- /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
- /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
/// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN_after_arch (after)
@@ -235,10 +356,47 @@
/// CHECK: <<Index:i\d+>> Phi
/// CHECK: If
// -------------- Loop
- /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
- /// CHECK-NOT: Arm64IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>]
+
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE1() instruction_simplifier_arm (before)
+ /// CHECK: <<Const1:i\d+>> IntConstant 1
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE1() instruction_simplifier_arm (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE1() GVN_after_arch (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK-NOT: IntermediateAddress
/// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>]
public static int canMergeAfterBCE1() {
@@ -279,12 +437,12 @@
/// CHECK: If
// -------------- Loop
/// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>]
- /// CHECK-DAG: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
- /// CHECK-DAG: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address2>>,<<Index1>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>]
- /// CHECK: <<Address3:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address3:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
/// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
@@ -295,7 +453,7 @@
/// CHECK: If
// -------------- Loop
/// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>]
- /// CHECK-DAG: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address>>,<<Index>>]
/// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address>>,<<Index1>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>]
@@ -304,8 +462,55 @@
// There should be only one intermediate address computation in the loop.
/// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
- /// CHECK: Arm64IntermediateAddress
- /// CHECK-NOT: Arm64IntermediateAddress
+ /// CHECK: IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
+
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE2() instruction_simplifier_arm (before)
+ /// CHECK: <<Const1:i\d+>> IntConstant 1
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>]
+ /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Array>>,<<Index>>]
+ /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Array>>,<<Index1>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+ /// CHECK: ArraySet [<<Array>>,<<Index1>>,<<Add>>]
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE2() instruction_simplifier_arm (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>]
+ /// CHECK-DAG: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
+ /// CHECK-DAG: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address2>>,<<Index1>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+ /// CHECK: <<Address3:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE2() GVN_after_arch (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>]
+ /// CHECK-DAG: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address>>,<<Index>>]
+ /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address>>,<<Index1>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+ /// CHECK: ArraySet [<<Address>>,<<Index1>>,<<Add>>]
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE2() GVN_after_arch (after)
+ /// CHECK: IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
public static int canMergeAfterBCE2() {
int[] array = {0, 1, 2, 3};
@@ -315,6 +520,37 @@
return array[array.length - 1];
}
+ /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (before)
+ /// CHECK-DAG: <<Array1:l\d+>> NewArray
+ /// CHECK-DAG: <<Array2:l\d+>> NewArray
+ /// CHECK-DAG: <<Array3:l\d+>> NewArray
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: ArrayGet [<<Array1>>,<<Index>>]
+ /// CHECK-DAG: ArrayGet [<<Array2>>,<<Index>>]
+ /// CHECK-DAG: ArrayGet [<<Array3>>,<<Index>>]
+
+ /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after)
+ /// CHECK-DAG: <<Array1:l\d+>> NewArray
+ /// CHECK-DAG: <<Array2:l\d+>> NewArray
+ /// CHECK-DAG: <<Array3:l\d+>> NewArray
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: ArrayGet [<<Array1>>,<<Index>>]
+ /// CHECK-DAG: ArrayGet [<<Array2>>,<<Index>>]
+ /// CHECK-DAG: ArrayGet [<<Array3>>,<<Index>>]
+
+ /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after)
+ /// CHECK-NOT: IntermediateAddress
+ public static int checkLongFloatDouble() {
+ long[] array_long = {0, 1, 2, 3};
+ float[] array_float = {(float)0.0, (float)1.0, (float)2.0, (float)3.0};
+ double[] array_double = {0.0, 1.0, 2.0, 3.0};
+ double s = 0.0;
+
+ for (int i = 0; i < 4; i++) {
+ s += (double)array_long[i] + (double)array_float[i] + array_double[i];
+ }
+ return (int)s;
+ }
public static void main(String[] args) {
int[] array = {123, 456, 789};
@@ -337,5 +573,7 @@
assertIntEquals(4, canMergeAfterBCE1());
assertIntEquals(6, canMergeAfterBCE2());
+
+ assertIntEquals(18, checkLongFloatDouble());
}
}
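The checker changes in this file track the rename of Arm64IntermediateAddress to the shared IntermediateAddress instruction (now also generated for ARM): it factors the `array base + data offset` computation out of each access so that GVN can reuse a single address across several ArrayGet/ArraySet in the same block or loop. A rough Java sketch of that sharing, using a flat int[] as a stand-in for memory; all names are illustrative.

    // Sketch only: the "heap" array and word-sized offsets stand in for machine addresses.
    class IntermediateAddressSketch {
      // Without the intermediate address, each access recomputes base + dataOffset.
      static int getSetNaive(int[] heap, int base, int dataOffset, int index) {
        int v = heap[base + dataOffset + index];        // ArrayGet
        heap[base + dataOffset + index] = v + 1;        // ArraySet recomputes the same sum
        return v;
      }

      // With IntermediateAddress + GVN, base + dataOffset is computed once and shared.
      static int getSetShared(int[] heap, int base, int dataOffset, int index) {
        int address = base + dataOffset;                // IntermediateAddress (single computation)
        int v = heap[address + index];                  // ArrayGet uses the shared address
        heap[address + index] = v + 1;                  // ArraySet reuses it
        return v;
      }
    }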
diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java
index 5a36ba5..7b5cbc1 100644
--- a/test/529-checker-unresolved/src/Main.java
+++ b/test/529-checker-unresolved/src/Main.java
@@ -77,6 +77,16 @@
expectEquals(123456789123456789f, UnresolvedClass.staticFloat);
expectEquals(123456789123456789d, UnresolvedClass.staticDouble);
expectEquals(o, UnresolvedClass.staticObject);
+
+ // Check "large" values.
+
+ UnresolvedClass.staticByte = (byte)-1;
+ UnresolvedClass.staticChar = (char)32768;
+ UnresolvedClass.staticInt = -1;
+
+ expectEquals((byte)-1, UnresolvedClass.staticByte);
+ expectEquals((char)32768, UnresolvedClass.staticChar);
+ expectEquals(-1, UnresolvedClass.staticInt);
}
/// CHECK-START: void Main.callUnresolvedInstanceFieldAccess(UnresolvedClass) register (before)
diff --git a/test/530-checker-loops3/expected.txt b/test/530-checker-loops3/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/530-checker-loops3/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/530-checker-loops3/info.txt b/test/530-checker-loops3/info.txt
new file mode 100644
index 0000000..07d99a3
--- /dev/null
+++ b/test/530-checker-loops3/info.txt
@@ -0,0 +1 @@
+Test on loop optimizations, in particular loop-based dynamic BCE.
diff --git a/test/530-checker-loops3/src/Main.java b/test/530-checker-loops3/src/Main.java
new file mode 100644
index 0000000..5ffcbe9
--- /dev/null
+++ b/test/530-checker-loops3/src/Main.java
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on loop optimizations, in particular dynamic BCE. In all cases, the
+// bounds check on a[] is resolved statically. Bounds checks on b[] exercise
+// various scenarios. In all cases, loop-based dynamic BCE is better than
+// dominator-based BCE, since it generates the test outside the loop.
+//
+public class Main {
+
+ /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (before)
+ /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ //
+ /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+ public static void oneConstantIndex(int[] a, int[] b) {
+ // Dynamic bce on b requires two deopts: one null and one bound.
+ for (int i = 0; i < a.length; i++) {
+ a[i] = b[1];
+ }
+ }
+
+ /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (before)
+ /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ //
+ /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+ public static void multipleConstantIndices(int[] a, int[] b) {
+ // Dynamic bce on b requires two deopts: one null and one bound.
+ for (int i = 0; i < a.length; i++) {
+ a[i] = b[0] + b[1] + b[2];
+ }
+ }
+
+ /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (before)
+ /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ //
+ /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+ public static void oneInvariantIndex(int[] a, int[] b, int c) {
+ // Dynamic bce on b requires two deopts: one null and one bound.
+ for (int i = 0; i < a.length; i++) {
+ a[i] = b[c];
+ }
+ }
+
+ /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (before)
+ /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ //
+ /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+ public static void multipleInvariantIndices(int[] a, int[] b, int c) {
+ // Dynamic bce on b requires three deopts: one null and two bounds.
+ for (int i = 0; i < a.length; i++) {
+ a[i] = b[c-1] + b[c] + b[c+1];
+ }
+ }
+
+ /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (before)
+ /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ //
+ /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+ public static void oneUnitStride(int[] a, int[] b) {
+ // Dynamic bce on b requires three deopts: one null and two bounds.
+ for (int i = 0; i < a.length; i++) {
+ a[i] = b[i];
+ }
+ }
+
+ /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (before)
+ /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ //
+ /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) instruction_simplifier_after_bce (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+ public static void multipleUnitStrides(int[] a, int[] b) {
+ // Dynamic bce on b requires four deopts: one null and three bounds.
+ // One redundant deopt is removed by simplifier.
+ // TODO: range information could remove another deopt.
+ for (int i = 1; i < a.length - 1; i++) {
+ a[i] = b[i-1] + b[i] + b[i+1];
+ }
+ }
+
+ /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (before)
+ /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ //
+ /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) instruction_simplifier_after_bce (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+ public static void multipleUnitStridesConditional(int[] a, int[] b) {
+ // Dynamic bce on b requires four deopts: one null and three bounds.
+ // The two conditional references may be included, since they are in range.
+ // One redundant deopt is removed by simplifier.
+ for (int i = 2; i < a.length - 2; i++) {
+ int t = b[i-2] + b[i] + b[i+2] + (((i & 1) == 0) ? b[i+1] : b[i-1]);
+ a[i] = t;
+ }
+ }
+
+ /// CHECK-START: void Main.shifter(int[]) BCE (before)
+ /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ //
+ /// CHECK-START: void Main.shifter(int[]) BCE (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.shifter(int[]) instruction_simplifier_after_bce (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.shifter(int[]) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+ public static void shifter(int[] x) {
+ // Real-life example: should have four deopts: one null and three bounds.
+ // Two redundant deopts are removed by simplifier.
+ for (int i = 16; i < 80; i++) {
+ int t = x[i - 3] ^ x[i - 8] ^ x[i - 14] ^ x[i - 16];
+ x[i] = t << 1 | t >>> 31;
+ }
+ }
+
+ /// CHECK-START: void Main.stencil(int[], int, int) BCE (before)
+ /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+ //
+ /// CHECK-START: void Main.stencil(int[], int, int) BCE (after)
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-DAG: Deoptimize loop:none
+ /// CHECK-NOT: Deoptimize
+ //
+ /// CHECK-START: void Main.stencil(int[], int, int) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+ public static void stencil(int[] array, int start, int end) {
+ // Real-life example: should have four deopts: one null and three bounds.
+ for (int i = end; i >= start; i--) {
+ array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5;
+ }
+ }
+
+ //
+ // Verifier.
+ //
+
+ public static void main(String[] args) {
+ int[] a = new int[10];
+ int b[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+ int b1[] = { 100 };
+
+ oneConstantIndex(a, b);
+ for (int i = 0; i < a.length; i++) {
+ expectEquals(2, a[i]);
+ }
+ try {
+ oneConstantIndex(a, b1);
+ throw new Error("Should throw AIOOBE");
+ } catch (ArrayIndexOutOfBoundsException e) {
+ }
+
+ multipleConstantIndices(a, b);
+ for (int i = 0; i < a.length; i++) {
+ expectEquals(6, a[i]);
+ }
+ try {
+ multipleConstantIndices(a, b1);
+ throw new Error("Should throw AIOOBE");
+ } catch (ArrayIndexOutOfBoundsException e) {
+ }
+
+ oneInvariantIndex(a, b, 1);
+ for (int i = 0; i < a.length; i++) {
+ expectEquals(2, a[i]);
+ }
+ try {
+ oneInvariantIndex(a, b1, 1);
+ throw new Error("Should throw AIOOBE");
+ } catch (ArrayIndexOutOfBoundsException e) {
+ }
+
+ multipleInvariantIndices(a, b, 1);
+ for (int i = 0; i < a.length; i++) {
+ expectEquals(6, a[i]);
+ }
+ try {
+ multipleInvariantIndices(a, b1, 1);
+ throw new Error("Should throw AIOOBE");
+ } catch (ArrayIndexOutOfBoundsException e) {
+ }
+
+ oneUnitStride(a, b);
+ for (int i = 0; i < a.length; i++) {
+ expectEquals(i + 1, a[i]);
+ }
+ try {
+ oneUnitStride(a, b1);
+ throw new Error("Should throw AIOOBE");
+ } catch (ArrayIndexOutOfBoundsException e) {
+ expectEquals(100, a[0]);
+ }
+
+ multipleUnitStrides(a, b);
+ for (int i = 1; i < a.length - 1; i++) {
+ expectEquals(3 * i + 3, a[i]);
+ }
+ try {
+ multipleUnitStrides(a, b1);
+ throw new Error("Should throw AIOOBE");
+ } catch (ArrayIndexOutOfBoundsException e) {
+ }
+
+ multipleUnitStridesConditional(a, b);
+ for (int i = 2; i < a.length - 2; i++) {
+ int e = 3 * i + 3 + (((i & 1) == 0) ? i + 2 : i);
+ expectEquals(e, a[i]);
+ }
+ try {
+ multipleUnitStridesConditional(a, b1);
+ throw new Error("Should throw AIOOBE");
+ } catch (ArrayIndexOutOfBoundsException e) {
+ }
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
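The header comment of this test says that loop-based dynamic BCE "generates the test outside the loop": the NullCheck/BoundsCheck inside the loop are replaced by a handful of Deoptimize guards hoisted in front of it. A hand-written Java approximation of that shape for oneUnitStride follows; deoptimization is only mimicked here by falling back to a checked loop, whereas the real runtime transfers to the interpreter.

    // Sketch only: the guard condition and the fallback path approximate the Deoptimize
    // instructions that BCE emits with loop:none in the checker expectations above.
    class DynamicBceSketch {
      static void oneUnitStrideGuarded(int[] a, int[] b) {
        // Hoisted guards: a null test on b plus bound tests covering every index the
        // loop will touch (the test comments count these as "deopts").
        if (b != null && a.length <= b.length) {
          for (int i = 0; i < a.length; i++) {
            a[i] = b[i];                 // no NullCheck/BoundsCheck left in the loop body
          }
        } else {
          // Fallback ("deoptimized") path: the original loop with per-iteration checks,
          // which throws NullPointerException/ArrayIndexOutOfBoundsException as before.
          for (int i = 0; i < a.length; i++) {
            a[i] = b[i];
          }
        }
      }
    }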
diff --git a/test/604-hot-static-interface/hot_static_interface.cc b/test/604-hot-static-interface/hot_static_interface.cc
deleted file mode 100644
index 475a11d..0000000
--- a/test/604-hot-static-interface/hot_static_interface.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_method.h"
-#include "jit/jit.h"
-#include "jit/jit_code_cache.h"
-#include "jit/profiling_info.h"
-#include "oat_quick_method_header.h"
-#include "scoped_thread_state_change.h"
-#include "ScopedUtfChars.h"
-#include "stack_map.h"
-
-namespace art {
-
-extern "C" JNIEXPORT void JNICALL Java_Main_waitUntilJitted(JNIEnv* env,
- jclass,
- jclass itf,
- jstring method_name) {
- jit::Jit* jit = Runtime::Current()->GetJit();
- if (jit == nullptr) {
- return;
- }
-
- ScopedObjectAccess soa(Thread::Current());
-
- ScopedUtfChars chars(env, method_name);
- CHECK(chars.c_str() != nullptr);
-
- mirror::Class* klass = soa.Decode<mirror::Class*>(itf);
- ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), sizeof(void*));
-
- jit::JitCodeCache* code_cache = jit->GetCodeCache();
- OatQuickMethodHeader* header = nullptr;
- // Make sure there is a profiling info, required by the compiler.
- ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
- while (true) {
- header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode());
- if (code_cache->ContainsPc(header->GetCode())) {
- break;
- } else {
- // Sleep to yield to the compiler thread.
- usleep(1000);
- // Will either ensure it's compiled or do the compilation itself.
- jit->CompileMethod(method, soa.Self(), /* osr */ false);
- }
- }
-}
-
-} // namespace art
diff --git a/test/604-hot-static-interface/src/Main.java b/test/604-hot-static-interface/src/Main.java
index 559f15d..04d7cd6 100644
--- a/test/604-hot-static-interface/src/Main.java
+++ b/test/604-hot-static-interface/src/Main.java
@@ -22,14 +22,14 @@
Itf.foo(new Object());
}
- waitUntilJitted(Itf.class, "foo");
+ ensureJitCompiled(Itf.class, "foo");
if (!Itf.foo(new Object())) {
throw new Error("Unexpected result");
}
}
- private static native void waitUntilJitted(Class itf, String method_name);
+ private static native void ensureJitCompiled(Class itf, String method_name);
}
interface Itf {
diff --git a/test/612-jit-dex-cache/expected.txt b/test/612-jit-dex-cache/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/612-jit-dex-cache/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/612-jit-dex-cache/info.txt b/test/612-jit-dex-cache/info.txt
new file mode 100644
index 0000000..e80f642
--- /dev/null
+++ b/test/612-jit-dex-cache/info.txt
@@ -0,0 +1,2 @@
+Regression test for the JIT compiler which used to
+wrongly update the dex cache of a class loader.
diff --git a/test/612-jit-dex-cache/src-ex/B.java b/test/612-jit-dex-cache/src-ex/B.java
new file mode 100644
index 0000000..4da9a1d
--- /dev/null
+++ b/test/612-jit-dex-cache/src-ex/B.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class B {
+}
diff --git a/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
new file mode 100644
index 0000000..1d6158a
--- /dev/null
+++ b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class LoadedByAppClassLoader {
+ public static void letMeInlineYou(A a) {
+ a.foo();
+ }
+
+ public static ClassLoader areYouB() {
+ // Ensure letMeInlineYou is JITted and tries to do inlining of A.foo.
+ // The compiler used to wrongly update the dex cache of letMeInlineYou's
+ // class loader.
+ Main.ensureJitCompiled(LoadedByAppClassLoader.class, "letMeInlineYou");
+ return OtherClass.getB().getClassLoader();
+ }
+}
+
+class OtherClass {
+ public static Class getB() {
+ // This used to return the B class of another class loader.
+ return B.class;
+ }
+}
diff --git a/test/612-jit-dex-cache/src/A.java b/test/612-jit-dex-cache/src/A.java
new file mode 100644
index 0000000..415c712
--- /dev/null
+++ b/test/612-jit-dex-cache/src/A.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+ public int foo() {
+ return 42;
+ }
+}
diff --git a/test/612-jit-dex-cache/src/B.java b/test/612-jit-dex-cache/src/B.java
new file mode 100644
index 0000000..46c878b
--- /dev/null
+++ b/test/612-jit-dex-cache/src/B.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class B extends A {
+}
diff --git a/test/612-jit-dex-cache/src/Main.java b/test/612-jit-dex-cache/src/Main.java
new file mode 100644
index 0000000..0e4bd22
--- /dev/null
+++ b/test/612-jit-dex-cache/src/Main.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+import dalvik.system.PathClassLoader;
+
+// A class loader that looks up non-java.* classes itself before delegating to its parent.
+class DelegateLastPathClassLoader extends PathClassLoader {
+
+ public DelegateLastPathClassLoader(String dexPath, ClassLoader parent) {
+ super(dexPath, parent);
+ }
+
+ @Override
+ protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
+ if (!name.startsWith("java.")) {
+ try {
+ return findClass(name);
+ } catch (ClassNotFoundException ignore) {
+ // Ignore and fall through to parent class loader.
+ }
+ }
+ return super.loadClass(name, resolve);
+ }
+}
+
+public class Main {
+
+ private static Class classFromDifferentLoader() throws Exception {
+ final String DEX_FILE = System.getenv("DEX_LOCATION") + "/612-jit-dex-cache-ex.jar";
+ ClassLoader loader = new DelegateLastPathClassLoader(DEX_FILE, Main.class.getClassLoader());
+ return loader.loadClass("LoadedByAppClassLoader");
+ }
+
+ public static void main(String[] args) throws Exception {
+ System.loadLibrary(args[0]);
+ Class cls = classFromDifferentLoader();
+ Method m = cls.getDeclaredMethod("letMeInlineYou", A.class);
+ B b = new B();
+ // Invoke the method enough times to get an inline cache and get JITted.
+ for (int i = 0; i < 10000; ++i) {
+ m.invoke(null, b);
+ }
+ m = cls.getDeclaredMethod("areYouB", null);
+ ClassLoader loader = (ClassLoader) m.invoke(null);
+ if (loader != cls.getClassLoader()) {
+ throw new Error("Wrong class loader");
+ }
+ }
+
+ public static native void ensureJitCompiled(Class cls, String method_name);
+}
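The regression above hinges on the same class name resolving to different classes depending on the class loader: B exists both in the main dex file and in the secondary 612-jit-dex-cache-ex.jar, and the delegate-last loader must keep resolving it to its own copy even after the JIT has inlined through letMeInlineYou. A small usage sketch of that delegate-last behavior, assuming the DelegateLastPathClassLoader defined above is visible; the dex path argument is hypothetical.

    // Sketch only: shows which loader defines a class under delegate-last lookup.
    class DelegateLastSketch {
      static void demo(String childDexPath) throws Exception {
        ClassLoader parent = DelegateLastSketch.class.getClassLoader();
        ClassLoader child = new DelegateLastPathClassLoader(childDexPath, parent);

        // "B" is present in both dex files; delegate-last means the child's copy wins,
        // so its defining loader is the child loader.
        Class<?> b = child.loadClass("B");
        if (b.getClassLoader() != child) {
          throw new Error("Expected B to be defined by the child loader");
        }

        // java.* classes are still delegated, so they come from the boot/parent loader.
        Class<?> s = child.loadClass("java.lang.String");
        if (s.getClassLoader() == child) {
          throw new Error("Did not expect java.lang.String from the child loader");
        }
      }
    }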
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 75e74ec..7813d16 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -47,8 +47,7 @@
570-checker-osr/osr.cc \
595-profile-saving/profile-saving.cc \
596-app-images/app_images.cc \
- 597-deopt-new-string/deopt.cc \
- 604-hot-static-interface/hot_static_interface.cc
+ 597-deopt-new-string/deopt.cc
ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index dd6b6f3..8f8b667 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -527,7 +527,7 @@
# Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
# 484: Baker's fast path based read barrier compiler instrumentation generates code containing
# more parallel moves on x86, thus some Checker assertions may fail.
-# 527: On ARM64, the read barrier instrumentation does not support the HArm64IntermediateAddress
+# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
# instruction yet (b/26601270).
# 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
# not yet handled in the read barrier configuration.
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index fd41fd2..e70a95c 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -18,10 +18,14 @@
#include "base/logging.h"
#include "dex_file-inl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
#include "mirror/class-inl.h"
#include "nth_caller_visitor.h"
+#include "oat_quick_method_header.h"
#include "runtime.h"
#include "scoped_thread_state_change.h"
+#include "ScopedUtfChars.h"
#include "stack.h"
#include "thread-inl.h"
@@ -116,4 +120,38 @@
return JNI_TRUE;
}
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env,
+ jclass,
+ jclass cls,
+ jstring method_name) {
+ jit::Jit* jit = Runtime::Current()->GetJit();
+ if (jit == nullptr) {
+ return;
+ }
+
+ ScopedObjectAccess soa(Thread::Current());
+
+ ScopedUtfChars chars(env, method_name);
+ CHECK(chars.c_str() != nullptr);
+
+ mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
+ ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), sizeof(void*));
+
+ jit::JitCodeCache* code_cache = jit->GetCodeCache();
+ OatQuickMethodHeader* header = nullptr;
+ // Make sure there is a profiling info, required by the compiler.
+ ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
+ while (true) {
+ header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode());
+ if (code_cache->ContainsPc(header->GetCode())) {
+ break;
+ } else {
+ // Sleep to yield to the compiler thread.
+ usleep(1000);
+ // Will either ensure it's compiled or do the compilation itself.
+ jit->CompileMethod(method, soa.Self(), /* osr */ false);
+ }
+ }
+}
+
} // namespace art
diff --git a/test/run-test b/test/run-test
index bbcd4b0..1ef5428 100755
--- a/test/run-test
+++ b/test/run-test
@@ -37,7 +37,7 @@
if [ -z "$TMPDIR" ]; then
tmp_dir="/tmp/$USER/${test_dir}"
else
- tmp_dir="${TMPDIR}/$USER/${test_dir}"
+ tmp_dir="${TMPDIR}/${test_dir}"
fi
checker="${progdir}/../tools/checker/checker.py"
export JAVA="java"