Merge "Thumb2: Clean up 16-bit LDR/STR detection."
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 7a34683..7c87a60 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -39,6 +39,7 @@
#include "gc/accounting/card_table-inl.h"
#include "gc/accounting/heap_bitmap.h"
#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/collector/concurrent_copying.h"
#include "gc/heap.h"
#include "gc/space/large_object_space.h"
#include "gc/space/space-inl.h"
@@ -1377,6 +1378,8 @@
runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
image_methods_[ImageHeader::kRefsAndArgsSaveMethod] =
runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
+ image_methods_[ImageHeader::kSaveEverythingMethod] =
+ runtime->GetCalleeSaveMethod(Runtime::kSaveEverything);
// Visit image methods first to have the main runtime methods in the first image.
for (auto* m : image_methods_) {
CHECK(m != nullptr);
@@ -1823,6 +1826,11 @@
const auto it = saved_hashcode_map_.find(obj);
dst->SetLockWord(it != saved_hashcode_map_.end() ?
LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false);
+ if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) {
+ // Treat all of the objects in the image as marked to avoid unnecessary dirty pages. This is
+ // safe since we mark all of the objects that may reference non-immune objects as gray.
+ CHECK(dst->AtomicSetMarkBit(0, 1));
+ }
FixupObject(obj, dst);
}
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 626a975..7d13656 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -217,8 +217,7 @@
// uint32 = typeof(lockword_)
// Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK
// failures due to invalid read barrier bits during object field reads.
- static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits -
- LockWord::kReadBarrierStateSize;
+ static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits - LockWord::kGCStateSize;
// 111000.....0
static const size_t kBinMask = ((static_cast<size_t>(1) << kBinBits) - 1) << kBinShift;
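For reference, a minimal sketch of the bin-field packing computed above, using hypothetical bit widths (kBinBits and LockWord::kGCStateSize below are assumptions, not the real ART constants): the bin index sits in the top bits of the 32-bit lock word, just above the GC state bits that must stay zero.

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Hypothetical bit widths; the real values come from image_writer.h / lock_word.h.
    constexpr size_t kBinBits = 3;       // assumption: bits needed for the bin index
    constexpr size_t kGCStateSize = 2;   // assumption: LockWord::kGCStateSize
    constexpr size_t kBinShift = 32 - kBinBits - kGCStateSize;
    constexpr size_t kBinMask = ((static_cast<size_t>(1) << kBinBits) - 1) << kBinShift;

    int main() {
      // With 3 bin bits and 2 GC state bits: shift 27, mask 0x38000000.
      std::printf("kBinShift=%zu kBinMask=0x%zx\n", kBinShift, kBinMask);
      return 0;
    }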
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index d4dd978..2471f79 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -31,10 +31,6 @@
}
uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
- // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it
- // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk
- // of code. To avoid any alignment discrepancies for the final chunk, we always align the
- // offset after reserving or writing any chunk.
uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset,
MethodReference(nullptr, 0u),
@@ -46,7 +42,7 @@
unprocessed_patches_.clear();
thunk_locations_.push_back(aligned_offset);
- offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_);
+ offset = aligned_offset + thunk_code_.size();
}
return offset;
}
@@ -65,13 +61,7 @@
if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) {
return 0u;
}
- uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
- // Align after writing chunk, see the ReserveSpace() above.
- offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_);
- aligned_code_delta = offset - thunk_end_offset;
- if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) {
- return 0u;
- }
+ offset = aligned_offset + thunk_code_.size();
}
return offset;
}
@@ -92,7 +82,7 @@
MethodReference method_ref,
uint32_t max_extra_space) {
uint32_t quick_code_size = compiled_method->GetQuickCode().size();
- uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
+ uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
// Adjust for extra space required by the subclass.
next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space);
@@ -106,9 +96,9 @@
if (needs_thunk) {
// A single thunk will cover all pending patches.
unprocessed_patches_.clear();
- uint32_t thunk_location = compiled_method->AlignCode(offset);
+ uint32_t thunk_location = CompiledMethod::AlignCode(offset, instruction_set_);
thunk_locations_.push_back(thunk_location);
- offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_);
+ offset = thunk_location + thunk_code_.size();
}
}
for (const LinkerPatch& patch : compiled_method->GetPatches()) {
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index a8078e3..eace3d4 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -48,18 +48,18 @@
const ArrayRef<const LinkerPatch>& method3_patches,
uint32_t distance_without_thunks) {
CHECK_EQ(distance_without_thunks % kArmAlignment, 0u);
- const uint32_t method1_offset =
- CompiledCode::AlignCode(kTrampolineSize, kThumb2) + sizeof(OatQuickMethodHeader);
+ uint32_t method1_offset =
+ kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
// We want to put the method3 at a very precise offset.
const uint32_t method3_offset = method1_offset + distance_without_thunks;
- CHECK_ALIGNED(method3_offset - sizeof(OatQuickMethodHeader), kArmAlignment);
+ CHECK_ALIGNED(method3_offset, kArmAlignment);
// Calculate size of method2 so that we put method3 at the correct place.
+ const uint32_t method1_end = method1_offset + method1_code.size();
const uint32_t method2_offset =
- CompiledCode::AlignCode(method1_offset + method1_code.size(), kThumb2) +
- sizeof(OatQuickMethodHeader);
+ method1_end + CodeAlignmentSize(method1_end) + sizeof(OatQuickMethodHeader);
const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset);
std::vector<uint8_t> method2_raw_code(method2_size);
ArrayRef<const uint8_t> method2_code(method2_raw_code);
@@ -78,8 +78,11 @@
if (result3.second == method3_offset + 1 /* thumb mode */) {
return false; // No thunk.
} else {
- uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kThumb2);
- CHECK_EQ(result3.second, method3_offset + aligned_thunk_size + 1 /* thumb mode */);
+ uint32_t thunk_end =
+ CompiledCode::AlignCode(method3_offset - sizeof(OatQuickMethodHeader), kThumb2) +
+ ThunkSize();
+ uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end);
+ CHECK_EQ(result3.second, header_offset + sizeof(OatQuickMethodHeader) + 1 /* thumb mode */);
return true; // Thunk present.
}
}
@@ -352,9 +355,12 @@
uint32_t method1_offset = GetMethodOffset(1u);
uint32_t method3_offset = GetMethodOffset(3u);
+ ASSERT_TRUE(IsAligned<kArmAlignment>(method3_offset));
uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader);
- ASSERT_TRUE(IsAligned<kArmAlignment>(method3_header_offset));
- uint32_t thunk_offset = method3_header_offset - CompiledCode::AlignCode(ThunkSize(), kThumb2);
+ uint32_t thunk_offset =
+ RoundDown(method3_header_offset - ThunkSize(), GetInstructionSetAlignment(kThumb2));
+ DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()),
+ method3_header_offset);
ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset));
uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */);
ASSERT_EQ(diff & 1u, 0u);
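The updated expectations above encode the following thunk-placement arithmetic; a standalone sketch with made-up sizes (the alignment, OatQuickMethodHeader size and thunk size are assumptions chosen only to make the numbers concrete):

    #include <cassert>
    #include <cstdint>

    // RoundDown/RoundUp mirror the ART helpers of the same name (power-of-two n).
    constexpr uint32_t RoundDown(uint32_t x, uint32_t n) { return x & ~(n - 1u); }
    constexpr uint32_t RoundUp(uint32_t x, uint32_t n) { return RoundDown(x + n - 1u, n); }

    int main() {
      constexpr uint32_t kAlignment = 8u;    // assumption: kArmAlignment
      constexpr uint32_t kHeaderSize = 24u;  // assumption: sizeof(OatQuickMethodHeader)
      constexpr uint32_t kThunkSize = 4u;    // assumption: size of the call thunk

      uint32_t method3_offset = 0x1000u;     // aligned code offset of the following method
      uint32_t method3_header_offset = method3_offset - kHeaderSize;

      // The thunk goes at the highest aligned offset that still fits before the header.
      uint32_t thunk_offset = RoundDown(method3_header_offset - kThunkSize, kAlignment);
      uint32_t thunk_end = thunk_offset + kThunkSize;
      // Padding after the thunk aligns the code that follows the next header, not the header.
      uint32_t padding = RoundUp(thunk_end + kHeaderSize, kAlignment) - (thunk_end + kHeaderSize);
      assert(thunk_end + padding == method3_header_offset);
      return 0;
    }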
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index fdd14be..4c8788e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -83,7 +83,7 @@
// Now that we have the actual offset where the code will be placed, locate the ADRP insns
// that actually require the thunk.
- uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
+ uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size());
DCHECK(compiled_method != nullptr);
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index 09729fd..573de73 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -67,36 +67,39 @@
const ArrayRef<const LinkerPatch>& last_method_patches,
uint32_t distance_without_thunks) {
CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u);
- const uint32_t method1_offset =
- CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+ uint32_t method1_offset =
+ kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
- const uint32_t gap_start =
- CompiledCode::AlignCode(method1_offset + method1_code.size(), kArm64);
+ const uint32_t gap_start = method1_offset + method1_code.size();
// We want to put the method3 at a very precise offset.
const uint32_t last_method_offset = method1_offset + distance_without_thunks;
+ CHECK_ALIGNED(last_method_offset, kArm64Alignment);
const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader);
- CHECK_ALIGNED(gap_end, kArm64Alignment);
- // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB).
+ // Fill the gap with intermediate methods in chunks of 2MiB and the first in [2MiB, 4MiB).
// (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB
- // offsets by this test.)
+ // offsets by this test. Making the first chunk bigger makes it easy to give the ends of all
+ // intermediate methods the same alignment, so the thunk insertion adds a predictable size as
+ // long as it's after the first chunk.)
uint32_t method_idx = 2u;
constexpr uint32_t kSmallChunkSize = 2 * MB;
std::vector<uint8_t> gap_code;
- size_t gap_size = gap_end - gap_start;
- for (; gap_size >= 2u * kSmallChunkSize; gap_size -= kSmallChunkSize) {
- uint32_t chunk_code_size = kSmallChunkSize - sizeof(OatQuickMethodHeader);
+ uint32_t gap_size = gap_end - gap_start;
+ uint32_t num_small_chunks = std::max(gap_size / kSmallChunkSize, 1u) - 1u;
+ uint32_t chunk_start = gap_start;
+ uint32_t chunk_size = gap_size - num_small_chunks * kSmallChunkSize;
+ for (uint32_t i = 0; i <= num_small_chunks; ++i) { // num_small_chunks+1 iterations.
+ uint32_t chunk_code_size =
+ chunk_size - CodeAlignmentSize(chunk_start) - sizeof(OatQuickMethodHeader);
gap_code.resize(chunk_code_size, 0u);
AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
ArrayRef<const LinkerPatch>());
method_idx += 1u;
+ chunk_start += chunk_size;
+ chunk_size = kSmallChunkSize; // For all but the first chunk.
+ DCHECK_EQ(CodeAlignmentSize(gap_end), CodeAlignmentSize(chunk_start));
}
- uint32_t chunk_code_size = gap_size - sizeof(OatQuickMethodHeader);
- gap_code.resize(chunk_code_size, 0u);
- AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
- ArrayRef<const LinkerPatch>());
- method_idx += 1u;
// Add the last method and link
AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches);
@@ -109,8 +112,9 @@
// There may be a thunk before method2.
if (last_result.second != last_method_offset) {
// Thunk present. Check that there's only one.
- uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kArm64);
- CHECK_EQ(last_result.second, last_method_offset + aligned_thunk_size);
+ uint32_t thunk_end = CompiledCode::AlignCode(gap_end, kArm64) + ThunkSize();
+ uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end);
+ CHECK_EQ(last_result.second, header_offset + sizeof(OatQuickMethodHeader));
}
return method_idx;
}
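Worked example of the gap-filling loop earlier in this function (kSmallChunkSize is 2 MiB as in the test; the concrete gap size below is made up): all chunks except the first are exactly 2 MiB and the oversized first chunk absorbs the remainder, which is what keeps the end alignment of every intermediate method the same.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      constexpr uint32_t kMB = 1024u * 1024u;
      constexpr uint32_t kSmallChunkSize = 2u * kMB;

      uint32_t gap_size = 9u * kMB;  // assumption: size of the gap to fill with dummy methods
      uint32_t num_small_chunks = std::max(gap_size / kSmallChunkSize, 1u) - 1u;
      uint32_t first_chunk_size = gap_size - num_small_chunks * kSmallChunkSize;  // in [2 MiB, 4 MiB)

      assert(num_small_chunks == 3u);
      assert(first_chunk_size == 3u * kMB);
      assert(first_chunk_size + num_small_chunks * kSmallChunkSize == gap_size);
      return 0;
    }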
@@ -341,7 +345,7 @@
uint32_t dex_cache_arrays_begin,
uint32_t element_offset) {
uint32_t method1_offset =
- CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+ kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
ASSERT_LT(method1_offset, adrp_offset);
CHECK_ALIGNED(adrp_offset, 4u);
uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
@@ -391,7 +395,7 @@
bool has_thunk,
uint32_t string_offset) {
uint32_t method1_offset =
- CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+ kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
ASSERT_LT(method1_offset, adrp_offset);
CHECK_ALIGNED(adrp_offset, 4u);
uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
@@ -614,10 +618,12 @@
uint32_t method1_offset = GetMethodOffset(1u);
uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+ ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_offset));
uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader);
- ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_header_offset));
- uint32_t thunk_offset = last_method_header_offset - CompiledCode::AlignCode(ThunkSize(), kArm64);
- ASSERT_TRUE(IsAligned<kArm64Alignment>(thunk_offset));
+ uint32_t thunk_offset =
+ RoundDown(last_method_header_offset - ThunkSize(), GetInstructionSetAlignment(kArm64));
+ DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()),
+ last_method_header_offset);
uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1);
CHECK_ALIGNED(diff, 4u);
ASSERT_LT(diff, 128 * MB);
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index ec69107..d21f33e 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -98,6 +98,14 @@
patches));
}
+ uint32_t CodeAlignmentSize(uint32_t header_offset_to_align) {
+ // We want to align the code rather than the preheader.
+ uint32_t unaligned_code_offset = header_offset_to_align + sizeof(OatQuickMethodHeader);
+ uint32_t aligned_code_offset =
+ CompiledMethod::AlignCode(unaligned_code_offset, instruction_set_);
+ return aligned_code_offset - unaligned_code_offset;
+ }
+
void Link() {
// Reserve space.
static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset.");
@@ -106,9 +114,8 @@
for (auto& compiled_method : compiled_methods_) {
offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]);
- uint32_t aligned_offset = compiled_method->AlignCode(offset);
- uint32_t aligned_code_delta = aligned_offset - offset;
- offset += aligned_code_delta;
+ uint32_t alignment_size = CodeAlignmentSize(offset);
+ offset += alignment_size;
offset += sizeof(OatQuickMethodHeader);
uint32_t quick_code_offset = offset + compiled_method->CodeDelta();
@@ -136,11 +143,10 @@
for (auto& compiled_method : compiled_methods_) {
offset = patcher_->WriteThunks(&out_, offset);
- uint32_t aligned_offset = compiled_method->AlignCode(offset);
- uint32_t aligned_code_delta = aligned_offset - offset;
- CHECK_LE(aligned_code_delta, sizeof(kPadding));
- out_.WriteFully(kPadding, aligned_code_delta);
- offset += aligned_code_delta;
+ uint32_t alignment_size = CodeAlignmentSize(offset);
+ CHECK_LE(alignment_size, sizeof(kPadding));
+ out_.WriteFully(kPadding, alignment_size);
+ offset += alignment_size;
out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader));
offset += sizeof(OatQuickMethodHeader);
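A standalone sketch of what the new CodeAlignmentSize() helper computes (the header size and alignment below are hypothetical): the padding is chosen so that the code following the OatQuickMethodHeader lands on the required boundary, rather than the header itself.

    #include <cassert>
    #include <cstdint>

    uint32_t RoundUp(uint32_t x, uint32_t n) { return ((x + n - 1u) / n) * n; }

    // Padding needed before a method header written at header_offset so that the code
    // after the header ends up aligned.
    uint32_t CodeAlignmentSize(uint32_t header_offset, uint32_t header_size, uint32_t alignment) {
      uint32_t unaligned_code_offset = header_offset + header_size;
      uint32_t aligned_code_offset = RoundUp(unaligned_code_offset, alignment);
      return aligned_code_offset - unaligned_code_offset;
    }

    int main() {
      // Assumed sizes: 24-byte header, 16-byte code alignment.
      assert(CodeAlignmentSize(40u, 24u, 16u) == 0u);   // 40 + 24 = 64, already aligned
      assert(CodeAlignmentSize(44u, 24u, 16u) == 12u);  // 44 + 24 = 68, pad to 80
      return 0;
    }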
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index f20c715..8273b15 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -87,6 +87,13 @@
OatHeader* const oat_header_;
};
+inline uint32_t CodeAlignmentSize(uint32_t header_offset, const CompiledMethod& compiled_method) {
+ // We want to align the code rather than the preheader.
+ uint32_t unaligned_code_offset = header_offset + sizeof(OatQuickMethodHeader);
+ uint32_t aligned_code_offset = compiled_method.AlignCode(unaligned_code_offset);
+ return aligned_code_offset - unaligned_code_offset;
+}
+
} // anonymous namespace
// Defines the location of the raw dex file to write.
@@ -817,8 +824,8 @@
uint32_t thumb_offset) {
offset_ = writer_->relative_patcher_->ReserveSpace(
offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex()));
- offset_ = compiled_method->AlignCode(offset_);
- DCHECK_ALIGNED_PARAM(offset_,
+ offset_ += CodeAlignmentSize(offset_, *compiled_method);
+ DCHECK_ALIGNED_PARAM(offset_ + sizeof(OatQuickMethodHeader),
GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
return offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
}
@@ -1011,17 +1018,16 @@
ReportWriteFailure("relative call thunk", it);
return false;
}
- uint32_t aligned_offset = compiled_method->AlignCode(offset_);
- uint32_t aligned_code_delta = aligned_offset - offset_;
- if (aligned_code_delta != 0) {
- if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) {
+ uint32_t alignment_size = CodeAlignmentSize(offset_, *compiled_method);
+ if (alignment_size != 0) {
+ if (!writer_->WriteCodeAlignment(out, alignment_size)) {
ReportWriteFailure("code alignment padding", it);
return false;
}
- offset_ += aligned_code_delta;
+ offset_ += alignment_size;
DCHECK_OFFSET_();
}
- DCHECK_ALIGNED_PARAM(offset_,
+ DCHECK_ALIGNED_PARAM(offset_ + sizeof(OatQuickMethodHeader),
GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
DCHECK_EQ(method_offsets.code_offset_,
offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 4a4b98c..a5493ab 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -765,16 +765,24 @@
LocationSummary* locations = instruction->GetLocations();
uint32_t register_mask = locations->GetRegisterMask();
- if (locations->OnlyCallsOnSlowPath()) {
- // In case of slow path, we currently set the location of caller-save registers
- // to register (instead of their stack location when pushed before the slow-path
- // call). Therefore register_mask contains both callee-save and caller-save
- // registers that hold objects. We must remove the caller-save from the mask, since
- // they will be overwritten by the callee.
- register_mask &= core_callee_save_mask_;
+ if (instruction->IsSuspendCheck()) {
+ // Suspend check has a special ABI that saves the caller-save registers in the callee,
+ // so we want to emit stack maps containing the registers.
+ // TODO: Register allocator still reserves space for the caller-save registers.
+ // We should add slow-path-specific caller-save information into LocationSummary
+ // and refactor the code here as well as in the register allocator to use it.
+ } else {
+ if (locations->OnlyCallsOnSlowPath()) {
+ // In case of slow path, we currently set the location of caller-save registers
+ // to register (instead of their stack location when pushed before the slow-path
+ // call). Therefore register_mask contains both callee-save and caller-save
+ // registers that hold objects. We must remove the caller-save from the mask, since
+ // they will be overwritten by the callee.
+ register_mask &= core_callee_save_mask_;
+ }
+ // The register mask must be a subset of callee-save registers.
+ DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
}
- // The register mask must be a subset of callee-save registers.
- DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
stack_map_stream_.BeginStackMapEntry(outer_dex_pc,
native_pc,
register_mask,
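A tiny sketch of the register-mask rule in the branch above, using hypothetical masks: for an ordinary slow-path call the mask is clipped to callee-saves (caller-save registers will be clobbered by the callee), while a suspend check keeps all bits because its runtime ABI saves the caller-save registers in the callee.

    #include <cassert>
    #include <cstdint>

    uint32_t StackMapRegisterMask(uint32_t register_mask,
                                  uint32_t core_callee_save_mask,
                                  bool is_suspend_check) {
      if (!is_suspend_check) {
        // Only callee-save registers survive the slow-path call.
        register_mask &= core_callee_save_mask;
        assert((register_mask & core_callee_save_mask) == register_mask);
      }
      return register_mask;
    }

    int main() {
      const uint32_t callee_saves = 0x0000ff00u;  // assumption: callee-save core registers
      const uint32_t live_objects = 0x0000f0f0u;  // objects live in both kinds of registers
      assert(StackMapRegisterMask(live_objects, callee_saves, /*is_suspend_check=*/ false) == 0x0000f000u);
      assert(StackMapRegisterMask(live_objects, callee_saves, /*is_suspend_check=*/ true) == live_objects);
      return 0;
    }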
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c18b793..c105940 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -119,11 +119,9 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
arm_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ b(GetReturnLabel());
} else {
@@ -1289,6 +1287,44 @@
void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
+void InstructionCodeGeneratorARM::GenerateVcmp(HInstruction* instruction) {
+ Primitive::Type type = instruction->InputAt(0)->GetType();
+ Location lhs_loc = instruction->GetLocations()->InAt(0);
+ Location rhs_loc = instruction->GetLocations()->InAt(1);
+ if (rhs_loc.IsConstant()) {
+ // 0.0 is the only immediate that can be encoded directly in
+ // a VCMP instruction.
+ //
+ // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+ // specify that in a floating-point comparison, positive zero
+ // and negative zero are considered equal, so we can use the
+ // literal 0.0 for both cases here.
+ //
+ // Note however that some methods (Float.equals, Float.compare,
+ // Float.compareTo, Double.equals, Double.compare,
+ // Double.compareTo, Math.max, Math.min, StrictMath.max,
+ // StrictMath.min) consider 0.0 to be (strictly) greater than
+ // -0.0. So if we ever translate calls to these methods into a
+ // HCompare instruction, we must handle the -0.0 case with
+ // care here.
+ DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+ if (type == Primitive::kPrimFloat) {
+ __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
+ }
+ } else {
+ if (type == Primitive::kPrimFloat) {
+ __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
+ FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
+ }
+ }
+}
+
void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
Label* true_label,
Label* false_label ATTRIBUTE_UNUSED) {
@@ -1389,22 +1425,14 @@
Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
- LocationSummary* locations = condition->GetLocations();
- Location left = locations->InAt(0);
- Location right = locations->InAt(1);
-
Primitive::Type type = condition->InputAt(0)->GetType();
switch (type) {
case Primitive::kPrimLong:
GenerateLongComparesAndJumps(condition, true_target, false_target);
break;
case Primitive::kPrimFloat:
- __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
- GenerateFPJumps(condition, true_target, false_target);
- break;
case Primitive::kPrimDouble:
- __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
- FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+ GenerateVcmp(condition);
GenerateFPJumps(condition, true_target, false_target);
break;
default:
@@ -1585,7 +1613,7 @@
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
if (!cond->IsEmittedAtUseSite()) {
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
@@ -1632,12 +1660,8 @@
GenerateLongComparesAndJumps(cond, &true_label, &false_label);
break;
case Primitive::kPrimFloat:
- __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
- GenerateFPJumps(cond, &true_label, &false_label);
- break;
case Primitive::kPrimDouble:
- __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
- FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+ GenerateVcmp(cond);
GenerateFPJumps(cond, &true_label, &false_label);
break;
}
@@ -3654,7 +3678,7 @@
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
locations->SetOut(Location::RequiresRegister());
break;
}
@@ -3699,12 +3723,7 @@
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
__ LoadImmediate(out, 0);
- if (type == Primitive::kPrimFloat) {
- __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
- } else {
- __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
- FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
- }
+ GenerateVcmp(compare);
__ vmstat(); // transfer FP status register to ARM APSR.
less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
break;
@@ -3998,6 +4017,17 @@
}
}
+Location LocationsBuilderARM::ArithmeticZeroOrFpuRegister(HInstruction* input) {
+ DCHECK(input->GetType() == Primitive::kPrimDouble || input->GetType() == Primitive::kPrimFloat)
+ << input->GetType();
+ if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
+ (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
+ return Location::ConstantLocation(input->AsConstant());
+ } else {
+ return Location::RequiresFpuRegister();
+ }
+}
+
Location LocationsBuilderARM::ArmEncodableConstantOrRegister(HInstruction* constant,
Opcode opcode) {
DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
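Aside on the signed-zero comment in GenerateVcmp() above (a standalone illustration, not part of the patch): an ordinary IEEE comparison treats +0.0 and -0.0 as equal, which is why comparing against the literal 0.0 is safe for HCompare, while compare-style library methods still distinguish the two zeros by their bit patterns.

    #include <cmath>
    #include <cstdio>

    int main() {
      float pos_zero = 0.0f;
      float neg_zero = -0.0f;
      // IEEE comparison: equal, so a vcmp against literal 0.0 covers both zeros.
      std::printf("pos_zero == neg_zero: %d\n", pos_zero == neg_zero);                // prints 1
      // The encodings still differ, which is what Float.compare()-style methods report.
      std::printf("sign bits differ:     %d\n",
                  std::signbit(pos_zero) != std::signbit(neg_zero));                  // prints 1
      return 0;
    }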
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index f9fcabd..fa7709b 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -180,6 +180,7 @@
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ Location ArithmeticZeroOrFpuRegister(HInstruction* input);
Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode);
@@ -281,6 +282,7 @@
void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
Label* false_target);
+ void GenerateVcmp(HInstruction* instruction);
void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 115cee6..54c9efc 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -398,11 +398,9 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
@@ -609,6 +607,8 @@
DCHECK_NE(obj_.reg(), LR);
DCHECK_NE(obj_.reg(), WSP);
DCHECK_NE(obj_.reg(), WZR);
+ // WIP0 is used by the slow path as a temp; it cannot be the object register.
+ DCHECK_NE(obj_.reg(), IP0);
DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
// "Compact" slow path, saving two moves.
//
@@ -751,10 +751,7 @@
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
<< instruction_->AsInvoke()->GetIntrinsic();
DCHECK_EQ(offset_, 0U);
- DCHECK(index_.IsRegisterPair());
- // UnsafeGet's offset location is a register pair, the low
- // part contains the correct offset.
- index = index_.ToLow();
+ DCHECK(index_.IsRegister());
}
}
@@ -1284,17 +1281,21 @@
UseScratchRegisterScope temps(GetVIXLAssembler());
HConstant* src_cst = source.GetConstant();
CPURegister temp;
- if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) {
- temp = temps.AcquireW();
- } else if (src_cst->IsLongConstant()) {
- temp = temps.AcquireX();
- } else if (src_cst->IsFloatConstant()) {
- temp = temps.AcquireS();
+ if (src_cst->IsZeroBitPattern()) {
+ temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) ? xzr : wzr;
} else {
- DCHECK(src_cst->IsDoubleConstant());
- temp = temps.AcquireD();
+ if (src_cst->IsIntConstant()) {
+ temp = temps.AcquireW();
+ } else if (src_cst->IsLongConstant()) {
+ temp = temps.AcquireX();
+ } else if (src_cst->IsFloatConstant()) {
+ temp = temps.AcquireS();
+ } else {
+ DCHECK(src_cst->IsDoubleConstant());
+ temp = temps.AcquireD();
+ }
+ MoveConstant(temp, src_cst);
}
- MoveConstant(temp, src_cst);
__ Str(temp, StackOperandFrom(destination));
} else {
DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 240936c..1b5fa85 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -243,7 +243,7 @@
}
Arm64Assembler* GetAssembler() const { return assembler_; }
- vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+ vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
private:
void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
@@ -364,7 +364,7 @@
private:
Arm64Assembler* GetAssembler() const;
vixl::aarch64::MacroAssembler* GetVIXLAssembler() const {
- return GetAssembler()->vixl_masm_;
+ return GetAssembler()->GetVIXLAssembler();
}
CodeGeneratorARM64* const codegen_;
@@ -413,7 +413,7 @@
HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
- vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+ vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
// Emit a write barrier.
void MarkGCCard(vixl::aarch64::Register object,
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8dd82ef..59e103a 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -351,14 +351,12 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
instruction_,
instruction_->GetDexPc(),
this,
IsDirectEntrypoint(kQuickTestSuspend));
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3472830..fe1fddc 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -300,13 +300,11 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
instruction_,
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ Bc(GetReturnLabel());
} else {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a2fa245..ade2117 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -192,13 +192,11 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
instruction_,
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5d5fa85..eadb431 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -149,13 +149,11 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
instruction_,
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 0b4c569..89d80cc 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -298,6 +298,12 @@
stream << constant->AsIntConstant()->GetValue();
} else if (constant->IsLongConstant()) {
stream << constant->AsLongConstant()->GetValue();
+ } else if (constant->IsFloatConstant()) {
+ stream << constant->AsFloatConstant()->GetValue();
+ } else if (constant->IsDoubleConstant()) {
+ stream << constant->AsDoubleConstant()->GetValue();
+ } else if (constant->IsNullConstant()) {
+ stream << "null";
}
} else if (location.IsInvalid()) {
stream << "invalid";
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 06d1148..e3a9d27 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -26,7 +26,6 @@
#include "mirror/string.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
-#include "utils/arm64/constants_arm64.h"
using namespace vixl::aarch64; // NOLINT(build/namespaces)
@@ -62,14 +61,14 @@
} // namespace
MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
- return codegen_->GetAssembler()->vixl_masm_;
+ return codegen_->GetVIXLAssembler();
}
ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
return codegen_->GetGraph()->GetArena();
}
-#define __ codegen->GetAssembler()->vixl_masm_->
+#define __ codegen->GetVIXLAssembler()->
static void MoveFromReturnRegister(Location trg,
Primitive::Type type,
@@ -782,7 +781,7 @@
DCHECK((type == Primitive::kPrimInt) ||
(type == Primitive::kPrimLong) ||
(type == Primitive::kPrimNot));
- MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
Location base_loc = locations->InAt(1);
Register base = WRegisterFrom(base_loc); // Object pointer.
Location offset_loc = locations->InAt(2);
@@ -916,7 +915,7 @@
bool is_volatile,
bool is_ordered,
CodeGeneratorARM64* codegen) {
- MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
@@ -1035,7 +1034,7 @@
}
static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
- MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
Register out = WRegisterFrom(locations->Out()); // Boolean result.
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index d82caf5..dc1f24a 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -28,7 +28,7 @@
#ifdef ___
#error "ARM64 Assembler macro already defined."
#else
-#define ___ vixl_masm_->
+#define ___ vixl_masm_.
#endif
void Arm64Assembler::FinalizeCode() {
@@ -39,16 +39,16 @@
}
size_t Arm64Assembler::CodeSize() const {
- return vixl_masm_->GetBufferCapacity() - vixl_masm_->GetRemainingBufferSpace();
+ return vixl_masm_.GetBufferCapacity() - vixl_masm_.GetRemainingBufferSpace();
}
const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const {
- return vixl_masm_->GetStartAddress<uint8_t*>();
+ return vixl_masm_.GetStartAddress<uint8_t*>();
}
void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) {
// Copy the instructions from the buffer.
- MemoryRegion from(vixl_masm_->GetStartAddress<void*>(), CodeSize());
+ MemoryRegion from(vixl_masm_.GetStartAddress<void*>(), CodeSize());
region.CopyFrom(0, from);
}
@@ -86,7 +86,7 @@
} else {
// temp = rd + value
// rd = cond ? temp : rn
- UseScratchRegisterScope temps(vixl_masm_);
+ UseScratchRegisterScope temps(&vixl_masm_);
temps.Exclude(reg_x(rd), reg_x(rn));
Register temp = temps.AcquireX();
___ Add(temp, reg_x(rn), value);
@@ -183,7 +183,7 @@
}
void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset64 tr_offs) {
- UseScratchRegisterScope temps(vixl_masm_);
+ UseScratchRegisterScope temps(&vixl_masm_);
Register temp = temps.AcquireX();
___ Mov(temp, reg_x(SP));
___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
@@ -207,7 +207,7 @@
// temp = value
// rd = cond ? temp : rd
if (value != 0) {
- UseScratchRegisterScope temps(vixl_masm_);
+ UseScratchRegisterScope temps(&vixl_masm_);
temps.Exclude(reg_x(dest));
Register temp = temps.AcquireX();
___ Mov(temp, value);
@@ -314,7 +314,7 @@
Arm64ManagedRegister base = m_base.AsArm64();
CHECK(dst.IsXRegister() && base.IsXRegister());
// Remove dst and base from the temp list - higher level API uses IP1, IP0.
- UseScratchRegisterScope temps(vixl_masm_);
+ UseScratchRegisterScope temps(&vixl_masm_);
temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister()));
___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
}
@@ -528,7 +528,7 @@
CHECK(base.IsXRegister()) << base;
CHECK(scratch.IsXRegister()) << scratch;
// Remove base and scratch from the temp list - higher level API uses IP1, IP0.
- UseScratchRegisterScope temps(vixl_masm_);
+ UseScratchRegisterScope temps(&vixl_masm_);
temps.Exclude(reg_x(base.AsXRegister()), reg_x(scratch.AsXRegister()));
___ Ldr(reg_x(scratch.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
___ Br(reg_x(scratch.AsXRegister()));
@@ -621,7 +621,7 @@
}
void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
- UseScratchRegisterScope temps(vixl_masm_);
+ UseScratchRegisterScope temps(&vixl_masm_);
temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
Register temp = temps.AcquireX();
@@ -653,7 +653,7 @@
void Arm64Assembler::SpillRegisters(CPURegList registers, int offset) {
int size = registers.GetRegisterSizeInBytes();
- const Register sp = vixl_masm_->StackPointer();
+ const Register sp = vixl_masm_.StackPointer();
// Since we are operating on register pairs, we would like to align on
// double the standard size; on the other hand, we don't want to insert
// an extra store, which will happen if the number of registers is even.
@@ -681,7 +681,7 @@
void Arm64Assembler::UnspillRegisters(CPURegList registers, int offset) {
int size = registers.GetRegisterSizeInBytes();
- const Register sp = vixl_masm_->StackPointer();
+ const Register sp = vixl_masm_.StackPointer();
// Be consistent with the logic for spilling registers.
if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) {
const CPURegister& dst0 = registers.PopLowestIndex();
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 24b7982..b8434b9 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -23,7 +23,6 @@
#include "base/arena_containers.h"
#include "base/logging.h"
-#include "constants_arm64.h"
#include "utils/arm64/managed_register_arm64.h"
#include "utils/assembler.h"
#include "offsets.h"
@@ -84,16 +83,13 @@
class Arm64Assembler FINAL : public Assembler {
public:
- // We indicate the size of the initial code generation buffer to the VIXL
- // assembler. From there it will automatically manage the buffer.
explicit Arm64Assembler(ArenaAllocator* arena)
: Assembler(arena),
- exception_blocks_(arena->Adapter(kArenaAllocAssembler)),
- vixl_masm_(new vixl::aarch64::MacroAssembler(kArm64BaseBufferSize)) {}
+ exception_blocks_(arena->Adapter(kArenaAllocAssembler)) {}
- virtual ~Arm64Assembler() {
- delete vixl_masm_;
- }
+ virtual ~Arm64Assembler() {}
+
+ vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; }
// Finalize the code.
void FinalizeCode() OVERRIDE;
@@ -287,9 +283,8 @@
// List of exception blocks to generate at the end of the code cache.
ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_;
- public:
- // Vixl assembler.
- vixl::aarch64::MacroAssembler* const vixl_masm_;
+ // VIXL assembler.
+ vixl::aarch64::MacroAssembler vixl_masm_;
// Used for testing.
friend class Arm64ManagedRegister_VixlRegisters_Test;
diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h
deleted file mode 100644
index 01e8be9..0000000
--- a/compiler/utils/arm64/constants_arm64.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
-#define ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
-
-#include <stdint.h>
-#include <iosfwd>
-#include "arch/arm64/registers_arm64.h"
-#include "base/casts.h"
-#include "base/logging.h"
-#include "globals.h"
-
-// TODO: Extend this file by adding missing functionality.
-
-namespace art {
-namespace arm64 {
-
-constexpr size_t kArm64BaseBufferSize = 4096;
-
-} // namespace arm64
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index f7d74d2..7378a0a 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -17,8 +17,8 @@
#ifndef ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_
#define ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_
+#include "arch/arm64/registers_arm64.h"
#include "base/logging.h"
-#include "constants_arm64.h"
#include "debug/dwarf/register.h"
#include "utils/managed_register.h"
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 6c43e86..4f0e144 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -941,17 +941,11 @@
opcode << (op != 0 ? "vsqrt" : "vneg") << (S != 0 ? ".f64" : ".f32");
args << d << ", " << m;
} else if (op5 == 4) {
- opcode << "vcmp" << (S != 0 ? ".f64" : ".f32");
+ opcode << "vcmp" << ((op != 0) ? "e" : "") << (S != 0 ? ".f64" : ".f32");
args << d << ", " << m;
- if (op != 0) {
- args << " (quiet nan)";
- }
} else if (op5 == 5) {
- opcode << "vcmpe" << (S != 0 ? ".f64" : ".f32");
+ opcode << "vcmp" << ((op != 0) ? "e" : "") << (S != 0 ? ".f64" : ".f32");
args << d << ", #0.0";
- if (op != 0) {
- args << " (quiet nan)";
- }
if ((instr & 0x2f) != 0) {
args << " (UNPREDICTABLE)";
}
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 8c3c5e5..a0def61 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -76,6 +76,7 @@
"kCalleeSaveMethod",
"kRefsOnlySaveMethod",
"kRefsAndArgsSaveMethod",
+ "kSaveEverythingMethod",
};
const char* image_roots_descriptions_[] = {
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index ee31c58..6d80eb6 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -69,7 +69,9 @@
#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE;
+#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE
+} // namespace arm
namespace arm64 {
#include "arch/arm64/asm_support_arm64.h"
@@ -79,7 +81,9 @@
#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE;
+#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE
+} // namespace arm64
namespace mips {
#include "arch/mips/asm_support_mips.h"
@@ -89,7 +93,9 @@
#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE;
+#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE
+} // namespace mips
namespace mips64 {
#include "arch/mips64/asm_support_mips64.h"
@@ -99,7 +105,9 @@
#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE;
+#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE
+} // namespace mips64
namespace x86 {
#include "arch/x86/asm_support_x86.h"
@@ -109,7 +117,9 @@
#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE;
+#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE
+} // namespace x86
namespace x86_64 {
#include "arch/x86_64/asm_support_x86_64.h"
@@ -119,13 +129,18 @@
#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE;
+#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE
+} // namespace x86_64
// Check architecture specific constants are sound.
TEST_F(ArchTest, ARM) {
CheckFrameSize(InstructionSet::kArm, Runtime::kSaveAll, arm::kFrameSizeSaveAllCalleeSave);
CheckFrameSize(InstructionSet::kArm, Runtime::kRefsOnly, arm::kFrameSizeRefsOnlyCalleeSave);
CheckFrameSize(InstructionSet::kArm, Runtime::kRefsAndArgs, arm::kFrameSizeRefsAndArgsCalleeSave);
+ CheckFrameSize(InstructionSet::kArm,
+ Runtime::kSaveEverything,
+ arm::kFrameSizeSaveEverythingCalleeSave);
}
@@ -134,33 +149,51 @@
CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsOnly, arm64::kFrameSizeRefsOnlyCalleeSave);
CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsAndArgs,
arm64::kFrameSizeRefsAndArgsCalleeSave);
+ CheckFrameSize(InstructionSet::kArm64,
+ Runtime::kSaveEverything,
+ arm64::kFrameSizeSaveEverythingCalleeSave);
}
TEST_F(ArchTest, MIPS) {
CheckFrameSize(InstructionSet::kMips, Runtime::kSaveAll, mips::kFrameSizeSaveAllCalleeSave);
CheckFrameSize(InstructionSet::kMips, Runtime::kRefsOnly, mips::kFrameSizeRefsOnlyCalleeSave);
- CheckFrameSize(InstructionSet::kMips, Runtime::kRefsAndArgs,
+ CheckFrameSize(InstructionSet::kMips,
+ Runtime::kRefsAndArgs,
mips::kFrameSizeRefsAndArgsCalleeSave);
+ CheckFrameSize(InstructionSet::kMips,
+ Runtime::kSaveEverything,
+ mips::kFrameSizeSaveEverythingCalleeSave);
}
TEST_F(ArchTest, MIPS64) {
CheckFrameSize(InstructionSet::kMips64, Runtime::kSaveAll, mips64::kFrameSizeSaveAllCalleeSave);
CheckFrameSize(InstructionSet::kMips64, Runtime::kRefsOnly, mips64::kFrameSizeRefsOnlyCalleeSave);
- CheckFrameSize(InstructionSet::kMips64, Runtime::kRefsAndArgs,
+ CheckFrameSize(InstructionSet::kMips64,
+ Runtime::kRefsAndArgs,
mips64::kFrameSizeRefsAndArgsCalleeSave);
+ CheckFrameSize(InstructionSet::kMips64,
+ Runtime::kSaveEverything,
+ mips64::kFrameSizeSaveEverythingCalleeSave);
}
TEST_F(ArchTest, X86) {
CheckFrameSize(InstructionSet::kX86, Runtime::kSaveAll, x86::kFrameSizeSaveAllCalleeSave);
CheckFrameSize(InstructionSet::kX86, Runtime::kRefsOnly, x86::kFrameSizeRefsOnlyCalleeSave);
CheckFrameSize(InstructionSet::kX86, Runtime::kRefsAndArgs, x86::kFrameSizeRefsAndArgsCalleeSave);
+ CheckFrameSize(InstructionSet::kX86,
+ Runtime::kSaveEverything,
+ x86::kFrameSizeSaveEverythingCalleeSave);
}
TEST_F(ArchTest, X86_64) {
CheckFrameSize(InstructionSet::kX86_64, Runtime::kSaveAll, x86_64::kFrameSizeSaveAllCalleeSave);
CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsOnly, x86_64::kFrameSizeRefsOnlyCalleeSave);
- CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsAndArgs,
+ CheckFrameSize(InstructionSet::kX86_64,
+ Runtime::kRefsAndArgs,
x86_64::kFrameSizeRefsAndArgsCalleeSave);
+ CheckFrameSize(InstructionSet::kX86_64,
+ Runtime::kSaveEverything,
+ x86_64::kFrameSizeSaveEverythingCalleeSave);
}
} // namespace art
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 1fa566b..67f6f7a 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -22,6 +22,7 @@
#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 112
#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 112
+#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE 192
// Flag for enabling R4 optimization in arm runtime
// #define ARM_R4_SUSPEND_FLAG
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 34d3158..42418ad 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -168,6 +168,65 @@
.cfi_adjust_cfa_offset -40
.endm
+ /*
+ * Macro that sets up the callee save frame to conform with
+ * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+ */
+.macro SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME rTemp
+ push {r0-r12, lr} @ 14 words of callee saves and args.
+ .cfi_adjust_cfa_offset 56
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset r1, 4
+ .cfi_rel_offset r2, 8
+ .cfi_rel_offset r3, 12
+ .cfi_rel_offset r4, 16
+ .cfi_rel_offset r5, 20
+ .cfi_rel_offset r6, 24
+ .cfi_rel_offset r7, 28
+ .cfi_rel_offset r8, 32
+ .cfi_rel_offset r9, 36
+ .cfi_rel_offset r10, 40
+ .cfi_rel_offset r11, 44
+ .cfi_rel_offset ip, 48
+ .cfi_rel_offset lr, 52
+ vpush {s0-s31} @ 32 words of float args.
+ .cfi_adjust_cfa_offset 128
+ sub sp, #8 @ 2 words of space, alignment padding and Method*
+ .cfi_adjust_cfa_offset 8
+ RUNTIME_CURRENT1 \rTemp @ Load Runtime::Current into rTemp.
+ @ Load kSaveEverything Method* to rTemp.
+ ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET]
+ str \rTemp, [sp, #0] @ Store kSaveEverything Method* to the bottom of the stack.
+ str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame.
+
+ // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 56 + 128 + 8)
+#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#endif
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ add sp, #8 @ rewind sp
+ .cfi_adjust_cfa_offset -8
+ vpop {s0-s31}
+ .cfi_adjust_cfa_offset -128
+ pop {r0-r12, lr} @ 14 words of callee saves
+ .cfi_restore r0
+ .cfi_restore r1
+ .cfi_restore r2
+ .cfi_restore r3
+ .cfi_restore r4
+ .cfi_restore r5
+ .cfi_restore r6
+ .cfi_restore r7
+ .cfi_restore r8
+ .cfi_restore r9
+ .cfi_restore r10
+ .cfi_restore r11
+ .cfi_restore r12
+ .cfi_restore lr
+ .cfi_adjust_cfa_offset -56
+.endm
+
.macro RETURN_IF_RESULT_IS_ZERO
cbnz r0, 1f @ result non-zero branch over
bx lr @ return
@@ -520,7 +579,7 @@
ldr r2, [r9, #THREAD_ID_OFFSET]
ldrex r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
mov r3, r1
- and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits
+ and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits
cbnz r3, .Lnot_unlocked @ already thin locked
@ unlocked case - r1: original lock word that's zero except for the read barrier bits.
orr r2, r1, r2 @ r2 holds thread id with count of 0 with preserved read barrier bits
@@ -536,9 +595,9 @@
cbnz r2, .Lslow_lock @ lock word and self thread id's match -> recursive lock
@ else contention, go to slow path
mov r3, r1 @ copy the lock word to check count overflow.
- and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits.
+ and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits.
add r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count in lock word placing in r2 to check overflow
- lsr r3, r2, LOCK_WORD_READ_BARRIER_STATE_SHIFT @ if either of the upper two bits (28-29) are set, we overflowed.
+ lsr r3, r2, #LOCK_WORD_GC_STATE_SHIFT @ if the first gc state bit is set, we overflowed.
cbnz r3, .Lslow_lock @ if we overflow the count go slow path
add r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count for real
strex r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
@@ -581,17 +640,17 @@
cbnz r2, .Lslow_unlock @ if either of the top two bits are set, go slow path
ldr r2, [r9, #THREAD_ID_OFFSET]
mov r3, r1 @ copy lock word to check thread id equality
- and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits
+ and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits
eor r3, r3, r2 @ lock_word.ThreadId() ^ self->ThreadId()
uxth r3, r3 @ zero top 16 bits
cbnz r3, .Lslow_unlock @ do lock word and self thread id's match?
mov r3, r1 @ copy lock word to detect transition to unlocked
- and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits
+ and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits
cmp r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
bpl .Lrecursive_thin_unlock
@ transition to unlocked
mov r3, r1
- and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK @ r3: zero except for the preserved read barrier bits
+ and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED @ r3: zero except for the preserved gc bits
dmb ish @ full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
@@ -1212,17 +1271,18 @@
.extern artTestSuspendFromCode
ENTRY art_quick_test_suspend
#ifdef ARM_R4_SUSPEND_FLAG
- ldrh r0, [rSELF, #THREAD_FLAGS_OFFSET]
- mov rSUSPEND, #SUSPEND_CHECK_INTERVAL @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
- cbnz r0, 1f @ check Thread::Current()->suspend_count_ == 0
- bx lr @ return if suspend_count_ == 0
+ ldrh rSUSPEND, [rSELF, #THREAD_FLAGS_OFFSET]
+ cbnz rSUSPEND, 1f @ check Thread::Current()->suspend_count_ == 0
+ mov rSUSPEND, #SUSPEND_CHECK_INTERVAL @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
+ bx lr @ return if suspend_count_ == 0
1:
+ mov rSUSPEND, #SUSPEND_CHECK_INTERVAL @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
#endif
+ SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME r0 @ save everything for GC stack crawl
mov r0, rSELF
- SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1 @ save callee saves for GC stack crawl
- @ TODO: save FPRs to enable access in the debugger?
- bl artTestSuspendFromCode @ (Thread*)
- RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+ bl artTestSuspendFromCode @ (Thread*)
+ RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ bx lr
END art_quick_test_suspend
ENTRY art_quick_implicit_suspend
@@ -1772,6 +1832,20 @@
*/
.macro READ_BARRIER_MARK_REG name, reg
ENTRY \name
+ // Null check so that we can load the lock word.
+ cmp \reg, #0
+ beq .Lret_rb_\name
+ // Check the lock word for the mark bit; if it is marked, return.
+ push {r0}
+ ldr r0, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ and r0, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+ cbz r0, .Lslow_rb_\name
+ // Restore r0 and return.
+ pop {r0}
+ bx lr
+
+.Lslow_rb_\name:
+ pop {r0}
push {r0-r4, r9, r12, lr} @ save return address and core caller-save registers
.cfi_adjust_cfa_offset 32
.cfi_rel_offset r0, 0
@@ -1831,6 +1905,8 @@
.endif
.endif
pop {r0-r4, r9, r12, pc} @ restore caller-save registers and return
+.Lret_rb_\name:
+ bx lr
END \name
.endm
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 0fb8a63..c474d2e 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -34,6 +34,9 @@
(1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3);
static constexpr uint32_t kArmCalleeSaveAllSpills =
(1 << art::arm::R4) | (1 << art::arm::R9);
+static constexpr uint32_t kArmCalleeSaveEverythingSpills =
+ (1 << art::arm::R0) | (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3) |
+ (1 << art::arm::R4) | (1 << art::arm::R9) | (1 << art::arm::R12);
static constexpr uint32_t kArmCalleeSaveFpAlwaysSpills = 0;
static constexpr uint32_t kArmCalleeSaveFpRefSpills = 0;
@@ -47,17 +50,21 @@
(1 << art::arm::S20) | (1 << art::arm::S21) | (1 << art::arm::S22) | (1 << art::arm::S23) |
(1 << art::arm::S24) | (1 << art::arm::S25) | (1 << art::arm::S26) | (1 << art::arm::S27) |
(1 << art::arm::S28) | (1 << art::arm::S29) | (1 << art::arm::S30) | (1 << art::arm::S31);
+static constexpr uint32_t kArmCalleeSaveFpEverythingSpills =
+ kArmCalleeSaveFpArgSpills | kArmCalleeSaveFpAllSpills;
constexpr uint32_t ArmCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
return kArmCalleeSaveAlwaysSpills | kArmCalleeSaveRefSpills |
(type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
- (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0);
+ (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0) |
+ (type == Runtime::kSaveEverything ? kArmCalleeSaveEverythingSpills : 0);
}
constexpr uint32_t ArmCalleeSaveFpSpills(Runtime::CalleeSaveType type) {
return kArmCalleeSaveFpAlwaysSpills | kArmCalleeSaveFpRefSpills |
(type == Runtime::kRefsAndArgs ? kArmCalleeSaveFpArgSpills: 0) |
- (type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0);
+ (type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0) |
+ (type == Runtime::kSaveEverything ? kArmCalleeSaveFpEverythingSpills : 0);
}
constexpr uint32_t ArmCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index 989ecc6..68d12e9 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -22,5 +22,6 @@
#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96
#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224
+#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE 512
#endif // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index a5be52d..415bb71 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -316,6 +316,204 @@
.cfi_adjust_cfa_offset -224
.endm
+ /*
+ * Macro that sets up the callee save frame to conform with
+ * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+ */
+.macro SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ sub sp, sp, #512
+ .cfi_adjust_cfa_offset 512
+
+ // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 512)
+#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#endif
+
+ // Save FP registers.
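+ // FP registers are stored starting at offset 8; the slot at [sp, #0] is reserved for the ArtMethod*.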
+ stp d0, d1, [sp, #8]
+ stp d2, d3, [sp, #24]
+ stp d4, d5, [sp, #40]
+ stp d6, d7, [sp, #56]
+ stp d8, d9, [sp, #72]
+ stp d10, d11, [sp, #88]
+ stp d12, d13, [sp, #104]
+ stp d14, d15, [sp, #120]
+ stp d16, d17, [sp, #136]
+ stp d18, d19, [sp, #152]
+ stp d20, d21, [sp, #168]
+ stp d22, d23, [sp, #184]
+ stp d24, d25, [sp, #200]
+ stp d26, d27, [sp, #216]
+ stp d28, d29, [sp, #232]
+ stp d30, d31, [sp, #248]
+
+ // Save core registers.
+ str x0, [sp, #264]
+ .cfi_rel_offset x0, 264
+
+ stp x1, x2, [sp, #272]
+ .cfi_rel_offset x1, 272
+ .cfi_rel_offset x2, 280
+
+ stp x3, x4, [sp, #288]
+ .cfi_rel_offset x3, 288
+ .cfi_rel_offset x4, 296
+
+ stp x5, x6, [sp, #304]
+ .cfi_rel_offset x5, 304
+ .cfi_rel_offset x6, 312
+
+ stp x7, x8, [sp, #320]
+ .cfi_rel_offset x7, 320
+ .cfi_rel_offset x8, 328
+
+ stp x9, x10, [sp, #336]
+ .cfi_rel_offset x9, 336
+ .cfi_rel_offset x10, 344
+
+ stp x11, x12, [sp, #352]
+ .cfi_rel_offset x11, 352
+ .cfi_rel_offset x12, 360
+
+ stp x13, x14, [sp, #368]
+ .cfi_rel_offset x13, 368
+ .cfi_rel_offset x14, 376
+
+ stp x15, x16, [sp, #384]
+ .cfi_rel_offset x15, 384
+ .cfi_rel_offset x16, 392
+
+ stp x17, x18, [sp, #400]
+ .cfi_rel_offset x17, 400
+ .cfi_rel_offset x18, 408
+
+ stp x19, x20, [sp, #416]
+ .cfi_rel_offset x19, 416
+ .cfi_rel_offset x20, 424
+
+ stp x21, x22, [sp, #432]
+ .cfi_rel_offset x21, 432
+ .cfi_rel_offset x22, 440
+
+ stp x23, x24, [sp, #448]
+ .cfi_rel_offset x23, 448
+ .cfi_rel_offset x24, 456
+
+ stp x25, x26, [sp, #464]
+ .cfi_rel_offset x25, 464
+ .cfi_rel_offset x26, 472
+
+ stp x27, x28, [sp, #480]
+ .cfi_rel_offset x27, 480
+ .cfi_rel_offset x28, 488
+
+ stp x29, xLR, [sp, #496]
+ .cfi_rel_offset x29, 496
+ .cfi_rel_offset x30, 504
+
+ adrp xIP0, :got:_ZN3art7Runtime9instance_E
+ ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
+
+ ldr xIP0, [xIP0] // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+
+ // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kSaveEverything] .
+ // Loads appropriate callee-save-method.
+ ldr xIP0, [xIP0, RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET]
+
+ // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
+ str xIP0, [sp]
+ // Place sp in Thread::Current()->top_quick_frame.
+ mov xIP0, sp
+ str xIP0, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ // Restore FP registers.
+ ldp d0, d1, [sp, #8]
+ ldp d2, d3, [sp, #24]
+ ldp d4, d5, [sp, #40]
+ ldp d6, d7, [sp, #56]
+ ldp d8, d9, [sp, #72]
+ ldp d10, d11, [sp, #88]
+ ldp d12, d13, [sp, #104]
+ ldp d14, d15, [sp, #120]
+ ldp d16, d17, [sp, #136]
+ ldp d18, d19, [sp, #152]
+ ldp d20, d21, [sp, #168]
+ ldp d22, d23, [sp, #184]
+ ldp d24, d25, [sp, #200]
+ ldp d26, d27, [sp, #216]
+ ldp d28, d29, [sp, #232]
+ ldp d30, d31, [sp, #248]
+
+ // Restore core registers.
+ ldr x0, [sp, #264]
+ .cfi_restore x0
+
+ ldp x1, x2, [sp, #272]
+ .cfi_restore x1
+ .cfi_restore x2
+
+ ldp x3, x4, [sp, #288]
+ .cfi_restore x3
+ .cfi_restore x4
+
+ ldp x5, x6, [sp, #304]
+ .cfi_restore x5
+ .cfi_restore x6
+
+ ldp x7, x8, [sp, #320]
+ .cfi_restore x7
+ .cfi_restore x8
+
+ ldp x9, x10, [sp, #336]
+ .cfi_restore x9
+ .cfi_restore x10
+
+ ldp x11, x12, [sp, #352]
+ .cfi_restore x11
+ .cfi_restore x12
+
+ ldp x13, x14, [sp, #368]
+ .cfi_restore x13
+ .cfi_restore x14
+
+ ldp x15, x16, [sp, #384]
+ .cfi_restore x15
+ .cfi_restore x16
+
+ ldp x17, x18, [sp, #400]
+ .cfi_restore x17
+ .cfi_restore x18
+
+ ldp x19, x20, [sp, #416]
+ .cfi_restore x19
+ .cfi_restore x20
+
+ ldp x21, x22, [sp, #432]
+ .cfi_restore x21
+ .cfi_restore x22
+
+ ldp x23, x24, [sp, #448]
+ .cfi_restore x23
+ .cfi_restore x24
+
+ ldp x25, x26, [sp, #464]
+ .cfi_restore x25
+ .cfi_restore x26
+
+ ldp x27, x28, [sp, #480]
+ .cfi_restore x27
+ .cfi_restore x28
+
+ ldp x29, xLR, [sp, #496]
+ .cfi_restore x29
+ .cfi_restore x30
+
+ add sp, sp, #512
+ .cfi_adjust_cfa_offset -512
+.endm
+
.macro RETURN_IF_RESULT_IS_ZERO
cbnz x0, 1f // result non-zero branch over
ret // return
@@ -1090,7 +1288,7 @@
ldr w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
ldxr w1, [x4]
mov x3, x1
- and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits
+ and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits
cbnz w3, .Lnot_unlocked // already thin locked
// unlocked case - x1: original lock word that's zero except for the read barrier bits.
orr x2, x1, x2 // x2 holds thread id with count of 0 with preserved read barrier bits
@@ -1106,9 +1304,9 @@
cbnz w2, .Lslow_lock // lock word and self thread id's match -> recursive lock
// else contention, go to slow path
mov x3, x1 // copy the lock word to check count overflow.
- and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits.
+ and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits.
add w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE // increment count in lock word placing in w2 to check overflow
- lsr w3, w2, LOCK_WORD_READ_BARRIER_STATE_SHIFT // if either of the upper two bits (28-29) are set, we overflowed.
+ lsr w3, w2, #LOCK_WORD_GC_STATE_SHIFT // if the first gc state bit is set, we overflowed.
cbnz w3, .Lslow_lock // if we overflow the count go slow path
add w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE // increment count for real
stxr w3, w2, [x4]
@@ -1152,17 +1350,17 @@
cbnz w2, .Lslow_unlock // if either of the top two bits are set, go slow path
ldr w2, [xSELF, #THREAD_ID_OFFSET]
mov x3, x1 // copy lock word to check thread id equality
- and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits
+ and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits
eor w3, w3, w2 // lock_word.ThreadId() ^ self->ThreadId()
uxth w3, w3 // zero top 16 bits
cbnz w3, .Lslow_unlock // do lock word and self thread id's match?
mov x3, x1 // copy lock word to detect transition to unlocked
- and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits
+ and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits
cmp w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
bpl .Lrecursive_thin_unlock
// transition to unlocked
mov x3, x1
- and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK // w3: zero except for the preserved read barrier bits
+ and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED // w3: zero except for the preserved gc bits
dmb ish // full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
str w3, [x4]
@@ -1791,12 +1989,20 @@
ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array
// Load the class (x2)
ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
- // Read barrier for class load.
+
+ // Most common case: GC is not marking.
ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
- cbnz x3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+ cbnz x3, .Lart_quick_alloc_object_region_tlab_marking
+.Lart_quick_alloc_object_region_tlab_do_allocation:
ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+.Lart_quick_alloc_object_region_tlab_marking:
+ // GC is marking, check the lock word of the class for the mark bit.
+ // If the class is null, go slow path. The check is required to read the lock word.
+ cbz w2, .Lart_quick_alloc_object_region_tlab_slow_path
+ // Class is not null, check mark bit in lock word.
+ ldr w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ // If the bit is not zero, do the allocation.
+ tbnz w3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_alloc_object_region_tlab_do_allocation
// The read barrier slow path. Mark
// the class.
stp x0, x1, [sp, #-32]! // Save registers (x0, x1, lr).
@@ -1807,7 +2013,7 @@
ldp x0, x1, [sp, #0] // Restore registers.
ldr xLR, [sp, #16]
add sp, sp, #32
- b .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+ b .Lart_quick_alloc_object_region_tlab_do_allocation
.Lart_quick_alloc_object_region_tlab_slow_path:
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // Save callee saves in case of GC.
mov x2, xSELF // Pass Thread::Current.
@@ -1821,14 +2027,11 @@
*/
.extern artTestSuspendFromCode
ENTRY art_quick_test_suspend
- ldrh w0, [xSELF, #THREAD_FLAGS_OFFSET] // get xSELF->state_and_flags.as_struct.flags
- cbnz w0, .Lneed_suspend // check flags == 0
- ret // return if flags == 0
-.Lneed_suspend:
+ SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME // save callee saves for stack crawl
mov x0, xSELF
- SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves for stack crawl
bl artTestSuspendFromCode // (Thread*)
- RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+ RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ ret
END art_quick_test_suspend
ENTRY art_quick_implicit_suspend
@@ -2265,6 +2468,8 @@
*/
.macro READ_BARRIER_MARK_REG name, wreg, xreg
ENTRY \name
+ // Reference is null, no work to do at all.
+ cbz \wreg, .Lret_rb_\name
/*
* Allocate 46 stack slots * 8 = 368 bytes:
* - 20 slots for core registers X0-X19
@@ -2272,6 +2477,11 @@
* - 1 slot for return address register XLR
* - 1 padding slot for 16-byte stack alignment
*/
+ // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
+ ldr wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name
+ ret
+.Lslow_path_rb_\name:
// Save all potentially live caller-save core registers.
stp x0, x1, [sp, #-368]!
.cfi_adjust_cfa_offset 368
@@ -2360,6 +2570,7 @@
.cfi_restore x30
add sp, sp, #368
.cfi_adjust_cfa_offset -368
+.Lret_rb_\name:
ret
END \name
.endm
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index b3d250b..188e46e 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -29,7 +29,7 @@
static constexpr uint32_t kArm64CalleeSaveAlwaysSpills =
// Note: ArtMethod::GetReturnPcOffsetInBytes() rely on the assumption that
// LR is always saved on the top of the frame for all targets.
- // That is, lr = *(sp + framesize - pointsize).
+ // That is, lr = *(sp + framesize - pointer_size).
(1 << art::arm64::LR);
// Callee saved registers
static constexpr uint32_t kArm64CalleeSaveRefSpills =
@@ -44,6 +44,14 @@
(1 << art::arm64::X7);
static constexpr uint32_t kArm64CalleeSaveAllSpills =
(1 << art::arm64::X19);
+static constexpr uint32_t kArm64CalleeSaveEverythingSpills =
+ (1 << art::arm64::X0) | (1 << art::arm64::X1) | (1 << art::arm64::X2) |
+ (1 << art::arm64::X3) | (1 << art::arm64::X4) | (1 << art::arm64::X5) |
+ (1 << art::arm64::X6) | (1 << art::arm64::X7) | (1 << art::arm64::X8) |
+ (1 << art::arm64::X9) | (1 << art::arm64::X10) | (1 << art::arm64::X11) |
+ (1 << art::arm64::X12) | (1 << art::arm64::X13) | (1 << art::arm64::X14) |
+ (1 << art::arm64::X15) | (1 << art::arm64::X16) | (1 << art::arm64::X17) |
+ (1 << art::arm64::X18) | (1 << art::arm64::X19);
static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0;
static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0;
@@ -55,17 +63,31 @@
(1 << art::arm64::D8) | (1 << art::arm64::D9) | (1 << art::arm64::D10) |
(1 << art::arm64::D11) | (1 << art::arm64::D12) | (1 << art::arm64::D13) |
(1 << art::arm64::D14) | (1 << art::arm64::D15);
+static constexpr uint32_t kArm64CalleeSaveFpEverythingSpills =
+ (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
+ (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
+ (1 << art::arm64::D6) | (1 << art::arm64::D7) | (1 << art::arm64::D8) |
+ (1 << art::arm64::D9) | (1 << art::arm64::D10) | (1 << art::arm64::D11) |
+ (1 << art::arm64::D12) | (1 << art::arm64::D13) | (1 << art::arm64::D14) |
+ (1 << art::arm64::D15) | (1 << art::arm64::D16) | (1 << art::arm64::D17) |
+ (1 << art::arm64::D18) | (1 << art::arm64::D19) | (1 << art::arm64::D20) |
+ (1 << art::arm64::D21) | (1 << art::arm64::D22) | (1 << art::arm64::D23) |
+ (1 << art::arm64::D24) | (1 << art::arm64::D25) | (1 << art::arm64::D26) |
+ (1 << art::arm64::D27) | (1 << art::arm64::D28) | (1 << art::arm64::D29) |
+ (1 << art::arm64::D30) | (1 << art::arm64::D31);
constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
return kArm64CalleeSaveAlwaysSpills | kArm64CalleeSaveRefSpills |
(type == Runtime::kRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
- (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0);
+ (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0) |
+ (type == Runtime::kSaveEverything ? kArm64CalleeSaveEverythingSpills : 0);
}
constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills |
(type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
- (type == Runtime::kSaveAll ? kArm64CalleeSaveFpAllSpills : 0);
+ (type == Runtime::kSaveAll ? kArm64CalleeSaveFpAllSpills : 0) |
+ (type == Runtime::kSaveEverything ? kArm64CalleeSaveFpEverythingSpills : 0);
}
constexpr uint32_t Arm64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 453056d..2ef45f5 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -22,5 +22,6 @@
#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 96
#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 48
#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 80
+#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE 256
#endif // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index c1b8044..b926bdf 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -277,6 +277,197 @@
.endm
/*
+ * Macro that sets up the callee save frame to conform with
+ * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+ * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp, $ra, $f0-$f31;
+ * 28(GPR) + 32(FPR) + 3 words for padding and 1 word for Method*
+ * Clobbers $t0 and $t1.
+ * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+ * Reserves FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack.
+ * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+ */
+.macro SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ addiu $sp, $sp, -256
+ .cfi_adjust_cfa_offset 256
+
+ // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 256)
+#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(MIPS) size not as expected."
+#endif
+
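+ // Frame layout: Method* at offset 0, 3 padding words, FP registers from offset 16, core registers from offset 144 up to $ra at 252.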
+ sw $ra, 252($sp)
+ .cfi_rel_offset 31, 252
+ sw $fp, 248($sp)
+ .cfi_rel_offset 30, 248
+ sw $gp, 244($sp)
+ .cfi_rel_offset 28, 244
+ sw $t9, 240($sp)
+ .cfi_rel_offset 25, 240
+ sw $t8, 236($sp)
+ .cfi_rel_offset 24, 236
+ sw $s7, 232($sp)
+ .cfi_rel_offset 23, 232
+ sw $s6, 228($sp)
+ .cfi_rel_offset 22, 228
+ sw $s5, 224($sp)
+ .cfi_rel_offset 21, 224
+ sw $s4, 220($sp)
+ .cfi_rel_offset 20, 220
+ sw $s3, 216($sp)
+ .cfi_rel_offset 19, 216
+ sw $s2, 212($sp)
+ .cfi_rel_offset 18, 212
+ sw $s1, 208($sp)
+ .cfi_rel_offset 17, 208
+ sw $s0, 204($sp)
+ .cfi_rel_offset 16, 204
+ sw $t7, 200($sp)
+ .cfi_rel_offset 15, 200
+ sw $t6, 196($sp)
+ .cfi_rel_offset 14, 196
+ sw $t5, 192($sp)
+ .cfi_rel_offset 13, 192
+ sw $t4, 188($sp)
+ .cfi_rel_offset 12, 188
+ sw $t3, 184($sp)
+ .cfi_rel_offset 11, 184
+ sw $t2, 180($sp)
+ .cfi_rel_offset 10, 180
+ sw $t1, 176($sp)
+ .cfi_rel_offset 9, 176
+ sw $t0, 172($sp)
+ .cfi_rel_offset 8, 172
+ sw $a3, 168($sp)
+ .cfi_rel_offset 7, 168
+ sw $a2, 164($sp)
+ .cfi_rel_offset 6, 164
+ sw $a1, 160($sp)
+ .cfi_rel_offset 5, 160
+ sw $a0, 156($sp)
+ .cfi_rel_offset 4, 156
+ sw $v1, 152($sp)
+ .cfi_rel_offset 3, 152
+ sw $v0, 148($sp)
+ .cfi_rel_offset 2, 148
+
+ // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
+ bal 1f
+ sw $at, 144($sp)
+ .cfi_rel_offset 1, 144
+1:
+ .cpload $ra
+
+ SDu $f30, $f31, 136, $sp, $t1
+ SDu $f28, $f29, 128, $sp, $t1
+ SDu $f26, $f27, 120, $sp, $t1
+ SDu $f24, $f25, 112, $sp, $t1
+ SDu $f22, $f23, 104, $sp, $t1
+ SDu $f20, $f21, 96, $sp, $t1
+ SDu $f18, $f19, 88, $sp, $t1
+ SDu $f16, $f17, 80, $sp, $t1
+ SDu $f14, $f15, 72, $sp, $t1
+ SDu $f12, $f13, 64, $sp, $t1
+ SDu $f10, $f11, 56, $sp, $t1
+ SDu $f8, $f9, 48, $sp, $t1
+ SDu $f6, $f7, 40, $sp, $t1
+ SDu $f4, $f5, 32, $sp, $t1
+ SDu $f2, $f3, 24, $sp, $t1
+ SDu $f0, $f1, 16, $sp, $t1
+
+ # 3 words padding and 1 word for holding Method*
+
+ lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
+ lw $t0, 0($t0)
+ lw $t0, RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET($t0)
+ sw $t0, 0($sp) # Place Method* at bottom of stack.
+ sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame.
+ addiu $sp, $sp, -ARG_SLOT_SIZE # reserve argument slots on the stack
+ .cfi_adjust_cfa_offset ARG_SLOT_SIZE
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ addiu $sp, $sp, ARG_SLOT_SIZE # remove argument slots on the stack
+ .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
+
+ LDu $f30, $f31, 136, $sp, $t1
+ LDu $f28, $f29, 128, $sp, $t1
+ LDu $f26, $f27, 120, $sp, $t1
+ LDu $f24, $f25, 112, $sp, $t1
+ LDu $f22, $f23, 104, $sp, $t1
+ LDu $f20, $f21, 96, $sp, $t1
+ LDu $f18, $f19, 88, $sp, $t1
+ LDu $f16, $f17, 80, $sp, $t1
+ LDu $f14, $f15, 72, $sp, $t1
+ LDu $f12, $f13, 64, $sp, $t1
+ LDu $f10, $f11, 56, $sp, $t1
+ LDu $f8, $f9, 48, $sp, $t1
+ LDu $f6, $f7, 40, $sp, $t1
+ LDu $f4, $f5, 32, $sp, $t1
+ LDu $f2, $f3, 24, $sp, $t1
+ LDu $f0, $f1, 16, $sp, $t1
+
+ lw $ra, 252($sp)
+ .cfi_restore 31
+ lw $fp, 248($sp)
+ .cfi_restore 30
+ lw $gp, 244($sp)
+ .cfi_restore 28
+ lw $t9, 240($sp)
+ .cfi_restore 25
+ lw $t8, 236($sp)
+ .cfi_restore 24
+ lw $s7, 232($sp)
+ .cfi_restore 23
+ lw $s6, 228($sp)
+ .cfi_restore 22
+ lw $s5, 224($sp)
+ .cfi_restore 21
+ lw $s4, 220($sp)
+ .cfi_restore 20
+ lw $s3, 216($sp)
+ .cfi_restore 19
+ lw $s2, 212($sp)
+ .cfi_restore 18
+ lw $s1, 208($sp)
+ .cfi_restore 17
+ lw $s0, 204($sp)
+ .cfi_restore 16
+ lw $t7, 200($sp)
+ .cfi_restore 15
+ lw $t6, 196($sp)
+ .cfi_restore 14
+ lw $t5, 192($sp)
+ .cfi_restore 13
+ lw $t4, 188($sp)
+ .cfi_restore 12
+ lw $t3, 184($sp)
+ .cfi_restore 11
+ lw $t2, 180($sp)
+ .cfi_restore 10
+ lw $t1, 176($sp)
+ .cfi_restore 9
+ lw $t0, 172($sp)
+ .cfi_restore 8
+ lw $a3, 168($sp)
+ .cfi_restore 7
+ lw $a2, 164($sp)
+ .cfi_restore 6
+ lw $a1, 160($sp)
+ .cfi_restore 5
+ lw $a0, 156($sp)
+ .cfi_restore 4
+ lw $v1, 152($sp)
+ .cfi_restore 3
+ lw $v0, 148($sp)
+ .cfi_restore 2
+ lw $at, 144($sp)
+ .cfi_restore 1
+
+ addiu $sp, $sp, 256 # pop frame
+ .cfi_adjust_cfa_offset -256
+.endm
+
+ /*
* Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
* exception is Thread::Current()->exception_
*/
@@ -1652,18 +1843,20 @@
* Called by managed code when the value in rSUSPEND has been decremented to 0.
*/
.extern artTestSuspendFromCode
-ENTRY art_quick_test_suspend
- lh $a0, THREAD_FLAGS_OFFSET(rSELF)
- bnez $a0, 1f
+ENTRY_NO_GP art_quick_test_suspend
+ lh rSUSPEND, THREAD_FLAGS_OFFSET(rSELF)
+ bnez rSUSPEND, 1f
addiu rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
jalr $zero, $ra
nop
1:
- SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves for stack crawl
+ SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME # save everything for stack crawl
la $t9, artTestSuspendFromCode
- jalr $t9 # (Thread*)
+ jalr $t9 # (Thread*)
move $a0, rSELF
- RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+ RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ jalr $zero, $ra
+ nop
END art_quick_test_suspend
/*
diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h
index 7b0623b..170513d 100644
--- a/runtime/arch/mips/quick_method_frame_info_mips.h
+++ b/runtime/arch/mips/quick_method_frame_info_mips.h
@@ -34,6 +34,12 @@
(1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3);
static constexpr uint32_t kMipsCalleeSaveAllSpills =
(1 << art::mips::S0) | (1 << art::mips::S1);
+static constexpr uint32_t kMipsCalleeSaveEverythingSpills =
+ (1 << art::mips::AT) | (1 << art::mips::V0) | (1 << art::mips::V1) |
+ (1 << art::mips::A0) | (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) |
+ (1 << art::mips::T0) | (1 << art::mips::T1) | (1 << art::mips::T2) | (1 << art::mips::T3) |
+ (1 << art::mips::T4) | (1 << art::mips::T5) | (1 << art::mips::T6) | (1 << art::mips::T7) |
+ (1 << art::mips::S0) | (1 << art::mips::S1) | (1 << art::mips::T8) | (1 << art::mips::T9);
static constexpr uint32_t kMipsCalleeSaveFpAlwaysSpills = 0;
static constexpr uint32_t kMipsCalleeSaveFpRefSpills = 0;
@@ -43,17 +49,28 @@
(1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) |
(1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
(1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31);
+static constexpr uint32_t kMipsCalleeSaveFpEverythingSpills =
+ (1 << art::mips::F0) | (1 << art::mips::F1) | (1 << art::mips::F2) | (1 << art::mips::F3) |
+ (1 << art::mips::F4) | (1 << art::mips::F5) | (1 << art::mips::F6) | (1 << art::mips::F7) |
+ (1 << art::mips::F8) | (1 << art::mips::F9) | (1 << art::mips::F10) | (1 << art::mips::F11) |
+ (1 << art::mips::F12) | (1 << art::mips::F13) | (1 << art::mips::F14) | (1 << art::mips::F15) |
+ (1 << art::mips::F16) | (1 << art::mips::F17) | (1 << art::mips::F18) | (1 << art::mips::F19) |
+ (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) |
+ (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
+ (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31);
constexpr uint32_t MipsCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
return kMipsCalleeSaveAlwaysSpills | kMipsCalleeSaveRefSpills |
(type == Runtime::kRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
- (type == Runtime::kSaveAll ? kMipsCalleeSaveAllSpills : 0);
+ (type == Runtime::kSaveAll ? kMipsCalleeSaveAllSpills : 0) |
+ (type == Runtime::kSaveEverything ? kMipsCalleeSaveEverythingSpills : 0);
}
constexpr uint32_t MipsCalleeSaveFPSpills(Runtime::CalleeSaveType type) {
return kMipsCalleeSaveFpAlwaysSpills | kMipsCalleeSaveFpRefSpills |
(type == Runtime::kRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) |
- (type == Runtime::kSaveAll ? kMipsCalleeSaveAllFPSpills : 0);
+ (type == Runtime::kSaveAll ? kMipsCalleeSaveAllFPSpills : 0) |
+ (type == Runtime::kSaveEverything ? kMipsCalleeSaveFpEverythingSpills : 0);
}
constexpr uint32_t MipsCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index 06d6211..0a9ab7a 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -25,7 +25,7 @@
void Thread::InitCpu() {
CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k32>().Int32Value());
CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k32>().Int32Value());
- CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k64>().Int32Value());
+ CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k32>().Int32Value());
}
void Thread::CleanupCpu() {
diff --git a/runtime/arch/mips64/asm_support_mips64.h b/runtime/arch/mips64/asm_support_mips64.h
index 995fcf3..2c16c25 100644
--- a/runtime/arch/mips64/asm_support_mips64.h
+++ b/runtime/arch/mips64/asm_support_mips64.h
@@ -25,5 +25,7 @@
#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 80
// $f12-$f19, $a1-$a7, $s2-$s7 + $gp + $s8 + $ra, 16 total + 1x8 bytes padding + method*
#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 208
+// $f0-$f31, $at, $v0-$v1, $a0-$a7, $t0-$t3, $s0-$s7, $t8-$t9, $gp, $s8, $ra + padding + method*
+#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE 496
#endif // ART_RUNTIME_ARCH_MIPS64_ASM_SUPPORT_MIPS64_H_
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index ae69620..0a37909 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -314,6 +314,227 @@
.endm
/*
+ * Macro that sets up the callee save frame to conform with
+ * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+ * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra,
+ * $f0-$f31; 28(GPR) + 32(FPR) + 1x8 bytes padding + method*
+ * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+ */
+.macro SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ daddiu $sp, $sp, -496
+ .cfi_adjust_cfa_offset 496
+
+ // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 496)
+#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(MIPS64) size not as expected."
+#endif
+
+ // Save core registers.
+ sd $ra, 488($sp)
+ .cfi_rel_offset 31, 488
+ sd $s8, 480($sp)
+ .cfi_rel_offset 30, 480
+ sd $gp, 472($sp)
+ .cfi_rel_offset 28, 472
+ sd $t9, 464($sp)
+ .cfi_rel_offset 25, 464
+ sd $t8, 456($sp)
+ .cfi_rel_offset 24, 456
+ sd $s7, 448($sp)
+ .cfi_rel_offset 23, 448
+ sd $s6, 440($sp)
+ .cfi_rel_offset 22, 440
+ sd $s5, 432($sp)
+ .cfi_rel_offset 21, 432
+ sd $s4, 424($sp)
+ .cfi_rel_offset 20, 424
+ sd $s3, 416($sp)
+ .cfi_rel_offset 19, 416
+ sd $s2, 408($sp)
+ .cfi_rel_offset 18, 408
+ sd $s1, 400($sp)
+ .cfi_rel_offset 17, 400
+ sd $s0, 392($sp)
+ .cfi_rel_offset 16, 392
+ sd $t3, 384($sp)
+ .cfi_rel_offset 15, 384
+ sd $t2, 376($sp)
+ .cfi_rel_offset 14, 376
+ sd $t1, 368($sp)
+ .cfi_rel_offset 13, 368
+ sd $t0, 360($sp)
+ .cfi_rel_offset 12, 360
+ sd $a7, 352($sp)
+ .cfi_rel_offset 11, 352
+ sd $a6, 344($sp)
+ .cfi_rel_offset 10, 344
+ sd $a5, 336($sp)
+ .cfi_rel_offset 9, 336
+ sd $a4, 328($sp)
+ .cfi_rel_offset 8, 328
+ sd $a3, 320($sp)
+ .cfi_rel_offset 7, 320
+ sd $a2, 312($sp)
+ .cfi_rel_offset 6, 312
+ sd $a1, 304($sp)
+ .cfi_rel_offset 5, 304
+ sd $a0, 296($sp)
+ .cfi_rel_offset 4, 296
+ sd $v1, 288($sp)
+ .cfi_rel_offset 3, 288
+ sd $v0, 280($sp)
+ .cfi_rel_offset 2, 280
+
+ // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
+ bal 1f
+ sd $at, 272($sp)
+ .cfi_rel_offset 1, 272
+1:
+ // TODO: Can we avoid the unnecessary move $t8<-$gp?
+ .cpsetup $ra, $t8, 1b
+
+ // Save FP registers.
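+ // FP registers start at offset 16: offset 0 holds the ArtMethod* and offset 8 is the padding word.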
+ s.d $f31, 264($sp)
+ s.d $f30, 256($sp)
+ s.d $f29, 248($sp)
+ s.d $f28, 240($sp)
+ s.d $f27, 232($sp)
+ s.d $f26, 224($sp)
+ s.d $f25, 216($sp)
+ s.d $f24, 208($sp)
+ s.d $f23, 200($sp)
+ s.d $f22, 192($sp)
+ s.d $f21, 184($sp)
+ s.d $f20, 176($sp)
+ s.d $f19, 168($sp)
+ s.d $f18, 160($sp)
+ s.d $f17, 152($sp)
+ s.d $f16, 144($sp)
+ s.d $f15, 136($sp)
+ s.d $f14, 128($sp)
+ s.d $f13, 120($sp)
+ s.d $f12, 112($sp)
+ s.d $f11, 104($sp)
+ s.d $f10, 96($sp)
+ s.d $f9, 88($sp)
+ s.d $f8, 80($sp)
+ s.d $f7, 72($sp)
+ s.d $f6, 64($sp)
+ s.d $f5, 56($sp)
+ s.d $f4, 48($sp)
+ s.d $f3, 40($sp)
+ s.d $f2, 32($sp)
+ s.d $f1, 24($sp)
+ s.d $f0, 16($sp)
+
+ # load appropriate callee-save-method
+ ld $t1, %got(_ZN3art7Runtime9instance_E)($gp)
+ ld $t1, 0($t1)
+ ld $t1, RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET($t1)
+ sd $t1, 0($sp) # Place ArtMethod* at bottom of stack.
+ # Place sp in Thread::Current()->top_quick_frame.
+ sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ // Restore FP registers.
+ l.d $f31, 264($sp)
+ l.d $f30, 256($sp)
+ l.d $f29, 248($sp)
+ l.d $f28, 240($sp)
+ l.d $f27, 232($sp)
+ l.d $f26, 224($sp)
+ l.d $f25, 216($sp)
+ l.d $f24, 208($sp)
+ l.d $f23, 200($sp)
+ l.d $f22, 192($sp)
+ l.d $f21, 184($sp)
+ l.d $f20, 176($sp)
+ l.d $f19, 168($sp)
+ l.d $f18, 160($sp)
+ l.d $f17, 152($sp)
+ l.d $f16, 144($sp)
+ l.d $f15, 136($sp)
+ l.d $f14, 128($sp)
+ l.d $f13, 120($sp)
+ l.d $f12, 112($sp)
+ l.d $f11, 104($sp)
+ l.d $f10, 96($sp)
+ l.d $f9, 88($sp)
+ l.d $f8, 80($sp)
+ l.d $f7, 72($sp)
+ l.d $f6, 64($sp)
+ l.d $f5, 56($sp)
+ l.d $f4, 48($sp)
+ l.d $f3, 40($sp)
+ l.d $f2, 32($sp)
+ l.d $f1, 24($sp)
+ l.d $f0, 16($sp)
+
+ // Restore core registers.
+ ld $ra, 488($sp)
+ .cfi_restore 31
+ ld $s8, 480($sp)
+ .cfi_restore 30
+ ld $gp, 472($sp)
+ .cfi_restore 28
+ ld $t9, 464($sp)
+ .cfi_restore 25
+ ld $t8, 456($sp)
+ .cfi_restore 24
+ ld $s7, 448($sp)
+ .cfi_restore 23
+ ld $s6, 440($sp)
+ .cfi_restore 22
+ ld $s5, 432($sp)
+ .cfi_restore 21
+ ld $s4, 424($sp)
+ .cfi_restore 20
+ ld $s3, 416($sp)
+ .cfi_restore 19
+ ld $s2, 408($sp)
+ .cfi_restore 18
+ ld $s1, 400($sp)
+ .cfi_restore 17
+ ld $s0, 392($sp)
+ .cfi_restore 16
+ ld $t3, 384($sp)
+ .cfi_restore 15
+ ld $t2, 376($sp)
+ .cfi_restore 14
+ ld $t1, 368($sp)
+ .cfi_restore 13
+ ld $t0, 360($sp)
+ .cfi_restore 12
+ ld $a7, 352($sp)
+ .cfi_restore 11
+ ld $a6, 344($sp)
+ .cfi_restore 10
+ ld $a5, 336($sp)
+ .cfi_restore 9
+ ld $a4, 328($sp)
+ .cfi_restore 8
+ ld $a3, 320($sp)
+ .cfi_restore 7
+ ld $a2, 312($sp)
+ .cfi_restore 6
+ ld $a1, 304($sp)
+ .cfi_restore 5
+ ld $a0, 296($sp)
+ .cfi_restore 4
+ ld $v1, 288($sp)
+ .cfi_restore 3
+ ld $v0, 280($sp)
+ .cfi_restore 2
+ ld $at, 272($sp)
+ .cfi_restore 1
+
+ .cpreturn
+ daddiu $sp, $sp, 496
+ .cfi_adjust_cfa_offset -496
+.endm
+
+ /*
* Macro that set calls through to artDeliverPendingExceptionFromCode,
* where the pending
* exception is Thread::Current()->exception_
@@ -1673,17 +1894,19 @@
* Called by managed code when the value in rSUSPEND has been decremented to 0.
*/
.extern artTestSuspendFromCode
-ENTRY art_quick_test_suspend
- lh $a0, THREAD_FLAGS_OFFSET(rSELF)
- bne $a0, $zero, 1f
+ENTRY_NO_GP art_quick_test_suspend
+ lh rSUSPEND, THREAD_FLAGS_OFFSET(rSELF)
+ bne rSUSPEND, $zero, 1f
daddiu rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
jalr $zero, $ra
- .cpreturn # Restore gp from t8 in branch delay slot.
+ nop
1:
- SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves for stack crawl
+ SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME # save everything for stack crawl
jal artTestSuspendFromCode # (Thread*)
move $a0, rSELF
- RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+ RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME
+ jalr $zero, $ra
+ nop
END art_quick_test_suspend
/*
diff --git a/runtime/arch/mips64/quick_method_frame_info_mips64.h b/runtime/arch/mips64/quick_method_frame_info_mips64.h
index b7dc57f..d52945f 100644
--- a/runtime/arch/mips64/quick_method_frame_info_mips64.h
+++ b/runtime/arch/mips64/quick_method_frame_info_mips64.h
@@ -25,6 +25,8 @@
namespace art {
namespace mips64 {
+static constexpr uint32_t kMips64CalleeSaveAlwaysSpills =
+ (1 << art::mips64::RA);
static constexpr uint32_t kMips64CalleeSaveRefSpills =
(1 << art::mips64::S2) | (1 << art::mips64::S3) | (1 << art::mips64::S4) |
(1 << art::mips64::S5) | (1 << art::mips64::S6) | (1 << art::mips64::S7) |
@@ -35,6 +37,14 @@
(1 << art::mips64::A7);
static constexpr uint32_t kMips64CalleeSaveAllSpills =
(1 << art::mips64::S0) | (1 << art::mips64::S1);
+static constexpr uint32_t kMips64CalleeSaveEverythingSpills =
+ (1 << art::mips64::AT) | (1 << art::mips64::V0) | (1 << art::mips64::V1) |
+ (1 << art::mips64::A0) | (1 << art::mips64::A1) | (1 << art::mips64::A2) |
+ (1 << art::mips64::A3) | (1 << art::mips64::A4) | (1 << art::mips64::A5) |
+ (1 << art::mips64::A6) | (1 << art::mips64::A7) | (1 << art::mips64::T0) |
+ (1 << art::mips64::T1) | (1 << art::mips64::T2) | (1 << art::mips64::T3) |
+ (1 << art::mips64::S0) | (1 << art::mips64::S1) | (1 << art::mips64::T8) |
+ (1 << art::mips64::T9);
static constexpr uint32_t kMips64CalleeSaveFpRefSpills = 0;
static constexpr uint32_t kMips64CalleeSaveFpArgSpills =
@@ -46,17 +56,31 @@
(1 << art::mips64::F24) | (1 << art::mips64::F25) | (1 << art::mips64::F26) |
(1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) |
(1 << art::mips64::F30) | (1 << art::mips64::F31);
+static constexpr uint32_t kMips64CalleeSaveFpEverythingSpills =
+ (1 << art::mips64::F0) | (1 << art::mips64::F1) | (1 << art::mips64::F2) |
+ (1 << art::mips64::F3) | (1 << art::mips64::F4) | (1 << art::mips64::F5) |
+ (1 << art::mips64::F6) | (1 << art::mips64::F7) | (1 << art::mips64::F8) |
+ (1 << art::mips64::F9) | (1 << art::mips64::F10) | (1 << art::mips64::F11) |
+ (1 << art::mips64::F12) | (1 << art::mips64::F13) | (1 << art::mips64::F14) |
+ (1 << art::mips64::F15) | (1 << art::mips64::F16) | (1 << art::mips64::F17) |
+ (1 << art::mips64::F18) | (1 << art::mips64::F19) | (1 << art::mips64::F20) |
+ (1 << art::mips64::F21) | (1 << art::mips64::F22) | (1 << art::mips64::F23) |
+ (1 << art::mips64::F24) | (1 << art::mips64::F25) | (1 << art::mips64::F26) |
+ (1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) |
+ (1 << art::mips64::F30) | (1 << art::mips64::F31);
constexpr uint32_t Mips64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
- return kMips64CalleeSaveRefSpills |
+ return kMips64CalleeSaveAlwaysSpills | kMips64CalleeSaveRefSpills |
(type == Runtime::kRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) |
- (type == Runtime::kSaveAll ? kMips64CalleeSaveAllSpills : 0) | (1 << art::mips64::RA);
+ (type == Runtime::kSaveAll ? kMips64CalleeSaveAllSpills : 0) |
+ (type == Runtime::kSaveEverything ? kMips64CalleeSaveEverythingSpills : 0);
}
constexpr uint32_t Mips64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
return kMips64CalleeSaveFpRefSpills |
(type == Runtime::kRefsAndArgs ? kMips64CalleeSaveFpArgSpills: 0) |
- (type == Runtime::kSaveAll ? kMips64CalleeSaveFpAllSpills : 0);
+ (type == Runtime::kSaveAll ? kMips64CalleeSaveFpAllSpills : 0) |
+ (type == Runtime::kSaveEverything ? kMips64CalleeSaveFpEverythingSpills : 0);
}
constexpr uint32_t Mips64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index b0a6017..ba5fd99 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -21,8 +21,7 @@
#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 32
#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
-
-// 32 bytes for GPRs and 32 bytes for FPRs.
#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (32 + 32)
+#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE (48 + 64)
#endif // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 77e04e7..68ba0cf 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -222,6 +222,74 @@
END_MACRO
/*
+ * Macro that sets up the callee save frame to conform with
+ * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+ */
+MACRO2(SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME, got_reg, temp_reg)
+ // Save core registers.
+ PUSH edi
+ PUSH esi
+ PUSH ebp
+ PUSH ebx
+ PUSH edx
+ PUSH ecx
+ PUSH eax
+ // Create space for FPRs and stack alignment padding.
+ subl MACRO_LITERAL(12 + 8 * 8), %esp
+ CFI_ADJUST_CFA_OFFSET(12 + 8 * 8)
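+ // With the 7 GPR pushes, the ArtMethod* slot and the return address, the 12 bytes of padding
+ // bring the frame to 112 bytes (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE).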
+ // Save FPRs.
+ movsd %xmm0, 12(%esp)
+ movsd %xmm1, 20(%esp)
+ movsd %xmm2, 28(%esp)
+ movsd %xmm3, 36(%esp)
+ movsd %xmm4, 44(%esp)
+ movsd %xmm5, 52(%esp)
+ movsd %xmm6, 60(%esp)
+ movsd %xmm7, 68(%esp)
+
+ SETUP_GOT_NOSAVE RAW_VAR(got_reg)
+ // Load Runtime::instance_ from GOT.
+ movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
+ movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+ // Push save everything callee-save method.
+ pushl RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+ CFI_ADJUST_CFA_OFFSET(4)
+ // Store esp as the top quick frame.
+ movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+ // Ugly compile-time check, but we only have the preprocessor.
+ // Last +4: implicit return address pushed on stack when caller made call.
+#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 7*4 + 8*8 + 12 + 4 + 4)
+#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(X86) size not as expected."
+#endif
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME)
+ // Restore FPRs. Method and padding are still on the stack.
+ movsd 16(%esp), %xmm0
+ movsd 24(%esp), %xmm1
+ movsd 32(%esp), %xmm2
+ movsd 40(%esp), %xmm3
+ movsd 48(%esp), %xmm4
+ movsd 56(%esp), %xmm5
+ movsd 64(%esp), %xmm6
+ movsd 72(%esp), %xmm7
+
+ // Remove save everything callee save method, stack alignment padding and FPRs.
+ addl MACRO_LITERAL(16 + 8 * 8), %esp
+ CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8))
+
+ // Restore core registers.
+ POP eax
+ POP ecx
+ POP edx
+ POP ebx
+ POP ebp
+ POP esi
+ POP edi
+END_MACRO
+
+ /*
* Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
* exception is Thread::Current()->exception_.
*/
@@ -661,22 +729,6 @@
ret
END_FUNCTION art_quick_invoke_static_stub
-MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
- DEFINE_FUNCTION VAR(c_name)
- SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC
- // Outgoing argument set up
- subl MACRO_LITERAL(12), %esp // push padding
- CFI_ADJUST_CFA_OFFSET(12)
- pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
- CFI_ADJUST_CFA_OFFSET(4)
- call CALLVAR(cxx_name) // cxx_name(Thread*)
- addl MACRO_LITERAL(16), %esp // pop arguments
- CFI_ADJUST_CFA_OFFSET(-16)
- RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
- CALL_MACRO(return_macro) // return or deliver exception
- END_FUNCTION VAR(c_name)
-END_MACRO
-
MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
DEFINE_FUNCTION VAR(c_name)
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC
@@ -1028,7 +1080,13 @@
movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
// Read barrier for class load.
cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
- jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+ // Null check so that we can load the lock word.
+ testl %edx, %edx
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+ // Check the mark bit; if it is set, skip the read barrier slow path.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
@@ -1065,7 +1123,7 @@
test LITERAL(LOCK_WORD_STATE_MASK), %ecx // test the 2 high bits.
jne .Lslow_lock // slow path if either of the two high bits are set.
movl %ecx, %edx // save lock word (edx) to keep read barrier bits.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
test %ecx, %ecx
jnz .Lalready_thin // lock word contains a thin lock
// unlocked case - edx: original lock word, eax: obj.
@@ -1081,9 +1139,9 @@
cmpw %cx, %dx // do we hold the lock already?
jne .Lslow_lock
movl %edx, %ecx // copy the lock word to check count overflow.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count for overflow check.
- test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if either of the upper two bits (28-29) are set.
+ test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // overflowed if the first gc state bit is set.
jne .Lslow_lock // count overflowed so go slow
movl %eax, %ecx // save obj to use eax for cmpxchg.
movl %edx, %eax // copy the lock word as the old val for cmpxchg.
@@ -1137,13 +1195,13 @@
cmpw %cx, %dx // does the thread id match?
jne .Lslow_unlock
movl %ecx, %edx // copy the lock word to detect new count of 0.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits.
cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
jae .Lrecursive_thin_unlock
// update lockword, cmpxchg necessary for read barrier bits.
movl %eax, %edx // edx: obj
movl %ecx, %eax // eax: old lock word.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // ecx: new lock word zero except original rb bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original gc bits.
#ifndef USE_READ_BARRIER
movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
#else
@@ -1397,7 +1455,19 @@
ret
END_FUNCTION art_quick_memcpy
-NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
+DEFINE_FUNCTION art_quick_test_suspend
+ SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME ebx, ebx // save everything for GC
+ // Outgoing argument set up
+ subl MACRO_LITERAL(12), %esp // push padding
+ CFI_ADJUST_CFA_OFFSET(12)
+ pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
+ CFI_ADJUST_CFA_OFFSET(4)
+ call SYMBOL(artTestSuspendFromCode) // (Thread*)
+ addl MACRO_LITERAL(16), %esp // pop arguments
+ CFI_ADJUST_CFA_OFFSET(-16)
+ RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME // restore frame up to return address
+ ret // return
+END_FUNCTION art_quick_test_suspend
DEFINE_FUNCTION art_quick_d2l
subl LITERAL(12), %esp // alignment padding, room for argument
@@ -1923,6 +1993,14 @@
// convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
+ // Null check so that we can load the lock word.
+ test REG_VAR(reg), REG_VAR(reg)
+ jz .Lret_rb_\name
+ // Check the mark bit, if it is 1 return.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
+ jz .Lslow_rb_\name
+ ret
+.Lslow_rb_\name:
// Save all potentially live caller-save core registers.
PUSH eax
PUSH ecx
@@ -1970,6 +2048,7 @@
POP_REG_NE edx, RAW_VAR(reg)
POP_REG_NE ecx, RAW_VAR(reg)
POP_REG_NE eax, RAW_VAR(reg)
+.Lret_rb_\name:
ret
END_FUNCTION VAR(name)
END_MACRO
diff --git a/runtime/arch/x86/quick_method_frame_info_x86.h b/runtime/arch/x86/quick_method_frame_info_x86.h
index 24c671c..a1612c3 100644
--- a/runtime/arch/x86/quick_method_frame_info_x86.h
+++ b/runtime/arch/x86/quick_method_frame_info_x86.h
@@ -36,21 +36,33 @@
XMM7 = 7,
};
+static constexpr uint32_t kX86CalleeSaveAlwaysSpills =
+ (1 << art::x86::kNumberOfCpuRegisters); // Fake return address callee save.
static constexpr uint32_t kX86CalleeSaveRefSpills =
(1 << art::x86::EBP) | (1 << art::x86::ESI) | (1 << art::x86::EDI);
static constexpr uint32_t kX86CalleeSaveArgSpills =
(1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
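+// Note: EBP, ESI and EDI are omitted here since kX86CalleeSaveRefSpills already covers them for
+// every frame type.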
+static constexpr uint32_t kX86CalleeSaveEverythingSpills =
+ (1 << art::x86::EAX) | (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
+
static constexpr uint32_t kX86CalleeSaveFpArgSpills =
(1 << art::x86::XMM0) | (1 << art::x86::XMM1) |
(1 << art::x86::XMM2) | (1 << art::x86::XMM3);
+static constexpr uint32_t kX86CalleeSaveFpEverythingSpills =
+ (1 << art::x86::XMM0) | (1 << art::x86::XMM1) |
+ (1 << art::x86::XMM2) | (1 << art::x86::XMM3) |
+ (1 << art::x86::XMM4) | (1 << art::x86::XMM5) |
+ (1 << art::x86::XMM6) | (1 << art::x86::XMM7);
constexpr uint32_t X86CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
- return kX86CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
- (1 << art::x86::kNumberOfCpuRegisters); // fake return address callee save
+ return kX86CalleeSaveAlwaysSpills | kX86CalleeSaveRefSpills |
+ (type == Runtime::kRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
+ (type == Runtime::kSaveEverything ? kX86CalleeSaveEverythingSpills : 0);
}
constexpr uint32_t X86CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
- return type == Runtime::kRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0;
+ return (type == Runtime::kRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0) |
+ (type == Runtime::kSaveEverything ? kX86CalleeSaveFpEverythingSpills : 0);
}
constexpr uint32_t X86CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index 48bec73..58dc2fe 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -21,6 +21,7 @@
#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE (64 + 4*8)
#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE (64 + 4*8)
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (176 + 4*8)
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (112 + 12*8)
+#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE (144 + 16*8)
#endif // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 784ec39..3048404 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -165,8 +165,8 @@
PUSH rdx // Quick arg 2.
PUSH rcx // Quick arg 3.
// Create space for FPR args and create 2 slots for ArtMethod*.
- subq MACRO_LITERAL(80 + 4 * 8), %rsp
- CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
+ subq MACRO_LITERAL(16 + 12 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(16 + 12 * 8)
// R10 := ArtMethod* for ref and args callee save frame method.
movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
// Save FPRs.
@@ -189,7 +189,7 @@
// Ugly compile-time check, but we only have the preprocessor.
// Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 4 * 8 + 80 + 8)
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 12 * 8 + 16 + 8)
#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif // __APPLE__
@@ -260,6 +260,108 @@
POP r15
END_MACRO
+ /*
+ * Macro that sets up the callee save frame to conform with
+ * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+ */
+MACRO0(SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME)
+#if defined(__APPLE__)
+ int3
+ int3
+#else
+ // Save core registers from highest to lowest to agree with core spills bitmap.
+ PUSH r15
+ PUSH r14
+ PUSH r13
+ PUSH r12
+ PUSH r11
+ PUSH r10
+ PUSH r9
+ PUSH r8
+ PUSH rdi
+ PUSH rsi
+ PUSH rbp
+ PUSH rbx
+ PUSH rdx
+ PUSH rcx
+ PUSH rax
+ // Create space for FPRs and stack alignment padding.
+ subq MACRO_LITERAL(8 + 16 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
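+ // With the 15 GPR pushes, the ArtMethod* slot and the return address, the 8 bytes of padding
+ // bring the frame to 272 bytes (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE).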
+ // R10 := Runtime::Current()
+ movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
+ movq (%r10), %r10
+ // Save FPRs.
+ movq %xmm0, 8(%rsp)
+ movq %xmm1, 16(%rsp)
+ movq %xmm2, 24(%rsp)
+ movq %xmm3, 32(%rsp)
+ movq %xmm4, 40(%rsp)
+ movq %xmm5, 48(%rsp)
+ movq %xmm6, 56(%rsp)
+ movq %xmm7, 64(%rsp)
+ movq %xmm8, 72(%rsp)
+ movq %xmm9, 80(%rsp)
+ movq %xmm10, 88(%rsp)
+ movq %xmm11, 96(%rsp)
+ movq %xmm12, 104(%rsp)
+ movq %xmm13, 112(%rsp)
+ movq %xmm14, 120(%rsp)
+ movq %xmm15, 128(%rsp)
+ // Push ArtMethod* for save everything frame method.
+ pushq RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET(%r10)
+ CFI_ADJUST_CFA_OFFSET(8)
+ // Store rsp as the top quick frame.
+ movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+ // Ugly compile-time check, but we only have the preprocessor.
+ // Last +8: implicit return address pushed on stack when caller made call.
+#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 15 * 8 + 16 * 8 + 16 + 8)
+#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(X86_64) size not as expected."
+#endif
+#endif // __APPLE__
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME)
+ // Restore FPRs. Method and padding are still on the stack.
+ movq 16(%rsp), %xmm0
+ movq 24(%rsp), %xmm1
+ movq 32(%rsp), %xmm2
+ movq 40(%rsp), %xmm3
+ movq 48(%rsp), %xmm4
+ movq 56(%rsp), %xmm5
+ movq 64(%rsp), %xmm6
+ movq 72(%rsp), %xmm7
+ movq 80(%rsp), %xmm8
+ movq 88(%rsp), %xmm9
+ movq 96(%rsp), %xmm10
+ movq 104(%rsp), %xmm11
+ movq 112(%rsp), %xmm12
+ movq 120(%rsp), %xmm13
+ movq 128(%rsp), %xmm14
+ movq 136(%rsp), %xmm15
+
+ // Remove save everything callee save method, stack alignment padding and FPRs.
+ addq MACRO_LITERAL(16 + 16 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))
+ // Restore callee and GPR args, mixed together to agree with core spills bitmap.
+ POP rax
+ POP rcx
+ POP rdx
+ POP rbx
+ POP rbp
+ POP rsi
+ POP rdi
+ POP r8
+ POP r9
+ POP r10
+ POP r11
+ POP r12
+ POP r13
+ POP r14
+ POP r15
+END_MACRO
+
/*
* Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
@@ -702,17 +804,6 @@
#endif // __APPLE__
END_FUNCTION art_quick_do_long_jump
-MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
- DEFINE_FUNCTION VAR(c_name)
- SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC
- // Outgoing argument set up
- movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current()
- call VAR(cxx_name) // cxx_name(Thread*)
- RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
- CALL_MACRO(return_macro) // return or deliver exception
- END_FUNCTION VAR(c_name)
-END_MACRO
-
MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
DEFINE_FUNCTION VAR(c_name)
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC
@@ -989,7 +1080,13 @@
// Load the class
movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
- jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+ // Null check so that we can load the lock word.
+ testl %edx, %edx
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+ // Check the mark bit; if it is set, skip the read barrier slow path.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
@@ -1022,7 +1119,7 @@
test LITERAL(LOCK_WORD_STATE_MASK), %ecx // Test the 2 high bits.
jne .Lslow_lock // Slow path if either of the two high bits are set.
movl %ecx, %edx // save lock word (edx) to keep read barrier bits.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
test %ecx, %ecx
jnz .Lalready_thin // Lock word contains a thin lock.
// unlocked case - edx: original lock word, edi: obj.
@@ -1037,9 +1134,9 @@
cmpw %cx, %dx // do we hold the lock already?
jne .Lslow_lock
movl %edx, %ecx // copy the lock word to check count overflow.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count
- test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if either of the upper two bits (28-29) are set
+ test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // overflowed if the first gc state bit is set.
jne .Lslow_lock // count overflowed so go slow
movl %edx, %eax // copy the lock word as the old val for cmpxchg.
addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real.
@@ -1074,12 +1171,12 @@
cmpw %cx, %dx // does the thread id match?
jne .Lslow_unlock
movl %ecx, %edx // copy the lock word to detect new count of 0.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits.
cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
jae .Lrecursive_thin_unlock
// update lockword, cmpxchg necessary for read barrier bits.
movl %ecx, %eax // eax: old lock word.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // ecx: new lock word zero except original rb bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original gc bits.
#ifndef USE_READ_BARRIER
movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
@@ -1329,7 +1426,14 @@
ret
END_FUNCTION art_quick_memcpy
-NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
+DEFINE_FUNCTION art_quick_test_suspend
+ SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME // save everything for GC
+ // Outgoing argument set up
+ movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current()
+ call SYMBOL(artTestSuspendFromCode) // (Thread*)
+ RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME // restore frame up to return address
+ ret
+END_FUNCTION art_quick_test_suspend
UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
@@ -1833,6 +1937,14 @@
// convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
+ // Null check so that we can load the lock word.
+ testq REG_VAR(reg), REG_VAR(reg)
+ jz .Lret_rb_\name
+ // Check the mark bit, if it is 1 return.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
+ jz .Lslow_rb_\name
+ ret
+.Lslow_rb_\name:
// Save all potentially live caller-save core registers.
PUSH rax
PUSH rcx
@@ -1897,6 +2009,7 @@
POP_REG_NE rdx, RAW_VAR(reg)
POP_REG_NE rcx, RAW_VAR(reg)
POP_REG_NE rax, RAW_VAR(reg)
+.Lret_rb_\name:
ret
END_FUNCTION VAR(name)
END_MACRO
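
The hunk above gives every READ_BARRIER_MARK_REG stub a fast path: a null check and a lock-word mark-bit test before any caller-save registers are spilled for the runtime call. A minimal standalone C++ sketch of that control flow, using illustrative names (Object, MarkSlowPath) rather than ART's, with the mask value taken from LOCK_WORD_MARK_BIT_MASK_SHIFTED defined later in this change:

    #include <cstdint>

    namespace sketch {

    constexpr uint32_t kMarkBitMaskShifted = 0x20000000u;  // LOCK_WORD_MARK_BIT_MASK_SHIFTED

    struct Object {
      uint32_t lock_word;  // stands in for the word at MIRROR_OBJECT_LOCK_WORD_OFFSET
    };

    // Placeholder for the existing slow path: spill caller-saves and call the runtime.
    inline Object* MarkSlowPath(Object* ref) { return ref; }

    inline Object* ReadBarrierMark(Object* ref) {
      if (ref == nullptr) {
        return ref;                                      // new null check (testq/jz)
      }
      if ((ref->lock_word & kMarkBitMaskShifted) != 0) {
        return ref;                                      // mark bit set: already marked, return
      }
      return MarkSlowPath(ref);                          // otherwise fall through to the slow path
    }

    }  // namespace sketch
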
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
index 37eff83..aa75b56 100644
--- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -25,12 +25,19 @@
namespace art {
namespace x86_64 {
+static constexpr uint32_t kX86_64CalleeSaveAlwaysSpills =
+ (1 << art::x86_64::kNumberOfCpuRegisters); // Fake return address callee save.
static constexpr uint32_t kX86_64CalleeSaveRefSpills =
(1 << art::x86_64::RBX) | (1 << art::x86_64::RBP) | (1 << art::x86_64::R12) |
(1 << art::x86_64::R13) | (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
static constexpr uint32_t kX86_64CalleeSaveArgSpills =
(1 << art::x86_64::RSI) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RCX) |
(1 << art::x86_64::R8) | (1 << art::x86_64::R9);
+static constexpr uint32_t kX86_64CalleeSaveEverythingSpills =
+ (1 << art::x86_64::RAX) | (1 << art::x86_64::RCX) | (1 << art::x86_64::RDX) |
+ (1 << art::x86_64::RSI) | (1 << art::x86_64::RDI) | (1 << art::x86_64::R8) |
+ (1 << art::x86_64::R9) | (1 << art::x86_64::R10) | (1 << art::x86_64::R11);
+
static constexpr uint32_t kX86_64CalleeSaveFpArgSpills =
(1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
(1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
@@ -38,16 +45,24 @@
static constexpr uint32_t kX86_64CalleeSaveFpSpills =
(1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) |
(1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15);
+static constexpr uint32_t kX86_64CalleeSaveFpEverythingSpills =
+ (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) |
+ (1 << art::x86_64::XMM2) | (1 << art::x86_64::XMM3) |
+ (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
+ (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7) |
+ (1 << art::x86_64::XMM8) | (1 << art::x86_64::XMM9) |
+ (1 << art::x86_64::XMM10) | (1 << art::x86_64::XMM11);
constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
- return kX86_64CalleeSaveRefSpills |
+ return kX86_64CalleeSaveAlwaysSpills | kX86_64CalleeSaveRefSpills |
(type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
- (1 << art::x86_64::kNumberOfCpuRegisters); // fake return address callee save;
+ (type == Runtime::kSaveEverything ? kX86_64CalleeSaveEverythingSpills : 0);
}
constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
return kX86_64CalleeSaveFpSpills |
- (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
+ (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0) |
+ (type == Runtime::kSaveEverything ? kX86_64CalleeSaveFpEverythingSpills : 0);
}
constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
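
For the new kSaveEverything type, the core spill mask above is the union of the always-spilled fake return address, the callee-save references, and all caller-save GPRs (RAX through R11). A rough, self-contained C++ sketch of that composition and the resulting slot count; the bit positions follow the usual x86-64 register encoding and are illustrative only, the real values come from the register enum in registers_x86_64.h:

    #include <bitset>
    #include <cstdint>
    #include <cstdio>

    namespace sketch {

    constexpr uint32_t kAlwaysSpills = 1u << 16;  // fake return address slot
    constexpr uint32_t kRefSpills =               // RBX, RBP, R12-R15
        (1u << 3) | (1u << 5) | (1u << 12) | (1u << 13) | (1u << 14) | (1u << 15);
    constexpr uint32_t kEverythingSpills =        // RAX, RCX, RDX, RSI, RDI, R8-R11
        (1u << 0) | (1u << 1) | (1u << 2) | (1u << 6) | (1u << 7) |
        (1u << 8) | (1u << 9) | (1u << 10) | (1u << 11);

    constexpr uint32_t SaveEverythingCoreSpills() {
      return kAlwaysSpills | kRefSpills | kEverythingSpills;
    }

    }  // namespace sketch

    int main() {
      // 6 callee-saves + 9 caller-saves + fake return address = 16 core spill slots.
      std::printf("core spills: %zu\n",
                  std::bitset<32>(sketch::SaveEverythingCoreSpills()).count());
      return 0;
    }
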
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 16087a5..a6eb5f6 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -63,7 +63,9 @@
{'0', '3', '5', '\0'},
// Dex version 036 skipped because of an old dalvik bug on some versions of android where dex
// files with that version number would erroneously be accepted and run.
- {'0', '3', '7', '\0'}
+ {'0', '3', '7', '\0'},
+ // Dex version 038: Android "O" and beyond.
+ {'0', '3', '8', '\0'}
};
bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
@@ -336,6 +338,11 @@
*error_code = ZipOpenErrorCode::kEntryNotFound;
return nullptr;
}
+ if (zip_entry->GetUncompressedLength() == 0) {
+ *error_msg = StringPrintf("Dex file '%s' has zero length", location.c_str());
+ *error_code = ZipOpenErrorCode::kDexFileError;
+ return nullptr;
+ }
std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(location.c_str(), entry_name, error_msg));
if (map.get() == nullptr) {
*error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", entry_name, location.c_str(),
@@ -433,6 +440,8 @@
MemMap* mem_map,
const OatDexFile* oat_dex_file,
std::string* error_msg) {
+ DCHECK(base != nullptr);
+ DCHECK_NE(size, 0U);
CHECK_ALIGNED(base, 4); // various dex file structures must be word aligned
std::unique_ptr<DexFile> dex_file(
new DexFile(base, size, location, location_checksum, mem_map, oat_dex_file));
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 3dffe4b..2eca495 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -63,7 +63,7 @@
static const uint32_t kClassDefinitionOrderEnforcedVersion = 37;
static const uint8_t kDexMagic[];
- static constexpr size_t kNumDexVersions = 2;
+ static constexpr size_t kNumDexVersions = 3;
static constexpr size_t kDexVersionLen = 4;
static const uint8_t kDexMagicVersions[kNumDexVersions][kDexVersionLen];
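
With version 038 added to kDexMagicVersions, a dex header is accepted when it starts with the magic "dex\n" followed by one of the listed 4-byte version strings. A small self-contained C++ sketch of that check (IsSupportedVersion and the table here are illustrative, not the DexFile API), consistent with the Version38Accepted and Version39Rejected tests below:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    namespace sketch {

    constexpr uint8_t kDexMagic[] = { 'd', 'e', 'x', '\n' };
    constexpr char kVersions[][4] = {
        {'0', '3', '5', '\0'},
        {'0', '3', '7', '\0'},
        {'0', '3', '8', '\0'},  // newly accepted for Android "O" and beyond
    };

    inline bool IsSupportedVersion(const uint8_t* header, size_t size) {
      if (size < 8 || std::memcmp(header, kDexMagic, sizeof(kDexMagic)) != 0) {
        return false;
      }
      for (const char (&version)[4] : kVersions) {
        if (std::memcmp(header + 4, version, sizeof(version)) == 0) {
          return true;
        }
      }
      return false;  // e.g. "039" is still rejected
    }

    }  // namespace sketch
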
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 4f8e6f1..2704d8a 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -133,8 +133,46 @@
"AAACAAAAQAEAAAEgAAACAAAAVAEAAAYgAAACAAAAiAEAAAEQAAABAAAAqAEAAAIgAAAPAAAArgEA"
"AAMgAAACAAAAiAIAAAQgAAADAAAAlAIAAAAgAAACAAAAqwIAAAAQAAABAAAAxAIAAA==";
-static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
- const char* location) {
+// kRawDex38 and kRawDex39 are dexed versions of the following Java source:
+//
+// public class Main {
+// public static void main(String[] foo) {
+// }
+// }
+//
+// The dex file was manually edited to change its dex version code to 38
+// or 39, respectively.
+static const char kRawDex38[] =
+ "ZGV4CjAzOAC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI"
+ "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB"
+ "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA"
+ "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA"
+ "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB"
+ "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW"
+ "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA"
+ "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA"
+ "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
+ "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
+
+static const char kRawDex39[] =
+ "ZGV4CjAzOQC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI"
+ "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB"
+ "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA"
+ "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA"
+ "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB"
+ "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW"
+ "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA"
+ "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA"
+ "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
+ "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
+
+static const char kRawDexZeroLength[] =
+ "UEsDBAoAAAAAAOhxAkkAAAAAAAAAAAAAAAALABwAY2xhc3Nlcy5kZXhVVAkAA2QNoVdnDaFXdXgL"
+ "AAEE5AMBAASIEwAAUEsBAh4DCgAAAAAA6HECSQAAAAAAAAAAAAAAAAsAGAAAAAAAAAAAAKCBAAAA"
+ "AGNsYXNzZXMuZGV4VVQFAANkDaFXdXgLAAEE5AMBAASIEwAAUEsFBgAAAAABAAEAUQAAAEUAAAAA"
+ "AA==";
+
+static void DecodeAndWriteDexFile(const char* base64, const char* location) {
// decode base64
CHECK(base64 != nullptr);
size_t length;
@@ -150,7 +188,11 @@
if (file->FlushCloseOrErase() != 0) {
PLOG(FATAL) << "Could not flush and close test file.";
}
- file.reset();
+}
+
+static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
+ const char* location) {
+ DecodeAndWriteDexFile(base64, location);
// read dex file
ScopedObjectAccess soa(Thread::Current());
@@ -197,6 +239,39 @@
EXPECT_EQ(header.checksum_, raw->GetLocationChecksum());
}
+TEST_F(DexFileTest, Version38Accepted) {
+ ScratchFile tmp;
+ std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kRawDex38, tmp.GetFilename().c_str()));
+ ASSERT_TRUE(raw.get() != nullptr);
+
+ const DexFile::Header& header = raw->GetHeader();
+ EXPECT_EQ(38u, header.GetVersion());
+}
+
+TEST_F(DexFileTest, Version39Rejected) {
+ ScratchFile tmp;
+ const char* location = tmp.GetFilename().c_str();
+ DecodeAndWriteDexFile(kRawDex39, location);
+
+ ScopedObjectAccess soa(Thread::Current());
+ static constexpr bool kVerifyChecksum = true;
+ std::string error_msg;
+ std::vector<std::unique_ptr<const DexFile>> dex_files;
+ ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files));
+}
+
+TEST_F(DexFileTest, ZeroLengthDexRejected) {
+ ScratchFile tmp;
+ const char* location = tmp.GetFilename().c_str();
+ DecodeAndWriteDexFile(kRawDexZeroLength, location);
+
+ ScopedObjectAccess soa(Thread::Current());
+ static constexpr bool kVerifyChecksum = true;
+ std::string error_msg;
+ std::vector<std::unique_ptr<const DexFile>> dex_files;
+ ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files));
+}
+
TEST_F(DexFileTest, GetLocationChecksum) {
ScopedObjectAccess soa(Thread::Current());
std::unique_ptr<const DexFile> raw(OpenTestDexFile("Main"));
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 4019a5b..fb774a4 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -154,11 +154,30 @@
}
inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
+ mirror::Object* ret;
+ // TODO: Delete the GetMarkBit check when all of the callers properly check the bit. The
+ // remaining caller is array allocations.
+ if (from_ref == nullptr || from_ref->GetMarkBit()) {
+ return from_ref;
+ }
// TODO: Consider removing this check when we are done investigating slow paths. b/30162165
if (UNLIKELY(mark_from_read_barrier_measurements_)) {
- return MarkFromReadBarrierWithMeasurements(from_ref);
+ ret = MarkFromReadBarrierWithMeasurements(from_ref);
+ } else {
+ ret = Mark(from_ref);
}
- return Mark(from_ref);
+ // Only set the mark bit for baker barrier.
+ if (kUseBakerReadBarrier && LIKELY(!rb_mark_bit_stack_full_ && ret->AtomicSetMarkBit(0, 1))) {
+ // If the mark stack is full, the object may temporarily go from marked back to unmarked. Seeing
+ // either value is OK since the only race is doing an unnecessary Mark.
+ if (!rb_mark_bit_stack_->AtomicPushBack(ret)) {
+ // Mark stack is full, set the bit back to zero.
+ CHECK(ret->AtomicSetMarkBit(1, 0));
+ // Set rb_mark_bit_stack_full_; this is racy but OK since AtomicPushBack is thread safe.
+ rb_mark_bit_stack_full_ = true;
+ }
+ }
+ return ret;
}
inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
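
MarkFromReadBarrier above now sets the mark bit after marking and records the object on rb_mark_bit_stack_ so the bits can be cleared when the GC finishes; when that bounded stack fills up, the bit is rolled back so no object keeps a mark bit that would never be cleared. A hedged standalone C++ analogue of that push-or-roll-back pattern (the types and names here are illustrative, not ART's ObjectStack):

    #include <atomic>
    #include <mutex>
    #include <vector>

    namespace sketch {

    struct Obj {
      std::atomic<bool> mark_bit{false};
      bool AtomicSetMarkBit(bool expected, bool desired) {
        return mark_bit.compare_exchange_strong(expected, desired);
      }
    };

    class BoundedMarkBitStack {
     public:
      explicit BoundedMarkBitStack(size_t capacity) : capacity_(capacity) {}

      // Returns false when full, mirroring the AtomicPushBack failure case.
      bool PushBack(Obj* obj) {
        std::lock_guard<std::mutex> lock(mutex_);
        if (objs_.size() >= capacity_) return false;
        objs_.push_back(obj);
        return true;
      }

     private:
      size_t capacity_;
      std::mutex mutex_;
      std::vector<Obj*> objs_;
    };

    inline void RecordMarked(Obj* ref, BoundedMarkBitStack* stack, bool* stack_full) {
      if (!*stack_full && ref->AtomicSetMarkBit(/*expected=*/false, /*desired=*/true)) {
        if (!stack->PushBack(ref)) {
          // Stack is full: roll the bit back. The object may be marked again later,
          // which only costs an extra (idempotent) Mark call.
          ref->AtomicSetMarkBit(/*expected=*/true, /*desired=*/false);
          *stack_full = true;  // racy, but a missed update only skips the fast path
        }
      }
    }

    }  // namespace sketch
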
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d7221e4..071537d 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -42,9 +42,6 @@
namespace collector {
static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
-// If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty
-// pages.
-static constexpr bool kGrayDirtyImmuneObjects = true;
// If kFilterModUnionCards then we attempt to filter cards that don't need to be dirty in the mod
// union table. Disabled since it does not seem to help the pause much.
static constexpr bool kFilterModUnionCards = kIsDebugBuild;
@@ -52,6 +49,9 @@
// ConcurrentCopying::Scan. May be used to diagnose possibly unnecessary read barriers.
// Only enabled for kIsDebugBuild to avoid performance hit.
static constexpr bool kDisallowReadBarrierDuringScan = kIsDebugBuild;
+// Slow path mark stack size; increase this if the stack is getting full and causing
+// performance problems.
+static constexpr size_t kReadBarrierMarkStackSize = 512 * KB;
ConcurrentCopying::ConcurrentCopying(Heap* heap,
const std::string& name_prefix,
@@ -63,6 +63,10 @@
gc_mark_stack_(accounting::ObjectStack::Create("concurrent copying gc mark stack",
kDefaultGcMarkStackSize,
kDefaultGcMarkStackSize)),
+ rb_mark_bit_stack_(accounting::ObjectStack::Create("rb copying gc mark stack",
+ kReadBarrierMarkStackSize,
+ kReadBarrierMarkStackSize)),
+ rb_mark_bit_stack_full_(false),
mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
thread_running_gc_(nullptr),
is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
@@ -187,6 +191,7 @@
CHECK(false_gray_stack_.empty());
}
+ rb_mark_bit_stack_full_ = false;
mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_;
if (measure_read_barrier_slow_path_) {
rb_slow_path_ns_.StoreRelaxed(0);
@@ -914,9 +919,9 @@
}
collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
if (kUseBakerReadBarrier) {
- CHECK(ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+ CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
<< "Ref " << ref << " " << PrettyTypeOf(ref)
- << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
+ << " has non-white rb_ptr ";
}
}
@@ -982,7 +987,7 @@
VerifyNoFromSpaceRefsFieldVisitor visitor(collector);
obj->VisitReferences(visitor, visitor);
if (kUseBakerReadBarrier) {
- CHECK(obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+ CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
<< "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
}
}
@@ -2243,6 +2248,15 @@
}
}
}
+ if (kUseBakerReadBarrier) {
+ TimingLogger::ScopedTiming split("EmptyRBMarkBitStack", GetTimings());
+ DCHECK(rb_mark_bit_stack_.get() != nullptr);
+ const auto* limit = rb_mark_bit_stack_->End();
+ for (StackReference<mirror::Object>* it = rb_mark_bit_stack_->Begin(); it != limit; ++it) {
+ CHECK(it->AsMirrorPtr()->AtomicSetMarkBit(1, 0));
+ }
+ rb_mark_bit_stack_->Reset();
+ }
}
if (measure_read_barrier_slow_path_) {
MutexLock mu(self, rb_slow_path_histogram_lock_);
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 72112fa..a862802 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -57,6 +57,9 @@
static constexpr bool kEnableFromSpaceAccountingCheck = kIsDebugBuild;
// Enable verbose mode.
static constexpr bool kVerboseMode = false;
+ // If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty
+ // pages.
+ static constexpr bool kGrayDirtyImmuneObjects = true;
ConcurrentCopying(Heap* heap,
const std::string& name_prefix = "",
@@ -230,6 +233,8 @@
space::RegionSpace* region_space_; // The underlying region space.
std::unique_ptr<Barrier> gc_barrier_;
std::unique_ptr<accounting::ObjectStack> gc_mark_stack_;
+ std::unique_ptr<accounting::ObjectStack> rb_mark_bit_stack_;
+ bool rb_mark_bit_stack_full_;
std::vector<mirror::Object*> false_gray_stack_ GUARDED_BY(mark_stack_lock_);
Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
std::vector<accounting::ObjectStack*> revoked_mark_stacks_
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index a92cb24..5485cd2 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2538,6 +2538,17 @@
AddSpace(zygote_space_);
non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
AddSpace(non_moving_space_);
+ if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) {
+ // Treat all of the objects in the zygote as marked to avoid unnecessary dirty pages. This is
+ // safe since we mark all of the objects that may reference non immune objects as gray.
+ zygote_space_->GetLiveBitmap()->VisitMarkedRange(
+ reinterpret_cast<uintptr_t>(zygote_space_->Begin()),
+ reinterpret_cast<uintptr_t>(zygote_space_->Limit()),
+ [](mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+ CHECK(obj->AtomicSetMarkBit(0, 1));
+ });
+ }
+
// Create the zygote space mod union table.
accounting::ModUnionTable* mod_union_table =
new accounting::ModUnionTableCardCache("zygote space mod-union table", this,
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index d140b75..8ade185 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1436,6 +1436,8 @@
image_header->GetImageMethod(ImageHeader::kRefsOnlySaveMethod));
CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs),
image_header->GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod));
+ CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveEverything),
+ image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod));
} else if (!runtime->HasResolutionMethod()) {
runtime->SetInstructionSet(space->oat_file_non_owned_->GetOatHeader().GetInstructionSet());
runtime->SetResolutionMethod(image_header->GetImageMethod(ImageHeader::kResolutionMethod));
@@ -1448,6 +1450,8 @@
image_header->GetImageMethod(ImageHeader::kRefsOnlySaveMethod), Runtime::kRefsOnly);
runtime->SetCalleeSaveMethod(
image_header->GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod), Runtime::kRefsAndArgs);
+ runtime->SetCalleeSaveMethod(
+ image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod), Runtime::kSaveEverything);
}
VLOG(image) << "ImageSpace::Init exiting " << *space.get();
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 5d62b59..c66029d 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -32,6 +32,8 @@
DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kRefsOnly))))
#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 0x10
DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kRefsAndArgs))))
+#define RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET 0x18
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveEverything))))
#define THREAD_FLAGS_OFFSET 0
DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_FLAGS_OFFSET), (static_cast<int32_t>(art::Thread:: ThreadFlagsOffset<art::kRuntimePointerSize>().Int32Value())))
#define THREAD_ID_OFFSET 12
@@ -74,12 +76,22 @@
DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kStateMaskShifted)))
#define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28
DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_READ_BARRIER_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kReadBarrierStateShift)))
-#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x30000000
+#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x10000000
DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShifted)))
-#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xcfffffff
+#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xefffffff
DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)))
#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne)))
+#define LOCK_WORD_GC_STATE_MASK_SHIFTED 0x30000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShifted)))
+#define LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED 0xcfffffff
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShiftedToggled)))
+#define LOCK_WORD_GC_STATE_SHIFT 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_GC_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kGCStateShift)))
+#define LOCK_WORD_MARK_BIT_SHIFT 29
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_MARK_BIT_SHIFT), (static_cast<int32_t>(art::LockWord::kMarkBitStateShift)))
+#define LOCK_WORD_MARK_BIT_MASK_SHIFTED 0x20000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_MARK_BIT_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kMarkBitStateMaskShifted)))
#define OBJECT_ALIGNMENT_MASK 0x7
DEFINE_CHECK_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), (static_cast<size_t>(art::kObjectAlignment - 1)))
#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xfffffff8
diff --git a/runtime/globals.h b/runtime/globals.h
index 0b44c47..9045d40 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -47,7 +47,8 @@
}
// Required object alignment
-static constexpr size_t kObjectAlignment = 8;
+static constexpr size_t kObjectAlignmentShift = 3;
+static constexpr size_t kObjectAlignment = 1u << kObjectAlignmentShift;
static constexpr size_t kLargeObjectAlignment = kPageSize;
// Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
diff --git a/runtime/image.h b/runtime/image.h
index a98cea1..207a818 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -186,6 +186,7 @@
kCalleeSaveMethod,
kRefsOnlySaveMethod,
kRefsAndArgsSaveMethod,
+ kSaveEverythingMethod,
kImageMethodsCount, // Number of elements in enum.
};
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index 341501b..4a2a293 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -43,17 +43,15 @@
inline size_t LockWord::ForwardingAddress() const {
DCHECK_EQ(GetState(), kForwardingAddress);
- return value_ << kStateSize;
+ return value_ << kForwardingAddressShift;
}
inline LockWord::LockWord() : value_(0) {
DCHECK_EQ(GetState(), kUnlocked);
}
-inline LockWord::LockWord(Monitor* mon, uint32_t rb_state)
- : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) |
- (kStateFat << kStateShift)) {
- DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+inline LockWord::LockWord(Monitor* mon, uint32_t gc_state)
+ : value_(mon->GetMonitorId() | (gc_state << kGCStateShift) | (kStateFat << kStateShift)) {
#ifndef __LP64__
DCHECK_ALIGNED(mon, kMonitorIdAlignment);
#endif
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 5d0d204..538b6eb 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -35,27 +35,27 @@
* the state. The four possible states are fat locked, thin/unlocked, hash code, and forwarding
* address. When the lock word is in the "thin" state and its bits are formatted as follows:
*
- * |33|22|222222221111|1111110000000000|
- * |10|98|765432109876|5432109876543210|
- * |00|rb| lock count |thread id owner |
+ * |33|2|2|222222221111|1111110000000000|
+ * |10|9|8|765432109876|5432109876543210|
+ * |00|m|r| lock count |thread id owner |
*
* When the lock word is in the "fat" state and its bits are formatted as follows:
*
- * |33|22|2222222211111111110000000000|
- * |10|98|7654321098765432109876543210|
- * |01|rb| MonitorId |
+ * |33|2|2|2222222211111111110000000000|
+ * |10|9|8|7654321098765432109876543210|
+ * |01|m|r| MonitorId |
*
* When the lock word is in hash state and its bits are formatted as follows:
*
- * |33|22|2222222211111111110000000000|
- * |10|98|7654321098765432109876543210|
- * |10|rb| HashCode |
+ * |33|2|2|2222222211111111110000000000|
+ * |10|9|8|7654321098765432109876543210|
+ * |10|m|r| HashCode |
*
- * When the lock word is in fowarding address state and its bits are formatted as follows:
+ * When the lock word is in forwarding address state and its bits are formatted as follows:
*
- * |33|22|2222222211111111110000000000|
- * |10|98|7654321098765432109876543210|
- * |11| ForwardingAddress |
+ * |33|2|22222222211111111110000000000|
+ * |10|9|87654321098765432109876543210|
+ * |11|0| ForwardingAddress |
*
* The rb bits store the read barrier state.
*/
@@ -64,11 +64,13 @@
enum SizeShiftsAndMasks { // private marker to avoid generate-operator-out.py from processing.
// Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
kStateSize = 2,
- kReadBarrierStateSize = 2,
+ kReadBarrierStateSize = 1,
+ kMarkBitStateSize = 1,
// Number of bits to encode the thin lock owner.
kThinLockOwnerSize = 16,
// Remaining bits are the recursive lock count.
- kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize,
+ kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize -
+ kMarkBitStateSize,
// Thin lock bits. Owner in lowest bits.
kThinLockOwnerShift = 0,
@@ -81,25 +83,43 @@
kThinLockCountOne = 1 << kThinLockCountShift, // == 65536 (0x10000)
// State in the highest bits.
- kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift,
+ kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift +
+ kMarkBitStateSize,
kStateMask = (1 << kStateSize) - 1,
kStateMaskShifted = kStateMask << kStateShift,
kStateThinOrUnlocked = 0,
kStateFat = 1,
kStateHash = 2,
kStateForwardingAddress = 3,
+
+ // Read barrier bit.
kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
kReadBarrierStateMask = (1 << kReadBarrierStateSize) - 1,
kReadBarrierStateMaskShifted = kReadBarrierStateMask << kReadBarrierStateShift,
kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,
+ // Mark bit.
+ kMarkBitStateShift = kReadBarrierStateSize + kReadBarrierStateShift,
+ kMarkBitStateMask = (1 << kMarkBitStateSize) - 1,
+ kMarkBitStateMaskShifted = kMarkBitStateMask << kMarkBitStateShift,
+ kMarkBitStateMaskShiftedToggled = ~kMarkBitStateMaskShifted,
+
+ // GC state is mark bit and read barrier state.
+ kGCStateSize = kReadBarrierStateSize + kMarkBitStateSize,
+ kGCStateShift = kReadBarrierStateShift,
+ kGCStateMaskShifted = kReadBarrierStateMaskShifted | kMarkBitStateMaskShifted,
+ kGCStateMaskShiftedToggled = ~kGCStateMaskShifted,
+
// When the state is kHashCode, the non-state bits hold the hashcode.
// Note Object.hashCode() has the hash code layout hardcoded.
kHashShift = 0,
- kHashSize = 32 - kStateSize - kReadBarrierStateSize,
+ kHashSize = 32 - kStateSize - kReadBarrierStateSize - kMarkBitStateSize,
kHashMask = (1 << kHashSize) - 1,
kMaxHash = kHashMask,
+ // Forwarding address shift.
+ kForwardingAddressShift = kObjectAlignmentShift,
+
kMonitorIdShift = kHashShift,
kMonitorIdSize = kHashSize,
kMonitorIdMask = kHashMask,
@@ -108,31 +128,31 @@
kMaxMonitorId = kMaxHash
};
- static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) {
+ static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t gc_state) {
CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner));
CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount));
- DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
- return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) |
- (rb_state << kReadBarrierStateShift) |
+ // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U);
+ return LockWord((thread_id << kThinLockOwnerShift) |
+ (count << kThinLockCountShift) |
+ (gc_state << kGCStateShift) |
(kStateThinOrUnlocked << kStateShift));
}
static LockWord FromForwardingAddress(size_t target) {
DCHECK_ALIGNED(target, (1 << kStateSize));
- return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift));
+ return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift));
}
- static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) {
+ static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) {
CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash));
- DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+ // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U);
return LockWord((hash_code << kHashShift) |
- (rb_state << kReadBarrierStateShift) |
+ (gc_state << kGCStateShift) |
(kStateHash << kStateShift));
}
- static LockWord FromDefault(uint32_t rb_state) {
- DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
- return LockWord(rb_state << kReadBarrierStateShift);
+ static LockWord FromDefault(uint32_t gc_state) {
+ return LockWord(gc_state << kGCStateShift);
}
static bool IsDefault(LockWord lw) {
@@ -154,7 +174,7 @@
LockState GetState() const {
CheckReadBarrierState();
if ((!kUseReadBarrier && UNLIKELY(value_ == 0)) ||
- (kUseReadBarrier && UNLIKELY((value_ & kReadBarrierStateMaskShiftedToggled) == 0))) {
+ (kUseReadBarrier && UNLIKELY((value_ & kGCStateMaskShiftedToggled) == 0))) {
return kUnlocked;
} else {
uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
@@ -176,6 +196,10 @@
return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask;
}
+ uint32_t GCState() const {
+ return (value_ & kGCStateMaskShifted) >> kGCStateShift;
+ }
+
void SetReadBarrierState(uint32_t rb_state) {
DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
@@ -184,6 +208,19 @@
value_ |= (rb_state & kReadBarrierStateMask) << kReadBarrierStateShift;
}
+
+ uint32_t MarkBitState() const {
+ return (value_ >> kMarkBitStateShift) & kMarkBitStateMask;
+ }
+
+ void SetMarkBitState(uint32_t mark_bit) {
+ DCHECK_EQ(mark_bit & ~kMarkBitStateMask, 0U);
+ DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
+ // Clear and or the bits.
+ value_ &= kMarkBitStateMaskShiftedToggled;
+ value_ |= mark_bit << kMarkBitStateShift;
+ }
+
// Return the owner thin lock thread id.
uint32_t ThinLockOwner() const;
@@ -197,7 +234,7 @@
size_t ForwardingAddress() const;
// Constructor a lock word for inflation to use a Monitor.
- LockWord(Monitor* mon, uint32_t rb_state);
+ LockWord(Monitor* mon, uint32_t gc_state);
// Return the hash code stored in the lock word, must be kHashCode state.
int32_t GetHashCode() const;
@@ -207,7 +244,7 @@
if (kIncludeReadBarrierState) {
return lw1.GetValue() == lw2.GetValue();
}
- return lw1.GetValueWithoutReadBarrierState() == lw2.GetValueWithoutReadBarrierState();
+ return lw1.GetValueWithoutGCState() == lw2.GetValueWithoutGCState();
}
void Dump(std::ostream& os) {
@@ -248,9 +285,9 @@
return value_;
}
- uint32_t GetValueWithoutReadBarrierState() const {
+ uint32_t GetValueWithoutGCState() const {
CheckReadBarrierState();
- return value_ & ~(kReadBarrierStateMask << kReadBarrierStateShift);
+ return value_ & kGCStateMaskShiftedToggled;
}
// Only Object should be converting LockWords to/from uints.
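
The repartitioned lock word above keeps the 2 state bits at the top and splits the old 2-bit read barrier field into a mark bit (bit 29) and a read barrier bit (bit 28), leaving 28 payload bits for the thin lock, hash, or monitor id. A short standalone C++ consistency check, derived only from the field sizes and the LOCK_WORD_* values generated into asm_support_gen.h earlier in this change:

    #include <cstdint>

    namespace sketch {

    constexpr uint32_t kStateSize = 2;
    constexpr uint32_t kReadBarrierStateSize = 1;
    constexpr uint32_t kMarkBitStateSize = 1;
    constexpr uint32_t kThinLockOwnerSize = 16;
    constexpr uint32_t kThinLockCountSize =
        32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize - kMarkBitStateSize;

    constexpr uint32_t kReadBarrierStateShift = kThinLockOwnerSize + kThinLockCountSize;      // 28
    constexpr uint32_t kMarkBitStateShift = kReadBarrierStateShift + kReadBarrierStateSize;   // 29
    constexpr uint32_t kStateShift = kMarkBitStateShift + kMarkBitStateSize;                  // 30

    constexpr uint32_t kReadBarrierStateMaskShifted = 1u << kReadBarrierStateShift;
    constexpr uint32_t kMarkBitStateMaskShifted = 1u << kMarkBitStateShift;
    constexpr uint32_t kGCStateMaskShifted = kReadBarrierStateMaskShifted | kMarkBitStateMaskShifted;

    // These match the LOCK_WORD_* constants in asm_support_gen.h above.
    static_assert(kReadBarrierStateMaskShifted == 0x10000000u, "rb mask");
    static_assert(kMarkBitStateMaskShifted == 0x20000000u, "mark bit mask");
    static_assert(kGCStateMaskShifted == 0x30000000u, "gc state mask");
    static_assert(~kGCStateMaskShifted == 0xcfffffffu, "gc state mask toggled");
    static_assert(kStateShift == 30u, "state shift");

    }  // namespace sketch
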
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 0592c6c..0495c95 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -147,10 +147,20 @@
#endif
}
+inline uint32_t Object::GetMarkBit() {
+#ifdef USE_READ_BARRIER
+ return GetLockWord(false).MarkBitState();
+#else
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+#endif
+}
+
inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
#ifdef USE_BAKER_READ_BARRIER
DCHECK(kUseBakerReadBarrier);
DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+ DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
LockWord lw = GetLockWord(false);
lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
SetLockWord(lw, false);
@@ -173,6 +183,8 @@
DCHECK(kUseBakerReadBarrier);
DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+ DCHECK_NE(expected_rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+ DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
LockWord expected_lw;
LockWord new_lw;
do {
@@ -216,6 +228,24 @@
#endif
}
+inline bool Object::AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) {
+ LockWord expected_lw;
+ LockWord new_lw;
+ do {
+ LockWord lw = GetLockWord(false);
+ if (UNLIKELY(lw.MarkBitState() != expected_mark_bit)) {
+ // Lost the race.
+ return false;
+ }
+ expected_lw = lw;
+ new_lw = lw;
+ new_lw.SetMarkBitState(mark_bit);
+ // Since this is only set from the mutator, we can use the non-release CAS.
+ } while (!CasLockWordWeakRelaxed(expected_lw, new_lw));
+ return true;
+}
+
+
inline void Object::AssertReadBarrierPointer() const {
if (kUseBakerReadBarrier) {
Object* obj = const_cast<Object*>(this);
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 701c600..13c536e 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -163,8 +163,7 @@
case LockWord::kUnlocked: {
// Try to compare and swap in a new hash, if we succeed we will return the hash on the next
// loop iteration.
- LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(),
- lw.ReadBarrierState());
+ LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(), lw.GCState());
DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode);
if (const_cast<Object*>(this)->CasLockWordWeakRelaxed(lw, hash_word)) {
return hash_word.GetHashCode();
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index a4bdbad..5b129bf 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -93,6 +93,7 @@
template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_);
+ // TODO: Clean this up and change to return int32_t
Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_);
#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
@@ -103,6 +104,12 @@
template<bool kCasRelease = false>
ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
SHARED_REQUIRES(Locks::mutator_lock_);
+
+ ALWAYS_INLINE uint32_t GetMarkBit() SHARED_REQUIRES(Locks::mutator_lock_);
+
+ ALWAYS_INLINE bool AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit)
+ SHARED_REQUIRES(Locks::mutator_lock_);
+
void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);
// The verifier treats all interfaces as java.lang.Object and relies on runtime checks in
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index bf9f931..e863ea9 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -155,7 +155,7 @@
return false;
}
}
- LockWord fat(this, lw.ReadBarrierState());
+ LockWord fat(this, lw.GCState());
// Publish the updated lock word, which may race with other threads.
bool success = GetObject()->CasLockWordWeakSequentiallyConsistent(lw, fat);
// Lock profiling.
@@ -774,20 +774,21 @@
return false;
}
// Deflate to a thin lock.
- LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_,
- lw.ReadBarrierState());
+ LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(),
+ monitor->lock_count_,
+ lw.GCState());
// Assume no concurrent read barrier state changes as mutators are suspended.
obj->SetLockWord(new_lw, false);
VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / "
<< monitor->lock_count_;
} else if (monitor->HasHashCode()) {
- LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.ReadBarrierState());
+ LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.GCState());
// Assume no concurrent read barrier state changes as mutators are suspended.
obj->SetLockWord(new_lw, false);
VLOG(monitor) << "Deflated " << obj << " to hash monitor " << monitor->GetHashCode();
} else {
// No lock and no hash, just put an empty lock word inside the object.
- LockWord new_lw = LockWord::FromDefault(lw.ReadBarrierState());
+ LockWord new_lw = LockWord::FromDefault(lw.GCState());
// Assume no concurrent read barrier state changes as mutators are suspended.
obj->SetLockWord(new_lw, false);
VLOG(monitor) << "Deflated" << obj << " to empty lock word";
@@ -876,7 +877,7 @@
LockWord lock_word = h_obj->GetLockWord(true);
switch (lock_word.GetState()) {
case LockWord::kUnlocked: {
- LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.ReadBarrierState()));
+ LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.GCState()));
if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
// CasLockWord enforces more than the acquire ordering we need here.
@@ -890,8 +891,9 @@
// We own the lock, increase the recursion count.
uint32_t new_count = lock_word.ThinLockCount() + 1;
if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
- LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count,
- lock_word.ReadBarrierState()));
+ LockWord thin_locked(LockWord::FromThinLockId(thread_id,
+ new_count,
+ lock_word.GCState()));
if (!kUseReadBarrier) {
h_obj->SetLockWord(thin_locked, true);
AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
@@ -975,9 +977,9 @@
LockWord new_lw = LockWord::Default();
if (lock_word.ThinLockCount() != 0) {
uint32_t new_count = lock_word.ThinLockCount() - 1;
- new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.ReadBarrierState());
+ new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.GCState());
} else {
- new_lw = LockWord::FromDefault(lock_word.ReadBarrierState());
+ new_lw = LockWord::FromDefault(lock_word.GCState());
}
if (!kUseReadBarrier) {
DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
diff --git a/runtime/oat.h b/runtime/oat.h
index 2c5c3e6..7c84fe9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '8', '5', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '8', '6', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 42e959c..5d32c09 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -99,8 +99,9 @@
// Note: These couldn't be constexpr pointers as reinterpret_cast isn't compatible with them.
static constexpr uintptr_t white_ptr_ = 0x0; // Not marked.
static constexpr uintptr_t gray_ptr_ = 0x1; // Marked, but not marked through. On mark stack.
+ // TODO: black_ptr_ is unused; we should remove it.
static constexpr uintptr_t black_ptr_ = 0x2; // Marked through. Used for non-moving objects.
- static constexpr uintptr_t rb_ptr_mask_ = 0x3; // The low 2 bits for white|gray|black.
+ static constexpr uintptr_t rb_ptr_mask_ = 0x1; // The low bits for white|gray.
};
} // namespace art
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index bfa8c54..265587d 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -45,9 +45,11 @@
return GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
} else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveAll)) {
return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAll);
- } else {
- DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kRefsOnly));
+ } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kRefsOnly)) {
return GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly);
+ } else {
+ DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kSaveEverything));
+ return GetCalleeSaveMethodFrameInfo(Runtime::kSaveEverything);
}
}
diff --git a/runtime/runtime.h b/runtime/runtime.h
index afa8e48..7e269af 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -387,9 +387,10 @@
// Returns a special method that describes all callee saves being spilled to the stack.
enum CalleeSaveType {
- kSaveAll,
+ kSaveAll, // All callee-save registers.
kRefsOnly,
kRefsAndArgs,
+ kSaveEverything, // Even caller-save registers.
kLastCalleeSaveType // Value used for iteration
};
diff --git a/test/614-checker-dump-constant-location/expected.txt b/test/614-checker-dump-constant-location/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/614-checker-dump-constant-location/expected.txt
diff --git a/test/614-checker-dump-constant-location/info.txt b/test/614-checker-dump-constant-location/info.txt
new file mode 100644
index 0000000..4a94ffa
--- /dev/null
+++ b/test/614-checker-dump-constant-location/info.txt
@@ -0,0 +1,2 @@
+Test that the graph visualizer outputs useful information for constant
+locations in parallel moves.
diff --git a/test/614-checker-dump-constant-location/src/Main.java b/test/614-checker-dump-constant-location/src/Main.java
new file mode 100644
index 0000000..f6bc063
--- /dev/null
+++ b/test/614-checker-dump-constant-location/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+ public static int array_int[] = { 0 };
+ public static long array_long[] = { 0 };
+ public static float array_float[] = { 0.0f };
+ public static double array_double[] = { 0.0 };
+
+ // The code used to print constant locations in parallel moves is architecture
+ // independent. We only test for ARM and ARM64 as it is easy: 'store'
+ // instructions only take registers as a source.
+
+ /// CHECK-START-ARM: void Main.store_to_arrays() register (after)
+ /// CHECK: ParallelMove {{.*#1->.*#2->.*#3\.3->.*#4\.4->.*}}
+
+ /// CHECK-START-ARM64: void Main.store_to_arrays() register (after)
+ /// CHECK: ParallelMove {{.*#1->.*#2->.*#3\.3->.*#4\.4->.*}}
+
+ public void store_to_arrays() {
+ array_int[0] = 1;
+ array_long[0] = 2;
+ array_float[0] = 3.3f;
+ array_double[0] = 4.4;
+ }
+
+ public static void main(String args[]) {}
+}
diff --git a/test/615-checker-arm64-zr-parallel-move/expected.txt b/test/615-checker-arm64-zr-parallel-move/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/615-checker-arm64-zr-parallel-move/expected.txt
diff --git a/test/615-checker-arm64-zr-parallel-move/info.txt b/test/615-checker-arm64-zr-parallel-move/info.txt
new file mode 100644
index 0000000..199755d
--- /dev/null
+++ b/test/615-checker-arm64-zr-parallel-move/info.txt
@@ -0,0 +1 @@
+Checker test to verify we correctly use wzr and xzr to synthesize zero constants.
diff --git a/test/615-checker-arm64-zr-parallel-move/src/Main.java b/test/615-checker-arm64-zr-parallel-move/src/Main.java
new file mode 100644
index 0000000..5024f28
--- /dev/null
+++ b/test/615-checker-arm64-zr-parallel-move/src/Main.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+ public static boolean doThrow = false;
+
+ public void $noinline$foo(int in_w1,
+ int in_w2,
+ int in_w3,
+ int in_w4,
+ int in_w5,
+ int in_w6,
+ int in_w7,
+ int on_stack_int,
+ long on_stack_long,
+ float in_s0,
+ float in_s1,
+ float in_s2,
+ float in_s3,
+ float in_s4,
+ float in_s5,
+ float in_s6,
+ float in_s7,
+ float on_stack_float,
+ double on_stack_double) {
+ if (doThrow) throw new Error();
+ }
+
+ // We expect a parallel move that moves the zero constant to four stack locations.
+ /// CHECK-START-ARM64: void Main.bar() register (after)
+ /// CHECK: ParallelMove {{.*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*}}
+
+ // Those four moves should generate four 'store' instructions using directly the zero register.
+ /// CHECK-START-ARM64: void Main.bar() disassembly (after)
+ /// CHECK-DAG: {{(str|stur)}} wzr, [sp, #{{[0-9]+}}]
+ /// CHECK-DAG: {{(str|stur)}} xzr, [sp, #{{[0-9]+}}]
+ /// CHECK-DAG: {{(str|stur)}} wzr, [sp, #{{[0-9]+}}]
+ /// CHECK-DAG: {{(str|stur)}} xzr, [sp, #{{[0-9]+}}]
+
+ public void bar() {
+ $noinline$foo(1, 2, 3, 4, 5, 6, 7, // Integral values in registers.
+ 0, 0L, // Integral values on the stack.
+ 1, 2, 3, 4, 5, 6, 7, 8, // Floating-point values in registers.
+ 0.0f, 0.0); // Floating-point values on the stack.
+ }
+
+ public static void main(String args[]) {}
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 8f8b667..8d7d70d 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -26,7 +26,8 @@
# The path where build only targets will be output, e.g.
# out/target/product/generic_x86_64/obj/PACKAGING/art-run-tests_intermediates/DATA
-art_run_tests_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
+art_run_tests_build_dir := $(call intermediates-dir-for,JAVA_LIBRARIES,art-run-tests)/DATA
+art_run_tests_install_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
# A generated list of prerequisites that call 'run-test --build-only', the actual prerequisite is
# an empty file touched in the intermediate directory.
@@ -49,7 +50,8 @@
# Helper to create individual build targets for tests. Must be called with $(eval).
# $(1): the test number
define define-build-art-run-test
- dmart_target := $(art_run_tests_dir)/art-run-tests/$(1)/touch
+ dmart_target := $(art_run_tests_build_dir)/art-run-tests/$(1)/touch
+ dmart_install_target := $(art_run_tests_install_dir)/art-run-tests/$(1)/touch
run_test_options = --build-only
ifeq ($(ART_TEST_QUIET),true)
run_test_options += --quiet
@@ -67,8 +69,13 @@
$(LOCAL_PATH)/run-test $$(PRIVATE_RUN_TEST_OPTIONS) --output-path $$(abspath $$(dir $$@)) $(1)
$(hide) touch $$@
- TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_target)
+$$(dmart_install_target): $$(dmart_target)
+ $(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
+ $(hide) cp $$(dir $$<)/* $$(dir $$@)/
+
+ TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_install_target)
dmart_target :=
+ dmart_install_target :=
run_test_options :=
endef
$(foreach test, $(TEST_ART_RUN_TESTS), $(eval $(call define-build-art-run-test,$(test))))
@@ -78,12 +85,13 @@
LOCAL_MODULE := art-run-tests
LOCAL_ADDITIONAL_DEPENDENCIES := $(TEST_ART_RUN_TEST_BUILD_RULES)
# The build system use this flag to pick up files generated by declare-make-art-run-test.
-LOCAL_PICKUP_FILES := $(art_run_tests_dir)
+LOCAL_PICKUP_FILES := $(art_run_tests_install_dir)
include $(BUILD_PHONY_PACKAGE)
# Clear temp vars.
-art_run_tests_dir :=
+art_run_tests_build_dir :=
+art_run_tests_install_dir :=
define-build-art-run-test :=
TEST_ART_RUN_TEST_BUILD_RULES :=
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 806e130..ee2ee1a 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -130,18 +130,18 @@
return;
}
- ScopedObjectAccess soa(Thread::Current());
+ ArtMethod* method = nullptr;
+ {
+ ScopedObjectAccess soa(Thread::Current());
- ScopedUtfChars chars(env, method_name);
- CHECK(chars.c_str() != nullptr);
-
- mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
- ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), kRuntimePointerSize);
+ ScopedUtfChars chars(env, method_name);
+ CHECK(chars.c_str() != nullptr);
+ method = soa.Decode<mirror::Class*>(cls)->FindDeclaredDirectMethodByName(
+ chars.c_str(), kRuntimePointerSize);
+ }
jit::JitCodeCache* code_cache = jit->GetCodeCache();
OatQuickMethodHeader* header = nullptr;
- // Make sure there is a profiling info, required by the compiler.
- ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
while (true) {
header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode());
if (code_cache->ContainsPc(header->GetCode())) {
@@ -149,6 +149,9 @@
} else {
// Sleep to yield to the compiler thread.
usleep(1000);
+ ScopedObjectAccess soa(Thread::Current());
+ // Make sure there is a profiling info, required by the compiler.
+ ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
// Will either ensure it's compiled or do the compilation itself.
jit->CompileMethod(method, soa.Self(), /* osr */ false);
}
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 64bf4f3..c6c9380 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -553,12 +553,10 @@
if [ "$TIME_OUT" = "timeout" ]; then
# Add timeout command if time out is desired.
#
- # Note: We use nested timeouts. The inner timeout sends SIGRTMIN+2 (usually 36) to ART, which
- # will induce a full thread dump before abort. However, dumping threads might deadlock,
- # so the outer timeout sends the regular SIGTERM after an additional minute to ensure
- # termination (without dumping all threads).
- TIME_PLUS_ONE=$(($TIME_OUT_VALUE + 60))
- cmdline="timeout ${TIME_PLUS_ONE}s timeout -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline"
+ # Note: We first send SIGRTMIN+2 (usually 36) to ART, which will induce a full thread dump
+ # before abort. However, dumping threads might deadlock, so we also use the "-k"
+ # option to definitely kill the child.
+ cmdline="timeout -k 120s -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline"
fi
if [ "$DEV_MODE" = "y" ]; then
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index 3cdb40c..8769d11 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -95,9 +95,7 @@
return null;
}
- // TODO: When perflib provides a better way to get the length of the
- // array, we should use that here.
- int numChars = chars.getValues().length;
+ int numChars = chars.getLength();
int count = getIntField(inst, "count", numChars);
if (count == 0) {
return "";
diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def
index c1e6099..67ed5b5 100644
--- a/tools/cpp-define-generator/constant_lockword.def
+++ b/tools/cpp-define-generator/constant_lockword.def
@@ -30,5 +30,12 @@
DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled)
DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE, int32_t, kThinLockCountOne)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED, uint32_t, kGCStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED_TOGGLED, uint32_t, kGCStateMaskShiftedToggled)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_SHIFT, int32_t, kGCStateShift)
+
+DEFINE_LOCK_WORD_EXPR(MARK_BIT_SHIFT, int32_t, kMarkBitStateShift)
+DEFINE_LOCK_WORD_EXPR(MARK_BIT_MASK_SHIFTED, uint32_t, kMarkBitStateMaskShifted)
+
#undef DEFINE_LOCK_WORD_EXPR
diff --git a/tools/cpp-define-generator/offset_runtime.def b/tools/cpp-define-generator/offset_runtime.def
index b327ca3..123992f 100644
--- a/tools/cpp-define-generator/offset_runtime.def
+++ b/tools/cpp-define-generator/offset_runtime.def
@@ -34,6 +34,8 @@
DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(REFS_ONLY, kRefsOnly)
// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(REFS_AND_ARGS, kRefsAndArgs)
+// Offset of field Runtime::callee_save_methods_[kSaveEverything]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_EVERYTHING, kSaveEverything)
#undef DEFINE_RUNTIME_CALLEE_SAVE_OFFSET
#include "common_undef.def" // undef DEFINE_OFFSET_EXPR
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index bf8d12b..8d87e4f 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -36,6 +36,15 @@
names: ["libcore.io.OsTest#testUnixDomainSockets_in_file_system"]
},
{
+ description: "TCP_USER_TIMEOUT is not defined on host's tcp.h (glibc-2.15-4.8).",
+ result: EXEC_FAILED,
+ modes: [host],
+ names: ["libcore.android.system.OsConstantsTest#testTcpUserTimeoutIsDefined",
+ "libcore.io.OsTest#test_socket_tcpUserTimeout_setAndGet",
+ "libcore.io.OsTest#test_socket_tcpUserTimeout_doesNotWorkOnDatagramSocket"],
+ bug: 30402085
+},
+{
description: "Issue with incorrect device time (1970)",
result: EXEC_FAILED,
modes: [device],
@@ -174,38 +183,7 @@
description: "Failing tests after OpenJDK move.",
result: EXEC_FAILED,
bug: 26326992,
- names: ["libcore.icu.RelativeDateTimeFormatterTest#test_getRelativeDateTimeStringDST",
- "libcore.java.lang.OldSystemTest#test_load",
- "libcore.java.text.NumberFormatTest#test_currencyWithPatternDigits",
- "libcore.java.text.NumberFormatTest#test_setCurrency",
- "libcore.java.text.OldNumberFormatTest#test_getIntegerInstanceLjava_util_Locale",
- "libcore.java.util.CalendarTest#testAddOneDayAndOneDayOver30MinuteDstForwardAdds48Hours",
- "libcore.java.util.CalendarTest#testNewCalendarKoreaIsSelfConsistent",
- "libcore.java.util.CalendarTest#testSetTimeInZoneWhereDstIsNoLongerUsed",
- "libcore.java.util.CalendarTest#test_nullLocale",
- "libcore.java.util.FormatterTest#test_numberLocalization",
- "libcore.java.util.FormatterTest#test_uppercaseConversions",
- "libcore.javax.crypto.CipherTest#testCipher_getInstance_WrongType_Failure",
- "libcore.javax.crypto.CipherTest#testDecryptBufferZeroSize_mustDecodeToEmptyString",
- "libcore.javax.security.auth.x500.X500PrincipalTest#testExceptionsForWrongDNs",
- "org.apache.harmony.luni.tests.java.net.URLConnectionTest#test_getDate",
- "org.apache.harmony.luni.tests.java.net.URLConnectionTest#test_getExpiration",
- "org.apache.harmony.regex.tests.java.util.regex.PatternSyntaxExceptionTest#testPatternSyntaxException",
- "org.apache.harmony.tests.java.lang.FloatTest#test_parseFloat_LString_Harmony6261",
- "org.apache.harmony.tests.java.lang.ThreadTest#test_isDaemon",
- "org.apache.harmony.tests.java.text.DecimalFormatSymbolsTest#test_setInternationalCurrencySymbolLjava_lang_String",
- "org.apache.harmony.tests.java.text.DecimalFormatTest#testSerializationHarmonyRICompatible",
- "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parseLjava_lang_StringLjava_text_ParsePosition",
- "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_W_w_dd_MMMM_yyyy_EEEE",
- "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_dayOfYearPatterns",
- "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_m_z",
- "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_2DigitOffsetFromGMT",
- "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_4DigitOffsetFromGMT",
- "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_4DigitOffsetNoGMT",
- "org.apache.harmony.tests.java.util.jar.JarFileTest#test_getInputStreamLjava_util_jar_JarEntry_subtest0",
- "libcore.java.util.CalendarTest#test_clear_45877",
- "org.apache.harmony.crypto.tests.javax.crypto.spec.SecretKeySpecTest#testGetFormat",
- "org.apache.harmony.tests.java.util.TimerTaskTest#test_scheduledExecutionTime"]
+ names: ["libcore.java.lang.OldSystemTest#test_load"]
},
{
description: "Missing resource in classpath",
@@ -262,10 +240,12 @@
names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_destroyForcibly"]
},
{
- description: "Flaky failure, possibly caused by a kernel bug accessing /proc/",
+ description: "Flaky failure, native crash in the runtime.
+ Unclear if this relates to the tests running sh as a child process.",
result: EXEC_FAILED,
- bug: 27464570,
+ bug: 30657148,
modes: [device],
- names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit"]
+ names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit",
+ "libcore.java.lang.ProcessBuilderTest#testRedirect_nullStreams"]
}
]