Optimize stack maps: add fast path for no inline info.
Consumers of CodeInfo can skip significant chunks of work
if they can quickly determine that the method has no inlining.
Store this fact as a flag bit at the start of the code info.
This changes the binary format and adds <0.1% to oat size.

I added the extra flags field as the simplest solution for now,
although I would like to use it for more things in the future
(e.g. to store the special cases of empty/deduped tables in it).

This improves app startup by 0.4% (maps, speed).
PMD on golem seems to get around 15% faster.
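The consumer-side fast path is a single flag check before any table
decoding. A minimal sketch, mirroring the entrypoint_utils.cc hunk
below (`current_code` and `caller_pc` stand for values the caller
already has in hand):

  // Sketch only: test the flag bit before paying for a CodeInfo decode.
  if (CodeInfo::HasInlineInfo(current_code->GetOptimizedCodeInfoPtr())) {
    CodeInfo code_info(current_code, CodeInfo::DecodeFlags::InlineInfoOnly);
    StackMap stack_map = code_info.GetStackMapForNativePcOffset(
        current_code->NativeQuickPcOffset(caller_pc));
    if (stack_map.IsValid() && !code_info.GetInlineInfosOf(stack_map).empty()) {
      // ... resolve the inlined caller as before ...
    }
  }
  // No inline info: the one-byte flag read replaces the whole decode.
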
Bug: 133257467
Test: ./art/test.py -b --host --64
Change-Id: Ia498a31bafc74b51cc95b8c70cf1da4b0e3d894e
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 8c36643..e21e21c 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -184,6 +184,7 @@
in_inline_info_ = true;
DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size());
+ flags_ |= CodeInfo::kHasInlineInfo;
expected_num_dex_registers_ += num_dex_registers;
BitTableBuilder<InlineInfo>::Entry entry;
@@ -305,6 +306,7 @@
ScopedArenaVector<uint8_t> buffer(allocator_->Adapter(kArenaAllocStackMapStream));
BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&buffer);
+ out.WriteVarint(flags_);
out.WriteVarint(packed_frame_size_);
out.WriteVarint(core_spill_mask_);
out.WriteVarint(fp_spill_mask_);
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 01c6bf9..20dd32e 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -99,6 +99,7 @@
ScopedArenaAllocator* allocator_;
const InstructionSet instruction_set_;
+ uint32_t flags_ = 0;
uint32_t packed_frame_size_ = 0;
uint32_t core_spill_mask_ = 0;
uint32_t fp_spill_mask_ = 0;
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index ee2ab56..71196d4 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -203,13 +203,15 @@
const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
DCHECK(current_code != nullptr);
DCHECK(current_code->IsOptimized());
- uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
- CodeInfo code_info(current_code, CodeInfo::DecodeFlags::InlineInfoOnly);
- StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
- DCHECK(stack_map.IsValid());
- BitTableRange<InlineInfo> inline_infos = code_info.GetInlineInfosOf(stack_map);
- if (!inline_infos.empty()) {
- caller = GetResolvedMethod(outer_method, code_info, inline_infos);
+ if (CodeInfo::HasInlineInfo(current_code->GetOptimizedCodeInfoPtr())) {
+ uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
+ CodeInfo code_info(current_code, CodeInfo::DecodeFlags::InlineInfoOnly);
+ StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
+ DCHECK(stack_map.IsValid());
+ BitTableRange<InlineInfo> inline_infos = code_info.GetInlineInfosOf(stack_map);
+ if (!inline_infos.empty()) {
+ caller = GetResolvedMethod(outer_method, code_info, inline_infos);
+ }
}
}
if (kIsDebugBuild && do_caller_check) {
diff --git a/runtime/oat.h b/runtime/oat.h
index 15059a8..f4b5a6e 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
class PACKED(4) OatHeader {
public:
static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
- // Last oat version changed reason: Remove unused trampoline entrypoints.
- static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '0', '\0' } };
+ // Last oat version changed reason: Optimize stack maps: add fast path for no inline info.
+ static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '1', '\0' } };
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 6466efd..ec89d3f 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -816,8 +816,8 @@
if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames)
&& (cur_oat_quick_method_header_ != nullptr)
&& cur_oat_quick_method_header_->IsOptimized()
- // JNI methods cannot have any inlined frames.
- && !method->IsNative()) {
+ && !method->IsNative() // JNI methods cannot have any inlined frames.
+ && CodeInfo::HasInlineInfo(cur_oat_quick_method_header_->GetOptimizedCodeInfoPtr())) {
DCHECK_NE(cur_quick_frame_pc_, 0u);
current_code_info_ = CodeInfo(cur_oat_quick_method_header_,
CodeInfo::DecodeFlags::InlineInfoOnly);
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index 6585a3b..eef7378 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -47,16 +47,20 @@
void CodeInfo::Decode(const uint8_t* data, DecodeFlags flags) {
BitMemoryReader reader(data);
- uint32_t header[4];
+ uint32_t header[5];
reader.ReadVarints(header);
- packed_frame_size_ = header[0];
- core_spill_mask_ = header[1];
- fp_spill_mask_ = header[2];
- number_of_dex_registers_ = header[3];
+ flags_ = header[0];
+ packed_frame_size_ = header[1];
+ core_spill_mask_ = header[2];
+ fp_spill_mask_ = header[3];
+ number_of_dex_registers_ = header[4];
ForEachBitTableField([this, &reader](auto member_pointer) {
DecodeTable(this->*member_pointer, reader);
}, flags);
size_in_bits_ = reader.NumberOfReadBits();
+ if (flags == AllTables) {
+ DCHECK_EQ(HasInlineInfo(data), HasInlineInfo());
+ }
}
size_t CodeInfo::Deduper::Dedupe(const uint8_t* code_info_data) {
@@ -230,6 +234,7 @@
bool verbose,
InstructionSet instruction_set) const {
vios->Stream() << "CodeInfo BitSize=" << size_in_bits_
+ << " Flags:" << flags_
<< " FrameSize:" << packed_frame_size_ * kStackAlignment
<< " CoreSpillMask:" << std::hex << core_spill_mask_
<< " FpSpillMask:" << std::hex << fp_spill_mask_
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index a2f0019..a971467 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -438,11 +438,15 @@
// Accumulate code info size statistics into the given Stats tree.
static void CollectSizeStats(const uint8_t* code_info, /*out*/ Stats* parent);
- ALWAYS_INLINE static QuickMethodFrameInfo DecodeFrameInfo(const uint8_t* data) {
- BitMemoryReader reader(data);
- uint32_t args[3]; // packed_frame_size, core_spill_mask, fp_spill_mask.
- reader.ReadVarints(args);
- return QuickMethodFrameInfo(args[0] * kStackAlignment, args[1], args[2]);
+ ALWAYS_INLINE static bool HasInlineInfo(const uint8_t* code_info_data) {
+ return (*code_info_data & kHasInlineInfo) != 0;
+ }
+
+ ALWAYS_INLINE static QuickMethodFrameInfo DecodeFrameInfo(const uint8_t* code_info_data) {
+ BitMemoryReader reader(code_info_data);
+ uint32_t header[4]; // flags, packed_frame_size, core_spill_mask, fp_spill_mask.
+ reader.ReadVarints(header);
+ return QuickMethodFrameInfo(header[1] * kStackAlignment, header[2], header[3]);
}
private:
@@ -460,6 +464,7 @@
// Invokes the callback with member pointer of each header field.
template<typename Callback>
ALWAYS_INLINE static void ForEachHeaderField(Callback callback) {
+ callback(&CodeInfo::flags_);
callback(&CodeInfo::packed_frame_size_);
callback(&CodeInfo::core_spill_mask_);
callback(&CodeInfo::fp_spill_mask_);
@@ -485,6 +490,11 @@
callback(&CodeInfo::dex_register_catalog_);
}
+ enum Flags {
+ kHasInlineInfo = 1 << 0,
+ };
+
+ uint32_t flags_ = 0;
uint32_t packed_frame_size_ = 0; // Frame size in kStackAlignment units.
uint32_t core_spill_mask_ = 0;
uint32_t fp_spill_mask_ = 0;