Deduplicate register masks
Register masks are commonly shared between different stack maps, so store
each unique mask only once and let stack maps refer to it by index. The
deduplicated register masks are stored in a bit-packed table after the
stack masks.
Oat size for a large app:
96722288 -> 94485872 (-2.31%)
Average oat size reduction according to golem: -3.193%.
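To illustrate the idea, here is a minimal, standalone C++ sketch of the dedup
scheme (not ART code; DeduplicateRegisterMasks and MaskTable are illustrative
names): each stack map keeps only a small index into a table of unique register
masks, mirroring what StackMapStream::PrepareRegisterMasks() does in the diff
below, and reads go back through the index much like CodeInfo::GetRegisterMaskOf().

#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

struct MaskTable {
  std::vector<uint32_t> unique_masks;  // Each mask stored once; bit-packed in the real encoding.
  std::vector<size_t> indices;         // One index per stack map, replacing the full mask.
};

MaskTable DeduplicateRegisterMasks(const std::vector<uint32_t>& register_masks) {
  MaskTable table;
  table.indices.reserve(register_masks.size());
  std::unordered_map<uint32_t, size_t> dedupe;  // mask -> index into unique_masks.
  for (uint32_t mask : register_masks) {
    auto it = dedupe.find(mask);
    if (it == dedupe.end()) {
      // First occurrence of this mask: append it and remember its index.
      it = dedupe.emplace(mask, table.unique_masks.size()).first;
      table.unique_masks.push_back(mask);
    }
    table.indices.push_back(it->second);
  }
  return table;
}

int main() {
  // Four stack maps but only two distinct register masks.
  MaskTable table = DeduplicateRegisterMasks({0x3u, 0xFFu, 0x3u, 0x3u});
  printf("unique masks: %zu\n", table.unique_masks.size());  // Prints 2.
  for (size_t index : table.indices) {
    // Reading a mask goes through the index, analogous to
    // code_info.GetRegisterMaskOf(encoding, stack_map) in the tests below.
    printf("mask = 0x%x\n", static_cast<unsigned>(table.unique_masks[index]));
  }
  return 0;
}

In the actual encoding the savings come from the table entries taking only
MinimumBitsToStore(register_mask_max_) bits each, while each stack map stores
just an index sized by number_of_register_masks instead of the full mask.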
Bug: 34621054
Test: test-art-host
Change-Id: I5eacf668992e866d11ddba0c01675038a16cdfb4
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 46497e3..668108d 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -157,37 +157,43 @@
}
size_t StackMapStream::PrepareForFillIn() {
- size_t stack_mask_size_in_bits = stack_mask_max_ + 1; // Need room for max element too.
- size_t number_of_stack_masks = PrepareStackMasks(stack_mask_size_in_bits);
+ const size_t stack_mask_size_in_bits = stack_mask_max_ + 1; // Need room for max element too.
+ const size_t number_of_stack_masks = PrepareStackMasks(stack_mask_size_in_bits);
+ const size_t register_mask_size_in_bits = MinimumBitsToStore(register_mask_max_);
+ const size_t number_of_register_masks = PrepareRegisterMasks();
dex_register_maps_size_ = ComputeDexRegisterMapsSize();
ComputeInlineInfoEncoding(); // needs dex_register_maps_size_.
inline_info_size_ = inline_infos_.size() * inline_info_encoding_.GetEntrySize();
CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset();
// The stack map contains compressed native PC offsets.
- size_t stack_map_size = stack_map_encoding_.SetFromSizes(max_native_pc_offset.CompressedValue(),
- dex_pc_max_,
- dex_register_maps_size_,
- inline_info_size_,
- register_mask_max_,
- number_of_stack_masks);
+ const size_t stack_map_size = stack_map_encoding_.SetFromSizes(
+ max_native_pc_offset.CompressedValue(),
+ dex_pc_max_,
+ dex_register_maps_size_,
+ inline_info_size_,
+ number_of_register_masks,
+ number_of_stack_masks);
stack_maps_size_ = RoundUp(stack_maps_.size() * stack_map_size, kBitsPerByte) / kBitsPerByte;
dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
- size_t stack_masks_bytes =
- RoundUp(number_of_stack_masks * stack_mask_size_in_bits, kBitsPerByte) / kBitsPerByte;
-
- size_t non_header_size =
+ const size_t stack_masks_bits = number_of_stack_masks * stack_mask_size_in_bits;
+ const size_t register_masks_bits = number_of_register_masks * register_mask_size_in_bits;
+ // Register masks are stored last, with the stack masks immediately before them.
+ // Both tables are bit-packed / bit-aligned.
+ const size_t non_header_size =
stack_maps_size_ +
dex_register_location_catalog_size_ +
dex_register_maps_size_ +
inline_info_size_ +
- stack_masks_bytes;
+ RoundUp(stack_masks_bits + register_masks_bits, kBitsPerByte) / kBitsPerByte;
// Prepare the CodeInfo variable-sized encoding.
CodeInfoEncoding code_info_encoding;
code_info_encoding.non_header_size = non_header_size;
code_info_encoding.number_of_stack_maps = stack_maps_.size();
code_info_encoding.number_of_stack_masks = number_of_stack_masks;
+ code_info_encoding.number_of_register_masks = number_of_register_masks;
code_info_encoding.stack_mask_size_in_bits = stack_mask_size_in_bits;
+ code_info_encoding.register_mask_size_in_bits = register_mask_size_in_bits;
code_info_encoding.stack_map_encoding = stack_map_encoding_;
code_info_encoding.inline_info_encoding = inline_info_encoding_;
code_info_encoding.number_of_location_catalog_entries = location_catalog_entries_.size();
@@ -330,7 +336,7 @@
stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc);
stack_map.SetNativePcCodeOffset(stack_map_encoding_, entry.native_pc_code_offset);
- stack_map.SetRegisterMask(stack_map_encoding_, entry.register_mask);
+ stack_map.SetRegisterMaskIndex(stack_map_encoding_, entry.register_mask_index);
stack_map.SetStackMaskIndex(stack_map_encoding_, entry.stack_mask_index);
if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
@@ -422,7 +428,7 @@
}
}
- // Write stack masks at the end.
+ // Write stack masks table.
size_t stack_mask_bits = encoding.stack_mask_size_in_bits;
if (stack_mask_bits > 0) {
size_t stack_mask_bytes = RoundUp(stack_mask_bits, kBitsPerByte) / kBitsPerByte;
@@ -435,6 +441,12 @@
}
}
+ // Write register masks table.
+ for (size_t i = 0; i < encoding.number_of_register_masks; ++i) {
+ BitMemoryRegion register_mask = code_info.GetRegisterMask(encoding, i);
+ register_mask.StoreBits(0, register_masks_[i], encoding.register_mask_size_in_bits);
+ }
+
// Verify all written data in debug build.
if (kIsDebugBuild) {
CheckCodeInfo(region);
@@ -548,6 +560,17 @@
}
}
+size_t StackMapStream::PrepareRegisterMasks() {
+ register_masks_.resize(stack_maps_.size(), 0u);
+ std::unordered_map<uint32_t, size_t> dedupe;
+ for (StackMapEntry& stack_map : stack_maps_) {
+ const size_t index = dedupe.size();
+ stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second;
+ register_masks_[index] = stack_map.register_mask;
+ }
+ return dedupe.size();
+}
+
size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
// Preallocate memory since we do not want it to move (the dedup map will point into it).
const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
@@ -583,7 +606,8 @@
DCHECK_EQ(stack_map.GetNativePcOffset(stack_map_encoding, instruction_set_),
entry.native_pc_code_offset.Uint32Value(instruction_set_));
DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc);
- DCHECK_EQ(stack_map.GetRegisterMask(stack_map_encoding), entry.register_mask);
+ DCHECK_EQ(stack_map.GetRegisterMaskIndex(stack_map_encoding), entry.register_mask_index);
+ DCHECK_EQ(code_info.GetRegisterMaskOf(encoding, stack_map), entry.register_mask);
const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
DCHECK_EQ(stack_map.GetStackMaskIndex(stack_map_encoding), entry.stack_mask_index);
BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index e2e16e8..b1069a1 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -69,6 +69,7 @@
dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
+ register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
stack_mask_max_(-1),
dex_pc_max_(0),
register_mask_max_(0),
@@ -109,6 +110,7 @@
uint32_t dex_register_map_hash;
size_t same_dex_register_map_as_;
uint32_t stack_mask_index;
+ uint32_t register_mask_index;
};
struct InlineInfoEntry {
@@ -165,6 +167,9 @@
// Returns the number of unique stack masks.
size_t PrepareStackMasks(size_t entry_size_in_bits);
+ // Returns the number of unique register masks.
+ size_t PrepareRegisterMasks();
+
// Returns the index of an entry with the same dex register map as the current_entry,
// or kNoSameDexMapFound if no such entry exists.
size_t FindEntryWithTheSameDexMap();
@@ -199,6 +204,7 @@
ArenaVector<size_t> dex_register_locations_;
ArenaVector<InlineInfoEntry> inline_infos_;
ArenaVector<uint8_t> stack_masks_;
+ ArenaVector<uint32_t> register_masks_;
int stack_mask_max_;
uint32_t dex_pc_max_;
uint32_t register_mask_max_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index da68b60..ce6d5c2 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -80,7 +80,7 @@
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask));
@@ -195,7 +195,7 @@
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1));
@@ -254,7 +254,7 @@
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u, encoding)));
ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0xFFu, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask2));
@@ -308,7 +308,7 @@
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(192u, encoding)));
ASSERT_EQ(2u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0xABu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0xABu, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask3));
@@ -362,7 +362,7 @@
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(256u, encoding)));
ASSERT_EQ(3u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0xCDu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0xCDu, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask4));
@@ -444,7 +444,7 @@
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -643,7 +643,7 @@
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
@@ -653,7 +653,7 @@
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(68, encoding)));
ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(68u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0x4u, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));