Revert^2 "Optimize register mask and stack mask in stack maps."
This reverts commit 8b20b5c1f5b454b2f8b8bff492c88724b5002600.
Reason for revert: Resubmit unmodified after fixing the test.
Use BitTable to store the masks as well, and move the
deduplication responsibility to the BitTable builders.
Don't generate entries for masks that are all zeros.
This saves 0.2% of .oat file size on both Arm64 and Arm.
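
For illustration only, a minimal standalone sketch of the dedup idea
(this is not ART's BitmapTableBuilder; MaskDedupTable and its
std-container internals are hypothetical):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    // Sketch: store each distinct mask once; stack maps refer to it by
    // table index. All-zero masks get a sentinel and no table entry.
    class MaskDedupTable {
     public:
      static constexpr uint32_t kNoValue = ~0u;  // Stands in for StackMap::kNoValue.

      // Returns the table index for `mask`, adding a row only if unseen.
      uint32_t Dedup(const std::vector<uint32_t>& mask) {
        bool all_zero = true;
        for (uint32_t word : mask) {
          if (word != 0u) {
            all_zero = false;
            break;
          }
        }
        if (all_zero) {
          return kNoValue;  // Don't generate entries for all-zero masks.
        }
        auto it = index_.find(mask);
        if (it != index_.end()) {
          return it->second;  // Reuse the existing row.
        }
        uint32_t index = static_cast<uint32_t>(rows_.size());
        rows_.push_back(mask);
        index_.emplace(mask, index);
        return index;
      }

     private:
      struct Hash {
        size_t operator()(const std::vector<uint32_t>& v) const {
          size_t h = 0;
          for (uint32_t word : v) {
            h = h * 31u + word;  // Simple polynomial hash for the sketch.
          }
          return h;
        }
      };
      std::vector<std::vector<uint32_t>> rows_;  // Unique masks, in insertion order.
      std::unordered_map<std::vector<uint32_t>, uint32_t, Hash> index_;  // mask -> row index.
    };

Each stack map then records only the small table index
(stack_mask_index / register_mask_index) instead of the full mask.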
Encode register masks as (value, shift) pairs to exploit their
trailing zeros.
This saves 1.0% of .oat file size on Arm64 and 0.2% on Arm.
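
For illustration, a minimal sketch of the (value, shift) encoding.
The helpers below are hypothetical; the actual change computes the
shift with LeastSignificantBit() and stores the pair as a two-column
BitTable row (see RegisterMask in the diff):

    #include <cassert>
    #include <cstdint>

    struct EncodedRegisterMask {
      uint32_t value;  // Mask with its trailing zeros stripped.
      uint32_t shift;  // How many trailing zeros were stripped.
    };

    inline EncodedRegisterMask EncodeRegisterMask(uint32_t mask) {
      assert(mask != 0u);  // Zero masks are not stored at all (kNoValue index).
      uint32_t shift = static_cast<uint32_t>(__builtin_ctz(mask));  // GCC/Clang builtin.
      return EncodedRegisterMask{mask >> shift, shift};
    }

    inline uint32_t DecodeRegisterMask(const EncodedRegisterMask& e) {
      return e.value << e.shift;
    }

    // Example: a callee-save mask 0x0000FF00 becomes value=0xFF, shift=8,
    // so the value column needs 8 bits instead of 16.

Because BitTable columns are only as wide as their largest value,
stripping the trailing zeros shrinks the value column for typical
callee-save register masks.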
Test: test-art-target-gtest-exception_test
Test: test-art-host-gtest-bit_table_test
Test: test-art-host-gtest-stack_map_test
Change-Id: Ib643776dbec3f051cc29cd13ff39e453fab5fae9
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index c6e375a..b40ea37 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -48,10 +48,6 @@
ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
current_entry_.dex_register_entry.live_dex_registers_mask->ClearAllBits();
}
- if (sp_mask != nullptr) {
- stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
- }
-
current_dex_register_ = 0;
}
@@ -217,11 +213,32 @@
PrepareMethodIndices();
// Dedup stack masks. Needs to be done first as it modifies the stack map entry.
- size_t stack_mask_bits = stack_mask_max_ + 1; // Need room for max element too.
- size_t num_stack_masks = PrepareStackMasks(stack_mask_bits);
+ BitmapTableBuilder stack_mask_builder(allocator_);
+ for (StackMapEntry& stack_map : stack_maps_) {
+ BitVector* mask = stack_map.sp_mask;
+ size_t num_bits = (mask != nullptr) ? mask->GetNumberOfBits() : 0;
+ if (num_bits != 0) {
+ stack_map.stack_mask_index = stack_mask_builder.Dedup(mask->GetRawStorage(), num_bits);
+ } else {
+ stack_map.stack_mask_index = StackMap::kNoValue;
+ }
+ }
// Dedup register masks. Needs to be done first as it modifies the stack map entry.
- size_t num_register_masks = PrepareRegisterMasks();
+ BitTableBuilder<std::array<uint32_t, RegisterMask::kCount>> register_mask_builder(allocator_);
+ for (StackMapEntry& stack_map : stack_maps_) {
+ uint32_t register_mask = stack_map.register_mask;
+ if (register_mask != 0) {
+ uint32_t shift = LeastSignificantBit(register_mask);
+ std::array<uint32_t, RegisterMask::kCount> entry = {
+ register_mask >> shift,
+ shift,
+ };
+ stack_map.register_mask_index = register_mask_builder.Dedup(&entry);
+ } else {
+ stack_map.register_mask_index = StackMap::kNoValue;
+ }
+ }
// Write dex register maps.
MemoryRegion dex_register_map_region =
@@ -301,31 +318,8 @@
stack_map_builder.Encode(&out_, &bit_offset);
invoke_info_builder.Encode(&out_, &bit_offset);
inline_info_builder.Encode(&out_, &bit_offset);
-
- // Write register masks table.
- BitTableBuilder<uint32_t> register_mask_builder(allocator_);
- for (size_t i = 0; i < num_register_masks; ++i) {
- register_mask_builder.Add(register_masks_[i]);
- }
register_mask_builder.Encode(&out_, &bit_offset);
-
- // Write stack masks table.
- EncodeVarintBits(&out_, &bit_offset, stack_mask_bits);
- out_.resize(BitsToBytesRoundUp(bit_offset + stack_mask_bits * num_stack_masks));
- BitMemoryRegion stack_mask_region(MemoryRegion(out_.data(), out_.size()),
- bit_offset,
- stack_mask_bits * num_stack_masks);
- if (stack_mask_bits > 0) {
- for (size_t i = 0; i < num_stack_masks; ++i) {
- size_t stack_mask_bytes = BitsToBytesRoundUp(stack_mask_bits);
- BitMemoryRegion src(MemoryRegion(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes));
- BitMemoryRegion dst = stack_mask_region.Subregion(i * stack_mask_bits, stack_mask_bits);
- for (size_t bit_index = 0; bit_index < stack_mask_bits; bit_index += BitSizeOf<uint32_t>()) {
- size_t num_bits = std::min<size_t>(stack_mask_bits - bit_index, BitSizeOf<uint32_t>());
- dst.StoreBits(bit_index, src.LoadBits(bit_index, num_bits), num_bits);
- }
- }
- }
+ stack_mask_builder.Encode(&out_, &bit_offset);
return UnsignedLeb128Size(out_.size()) + out_.size();
}
@@ -448,17 +442,6 @@
}
}
-size_t StackMapStream::PrepareRegisterMasks() {
- register_masks_.resize(stack_maps_.size(), 0u);
- ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
- for (StackMapEntry& stack_map : stack_maps_) {
- const size_t index = dedupe.size();
- stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second;
- register_masks_[index] = stack_map.register_mask;
- }
- return dedupe.size();
-}
-
void StackMapStream::PrepareMethodIndices() {
CHECK(method_indices_.empty());
method_indices_.resize(stack_maps_.size() + inline_infos_.size());
@@ -481,35 +464,10 @@
method_indices_.resize(dedupe.size());
}
-
-size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
- // Preallocate memory since we do not want it to move (the dedup map will point into it).
- const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
- stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
- // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later
- // when copying out from stack_masks_.
- ScopedArenaUnorderedMap<MemoryRegion,
- size_t,
- FNVHash<MemoryRegion>,
- MemoryRegion::ContentEquals> dedup(
- stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream));
- for (StackMapEntry& stack_map : stack_maps_) {
- size_t index = dedup.size();
- MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
- BitMemoryRegion stack_mask_bits(stack_mask);
- for (size_t i = 0; i < entry_size_in_bits; i++) {
- stack_mask_bits.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
- }
- stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
- }
- return dedup.size();
-}
-
// Check that all StackMapStream inputs are correctly encoded by trying to read them back.
void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
CodeInfo code_info(region);
DCHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
- DCHECK_EQ(code_info.GetNumberOfStackMaskBits(), static_cast<uint32_t>(stack_mask_max_ + 1));
DCHECK_EQ(code_info.GetNumberOfLocationCatalogEntries(), location_catalog_entries_.size());
size_t invoke_info_index = 0;
for (size_t s = 0; s < stack_maps_.size(); ++s) {
@@ -522,18 +480,15 @@
DCHECK_EQ(stack_map.GetDexPc(), entry.dex_pc);
DCHECK_EQ(stack_map.GetRegisterMaskIndex(), entry.register_mask_index);
DCHECK_EQ(code_info.GetRegisterMaskOf(stack_map), entry.register_mask);
- const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits();
DCHECK_EQ(stack_map.GetStackMaskIndex(), entry.stack_mask_index);
BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
if (entry.sp_mask != nullptr) {
DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
- for (size_t b = 0; b < num_stack_mask_bits; b++) {
- DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
+ for (size_t b = 0; b < stack_mask.size_in_bits(); b++) {
+ DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b)) << b;
}
} else {
- for (size_t b = 0; b < num_stack_mask_bits; b++) {
- DCHECK_EQ(stack_mask.LoadBit(b), 0u);
- }
+ DCHECK_EQ(stack_mask.size_in_bits(), 0u);
}
if (entry.dex_method_index != dex::kDexNoIndex) {
InvokeInfo invoke_info = code_info.GetInvokeInfo(invoke_info_index);