Delta-compress register maps in stack maps.
The register maps tend to be similar from stack map to stack map,
so instead of encoding them again, store only the modified ones.
The dex register bitmap stores the delta now - if register has
been modified since the previous stack map, the bit will be set.
The decoding logic scans backwards through stack maps until it
eventually finds the most recent value of each register.
This CL saves ~2.5% of .oat file size (~10% of stackmap size).
Due to the scan, this makes dex register decoding slower by a factor
of 2.5, but that still beats the old algorithm before refactoring.
Test: test-art-host-gtest-stack_map_test
Change-Id: Id5217a329eb757954e0c9447f38b05ec34118f84
diff --git a/runtime/dex_register_location.h b/runtime/dex_register_location.h
index c6d4ad2..a20dccb 100644
--- a/runtime/dex_register_location.h
+++ b/runtime/dex_register_location.h
@@ -29,6 +29,7 @@
class DexRegisterLocation {
public:
enum class Kind : int32_t {
+ kInvalid = -2, // only used internally during register map decoding.
kNone = -1, // vreg has not been set.
kInStack, // vreg is on the stack, value holds the stack offset.
kConstant, // vreg is a constant value.
@@ -40,9 +41,8 @@
DexRegisterLocation(Kind kind, int32_t value) : kind_(kind), value_(value) {}
- static DexRegisterLocation None() {
- return DexRegisterLocation(Kind::kNone, 0);
- }
+ static DexRegisterLocation None() { return DexRegisterLocation(Kind::kNone, 0); }
+ static DexRegisterLocation Invalid() { return DexRegisterLocation(Kind::kInvalid, 0); }
bool IsLive() const { return kind_ != Kind::kNone; }
diff --git a/runtime/stack.cc b/runtime/stack.cc
index bd0d5d6..0b3441a 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -236,7 +236,9 @@
size_t depth_in_stack_map = current_inlining_depth_ - 1;
DexRegisterMap dex_register_map = IsInInlinedFrame()
- ? code_info.GetDexRegisterMapAtDepth(depth_in_stack_map, stack_map, number_of_dex_registers)
+ ? code_info.GetDexRegisterMapAtDepth(depth_in_stack_map,
+ stack_map,
+ number_of_dex_registers)
: code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
if (!dex_register_map.IsValid()) {
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index a5749b8..59a89e1 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -25,6 +25,69 @@
namespace art {
+// Scan backward to determine dex register locations at given stack map.
+// All registers for a stack map are combined - inlined registers are just appended,
+// therefore 'first_dex_register' allows us to select a sub-range to decode.
+void CodeInfo::DecodeDexRegisterMap(uint32_t stack_map_index,
+ uint32_t first_dex_register,
+ /*out*/ DexRegisterMap* map) const {
+ // Count remaining work so we know when we have finished.
+ uint32_t remaining_registers = map->size();
+
+ // Keep scanning backwards and collect the most recent location of each register.
+ for (int32_t s = stack_map_index; s >= 0 && remaining_registers != 0; s--) {
+ StackMap stack_map = GetStackMapAt(s);
+ DCHECK_LE(stack_map_index - s, kMaxDexRegisterMapSearchDistance) << "Unbounded search";
+
+  // The mask specifies which registers were modified in this stack map.
+ // NB: the mask can be shorter than expected if trailing zero bits were removed.
+ uint32_t mask_index = stack_map.GetDexRegisterMaskIndex();
+ if (mask_index == StackMap::kNoValue) {
+ continue; // Nothing changed at this stack map.
+ }
+ BitMemoryRegion mask = dex_register_masks_.GetBitMemoryRegion(mask_index);
+ if (mask.size_in_bits() <= first_dex_register) {
+ continue; // Nothing changed after the first register we are interested in.
+ }
+
+  // The map stores one catalogue index for each modified register location.
+ uint32_t map_index = stack_map.GetDexRegisterMapIndex();
+ DCHECK_NE(map_index, StackMap::kNoValue);
+
+ // Skip initial registers which we are not interested in (to get to inlined registers).
+ map_index += mask.PopCount(0, first_dex_register);
+ mask = mask.Subregion(first_dex_register, mask.size_in_bits() - first_dex_register);
+
+ // Update registers that we see for first time (i.e. most recent value).
+ DexRegisterLocation* regs = map->data();
+ const uint32_t end = std::min<uint32_t>(map->size(), mask.size_in_bits());
+ const size_t kNumBits = BitSizeOf<uint32_t>();
+ for (uint32_t reg = 0; reg < end; reg += kNumBits) {
+ // Process the mask in chunks of kNumBits for performance.
+ uint32_t bits = mask.LoadBits(reg, std::min<uint32_t>(end - reg, kNumBits));
+ while (bits != 0) {
+ uint32_t bit = CTZ(bits);
+ if (regs[reg + bit].GetKind() == DexRegisterLocation::Kind::kInvalid) {
+ regs[reg + bit] = GetDexRegisterCatalogEntry(dex_register_maps_.Get(map_index));
+ remaining_registers--;
+ }
+ map_index++;
+ bits ^= 1u << bit; // Clear the bit.
+ }
+ }
+ }
+
+ // Set any remaining registers to None (which is the default state at first stack map).
+ if (remaining_registers != 0) {
+ DexRegisterLocation* regs = map->data();
+ for (uint32_t r = 0; r < map->size(); r++) {
+ if (regs[r].GetKind() == DexRegisterLocation::Kind::kInvalid) {
+ regs[r] = DexRegisterLocation::None();
+ }
+ }
+ }
+}
+
std::ostream& operator<<(std::ostream& stream, const DexRegisterLocation& reg) {
using Kind = DexRegisterLocation::Kind;
switch (reg.GetKind()) {
@@ -42,6 +105,8 @@
return stream << "f" << reg.GetValue() << "/hi";
case Kind::kConstant:
return stream << "#" << reg.GetValue();
+ case Kind::kInvalid:
+ return stream << "Invalid";
default:
return stream << "DexRegisterLocation(" << static_cast<uint32_t>(reg.GetKind())
<< "," << reg.GetValue() << ")";
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 6da0021..ff70b6c 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -39,6 +39,12 @@
// (signed) values.
static constexpr ssize_t kFrameSlotSize = 4;
+// The delta compression of dex register maps means we need to scan the stackmaps backwards.
+// We compress the data in such a way so that there is an upper bound on the search distance.
+// Max distance 0 means each stack map must be fully defined and no scanning back is allowed.
+// If this value is changed, the oat file version should be incremented (for DCHECK to pass).
+static constexpr size_t kMaxDexRegisterMapSearchDistance = 32;
+
class ArtMethod;
class CodeInfo;
@@ -49,12 +55,14 @@
// If the size is small enough, it keeps the data on the stack.
class DexRegisterMap {
public:
- // Create map for given number of registers and initialize all locations to None.
- explicit DexRegisterMap(size_t count) : count_(count), regs_small_{} {
+ using iterator = DexRegisterLocation*;
+
+ // Create map for given number of registers and initialize them to the given value.
+ DexRegisterMap(size_t count, DexRegisterLocation value) : count_(count), regs_small_{} {
if (count_ <= kSmallCount) {
- std::fill_n(regs_small_.begin(), count, DexRegisterLocation::None());
+ std::fill_n(regs_small_.begin(), count, value);
} else {
- regs_large_.resize(count, DexRegisterLocation::None());
+ regs_large_.resize(count, value);
}
}
@@ -62,6 +70,9 @@
return count_ <= kSmallCount ? regs_small_.data() : regs_large_.data();
}
+ iterator begin() { return data(); }
+ iterator end() { return data() + count_; }
+
size_t size() const { return count_; }
bool IsValid() const { return count_ != 0; }
@@ -192,7 +203,7 @@
* The row referenced from the StackMap holds information at depth 0.
* Following rows hold information for further depths.
*/
-class InlineInfo : public BitTable<7>::Accessor {
+class InlineInfo : public BitTable<7>::Accessor {
public:
BIT_TABLE_HEADER()
BIT_TABLE_COLUMN(0, IsLast) // Determines if there are further rows for further depths.
@@ -200,7 +211,7 @@
BIT_TABLE_COLUMN(2, MethodInfoIndex)
BIT_TABLE_COLUMN(3, ArtMethodHi) // High bits of ArtMethod*.
BIT_TABLE_COLUMN(4, ArtMethodLo) // Low bits of ArtMethod*.
- BIT_TABLE_COLUMN(5, DexRegisterMaskIndex)
+ BIT_TABLE_COLUMN(5, NumberOfDexRegisters) // Includes outer levels and the main method.
BIT_TABLE_COLUMN(6, DexRegisterMapIndex)
static constexpr uint32_t kLast = -1;
@@ -220,10 +231,6 @@
return reinterpret_cast<ArtMethod*>((hi << 32) | lo);
}
- ALWAYS_INLINE bool HasDexRegisterMap() const {
- return HasDexRegisterMapIndex();
- }
-
void Dump(VariableIndentationOutputStream* vios,
const CodeInfo& info,
const StackMap& stack_map,
@@ -338,7 +345,9 @@
}
ALWAYS_INLINE DexRegisterLocation GetDexRegisterCatalogEntry(size_t index) const {
- return DexRegisterInfo(&dex_register_catalog_, index).GetLocation();
+ return (index == StackMap::kNoValue)
+ ? DexRegisterLocation::None()
+ : DexRegisterInfo(&dex_register_catalog_, index).GetLocation();
}
uint32_t GetNumberOfStackMaps() const {
@@ -350,19 +359,30 @@
}
ALWAYS_INLINE DexRegisterMap GetDexRegisterMapOf(StackMap stack_map,
- size_t num_dex_registers) const {
- return DecodeDexRegisterMap(stack_map.GetDexRegisterMaskIndex(),
- stack_map.GetDexRegisterMapIndex(),
- num_dex_registers);
+ size_t vregs ATTRIBUTE_UNUSED = 0) const {
+ if (stack_map.HasDexRegisterMap()) {
+ DexRegisterMap map(number_of_dex_registers_, DexRegisterLocation::Invalid());
+ DecodeDexRegisterMap(stack_map.Row(), /* first_dex_register */ 0, &map);
+ return map;
+ }
+ return DexRegisterMap(0, DexRegisterLocation::None());
}
ALWAYS_INLINE DexRegisterMap GetDexRegisterMapAtDepth(uint8_t depth,
StackMap stack_map,
- size_t num_dex_registers) const {
- InlineInfo inline_info = GetInlineInfoAtDepth(stack_map, depth);
- return DecodeDexRegisterMap(inline_info.GetDexRegisterMaskIndex(),
- inline_info.GetDexRegisterMapIndex(),
- num_dex_registers);
+ size_t vregs ATTRIBUTE_UNUSED = 0) const {
+ if (stack_map.HasDexRegisterMap()) {
+ // The register counts are commutative and include all outer levels.
+ // This allows us to determine the range [first, last) in just two lookups.
+ // If we are at depth 0 (the first inlinee), the count from the main method is used.
+ uint32_t first = (depth == 0) ? number_of_dex_registers_
+ : GetInlineInfoAtDepth(stack_map, depth - 1).GetNumberOfDexRegisters();
+ uint32_t last = GetInlineInfoAtDepth(stack_map, depth).GetNumberOfDexRegisters();
+ DexRegisterMap map(last - first, DexRegisterLocation::Invalid());
+ DecodeDexRegisterMap(stack_map.Row(), first, &map);
+ return map;
+ }
+ return DexRegisterMap(0, DexRegisterLocation::None());
}
InlineInfo GetInlineInfo(size_t index) const {
@@ -421,8 +441,6 @@
if (other.GetDexPc() == dex_pc &&
other.GetNativePcOffset(kRuntimeISA) ==
stack_map.GetNativePcOffset(kRuntimeISA)) {
- DCHECK_EQ(other.GetDexRegisterMapIndex(),
- stack_map.GetDexRegisterMapIndex());
if (i < e - 2) {
// Make sure there are not three identical stack maps following each other.
DCHECK_NE(
@@ -469,23 +487,10 @@
const MethodInfo& method_info) const;
private:
- ALWAYS_INLINE DexRegisterMap DecodeDexRegisterMap(uint32_t mask_index,
- uint32_t map_index,
- uint32_t num_dex_registers) const {
- DexRegisterMap map(map_index == StackMap::kNoValue ? 0 : num_dex_registers);
- if (mask_index != StackMap::kNoValue) {
- BitMemoryRegion mask = dex_register_masks_.GetBitMemoryRegion(mask_index);
- num_dex_registers = std::min<uint32_t>(num_dex_registers, mask.size_in_bits());
- DexRegisterLocation* regs = map.data();
- for (uint32_t r = 0; r < mask.size_in_bits(); r++) {
- if (mask.LoadBit(r) /* is_live */) {
- DCHECK_LT(r, map.size());
- regs[r] = GetDexRegisterCatalogEntry(dex_register_maps_.Get(map_index++));
- }
- }
- }
- return map;
- }
+ // Scan backward to determine dex register locations at given stack map.
+ void DecodeDexRegisterMap(uint32_t stack_map_index,
+ uint32_t first_dex_register,
+ /*out*/ DexRegisterMap* map) const;
void Decode(const uint8_t* data) {
size_t non_header_size = DecodeUnsignedLeb128(&data);
@@ -500,6 +505,7 @@
dex_register_masks_.Decode(region, &bit_offset);
dex_register_maps_.Decode(region, &bit_offset);
dex_register_catalog_.Decode(region, &bit_offset);
+ number_of_dex_registers_ = DecodeVarintBits(region, &bit_offset);
CHECK_EQ(non_header_size, BitsToBytesRoundUp(bit_offset)) << "Invalid CodeInfo";
}
@@ -512,6 +518,7 @@
BitTable<1> dex_register_masks_;
BitTable<1> dex_register_maps_;
BitTable<DexRegisterInfo::kCount> dex_register_catalog_;
+ uint32_t number_of_dex_registers_; // Excludes any inlined methods.
friend class OatDumper;
};