Add invoke infos to stack maps

Invoke info records the invoke type and dex method index for invokes
that may reach artQuickResolutionTrampoline. Having this information
recorded allows the runtime to avoid reading the dex code and pulling
in extra pages.

Code size increase for a large app:
93886360 -> 95811480 (2.05% increase)

Half of the code size increase comes from fewer stack maps being
deduplicated. I suspect there is less deduplication because of the
invoke info method index.

Merged disabled (kEnableDexLayoutOptimizations is false) until we
measure the RAM savings.

Test: test-art-host, N6P boots

Bug: 34109702

Change-Id: I6c5e4a60675a1d7c76dee0561a12909e4ab6d5d9
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index bac16cd..8dd423f 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -58,6 +58,9 @@
 
 namespace art {
 
+// If true, we record the static and direct invokes in the invoke infos.
+static constexpr bool kEnableDexLayoutOptimizations = false;
+
 // Return whether a location is consistent with a type.
 static bool CheckType(Primitive::Type type, Location location) {
   if (location.IsFpuRegister()
@@ -801,7 +804,18 @@
                                        outer_environment_size,
                                        inlining_depth);
 
-  EmitEnvironment(instruction->GetEnvironment(), slow_path);
+  HEnvironment* const environment = instruction->GetEnvironment();
+  EmitEnvironment(environment, slow_path);
+  // Record invoke info. The common case for the trampoline is super and static invokes, so only
+  // static-or-direct invokes are recorded to reduce oat file size.
+  if (kEnableDexLayoutOptimizations) {
+    if (environment != nullptr &&
+        instruction->IsInvoke() &&
+        instruction->IsInvokeStaticOrDirect()) {
+      HInvoke* const invoke = instruction->AsInvoke();
+      stack_map_stream_.AddInvoke(invoke->GetInvokeType(), invoke->GetDexMethodIndex());
+    }
+  }
   stack_map_stream_.EndStackMapEntry();
 
   HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
@@ -818,7 +832,6 @@
     EmitEnvironment(instruction->GetEnvironment(), slow_path);
     stack_map_stream_.EndStackMapEntry();
     if (kIsDebugBuild) {
-      HEnvironment* environment = instruction->GetEnvironment();
       for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
         HInstruction* in_environment = environment->GetInstructionAt(i);
         if (in_environment != nullptr) {
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 1bcc8e1..eeae96e 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -41,12 +41,12 @@
   current_entry_.inlining_depth = inlining_depth;
   current_entry_.inline_infos_start_index = inline_infos_.size();
   current_entry_.stack_mask_index = 0;
+  current_entry_.dex_method_index = DexFile::kDexNoIndex;
   current_entry_.dex_register_entry.num_dex_registers = num_dex_registers;
   current_entry_.dex_register_entry.locations_start_index = dex_register_locations_.size();
   current_entry_.dex_register_entry.live_dex_registers_mask = (num_dex_registers != 0)
       ? ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream)
       : nullptr;
-
   if (sp_mask != nullptr) {
     stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
   }
@@ -99,6 +99,11 @@
   current_dex_register_++;
 }
 
+void StackMapStream::AddInvoke(InvokeType invoke_type, uint32_t dex_method_index) {
+  current_entry_.invoke_type = invoke_type;
+  current_entry_.dex_method_index = dex_method_index;
+}
+
 void StackMapStream::BeginInlineInfoEntry(ArtMethod* method,
                                           uint32_t dex_pc,
                                           uint32_t num_dex_registers,
@@ -166,6 +171,7 @@
       encoding.inline_info.num_entries,
       encoding.register_mask.num_entries,
       encoding.stack_mask.num_entries);
+  ComputeInvokeInfoEncoding(&encoding);
   DCHECK_EQ(code_info_encoding_.size(), 0u);
   encoding.Compress(&code_info_encoding_);
   encoding.ComputeTableOffsets();
@@ -212,6 +218,24 @@
   return size;
 }
 
+void StackMapStream::ComputeInvokeInfoEncoding(CodeInfoEncoding* encoding) {
+  DCHECK(encoding != nullptr);
+  uint32_t native_pc_max = 0;
+  uint16_t method_index_max = 0;
+  size_t invoke_infos_count = 0;
+  size_t invoke_type_max = 0;
+  for (const StackMapEntry& entry : stack_maps_) {
+    if (entry.dex_method_index != DexFile::kDexNoIndex) {
+      native_pc_max = std::max(native_pc_max, entry.native_pc_code_offset.CompressedValue());
+      method_index_max = std::max(method_index_max, static_cast<uint16_t>(entry.dex_method_index));
+      invoke_type_max = std::max(invoke_type_max, static_cast<size_t>(entry.invoke_type));
+      ++invoke_infos_count;
+    }
+  }
+  encoding->invoke_info.num_entries = invoke_infos_count;
+  encoding->invoke_info.encoding.SetFromSizes(native_pc_max, invoke_type_max, method_index_max);
+}
+
 void StackMapStream::ComputeInlineInfoEncoding(InlineInfoEncoding* encoding,
                                                size_t dex_register_maps_bytes) {
   uint32_t method_index_max = 0;
@@ -304,6 +328,7 @@
   ArenaBitVector empty_bitmask(allocator_, 0, /* expandable */ false, kArenaAllocStackMapStream);
   uintptr_t next_dex_register_map_offset = 0;
   uintptr_t next_inline_info_index = 0;
+  size_t invoke_info_idx = 0;
   for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) {
     StackMap stack_map = code_info.GetStackMapAt(i, encoding);
     StackMapEntry entry = stack_maps_[i];
@@ -318,6 +343,14 @@
                                             dex_register_locations_region);
     stack_map.SetDexRegisterMapOffset(encoding.stack_map.encoding, offset);
 
+    if (entry.dex_method_index != DexFile::kDexNoIndex) {
+      InvokeInfo invoke_info(code_info.GetInvokeInfo(encoding, invoke_info_idx));
+      invoke_info.SetNativePcCodeOffset(encoding.invoke_info.encoding, entry.native_pc_code_offset);
+      invoke_info.SetInvokeType(encoding.invoke_info.encoding, entry.invoke_type);
+      invoke_info.SetMethodIndex(encoding.invoke_info.encoding, entry.dex_method_index);
+      ++invoke_info_idx;
+    }
+
     // Set the inlining info.
     if (entry.inlining_depth != 0) {
       InlineInfo inline_info = code_info.GetInlineInfo(next_inline_info_index, encoding);
@@ -528,6 +561,7 @@
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
   DCHECK_EQ(code_info.GetNumberOfStackMaps(encoding), stack_maps_.size());
+  size_t invoke_info_index = 0;
   for (size_t s = 0; s < stack_maps_.size(); ++s) {
     const StackMap stack_map = code_info.GetStackMapAt(s, encoding);
     const StackMapEncoding& stack_map_encoding = encoding.stack_map.encoding;
@@ -552,7 +586,14 @@
         DCHECK_EQ(stack_mask.LoadBit(b), 0u);
       }
     }
-
+    if (entry.dex_method_index != DexFile::kDexNoIndex) {
+      InvokeInfo invoke_info = code_info.GetInvokeInfo(encoding, invoke_info_index);
+      DCHECK_EQ(invoke_info.GetNativePcOffset(encoding.invoke_info.encoding, instruction_set_),
+                entry.native_pc_code_offset.Uint32Value(instruction_set_));
+      DCHECK_EQ(invoke_info.GetInvokeType(encoding.invoke_info.encoding), entry.invoke_type);
+      DCHECK_EQ(invoke_info.GetMethodIndex(encoding.invoke_info.encoding), entry.dex_method_index);
+      invoke_info_index++;
+    }
     CheckDexRegisterMap(code_info,
                         code_info.GetDexRegisterMapOf(
                             stack_map, encoding, entry.dex_register_entry.num_dex_registers),
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index bba3d51..4225a87 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -118,6 +118,8 @@
     uint32_t register_mask_index;
     DexRegisterMapEntry dex_register_entry;
     size_t dex_register_map_index;
+    InvokeType invoke_type;
+    uint32_t dex_method_index;
   };
 
   struct InlineInfoEntry {
@@ -138,6 +140,8 @@
 
   void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value);
 
+  void AddInvoke(InvokeType type, uint32_t dex_method_index);
+
   void BeginInlineInfoEntry(ArtMethod* method,
                             uint32_t dex_pc,
                             uint32_t num_dex_registers,
@@ -184,6 +188,14 @@
   bool DexRegisterMapEntryEquals(const DexRegisterMapEntry& a, const DexRegisterMapEntry& b) const;
 
   // Fill in the corresponding entries of a register map.
+  void ComputeInvokeInfoEncoding(CodeInfoEncoding* encoding);
+
+  // Returns the index of an entry with the same dex register map as the current_entry,
+  // or kNoSameDexMapFound if no such entry exists.
+  size_t FindEntryWithTheSameDexMap();
+  bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const;
+
+  // Fill in the corresponding entries of a register map.
   void FillInDexRegisterMap(DexRegisterMap dex_register_map,
                             uint32_t num_dex_registers,
                             const BitVector& live_dex_registers_mask,
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 0416951..330f7f2 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -934,7 +934,6 @@
   EXPECT_EQ(offset_mips64.Uint32Value(kMips64), kMips64InstructionAlignment);
 }
 
-
 TEST(StackMapTest, TestDeduplicateStackMask) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
@@ -963,4 +962,48 @@
             stack_map2.GetStackMaskIndex(encoding.stack_map.encoding));
 }
 
+TEST(StackMapTest, TestInvokeInfo) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  StackMapStream stream(&arena, kRuntimeISA);
+
+  ArenaBitVector sp_mask(&arena, 0, true);
+  sp_mask.SetBit(1);
+  stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0);
+  stream.AddInvoke(kSuper, 1);
+  stream.EndStackMapEntry();
+  stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0);
+  stream.AddInvoke(kStatic, 3);
+  stream.EndStackMapEntry();
+  stream.BeginStackMapEntry(0, 16, 0x3, &sp_mask, 0, 0);
+  stream.AddInvoke(kDirect, 65535);
+  stream.EndStackMapEntry();
+
+  const size_t size = stream.PrepareForFillIn();
+  MemoryRegion region(arena.Alloc(size, kArenaAllocMisc), size);
+  stream.FillIn(region);
+
+  CodeInfo code_info(region);
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(3u, code_info.GetNumberOfStackMaps(encoding));
+
+  InvokeInfo invoke1(code_info.GetInvokeInfoForNativePcOffset(4, encoding));
+  InvokeInfo invoke2(code_info.GetInvokeInfoForNativePcOffset(8, encoding));
+  InvokeInfo invoke3(code_info.GetInvokeInfoForNativePcOffset(16, encoding));
+  InvokeInfo invoke_invalid(code_info.GetInvokeInfoForNativePcOffset(12, encoding));
+  EXPECT_FALSE(invoke_invalid.IsValid());  // No invoke info at that native pc offset.
+  EXPECT_TRUE(invoke1.IsValid());
+  EXPECT_TRUE(invoke2.IsValid());
+  EXPECT_TRUE(invoke3.IsValid());
+  EXPECT_EQ(invoke1.GetInvokeType(encoding.invoke_info.encoding), kSuper);
+  EXPECT_EQ(invoke1.GetMethodIndex(encoding.invoke_info.encoding), 1u);
+  EXPECT_EQ(invoke1.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 4u);
+  EXPECT_EQ(invoke2.GetInvokeType(encoding.invoke_info.encoding), kStatic);
+  EXPECT_EQ(invoke2.GetMethodIndex(encoding.invoke_info.encoding), 3u);
+  EXPECT_EQ(invoke2.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 8u);
+  EXPECT_EQ(invoke3.GetInvokeType(encoding.invoke_info.encoding), kDirect);
+  EXPECT_EQ(invoke3.GetMethodIndex(encoding.invoke_info.encoding), 65535u);
+  EXPECT_EQ(invoke3.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 16u);
+}
+
 }  // namespace art