Determine HLoadClass/String load kind early.

This helps save memory by avoiding the allocation of
HEnvironment and related objects for AOT references to
boot image strings and classes (kBootImage* load kinds)
and also for JIT references (kJitTableAddress).

When compiling the aosp_taimen-userdebug boot image, the most
memory-hungry method, BatteryStats.dumpLocked(), needs
  - before:
    Used 55105384 bytes of arena memory...
    ...
    UseListNode    10009704
    Environment      423248
    EnvVRegs       20676560
    ...
  - after:
    Used 50559176 bytes of arena memory...
    ...
    UseListNode     8568936
    Environment      365680
    EnvVRegs       17628704
    ...

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing --jit
Bug: 34053922
Change-Id: I68e73a438e6ac8e8908e6fccf53bbeea8a64a077
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 560372e..a175c21 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -876,9 +876,9 @@
       load_class, codegen_, compiler_driver_, caller_compilation_unit_);
   DCHECK(kind != HLoadClass::LoadKind::kInvalid)
       << "We should always be able to reference a class for inline caches";
-  // Insert before setting the kind, as setting the kind affects the inputs.
-  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
+  // Load kind must be set before inserting the instruction into the graph.
   load_class->SetLoadKind(kind);
+  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
   // In AOT mode, we will most likely load the class from BSS, which will involve a call
   // to the runtime. In this case, the load instruction will need an environment so copy
   // it from the invoke instruction.
@@ -1932,7 +1932,7 @@
   // optimization that could lead to a HDeoptimize. The following optimizations do not.
   HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
   HConstantFolding fold(callee_graph, "constant_folding$inliner");
-  HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
+  HSharpening sharpening(callee_graph, codegen_, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
 
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 782546c..4485f06 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1128,7 +1128,7 @@
       MethodCompilationStat::kConstructorFenceGeneratedNew);
 }
 
-static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
+static bool IsSubClass(ObjPtr<mirror::Class> to_test, ObjPtr<mirror::Class> super_class)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
 }
@@ -1424,7 +1424,7 @@
   return true;
 }
 
-static mirror::Class* GetClassFrom(CompilerDriver* driver,
+static ObjPtr<mirror::Class> GetClassFrom(CompilerDriver* driver,
                                    const DexCompilationUnit& compilation_unit) {
   ScopedObjectAccess soa(Thread::Current());
   Handle<mirror::ClassLoader> class_loader = compilation_unit.GetClassLoader();
@@ -1433,11 +1433,11 @@
   return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
 }
 
-mirror::Class* HInstructionBuilder::GetOutermostCompilingClass() const {
+ObjPtr<mirror::Class> HInstructionBuilder::GetOutermostCompilingClass() const {
   return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
 }
 
-mirror::Class* HInstructionBuilder::GetCompilingClass() const {
+ObjPtr<mirror::Class> HInstructionBuilder::GetCompilingClass() const {
   return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
 }
 
@@ -1799,6 +1799,17 @@
   }
 }
 
+void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) {
+  HLoadString* load_string =
+      new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc);
+  HSharpening::ProcessLoadString(load_string,
+                                 code_generator_,
+                                 compiler_driver_,
+                                 *dex_compilation_unit_,
+                                 handles_);
+  AppendInstruction(load_string);
+}
+
 HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
@@ -1811,7 +1822,7 @@
     if (klass->IsPublic()) {
       needs_access_check = false;
     } else {
-      mirror::Class* compiling_class = GetCompilingClass();
+      ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
       if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) {
         needs_access_check = false;
       }
@@ -1856,9 +1867,9 @@
     // We actually cannot reference this class, we're forced to bail.
     return nullptr;
   }
-  // Append the instruction first, as setting the load kind affects the inputs.
-  AppendInstruction(load_class);
+  // Load kind must be set before inserting the instruction into the graph.
   load_class->SetLoadKind(load_kind);
+  AppendInstruction(load_class);
   return load_class;
 }
 
@@ -2837,20 +2848,14 @@
 
     case Instruction::CONST_STRING: {
       dex::StringIndex string_index(instruction.VRegB_21c());
-      AppendInstruction(new (allocator_) HLoadString(graph_->GetCurrentMethod(),
-                                                     string_index,
-                                                     *dex_file_,
-                                                     dex_pc));
+      BuildLoadString(string_index, dex_pc);
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
 
     case Instruction::CONST_STRING_JUMBO: {
       dex::StringIndex string_index(instruction.VRegB_31c());
-      AppendInstruction(new (allocator_) HLoadString(graph_->GetCurrentMethod(),
-                                                     string_index,
-                                                     *dex_file_,
-                                                     dex_pc));
+      BuildLoadString(string_index, dex_pc);
       UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction());
       break;
     }
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 2446ddb..0500d40 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -240,9 +240,10 @@
   // Builds an instruction sequence for a switch statement.
   void BuildSwitch(const Instruction& instruction, uint32_t dex_pc);
 
-  // Builds a `HLoadClass` loading the given `type_index`. If `outer` is true,
-  // this method will use the outer class's dex file to lookup the type at
-  // `type_index`.
+  // Builds a `HLoadString` loading the given `string_index`.
+  void BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc);
+
+  // Builds a `HLoadClass` loading the given `type_index`.
   HLoadClass* BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc);
 
   HLoadClass* BuildLoadClass(dex::TypeIndex type_index,
@@ -253,10 +254,10 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns the outer-most compiling method's class.
-  mirror::Class* GetOutermostCompilingClass() const;
+  ObjPtr<mirror::Class> GetOutermostCompilingClass() const;
 
   // Returns the class whose method is being compiled.
-  mirror::Class* GetCompilingClass() const;
+  ObjPtr<mirror::Class> GetCompilingClass() const;
 
   // Returns whether `type_index` points to the outer-most compiling method's class.
   bool IsOutermostCompilingClass(dex::TypeIndex type_index) const;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d117bfb..5f33ed6 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2831,21 +2831,6 @@
   }
 }
 
-void HLoadClass::SetLoadKind(LoadKind load_kind) {
-  SetPackedField<LoadKindField>(load_kind);
-
-  if (load_kind != LoadKind::kRuntimeCall &&
-      load_kind != LoadKind::kReferrersClass) {
-    RemoveAsUserOfInput(0u);
-    SetRawInputAt(0u, nullptr);
-  }
-
-  if (!NeedsEnvironment()) {
-    RemoveEnvironment();
-    SetSideEffects(SideEffects::None());
-  }
-}
-
 std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) {
   switch (rhs) {
     case HLoadClass::LoadKind::kReferrersClass:
@@ -2888,21 +2873,6 @@
   }
 }
 
-void HLoadString::SetLoadKind(LoadKind load_kind) {
-  // Once sharpened, the load kind should not be changed again.
-  DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall);
-  SetPackedField<LoadKindField>(load_kind);
-
-  if (load_kind != LoadKind::kRuntimeCall) {
-    RemoveAsUserOfInput(0u);
-    SetRawInputAt(0u, nullptr);
-  }
-  if (!NeedsEnvironment()) {
-    RemoveEnvironment();
-    SetSideEffects(SideEffects::None());
-  }
-}
-
 std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
   switch (rhs) {
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7fbd7f4..42a9d95 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -6061,6 +6061,20 @@
 std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
 
 // Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
+inline void HLoadClass::SetLoadKind(LoadKind load_kind) {
+  // The load kind should be determined before inserting the instruction into the graph.
+  DCHECK(GetBlock() == nullptr);
+  DCHECK(GetEnvironment() == nullptr);
+  SetPackedField<LoadKindField>(load_kind);
+  if (load_kind != LoadKind::kRuntimeCall && load_kind != LoadKind::kReferrersClass) {
+    special_input_ = HUserRecord<HInstruction*>(nullptr);
+  }
+  if (!NeedsEnvironment()) {
+    SetSideEffects(SideEffects::None());
+  }
+}
+
+// Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
 inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
   // The special input is used for PC-relative loads on some architectures,
   // including literal pool loads, which are PC-relative too.
@@ -6208,6 +6222,21 @@
 std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs);
 
 // Note: defined outside class to see operator<<(., HLoadString::LoadKind).
+inline void HLoadString::SetLoadKind(LoadKind load_kind) {
+  // The load kind should be determined before inserting the instruction into the graph.
+  DCHECK(GetBlock() == nullptr);
+  DCHECK(GetEnvironment() == nullptr);
+  DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall);
+  SetPackedField<LoadKindField>(load_kind);
+  if (load_kind != LoadKind::kRuntimeCall) {
+    special_input_ = HUserRecord<HInstruction*>(nullptr);
+  }
+  if (!NeedsEnvironment()) {
+    SetSideEffects(SideEffects::None());
+  }
+}
+
+// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
 inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
   // The special input is used for PC-relative loads on some architectures,
   // including literal pool loads, which are PC-relative too.
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 7edb642..7149d93 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -258,8 +258,7 @@
         break;
       }
       case OptimizationPass::kSharpening:
-        opt = new (allocator) HSharpening(
-            graph, codegen, dex_compilation_unit, driver, handles, name);
+        opt = new (allocator) HSharpening(graph, codegen, driver, name);
         break;
       case OptimizationPass::kSelectGenerator:
         opt = new (allocator) HSelectGenerator(graph, handles, stats, name);
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index e46c9a7..64092d3 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -45,8 +45,6 @@
         SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(),
                                     codegen_,
                                     compiler_driver_);
-      } else if (instruction->IsLoadString()) {
-        ProcessLoadString(instruction->AsLoadString());
       }
       // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder
       //       here. Rewrite it to avoid the CompilerDriver's reliance on verifier data
@@ -147,10 +145,11 @@
   invoke->SetDispatchInfo(dispatch_info);
 }
 
-HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
-                                                       CodeGenerator* codegen,
-                                                       CompilerDriver* compiler_driver,
-                                                       const DexCompilationUnit& dex_compilation_unit) {
+HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
+    HLoadClass* load_class,
+    CodeGenerator* codegen,
+    CompilerDriver* compiler_driver,
+    const DexCompilationUnit& dex_compilation_unit) {
   Handle<mirror::Class> klass = load_class->GetClass();
   DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall ||
          load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
@@ -237,7 +236,12 @@
   return load_kind;
 }
 
-void HSharpening::ProcessLoadString(HLoadString* load_string) {
+void HSharpening::ProcessLoadString(
+    HLoadString* load_string,
+    CodeGenerator* codegen,
+    CompilerDriver* compiler_driver,
+    const DexCompilationUnit& dex_compilation_unit,
+    VariableSizedHandleScope* handles) {
   DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
 
   const DexFile& dex_file = load_string->GetDexFile();
@@ -249,26 +253,26 @@
     ClassLinker* class_linker = runtime->GetClassLinker();
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
-        ? compilation_unit_.GetDexCache()
+    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *dex_compilation_unit.GetDexFile())
+        ? dex_compilation_unit.GetDexCache()
         : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
-    mirror::String* string = nullptr;
+    ObjPtr<mirror::String> string = nullptr;
 
-    if (codegen_->GetCompilerOptions().IsBootImage()) {
+    if (codegen->GetCompilerOptions().IsBootImage()) {
       // Compiling boot image. Resolve the string and allocate it if needed, to ensure
       // the string will be added to the boot image.
       DCHECK(!runtime->UseJitCompilation());
       string = class_linker->ResolveString(dex_file, string_index, dex_cache);
       CHECK(string != nullptr);
-      if (compiler_driver_->GetSupportBootImageFixup()) {
-        DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
+      if (compiler_driver->GetSupportBootImageFixup()) {
+        DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
         desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative;
       } else {
         // compiler_driver_test. Do not sharpen.
         desired_load_kind = HLoadString::LoadKind::kRuntimeCall;
       }
     } else if (runtime->UseJitCompilation()) {
-      DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+      DCHECK(!codegen->GetCompilerOptions().GetCompilePic());
       string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
       if (string != nullptr) {
         if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
@@ -283,7 +287,7 @@
       // AOT app compilation. Try to lookup the string without allocating if not found.
       string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
       if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
-        if (codegen_->GetCompilerOptions().GetCompilePic()) {
+        if (codegen->GetCompilerOptions().GetCompilePic()) {
           desired_load_kind = HLoadString::LoadKind::kBootImageInternTable;
         } else {
           desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
@@ -293,12 +297,12 @@
       }
     }
     if (string != nullptr) {
-      load_string->SetString(handles_->NewHandle(string));
+      load_string->SetString(handles->NewHandle(string));
     }
   }
   DCHECK_NE(desired_load_kind, static_cast<HLoadString::LoadKind>(-1));
 
-  HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
+  HLoadString::LoadKind load_kind = codegen->GetSupportedLoadStringKind(desired_load_kind);
   load_string->SetLoadKind(load_kind);
 }
 
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index bb1954e..6df7d6d 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -34,26 +34,29 @@
  public:
   HSharpening(HGraph* graph,
               CodeGenerator* codegen,
-              const DexCompilationUnit& compilation_unit,
               CompilerDriver* compiler_driver,
-              VariableSizedHandleScope* handles,
               const char* name = kSharpeningPassName)
       : HOptimization(graph, name),
         codegen_(codegen),
-        compilation_unit_(compilation_unit),
-        compiler_driver_(compiler_driver),
-        handles_(handles) { }
+        compiler_driver_(compiler_driver) { }
 
   void Run() OVERRIDE;
 
   static constexpr const char* kSharpeningPassName = "sharpening";
 
+  // Used by the builder.
+  static void ProcessLoadString(HLoadString* load_string,
+                                CodeGenerator* codegen,
+                                CompilerDriver* compiler_driver,
+                                const DexCompilationUnit& dex_compilation_unit,
+                                VariableSizedHandleScope* handles);
+
   // Used by the builder and the inliner.
   static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
                                                    CodeGenerator* codegen,
                                                    CompilerDriver* compiler_driver,
                                                    const DexCompilationUnit& dex_compilation_unit)
-    REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Used by Sharpening and InstructionSimplifier.
   static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
@@ -61,12 +64,8 @@
                                           CompilerDriver* compiler_driver);
 
  private:
-  void ProcessLoadString(HLoadString* load_string);
-
   CodeGenerator* codegen_;
-  const DexCompilationUnit& compilation_unit_;
   CompilerDriver* compiler_driver_;
-  VariableSizedHandleScope* handles_;
 };
 
 }  // namespace art