Merge "Fix braino in RTP."
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index bbf9eee..e2a0942 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -176,6 +176,7 @@
     kCallRelative,     // NOTE: Actual patching is instruction_set-dependent.
     kType,
     kTypeRelative,     // NOTE: Actual patching is instruction_set-dependent.
+    kTypeBssEntry,     // NOTE: Actual patching is instruction_set-dependent.
     kString,
     kStringRelative,   // NOTE: Actual patching is instruction_set-dependent.
     kStringBssEntry,   // NOTE: Actual patching is instruction_set-dependent.
@@ -228,6 +229,16 @@
     return patch;
   }
 
+  static LinkerPatch TypeBssEntryPatch(size_t literal_offset,
+                                       const DexFile* target_dex_file,
+                                       uint32_t pc_insn_offset,
+                                       uint32_t target_type_idx) {
+    LinkerPatch patch(literal_offset, Type::kTypeBssEntry, target_dex_file);
+    patch.type_idx_ = target_type_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
+    return patch;
+  }
+
   static LinkerPatch StringPatch(size_t literal_offset,
                                  const DexFile* target_dex_file,
                                  uint32_t target_string_idx) {
@@ -282,6 +293,7 @@
     switch (GetType()) {
       case Type::kCallRelative:
       case Type::kTypeRelative:
+      case Type::kTypeBssEntry:
       case Type::kStringRelative:
       case Type::kStringBssEntry:
       case Type::kDexCacheArray:
@@ -299,12 +311,16 @@
   }
 
   const DexFile* TargetTypeDexFile() const {
-    DCHECK(patch_type_ == Type::kType || patch_type_ == Type::kTypeRelative);
+    DCHECK(patch_type_ == Type::kType ||
+           patch_type_ == Type::kTypeRelative ||
+           patch_type_ == Type::kTypeBssEntry);
     return target_dex_file_;
   }
 
   dex::TypeIndex TargetTypeIndex() const {
-    DCHECK(patch_type_ == Type::kType || patch_type_ == Type::kTypeRelative);
+    DCHECK(patch_type_ == Type::kType ||
+           patch_type_ == Type::kTypeRelative ||
+           patch_type_ == Type::kTypeBssEntry);
     return dex::TypeIndex(type_idx_);
   }
 
@@ -334,6 +350,7 @@
 
   uint32_t PcInsnOffset() const {
     DCHECK(patch_type_ == Type::kTypeRelative ||
+           patch_type_ == Type::kTypeBssEntry ||
            patch_type_ == Type::kStringRelative ||
            patch_type_ == Type::kStringBssEntry ||
            patch_type_ == Type::kDexCacheArray);
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 4a9de7f..79e1785 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -224,6 +224,7 @@
     } else {
       // LDR/STR 32-bit or 64-bit with imm12 == 0 (unset).
       DCHECK(patch.GetType() == LinkerPatch::Type::kDexCacheArray ||
+             patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
              patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
       DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn;
     }
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index a9da09c..de5af97 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -296,6 +296,7 @@
     bss_start_(0u),
     bss_size_(0u),
     bss_roots_offset_(0u),
+    bss_type_entries_(),
     bss_string_entries_(),
     oat_data_offset_(0u),
     oat_header_(nullptr),
@@ -585,7 +586,7 @@
   }
   oat_size_ = offset;
 
-  if (!HasBootImage()) {
+  {
     TimingLogger::ScopedTiming split("InitBssLayout", timings_);
     InitBssLayout(instruction_set);
   }
@@ -847,6 +848,10 @@
             if (!patch.IsPcRelative()) {
               writer_->absolute_patch_locations_.push_back(base_loc + patch.LiteralOffset());
             }
+            if (patch.GetType() == LinkerPatch::Type::kTypeBssEntry) {
+              TypeReference ref(patch.TargetTypeDexFile(), patch.TargetTypeIndex());
+              writer_->bss_type_entries_.Overwrite(ref, /* placeholder */ 0u);
+            }
             if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) {
               StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex());
               writer_->bss_string_entries_.Overwrite(ref, /* placeholder */ 0u);
@@ -1185,6 +1190,15 @@
                                                                      target_offset);
                 break;
               }
+              case LinkerPatch::Type::kTypeBssEntry: {
+                TypeReference ref(patch.TargetTypeDexFile(), patch.TargetTypeIndex());
+                uint32_t target_offset = writer_->bss_type_entries_.Get(ref);
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
+                break;
+              }
               case LinkerPatch::Type::kCall: {
                 uint32_t target_offset = GetTargetOffset(patch);
                 PatchCodeAddress(&patched_code_, literal_offset, target_offset);
@@ -1619,20 +1633,34 @@
 }
 
 void OatWriter::InitBssLayout(InstructionSet instruction_set) {
-  DCHECK(!HasBootImage());
+  if (HasBootImage()) {
+    DCHECK(bss_string_entries_.empty());
+    if (bss_type_entries_.empty()) {
+      // Nothing to put to the .bss section.
+      return;
+    }
+  }
 
   // Allocate space for app dex cache arrays in the .bss section.
   bss_start_ = RoundUp(oat_size_, kPageSize);
-  PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set);
   bss_size_ = 0u;
-  for (const DexFile* dex_file : *dex_files_) {
-    dex_cache_arrays_offsets_.Put(dex_file, bss_start_ + bss_size_);
-    DexCacheArraysLayout layout(pointer_size, dex_file);
-    bss_size_ += layout.Size();
+  if (!HasBootImage()) {
+    PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set);
+    for (const DexFile* dex_file : *dex_files_) {
+      dex_cache_arrays_offsets_.Put(dex_file, bss_start_ + bss_size_);
+      DexCacheArraysLayout layout(pointer_size, dex_file);
+      bss_size_ += layout.Size();
+    }
   }
 
   bss_roots_offset_ = bss_size_;
 
+  // Prepare offsets for .bss Class entries.
+  for (auto& entry : bss_type_entries_) {
+    DCHECK_EQ(entry.second, 0u);
+    entry.second = bss_start_ + bss_size_;
+    bss_size_ += sizeof(GcRoot<mirror::Class>);
+  }
   // Prepare offsets for .bss String entries.
   for (auto& entry : bss_string_entries_) {
     DCHECK_EQ(entry.second, 0u);
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 8d087f4..db84166 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -31,6 +31,7 @@
 #include "os.h"
 #include "safe_map.h"
 #include "string_reference.h"
+#include "utils/type_reference.h"
 
 namespace art {
 
@@ -372,6 +373,11 @@
   // The offset of the GC roots in .bss section.
   size_t bss_roots_offset_;
 
+  // Map for allocating Class entries in .bss. Indexed by TypeReference for the source
+  // type in the dex file with the "type value comparator" for deduplication. The value
+  // is the target offset for patching, starting at `bss_start_ + bss_roots_offset_`.
+  SafeMap<TypeReference, size_t, TypeReferenceValueComparator> bss_type_entries_;
+
   // Map for allocating String entries in .bss. Indexed by StringReference for the source
   // string in the dex file with the "string value comparator" for deduplication. The value
   // is the target offset for patching, starting at `bss_start_ + bss_roots_offset_`.
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 9ca7b19..6c680c8 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -497,30 +497,33 @@
   }
 }
 
-// TODO: Remove argument `code_generator_supports_read_barrier` when
-// all code generators have read barrier support.
-void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls,
-                                                   Location runtime_type_index_location,
-                                                   Location runtime_return_location,
-                                                   bool code_generator_supports_read_barrier) {
-  ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena();
-  LocationSummary::CallKind call_kind = cls->NeedsAccessCheck()
-      ? LocationSummary::kCallOnMainOnly
-      : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) ||
-          cls->CanCallRuntime())
-            ? LocationSummary::kCallOnSlowPath
-            : LocationSummary::kNoCall);
-  LocationSummary* locations = new (allocator) LocationSummary(cls, call_kind);
-  if (cls->NeedsAccessCheck()) {
-    locations->SetInAt(0, Location::NoLocation());
-    locations->AddTemp(runtime_type_index_location);
-    locations->SetOut(runtime_return_location);
-  } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetOut(Location::RequiresRegister());
-  }
+void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
+                                                              Location runtime_type_index_location,
+                                                              Location runtime_return_location) {
+  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(cls->InputCount(), 1u);
+  LocationSummary* locations = new (cls->GetBlock()->GetGraph()->GetArena()) LocationSummary(
+      cls, LocationSummary::kCallOnMainOnly);
+  locations->SetInAt(0, Location::NoLocation());
+  locations->AddTemp(runtime_type_index_location);
+  locations->SetOut(runtime_return_location);
 }
 
+void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) {
+  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  LocationSummary* locations = cls->GetLocations();
+  MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
+  if (cls->NeedsAccessCheck()) {
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+    InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
+  } else if (cls->MustGenerateClinitCheck()) {
+    CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    InvokeRuntime(kQuickInitializeStaticStorage, cls, cls->GetDexPc());
+  } else {
+    CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    InvokeRuntime(kQuickInitializeType, cls, cls->GetDexPc());
+  }
+}
 
 void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
   // The DCHECKS below check that a register is not specified twice in
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 7e2dd48..38d532e 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -509,11 +509,10 @@
       uint32_t dex_pc,
       const FieldAccessCallingConvention& calling_convention);
 
-  // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
-  static void CreateLoadClassLocationSummary(HLoadClass* cls,
-                                             Location runtime_type_index_location,
-                                             Location runtime_return_location,
-                                             bool code_generator_supports_read_barrier = false);
+  static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
+                                                        Location runtime_type_index_location,
+                                                        Location runtime_return_location);
+  void GenerateLoadClassRuntimeCall(HLoadClass* cls);
 
   static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
 
@@ -523,7 +522,7 @@
   virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                              HInstruction* instruction,
                              uint32_t dex_pc,
-                             SlowPathCode* slow_path) = 0;
+                             SlowPathCode* slow_path = nullptr) = 0;
 
   // Check if the desired_string_load_kind is supported. If it is, return it,
   // otherwise return a fall-back kind that should be used instead.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3bb97b6..ef4bd1e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -371,22 +371,23 @@
                        HInstruction* at,
                        uint32_t dex_pc,
                        bool do_clinit)
-      : SlowPathCodeARM(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCodeARM(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = at_->GetLocations();
+    LocationSummary* locations = instruction_->GetLocations();
 
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex().index_);
+    dex::TypeIndex type_index = cls_->GetTypeIndex();
+    __ LoadImmediate(calling_convention.GetRegisterAt(0), type_index.index_);
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
-    arm_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
+    arm_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
     if (do_clinit_) {
       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
     } else {
@@ -400,6 +401,23 @@
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     }
     RestoreLiveRegisters(codegen, locations);
+    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
+    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
+      DCHECK(out.IsValid());
+      // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
+      // kSaveEverything and use a temporary for the .bss entry address in the fast path,
+      // so that we can avoid another calculation here.
+      CodeGeneratorARM::PcRelativePatchInfo* labels =
+          arm_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(IP, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(IP, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(IP, IP, ShifterOperand(PC));
+      __ str(locations->Out().AsRegister<Register>(), Address(IP));
+    }
     __ b(GetExitLabel());
   }
 
@@ -409,10 +427,6 @@
   // The class this slow path will load.
   HLoadClass* const cls_;
 
-  // The instruction where this slow path is happening.
-  // (Might be the load class or an initialization check).
-  HInstruction* const at_;
-
   // The dex PC of `at_`.
   const uint32_t dex_pc_;
 
@@ -430,7 +444,7 @@
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     HLoadString* load = instruction_->AsLoadString();
-    const uint32_t string_index = load->GetStringIndex().index_;
+    const dex::StringIndex string_index = load->GetStringIndex();
     Register out = locations->Out().AsRegister<Register>();
     Register temp = locations->GetTemp(0).AsRegister<Register>();
     constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
@@ -449,7 +463,7 @@
       __ mov(entry_address, ShifterOperand(temp));
     }
 
-    __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index);
+    __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index.index_);
     arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 
@@ -1208,6 +1222,7 @@
       boot_image_type_patches_(TypeReferenceValueComparator(),
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
@@ -5716,17 +5731,11 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
-    case HLoadClass::LoadKind::kJitTableAddress:
-      break;
-    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
-      // We disable pc-relative load when there is an irreducible loop, as the optimization
-      // is incompatible with it.
-      // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
-      // with irreducible loops.
-      if (GetGraph()->HasIrreducibleLoops()) {
-        return HLoadClass::LoadKind::kDexCacheViaMethod;
-      }
+      break;
+    case HLoadClass::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
@@ -5735,15 +5744,16 @@
 }
 
 void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
-  if (cls->NeedsAccessCheck()) {
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
-    CodeGenerator::CreateLoadClassLocationSummary(
+    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Location::RegisterLocation(R0),
-        /* code_generator_supports_read_barrier */ true);
+        Location::RegisterLocation(R0));
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
@@ -5754,24 +5764,21 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
 
-  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
-      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
-      load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
-  LocationSummary* locations = cls->GetLocations();
-  if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
-    codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
-    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
+  LocationSummary* locations = cls->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
 
@@ -5779,7 +5786,7 @@
       ? kWithoutReadBarrier
       : kCompilerReadBarrierOption;
   bool generate_null_check = false;
-  switch (cls->GetLoadKind()) {
+  switch (load_kind) {
     case HLoadClass::LoadKind::kReferrersClass: {
       DCHECK(!cls->CanCallRuntime());
       DCHECK(!cls->MustGenerateClinitCheck());
@@ -5793,12 +5800,14 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
                                                                     cls->GetTypeIndex()));
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       CodeGeneratorARM::PcRelativePatchInfo* labels =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
@@ -5817,6 +5826,19 @@
       __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
+    case HLoadClass::LoadKind::kBssEntry: {
+      CodeGeneratorARM::PcRelativePatchInfo* labels =
+          codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(out, out, ShifterOperand(PC));
+      GenerateGcRootFieldLoad(cls, out_loc, out, 0, kCompilerReadBarrierOption);
+      generate_null_check = true;
+      break;
+    }
     case HLoadClass::LoadKind::kJitTableAddress: {
       __ LoadLiteral(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
                                                                cls->GetTypeIndex(),
@@ -5825,28 +5847,9 @@
       GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
       break;
     }
-    case HLoadClass::LoadKind::kDexCachePcRelative: {
-      Register base_reg = locations->InAt(0).AsRegister<Register>();
-      HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase();
-      int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset();
-      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
-      GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
-    case HLoadClass::LoadKind::kDexCacheViaMethod: {
-      // /* GcRoot<mirror::Class>[] */ out =
-      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-      Register current_method = locations->InAt(0).AsRegister<Register>();
-      __ LoadFromOffset(kLoadWord,
-                        out,
-                        current_method,
-                        ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_);
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset, read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
-    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 
   if (generate_null_check || cls->MustGenerateClinitCheck()) {
@@ -5954,6 +5957,7 @@
 
   switch (load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       __ LoadLiteral(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
                                                                       load->GetStringIndex()));
       return;  // No dex cache slow path.
@@ -5961,7 +5965,7 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorARM::PcRelativePatchInfo* labels =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       __ BindTrackedLabel(&labels->movw_label);
       __ movw(out, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->movt_label);
@@ -5981,7 +5985,7 @@
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       Register temp = locations->GetTemp(0).AsRegister<Register>();
       CodeGeneratorARM::PcRelativePatchInfo* labels =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       __ BindTrackedLabel(&labels->movw_label);
       __ movw(temp, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->movt_label);
@@ -7287,8 +7291,8 @@
 }
 
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch(
-    const DexFile& dex_file, uint32_t string_index) {
-  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }
 
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch(
@@ -7296,6 +7300,11 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
 }
 
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewTypeBssEntryPatch(
+    const DexFile& dex_file, dex::TypeIndex type_index) {
+  return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
+}
+
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file, uint32_t element_offset) {
   return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -7374,6 +7383,7 @@
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
       boot_image_address_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
@@ -7388,12 +7398,17 @@
                                                        target_string.string_index.index_));
   }
   if (!GetCompilerOptions().IsBootImage()) {
+    DCHECK(pc_relative_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   } else {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+                                                                linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
+                                                              linker_patches);
   for (const auto& entry : boot_image_type_patches_) {
     const TypeReference& target_type = entry.first;
     Literal* literal = entry.second;
@@ -7403,8 +7418,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
-                                                              linker_patches);
   for (const auto& entry : boot_image_address_patches_) {
     DCHECK(GetCompilerOptions().GetIncludePatchInformation());
     Literal* literal = entry.second;
@@ -7412,6 +7425,7 @@
     uint32_t literal_offset = literal->GetLabel()->Position();
     linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
   }
+  DCHECK_EQ(size, linker_patches->size());
 }
 
 Literal* CodeGeneratorARM::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index d5968e0..bd237e9 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -481,8 +481,10 @@
     Label add_pc_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
+  PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
   Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
@@ -635,8 +637,10 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
   TypeToLiteralMap boot_image_type_patches_;
-  // PC-relative type patch info.
+  // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // PC-relative type patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 227ad0f..a9617e1 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -276,22 +276,23 @@
                          HInstruction* at,
                          uint32_t dex_pc,
                          bool do_clinit)
-      : SlowPathCodeARM64(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCodeARM64(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = at_->GetLocations();
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex().index_);
+    dex::TypeIndex type_index = cls_->GetTypeIndex();
+    __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
-    arm64_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
+    arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
     if (do_clinit_) {
       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
     } else {
@@ -302,11 +303,32 @@
     Location out = locations->Out();
     if (out.IsValid()) {
       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
-      Primitive::Type type = at_->GetType();
+      Primitive::Type type = instruction_->GetType();
       arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
     }
-
     RestoreLiveRegisters(codegen, locations);
+    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
+    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
+      DCHECK(out.IsValid());
+      UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
+      Register temp = temps.AcquireX();
+      const DexFile& dex_file = cls_->GetDexFile();
+      // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
+      // kSaveEverything and use a temporary for the ADRP in the fast path, so that we
+      // can avoid the ADRP here.
+      vixl::aarch64::Label* adrp_label =
+          arm64_codegen->NewBssEntryTypePatch(dex_file, type_index);
+      arm64_codegen->EmitAdrpPlaceholder(adrp_label, temp);
+      vixl::aarch64::Label* strp_label =
+          arm64_codegen->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
+      {
+        SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
+        __ Bind(strp_label);
+        __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
+               MemOperand(temp, /* offset placeholder */ 0));
+      }
+    }
     __ B(GetExitLabel());
   }
 
@@ -316,10 +338,6 @@
   // The class this slow path will load.
   HLoadClass* const cls_;
 
-  // The instruction where this slow path is happening.
-  // (Might be the load class or an initialization check).
-  HInstruction* const at_;
-
   // The dex PC of `at_`.
   const uint32_t dex_pc_;
 
@@ -349,8 +367,8 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
-    __ Mov(calling_convention.GetRegisterAt(0).W(), string_index);
+    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
+    __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     Primitive::Type type = instruction_->GetType();
@@ -1154,6 +1172,7 @@
       boot_image_type_patches_(TypeReferenceValueComparator(),
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
@@ -4090,9 +4109,10 @@
 
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
     const DexFile& dex_file,
-    uint32_t string_index,
+    dex::StringIndex string_index,
     vixl::aarch64::Label* adrp_label) {
-  return NewPcRelativePatch(dex_file, string_index, adrp_label, &pc_relative_string_patches_);
+  return
+      NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
 }
 
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
@@ -4102,6 +4122,13 @@
   return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_);
 }
 
+vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
+    const DexFile& dex_file,
+    dex::TypeIndex type_index,
+    vixl::aarch64::Label* adrp_label) {
+  return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
+}
+
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file,
     uint32_t element_offset,
@@ -4208,6 +4235,7 @@
       pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       pc_relative_type_patches_.size() +
+      type_bss_entry_patches_.size() +
       boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
@@ -4224,12 +4252,17 @@
                                                        target_string.string_index.index_));
   }
   if (!GetCompilerOptions().IsBootImage()) {
+    DCHECK(pc_relative_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   } else {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+                                                                linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
+                                                              linker_patches);
   for (const auto& entry : boot_image_type_patches_) {
     const TypeReference& target_type = entry.first;
     vixl::aarch64::Literal<uint32_t>* literal = entry.second;
@@ -4237,13 +4270,12 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
-                                                                linker_patches);
   for (const auto& entry : boot_image_address_patches_) {
     DCHECK(GetCompilerOptions().GetIncludePatchInformation());
     vixl::aarch64::Literal<uint32_t>* literal = entry.second;
     linker_patches->push_back(LinkerPatch::RecordPosition(literal->GetOffset()));
   }
+  DCHECK_EQ(size, linker_patches->size());
 }
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
@@ -4306,12 +4338,12 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
+    case HLoadClass::LoadKind::kBssEntry:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
-    case HLoadClass::LoadKind::kDexCachePcRelative:
-      DCHECK(!Runtime::Current()->UseJitCompilation());
-      break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -4319,15 +4351,16 @@
 }
 
 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
-  if (cls->NeedsAccessCheck()) {
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
-    CodeGenerator::CreateLoadClassLocationSummary(
+    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
         LocationFrom(calling_convention.GetRegisterAt(0)),
-        LocationFrom(vixl::aarch64::x0),
-        /* code_generator_supports_read_barrier */ true);
+        LocationFrom(vixl::aarch64::x0));
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
@@ -4338,21 +4371,19 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
 
-  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
-      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
-  if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(cls->GetLocations()->GetTemp(0), cls->GetTypeIndex().index_);
-    codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
-    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
   Location out_loc = cls->GetLocations()->Out();
   Register out = OutputRegister(cls);
@@ -4361,7 +4392,7 @@
       ? kWithoutReadBarrier
       : kCompilerReadBarrierOption;
   bool generate_null_check = false;
-  switch (cls->GetLoadKind()) {
+  switch (load_kind) {
     case HLoadClass::LoadKind::kReferrersClass: {
       DCHECK(!cls->CanCallRuntime());
       DCHECK(!cls->MustGenerateClinitCheck());
@@ -4399,6 +4430,25 @@
       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(cls->GetAddress()));
       break;
     }
+    case HLoadClass::LoadKind::kBssEntry: {
+      // Add ADRP with its PC-relative Class .bss entry patch.
+      const DexFile& dex_file = cls->GetDexFile();
+      dex::TypeIndex type_index = cls->GetTypeIndex();
+      vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
+      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
+      // Add LDR with its PC-relative Class patch.
+      vixl::aarch64::Label* ldr_label =
+          codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
+      GenerateGcRootFieldLoad(cls,
+                              cls->GetLocations()->Out(),
+                              out.X(),
+                              /* placeholder */ 0u,
+                              ldr_label,
+                              kCompilerReadBarrierOption);
+      generate_null_check = true;
+      break;
+    }
     case HLoadClass::LoadKind::kJitTableAddress: {
       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
                                                        cls->GetTypeIndex(),
@@ -4411,43 +4461,9 @@
                               kCompilerReadBarrierOption);
       break;
     }
-    case HLoadClass::LoadKind::kDexCachePcRelative: {
-      // Add ADRP with its PC-relative DexCache access patch.
-      const DexFile& dex_file = cls->GetDexFile();
-      uint32_t element_offset = cls->GetDexCacheElementOffset();
-      vixl::aarch64::Label* adrp_label =
-          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
-      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
-      // Add LDR with its PC-relative DexCache access patch.
-      vixl::aarch64::Label* ldr_label =
-          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
-      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
-      GenerateGcRootFieldLoad(cls,
-                              out_loc,
-                              out.X(),
-                              /* offset placeholder */ 0,
-                              ldr_label,
-                              read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
-    case HLoadClass::LoadKind::kDexCacheViaMethod: {
-      MemberOffset resolved_types_offset =
-          ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
-      // /* GcRoot<mirror::Class>[] */ out =
-      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-      Register current_method = InputRegisterAt(cls, 0);
-      __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      GenerateGcRootFieldLoad(cls,
-                              out_loc,
-                              out.X(),
-                              CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_),
-                              /* fixup_label */ nullptr,
-                              read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 
   if (generate_null_check || cls->MustGenerateClinitCheck()) {
@@ -4502,11 +4518,11 @@
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
-    case HLoadString::LoadKind::kDexCacheViaMethod:
-      break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
   }
   return desired_string_load_kind;
 }
@@ -4550,7 +4566,7 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       // Add ADRP with its PC-relative String patch.
       const DexFile& dex_file = load->GetDexFile();
-      uint32_t string_index = load->GetStringIndex().index_;
+      const dex::StringIndex string_index = load->GetStringIndex();
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
@@ -4570,7 +4586,7 @@
     case HLoadString::LoadKind::kBssEntry: {
       // Add ADRP with its PC-relative String .bss entry patch.
       const DexFile& dex_file = load->GetDexFile();
-      uint32_t string_index = load->GetStringIndex().index_;
+      const dex::StringIndex string_index = load->GetStringIndex();
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
       Register temp = temps.AcquireX();
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index d6a5f9d..c7a0614 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -540,7 +540,7 @@
   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
   // to the associated ADRP patch label).
   vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                 uint32_t string_index,
+                                                 dex::StringIndex string_index,
                                                  vixl::aarch64::Label* adrp_label = nullptr);
 
   // Add a new PC-relative type patch for an instruction and return the label
@@ -551,6 +551,14 @@
                                                dex::TypeIndex type_index,
                                                vixl::aarch64::Label* adrp_label = nullptr);
 
+  // Add a new .bss entry type patch for an instruction and return the label
+  // to be bound before the instruction. The instruction will be either the
+  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
+  // to the associated ADRP patch label).
+  vixl::aarch64::Label* NewBssEntryTypePatch(const DexFile& dex_file,
+                                             dex::TypeIndex type_index,
+                                             vixl::aarch64::Label* adrp_label = nullptr);
+
   // Add a new PC-relative dex cache array patch for an instruction and return
   // the label to be bound before the instruction. The instruction will be
   // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
@@ -744,8 +752,10 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
   TypeToLiteralMap boot_image_type_patches_;
-  // PC-relative type patch info.
+  // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // PC-relative type patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index fd51aab..83c289e 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -394,22 +394,23 @@
 class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
  public:
   LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit)
-      : SlowPathCodeARMVIXL(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCodeARMVIXL(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = at_->GetLocations();
+    LocationSummary* locations = instruction_->GetLocations();
 
     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConventionARMVIXL calling_convention;
-    __ Mov(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex().index_);
+    dex::TypeIndex type_index = cls_->GetTypeIndex();
+    __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
-    arm_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
+    arm_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
     if (do_clinit_) {
       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
     } else {
@@ -423,6 +424,18 @@
       arm_codegen->Move32(locations->Out(), LocationFrom(r0));
     }
     RestoreLiveRegisters(codegen, locations);
+    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
+    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
+      DCHECK(out.IsValid());
+      // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
+      // kSaveEverything and use a temporary for the .bss entry address in the fast path,
+      // so that we can avoid another calculation here.
+      CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+          arm_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
+      arm_codegen->EmitMovwMovtPlaceholder(labels, ip);
+      __ Str(OutputRegister(cls_), MemOperand(ip));
+    }
     __ B(GetExitLabel());
   }
 
@@ -432,10 +445,6 @@
   // The class this slow path will load.
   HLoadClass* const cls_;
 
-  // The instruction where this slow path is happening.
-  // (Might be the load class or an initialization check).
-  HInstruction* const at_;
-
   // The dex PC of `at_`.
   const uint32_t dex_pc_;
 
@@ -454,7 +463,7 @@
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     HLoadString* load = instruction_->AsLoadString();
-    const uint32_t string_index = load->GetStringIndex().index_;
+    const dex::StringIndex string_index = load->GetStringIndex();
     vixl32::Register out = OutputRegister(load);
     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
     constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
@@ -473,7 +482,7 @@
       __ Mov(entry_address, temp);
     }
 
-    __ Mov(calling_convention.GetRegisterAt(0), string_index);
+    __ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
     arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 
@@ -1252,6 +1261,7 @@
       boot_image_type_patches_(TypeReferenceValueComparator(),
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
@@ -5797,17 +5807,11 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
-    case HLoadClass::LoadKind::kJitTableAddress:
-      break;
-    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
-      // We disable pc-relative load when there is an irreducible loop, as the optimization
-      // is incompatible with it.
-      // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
-      // with irreducible loops.
-      if (GetGraph()->HasIrreducibleLoops()) {
-        return HLoadClass::LoadKind::kDexCacheViaMethod;
-      }
+      break;
+    case HLoadClass::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
@@ -5816,15 +5820,16 @@
 }
 
 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
-  if (cls->NeedsAccessCheck()) {
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConventionARMVIXL calling_convention;
-    CodeGenerator::CreateLoadClassLocationSummary(
+    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
         LocationFrom(calling_convention.GetRegisterAt(0)),
-        LocationFrom(r0),
-        /* code_generator_supports_read_barrier */ true);
+        LocationFrom(r0));
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
@@ -5835,24 +5840,21 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
 
-  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
-      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
-      load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) {
-  LocationSummary* locations = cls->GetLocations();
-  if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
-    codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
-    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
+  LocationSummary* locations = cls->GetLocations();
   Location out_loc = locations->Out();
   vixl32::Register out = OutputRegister(cls);
 
@@ -5860,7 +5862,7 @@
       ? kWithoutReadBarrier
       : kCompilerReadBarrierOption;
   bool generate_null_check = false;
-  switch (cls->GetLoadKind()) {
+  switch (load_kind) {
     case HLoadClass::LoadKind::kReferrersClass: {
       DCHECK(!cls->CanCallRuntime());
       DCHECK(!cls->MustGenerateClinitCheck());
@@ -5874,12 +5876,14 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
                                                             cls->GetTypeIndex()));
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
@@ -5893,6 +5897,14 @@
       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
+    case HLoadClass::LoadKind::kBssEntry: {
+      CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+          codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
+      codegen_->EmitMovwMovtPlaceholder(labels, out);
+      GenerateGcRootFieldLoad(cls, out_loc, out, 0, kCompilerReadBarrierOption);
+      generate_null_check = true;
+      break;
+    }
     case HLoadClass::LoadKind::kJitTableAddress: {
       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
                                                        cls->GetTypeIndex(),
@@ -5901,30 +5913,9 @@
       GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
       break;
     }
-    case HLoadClass::LoadKind::kDexCachePcRelative: {
-      vixl32::Register base_reg = InputRegisterAt(cls, 0);
-      HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase();
-      int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset();
-      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
-      GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
-    case HLoadClass::LoadKind::kDexCacheViaMethod: {
-      // /* GcRoot<mirror::Class>[] */ out =
-      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-      vixl32::Register current_method = InputRegisterAt(cls, 0);
-      const int32_t resolved_types_offset =
-          ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value();
-      GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset);
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_);
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset, read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
-    default:
-      TODO_VIXL32(FATAL);
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 
   if (generate_null_check || cls->MustGenerateClinitCheck()) {
@@ -6046,7 +6037,7 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       codegen_->EmitMovwMovtPlaceholder(labels, out);
       return;  // No dex cache slow path.
     }
@@ -6061,7 +6052,7 @@
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       codegen_->EmitMovwMovtPlaceholder(labels, temp);
       GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption);
       LoadStringSlowPathARMVIXL* slow_path =
@@ -7405,8 +7396,8 @@
 }
 
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch(
-    const DexFile& dex_file, uint32_t string_index) {
-  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }
 
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch(
@@ -7414,6 +7405,11 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
 }
 
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch(
+    const DexFile& dex_file, dex::TypeIndex type_index) {
+  return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
+}
+
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file, uint32_t element_offset) {
   return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -7507,6 +7503,7 @@
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
       boot_image_address_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
@@ -7521,12 +7518,17 @@
                                                        target_string.string_index.index_));
   }
   if (!GetCompilerOptions().IsBootImage()) {
+    DCHECK(pc_relative_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   } else {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+                                                                linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
+                                                              linker_patches);
   for (const auto& entry : boot_image_type_patches_) {
     const TypeReference& target_type = entry.first;
     VIXLUInt32Literal* literal = entry.second;
@@ -7536,8 +7538,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
-                                                              linker_patches);
   for (const auto& entry : boot_image_address_patches_) {
     DCHECK(GetCompilerOptions().GetIncludePatchInformation());
     VIXLUInt32Literal* literal = entry.second;
@@ -7545,6 +7545,7 @@
     uint32_t literal_offset = literal->GetLocation();
     linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
   }
+  DCHECK_EQ(size, linker_patches->size());
 }
 
 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 200a463..0f0a954 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -562,8 +562,10 @@
     vixl::aarch32::Label add_pc_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
+  PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
   VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
@@ -731,8 +733,10 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
   TypeToLiteralMap boot_image_type_patches_;
-  // PC-relative type patch info.
+  // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // PC-relative type patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index ed0d997..1f4ff27 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -213,23 +213,24 @@
                         HInstruction* at,
                         uint32_t dex_pc,
                         bool do_clinit)
-      : SlowPathCodeMIPS(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCodeMIPS(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = at_->GetLocations();
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex().index_);
+    dex::TypeIndex type_index = cls_->GetTypeIndex();
+    __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
 
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
-    mips_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
+    mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
     if (do_clinit_) {
       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
     } else {
@@ -240,11 +241,26 @@
     Location out = locations->Out();
     if (out.IsValid()) {
       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
-      Primitive::Type type = at_->GetType();
+      Primitive::Type type = instruction_->GetType();
       mips_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
     }
 
     RestoreLiveRegisters(codegen, locations);
+    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
+    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
+      DCHECK(out.IsValid());
+      // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
+      // kSaveEverything and use a temporary for the .bss entry address in the fast path,
+      // so that we can avoid another calculation here.
+      bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+      Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+      DCHECK_NE(out.AsRegister<Register>(), AT);
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
+      mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base);
+      __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, 0);
+    }
     __ B(GetExitLabel());
   }
 
@@ -254,10 +270,6 @@
   // The class this slow path will load.
   HLoadClass* const cls_;
 
-  // The instruction where this slow path is happening.
-  // (Might be the load class or an initialization check).
-  HInstruction* const at_;
-
   // The dex PC of `at_`.
   const uint32_t dex_pc_;
 
@@ -281,8 +293,8 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     HLoadString* load = instruction_->AsLoadString();
-    const uint32_t string_index = load->GetStringIndex().index_;
-    __ LoadConst32(calling_convention.GetRegisterAt(0), string_index);
+    const dex::StringIndex string_index = load->GetStringIndex();
+    __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_);
     mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     Primitive::Type type = instruction_->GetType();
@@ -465,6 +477,7 @@
       boot_image_type_patches_(TypeReferenceValueComparator(),
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       clobbered_ra_(false) {
@@ -1007,6 +1020,7 @@
       pc_relative_dex_cache_patches_.size() +
       pc_relative_string_patches_.size() +
       pc_relative_type_patches_.size() +
+      type_bss_entry_patches_.size() +
       boot_image_string_patches_.size() +
       boot_image_type_patches_.size() +
       boot_image_address_patches_.size();
@@ -1014,13 +1028,16 @@
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
   if (!GetCompilerOptions().IsBootImage()) {
+    DCHECK(pc_relative_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   } else {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+                                                                linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   }
-  EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   for (const auto& entry : boot_image_string_patches_) {
     const StringReference& target_string = entry.first;
@@ -1047,11 +1064,12 @@
     uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
     linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
   }
+  DCHECK_EQ(size, linker_patches->size());
 }
 
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
-    const DexFile& dex_file, uint32_t string_index) {
-  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }
 
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
@@ -1059,6 +1077,11 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
 }
 
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewTypeBssEntryPatch(
+    const DexFile& dex_file, dex::TypeIndex type_index) {
+  return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
+}
+
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file, uint32_t element_offset) {
   return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -5194,14 +5217,14 @@
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
-    case HLoadString::LoadKind::kDexCacheViaMethod:
-      fallback_load = false;
-      break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       // TODO: implement.
       fallback_load = true;
       break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      fallback_load = false;
+      break;
   }
   if (fallback_load) {
     desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
@@ -5230,15 +5253,13 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
+    case HLoadClass::LoadKind::kBssEntry:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       fallback_load = true;
       break;
-    case HLoadClass::LoadKind::kDexCachePcRelative:
-      DCHECK(!Runtime::Current()->UseJitCompilation());
-      // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
-      // with irreducible loops.
-      break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       fallback_load = false;
       break;
@@ -5435,34 +5456,32 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
-  if (cls->NeedsAccessCheck()) {
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
-    CodeGenerator::CreateLoadClassLocationSummary(
+    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Location::RegisterLocation(V0),
-        /* code_generator_supports_read_barrier */ false);  // TODO: revisit this bool.
+        Location::RegisterLocation(V0));
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
-  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   switch (load_kind) {
     // We need an extra register for PC-relative literals on R2.
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-    case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBssEntry:
       if (codegen_->GetInstructionSetFeatures().IsR6()) {
         break;
       }
       FALLTHROUGH_INTENDED;
-    // We need an extra register for PC-relative dex cache accesses.
-    case HLoadClass::LoadKind::kDexCachePcRelative:
     case HLoadClass::LoadKind::kReferrersClass:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
       locations->SetInAt(0, Location::RequiresRegister());
       break;
     default:
@@ -5472,15 +5491,14 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
-  LocationSummary* locations = cls->GetLocations();
-  if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
-    codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
-    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
-  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  LocationSummary* locations = cls->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
   Register base_or_current_method_reg;
@@ -5488,12 +5506,11 @@
   switch (load_kind) {
     // We need an extra register for PC-relative literals on R2.
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-    case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBssEntry:
       base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
       break;
-    // We need an extra register for PC-relative dex cache accesses.
-    case HLoadClass::LoadKind::kDexCachePcRelative:
     case HLoadClass::LoadKind::kReferrersClass:
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
@@ -5516,14 +5533,14 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       __ LoadLiteral(out,
                      base_or_current_method_reg,
                      codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
                                                                cls->GetTypeIndex()));
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
@@ -5538,31 +5555,21 @@
                      codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
+    case HLoadClass::LoadKind::kBssEntry: {
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
+      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
+      __ LoadFromOffset(kLoadWord, out, out, 0);
+      generate_null_check = true;
+      break;
+    }
     case HLoadClass::LoadKind::kJitTableAddress: {
       LOG(FATAL) << "Unimplemented";
       break;
     }
-    case HLoadClass::LoadKind::kDexCachePcRelative: {
-      HMipsDexCacheArraysBase* base = cls->InputAt(0)->AsMipsDexCacheArraysBase();
-      int32_t offset =
-          cls->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
-      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
-      GenerateGcRootFieldLoad(cls, out_loc, base_or_current_method_reg, offset);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
-    case HLoadClass::LoadKind::kDexCacheViaMethod: {
-      // /* GcRoot<mirror::Class>[] */ out =
-      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-      __ LoadFromOffset(kLoadWord,
-                        out,
-                        base_or_current_method_reg,
-                        ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_);
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
-      generate_null_check = !cls->IsInDexCache();
-    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 
   if (generate_null_check || cls->MustGenerateClinitCheck()) {
@@ -5657,6 +5664,7 @@
 
   switch (load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       __ LoadLiteral(out,
                      base_or_current_method_reg,
                      codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
@@ -5665,7 +5673,7 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
       return;  // No dex cache slow path.
     }
@@ -5681,7 +5689,7 @@
     case HLoadString::LoadKind::kBssEntry: {
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
       __ LoadFromOffset(kLoadWord, out, out, 0);
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 7b0812c..c8fd325 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -452,8 +452,10 @@
     MipsLabel pc_rel_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
+  PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
   Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
@@ -504,8 +506,10 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
   BootTypeToLiteralMap boot_image_type_patches_;
-  // PC-relative type patch info.
+  // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // PC-relative type patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 0ea5bb0..a350de7 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -167,22 +167,23 @@
                           HInstruction* at,
                           uint32_t dex_pc,
                           bool do_clinit)
-      : SlowPathCodeMIPS64(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCodeMIPS64(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = at_->GetLocations();
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex().index_);
+    dex::TypeIndex type_index = cls_->GetTypeIndex();
+    __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
-    mips64_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
+    mips64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
     if (do_clinit_) {
       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
     } else {
@@ -193,11 +194,24 @@
     Location out = locations->Out();
     if (out.IsValid()) {
       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
-      Primitive::Type type = at_->GetType();
+      Primitive::Type type = instruction_->GetType();
       mips64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
     }
 
     RestoreLiveRegisters(codegen, locations);
+    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
+    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
+      DCHECK(out.IsValid());
+      // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
+      // kSaveEverything and use a temporary for the .bss entry address in the fast path,
+      // so that we can avoid another calculation here.
+      DCHECK_NE(out.AsRegister<GpuRegister>(), AT);
+      CodeGeneratorMIPS64::PcRelativePatchInfo* info =
+          mips64_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
+      mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT);
+      __ Sw(out.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678);
+    }
     __ Bc(GetExitLabel());
   }
 
@@ -207,10 +221,6 @@
   // The class this slow path will load.
   HLoadClass* const cls_;
 
-  // The instruction where this slow path is happening.
-  // (Might be the load class or an initialization check).
-  HInstruction* const at_;
-
   // The dex PC of `at_`.
   const uint32_t dex_pc_;
 
@@ -234,8 +244,8 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     HLoadString* load = instruction_->AsLoadString();
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
-    __ LoadConst32(calling_convention.GetRegisterAt(0), string_index);
+    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
+    __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_);
     mips64_codegen->InvokeRuntime(kQuickResolveString,
                                   instruction_,
                                   instruction_->GetDexPc(),
@@ -422,6 +432,7 @@
       boot_image_type_patches_(TypeReferenceValueComparator(),
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save RA (containing the return address) to mimic Quick.
@@ -922,6 +933,7 @@
       pc_relative_dex_cache_patches_.size() +
       pc_relative_string_patches_.size() +
       pc_relative_type_patches_.size() +
+      type_bss_entry_patches_.size() +
       boot_image_string_patches_.size() +
       boot_image_type_patches_.size() +
       boot_image_address_patches_.size();
@@ -929,13 +941,16 @@
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
   if (!GetCompilerOptions().IsBootImage()) {
+    DCHECK(pc_relative_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   } else {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+                                                                linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   }
-  EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   for (const auto& entry : boot_image_string_patches_) {
     const StringReference& target_string = entry.first;
@@ -962,11 +977,12 @@
     uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
     linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
   }
+  DCHECK_EQ(size, linker_patches->size());
 }
 
 CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch(
-    const DexFile& dex_file, uint32_t string_index) {
-  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }
 
 CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch(
@@ -974,6 +990,11 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
 }
 
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewTypeBssEntryPatch(
+    const DexFile& dex_file, dex::TypeIndex type_index) {
+  return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
+}
+
 CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file, uint32_t element_offset) {
   return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -3095,7 +3116,7 @@
     Location root,
     GpuRegister obj,
     uint32_t offset) {
-  // When handling HLoadClass::LoadKind::kDexCachePcRelative, the caller calls
+  // When handling PC-relative loads, the caller calls
   // EmitPcRelativeAddressPlaceholderHigh() and then GenerateGcRootFieldLoad().
   // The relative patcher expects the two methods to emit the following patchable
   // sequence of instructions in this case:
@@ -3322,14 +3343,14 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
+    case HLoadClass::LoadKind::kBssEntry:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       // TODO: implement.
       fallback_load = true;
       break;
-    case HLoadClass::LoadKind::kDexCachePcRelative:
-      DCHECK(!Runtime::Current()->UseJitCompilation());
-      break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -3482,38 +3503,36 @@
 }
 
 void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
-  if (cls->NeedsAccessCheck()) {
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
-    CodeGenerator::CreateLoadClassLocationSummary(
+    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        calling_convention.GetReturnLocation(Primitive::kPrimNot),
-        /* code_generator_supports_read_barrier */ false);
+        calling_convention.GetReturnLocation(Primitive::kPrimNot));
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
-  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
-      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
-  LocationSummary* locations = cls->GetLocations();
-  if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
-    codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
-    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
-  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  LocationSummary* locations = cls->GetLocations();
   Location out_loc = locations->Out();
   GpuRegister out = out_loc.AsRegister<GpuRegister>();
   GpuRegister current_method_reg = ZERO;
@@ -3534,14 +3553,14 @@
                               ArtMethod::DeclaringClassOffset().Int32Value());
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       __ LoadLiteral(out,
                      kLoadUnsignedWord,
                      codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
                                                                cls->GetTypeIndex()));
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -3557,32 +3576,21 @@
                      codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
+    case HLoadClass::LoadKind::kBssEntry: {
+      CodeGeneratorMIPS64::PcRelativePatchInfo* info =
+          codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
+      __ Lwu(out, AT, /* placeholder */ 0x5678);
+      generate_null_check = true;
+      break;
+    }
     case HLoadClass::LoadKind::kJitTableAddress: {
       LOG(FATAL) << "Unimplemented";
       break;
     }
-    case HLoadClass::LoadKind::kDexCachePcRelative: {
-      uint32_t element_offset = cls->GetDexCacheElementOffset();
-      CodeGeneratorMIPS64::PcRelativePatchInfo* info =
-          codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), element_offset);
-      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
-      // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
-      GenerateGcRootFieldLoad(cls, out_loc, AT, /* placeholder */ 0x5678);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
-    case HLoadClass::LoadKind::kDexCacheViaMethod: {
-      // /* GcRoot<mirror::Class>[] */ out =
-      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-      __ LoadFromOffset(kLoadDoubleword,
-                        out,
-                        current_method_reg,
-                        ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value());
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_);
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
-      generate_null_check = !cls->IsInDexCache();
-    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 
   if (generate_null_check || cls->MustGenerateClinitCheck()) {
@@ -3646,6 +3654,7 @@
 
   switch (load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       __ LoadLiteral(out,
                      kLoadUnsignedWord,
                      codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
@@ -3654,7 +3663,7 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
       __ Daddiu(out, AT, /* placeholder */ 0x5678);
       return;  // No dex cache slow path.
@@ -3671,7 +3680,7 @@
     case HLoadString::LoadKind::kBssEntry: {
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
       __ Lwu(out, AT, /* placeholder */ 0x5678);
       SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 8ac919f..52b780c 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -411,8 +411,10 @@
     Mips64Label pc_rel_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
+  PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
   PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file,
@@ -469,8 +471,10 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
   BootTypeToLiteralMap boot_image_type_patches_;
-  // PC-relative type patch info.
+  // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // PC-relative type patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 624cf81..a850d38 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -225,8 +225,8 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
-    __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index));
+    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
+    __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
     x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
@@ -254,21 +254,24 @@
                        HInstruction* at,
                        uint32_t dex_pc,
                        bool do_clinit)
-      : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = at_->GetLocations();
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex().index_));
+    dex::TypeIndex type_index = cls_->GetTypeIndex();
+    __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
     x86_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage
                                           : kQuickInitializeType,
-                               at_, dex_pc_, this);
+                               instruction_,
+                               dex_pc_,
+                               this);
     if (do_clinit_) {
       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
     } else {
@@ -281,8 +284,17 @@
       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
       x86_codegen->Move32(out, Location::RegisterLocation(EAX));
     }
-
     RestoreLiveRegisters(codegen, locations);
+    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
+    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
+      DCHECK(out.IsValid());
+      Register method_address = locations->InAt(0).AsRegister<Register>();
+      __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset),
+              locations->Out().AsRegister<Register>());
+      Label* fixup_label = x86_codegen->NewTypeBssEntryPatch(cls_);
+      __ Bind(fixup_label);
+    }
     __ jmp(GetExitLabel());
   }
 
@@ -292,10 +304,6 @@
   // The class this slow path will load.
   HLoadClass* const cls_;
 
-  // The instruction where this slow path is happening.
-  // (Might be the load class or an initialization check).
-  HInstruction* const at_;
-
   // The dex PC of `at_`.
   const uint32_t dex_pc_;
 
@@ -1009,7 +1017,8 @@
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       constant_area_start_(-1),
@@ -4601,9 +4610,15 @@
   __ Bind(&string_patches_.back().label);
 }
 
-void CodeGeneratorX86::RecordTypePatch(HLoadClass* load_class) {
-  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
-  __ Bind(&type_patches_.back().label);
+void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) {
+  boot_image_type_patches_.emplace_back(load_class->GetDexFile(),
+                                        load_class->GetTypeIndex().index_);
+  __ Bind(&boot_image_type_patches_.back().label);
+}
+
+Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
+  type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
+  return &type_bss_entry_patches_.back().label;
 }
 
 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
@@ -4640,7 +4655,8 @@
       pc_relative_dex_cache_patches_.size() +
       simple_patches_.size() +
       string_patches_.size() +
-      type_patches_.size();
+      boot_image_type_patches_.size() +
+      type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -4649,24 +4665,26 @@
     linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
   }
   if (!GetCompilerOptions().IsBootImage()) {
+    DCHECK(boot_image_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
   } else if (GetCompilerOptions().GetCompilePic()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
+                                                                linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
   } else {
+    for (const PatchInfo<Label>& info : boot_image_type_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, &info.dex_file, info.index));
+    }
     for (const PatchInfo<Label>& info : string_patches_) {
       uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
       linker_patches->push_back(
           LinkerPatch::StringPatch(literal_offset, &info.dex_file, info.index));
     }
   }
-  if (GetCompilerOptions().GetCompilePic()) {
-    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(type_patches_, linker_patches);
-  } else {
-    for (const PatchInfo<Label>& info : type_patches_) {
-      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-      linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, &info.dex_file, info.index));
-    }
-  }
+  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
+                                                              linker_patches);
+  DCHECK_EQ(size, linker_patches->size());
 }
 
 void CodeGeneratorX86::MarkGCCard(Register temp,
@@ -5985,7 +6003,7 @@
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
       DCHECK(GetCompilerOptions().GetCompilePic());
       FALLTHROUGH_INTENDED;
-    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());  // Note: boot image is also non-JIT.
       // We disable pc-relative load when there is an irreducible loop, as the optimization
       // is incompatible with it.
@@ -6007,15 +6025,16 @@
 }
 
 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
-  if (cls->NeedsAccessCheck()) {
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
-    CodeGenerator::CreateLoadClassLocationSummary(
+    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Location::RegisterLocation(EAX),
-        /* code_generator_supports_read_barrier */ true);
+        Location::RegisterLocation(EAX));
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
@@ -6026,11 +6045,9 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
 
-  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
-      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
       load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
-      load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+      load_kind == HLoadClass::LoadKind::kBssEntry) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister());
@@ -6047,14 +6064,14 @@
 }
 
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
-  LocationSummary* locations = cls->GetLocations();
-  if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
-    codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
-    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
+  LocationSummary* locations = cls->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
 
@@ -6062,7 +6079,7 @@
   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
       ? kWithoutReadBarrier
       : kCompilerReadBarrierOption;
-  switch (cls->GetLoadKind()) {
+  switch (load_kind) {
     case HLoadClass::LoadKind::kReferrersClass: {
       DCHECK(!cls->CanCallRuntime());
       DCHECK(!cls->MustGenerateClinitCheck());
@@ -6077,16 +6094,18 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ movl(out, Immediate(/* placeholder */ 0));
-      codegen_->RecordTypePatch(cls);
+      codegen_->RecordBootTypePatch(cls);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       Register method_address = locations->InAt(0).AsRegister<Register>();
       __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
-      codegen_->RecordTypePatch(cls);
+      codegen_->RecordBootTypePatch(cls);
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
@@ -6097,6 +6116,14 @@
       codegen_->RecordSimplePatch();
       break;
     }
+    case HLoadClass::LoadKind::kBssEntry: {
+      Register method_address = locations->InAt(0).AsRegister<Register>();
+      Address address(method_address, CodeGeneratorX86::kDummy32BitOffset);
+      Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
+      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
+      generate_null_check = true;
+      break;
+    }
     case HLoadClass::LoadKind::kJitTableAddress: {
       Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
       Label* fixup_label = codegen_->NewJitRootClassPatch(
@@ -6105,35 +6132,9 @@
       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, kCompilerReadBarrierOption);
       break;
     }
-    case HLoadClass::LoadKind::kDexCachePcRelative: {
-      Register base_reg = locations->InAt(0).AsRegister<Register>();
-      uint32_t offset = cls->GetDexCacheElementOffset();
-      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset);
-      // /* GcRoot<mirror::Class> */ out = *(base + offset)  /* PC-relative */
-      GenerateGcRootFieldLoad(cls,
-                              out_loc,
-                              Address(base_reg, CodeGeneratorX86::kDummy32BitOffset),
-                              fixup_label,
-                              read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
-    case HLoadClass::LoadKind::kDexCacheViaMethod: {
-      // /* GcRoot<mirror::Class>[] */ out =
-      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-      Register current_method = locations->InAt(0).AsRegister<Register>();
-      __ movl(out, Address(current_method,
-                           ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      GenerateGcRootFieldLoad(cls,
-                              out_loc,
-                              Address(out,
-                                      CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_)),
-                              /* fixup_label */ nullptr,
-                              read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 
   if (generate_null_check || cls->MustGenerateClinitCheck()) {
@@ -6203,11 +6204,11 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheViaMethod:
-      break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
   }
   return desired_string_load_kind;
 }
@@ -6258,11 +6259,13 @@
 
   switch (load->GetLoadKind()) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       __ movl(out, Immediate(/* placeholder */ 0));
       codegen_->RecordBootStringPatch(load);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       Register method_address = locations->InAt(0).AsRegister<Register>();
       __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
       codegen_->RecordBootStringPatch(load);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index dd1628c..b86d080 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -412,7 +412,8 @@
 
   void RecordSimplePatch();
   void RecordBootStringPatch(HLoadString* load_string);
-  void RecordTypePatch(HLoadClass* load_class);
+  void RecordBootTypePatch(HLoadClass* load_class);
+  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
   Label* NewJitRootStringPatch(const DexFile& dex_file,
@@ -621,8 +622,10 @@
   ArenaDeque<Label> simple_patches_;
   // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC).
   ArenaDeque<PatchInfo<Label>> string_patches_;
-  // Type patch locations.
-  ArenaDeque<PatchInfo<Label>> type_patches_;
+  // Type patch locations for boot image; type depends on configuration (boot image PIC/non-PIC).
+  ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
+  // Type patch locations for kBssEntry.
+  ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
 
   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 152a9ed..2691af8 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -234,12 +234,12 @@
                           HInstruction* at,
                           uint32_t dex_pc,
                           bool do_clinit)
-      : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = at_->GetLocations();
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
 
@@ -249,7 +249,7 @@
     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
             Immediate(cls_->GetTypeIndex().index_));
     x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
-                                  at_,
+                                  instruction_,
                                   dex_pc_,
                                   this);
     if (do_clinit_) {
@@ -266,6 +266,15 @@
     }
 
     RestoreLiveRegisters(codegen, locations);
+    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
+    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
+      DCHECK(out.IsValid());
+      __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
+              locations->Out().AsRegister<CpuRegister>());
+      Label* fixup_label = x86_64_codegen->NewTypeBssEntryPatch(cls_);
+      __ Bind(fixup_label);
+    }
     __ jmp(GetExitLabel());
   }
 
@@ -275,10 +284,6 @@
   // The class this slow path will load.
   HLoadClass* const cls_;
 
-  // The instruction where this slow path is happening.
-  // (Might be the load class or an initialization check).
-  HInstruction* const at_;
-
   // The dex PC of `at_`.
   const uint32_t dex_pc_;
 
@@ -300,9 +305,9 @@
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
+    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
     // Custom calling convention: RAX serves as both input and output.
-    __ movl(CpuRegister(RAX), Immediate(string_index));
+    __ movl(CpuRegister(RAX), Immediate(string_index.index_));
     x86_64_codegen->InvokeRuntime(kQuickResolveString,
                                   instruction_,
                                   instruction_->GetDexPc(),
@@ -1079,9 +1084,15 @@
   __ Bind(&string_patches_.back().label);
 }
 
-void CodeGeneratorX86_64::RecordTypePatch(HLoadClass* load_class) {
-  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
-  __ Bind(&type_patches_.back().label);
+void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) {
+  boot_image_type_patches_.emplace_back(load_class->GetDexFile(),
+                                        load_class->GetTypeIndex().index_);
+  __ Bind(&boot_image_type_patches_.back().label);
+}
+
+Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
+  type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
+  return &type_bss_entry_patches_.back().label;
 }
 
 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
@@ -1118,7 +1129,8 @@
       pc_relative_dex_cache_patches_.size() +
       simple_patches_.size() +
       string_patches_.size() +
-      type_patches_.size();
+      boot_image_type_patches_.size() +
+      type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -1127,13 +1139,17 @@
     linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
   }
   if (!GetCompilerOptions().IsBootImage()) {
+    DCHECK(boot_image_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
   } else {
-    // These are always PC-relative, see GetSupportedLoadStringKind().
+    // These are always PC-relative, see GetSupportedLoadClassKind()/GetSupportedLoadStringKind().
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
+                                                                linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
   }
-  // These are always PC-relative, see GetSupportedLoadClassKind().
-  EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(type_patches_, linker_patches);
+  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
+                                                              linker_patches);
+  DCHECK_EQ(size, linker_patches->size());
 }
 
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -1214,7 +1230,8 @@
         pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
@@ -5424,11 +5441,12 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
-    case HLoadClass::LoadKind::kJitTableAddress:
-      break;
-    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadClass::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -5436,15 +5454,16 @@
 }
 
 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
-  if (cls->NeedsAccessCheck()) {
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
-    CodeGenerator::CreateLoadClassLocationSummary(
+    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Location::RegisterLocation(RAX),
-        /* code_generator_supports_read_barrier */ true);
+        Location::RegisterLocation(RAX));
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
@@ -5455,9 +5474,7 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
 
-  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
-      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister());
@@ -5474,14 +5491,14 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
-  LocationSummary* locations = cls->GetLocations();
-  if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
-    codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
-    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
+  DCHECK(!cls->NeedsAccessCheck());
 
+  LocationSummary* locations = cls->GetLocations();
   Location out_loc = locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
 
@@ -5489,7 +5506,7 @@
       ? kWithoutReadBarrier
       : kCompilerReadBarrierOption;
   bool generate_null_check = false;
-  switch (cls->GetLoadKind()) {
+  switch (load_kind) {
     case HLoadClass::LoadKind::kReferrersClass: {
       DCHECK(!cls->CanCallRuntime());
       DCHECK(!cls->MustGenerateClinitCheck());
@@ -5504,9 +5521,10 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
-      codegen_->RecordTypePatch(cls);
+      codegen_->RecordBootTypePatch(cls);
       break;
     case HLoadClass::LoadKind::kBootImageAddress: {
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -5516,6 +5534,15 @@
       codegen_->RecordSimplePatch();
       break;
     }
+    case HLoadClass::LoadKind::kBssEntry: {
+      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
+                                          /* no_rip */ false);
+      Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
+      // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
+      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
+      generate_null_check = true;
+      break;
+    }
     case HLoadClass::LoadKind::kJitTableAddress: {
       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
                                           /* no_rip */ true);
@@ -5525,33 +5552,6 @@
       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, kCompilerReadBarrierOption);
       break;
     }
-    case HLoadClass::LoadKind::kDexCachePcRelative: {
-      uint32_t offset = cls->GetDexCacheElementOffset();
-      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset);
-      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
-                                          /* no_rip */ false);
-      // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
-      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
-    case HLoadClass::LoadKind::kDexCacheViaMethod: {
-      // /* GcRoot<mirror::Class>[] */ out =
-      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
-      __ movq(out,
-              Address(current_method,
-                      ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      GenerateGcRootFieldLoad(
-          cls,
-          out_loc,
-          Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_)),
-          /* fixup_label */ nullptr,
-          read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
-      break;
-    }
     default:
       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
       UNREACHABLE();
@@ -5607,11 +5607,11 @@
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
-    case HLoadString::LoadKind::kDexCacheViaMethod:
-      break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
   }
   return desired_string_load_kind;
 }
@@ -5657,6 +5657,7 @@
 
   switch (load->GetLoadKind()) {
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
       codegen_->RecordBootStringPatch(load);
       return;  // No dex cache slow path.
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 32d006c..8b3ab4c 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -409,7 +409,8 @@
 
   void RecordSimplePatch();
   void RecordBootStringPatch(HLoadString* load_string);
-  void RecordTypePatch(HLoadClass* load_class);
+  void RecordBootTypePatch(HLoadClass* load_class);
+  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
   Label* NewJitRootStringPatch(const DexFile& dex_file,
@@ -604,8 +605,10 @@
   ArenaDeque<Label> simple_patches_;
   // String patch locations; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PatchInfo<Label>> string_patches_;
-  // Type patch locations.
-  ArenaDeque<PatchInfo<Label>> type_patches_;
+  // Type patch locations for boot image (always PIC).
+  ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
+  // Type patch locations for kBssEntry.
+  ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
 
   // Fixups for jump tables need to be handled specially.
   ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index 10a36c6..a3140d0 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -59,21 +59,6 @@
   }
 
  private:
-  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
-    // If this is a load with PC-relative access to the dex cache types array,
-    // we need to add the dex cache arrays base as the special input.
-    if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) {
-      // Initialize base for target dex file if needed.
-      const DexFile& dex_file = load_class->GetDexFile();
-      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
-      // Update the element offset in base.
-      DexCacheArraysLayout layout(kArmPointerSize, &dex_file);
-      base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex()));
-      // Add the special argument base to the load.
-      load_class->AddSpecialInput(base);
-    }
-  }
-
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     // If this is an invoke with PC-relative access to the dex cache methods array,
     // we need to add the dex cache arrays base as the special input.
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
index 31fff26..9a34f54 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -53,21 +53,6 @@
   }
 
  private:
-  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
-    // If this is a load with PC-relative access to the dex cache types array,
-    // we need to add the dex cache arrays base as the special input.
-    if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) {
-      // Initialize base for target dex file if needed.
-      const DexFile& dex_file = load_class->GetDexFile();
-      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
-      // Update the element offset in base.
-      DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
-      base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex()));
-      // Add the special argument base to the load.
-      load_class->AddSpecialInput(base);
-    }
-  }
-
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     // If this is an invoke with PC-relative access to the dex cache methods array,
     // we need to add the dex cache arrays base as the special input.
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index a6084eb..0af0d19 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2459,7 +2459,7 @@
     case LoadKind::kJitTableAddress:
       return AsMirrorInternal(GetAddress()) == AsMirrorInternal(other_load_class->GetAddress());
     default:
-      DCHECK(HasTypeReference(GetLoadKind()) || HasDexCacheReference(GetLoadKind()));
+      DCHECK(HasTypeReference(GetLoadKind()));
       return IsSameDexFile(GetDexFile(), other_load_class->GetDexFile());
   }
 }
@@ -2490,10 +2490,10 @@
       return os << "BootImageLinkTimePcRelative";
     case HLoadClass::LoadKind::kBootImageAddress:
       return os << "BootImageAddress";
+    case HLoadClass::LoadKind::kBssEntry:
+      return os << "BssEntry";
     case HLoadClass::LoadKind::kJitTableAddress:
       return os << "JitTableAddress";
-    case HLoadClass::LoadKind::kDexCachePcRelative:
-      return os << "DexCachePcRelative";
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       return os << "DexCacheViaMethod";
     default:
@@ -2555,10 +2555,10 @@
       return os << "BootImageAddress";
     case HLoadString::LoadKind::kBssEntry:
       return os << "BssEntry";
-    case HLoadString::LoadKind::kDexCacheViaMethod:
-      return os << "DexCacheViaMethod";
     case HLoadString::LoadKind::kJitTableAddress:
       return os << "JitTableAddress";
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      return os << "DexCacheViaMethod";
     default:
       LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs);
       UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8efd7e0..3e7914c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -5549,14 +5549,13 @@
     // GetIncludePatchInformation().
     kBootImageAddress,
 
+    // Load from an entry in the .bss section using a PC-relative load.
+    // Used for classes outside boot image when .bss is accessible with a PC-relative load.
+    kBssEntry,
+
     // Load from the root table associated with the JIT compiled method.
     kJitTableAddress,
 
-    // Load from resolved types array in the dex cache using a PC-relative load.
-    // Used for classes outside boot image when we know that we can access
-    // the dex cache arrays using a PC-relative load.
-    kDexCachePcRelative,
-
     // Load from resolved types array accessed through the class loaded from
     // the compiled method's own ArtMethod*. This is the default access type when
     // all other types are unavailable.
@@ -5575,6 +5574,7 @@
         special_input_(HUserRecord<HInstruction*>(current_method)),
         type_index_(type_index),
         dex_file_(dex_file),
+        address_(0u),
         loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
     // Referrers class should not need access check. We never inline unverified
     // methods so we can't possibly end up in this situation.
@@ -5583,14 +5583,14 @@
     SetPackedField<LoadKindField>(
         is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod);
     SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
-    SetPackedFlag<kFlagIsInDexCache>(false);
     SetPackedFlag<kFlagIsInBootImage>(false);
     SetPackedFlag<kFlagGenerateClInitCheck>(false);
   }
 
   void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) {
     DCHECK(HasAddress(load_kind));
-    load_data_.address = address;
+    DCHECK_NE(address, 0u);
+    address_ = address;
     SetLoadKindInternal(load_kind);
   }
 
@@ -5603,15 +5603,6 @@
     SetLoadKindInternal(load_kind);
   }
 
-  void SetLoadKindWithDexCacheReference(LoadKind load_kind,
-                                        const DexFile& dex_file,
-                                        uint32_t element_index) {
-    DCHECK(HasDexCacheReference(load_kind));
-    DCHECK(IsSameDexFile(dex_file_, dex_file));
-    load_data_.dex_cache_element_index = element_index;
-    SetLoadKindInternal(load_kind);
-  }
-
   LoadKind GetLoadKind() const {
     return GetPackedField<LoadKindField>();
   }
@@ -5636,13 +5627,21 @@
   }
 
   bool CanCallRuntime() const {
-    return MustGenerateClinitCheck() ||
-           (!IsReferrersClass() && !IsInDexCache()) ||
-           NeedsAccessCheck();
+    return NeedsAccessCheck() ||
+           MustGenerateClinitCheck() ||
+           GetLoadKind() == LoadKind::kDexCacheViaMethod ||
+           GetLoadKind() == LoadKind::kBssEntry;
   }
 
   bool CanThrow() const OVERRIDE {
-    return CanCallRuntime();
+    return NeedsAccessCheck() ||
+           MustGenerateClinitCheck() ||
+           // If the class is in the boot image, the lookup in the runtime call cannot throw.
+           // This keeps CanThrow() consistent between non-PIC (using kBootImageAddress) and
+           // PIC and subsequently avoids a DCE behavior dependency on the PIC option.
+           ((GetLoadKind() == LoadKind::kDexCacheViaMethod ||
+             GetLoadKind() == LoadKind::kBssEntry) &&
+            !IsInBootImage());
   }
 
   ReferenceTypeInfo GetLoadedClassRTI() {
@@ -5658,15 +5657,13 @@
   dex::TypeIndex GetTypeIndex() const { return type_index_; }
   const DexFile& GetDexFile() const { return dex_file_; }
 
-  uint32_t GetDexCacheElementOffset() const;
-
   uint64_t GetAddress() const {
     DCHECK(HasAddress(GetLoadKind()));
-    return load_data_.address;
+    return address_;
   }
 
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
-    return !IsReferrersClass();
+    return GetLoadKind() == LoadKind::kDexCacheViaMethod;
   }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
@@ -5675,17 +5672,9 @@
 
   bool IsReferrersClass() const { return GetLoadKind() == LoadKind::kReferrersClass; }
   bool NeedsAccessCheck() const { return GetPackedFlag<kFlagNeedsAccessCheck>(); }
-  bool IsInDexCache() const { return GetPackedFlag<kFlagIsInDexCache>(); }
   bool IsInBootImage() const { return GetPackedFlag<kFlagIsInBootImage>(); }
   bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); }
 
-  void MarkInDexCache() {
-    SetPackedFlag<kFlagIsInDexCache>(true);
-    DCHECK(!NeedsEnvironment());
-    RemoveEnvironment();
-    SetSideEffects(SideEffects::None());
-  }
-
   void MarkInBootImage() {
     SetPackedFlag<kFlagIsInBootImage>(true);
   }
@@ -5706,8 +5695,7 @@
 
  private:
   static constexpr size_t kFlagNeedsAccessCheck    = kNumberOfGenericPackedBits;
-  static constexpr size_t kFlagIsInDexCache        = kFlagNeedsAccessCheck + 1;
-  static constexpr size_t kFlagIsInBootImage       = kFlagIsInDexCache + 1;
+  static constexpr size_t kFlagIsInBootImage       = kFlagNeedsAccessCheck + 1;
   // Whether this instruction must generate the initialization check.
   // Used for code generation.
   static constexpr size_t kFlagGenerateClInitCheck = kFlagIsInBootImage + 1;
@@ -5719,10 +5707,11 @@
   using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
 
   static bool HasTypeReference(LoadKind load_kind) {
-    return load_kind == LoadKind::kBootImageLinkTimeAddress ||
+    return load_kind == LoadKind::kReferrersClass ||
+        load_kind == LoadKind::kBootImageLinkTimeAddress ||
         load_kind == LoadKind::kBootImageLinkTimePcRelative ||
-        load_kind == LoadKind::kDexCacheViaMethod ||
-        load_kind == LoadKind::kReferrersClass;
+        load_kind == LoadKind::kBssEntry ||
+        load_kind == LoadKind::kDexCacheViaMethod;
   }
 
   static bool HasAddress(LoadKind load_kind) {
@@ -5730,24 +5719,17 @@
         load_kind == LoadKind::kJitTableAddress;
   }
 
-  static bool HasDexCacheReference(LoadKind load_kind) {
-    return load_kind == LoadKind::kDexCachePcRelative;
-  }
-
   void SetLoadKindInternal(LoadKind load_kind);
 
   // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass.
   // For other load kinds it's empty or possibly some architecture-specific instruction
-  // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative.
+  // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative.
   HUserRecord<HInstruction*> special_input_;
 
   const dex::TypeIndex type_index_;
   const DexFile& dex_file_;
 
-  union {
-    uint32_t dex_cache_element_index;   // Only for dex cache reference.
-    uint64_t address;  // Up to 64-bit, needed for kJitTableAddress on 64-bit targets.
-  } load_data_;
+  uint64_t address_;  // Up to 64-bit, needed for kJitTableAddress on 64-bit targets.
 
   ReferenceTypeInfo loaded_class_rti_;
 
@@ -5756,19 +5738,13 @@
 std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
 
 // Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
-inline uint32_t HLoadClass::GetDexCacheElementOffset() const {
-  DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind();
-  return load_data_.dex_cache_element_index;
-}
-
-// Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
 inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
   // The special input is used for PC-relative loads on some architectures,
   // including literal pool loads, which are PC-relative too.
   DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
-         GetLoadKind() == LoadKind::kDexCachePcRelative ||
          GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
-         GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
+         GetLoadKind() == LoadKind::kBootImageAddress ||
+         GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind();
   DCHECK(special_input_.GetInstruction() == nullptr);
   special_input_ = HUserRecord<HInstruction*>(special_input);
   special_input->AddUseAt(this, 0);
@@ -5796,15 +5772,15 @@
     // Used for strings outside boot image when .bss is accessible with a PC-relative load.
     kBssEntry,
 
+    // Load from the root table associated with the JIT compiled method.
+    kJitTableAddress,
+
     // Load from resolved strings array accessed through the class loaded from
     // the compiled method's own ArtMethod*. This is the default access type when
     // all other types are unavailable.
     kDexCacheViaMethod,
 
-    // Load from the root table associated with the JIT compiled method.
-    kJitTableAddress,
-
-    kLast = kJitTableAddress,
+    kLast = kDexCacheViaMethod,
   };
 
   HLoadString(HCurrentMethod* current_method,
@@ -5896,7 +5872,7 @@
 
   // The special input is the HCurrentMethod for kDexCacheViaMethod.
   // For other load kinds it's empty or possibly some architecture-specific instruction
-  // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative.
+  // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative.
   HUserRecord<HInstruction*> special_input_;
 
   dex::StringIndex string_index_;
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index e321b9e..a0fdde1 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -62,8 +62,9 @@
     HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
     switch (load_kind) {
       case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      case HLoadClass::LoadKind::kBootImageAddress:
       case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+      case HLoadClass::LoadKind::kBssEntry:
         // Add a base register for PC-relative literals on R2.
         InitializePCRelativeBasePointer();
         load_class->AddSpecialInput(base_);
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index b1fdb17..2befc8c 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -83,7 +83,7 @@
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
     HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
     if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
-        load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+        load_kind == HLoadClass::LoadKind::kBssEntry) {
       InitializePCRelativeBasePointer();
       load_class->AddSpecialInput(base_);
     }
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index dc8ee23..eabda26 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -154,13 +154,24 @@
   DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
          load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
       << load_class->GetLoadKind();
-  DCHECK(!load_class->IsInDexCache()) << "HLoadClass should not be optimized before sharpening.";
   DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening.";
 
+  if (load_class->NeedsAccessCheck()) {
+    // We need to call the runtime anyway, so we simply get the class as that call's return value.
+    return;
+  }
+
+  if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) {
+    // Loading from the ArtMethod* is the most efficient retrieval in code size.
+    // TODO: This may not actually be true for all architectures and
+    // locations of target classes. The additional register pressure
+    // for using the ArtMethod* should be considered.
+    return;
+  }
+
   const DexFile& dex_file = load_class->GetDexFile();
   dex::TypeIndex type_index = load_class->GetTypeIndex();
 
-  bool is_in_dex_cache = false;
   bool is_in_boot_image = false;
   HLoadClass::LoadKind desired_load_kind = static_cast<HLoadClass::LoadKind>(-1);
   uint64_t address = 0u;  // Class or dex cache element address.
@@ -169,31 +180,29 @@
     // Compiling boot image. Check if the class is a boot image class.
     DCHECK(!runtime->UseJitCompilation());
     if (!compiler_driver->GetSupportBootImageFixup()) {
-      // MIPS64 or compiler_driver_test. Do not sharpen.
+      // compiler_driver_test. Do not sharpen.
       desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
     } else if ((klass != nullptr) && compiler_driver->IsImageClass(
         dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
       is_in_boot_image = true;
-      is_in_dex_cache = true;
       desired_load_kind = codegen->GetCompilerOptions().GetCompilePic()
           ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
           : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
     } else {
-      // Not a boot image class. We must go through the dex cache.
+      // Not a boot image class.
       DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
-      desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative;
+      desired_load_kind = HLoadClass::LoadKind::kBssEntry;
     }
   } else {
     is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass);
     if (runtime->UseJitCompilation()) {
       // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
       // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
-      is_in_dex_cache = (klass != nullptr);
       if (is_in_boot_image) {
         // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
         desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
         address = reinterpret_cast64<uint64_t>(klass);
-      } else if (is_in_dex_cache) {
+      } else if (klass != nullptr) {
         desired_load_kind = HLoadClass::LoadKind::kJitTableAddress;
         // We store in the address field the location of the stack reference maintained
         // by the handle. We do this now so that the code generation does not need to figure
@@ -212,12 +221,7 @@
       address = reinterpret_cast64<uint64_t>(klass);
     } else {
       // Not JIT and either the klass is not in boot image or we are compiling in PIC mode.
-      // Use PC-relative load from the dex cache if the dex file belongs
-      // to the oat file that we're currently compiling.
-      desired_load_kind =
-          ContainsElement(compiler_driver->GetDexFilesForOatFile(), &load_class->GetDexFile())
-              ? HLoadClass::LoadKind::kDexCachePcRelative
-              : HLoadClass::LoadKind::kDexCacheViaMethod;
+      desired_load_kind = HLoadClass::LoadKind::kBssEntry;
     }
   }
   DCHECK_NE(desired_load_kind, static_cast<HLoadClass::LoadKind>(-1));
@@ -226,27 +230,11 @@
     load_class->MarkInBootImage();
   }
 
-  if (load_class->NeedsAccessCheck()) {
-    // We need to call the runtime anyway, so we simply get the class as that call's return value.
-    return;
-  }
-
-  if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) {
-    // Loading from the ArtMethod* is the most efficient retrieval in code size.
-    // TODO: This may not actually be true for all architectures and
-    // locations of target classes. The additional register pressure
-    // for using the ArtMethod* should be considered.
-    return;
-  }
-
-  if (is_in_dex_cache) {
-    load_class->MarkInDexCache();
-  }
-
   HLoadClass::LoadKind load_kind = codegen->GetSupportedLoadClassKind(desired_load_kind);
   switch (load_kind) {
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadClass::LoadKind::kBssEntry:
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       load_class->SetLoadKindWithTypeReference(load_kind, dex_file, type_index);
       break;
@@ -255,13 +243,6 @@
       DCHECK_NE(address, 0u);
       load_class->SetLoadKindWithAddress(load_kind, address);
       break;
-    case HLoadClass::LoadKind::kDexCachePcRelative: {
-      PointerSize pointer_size = InstructionSetPointerSize(codegen->GetInstructionSet());
-      DexCacheArraysLayout layout(pointer_size, &dex_file);
-      size_t element_index = layout.TypeOffset(type_index);
-      load_class->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
-      break;
-    }
     default:
       LOG(FATAL) << "Unexpected load kind: " << load_kind;
       UNREACHABLE();
@@ -274,7 +255,7 @@
   const DexFile& dex_file = load_string->GetDexFile();
   dex::StringIndex string_index = load_string->GetStringIndex();
 
-  HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+  HLoadString::LoadKind desired_load_kind = static_cast<HLoadString::LoadKind>(-1);
   {
     Runtime* runtime = Runtime::Current();
     ClassLinker* class_linker = runtime->GetClassLinker();
@@ -297,8 +278,8 @@
             ? HLoadString::LoadKind::kBootImageLinkTimePcRelative
             : HLoadString::LoadKind::kBootImageLinkTimeAddress;
       } else {
-        // MIPS64 or compiler_driver_test. Do not sharpen.
-        DCHECK_EQ(desired_load_kind, HLoadString::LoadKind::kDexCacheViaMethod);
+        // compiler_driver_test. Do not sharpen.
+        desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
       }
     } else if (runtime->UseJitCompilation()) {
       // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
@@ -310,6 +291,8 @@
         } else {
           desired_load_kind = HLoadString::LoadKind::kJitTableAddress;
         }
+      } else {
+        desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
       }
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
@@ -326,6 +309,7 @@
       load_string->SetString(handles_->NewHandle(string));
     }
   }
+  DCHECK_NE(desired_load_kind, static_cast<HLoadString::LoadKind>(-1));
 
   HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
   load_string->SetLoadKind(load_kind);
diff --git a/runtime/arch/mips64/instruction_set_features_mips64.cc b/runtime/arch/mips64/instruction_set_features_mips64.cc
index 5606c1d..5757906 100644
--- a/runtime/arch/mips64/instruction_set_features_mips64.cc
+++ b/runtime/arch/mips64/instruction_set_features_mips64.cc
@@ -70,7 +70,7 @@
 }
 
 std::string Mips64InstructionSetFeatures::GetFeatureString() const {
-  return "";
+  return "default";
 }
 
 std::unique_ptr<const InstructionSetFeatures>
diff --git a/runtime/arch/mips64/instruction_set_features_mips64_test.cc b/runtime/arch/mips64/instruction_set_features_mips64_test.cc
index 1d03794..380c4e5 100644
--- a/runtime/arch/mips64/instruction_set_features_mips64_test.cc
+++ b/runtime/arch/mips64/instruction_set_features_mips64_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(mips64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(mips64_features->GetInstructionSet(), kMips64);
   EXPECT_TRUE(mips64_features->Equals(mips64_features.get()));
-  EXPECT_STREQ("", mips64_features->GetFeatureString().c_str());
+  EXPECT_STREQ("default", mips64_features->GetFeatureString().c_str());
   EXPECT_EQ(mips64_features->AsBitmap(), 0U);
 }
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 035cead..129c93f 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -351,7 +351,7 @@
       array_iftable_(nullptr),
       find_array_class_cache_next_victim_(0),
       init_done_(false),
-      log_new_class_table_roots_(false),
+      log_new_roots_(false),
       intern_table_(intern_table),
       quick_resolution_trampoline_(nullptr),
       quick_imt_conflict_trampoline_(nullptr),
@@ -1865,12 +1865,10 @@
                     << reinterpret_cast<const void*>(section_end);
       }
     }
-    if (!oat_file->GetBssGcRoots().empty()) {
-      // Insert oat file to class table for visiting .bss GC roots.
-      class_table->InsertOatFile(oat_file);
-    }
-  } else {
-    DCHECK(oat_file->GetBssGcRoots().empty());
+  }
+  if (!oat_file->GetBssGcRoots().empty()) {
+    // Insert oat file to class table for visiting .bss GC roots.
+    class_table->InsertOatFile(oat_file);
   }
   if (added_class_table) {
     WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
@@ -1934,14 +1932,27 @@
       // Concurrent moving GC marked new roots through the to-space invariant.
       CHECK_EQ(new_ref, old_ref);
     }
+    for (const OatFile* oat_file : new_bss_roots_boot_oat_files_) {
+      for (GcRoot<mirror::Object>& root : oat_file->GetBssGcRoots()) {
+        ObjPtr<mirror::Object> old_ref = root.Read<kWithoutReadBarrier>();
+        if (old_ref != nullptr) {
+          DCHECK(old_ref->IsClass());
+          root.VisitRoot(visitor, RootInfo(kRootStickyClass));
+          ObjPtr<mirror::Object> new_ref = root.Read<kWithoutReadBarrier>();
+          // Concurrent moving GC marked new roots through the to-space invariant.
+          CHECK_EQ(new_ref, old_ref);
+        }
+      }
+    }
   }
   if ((flags & kVisitRootFlagClearRootLog) != 0) {
     new_class_roots_.clear();
+    new_bss_roots_boot_oat_files_.clear();
   }
   if ((flags & kVisitRootFlagStartLoggingNewRoots) != 0) {
-    log_new_class_table_roots_ = true;
+    log_new_roots_ = true;
   } else if ((flags & kVisitRootFlagStopLoggingNewRoots) != 0) {
-    log_new_class_table_roots_ = false;
+    log_new_roots_ = false;
   }
   // We deliberately ignore the class roots in the image since we
   // handle image roots by using the MS/CMS rescanning of dirty cards.
@@ -3307,6 +3318,7 @@
     ReaderMutexLock mu(self, *Locks::dex_lock_);
     ObjPtr<mirror::DexCache> dex_cache = FindDexCacheLocked(self, dex_file, true);
     if (dex_cache != nullptr) {
+      // TODO: Check if the dex file was registered with the same class loader. Bug: 34193123
       return dex_cache.Ptr();
     }
   }
@@ -3651,7 +3663,7 @@
       // This is necessary because we need to have the card dirtied for remembered sets.
       Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
     }
-    if (log_new_class_table_roots_) {
+    if (log_new_roots_) {
       new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
     }
   }
@@ -3664,6 +3676,14 @@
   return nullptr;
 }
 
+void ClassLinker::WriteBarrierForBootOatFileBssRoots(const OatFile* oat_file) {
+  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  DCHECK(!oat_file->GetBssGcRoots().empty()) << oat_file->GetLocation();
+  if (log_new_roots_ && !ContainsElement(new_bss_roots_boot_oat_files_, oat_file)) {
+    new_bss_roots_boot_oat_files_.push_back(oat_file);
+  }
+}
+
 // TODO This should really be in mirror::Class.
 void ClassLinker::UpdateClassMethods(ObjPtr<mirror::Class> klass,
                                      LengthPrefixedArray<ArtMethod>* new_methods) {
@@ -5161,7 +5181,7 @@
         Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
       }
       CHECK_EQ(existing, klass.Get());
-      if (log_new_class_table_roots_) {
+      if (log_new_roots_) {
         new_class_roots_.push_back(GcRoot<mirror::Class>(h_new_class.Get()));
       }
     }
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 77322ed..580acb7 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -64,6 +64,7 @@
 template<typename T> class LengthPrefixedArray;
 template<class T> class MutableHandle;
 class InternTable;
+class OatFile;
 template<class T> class ObjectLock;
 class Runtime;
 class ScopedObjectAccessAlreadyRunnable;
@@ -535,6 +536,12 @@
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Add an oat file with .bss GC roots to be visited again at the end of GC
+  // for collector types that need it.
+  void WriteBarrierForBootOatFileBssRoots(const OatFile* oat_file)
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   mirror::ObjectArray<mirror::Class>* GetClassRoots() REQUIRES_SHARED(Locks::mutator_lock_) {
     mirror::ObjectArray<mirror::Class>* class_roots = class_roots_.Read();
     DCHECK(class_roots != nullptr);
@@ -1138,6 +1145,10 @@
   // New class roots, only used by CMS since the GC needs to mark these in the pause.
   std::vector<GcRoot<mirror::Class>> new_class_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
+  // Boot image oat files with new .bss GC roots to be visited in the pause by CMS.
+  std::vector<const OatFile*> new_bss_roots_boot_oat_files_
+      GUARDED_BY(Locks::classlinker_classes_lock_);
+
   // Number of times we've searched dex caches for a class. After a certain number of misses we move
   // the classes into the class_table_ to avoid dex cache based searches.
   Atomic<uint32_t> failed_dex_cache_class_lookups_;
@@ -1155,7 +1166,7 @@
   size_t find_array_class_cache_next_victim_;
 
   bool init_done_;
-  bool log_new_class_table_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
+  bool log_new_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
   InternTable* intern_table_;
 
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 469c45c..5ff95b4 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -122,11 +122,6 @@
   return inlined_method;
 }
 
-inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type) {
-  return GetCalleeSaveMethodCaller(
-      self->GetManagedStack()->GetTopQuickFrame(), type, true /* do_caller_check */);
-}
-
 ALWAYS_INLINE inline mirror::Class* CheckObjectAlloc(mirror::Class* klass,
                                                      Thread* self,
                                                      bool* slow_path)
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 5390165..b17e1a8 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -261,11 +261,8 @@
   return true;
 }
 
-ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp,
-                                     Runtime::CalleeSaveType type,
-                                     bool do_caller_check)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+static inline std::pair<ArtMethod*, uintptr_t> DoGetCalleeSaveMethodOuterCallerAndPc(
+    ArtMethod** sp, Runtime::CalleeSaveType type) REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(type));
 
   const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type);
@@ -275,6 +272,13 @@
   uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(
       (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset));
   ArtMethod* outer_method = *caller_sp;
+  return std::make_pair(outer_method, caller_pc);
+}
+
+static inline ArtMethod* DoGetCalleeSaveMethodCaller(ArtMethod* outer_method,
+                                                     uintptr_t caller_pc,
+                                                     bool do_caller_check)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtMethod* caller = outer_method;
   if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
     if (outer_method != nullptr) {
@@ -308,8 +312,33 @@
     visitor.WalkStack();
     caller = visitor.caller;
   }
-
   return caller;
 }
 
+ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp,
+                                     Runtime::CalleeSaveType type,
+                                     bool do_caller_check)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+  auto outer_caller_and_pc = DoGetCalleeSaveMethodOuterCallerAndPc(sp, type);
+  ArtMethod* outer_method = outer_caller_and_pc.first;
+  uintptr_t caller_pc = outer_caller_and_pc.second;
+  ArtMethod* caller = DoGetCalleeSaveMethodCaller(outer_method, caller_pc, do_caller_check);
+  return caller;
+}
+
+CallerAndOuterMethod GetCalleeSaveMethodCallerAndOuterMethod(Thread* self,
+                                                             Runtime::CalleeSaveType type) {
+  CallerAndOuterMethod result;
+  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+  ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrame();
+  auto outer_caller_and_pc = DoGetCalleeSaveMethodOuterCallerAndPc(sp, type);
+  result.outer_method = outer_caller_and_pc.first;
+  uintptr_t caller_pc = outer_caller_and_pc.second;
+  result.caller =
+      DoGetCalleeSaveMethodCaller(result.outer_method, caller_pc, /* do_caller_check */ true);
+  return result;
+}
+
+
 }  // namespace art
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 4794610..d4cf83c 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -201,7 +201,13 @@
                                      bool do_caller_check = false)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
-ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type)
+struct CallerAndOuterMethod {
+  ArtMethod* caller;
+  ArtMethod* outer_method;
+};
+
+CallerAndOuterMethod GetCalleeSaveMethodCallerAndOuterMethod(Thread* self,
+                                                             Runtime::CalleeSaveType type)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 5dad43e..5b1b287 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -31,22 +31,56 @@
 
 namespace art {
 
+static inline void BssWriteBarrier(ArtMethod* outer_method) REQUIRES_SHARED(Locks::mutator_lock_) {
+  // For AOT code, we need a write barrier for the class loader that holds
+  // the GC roots in the .bss.
+  const DexFile* dex_file = outer_method->GetDexFile();
+  if (dex_file != nullptr &&
+      dex_file->GetOatDexFile() != nullptr &&
+      !dex_file->GetOatDexFile()->GetOatFile()->GetBssGcRoots().empty()) {
+    mirror::ClassLoader* class_loader = outer_method->GetClassLoader();
+    if (class_loader != nullptr) {
+      DCHECK(!class_loader->GetClassTable()->InsertOatFile(dex_file->GetOatDexFile()->GetOatFile()))
+          << "Oat file with .bss GC roots was not registered in class table: "
+          << dex_file->GetOatDexFile()->GetOatFile()->GetLocation();
+      // Note that we emit the barrier before the compiled code stores the String or Class
+      // as a GC root. This is OK as there is no suspend point point in between.
+      Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
+    } else {
+      Runtime::Current()->GetClassLinker()->WriteBarrierForBootOatFileBssRoots(
+          dex_file->GetOatDexFile()->GetOatFile());
+    }
+  }
+}
+
 extern "C" mirror::Class* artInitializeStaticStorageFromCode(uint32_t type_idx, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Called to ensure static storage base is initialized for direct static field reads and writes.
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
-  return ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, true, false);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveRefsOnly);
+  ArtMethod* caller = caller_and_outer.caller;
+  mirror::Class* result =
+      ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, true, false);
+  if (LIKELY(result != nullptr)) {
+    BssWriteBarrier(caller_and_outer.outer_method);
+  }
+  return result;
 }
 
 extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
-  return ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, false);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveRefsOnly);
+  ArtMethod* caller = caller_and_outer.caller;
+  mirror::Class* result =
+      ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, false);
+  if (LIKELY(result != nullptr)) {
+    BssWriteBarrier(caller_and_outer.outer_method);
+  }
+  return result;
 }
 
 extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx, Thread* self)
@@ -54,36 +88,28 @@
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
-  return ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, true);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveRefsOnly);
+  ArtMethod* caller = caller_and_outer.caller;
+  mirror::Class* result =
+      ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, true);
+  if (LIKELY(result != nullptr)) {
+    BssWriteBarrier(caller_and_outer.outer_method);
+  }
+  return result;
 }
 
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(
       self,
       // TODO: Change art_quick_resolve_string on MIPS and MIPS64 to kSaveEverything.
       (kRuntimeISA == kMips || kRuntimeISA == kMips64) ? Runtime::kSaveRefsOnly
                                                        : Runtime::kSaveEverything);
+  ArtMethod* caller = caller_and_outer.caller;
   mirror::String* result = ResolveStringFromCode(caller, dex::StringIndex(string_idx));
   if (LIKELY(result != nullptr)) {
-    // For AOT code, we need a write barrier for the class loader that holds
-    // the GC roots in the .bss.
-    const DexFile* dex_file = caller->GetDexFile();
-    if (dex_file != nullptr &&
-        dex_file->GetOatDexFile() != nullptr &&
-        !dex_file->GetOatDexFile()->GetOatFile()->GetBssGcRoots().empty()) {
-      mirror::ClassLoader* class_loader = caller->GetDeclaringClass()->GetClassLoader();
-      DCHECK(class_loader != nullptr);  // We do not use .bss GC roots for boot image.
-      DCHECK(
-          !class_loader->GetClassTable()->InsertOatFile(dex_file->GetOatDexFile()->GetOatFile()))
-          << "Oat file with .bss GC roots was not registered in class table: "
-          << dex_file->GetOatDexFile()->GetOatFile()->GetLocation();
-      // Note that we emit the barrier before the compiled code stores the string as GC root.
-      // This is OK as there is no suspend point point in between.
-      Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
-    }
+    BssWriteBarrier(caller_and_outer.outer_method);
   }
   return result;
 }
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 8f7f746..eb76fb6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2427,7 +2427,7 @@
   const size_t shorty_length = strlen(shorty);
   static const bool kMethodIsStatic = false;  // invoke() and invokeExact() are not static.
   RememberForGcArgumentVisitor gc_visitor(sp, kMethodIsStatic, shorty, shorty_length, &soa);
-  gc_visitor.Visit();
+  gc_visitor.VisitArguments();
 
   // Wrap raw_method_handle in a Handle for safety.
   StackHandleScope<5> hs(self);
diff --git a/runtime/oat.h b/runtime/oat.h
index 685aa04..f2d457c 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '9', '6', '\0' };  // invoke-polymorphic entry
+  static constexpr uint8_t kOatVersion[] = { '0', '9', '7', '\0' };  // HLoadClass/kBssEntry change
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 38df427..d47f1b5 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -323,8 +323,10 @@
   }
 
   PointerSize pointer_size = GetInstructionSetPointerSize(GetOatHeader().GetInstructionSet());
-  uint8_t* dex_cache_arrays = bss_begin_;
-  uint8_t* dex_cache_arrays_end = (bss_roots_ != nullptr) ? bss_roots_ : bss_end_;
+  uint8_t* dex_cache_arrays = (bss_begin_ == bss_roots_) ? nullptr : bss_begin_;
+  uint8_t* dex_cache_arrays_end =
+      (bss_begin_ == bss_roots_) ? nullptr : (bss_roots_ != nullptr) ? bss_roots_ : bss_end_;
+  DCHECK_EQ(dex_cache_arrays != nullptr, dex_cache_arrays_end != nullptr);
   uint32_t dex_file_count = GetOatHeader().GetDexFileCount();
   oat_dex_files_storage_.reserve(dex_file_count);
   for (size_t i = 0; i < dex_file_count; i++) {
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 09fcc1c..8674e6c 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -126,7 +126,7 @@
   }
 
   static jvmtiError GetAllThreads(jvmtiEnv* env, jint* threads_count_ptr, jthread** threads_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    return ThreadUtil::GetAllThreads(env, threads_count_ptr, threads_ptr);
   }
 
   static jvmtiError SuspendThread(jvmtiEnv* env, jthread thread) {
diff --git a/runtime/openjdkjvmti/ti_thread.cc b/runtime/openjdkjvmti/ti_thread.cc
index e20f560..2bcdd8c 100644
--- a/runtime/openjdkjvmti/ti_thread.cc
+++ b/runtime/openjdkjvmti/ti_thread.cc
@@ -35,13 +35,17 @@
 #include "art_jvmti.h"
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "gc/system_weak.h"
+#include "gc_root-inl.h"
 #include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "obj_ptr.h"
+#include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 #include "well_known_classes.h"
 
 namespace openjdkjvmti {
@@ -354,4 +358,89 @@
   return ERR(NONE);
 }
 
+jvmtiError ThreadUtil::GetAllThreads(jvmtiEnv* env,
+                                     jint* threads_count_ptr,
+                                     jthread** threads_ptr) {
+  if (threads_count_ptr == nullptr || threads_ptr == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  art::Thread* current = art::Thread::Current();
+
+  art::ScopedObjectAccess soa(current);
+
+  art::MutexLock mu(current, *art::Locks::thread_list_lock_);
+  std::list<art::Thread*> thread_list = art::Runtime::Current()->GetThreadList()->GetList();
+
+  std::vector<art::ObjPtr<art::mirror::Object>> peers;
+
+  for (art::Thread* thread : thread_list) {
+    // Skip threads that are still starting.
+    if (thread->IsStillStarting()) {
+      continue;
+    }
+
+    art::ObjPtr<art::mirror::Object> peer = thread->GetPeer();
+    if (peer != nullptr) {
+      peers.push_back(peer);
+    }
+  }
+
+  if (peers.empty()) {
+    *threads_count_ptr = 0;
+    *threads_ptr = nullptr;
+  } else {
+    unsigned char* data;
+    jvmtiError data_result = env->Allocate(peers.size() * sizeof(jthread), &data);
+    if (data_result != ERR(NONE)) {
+      return data_result;
+    }
+    jthread* threads = reinterpret_cast<jthread*>(data);
+    for (size_t i = 0; i != peers.size(); ++i) {
+      threads[i] = soa.AddLocalReference<jthread>(peers[i]);
+    }
+
+    *threads_count_ptr = static_cast<jint>(peers.size());
+    *threads_ptr = threads;
+  }
+  return ERR(NONE);
+}
+
+jvmtiError ThreadUtil::SetThreadLocalStorage(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                             jthread thread,
+                                             const void* data) {
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::Thread* self = GetNativeThread(thread, soa);
+  if (self == nullptr && thread == nullptr) {
+    return ERR(INVALID_THREAD);
+  }
+  if (self == nullptr) {
+    return ERR(THREAD_NOT_ALIVE);
+  }
+
+  self->SetCustomTLS(data);
+
+  return ERR(NONE);
+}
+
+jvmtiError ThreadUtil::GetThreadLocalStorage(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                             jthread thread,
+                                             void** data_ptr) {
+  if (data_ptr == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::Thread* self = GetNativeThread(thread, soa);
+  if (self == nullptr && thread == nullptr) {
+    return ERR(INVALID_THREAD);
+  }
+  if (self == nullptr) {
+    return ERR(THREAD_NOT_ALIVE);
+  }
+
+  *data_ptr = const_cast<void*>(self->GetCustomTLS());
+  return ERR(NONE);
+}
+
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_thread.h b/runtime/openjdkjvmti/ti_thread.h
index b6ffbb5..290e9d4 100644
--- a/runtime/openjdkjvmti/ti_thread.h
+++ b/runtime/openjdkjvmti/ti_thread.h
@@ -39,11 +39,16 @@
 
 class ThreadUtil {
  public:
+  static jvmtiError GetAllThreads(jvmtiEnv* env, jint* threads_count_ptr, jthread** threads_ptr);
+
   static jvmtiError GetCurrentThread(jvmtiEnv* env, jthread* thread_ptr);
 
   static jvmtiError GetThreadInfo(jvmtiEnv* env, jthread thread, jvmtiThreadInfo* info_ptr);
 
   static jvmtiError GetThreadState(jvmtiEnv* env, jthread thread, jint* thread_state_ptr);
+
+  static jvmtiError SetThreadLocalStorage(jvmtiEnv* env, jthread thread, const void* data);
+  static jvmtiError GetThreadLocalStorage(jvmtiEnv* env, jthread thread, void** data_ptr);
 };
 
 }  // namespace openjdkjvmti
diff --git a/runtime/thread.h b/runtime/thread.h
index d54a80d..3958c10 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1140,6 +1140,14 @@
     return debug_disallow_read_barrier_;
   }
 
+  const void* GetCustomTLS() const {
+    return custom_tls_;
+  }
+
+  void SetCustomTLS(const void* data) {
+    custom_tls_ = data;
+  }
+
   // Returns true if the current thread is the jit sensitive thread.
   bool IsJitSensitiveThread() const {
     return this == jit_sensitive_thread_;
@@ -1600,6 +1608,10 @@
   // Pending extra checkpoints if checkpoint_function_ is already used.
   std::list<Closure*> checkpoint_overflow_ GUARDED_BY(Locks::thread_suspend_count_lock_);
 
+  // Custom TLS field that can be used by plugins.
+  // TODO: Generalize once we have more plugins.
+  const void* custom_tls_;
+
   // True if the thread is allowed to call back into java (for e.g. during class resolution).
   // By default this is true.
   bool can_call_into_java_;
diff --git a/test/494-checker-instanceof-tests/src/Main.java b/test/494-checker-instanceof-tests/src/Main.java
index 2eac6c9..acd2305 100644
--- a/test/494-checker-instanceof-tests/src/Main.java
+++ b/test/494-checker-instanceof-tests/src/Main.java
@@ -142,11 +142,11 @@
   /// CHECK:                LoadClass
   /// CHECK:                Return [<<Const>>]
   public static boolean knownTestWithUnloadedClass() {
-    return $inline$returnMain() instanceof String;
+    return $inline$returnUnrelated() instanceof String;
   }
 
-  public static Object $inline$returnMain() {
-    return new Main();
+  public static Object $inline$returnUnrelated() {
+    return new Unrelated();
   }
 
   public static void expect(boolean expected, boolean actual) {
diff --git a/test/496-checker-inlining-class-loader/src/Main.java b/test/496-checker-inlining-class-loader/src/Main.java
index 15d4dc0..5deb77f 100644
--- a/test/496-checker-inlining-class-loader/src/Main.java
+++ b/test/496-checker-inlining-class-loader/src/Main.java
@@ -82,10 +82,10 @@
 
 class LoadedByMyClassLoader {
   /// CHECK-START: void LoadedByMyClassLoader.bar() inliner (before)
-  /// CHECK:      LoadClass
+  /// CHECK:      LoadClass class_name:FirstSeenByMyClassLoader
   /// CHECK-NEXT: ClinitCheck
   /// CHECK-NEXT: InvokeStaticOrDirect
-  /// CHECK-NEXT: LoadClass
+  /// CHECK-NEXT: LoadClass class_name:java.lang.System
   /// CHECK-NEXT: ClinitCheck
   /// CHECK-NEXT: StaticFieldGet
   /// CHECK-NEXT: LoadString
@@ -93,10 +93,10 @@
   /// CHECK-NEXT: InvokeVirtual
 
   /// CHECK-START: void LoadedByMyClassLoader.bar() inliner (after)
-  /// CHECK:      LoadClass
+  /// CHECK:      LoadClass class_name:FirstSeenByMyClassLoader
   /// CHECK-NEXT: ClinitCheck
                 /* We inlined FirstSeenByMyClassLoader.$inline$bar */
-  /// CHECK-NEXT: LoadClass
+  /// CHECK-NEXT: LoadClass class_name:java.lang.System
   /// CHECK-NEXT: ClinitCheck
   /// CHECK-NEXT: StaticFieldGet
   /// CHECK-NEXT: LoadString
@@ -105,12 +105,15 @@
 
   /// CHECK-START: void LoadedByMyClassLoader.bar() register (before)
                 /* Load and initialize FirstSeenByMyClassLoader */
-  /// CHECK:      LoadClass gen_clinit_check:true
+  /// CHECK:      LoadClass class_name:FirstSeenByMyClassLoader gen_clinit_check:true
                 /* Load and initialize System */
   // There may be MipsComputeBaseMethodAddress here.
-  /// CHECK:      LoadClass gen_clinit_check:true
-  /// CHECK-NEXT: StaticFieldGet
-  // There may be HArmDexCacheArraysBase or HX86ComputeBaseMethodAddress here.
+  /// CHECK:      LoadClass class_name:java.lang.System
+  // The ClinitCheck may (PIC) or may not (non-PIC) be merged into the LoadClass.
+  // (The merging checks for environment match but HLoadClass/kBootImageAddress
+  // used for non-PIC mode does not have an environment at all.)
+  /// CHECK:      StaticFieldGet
+  // There may be HX86ComputeBaseMethodAddress or MipsComputeBaseMethodAddress here.
   /// CHECK:      LoadString
   /// CHECK-NEXT: NullCheck
   /// CHECK-NEXT: InvokeVirtual
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index fe6ff13..db43768 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -331,32 +331,32 @@
   /// CHECK-START-X86: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
 
   /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
 
   /// CHECK-START-ARM: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
 
   /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
 
   /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
 
   /// CHECK-START-MIPS64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
 
   public static Class<?> $noinline$getStringClass() {
     // Prevent inlining to avoid the string comparison being optimized away.
@@ -369,34 +369,34 @@
   /// CHECK:                LoadClass load_kind:DexCacheViaMethod class_name:Other
 
   /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
 
   /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() pc_relative_fixups_x86 (after)
   /// CHECK-DAG:            X86ComputeBaseMethodAddress
-  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+  /// CHECK-DAG:            LoadClass load_kind:BssEntry class_name:Other
 
   /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
 
   /// CHECK-START-ARM: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
 
   /// CHECK-START-ARM: java.lang.Class Main.$noinline$getOtherClass() dex_cache_array_fixups_arm (after)
   /// CHECK-DAG:            ArmDexCacheArraysBase
-  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+  /// CHECK-DAG:            LoadClass load_kind:BssEntry class_name:Other
 
   /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
 
   /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
 
   /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() dex_cache_array_fixups_mips (after)
   /// CHECK-DAG:            MipsDexCacheArraysBase
-  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+  /// CHECK-DAG:            LoadClass load_kind:BssEntry class_name:Other
 
   /// CHECK-START-MIPS64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
 
   public static Class<?> $noinline$getOtherClass() {
     // Prevent inlining to avoid the string comparison being optimized away.
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
index af43973..a30a11a 100644
--- a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -196,7 +196,7 @@
   const-class v0, LMain;
   if-ne v0, v2, :exit
   :other_loop_entry
-  const-class v1, Ljava/lang/Class;  # LoadClass that can throw
+  const-class v1, LOther;  # LoadClass that can throw
   goto :loop_entry
   :exit
   return-object v0
@@ -250,7 +250,7 @@
   const/4 v0, 0
   if-ne p0, v0, :other_loop_entry
   :loop_entry
-  const-class v1, Ljava/lang/Class;  # LoadClass that can throw
+  const-class v1, LOther;  # LoadClass that can throw
   if-ne v0, p0, :exit
   :other_loop_entry
   sub-int v1, p0, p0
@@ -286,7 +286,7 @@
 .method public static licm3(III)I
   .registers 4
   :loop_entry
-  const-class v0, Ljava/lang/Class;  # LoadClass that can throw
+  const-class v0, LOther;  # LoadClass that can throw
   if-ne p1, p2, :exit
   goto :loop_body
 
diff --git a/test/559-checker-irreducible-loop/src/Main.java b/test/559-checker-irreducible-loop/src/Main.java
index ab84f81..023e769 100644
--- a/test/559-checker-irreducible-loop/src/Main.java
+++ b/test/559-checker-irreducible-loop/src/Main.java
@@ -67,3 +67,6 @@
 
   int myField;
 }
+
+class Other {
+}
diff --git a/test/924-threads/expected.txt b/test/924-threads/expected.txt
index 5406522..32e3368 100644
--- a/test/924-threads/expected.txt
+++ b/test/924-threads/expected.txt
@@ -28,3 +28,4 @@
 e1 = ALIVE|WAITING_WITH_TIMEOUT|SLEEPING|WAITING
 5 = ALIVE|RUNNABLE
 2 = TERMINATED
+[Thread[FinalizerDaemon,5,system], Thread[FinalizerWatchdogDaemon,5,system], Thread[HeapTaskDaemon,5,system], Thread[ReferenceQueueDaemon,5,system], Thread[Signal Catcher,5,system], Thread[main,5,main]]
diff --git a/test/924-threads/src/Main.java b/test/924-threads/src/Main.java
index 0487666..492a7ac 100644
--- a/test/924-threads/src/Main.java
+++ b/test/924-threads/src/Main.java
@@ -17,6 +17,7 @@
 import java.util.Arrays;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.concurrent.CountDownLatch;
 import java.util.HashMap;
 import java.util.List;
@@ -53,6 +54,8 @@
     printThreadInfo(t3);
 
     doStateTests();
+
+    doAllThreadsTests();
   }
 
   private static class Holder {
@@ -155,6 +158,18 @@
     printThreadState(t);
   }
 
+  private static void doAllThreadsTests() {
+    Thread[] threads = getAllThreads();
+    Arrays.sort(threads, THREAD_COMP);
+    System.out.println(Arrays.toString(threads));
+  }
+
+  private final static Comparator<Thread> THREAD_COMP = new Comparator<Thread>() {
+    public int compare(Thread o1, Thread o2) {
+      return o1.getName().compareTo(o2.getName());
+    }
+  };
+
   private final static Map<Integer, String> STATE_NAMES = new HashMap<Integer, String>();
   private final static List<Integer> STATE_KEYS = new ArrayList<Integer>();
   static {
@@ -213,4 +228,5 @@
   private static native Thread getCurrentThread();
   private static native Object[] getThreadInfo(Thread t);
   private static native int getThreadState(Thread t);
+  private static native Thread[] getAllThreads();
 }
diff --git a/test/924-threads/threads.cc b/test/924-threads/threads.cc
index 4abf8fc..1487b7c 100644
--- a/test/924-threads/threads.cc
+++ b/test/924-threads/threads.cc
@@ -100,5 +100,25 @@
   return state;
 }
 
+extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getAllThreads(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED) {
+  jint thread_count;
+  jthread* threads;
+
+  jvmtiError result = jvmti_env->GetAllThreads(&thread_count, &threads);
+  if (JvmtiErrorToException(env, result)) {
+    return nullptr;
+  }
+
+  auto callback = [&](jint index) {
+    return threads[index];
+  };
+  jobjectArray ret = CreateObjectArray(env, thread_count, "java/lang/Thread", callback);
+
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(threads));
+
+  return ret;
+}
+
 }  // namespace Test924Threads
 }  // namespace art
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 047adae..6068876 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -228,9 +228,15 @@
 
 # Disable 153-reference-stress temporarily until a fix arrives. b/33389022.
 # Disable 080-oom-fragmentation due to flakes. b/33795328
+# Disable 497-inlining-and-class-loader and 542-unresolved-access-check until
+#     they are rewritten. These tests use a broken class loader that tries to
+#     register a dex file that's already registered with a different loader.
+#     b/34193123
 ART_TEST_RUN_TEST_SKIP += \
   153-reference-stress \
-  080-oom-fragmentation
+  080-oom-fragmentation \
+  497-inlining-and-class-loader \
+  542-unresolved-access-check
 
 ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
         $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \