Merge "Allocate dex cache arrays in their class loader's linear alloc"
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 8e3b555..20c8023 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -66,6 +66,7 @@
 	optimizing/builder.cc \
 	optimizing/code_generator.cc \
 	optimizing/code_generator_utils.cc \
+	optimizing/constant_area_fixups_x86.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
 	optimizing/graph_checker.cc \
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index b60eebf..0ec3780 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -5922,115 +5922,6 @@
   return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup);
 }
 
-/**
- * Finds instructions that need the constant area base as an input.
- */
-class ConstantHandlerVisitor : public HGraphVisitor {
- public:
-  explicit ConstantHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
-
- private:
-  void VisitAdd(HAdd* add) OVERRIDE {
-    BinaryFP(add);
-  }
-
-  void VisitSub(HSub* sub) OVERRIDE {
-    BinaryFP(sub);
-  }
-
-  void VisitMul(HMul* mul) OVERRIDE {
-    BinaryFP(mul);
-  }
-
-  void VisitDiv(HDiv* div) OVERRIDE {
-    BinaryFP(div);
-  }
-
-  void VisitReturn(HReturn* ret) OVERRIDE {
-    HConstant* value = ret->InputAt(0)->AsConstant();
-    if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) {
-      ReplaceInput(ret, value, 0, true);
-    }
-  }
-
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
-    HandleInvoke(invoke);
-  }
-
-  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
-    HandleInvoke(invoke);
-  }
-
-  void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
-    HandleInvoke(invoke);
-  }
-
-  void BinaryFP(HBinaryOperation* bin) {
-    HConstant* rhs = bin->InputAt(1)->AsConstant();
-    if (rhs != nullptr && Primitive::IsFloatingPointType(bin->GetResultType())) {
-      ReplaceInput(bin, rhs, 1, false);
-    }
-  }
-
-  void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
-    // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
-    // address the constant area.
-    InitializeConstantAreaPointer(switch_insn);
-    HGraph* graph = GetGraph();
-    HBasicBlock* block = switch_insn->GetBlock();
-    HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
-        switch_insn->GetStartValue(),
-        switch_insn->GetNumEntries(),
-        switch_insn->InputAt(0),
-        base_,
-        switch_insn->GetDexPc());
-    block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
-  }
-
-  void InitializeConstantAreaPointer(HInstruction* user) {
-    // Ensure we only initialize the pointer once.
-    if (base_ != nullptr) {
-      return;
-    }
-
-    HGraph* graph = GetGraph();
-    HBasicBlock* entry = graph->GetEntryBlock();
-    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
-    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
-    entry->InsertInstructionBefore(base_, insert_pos);
-    DCHECK(base_ != nullptr);
-  }
-
-  void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
-    InitializeConstantAreaPointer(insn);
-    HGraph* graph = GetGraph();
-    HBasicBlock* block = insn->GetBlock();
-    HX86LoadFromConstantTable* load_constant =
-        new (graph->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
-    block->InsertInstructionBefore(load_constant, insn);
-    insn->ReplaceInput(load_constant, input_index);
-  }
-
-  void HandleInvoke(HInvoke* invoke) {
-    // Ensure that we can load FP arguments from the constant area.
-    for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
-      HConstant* input = invoke->InputAt(i)->AsConstant();
-      if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) {
-        ReplaceInput(invoke, input, i, true);
-      }
-    }
-  }
-
-  // The generated HX86ComputeBaseMethodAddress in the entry block needed as an
-  // input to the HX86LoadFromConstantTable instructions.
-  HX86ComputeBaseMethodAddress* base_;
-};
-
-void ConstantAreaFixups::Run() {
-  ConstantHandlerVisitor visitor(graph_);
-  visitor.VisitInsertionOrder();
-}
-
 // TODO: target as memory.
 void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) {
   if (!target.IsValid()) {
diff --git a/compiler/optimizing/constant_area_fixups_x86.cc b/compiler/optimizing/constant_area_fixups_x86.cc
new file mode 100644
index 0000000..c347000
--- /dev/null
+++ b/compiler/optimizing/constant_area_fixups_x86.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "constant_area_fixups_x86.h"
+
+namespace art {
+namespace x86 {
+
+/**
+ * Finds instructions that need the constant area base as an input.
+ */
+class ConstantHandlerVisitor : public HGraphVisitor {
+ public:
+  explicit ConstantHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
+
+ private:
+  void VisitAdd(HAdd* add) OVERRIDE {
+    BinaryFP(add);
+  }
+
+  void VisitSub(HSub* sub) OVERRIDE {
+    BinaryFP(sub);
+  }
+
+  void VisitMul(HMul* mul) OVERRIDE {
+    BinaryFP(mul);
+  }
+
+  void VisitDiv(HDiv* div) OVERRIDE {
+    BinaryFP(div);
+  }
+
+  void VisitReturn(HReturn* ret) OVERRIDE {
+    HConstant* value = ret->InputAt(0)->AsConstant();
+    if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) {
+      ReplaceInput(ret, value, 0, true);
+    }
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void BinaryFP(HBinaryOperation* bin) {
+    HConstant* rhs = bin->InputAt(1)->AsConstant();
+    if (rhs != nullptr && Primitive::IsFloatingPointType(bin->GetResultType())) {
+      ReplaceInput(bin, rhs, 1, false);
+    }
+  }
+
+  void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+    // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
+    // address the constant area.
+    InitializeConstantAreaPointer(switch_insn);
+    HGraph* graph = GetGraph();
+    HBasicBlock* block = switch_insn->GetBlock();
+    HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
+        switch_insn->GetStartValue(),
+        switch_insn->GetNumEntries(),
+        switch_insn->InputAt(0),
+        base_,
+        switch_insn->GetDexPc());
+    block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
+  }
+
+  void InitializeConstantAreaPointer(HInstruction* user) {
+    // Ensure we only initialize the pointer once.
+    if (base_ != nullptr) {
+      return;
+    }
+
+    HGraph* graph = GetGraph();
+    HBasicBlock* entry = graph->GetEntryBlock();
+    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
+    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
+    entry->InsertInstructionBefore(base_, insert_pos);
+    DCHECK(base_ != nullptr);
+  }
+
+  void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
+    InitializeConstantAreaPointer(insn);
+    HGraph* graph = GetGraph();
+    HBasicBlock* block = insn->GetBlock();
+    HX86LoadFromConstantTable* load_constant =
+        new (graph->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
+    block->InsertInstructionBefore(load_constant, insn);
+    insn->ReplaceInput(load_constant, input_index);
+  }
+
+  void HandleInvoke(HInvoke* invoke) {
+    // Ensure that we can load FP arguments from the constant area.
+    for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
+      HConstant* input = invoke->InputAt(i)->AsConstant();
+      if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) {
+        ReplaceInput(invoke, input, i, true);
+      }
+    }
+  }
+
+  // The generated HX86ComputeBaseMethodAddress in the entry block needed as an
+  // input to the HX86LoadFromConstantTable instructions.
+  HX86ComputeBaseMethodAddress* base_;
+};
+
+void ConstantAreaFixups::Run() {
+  ConstantHandlerVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+}
+
+}  // namespace x86
+}  // namespace art
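
As context for the code moved into this new file: the visitor rewrites FP-constant operands to go through HX86LoadFromConstantTable, and it creates the shared HX86ComputeBaseMethodAddress at most once, in the entry block. Below is a minimal standalone sketch of that create-once-and-share invariant; BaseMethodAddress and ConstantAreaState are illustrative stand-ins, not ART types.

```cpp
#include <cassert>
#include <memory>
#include <vector>

// Toy sketch (stand-in types, not the ART HIR classes) of the invariant the
// visitor maintains: the base-address computation is created at most once and
// shared by every constant-table load in the method.
struct BaseMethodAddress {};

class ConstantAreaState {
 public:
  // Mirrors InitializeConstantAreaPointer(): the first user creates the base,
  // every later user reuses it.
  BaseMethodAddress* GetOrCreateBase() {
    if (base_ == nullptr) {
      base_ = std::make_unique<BaseMethodAddress>();
    }
    return base_.get();
  }

 private:
  std::unique_ptr<BaseMethodAddress> base_;
};

int main() {
  ConstantAreaState state;
  std::vector<BaseMethodAddress*> users;
  for (int i = 0; i < 3; ++i) {
    users.push_back(state.GetOrCreateBase());  // E.g. three FP constants in one method.
  }
  assert(users[0] == users[1] && users[1] == users[2]);  // All loads share one base.
  return 0;
}
```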
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 930bb2c..d09631b 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -891,109 +891,7 @@
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
-ENTRY art_quick_alloc_object_rosalloc
-    // Fast path rosalloc allocation.
-    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
-    // r2, r3, r12: free.
-    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
-                                                              // Load the class (r2)
-    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
-    cbz    r2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
-                                                              // Check class status.
-    ldr    r3, [r2, #MIRROR_CLASS_STATUS_OFFSET]
-    cmp    r3, #MIRROR_CLASS_STATUS_INITIALIZED
-    bne    .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // Add a fake dependence from the
-                                                              // following access flag and size
-                                                              // loads to the status load.
-                                                              // This is to prevent those loads
-                                                              // from being reordered above the
-                                                              // status load and reading wrong
-                                                              // values (an alternative is to use
-                                                              // a load-acquire for the status).
-    eor    r3, r3, r3
-    add    r2, r2, r3
-                                                              // Check access flags has
-                                                              // kAccClassIsFinalizable
-    ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
-    tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
-    bne    .Lart_quick_alloc_object_rosalloc_slow_path
-
-    ldr    r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]     // Check if the thread local
-                                                              // allocation stack has room.
-                                                              // TODO: consider using ldrd.
-    ldr    r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
-    cmp    r3, r12
-    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
-
-    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (r3)
-    cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
-                                                              // local allocation
-    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // Compute the rosalloc bracket index
-                                                              // from the size.
-                                                              // Align up the size by the rosalloc
-                                                              // bracket quantum size and divide
-                                                              // by the quantum size and subtract
-                                                              // by 1. This code is a shorter but
-                                                              // equivalent version.
-    sub    r3, r3, #1
-    lsr    r3, r3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT
-                                                              // Load the rosalloc run (r12)
-    add    r12, r9, r3, lsl #POINTER_SIZE_SHIFT
-    ldr    r12, [r12, #THREAD_ROSALLOC_RUNS_OFFSET]
-                                                              // Load the free list head (r3). This
-                                                              // will be the return val.
-    ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
-    cbz    r3, .Lart_quick_alloc_object_rosalloc_slow_path
-    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
-    ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
-                                                              // and update the list head with the
-                                                              // next pointer.
-    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
-                                                              // Store the class pointer in the
-                                                              // header. This also overwrites the
-                                                              // next pointer. The offsets are
-                                                              // asserted to match.
-#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
-#error "Class pointer needs to overwrite next pointer."
-#endif
-    str    r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
-                                                              // Push the new object onto the thread
-                                                              // local allocation stack and
-                                                              // increment the thread local
-                                                              // allocation stack top.
-    ldr    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
-    str    r3, [r1], #COMPRESSED_REFERENCE_SIZE               // (Increment r1 as a side effect.)
-    str    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
-                                                              // Decrement the size of the free list
-    ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
-    sub    r1, #1
-                                                              // TODO: consider combining this store
-                                                              // and the list head store above using
-                                                              // strd.
-    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
-                                                              // Fence. This is "ish" not "ishst" so
-                                                              // that the code after this allocation
-                                                              // site will see the right values in
-                                                              // the fields of the class.
-                                                              // Alternatively we could use "ishst"
-                                                              // if we use load-acquire for the
-                                                              // class status load.)
-    dmb    ish
-    mov    r0, r3                                             // Set the return value and return.
-    bx     lr
-
-.Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
-    mov    r2, r9                     @ pass Thread::Current
-    bl     artAllocObjectFromCodeRosAlloc     @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_rosalloc
+GENERATE_ALL_ALLOC_ENTRYPOINTS
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
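
For reference on the removed fast path: its bracket-index comment claims that "align up by the rosalloc bracket quantum size, divide by the quantum size, subtract one" collapses into a subtract and a shift (quantum 16, shift 4, per the removed asm_support.h constants). A small standalone check of that equivalence, purely illustrative:

```cpp
#include <cassert>
#include <cstddef>

// Illustrative check of the shortcut used by the removed fast path: for the
// quantum-spaced brackets (16-byte quantum, sizes up to 32 * 16 = 512 bytes),
// "align up to the quantum, divide by the quantum, subtract one" is the same
// as "(size - 1) >> 4".
static size_t BracketIndexLong(size_t size) {
  size_t aligned = (size + 15) & ~static_cast<size_t>(15);  // Align up to the 16-byte quantum.
  return aligned / 16 - 1;                                  // Divide by the quantum and subtract one.
}

static size_t BracketIndexShort(size_t size) {
  return (size - 1) >> 4;  // The "shorter but equivalent version" from the removed assembly.
}

int main() {
  for (size_t size = 1; size <= 512; ++size) {
    assert(BracketIndexLong(size) == BracketIndexShort(size));
  }
  return 0;
}
```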
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index fbacdbc..ef5edbb 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -113,8 +113,7 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
 
-// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2f485ae..4a106e4 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -788,7 +788,6 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
 
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 95f0ccb..5c413d2 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -809,7 +809,6 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 69f6fe9..d98fc51 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -19,7 +19,6 @@
 
 #if defined(__cplusplus)
 #include "art_method.h"
-#include "gc/allocator/rosalloc.h"
 #include "lock_word.h"
 #include "mirror/class.h"
 #include "mirror/string.h"
@@ -54,14 +53,6 @@
 #define ADD_TEST_EQ(x, y)
 #endif
 
-#if defined(__LP64__)
-#define POINTER_SIZE_SHIFT 3
-#else
-#define POINTER_SIZE_SHIFT 2
-#endif
-ADD_TEST_EQ(static_cast<size_t>(1U << POINTER_SIZE_SHIFT),
-            static_cast<size_t>(__SIZEOF_POINTER__))
-
 // Size of references to the heap on the stack.
 #define STACK_REFERENCE_SIZE 4
 ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReference<art::mirror::Object>))
@@ -71,10 +62,6 @@
 ADD_TEST_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE),
             sizeof(art::mirror::CompressedReference<art::mirror::Object>))
 
-#define COMPRESSED_REFERENCE_SIZE_SHIFT 2
-ADD_TEST_EQ(static_cast<size_t>(1U << COMPRESSED_REFERENCE_SIZE_SHIFT),
-            static_cast<size_t>(COMPRESSED_REFERENCE_SIZE))
-
 // Note: these callee save methods loads require read barriers.
 // Offset of field Runtime::callee_save_methods_[kSaveAll]
 #define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
@@ -133,18 +120,6 @@
 #define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
-// Offset of field Thread::tlsPtr_.rosalloc_runs.
-#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
-ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET,
-            art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value())
-// Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top.
-#define THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 34 * __SIZEOF_POINTER__)
-ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET,
-            art::Thread::ThreadLocalAllocStackTopOffset<__SIZEOF_POINTER__>().Int32Value())
-// Offset of field Thread::tlsPtr_.thread_local_alloc_stack_end.
-#define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 35 * __SIZEOF_POINTER__)
-ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
-            art::Thread::ThreadLocalAllocStackEndOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offsets within java.lang.Object.
 #define MIRROR_OBJECT_CLASS_OFFSET 0
@@ -261,44 +236,6 @@
 ADD_TEST_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED),
             ~static_cast<uint32_t>(art::kObjectAlignment - 1))
 
-#define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128
-ADD_TEST_EQ(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 4
-ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSizeShift))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 15
-ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff0
-ADD_TEST_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32),
-            ~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff0
-ADD_TEST_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64),
-            ~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
-
-#define ROSALLOC_RUN_FREE_LIST_OFFSET 8
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListOffset()))
-
-#define ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET 0
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListHeadOffset()))
-
-#define ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET 16
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListSizeOffset()))
-
-#define ROSALLOC_SLOT_NEXT_OFFSET 0
-ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunSlotNextOffset()))
-// Assert this so that we can avoid zeroing the next field by installing the class pointer.
-ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, MIRROR_OBJECT_CLASS_OFFSET)
-
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 27fcfb0..4450ed9 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3501,28 +3501,31 @@
   if (!klass->IsInterface()) {
     // Initialize interfaces with default methods for the JLS.
     size_t num_direct_interfaces = klass->NumDirectInterfaces();
-    for (size_t i = 0; i < num_direct_interfaces; i++) {
+    // Only set up the (expensive) handle scope if we actually need to.
+    if (UNLIKELY(num_direct_interfaces > 0)) {
       StackHandleScope<1> hs_iface(self);
-      Handle<mirror::Class> handle_scope_iface(
-          hs_iface.NewHandle(mirror::Class::GetDirectInterface(self, klass, i)));
-      CHECK(handle_scope_iface.Get() != nullptr);
-      CHECK(handle_scope_iface->IsInterface());
-      if (handle_scope_iface->HasBeenRecursivelyInitialized()) {
-        // We have already done this once for this interface. Skip it.
-        continue;
-      }
-      // We cannot just call initialize class directly because we need to ensure that ALL interfaces
-      // with default methods are initialized. Non-default interface initialization will not affect
-      // other non-default super-interfaces.
-      bool iface_initialized = InitializeDefaultInterfaceRecursive(self,
-                                                                   handle_scope_iface,
-                                                                   can_init_statics,
-                                                                   can_init_parents);
-      if (!iface_initialized) {
-        ObjectLock<mirror::Class> lock(self, klass);
-        // Initialization failed because one of our interfaces with default methods is erroneous.
-        mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
-        return false;
+      MutableHandle<mirror::Class> handle_scope_iface(hs_iface.NewHandle<mirror::Class>(nullptr));
+      for (size_t i = 0; i < num_direct_interfaces; i++) {
+        handle_scope_iface.Assign(mirror::Class::GetDirectInterface(self, klass, i));
+        CHECK(handle_scope_iface.Get() != nullptr);
+        CHECK(handle_scope_iface->IsInterface());
+        if (handle_scope_iface->HasBeenRecursivelyInitialized()) {
+          // We have already done this for this interface. Skip it.
+          continue;
+        }
+        // We cannot just call initialize class directly because we need to ensure that ALL
+        // interfaces with default methods are initialized. Non-default interface initialization
+        // will not affect other non-default super-interfaces.
+        bool iface_initialized = InitializeDefaultInterfaceRecursive(self,
+                                                                     handle_scope_iface,
+                                                                     can_init_statics,
+                                                                     can_init_parents);
+        if (!iface_initialized) {
+          ObjectLock<mirror::Class> lock(self, klass);
+          // Initialization failed because one of our interfaces with default methods is erroneous.
+          mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
+          return false;
+        }
       }
     }
   }
@@ -3624,18 +3627,22 @@
                                                       bool can_init_parents) {
   CHECK(iface->IsInterface());
   size_t num_direct_ifaces = iface->NumDirectInterfaces();
-  // First we initialize all of iface's super-interfaces recursively.
-  for (size_t i = 0; i < num_direct_ifaces; i++) {
-    mirror::Class* super_iface = mirror::Class::GetDirectInterface(self, iface, i);
-    if (!super_iface->HasBeenRecursivelyInitialized()) {
-      // Recursive step
-      StackHandleScope<1> hs(self);
-      Handle<mirror::Class> handle_super_iface(hs.NewHandle(super_iface));
-      if (!InitializeDefaultInterfaceRecursive(self,
-                                               handle_super_iface,
-                                               can_init_statics,
-                                               can_init_parents)) {
-        return false;
+  // Only create the (expensive) handle scope if we need it.
+  if (UNLIKELY(num_direct_ifaces > 0)) {
+    StackHandleScope<1> hs(self);
+    MutableHandle<mirror::Class> handle_super_iface(hs.NewHandle<mirror::Class>(nullptr));
+    // First we initialize all of iface's super-interfaces recursively.
+    for (size_t i = 0; i < num_direct_ifaces; i++) {
+      mirror::Class* super_iface = mirror::Class::GetDirectInterface(self, iface, i);
+      if (!super_iface->HasBeenRecursivelyInitialized()) {
+        // Recursive step
+        handle_super_iface.Assign(super_iface);
+        if (!InitializeDefaultInterfaceRecursive(self,
+                                                 handle_super_iface,
+                                                 can_init_statics,
+                                                 can_init_parents)) {
+          return false;
+        }
       }
     }
   }
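
The rewritten loops above hoist the StackHandleScope out of the per-interface iteration and reuse a single MutableHandle via Assign(), paying for the scope only when there are direct interfaces. A toy sketch of that shape follows; Class, MutableHandle and StackHandleScope here are simplified stand-ins for the ART types, not the real classes.

```cpp
#include <cstdio>
#include <vector>

// Simplified stand-ins for the ART types (hypothetical, for illustration only).
struct Class { int id; };

struct MutableHandle {
  Class* ptr = nullptr;
  void Assign(Class* k) { ptr = k; }  // Re-point the same handle slot.
  Class* Get() const { return ptr; }
};

struct StackHandleScope {
  StackHandleScope() { std::puts("scope created"); }  // Stands in for the expensive setup.
  MutableHandle NewHandle(Class* k) { MutableHandle h; h.Assign(k); return h; }
};

// Shape of the rewritten loop: create the scope once, only when there is work,
// and reuse a single mutable handle for every direct interface.
void InitializeInterfaces(const std::vector<Class*>& direct_interfaces) {
  if (!direct_interfaces.empty()) {  // Only set up the (expensive) scope when needed.
    StackHandleScope hs;
    MutableHandle handle = hs.NewHandle(nullptr);
    for (Class* iface : direct_interfaces) {
      handle.Assign(iface);  // One handle slot, re-assigned each iteration.
      std::printf("initializing interface %d\n", handle.Get()->id);
    }
  }
}

int main() {
  Class a{1}, b{2};
  InitializeInterfaces({&a, &b});  // Creates exactly one scope for both interfaces.
  InitializeInterfaces({});        // Creates no scope at all.
  return 0;
}
```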
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 3ce3d63..87f1392 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -131,7 +131,6 @@
 
    private:
     Slot* next_;  // Next slot in the list.
-    friend class RosAlloc;
   };
 
   // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to
@@ -303,7 +302,6 @@
     // free without traversing the whole free list.
     uint32_t size_;
     uint32_t padding_ ATTRIBUTE_UNUSED;
-    friend class RosAlloc;
   };
 
   // Represents a run of memory slots of the same size.
@@ -484,7 +482,7 @@
   static constexpr uint8_t kMagicNumFree = 43;
   // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
   static constexpr size_t kNumOfSizeBrackets = kNumRosAllocThreadLocalSizeBrackets;
-  // The number of smaller size brackets that are the quantum size apart.
+  // The number of smaller size brackets that are 16 bytes apart.
   static constexpr size_t kNumOfQuantumSizeBrackets = 32;
   // The sizes (the slot sizes, in bytes) of the size brackets.
   static size_t bracketSizes[kNumOfSizeBrackets];
@@ -522,7 +520,9 @@
   }
   // Returns true if the given allocation size is for a thread local allocation.
   static bool IsSizeForThreadLocal(size_t size) {
-    bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
+    DCHECK_GT(kNumThreadLocalSizeBrackets, 0U);
+    size_t max_thread_local_bracket_idx = kNumThreadLocalSizeBrackets - 1;
+    bool is_size_for_thread_local = size <= bracketSizes[max_thread_local_bracket_idx];
     DCHECK(size > kLargeSizeThreshold ||
            (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
     return is_size_for_thread_local;
@@ -634,16 +634,6 @@
   // are less than this index. We use shared (current) runs for the rest.
   static const size_t kNumThreadLocalSizeBrackets = 8;
 
-  // The size of the largest bracket we use thread-local runs for.
-  // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
-  static const size_t kMaxThreadLocalBracketSize = 128;
-
-  // The bracket size increment for the brackets of size <= 512 bytes.
-  static constexpr size_t kBracketQuantumSize = 16;
-
-  // Equal to Log2(kQuantumBracketSizeIncrement).
-  static constexpr size_t kBracketQuantumSizeShift = 4;
-
  private:
   // The base address of the memory region that's managed by this allocator.
   uint8_t* base_;
@@ -780,19 +770,6 @@
            size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
   ~RosAlloc();
 
-  static size_t RunFreeListOffset() {
-    return OFFSETOF_MEMBER(Run, free_list_);
-  }
-  static size_t RunFreeListHeadOffset() {
-    return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
-  }
-  static size_t RunFreeListSizeOffset() {
-    return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
-  }
-  static size_t RunSlotNextOffset() {
-    return OFFSETOF_MEMBER(Slot, next_);
-  }
-
   // If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization.
   // If used, this may cause race conditions if multiple threads are allocating at the same time.
   template<bool kThreadSafe = true>
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 9fb5d0d..4eea3f3 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -74,7 +74,7 @@
                                              const OatFile* oat_file,
                                              std::vector<std::unique_ptr<const DexFile>>& vec) {
   // Add one for the oat file.
-  jlongArray long_array = env->NewLongArray(static_cast<jsize>(1u + vec.size()));
+  jlongArray long_array = env->NewLongArray(static_cast<jsize>(kDexFileIndexStart + vec.size()));
   if (env->ExceptionCheck() == JNI_TRUE) {
     return nullptr;
   }
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 3371a39..9eee156 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -22,7 +22,7 @@
 
 #include "base/logging.h"
 #include "base/stl_util.h"
-#include "dex_file.h"
+#include "dex_file-inl.h"
 #include "gc/space/image_space.h"
 #include "oat_file_assistant.h"
 #include "thread-inl.h"
@@ -30,7 +30,9 @@
 namespace art {
 
 // For b/21333911.
-static constexpr bool kDuplicateClassesCheck = false;
+// Only enabled for debug builds to prevent bit rot. The check causes too large a performance
+// regression to enable for normal builds.
+static constexpr bool kDuplicateClassesCheck = kIsDebugBuild;
 
 const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) {
   WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
@@ -115,9 +117,9 @@
        current_class_index_(current_class_index),
        from_loaded_oat_(from_loaded_oat) {}
 
-  DexFileAndClassPair(DexFileAndClassPair&& rhs) = default;
+  DexFileAndClassPair(const DexFileAndClassPair& rhs) = default;
 
-  DexFileAndClassPair& operator=(DexFileAndClassPair&& rhs) = default;
+  DexFileAndClassPair& operator=(const DexFileAndClassPair& rhs) = default;
 
   const char* GetCachedDescriptor() const {
     return cached_descriptor_;
@@ -139,7 +141,7 @@
 
   void Next() {
     ++current_class_index_;
-    cached_descriptor_ = nullptr;
+    cached_descriptor_ = GetClassDescriptor(dex_file_.get(), current_class_index_);
   }
 
   size_t GetCurrentClassIndex() const {
@@ -162,7 +164,7 @@
   }
 
   const char* cached_descriptor_;
-  std::unique_ptr<const DexFile> dex_file_;
+  std::shared_ptr<const DexFile> dex_file_;
   size_t current_class_index_;
   bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
                           // and what was loaded before. Any old duplicates must have been
@@ -215,8 +217,17 @@
 
   // Add dex files from already loaded oat files, but skip boot.
   const OatFile* boot_oat = GetBootOatFile();
+  // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
+  // need to check both against each other since they would have resolved the same way at compile
+  // time.
+  std::unordered_set<std::string> unique_locations;
   for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) {
-    if (loaded_oat_file.get() != boot_oat) {
+    DCHECK_NE(loaded_oat_file.get(), oat_file);
+    const std::string& location = loaded_oat_file->GetLocation();
+    if (loaded_oat_file.get() != boot_oat &&
+        location != oat_file->GetLocation() &&
+        unique_locations.find(location) == unique_locations.end()) {
+      unique_locations.insert(location);
       AddDexFilesFromOat(loaded_oat_file.get(), /*already_loaded*/true, &queue);
     }
   }
@@ -232,12 +243,12 @@
   // Now drain the queue.
   while (!queue.empty()) {
     // Modifying the top element is only safe if we pop right after.
-    DexFileAndClassPair compare_pop(std::move(const_cast<DexFileAndClassPair&>(queue.top())));
+    DexFileAndClassPair compare_pop(queue.top());
     queue.pop();
 
     // Compare against the following elements.
     while (!queue.empty()) {
-      DexFileAndClassPair top(std::move(const_cast<DexFileAndClassPair&>(queue.top())));
+      DexFileAndClassPair top(queue.top());
 
       if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
         // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
@@ -249,7 +260,6 @@
                            top.GetDexFile()->GetLocation().c_str());
           return true;
         }
-        // Pop it.
         queue.pop();
         AddNext(&top, &queue);
       } else {
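
The oat_file_manager.cc changes above make DexFileAndClassPair copyable (backed by a shared_ptr to the dex file) so that draining the priority_queue copies queue.top() instead of const_cast-moving out of it, and they skip re-checking oat files loaded from the same location. The sketch below illustrates only the copy-from-top() part, with stand-in types; DexFileStub, the comparator, and the field layout are not the ART declarations.

```cpp
#include <cassert>
#include <memory>
#include <queue>
#include <string>
#include <vector>

// Stand-in types (not the ART declarations): a copyable pair that shares
// ownership of its dex file, so popping from a priority_queue only copies a
// shared_ptr and a couple of scalars.
struct DexFileStub { std::string location; };

struct DexFileAndClassPair {
  std::shared_ptr<const DexFileStub> dex_file;
  size_t current_class_index;
  bool from_loaded_oat;
};

struct Compare {
  bool operator()(const DexFileAndClassPair& a, const DexFileAndClassPair& b) const {
    return a.current_class_index > b.current_class_index;  // Smallest class index on top.
  }
};

int main() {
  auto dex = std::make_shared<const DexFileStub>(DexFileStub{"/data/app/base.apk"});
  std::priority_queue<DexFileAndClassPair, std::vector<DexFileAndClassPair>, Compare> queue;
  queue.push({dex, 0, false});
  queue.push({dex, 1, true});
  while (!queue.empty()) {
    DexFileAndClassPair pair(queue.top());  // Plain copy; no const_cast-move out of top().
    queue.pop();
    assert(pair.dex_file != nullptr);       // Ownership is shared; the queue element was never gutted.
  }
  return 0;
}
```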
diff --git a/runtime/thread.h b/runtime/thread.h
index 8f3461a..8cea10c 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -626,24 +626,6 @@
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_objects));
   }
 
-  template<size_t pointer_size>
-  static ThreadOffset<pointer_size> RosAllocRunsOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
-                                                                rosalloc_runs));
-  }
-
-  template<size_t pointer_size>
-  static ThreadOffset<pointer_size> ThreadLocalAllocStackTopOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
-                                                                thread_local_alloc_stack_top));
-  }
-
-  template<size_t pointer_size>
-  static ThreadOffset<pointer_size> ThreadLocalAllocStackEndOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
-                                                                thread_local_alloc_stack_end));
-  }
-
   // Size of stack less any space reserved for stack overflow
   size_t GetStackSize() const {
     return tlsPtr_.stack_size - (tlsPtr_.stack_end - tlsPtr_.stack_begin);
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 33c90e3..02c93cf 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -391,6 +391,34 @@
   }
 }
 
+// Check whether there is another register in the search map that is locked the same way as the
+// register in the src map. This establishes an alias.
+static bool FindLockAliasedRegister(
+    uint32_t src,
+    const AllocationTrackingSafeMap<uint32_t, uint32_t, kAllocatorTagVerifier>& src_map,
+    const AllocationTrackingSafeMap<uint32_t, uint32_t, kAllocatorTagVerifier>& search_map) {
+  auto it = src_map.find(src);
+  if (it == src_map.end()) {
+    // "Not locked" is trivially aliased.
+    return true;
+  }
+  uint32_t src_lock_levels = it->second;
+  if (src_lock_levels == 0) {
+    // "Not locked" is trivially aliased.
+    return true;
+  }
+
+  // Scan the map for the same value.
+  for (const std::pair<uint32_t, uint32_t>& pair : search_map) {
+    if (pair.first != src && pair.second == src_lock_levels) {
+      return true;
+    }
+  }
+
+  // Nothing found, no alias.
+  return false;
+}
+
 bool RegisterLine::MergeRegisters(MethodVerifier* verifier, const RegisterLine* incoming_line) {
   bool changed = false;
   DCHECK(incoming_line != nullptr);
@@ -417,9 +445,29 @@
         size_t depths = reg_to_lock_depths_.count(idx);
         size_t incoming_depths = incoming_line->reg_to_lock_depths_.count(idx);
         if (depths != incoming_depths) {
-          if (depths == 0 || incoming_depths == 0) {
-            reg_to_lock_depths_.erase(idx);
-          } else {
+          // Stack levels aren't matching. This is potentially bad, as we don't do a
+          // flow-sensitive analysis.
+          // However, this could be an alias of something locked in one path, and the alias was
+          // destroyed in another path. It is fine to drop this as long as there's another alias
+          // for the lock around. The last vanishing alias will then report that things would be
+          // left unlocked. We need to check for aliases for both lock levels.
+          //
+          // Example (lock status in curly braces as pairs of register and lock levels):
+          //
+          //                            lock v1 {v1=1}
+          //                        |                    |
+          //              v0 = v1 {v0=1, v1=1}       v0 = v2 {v1=1}
+          //                        |                    |
+          //                                 {v1=1}
+          //                                         // Dropping v0, as the status can't be merged
+          //                                         // but the lock info ("locked at depth 1" and
+          //                                         // "not locked at all") is available.
+          if (!FindLockAliasedRegister(idx,
+                                       reg_to_lock_depths_,
+                                       reg_to_lock_depths_) ||
+              !FindLockAliasedRegister(idx,
+                                       incoming_line->reg_to_lock_depths_,
+                                       reg_to_lock_depths_)) {
             verifier->Fail(VERIFY_ERROR_LOCKING);
             if (kDumpLockFailures) {
               LOG(WARNING) << "mismatched stack depths for register v" << idx
@@ -429,20 +477,51 @@
             }
             break;
           }
+          // We found aliases, set this to zero.
+          reg_to_lock_depths_.erase(idx);
         } else if (depths > 0) {
           // Check whether they're actually the same levels.
           uint32_t locked_levels = reg_to_lock_depths_.find(idx)->second;
           uint32_t incoming_locked_levels = incoming_line->reg_to_lock_depths_.find(idx)->second;
           if (locked_levels != incoming_locked_levels) {
-            verifier->Fail(VERIFY_ERROR_LOCKING);
-            if (kDumpLockFailures) {
-              LOG(WARNING) << "mismatched lock levels for register v" << idx << ": "
-                  << std::hex << locked_levels << std::dec  << " != "
-                  << std::hex << incoming_locked_levels << std::dec << " in "
-                  << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                  *verifier->GetMethodReference().dex_file);
+            // Lock levels aren't matching. This is potentially bad, as we don't do a
+            // flow-sensitive analysis.
+            // However, this could be an alias of something locked in one path, and the alias was
+            // destroyed in another path. It is fine to drop this as long as there's another alias
+            // for the lock around. The last vanishing alias will then report that things would be
+            // left unlocked. We need to check for aliases for both lock levels.
+            //
+            // Example (lock status in curly braces as pairs of register and lock levels):
+            //
+            //                          lock v1 {v1=1}
+            //                          lock v2 {v1=1, v2=2}
+            //                        |                      |
+            //         v0 = v1 {v0=1, v1=1, v2=2}  v0 = v2 {v0=2, v1=1, v2=2}
+            //                        |                      |
+            //                             {v1=1, v2=2}
+            //                                           // Dropping v0, as the status can't be
+            //                                           // merged but the lock info ("locked at
+            //                                           // depth 1" and "locked at depth 2") is
+            //                                           // available.
+            if (!FindLockAliasedRegister(idx,
+                                         reg_to_lock_depths_,
+                                         reg_to_lock_depths_) ||
+                !FindLockAliasedRegister(idx,
+                                         incoming_line->reg_to_lock_depths_,
+                                         reg_to_lock_depths_)) {
+              // No alias on the current or the incoming side; we'll lose information.
+              verifier->Fail(VERIFY_ERROR_LOCKING);
+              if (kDumpLockFailures) {
+                LOG(WARNING) << "mismatched lock levels for register v" << idx << ": "
+                    << std::hex << locked_levels << std::dec  << " != "
+                    << std::hex << incoming_locked_levels << std::dec << " in "
+                    << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                    *verifier->GetMethodReference().dex_file);
+              }
+              break;
             }
-            break;
+            // We found aliases, set this to zero.
+            reg_to_lock_depths_.erase(idx);
           }
         }
       }
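
To make the new merge rule concrete: in the first example from the comments ({v0=1, v1=1} merged with {v1=1}), v0 is dropped without a verifier failure because the alias check succeeds on both sides. Below is a standalone restatement of FindLockAliasedRegister over a plain std::map with that exact case; the verifier's allocation-tracking map type differs, but the logic is the one added above.

```cpp
#include <cassert>
#include <cstdint>
#include <map>

// Restatement of the new FindLockAliasedRegister over a plain std::map (the
// verifier uses an allocation-tracking map; the logic is the same).
static bool FindLockAliasedRegister(uint32_t src,
                                    const std::map<uint32_t, uint32_t>& src_map,
                                    const std::map<uint32_t, uint32_t>& search_map) {
  auto it = src_map.find(src);
  if (it == src_map.end() || it->second == 0) {
    return true;  // "Not locked" is trivially aliased.
  }
  for (const auto& pair : search_map) {
    if (pair.first != src && pair.second == it->second) {
      return true;  // Another register is locked at the same depths: an alias exists.
    }
  }
  return false;  // Nothing found, no alias.
}

int main() {
  // The first example from the comments: one path merges {v0=1, v1=1} with {v1=1}.
  std::map<uint32_t, uint32_t> current = {{0, 1}, {1, 1}};  // v0 and v1 locked at depth 1.
  std::map<uint32_t, uint32_t> incoming = {{1, 1}};         // Only v1 locked at depth 1.
  // Dropping v0 is fine: v1 still aliases the same lock in the current line, and
  // v0 carries no lock info at all in the incoming line.
  assert(FindLockAliasedRegister(0, current, current));
  assert(FindLockAliasedRegister(0, incoming, current));
  return 0;
}
```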
diff --git a/test/088-monitor-verification/src/TwoPath.java b/test/088-monitor-verification/src/TwoPath.java
index 2542de7..bdc15ad 100644
--- a/test/088-monitor-verification/src/TwoPath.java
+++ b/test/088-monitor-verification/src/TwoPath.java
@@ -31,6 +31,8 @@
      * Conditionally uses one of the synchronized objects.
      */
     public static void twoPath(Object obj1, Object obj2, int x) {
+        Main.assertIsManaged();
+
         Object localObj;
 
         synchronized (obj1) {
diff --git a/test/131-structural-change/expected.txt b/test/131-structural-change/expected.txt
index cc7713d..1d19278 100644
--- a/test/131-structural-change/expected.txt
+++ b/test/131-structural-change/expected.txt
@@ -1,2 +1,3 @@
+JNI_OnLoad called
 Should really reach here.
 Done.
diff --git a/test/131-structural-change/src/Main.java b/test/131-structural-change/src/Main.java
index 6cbbd12..c748899 100644
--- a/test/131-structural-change/src/Main.java
+++ b/test/131-structural-change/src/Main.java
@@ -35,7 +35,7 @@
             e.printStackTrace(System.out);
         }
 
-        boolean haveOatFile = hasOat();
+        boolean haveOatFile = hasOatFile();
         boolean gotError = false;
         try {
             Class<?> bClass = getClass().getClassLoader().loadClass("B");
@@ -45,10 +45,10 @@
             e.printStackTrace(System.out);
         }
         if (haveOatFile ^ gotError) {
-            System.out.println("Did not get expected error.");
+            System.out.println("Did not get expected error. " + haveOatFile + " " + gotError);
         }
         System.out.println("Done.");
     }
 
-    private native static boolean hasOat();
+    private native static boolean hasOatFile();
 }
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index ad64b68..e114a2e 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -214,19 +214,24 @@
   055-enum-performance \
   133-static-invoke-super
 
+# disable timing sensitive tests on "dist" builds.
+ifdef dist_goal
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+        $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(ALL_ADDRESS_SIZES))
+endif
+
 # Tests that require python3.
 TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS := \
   960-default-smali \
   961-default-iface-resolution-generated \
   964-default-iface-init-generated \
 
-# disable timing sensitive tests on "dist" builds.
-ifdef dist_goal
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-        $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(ALL_ADDRESS_SIZES))
+# Check if we have python3 to run our tests.
+ifeq ($(wildcard /usr/bin/python3),)
+  $(warning "No python3 found. Disabling tests: $(TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS)")
 
-  # Currently disable tsts requiring python3.
+  # Currently disable tests requiring python3 when it is not installed.
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
         $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS), $(ALL_ADDRESS_SIZES))
@@ -324,13 +329,15 @@
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),130-hprof,$(ALL_ADDRESS_SIZES))
 
 # 131 is an old test. The functionality has been implemented at an earlier stage and is checked
-# in tests 138.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+# in test 138. Blacklisted for debug builds since these builds have duplicate classes checks which
+# punt to interpreter.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),debug,$(PREBUILD_TYPES), \
     $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),131-structural-change,$(ALL_ADDRESS_SIZES))
 
-# 138-duplicate-classes-check. Turned off temporarily, b/21333911.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+# 138-duplicate-classes-check. Turned on for debug builds since debug builds have duplicate classes
+# checks enabled, b/21333911.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),ndebug,$(PREBUILD_TYPES), \
     $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),138-duplicate-classes-check,$(ALL_ADDRESS_SIZES))