Merge "AArch64: Enable LONG_* and INT_* opcodes."
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 3c33975..a186e85 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -99,8 +99,8 @@
     LOCAL_MULTILIB := $$(art_multilib)
   endif
 
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    include external/libcxx/libcxx.mk
     include $(BUILD_EXECUTABLE)
     ART_TARGET_EXECUTABLES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
   else # host
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 952f79a..765216b 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -182,6 +182,7 @@
   endif
 
   LOCAL_CFLAGS := $(ART_TEST_CFLAGS)
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
@@ -191,7 +192,6 @@
     LOCAL_MODULE_PATH_32 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
-    include external/libcxx/libcxx.mk
     include $(BUILD_EXECUTABLE)
     
     ART_TARGET_GTEST_EXECUTABLES$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$(TARGET_ARCH)/$$(LOCAL_MODULE)
@@ -216,7 +216,7 @@
     LOCAL_STATIC_LIBRARIES += libcutils libvixl
     ifneq ($(WITHOUT_HOST_CLANG),true)
         # GCC host compiled tests fail with this linked, presumably due to destructors that run.
-        LOCAL_STATIC_LIBRARIES += libgtest_host
+        LOCAL_STATIC_LIBRARIES += libgtest_libc++_host
     endif
     LOCAL_LDLIBS += -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index 6965326..9e5f3d6 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -46,6 +46,7 @@
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
   LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/build/Android.common.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/build/Android.libarttest.mk
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS := $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
@@ -56,13 +57,12 @@
     LOCAL_MODULE_PATH_32 := $(ART_TEST_OUT)/$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $(ART_TEST_OUT)/$(ART_TARGET_ARCH_64)
     LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
-    include external/libcxx/libcxx.mk
     include $(BUILD_SHARED_LIBRARY)
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
     LOCAL_STATIC_LIBRARIES := libcutils
-    LOCAL_LDLIBS := -ldl -lpthread
+    LOCAL_LDLIBS += -ldl -lpthread
     ifeq ($(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
     endif
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4108ba4..cb9e41a 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -194,8 +194,8 @@
   LOCAL_GENERATED_SOURCES += $$(ENUM_OPERATOR_OUT_GEN)
 
   LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS)
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    include external/libcxx/libcxx.mk
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
   else # host
@@ -247,7 +247,7 @@
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
 
   ifeq ($$(art_target_or_host),host)
-    LOCAL_LDLIBS := -ldl -lpthread
+    LOCAL_LDLIBS += -ldl -lpthread
   endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index b45d6a4..62973af 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -326,6 +326,81 @@
       GrowableArray<BasicBlock*>::Iterator all_nodes_iterator_;    /**< @brief The list of all the nodes */
   };
 
+  /**
+   * @class TopologicalSortIterator
+   * @brief Used to perform a Topological Sort Iteration of a MIRGraph.
+   */
+  class TopologicalSortIterator : public DataflowIterator {
+    public:
+      /**
+       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
+       * @param mir_graph The MIRGraph considered.
+       */
+      explicit TopologicalSortIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ?
+            mir_graph->GetTopologicalSortOrder()->Size() : 0) {
+        // Compute the topological sort order before caching it, so the cached
+        // list and end index are valid on first use.
+        if (mir_graph->GetTopologicalSortOrder() == nullptr) {
+          mir_graph->ComputeTopologicalSortOrder();
+          end_idx_ = mir_graph->GetTopologicalSortOrder()->Size();
+        }
+        idx_ = start_idx_;
+        block_id_list_ = mir_graph->GetTopologicalSortOrder();
+      }
+
+      /**
+       * @brief Get the next BasicBlock depending on iteration order.
+       * @param had_change did the user of the iteration change the previous BasicBlock.
+       * @return the next BasicBlock following the iteration order, 0 if finished.
+       */
+      virtual BasicBlock* Next(bool had_change = false) {
+        // Update changed: if had_change is true, we remember it for the whole iteration.
+        changed_ |= had_change;
+
+        return ForwardSingleNext();
+      }
+  };
+
+  /**
+   * @class RepeatingTopologicalSortIterator
+   * @brief Used to perform a repeating Topological Sort Iteration of a MIRGraph.
+   * @details If any block reported a change, another full iteration is started once the current
+   *          one completes, until an iteration finishes with no change.
+   */
+  class RepeatingTopologicalSortIterator : public DataflowIterator {
+    public:
+      /**
+       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
+       * @param mir_graph The MIRGraph considered.
+       */
+      explicit RepeatingTopologicalSortIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ?
+            mir_graph->GetTopologicalSortOrder()->Size() : 0) {
+        // Compute the topological sort order before caching it, so the cached
+        // list and end index are valid on first use.
+        if (mir_graph->GetTopologicalSortOrder() == nullptr) {
+          mir_graph->ComputeTopologicalSortOrder();
+          end_idx_ = mir_graph->GetTopologicalSortOrder()->Size();
+        }
+        idx_ = start_idx_;
+        block_id_list_ = mir_graph->GetTopologicalSortOrder();
+      }
+
+      /**
+       * @brief Get the next BasicBlock depending on iteration order.
+       * @param had_change did the user of the iteration change the previous BasicBlock.
+       * @return the next BasicBlock following the iteration order, 0 if finished.
+       */
+      virtual BasicBlock* Next(bool had_change = false) {
+        // Update changed: if had_change is true, we remember it for the whole iteration.
+        changed_ |= had_change;
+
+        return ForwardRepeatNext();
+      }
+  };
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_DATAFLOW_ITERATOR_H_
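
Both new iterators are consumed like the existing dataflow iterators: Next() takes whether the
previous block changed, and the repeating variant keeps cycling until a full iteration reports no
change. A minimal usage sketch, assuming the header above; DoSomething() is a hypothetical
per-block optimization, not part of this change:

    // Walk the MIRGraph in topological order until an iteration makes no change.
    void WalkUntilStable(MIRGraph* mir_graph) {
      RepeatingTopologicalSortIterator iter(mir_graph);
      bool change = false;
      for (BasicBlock* bb = iter.Next(false); bb != nullptr; bb = iter.Next(change)) {
        change = DoSomething(bb);  // Hypothetical; returns true if bb was modified.
      }
    }
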
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 4ba6677..c34a9f5 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -17,6 +17,7 @@
 #include "mir_graph.h"
 
 #include <inttypes.h>
+#include <queue>
 
 #include "base/stl_util.h"
 #include "compiler_internals.h"
@@ -76,6 +77,7 @@
       dfs_order_(NULL),
       dfs_post_order_(NULL),
       dom_post_order_traversal_(NULL),
+      topological_order_(nullptr),
       i_dom_list_(NULL),
       def_block_matrix_(NULL),
       temp_dalvik_register_v_(NULL),
@@ -1337,6 +1339,104 @@
   DoDFSPreOrderSSARename(GetEntryBlock());
 }
 
+void MIRGraph::ComputeTopologicalSortOrder() {
+  std::queue<BasicBlock*> q;
+  std::map<int, int> visited_cnt_values;
+
+  // Clear the nodes.
+  ClearAllVisitedFlags();
+
+  // Create the topological order if need be.
+  if (topological_order_ == nullptr) {
+    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, 0);
+  }
+  topological_order_->Reset();
+
+  // Set up the visited counter map for all basic blocks; each counter defaults to zero.
+  // Also fill the initial queue.
+  GrowableArray<BasicBlock*>::Iterator iterator(&block_list_);
+
+  while (true) {
+    BasicBlock* bb = iterator.Next();
+
+    if (bb == nullptr) {
+      break;
+    }
+
+    if (bb->hidden == true) {
+      continue;
+    }
+
+    visited_cnt_values[bb->id] = bb->predecessors->Size();
+
+    GrowableArray<BasicBlockId>::Iterator pred_iterator(bb->predecessors);
+    // To process loops we should not wait for dominators.
+    while (true) {
+      BasicBlock* pred_bb = GetBasicBlock(pred_iterator.Next());
+
+      if (pred_bb == nullptr) {
+        break;
+      }
+
+      if (pred_bb->dominators == nullptr || pred_bb->hidden == true) {
+        continue;
+      }
+
+      // Skip the backward branch.
+      if (pred_bb->dominators->IsBitSet(bb->id) != 0) {
+        visited_cnt_values[bb->id]--;
+      }
+    }
+
+    // Add entry block to queue.
+    if (visited_cnt_values[bb->id] == 0) {
+      q.push(bb);
+    }
+  }
+
+  while (q.size() > 0) {
+    // Get top.
+    BasicBlock* bb = q.front();
+    q.pop();
+
+    DCHECK_EQ(bb->hidden, false);
+
+    if (bb->IsExceptionBlock() == true) {
+      continue;
+    }
+
+    // We've visited all the predecessors. So, we can visit bb.
+    if (bb->visited == false) {
+      bb->visited = true;
+
+      // Now add the basic block.
+      topological_order_->Insert(bb->id);
+
+      // Reduce the visited count for all successors and queue those whose count reaches zero.
+      ChildBlockIterator succ_iter(bb, this);
+      BasicBlock* successor = succ_iter.Next();
+      while (successor != nullptr) {
+        // One more predecessor was visited.
+        visited_cnt_values[successor->id]--;
+
+        if (visited_cnt_values[successor->id] <= 0 && successor->visited == false && successor->hidden == false) {
+          q.push(successor);
+        }
+
+        // Take the next successor.
+        successor = succ_iter.Next();
+      }
+    }
+  }
+}
+
+bool BasicBlock::IsExceptionBlock() const {
+  return block_type == kExceptionHandling;
+}
+
 ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph)
     : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false),
       visited_taken_(false), have_successors_(false) {
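
ComputeTopologicalSortOrder() above is essentially Kahn's algorithm: each block starts with a
counter equal to its predecessor count (backward branches, meaning predecessors that the block
itself dominates, are pre-subtracted so loop headers are not stuck waiting on their back-edges),
and a block is emitted once its counter reaches zero. The core scheme in isolation, a sketch using
std containers instead of the arena-backed GrowableArray:

    #include <cstddef>
    #include <queue>
    #include <vector>

    // Kahn's algorithm: repeatedly emit nodes whose remaining predecessor count
    // is zero. succs[i] lists the successors of node i; pred_cnt[i] is its in-degree.
    std::vector<int> TopologicalSort(const std::vector<std::vector<int>>& succs,
                                     std::vector<int> pred_cnt) {
      std::vector<int> order;
      std::queue<int> q;
      for (std::size_t i = 0; i < pred_cnt.size(); ++i) {
        if (pred_cnt[i] == 0) {
          q.push(static_cast<int>(i));  // Entry nodes seed the queue.
        }
      }
      while (!q.empty()) {
        int n = q.front();
        q.pop();
        order.push_back(n);
        for (int s : succs[n]) {
          if (--pred_cnt[s] == 0) {  // Last predecessor emitted; s is ready.
            q.push(s);
          }
        }
      }
      return order;
    }
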
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 0bb8265..3a00a43 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -354,11 +354,12 @@
    */
   MIR* GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current);
   bool RemoveMIR(MIR* mir);
+  bool IsExceptionBlock() const;
 };
 
 /*
  * The "blocks" field in "successor_block_list" points to an array of elements with the type
- * "SuccessorBlockInfo".  For catch blocks, key is type index for the exception.  For swtich
+ * "SuccessorBlockInfo".  For catch blocks, key is type index for the exception.  For switch
  * blocks, key is the case value.
  */
 struct SuccessorBlockInfo {
@@ -598,6 +599,10 @@
 
   void BasicBlockOptimization();
 
+  GrowableArray<BasicBlockId>* GetTopologicalSortOrder() {
+    return topological_order_;
+  }
+
   bool IsConst(int32_t s_reg) const {
     return is_constant_v_->IsBitSet(s_reg);
   }
@@ -865,6 +870,7 @@
   MIR* AdvanceMIR(BasicBlock** p_bb, MIR* mir);
   BasicBlock* NextDominatedBlock(BasicBlock* bb);
   bool LayoutBlocks(BasicBlock* bb);
+  void ComputeTopologicalSortOrder();
 
   bool InlineCallsGate();
   void InlineCallsStart();
@@ -1003,6 +1009,7 @@
   GrowableArray<BasicBlockId>* dfs_order_;
   GrowableArray<BasicBlockId>* dfs_post_order_;
   GrowableArray<BasicBlockId>* dom_post_order_traversal_;
+  GrowableArray<BasicBlockId>* topological_order_;
   int* i_dom_list_;
   ArenaBitVector** def_block_matrix_;    // num_dalvik_register x num_blocks.
   ArenaBitVector* temp_dalvik_register_v_;
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
index ac22294..4ce040e 100644
--- a/compiler/dex/pass.h
+++ b/compiler/dex/pass.h
@@ -22,6 +22,11 @@
 #include "base/macros.h"
 namespace art {
 
+// Forward declarations.
+struct BasicBlock;
+struct CompilationUnit;
+class Pass;
+
 // Empty Pass Data Class, can be extended by any pass extending the base Pass class.
 class PassDataHolder {
 };
diff --git a/compiler/dex/pass_driver.cc b/compiler/dex/pass_driver.cc
index 999ed2a..ca936cd 100644
--- a/compiler/dex/pass_driver.cc
+++ b/compiler/dex/pass_driver.cc
@@ -162,6 +162,12 @@
     case kAllNodes:
       DoWalkBasicBlocks<AllNodesIterator>(c_unit, curPass);
       break;
+    case kTopologicalSortTraversal:
+      DoWalkBasicBlocks<TopologicalSortIterator>(c_unit, curPass);
+      break;
+    case kRepeatingTopologicalSortTraversal:
+      DoWalkBasicBlocks<RepeatingTopologicalSortIterator>(c_unit, curPass);
+      break;
     case kNoNodes:
       break;
     default:
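
DoWalkBasicBlocks is templated on the iterator type, so wiring up a new traversal is one enum
value plus one case. A sketch of the dispatch shape, with simplified signatures and a plain
function-pointer callback standing in for the actual pass-driver API:

    template <typename Iterator>
    void DoWalk(MIRGraph* mir_graph, bool (*apply)(BasicBlock*)) {
      Iterator iter(mir_graph);
      bool change = false;
      for (BasicBlock* bb = iter.Next(false); bb != nullptr; bb = iter.Next(change)) {
        change = apply(bb);
      }
    }

    void Dispatch(MIRGraph* mir_graph, DataFlowAnalysisMode mode, bool (*apply)(BasicBlock*)) {
      switch (mode) {
        case kTopologicalSortTraversal:
          DoWalk<TopologicalSortIterator>(mir_graph, apply);
          break;
        case kRepeatingTopologicalSortTraversal:
          DoWalk<RepeatingTopologicalSortIterator>(mir_graph, apply);
          break;
        default:
          break;  // Other traversal modes elided.
      }
    }
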
diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h
index 1132166..069fb45 100644
--- a/compiler/dex/pass_me.h
+++ b/compiler/dex/pass_me.h
@@ -49,6 +49,8 @@
   kRepeatingPostOrderDFSTraversal,         /**< @brief Depth-First-Search / Repeating Post-Order. */
   kRepeatingReversePostOrderDFSTraversal,  /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */
   kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
+  kTopologicalSortTraversal,               /**< @brief Topological Order traversal. */
+  kRepeatingTopologicalSortTraversal,      /**< @brief Repeating Topological Order traversal. */
   kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
 };
 
diff --git a/compiler/output_stream.h b/compiler/output_stream.h
index 478a854..97ccc2c 100644
--- a/compiler/output_stream.h
+++ b/compiler/output_stream.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OUTPUT_STREAM_H_
 
 #include <stdint.h>
+#include <sys/types.h>
 
 #include <string>
 
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index 0ded2d8..03d32f0 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -51,6 +51,7 @@
 LOCAL_LDFLAGS := -ldl -lpthread
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_IS_HOST_MODULE := true
+include external/libcxx/libcxx.mk
 include $(BUILD_HOST_EXECUTABLE)
 ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 endif
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index dd4e9d5..814323c 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -87,8 +87,8 @@
 
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    include external/libcxx/libcxx.mk
     LOCAL_SHARED_LIBRARIES += libcutils libvixl
     include $(BUILD_SHARED_LIBRARY)
   else # host
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 1521caa..c2507b1 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -308,6 +308,12 @@
   LIBART_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
 endif
 
+ifeq ($(MALLOC_IMPL),jemalloc)
+  LIBART_CFLAGS += -DUSE_JEMALLOC
+else
+  LIBART_CFLAGS += -DUSE_DLMALLOC
+endif
+
 # $(1): target or host
 # $(2): ndebug or debug
 # $(3): true or false for LOCAL_CLANG
@@ -397,12 +403,8 @@
   endif
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES)
   LOCAL_SHARED_LIBRARIES += liblog libnativehelper
-  ifeq ($$(art_target_or_host),target)
-    include external/libcxx/libcxx.mk
-    LOCAL_SHARED_LIBRARIES += libbacktrace_libc++
-  else
-    LOCAL_SHARED_LIBRARIES += libbacktrace
-  endif
+  include external/libcxx/libcxx.mk
+  LOCAL_SHARED_LIBRARIES += libbacktrace_libc++
   ifeq ($$(art_target_or_host),target)
     LOCAL_SHARED_LIBRARIES += libcutils libdl libselinux libutils
     LOCAL_STATIC_LIBRARIES := libziparchive libz
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 26cd864..9f36927 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -40,10 +40,9 @@
 
 namespace art {
 
-static Mutex modify_ldt_lock("modify_ldt lock");
-
 void Thread::InitCpu() {
-  MutexLock mu(Thread::Current(), modify_ldt_lock);
+  // Take the modify_ldt lock; Thread::Current isn't yet established.
+  MutexLock mu(nullptr, *Locks::modify_ldt_lock_);
 
   const uintptr_t base = reinterpret_cast<uintptr_t>(this);
   const size_t limit = kPageSize;
@@ -138,7 +137,7 @@
 }
 
 void Thread::CleanupCpu() {
-  MutexLock mu(Thread::Current(), modify_ldt_lock);
+  MutexLock mu(this, *Locks::modify_ldt_lock_);
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index adf4c66..6c415e7 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -132,9 +132,21 @@
   // TODO: tighten this check.
   if (kDebugLocking) {
     Runtime* runtime = Runtime::Current();
-    CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDownLocked() ||
-          level == kDefaultMutexLevel  || level == kRuntimeShutdownLock ||
-          level == kThreadListLock || level == kLoggingLock || level == kAbortLock);
+    CHECK(runtime == nullptr || !runtime->IsStarted() || runtime->IsShuttingDownLocked() ||
+          // Used during thread creation to avoid races with runtime shutdown. Thread::Current not
+          // yet established.
+          level == kRuntimeShutdownLock ||
+          // Thread Ids are allocated/released before threads are established.
+          level == kAllocatedThreadIdsLock ||
+          // Thread LDTs are initialized without Thread::Current established.
+          level == kModifyLdtLock ||
+          // Threads are unregistered while holding the thread list lock, during this process they
+          // no longer exist and so we expect an unlock with no self.
+          level == kThreadListLock ||
+          // Ignore logging which may or may not have set up thread data structures.
+          level == kLoggingLock ||
+          // Avoid recursive death.
+          level == kAbortLock);
   }
 }
 
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 6f7f2c1..705be40 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -30,10 +30,12 @@
 namespace art {
 
 Mutex* Locks::abort_lock_ = nullptr;
+Mutex* Locks::allocated_thread_ids_lock_ = nullptr;
 Mutex* Locks::breakpoint_lock_ = nullptr;
 ReaderWriterMutex* Locks::classlinker_classes_lock_ = nullptr;
 ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr;
 Mutex* Locks::logging_lock_ = nullptr;
+Mutex* Locks::modify_ldt_lock_ = nullptr;
 ReaderWriterMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::runtime_shutdown_lock_ = nullptr;
 Mutex* Locks::thread_list_lock_ = nullptr;
@@ -814,7 +816,13 @@
 void Locks::Init() {
   if (logging_lock_ != nullptr) {
     // Already initialized.
+    if (kRuntimeISA == kX86) {
+      DCHECK(modify_ldt_lock_ != nullptr);
+    } else {
+      DCHECK(modify_ldt_lock_ == nullptr);
+    }
     DCHECK(abort_lock_ != nullptr);
+    DCHECK(allocated_thread_ids_lock_ != nullptr);
     DCHECK(breakpoint_lock_ != nullptr);
     DCHECK(classlinker_classes_lock_ != nullptr);
     DCHECK(heap_bitmap_lock_ != nullptr);
@@ -827,32 +835,76 @@
     DCHECK(unexpected_signal_lock_ != nullptr);
     DCHECK(intern_table_lock_ != nullptr);
   } else {
-    logging_lock_ = new Mutex("logging lock", kLoggingLock, true);
-    abort_lock_ = new Mutex("abort lock", kAbortLock, true);
+    // Create global locks in level order from highest lock level to lowest.
+    LockLevel current_lock_level = kMutatorLock;
+    DCHECK(mutator_lock_ == nullptr);
+    mutator_lock_ = new ReaderWriterMutex("mutator lock", current_lock_level);
 
+    #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
+        DCHECK_LT(new_level, current_lock_level); \
+        current_lock_level = new_level;
+
+    UPDATE_CURRENT_LOCK_LEVEL(kHeapBitmapLock);
+    DCHECK(heap_bitmap_lock_ == nullptr);
+    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kRuntimeShutdownLock);
+    DCHECK(runtime_shutdown_lock_ == nullptr);
+    runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kProfilerLock);
+    DCHECK(profiler_lock_ == nullptr);
+    profiler_lock_ = new Mutex("profiler lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kTraceLock);
+    DCHECK(trace_lock_ == nullptr);
+    trace_lock_ = new Mutex("trace lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kThreadListLock);
+    DCHECK(thread_list_lock_ == nullptr);
+    thread_list_lock_ = new Mutex("thread list lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kBreakpointLock);
     DCHECK(breakpoint_lock_ == nullptr);
-    breakpoint_lock_ = new Mutex("breakpoint lock", kBreakpointLock);
+    breakpoint_lock_ = new Mutex("breakpoint lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kClassLinkerClassesLock);
     DCHECK(classlinker_classes_lock_ == nullptr);
     classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
-                                                      kClassLinkerClassesLock);
-    DCHECK(heap_bitmap_lock_ == nullptr);
-    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock);
-    DCHECK(mutator_lock_ == nullptr);
-    mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock);
-    DCHECK(runtime_shutdown_lock_ == nullptr);
-    runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", kRuntimeShutdownLock);
-    DCHECK(thread_list_lock_ == nullptr);
-    thread_list_lock_ = new Mutex("thread list lock", kThreadListLock);
-    DCHECK(thread_suspend_count_lock_ == nullptr);
-    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock);
-    DCHECK(trace_lock_ == nullptr);
-    trace_lock_ = new Mutex("trace lock", kTraceLock);
-    DCHECK(profiler_lock_ == nullptr);
-    profiler_lock_ = new Mutex("profiler lock", kProfilerLock);
-    DCHECK(unexpected_signal_lock_ == nullptr);
-    unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
+                                                      current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kAllocatedThreadIdsLock);
+    DCHECK(allocated_thread_ids_lock_ == nullptr);
+    allocated_thread_ids_lock_ = new Mutex("allocated thread ids lock", current_lock_level);
+
+    if (kRuntimeISA == kX86) {
+      UPDATE_CURRENT_LOCK_LEVEL(kModifyLdtLock);
+      DCHECK(modify_ldt_lock_ == nullptr);
+      modify_ldt_lock_ = new Mutex("modify_ldt lock", current_lock_level);
+    }
+
+    UPDATE_CURRENT_LOCK_LEVEL(kInternTableLock);
     DCHECK(intern_table_lock_ == nullptr);
-    intern_table_lock_ = new Mutex("InternTable lock", kInternTableLock);
+    intern_table_lock_ = new Mutex("InternTable lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kAbortLock);
+    DCHECK(abort_lock_ == nullptr);
+    abort_lock_ = new Mutex("abort lock", current_lock_level, true);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kThreadSuspendCountLock);
+    DCHECK(thread_suspend_count_lock_ == nullptr);
+    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kUnexpectedSignalLock);
+    DCHECK(unexpected_signal_lock_ == nullptr);
+    unexpected_signal_lock_ = new Mutex("unexpected signal lock", current_lock_level, true);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kLoggingLock);
+    DCHECK(logging_lock_ == nullptr);
+    logging_lock_ = new Mutex("logging lock", current_lock_level, true);
+
+    #undef UPDATE_CURRENT_LOCK_LEVEL
   }
 }
 
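
The rewritten Locks::Init() encodes the lock hierarchy once, in creation order: every global lock
must be created at a strictly lower level than its predecessor, and any ordering mistake fails the
DCHECK at startup rather than deadlocking later. The pattern in isolation, a standalone sketch
with illustrative names and levels (assert stands in for DCHECK_LT):

    #include <cassert>
    #include <string>
    #include <vector>

    enum LockLevel { kLoggingLock = 1, kAbortLock = 2, kThreadListLock = 5, kMutatorLock = 9 };

    struct FakeMutex { std::string name; LockLevel level; };
    std::vector<FakeMutex> locks;
    LockLevel current_lock_level = kMutatorLock;

    // Each step asserts strict descent, so two locks cannot be created out of order.
    #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
        assert(new_level < current_lock_level);  \
        current_lock_level = new_level;

    void InitLocks() {
      locks.push_back({"mutator lock", current_lock_level});
      UPDATE_CURRENT_LOCK_LEVEL(kThreadListLock);
      locks.push_back({"thread list lock", current_lock_level});
      UPDATE_CURRENT_LOCK_LEVEL(kAbortLock);
      locks.push_back({"abort lock", current_lock_level});
      UPDATE_CURRENT_LOCK_LEVEL(kLoggingLock);
      locks.push_back({"logging lock", current_lock_level});
    }
    #undef UPDATE_CURRENT_LOCK_LEVEL
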
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index e13c8d5..522692e 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -74,6 +74,8 @@
   kPinTableLock,
   kLoadLibraryLock,
   kJdwpObjectRegistryLock,
+  kModifyLdtLock,
+  kAllocatedThreadIdsLock,
   kClassLinkerClassesLock,
   kBreakpointLock,
   kMonitorLock,
@@ -532,28 +534,34 @@
   // Guards shutdown of the runtime.
   static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
 
+  // Guards background profiler global state.
+  static Mutex* profiler_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+
+  // Guards trace (i.e. traceview) requests.
+  static Mutex* trace_lock_ ACQUIRED_AFTER(profiler_lock_);
+
   // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
   // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(trace_lock_);
 
   // Guards breakpoints.
   static Mutex* breakpoint_lock_ ACQUIRED_AFTER(thread_list_lock_);
 
-  // Guards trace requests.
-  static Mutex* trace_lock_ ACQUIRED_AFTER(breakpoint_lock_);
-
-  // Guards profile objects.
-  static Mutex* profiler_lock_ ACQUIRED_AFTER(trace_lock_);
-
   // Guards lists of classes within the class linker.
-  static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(profiler_lock_);
+  static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(breakpoint_lock_);
 
   // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code
   // doesn't try to hold a higher level Mutex.
   #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(Locks::classlinker_classes_lock_)
 
+  // Guard the allocation/deallocation of thread ids.
+  static Mutex* allocated_thread_ids_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+
+  // Guards modification of the LDT on x86.
+  static Mutex* modify_ldt_lock_ ACQUIRED_AFTER(allocated_thread_ids_lock_);
+
   // Guards intern table.
-  static Mutex* intern_table_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+  static Mutex* intern_table_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
 
   // Have an exclusive aborting thread.
   static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 7136c67..8c8a355 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -4039,7 +4039,11 @@
   // Send a series of heap segment chunks.
   HeapChunkContext context((what == HPSG_WHAT_MERGED_OBJECTS), native);
   if (native) {
+#ifdef USE_DLMALLOC
     dlmalloc_inspect_all(HeapChunkContext::HeapChunkCallback, &context);
+#else
+    UNIMPLEMENTED(WARNING) << "Native heap inspection is only supported with dlmalloc";
+#endif
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ea1ccdd..fdc4367 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -893,10 +893,16 @@
   uint64_t gc_heap_end_ns = NanoTime();
   // We never move things in the native heap, so we can finish the GC at this point.
   FinishGC(self, collector::kGcTypeNone);
+  size_t native_reclaimed = 0;
+#if defined(USE_DLMALLOC)
   // Trim the native heap.
   dlmalloc_trim(0);
-  size_t native_reclaimed = 0;
   dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
+#elif defined(USE_JEMALLOC)
+  // Jemalloc does its own internal trimming.
+#else
+  UNIMPLEMENTED(WARNING) << "Add trimming support";
+#endif
   uint64_t end_ns = NanoTime();
   VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
       << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
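
This hunk and the debugger.cc hunk follow the same shape: dlmalloc extension calls compile only
when the runtime is actually backed by dlmalloc, keyed off the USE_DLMALLOC / USE_JEMALLOC defines
added to runtime/Android.mk above. The trim path in isolation, a sketch mirroring the hunk:

    // Native-heap trimming selected at compile time by the allocator define.
    size_t TrimNativeHeap() {
      size_t native_reclaimed = 0;
    #if defined(USE_DLMALLOC)
      dlmalloc_trim(0);  // Return free dlmalloc pages to the OS.
      dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
    #elif defined(USE_JEMALLOC)
      // jemalloc trims its arenas internally; nothing to do here.
    #else
      UNIMPLEMENTED(WARNING) << "Add trimming support";
    #endif
      return native_reclaimed;  // Bytes madvised away, when known.
    }
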
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index f783edb..c53520d 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -111,7 +111,7 @@
   MutexLock mu(self, monitor_lock_);  // Uncontended mutex acquisition as monitor isn't yet public.
   CHECK(owner_ == nullptr || owner_ == self || owner_->IsSuspended());
   // Propagate the lock state.
-  LockWord lw(obj_->GetLockWord(false));
+  LockWord lw(GetObject()->GetLockWord(false));
   switch (lw.GetState()) {
     case LockWord::kThinLocked: {
       CHECK_EQ(owner_->GetThreadId(), lw.ThinLockOwner());
@@ -137,7 +137,7 @@
   }
   LockWord fat(this);
   // Publish the updated lock word, which may race with other threads.
-  bool success = obj_->CasLockWord(lw, fat);
+  bool success = GetObject()->CasLockWord(lw, fat);
   // Lock profiling.
   if (success && owner_ != nullptr && lock_profiling_threshold_ != 0) {
     locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_);
@@ -226,9 +226,9 @@
     // Do this before releasing the lock so that we don't get deflated.
     ++num_waiters_;
     monitor_lock_.Unlock(self);  // Let go of locks in order.
+    self->SetMonitorEnterObject(GetObject());
     {
       ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
-      self->SetMonitorEnterObject(obj_);
       MutexLock mu2(self, monitor_lock_);  // Reacquire monitor_lock_ without mutator_lock_ for Wait.
       if (owner_ != NULL) {  // Did the owner_ give the lock up?
         monitor_contenders_.Wait(self);  // Still contended so wait.
@@ -249,8 +249,8 @@
           }
         }
       }
-      self->SetMonitorEnterObject(nullptr);
     }
+    self->SetMonitorEnterObject(nullptr);
     monitor_lock_.Lock(self);  // Reacquire locks in order.
     --num_waiters_;
   }
@@ -363,7 +363,7 @@
     // We don't own this, so we're not allowed to unlock it.
     // The JNI spec says that we should throw IllegalMonitorStateException
     // in this case.
-    FailedUnlock(obj_, self, owner, this);
+    FailedUnlock(GetObject(), self, owner, this);
     return false;
   }
   return true;
@@ -895,7 +895,7 @@
     MutexLock mu(self, *thread->GetWaitMutex());
     Monitor* monitor = thread->GetWaitMonitor();
     if (monitor != nullptr) {
-      pretty_object = monitor->obj_;
+      pretty_object = monitor->GetObject();
     }
   } else if (state == kBlocked) {
     wait_message = "  - waiting to lock ";
@@ -1101,12 +1101,13 @@
   MutexLock mu(Thread::Current(), monitor_list_lock_);
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
-    mirror::Object* obj = m->GetObject();
+    // Disable the read barrier in GetObject() as this is called by GC.
+    mirror::Object* obj = m->GetObject<kWithoutReadBarrier>();
     // The object of a monitor can be null if we have deflated it.
     mirror::Object* new_obj = obj != nullptr ? callback(obj, arg) : nullptr;
     if (new_obj == nullptr) {
       VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
-                    << m->GetObject();
+                    << obj;
       delete m;
       it = list_.erase(it);
     } else {
diff --git a/runtime/monitor.h b/runtime/monitor.h
index bc1b2ed4..7af2d4c 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -27,6 +27,7 @@
 #include "atomic.h"
 #include "base/mutex.h"
 #include "object_callbacks.h"
+#include "read_barrier.h"
 #include "thread_state.h"
 
 namespace art {
@@ -92,8 +93,9 @@
 
   static bool IsValidLockWord(LockWord lock_word);
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Object* GetObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return obj_;
+    return ReadBarrier::BarrierForWeakRoot<mirror::Object, kReadBarrierOption>(obj_);
   }
 
   void SetObject(mirror::Object* object);
@@ -190,7 +192,9 @@
   // Owner's recursive lock depth.
   int lock_count_ GUARDED_BY(monitor_lock_);
 
-  // What object are we part of.
+  // What object are we part of. This is a weak root. Do not access
+  // this directly, use GetObject() to read it so it will be guarded
+  // by a read barrier.
   mirror::Object* obj_;
 
   // Threads currently waiting on this monitor.
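
With this change the barrier-applied read is the default and GC code must opt out explicitly,
which keeps the raw weak-root reads easy to audit. A sketch of the resulting call sites, mirroring
the monitor.cc hunks above:

    // Mutator-side code: the read barrier is applied by the default template argument.
    mirror::Object* obj = monitor->GetObject();

    // GC sweep code (see SweepMonitorList): a deliberate, visible opt-out.
    mirror::Object* raw = monitor->GetObject<kWithoutReadBarrier>();
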
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 69b05f4..d9c9b59 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -155,6 +155,21 @@
   return env->NewStringUTF(kIsDebugBuild ? "libartd.so" : "libart.so");
 }
 
+static jstring VMRuntime_vmInstructionSet(JNIEnv* env, jobject) {
+  InstructionSet isa = Runtime::Current()->GetInstructionSet();
+  const char* isa_string = GetInstructionSetString(isa);
+  return env->NewStringUTF(isa_string);
+}
+
+static jboolean VMRuntime_is64Bit(JNIEnv* env, jobject) {
+  bool is64BitMode = (sizeof(void*) == sizeof(uint64_t));
+  return is64BitMode ? JNI_TRUE : JNI_FALSE;
+}
+
+static jboolean VMRuntime_isCheckJniEnabled(JNIEnv* env, jobject) {
+  return Runtime::Current()->GetJavaVM()->check_jni ? JNI_TRUE : JNI_FALSE;
+}
+
 static void VMRuntime_setTargetSdkVersionNative(JNIEnv* env, jobject, jint targetSdkVersion) {
   // This is the target SDK version of the app we're about to run. It is intended that this is a
   // place where workarounds can be enabled.
@@ -529,6 +544,9 @@
   NATIVE_METHOD(VMRuntime, trimHeap, "()V"),
   NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"),
+  NATIVE_METHOD(VMRuntime, vmInstructionSet, "()Ljava/lang/String;"),
+  NATIVE_METHOD(VMRuntime, is64Bit, "!()Z"),
+  NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "!()Z"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
   NATIVE_METHOD(VMRuntime, registerAppInfo, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"),
 };
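
The leading '!' in a registered signature marks the method for ART's fast-JNI calling convention,
which skips the full thread-state transition on entry. A rough sketch of what the NATIVE_METHOD
rows expand to and how such a table is registered; the helper names are illustrative:

    #include <jni.h>

    static JNINativeMethod gSketchMethods[] = {
      { "vmInstructionSet", "()Ljava/lang/String;",
        reinterpret_cast<void*>(VMRuntime_vmInstructionSet) },
      { "is64Bit", "!()Z", reinterpret_cast<void*>(VMRuntime_is64Bit) },
      { "isCheckJniEnabled", "!()Z", reinterpret_cast<void*>(VMRuntime_isCheckJniEnabled) },
    };

    void RegisterSketchMethods(JNIEnv* env) {
      jclass klass = env->FindClass("dalvik/system/VMRuntime");
      env->RegisterNatives(klass, gSketchMethods,
                           sizeof(gSketchMethods) / sizeof(gSketchMethods[0]));
    }
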
diff --git a/runtime/oat.cc b/runtime/oat.cc
index cb9334a..10d335e 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '2', '8', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '2', '9', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 88e2f8f..4302c9e 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -43,6 +43,21 @@
   }
 }
 
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
+inline MirrorType* ReadBarrier::BarrierForWeakRoot(MirrorType* ref) {
+  const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
+  if (with_read_barrier && kUseBakerReadBarrier) {
+    // To be implemented.
+    return ref;
+  } else if (with_read_barrier && kUseBrooksReadBarrier) {
+    // To be implemented.
+    return ref;
+  } else {
+    return ref;
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_READ_BARRIER_INL_H_
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 73c3d43..e40e8ea 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -37,6 +37,10 @@
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  ALWAYS_INLINE static MirrorType* BarrierForWeakRoot(MirrorType* ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 };
 
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 62fa323..9a7cb48 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -396,11 +396,11 @@
   // Convert a jobject into a Object*
   mirror::Object* DecodeJObject(jobject obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Object* GetMonitorEnterObject() const {
+  mirror::Object* GetMonitorEnterObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return tlsPtr_.monitor_enter_object;
   }
 
-  void SetMonitorEnterObject(mirror::Object* obj) {
+  void SetMonitorEnterObject(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     tlsPtr_.monitor_enter_object = obj;
   }
 
@@ -1045,9 +1045,6 @@
     // A cached pthread_t for the pthread underlying this Thread*.
     pthread_t pthread_self;
 
-    // Support for Mutex lock hierarchy bug detection.
-    BaseMutex* held_mutexes[kLockLevelCount];
-
     // If no_thread_suspension_ is > 0, what is causing that assertion.
     const char* last_no_thread_suspension_cause;
 
@@ -1074,6 +1071,9 @@
     // Thread-local allocation stack data/routines.
     mirror::Object** thread_local_alloc_stack_top;
     mirror::Object** thread_local_alloc_stack_end;
+
+    // Support for Mutex lock hierarchy bug detection.
+    BaseMutex* held_mutexes[kLockLevelCount];
   } tlsPtr_;
 
   // Guards the 'interrupted_' and 'wait_monitor_' members.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 8046500..388c9b4 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -40,8 +40,7 @@
 namespace art {
 
 ThreadList::ThreadList()
-    : allocated_ids_lock_("allocated thread ids lock"),
-      suspend_all_count_(0), debug_suspend_all_count_(0),
+    : suspend_all_count_(0), debug_suspend_all_count_(0),
       thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1)));
 }
@@ -849,7 +848,7 @@
 }
 
 uint32_t ThreadList::AllocThreadId(Thread* self) {
-  MutexLock mu(self, allocated_ids_lock_);
+  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
   for (size_t i = 0; i < allocated_ids_.size(); ++i) {
     if (!allocated_ids_[i]) {
       allocated_ids_.set(i);
@@ -861,7 +860,7 @@
 }
 
 void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
-  MutexLock mu(self, allocated_ids_lock_);
+  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
   --id;  // Zero is reserved to mean "invalid".
   DCHECK(allocated_ids_[id]) << id;
   allocated_ids_.reset(id);
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index a574340..d46987a 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -132,7 +132,7 @@
 
  private:
   uint32_t AllocThreadId(Thread* self);
-  void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(allocated_ids_lock_);
+  void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(Locks::allocated_thread_ids_lock_);
 
   bool Contains(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
   bool Contains(pid_t tid) EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
@@ -151,8 +151,7 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
 
-  mutable Mutex allocated_ids_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(allocated_ids_lock_);
+  std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(Locks::allocated_thread_ids_lock_);
 
   // The actual list of all threads.
   std::list<Thread*> list_ GUARDED_BY(Locks::thread_list_lock_);