Merge "Fix JDWP thread crash."
diff --git a/Android.mk b/Android.mk
index 4d9f622..62d40bb 100644
--- a/Android.mk
+++ b/Android.mk
@@ -92,7 +92,6 @@
 include $(art_path)/disassembler/Android.mk
 include $(art_path)/oatdump/Android.mk
 include $(art_path)/dalvikvm/Android.mk
-include $(art_path)/jdwpspy/Android.mk
 include $(art_build_path)/Android.oat.mk
 
 # ART_HOST_DEPENDENCIES depends on Android.executable.mk above for ART_HOST_EXECUTABLES
@@ -334,7 +333,7 @@
 .PHONY: dump-oat-boot
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
 dump-oat-boot: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(OATDUMP)
-	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt
+	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt --host-prefix=$(DEXPREOPT_PRODUCT_DIR_FULL_PATH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/boot.oatdump.txt
 endif
 
@@ -356,19 +355,19 @@
 .PHONY: use-art
 use-art:
 	adb root && sleep 3
-	adb shell setprop persist.sys.dalvik.vm.lib libart.so
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
 	adb reboot
 
 .PHONY: use-artd
 use-artd:
 	adb root && sleep 3
-	adb shell setprop persist.sys.dalvik.vm.lib libartd.so
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libartd.so
 	adb reboot
 
 .PHONY: use-dalvik
 use-dalvik:
 	adb root && sleep 3
-	adb shell setprop persist.sys.dalvik.vm.lib libdvm.so
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libdvm.so
 	adb reboot
 
 ########################################################################
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 0f4ade3..415d810 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -182,9 +182,6 @@
 # Make host builds easier to debug and profile by not omitting the frame pointer.
 ART_HOST_CFLAGS += -fno-omit-frame-pointer
 
-# Workaround differences in inttypes.h.
-ART_HOST_CFLAGS += -D__STDC_FORMAT_MACROS=1
-
 # To use oprofile_android --callgraph, uncomment this and recompile with "mmm art -B -j16"
 # ART_TARGET_CFLAGS += -fno-omit-frame-pointer -marm -mapcs
 
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index bed48ba..acaa0f8 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -30,6 +30,7 @@
 	compiler/utils/x86/managed_register_x86_test.cc \
 	runtime/barrier_test.cc \
 	runtime/base/bit_vector_test.cc \
+	runtime/base/hex_dump_test.cc \
 	runtime/base/histogram_test.cc \
 	runtime/base/mutex_test.cc \
 	runtime/base/timing_logger_test.cc \
diff --git a/compiler/buffered_output_stream.cc b/compiler/buffered_output_stream.cc
index 81a58f6..0940a80 100644
--- a/compiler/buffered_output_stream.cc
+++ b/compiler/buffered_output_stream.cc
@@ -23,7 +23,7 @@
 BufferedOutputStream::BufferedOutputStream(OutputStream* out)
     : OutputStream(out->GetLocation()), out_(out), used_(0) {}
 
-bool BufferedOutputStream::WriteFully(const void* buffer, int64_t byte_count) {
+bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   if (byte_count > kBufferSize) {
     Flush();
     return out_->WriteFully(buffer, byte_count);
diff --git a/compiler/buffered_output_stream.h b/compiler/buffered_output_stream.h
index 7d874fb..75a3f24 100644
--- a/compiler/buffered_output_stream.h
+++ b/compiler/buffered_output_stream.h
@@ -31,7 +31,7 @@
     delete out_;
   }
 
-  virtual bool WriteFully(const void* buffer, int64_t byte_count);
+  virtual bool WriteFully(const void* buffer, size_t byte_count);
 
   virtual off_t Seek(off_t offset, Whence whence);
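
Aside: the WriteFully change in this file and the matching .cc moves the byte count from a
signed int64_t to the unsigned size_t used for sizes elsewhere in the stream code. A
standalone sketch, not ART code, of the signed/unsigned comparison trap the old mixed types
allowed (assumes a 64-bit size_t):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static const size_t kBufferSize = 8192;  // stand-in for the real buffer size

    int main() {
      int64_t byte_count = -1;  // a miscomputed or corrupted count
      // Usual arithmetic conversions turn the signed -1 into UINT64_MAX for the
      // comparison, so a negative count silently "exceeds" the buffer size
      // (most compilers flag this with -Wsign-compare).
      if (byte_count > kBufferSize) {
        std::printf("bypasses the buffer\n");  // this branch is taken
      }
      return 0;
    }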
 
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 29ff390..f6d724a 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -20,14 +20,16 @@
 namespace art {
 
 CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
-                           const std::vector<uint8_t>& code)
-    : compiler_driver_(compiler_driver), instruction_set_(instruction_set), code_(nullptr) {
-  SetCode(code);
+                           const std::vector<uint8_t>& quick_code)
+    : compiler_driver_(compiler_driver), instruction_set_(instruction_set),
+      portable_code_(nullptr), quick_code_(nullptr) {
+  SetCode(&quick_code, nullptr);
 }
 
 CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
                            const std::string& elf_object, const std::string& symbol)
-    : compiler_driver_(compiler_driver), instruction_set_(instruction_set), symbol_(symbol) {
+    : compiler_driver_(compiler_driver), instruction_set_(instruction_set),
+      portable_code_(nullptr), quick_code_(nullptr), symbol_(symbol) {
   CHECK_NE(elf_object.size(), 0U);
   CHECK_NE(symbol.size(), 0U);
   std::vector<uint8_t> temp_code(elf_object.size());
@@ -38,12 +40,41 @@
   // change to have different kinds of compiled methods.  This is
   // being deferred until we work on hybrid execution or at least
   // until we work on batch compilation.
-  SetCode(temp_code);
+  SetCode(nullptr, &temp_code);
 }
 
-void CompiledCode::SetCode(const std::vector<uint8_t>& code) {
-  CHECK(!code.empty());
-  code_ = compiler_driver_->DeduplicateCode(code);
+void CompiledCode::SetCode(const std::vector<uint8_t>* quick_code,
+                           const std::vector<uint8_t>* portable_code) {
+  if (portable_code != nullptr) {
+    CHECK(!portable_code->empty());
+    portable_code_ = compiler_driver_->DeduplicateCode(*portable_code);
+  }
+  if (quick_code != nullptr) {
+    CHECK(!quick_code->empty());
+    quick_code_ = compiler_driver_->DeduplicateCode(*quick_code);
+  }
+}
+
+bool CompiledCode::operator==(const CompiledCode& rhs) const {
+  if (quick_code_ != nullptr) {
+    if (rhs.quick_code_ == nullptr) {
+      return false;
+    } else if (quick_code_->size() != rhs.quick_code_->size()) {
+      return false;
+    } else {
+      return std::equal(quick_code_->begin(), quick_code_->end(), rhs.quick_code_->begin());
+    }
+  } else if (portable_code_ != nullptr) {
+    if (rhs.portable_code_ == nullptr) {
+      return false;
+    } else if (portable_code_->size() != rhs.portable_code_->size()) {
+      return false;
+    } else {
+      return std::equal(portable_code_->begin(), portable_code_->end(),
+                        rhs.portable_code_->begin());
+    }
+  }
+  return (rhs.quick_code_ == nullptr) && (rhs.portable_code_ == nullptr);
 }
 
 uint32_t CompiledCode::AlignCode(uint32_t offset) const {
@@ -100,7 +131,6 @@
   }
 }
 
-#if defined(ART_USE_PORTABLE_COMPILER)
 const std::string& CompiledCode::GetSymbol() const {
   CHECK_NE(0U, symbol_.size());
   return symbol_;
@@ -114,18 +144,17 @@
 void CompiledCode::AddOatdataOffsetToCompliledCodeOffset(uint32_t offset) {
   oatdata_offsets_to_compiled_code_offset_.push_back(offset);
 }
-#endif
 
 CompiledMethod::CompiledMethod(CompilerDriver& driver,
                                InstructionSet instruction_set,
-                               const std::vector<uint8_t>& code,
+                               const std::vector<uint8_t>& quick_code,
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask,
                                const std::vector<uint8_t>& mapping_table,
                                const std::vector<uint8_t>& vmap_table,
                                const std::vector<uint8_t>& native_gc_map)
-    : CompiledCode(&driver, instruction_set, code), frame_size_in_bytes_(frame_size_in_bytes),
+    : CompiledCode(&driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
   mapping_table_(driver.DeduplicateMappingTable(mapping_table)),
   vmap_table_(driver.DeduplicateVMapTable(vmap_table)),
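
With CompiledCode now holding two optional blobs, the new operator== added above compares
whichever kind of code is present by content. A minimal standalone model of that logic, using
plain std::vector pointers in place of the deduplicated arena vectors (illustrative names, not
the ART types):

    #include <cstdint>
    #include <vector>

    struct Code {
      const std::vector<uint8_t>* quick = nullptr;
      const std::vector<uint8_t>* portable = nullptr;

      bool operator==(const Code& rhs) const {
        if (quick != nullptr) {
          // Compare contents, not pointers: deduplication may alias equal blobs,
          // but two distinct allocations with equal bytes must still compare equal.
          return rhs.quick != nullptr && *quick == *rhs.quick;
        }
        if (portable != nullptr) {
          return rhs.portable != nullptr && *portable == *rhs.portable;
        }
        // No code on the left side: equal only if the right side is empty too.
        return rhs.quick == nullptr && rhs.portable == nullptr;
      }
    };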
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index e4fedf1..6112305 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -36,7 +36,7 @@
  public:
  // For Quick to supply a code blob
   CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
-               const std::vector<uint8_t>& code);
+               const std::vector<uint8_t>& quick_code);
 
   // For Portable to supply an ELF object
   CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
@@ -46,16 +46,18 @@
     return instruction_set_;
   }
 
-  const std::vector<uint8_t>& GetCode() const {
-    return *code_;
+  const std::vector<uint8_t>* GetPortableCode() const {
+    return portable_code_;
   }
 
-  void SetCode(const std::vector<uint8_t>& code);
-
-  bool operator==(const CompiledCode& rhs) const {
-    return (code_ == rhs.code_);
+  const std::vector<uint8_t>* GetQuickCode() const {
+    return quick_code_;
   }
 
+  void SetCode(const std::vector<uint8_t>* quick_code, const std::vector<uint8_t>* portable_code);
+
+  bool operator==(const CompiledCode& rhs) const;
+
   // To align an offset from a page-aligned value to make it suitable
   // for code storage. For example on ARM, to ensure that PC relative
  // value computations work out as expected.
@@ -72,19 +74,20 @@
   static const void* CodePointer(const void* code_pointer,
                                  InstructionSet instruction_set);
 
-#if defined(ART_USE_PORTABLE_COMPILER)
   const std::string& GetSymbol() const;
   const std::vector<uint32_t>& GetOatdataOffsetsToCompliledCodeOffset() const;
   void AddOatdataOffsetToCompliledCodeOffset(uint32_t offset);
-#endif
 
  private:
-  CompilerDriver* compiler_driver_;
+  CompilerDriver* const compiler_driver_;
 
   const InstructionSet instruction_set_;
 
-  // Used to store the PIC code for Quick and an ELF image for portable.
-  std::vector<uint8_t>* code_;
+  // The ELF image for portable.
+  std::vector<uint8_t>* portable_code_;
+
+  // Used to store the PIC code for Quick.
+  std::vector<uint8_t>* quick_code_;
 
   // Used for the Portable ELF symbol name.
   const std::string symbol_;
@@ -101,7 +104,7 @@
   // Constructs a CompiledMethod for the non-LLVM compilers.
   CompiledMethod(CompilerDriver& driver,
                  InstructionSet instruction_set,
-                 const std::vector<uint8_t>& code,
+                 const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
                  const uint32_t core_spill_mask,
                  const uint32_t fp_spill_mask,
@@ -109,10 +112,10 @@
                  const std::vector<uint8_t>& vmap_table,
                  const std::vector<uint8_t>& native_gc_map);
 
-  // Constructs a CompiledMethod for the JniCompiler.
+  // Constructs a CompiledMethod for the QuickJniCompiler.
   CompiledMethod(CompilerDriver& driver,
                  InstructionSet instruction_set,
-                 const std::vector<uint8_t>& code,
+                 const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
                  const uint32_t core_spill_mask,
                  const uint32_t fp_spill_mask);
diff --git a/compiler/dex/arena_allocator.cc b/compiler/dex/arena_allocator.cc
index 132831c..8d24439 100644
--- a/compiler/dex/arena_allocator.cc
+++ b/compiler/dex/arena_allocator.cc
@@ -52,7 +52,8 @@
       next_(nullptr) {
   if (kUseMemMap) {
     std::string error_msg;
-    map_ = MemMap::MapAnonymous("dalvik-arena", NULL, size, PROT_READ | PROT_WRITE, &error_msg);
+    map_ = MemMap::MapAnonymous("dalvik-arena", NULL, size, PROT_READ | PROT_WRITE, false,
+                                &error_msg);
     CHECK(map_ != nullptr) << error_msg;
     memory_ = map_->Begin();
     size_ = map_->Size();
diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc
index f013067..2ab6252 100644
--- a/compiler/dex/bb_optimizations.cc
+++ b/compiler/dex/bb_optimizations.cc
@@ -23,7 +23,7 @@
 /*
  * Code Layout pass implementation start.
  */
-bool CodeLayout::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool CodeLayout::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->LayoutBlocks(bb);
   // No need of repeating, so just return false.
   return false;
@@ -32,13 +32,13 @@
 /*
  * SSATransformation pass implementation start.
  */
-bool SSATransformation::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool SSATransformation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->InsertPhiNodeOperands(bb);
   // No need of repeating, so just return false.
   return false;
 }
 
-void SSATransformation::End(CompilationUnit *cUnit) const {
+void SSATransformation::End(CompilationUnit* cUnit) const {
   // Verify the dataflow information after the pass.
   if (cUnit->enable_debug & (1 << kDebugVerifyDataflow)) {
     cUnit->mir_graph->VerifyDataflow();
@@ -48,7 +48,7 @@
 /*
  * ConstantPropagation pass implementation start
  */
-bool ConstantPropagation::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool ConstantPropagation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->DoConstantPropagation(bb);
   // No need of repeating, so just return false.
   return false;
@@ -57,7 +57,7 @@
 /*
  * MethodUseCount pass implementation start.
  */
-bool MethodUseCount::Gate(const CompilationUnit *cUnit) const {
+bool MethodUseCount::Gate(const CompilationUnit* cUnit) const {
   // First initialize the data.
   cUnit->mir_graph->InitializeMethodUses();
 
@@ -67,7 +67,7 @@
   return res;
 }
 
-bool MethodUseCount::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool MethodUseCount::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->CountUses(bb);
   // No need of repeating, so just return false.
   return false;
@@ -77,7 +77,7 @@
  * Null Check Elimination and Type Inference Initialization pass implementation start.
  */
 
-bool NullCheckEliminationAndTypeInferenceInit::Gate(const CompilationUnit *cUnit) const {
+bool NullCheckEliminationAndTypeInferenceInit::Gate(const CompilationUnit* cUnit) const {
   // First check the ssa register vector
   cUnit->mir_graph->CheckSSARegisterVector();
 
@@ -87,7 +87,8 @@
   return performInit;
 }
 
-bool NullCheckEliminationAndTypeInferenceInit::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool NullCheckEliminationAndTypeInferenceInit::WalkBasicBlocks(CompilationUnit* cUnit,
+                                                               BasicBlock* bb) const {
   cUnit->mir_graph->NullCheckEliminationInit(bb);
   // No need of repeating, so just return false.
   return false;
@@ -96,7 +97,7 @@
 /*
  * BasicBlock Combine pass implementation start.
  */
-bool BBCombine::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool BBCombine::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->CombineBlocks(bb);
 
   // No need of repeating, so just return false.
@@ -106,9 +107,7 @@
 /*
  * BasicBlock Optimization pass implementation start.
  */
-void BBOptimizations::Start(CompilationUnit *cUnit) const {
-  DCHECK_EQ(cUnit->num_compiler_temps, 0);
-
+void BBOptimizations::Start(CompilationUnit* cUnit) const {
   /*
   * This pass has a different ordering depending on the suppress exception,
    * so do the pass here for now:
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 768b273..1286a8e 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -28,14 +28,14 @@
  */
 class CodeLayout : public Pass {
  public:
-  CodeLayout():Pass("CodeLayout", "2_post_layout_cfg") {
+  CodeLayout() : Pass("CodeLayout", "2_post_layout_cfg") {
   }
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->VerifyDataflow();
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 };
 
 /**
@@ -44,16 +44,16 @@
  */
 class SSATransformation : public Pass {
  public:
-  SSATransformation():Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
+  SSATransformation() : Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->InitializeSSATransformation();
   }
 
-  void End(CompilationUnit *cUnit) const;
+  void End(CompilationUnit* cUnit) const;
 };
 
 /**
@@ -62,12 +62,12 @@
  */
 class ConstantPropagation : public Pass {
  public:
-  ConstantPropagation():Pass("ConstantPropagation") {
+  ConstantPropagation() : Pass("ConstantPropagation") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->InitializeConstantPropagation();
   }
 };
@@ -78,10 +78,10 @@
  */
 class InitRegLocations : public Pass {
  public:
-  InitRegLocations():Pass("InitRegLocation") {
+  InitRegLocations() : Pass("InitRegLocation", kNoNodes) {
   }
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->InitRegLocations();
   }
 };
@@ -92,12 +92,12 @@
  */
 class MethodUseCount : public Pass {
  public:
-  MethodUseCount():Pass("UseCount") {
+  MethodUseCount() : Pass("UseCount") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  bool Gate(const CompilationUnit *cUnit) const;
+  bool Gate(const CompilationUnit* cUnit) const;
 };
 
 /**
@@ -106,12 +106,12 @@
  */
 class NullCheckEliminationAndTypeInferenceInit : public Pass {
  public:
-  NullCheckEliminationAndTypeInferenceInit():Pass("NCE_TypeInferenceInit") {
+  NullCheckEliminationAndTypeInferenceInit() : Pass("NCE_TypeInferenceInit") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  bool Gate(const CompilationUnit *cUnit) const;
+  bool Gate(const CompilationUnit* cUnit) const;
 };
 
 /**
@@ -120,10 +120,11 @@
  */
 class NullCheckEliminationAndTypeInference : public Pass {
  public:
-  NullCheckEliminationAndTypeInference():Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
+  NullCheckEliminationAndTypeInference()
+    : Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
     return cUnit->mir_graph->EliminateNullChecksAndInferTypes(bb);
   }
 };
@@ -134,14 +135,14 @@
  */
 class BBCombine : public Pass {
  public:
-  BBCombine():Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
+  BBCombine() : Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const CompilationUnit* cUnit) const {
     return ((cUnit->disable_opt & (1 << kSuppressExceptionEdges)) != 0);
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 };
 
 /**
@@ -150,14 +151,14 @@
  */
 class BBOptimizations : public Pass {
  public:
-  BBOptimizations():Pass("BBOptimizations", "5_post_bbo_cfg") {
+  BBOptimizations() : Pass("BBOptimizations", kNoNodes, "5_post_bbo_cfg") {
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const CompilationUnit* cUnit) const {
     return ((cUnit->disable_opt & (1 << kBBOpt)) == 0);
   }
 
-  void Start(CompilationUnit *cUnit) const;
+  void Start(CompilationUnit* cUnit) const;
 };
 
 }  // namespace art
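
The kNoNodes traversal mode now used by InitRegLocations and BBOptimizations above (defined in
pass.h further down) lets a pass that only needs its Start()/End() hooks skip the BasicBlock
walk entirely. A hypothetical pass in the same style, purely for illustration:

    class VerifyDataflowPass : public Pass {
     public:
      VerifyDataflowPass() : Pass("VerifyDataflow", kNoNodes) {
      }

      void Start(CompilationUnit* cUnit) const {
        // With kNoNodes, DispatchPass performs no per-block iteration, so only
        // Start() (and End(), if overridden) runs for this pass.
        cUnit->mir_graph->VerifyDataflow();
      }
    };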
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 18122b3..2bc36a5 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -210,6 +210,22 @@
   kOpInvalid,
 };
 
+enum MoveType {
+  kMov8GP,      // Move 8-bit general purpose register.
+  kMov16GP,     // Move 16-bit general purpose register.
+  kMov32GP,     // Move 32-bit general purpose register.
+  kMov64GP,     // Move 64-bit general purpose register.
+  kMov32FP,     // Move 32-bit FP register.
+  kMov64FP,     // Move 64-bit FP register.
+  kMovLo64FP,   // Move low 32-bits of 64-bit FP register.
+  kMovHi64FP,   // Move high 32-bits of 64-bit FP register.
+  kMovU128FP,   // Move 128-bit FP register to/from possibly unaligned region.
+  kMov128FP = kMovU128FP,
+  kMovA128FP,   // Move 128-bit FP register to/from region surely aligned to 16-bytes.
+  kMovLo128FP,  // Move low 64-bits of 128-bit FP register.
+  kMovHi128FP,  // Move high 64-bits of 128-bit FP register.
+};
+
 std::ostream& operator<<(std::ostream& os, const OpKind& kind);
 
 enum ConditionCode {
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index 3798b45..32fd79b 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -81,9 +81,6 @@
   uint16_t num_outs;
   uint16_t num_regs;            // Unlike num_dalvik_registers, does not include ins.
 
-  // TODO: may want to move this to MIRGraph.
-  uint16_t num_compiler_temps;
-
   // If non-empty, apply optimizer/debug flags only to matching methods.
   std::string compiler_method_match;
   // Flips sense of compiler_method_match - apply flags if doesn't match.
diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h
index 0ca1a47..f8b9c1a 100644
--- a/compiler/dex/dataflow_iterator-inl.h
+++ b/compiler/dex/dataflow_iterator-inl.h
@@ -107,7 +107,7 @@
   // Find the next BasicBlock.
   while (keep_looking == true) {
     // Get next BasicBlock.
-    res = all_nodes_iterator_->Next();
+    res = all_nodes_iterator_.Next();
 
     // Are we done or is the BasicBlock not hidden?
     if ((res == NULL) || (res->hidden == false)) {
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index 658a9b1..b45d6a4 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -138,21 +138,6 @@
 
         return ForwardSingleNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(PreOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -184,21 +169,6 @@
 
         return ForwardRepeatNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(RepeatingPreOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -230,21 +200,6 @@
 
         return ForwardRepeatNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(RepeatingPostOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -275,21 +230,6 @@
 
         return ReverseSingleNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(ReversePostOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -321,21 +261,6 @@
 
         return ReverseRepeatNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(RepeatingReversePostOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -366,21 +291,6 @@
 
         return ForwardSingleNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(PostOrderDOMIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -394,16 +304,15 @@
        * @param mir_graph The MIRGraph considered.
        */
       explicit AllNodesIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, 0) {
-        all_nodes_iterator_ = new
-            (mir_graph->GetArena()) GrowableArray<BasicBlock*>::Iterator(mir_graph->GetBlockList());
+          : DataflowIterator(mir_graph, 0, 0),
+            all_nodes_iterator_(mir_graph->GetBlockList()) {
       }
 
       /**
        * @brief Resetting the iterator.
        */
       void Reset() {
-        all_nodes_iterator_->Reset();
+        all_nodes_iterator_.Reset();
       }
 
       /**
@@ -413,23 +322,8 @@
        */
       virtual BasicBlock* Next(bool had_change = false) ALWAYS_INLINE;
 
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(AllNodesIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
-
     private:
-      GrowableArray<BasicBlock*>::Iterator* all_nodes_iterator_;    /**< @brief The list of all the nodes */
+      GrowableArray<BasicBlock*>::Iterator all_nodes_iterator_;    /**< @brief The list of all the nodes */
   };
 
 }  // namespace art
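
The AllNodesIterator change above is the representative one: holding the GrowableArray
iterator by value instead of arena-allocating it is what makes all the operator new/delete
overrides in this file removable. A generic sketch of the pattern in standard C++ (not the
ART types):

    #include <cstddef>
    #include <vector>

    // An iterator whose entire state is {pointer, index} is cheap to embed by
    // value in its owner: no arena or heap allocation, no '->' indirection.
    class AllNodes {
     public:
      explicit AllNodes(std::vector<int>* list) : it_(list) {}
      int* Next() { return it_.Next(); }
      void Reset() { it_.Reset(); }  // direct member access
     private:
      struct Iterator {
        explicit Iterator(std::vector<int>* l) : list(l), idx(0) {}
        int* Next() { return idx < list->size() ? &(*list)[idx++] : nullptr; }
        void Reset() { idx = 0; }
        std::vector<int>* const list;
        std::size_t idx;
      } it_;
    };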
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index f5bb85a..c2016d0 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -130,7 +130,6 @@
     num_ins(0),
     num_outs(0),
     num_regs(0),
-    num_compiler_temps(0),
     compiler_flip_match(false),
     arena(pool),
     mir_graph(NULL),
@@ -236,6 +235,43 @@
   cu.StartTimingSplit("BuildMIRGraph");
   cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
 
+  /*
+   * After creation of the MIR graph, also create the code generator.
+   * The reason we do this is that optimizations on the MIR graph may need to get information
+   * that is only available if a CG exists.
+   */
+#if defined(ART_USE_PORTABLE_COMPILER)
+  if (compiler_backend == kPortable) {
+    cu.cg.reset(PortableCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena, llvm_compilation_unit));
+  } else {
+#endif
+    Mir2Lir* mir_to_lir = nullptr;
+    switch (compiler.GetInstructionSet()) {
+      case kThumb2:
+        mir_to_lir = ArmCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena);
+        break;
+      case kMips:
+        mir_to_lir = MipsCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena);
+        break;
+      case kX86:
+        mir_to_lir = X86CodeGenerator(&cu, cu.mir_graph.get(), &cu.arena);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected instruction set: " << compiler.GetInstructionSet();
+    }
+
+    cu.cg.reset(mir_to_lir);
+
+    /* The number of compiler temporaries depends on the backend, so set it up now if possible. */
+    if (mir_to_lir) {
+      size_t max_temps = mir_to_lir->GetMaxPossibleCompilerTemps();
+      bool set_max = cu.mir_graph->SetMaxAvailableNonSpecialCompilerTemps(max_temps);
+      CHECK(set_max);
+    }
+#if defined(ART_USE_PORTABLE_COMPILER)
+  }
+#endif
+
   /* Gathering opcode stats? */
   if (kCompilerDebugFlags & (1 << kDebugCountOpcodes)) {
     cu.mir_graph->EnableOpcodeCounting();
@@ -269,28 +305,6 @@
 
   CompiledMethod* result = NULL;
 
-#if defined(ART_USE_PORTABLE_COMPILER)
-  if (compiler_backend == kPortable) {
-    cu.cg.reset(PortableCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena, llvm_compilation_unit));
-  } else {
-#endif
-    switch (compiler.GetInstructionSet()) {
-      case kThumb2:
-        cu.cg.reset(ArmCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena));
-        break;
-      case kMips:
-        cu.cg.reset(MipsCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena));
-        break;
-      case kX86:
-        cu.cg.reset(X86CodeGenerator(&cu, cu.mir_graph.get(), &cu.arena));
-        break;
-      default:
-        LOG(FATAL) << "Unexpected instruction set: " << compiler.GetInstructionSet();
-    }
-#if defined(ART_USE_PORTABLE_COMPILER)
-  }
-#endif
-
   cu.cg->Materialize();
 
   cu.NewTimingSplit("Dedupe");  /* deduping takes up the vast majority of time in GetCompiledMethod(). */
diff --git a/compiler/dex/growable_array.h b/compiler/dex/growable_array.h
index 639120a..6ed207c 100644
--- a/compiler/dex/growable_array.h
+++ b/compiler/dex/growable_array.h
@@ -40,6 +40,7 @@
   kGrowableArrayFillArrayData,
   kGrowableArraySuccessorBlocks,
   kGrowableArrayPredecessors,
+  kGrowableArraySlowPaths,
   kGNumListKinds
 };
 
@@ -66,11 +67,6 @@
           idx_ = 0;
         }
 
-        static void* operator new(size_t size, ArenaAllocator* arena) {
-          return arena->Alloc(sizeof(GrowableArray::Iterator), ArenaAllocator::kAllocGrowableArray);
-        };
-        static void operator delete(void* p) {}  // Nop.
-
       private:
         size_t idx_;
         GrowableArray* const g_list_;
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index c235448..9680450 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1158,8 +1158,8 @@
     ssa_last_defs_[i] = 0;
   }
 
-  /* Add ssa reg for Method* */
-  method_sreg_ = AddNewSReg(SSA_METHOD_BASEREG);
+  // Create a compiler temporary for Method*. This is done after SSA initialization.
+  GetNewCompilerTemp(kCompilerTempSpecialMethodPtr, false);
 
   /*
    * Allocate the BasicBlockDataFlow structure for the entry and code blocks
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 9dbb341..10bcdb9 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -46,7 +46,6 @@
 
 MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena)
     : reg_location_(NULL),
-      compiler_temps_(arena, 6, kGrowableArrayMisc),
       cu_(cu),
       ssa_base_vregs_(NULL),
       ssa_subscripts_(NULL),
@@ -82,8 +81,13 @@
       checkstats_(NULL),
       arena_(arena),
       backward_branches_(0),
-      forward_branches_(0) {
+      forward_branches_(0),
+      compiler_temps_(arena, 6, kGrowableArrayMisc),
+      num_non_special_compiler_temps_(0),
+      max_available_non_special_compiler_temps_(0) {
   try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
+  max_available_special_compiler_temps_ = std::abs(static_cast<int>(kVRegNonSpecialTempBaseReg))
+      - std::abs(static_cast<int>(kVRegTempBaseReg));
 }
 
 MIRGraph::~MIRGraph() {
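
The max_available_special_compiler_temps_ expression above derives the reserved special-temp
count from the distance between two base registers. A worked example with purely illustrative
values (the real constants live next to kVRegMethodPtrBaseReg in the enums and are not shown
in this patch):

    // Hypothetical values, chosen only to show the arithmetic:
    constexpr int kVRegTempBaseReg = -2;            // first special temp (Method*)
    constexpr int kVRegNonSpecialTempBaseReg = -3;  // first non-special temp
    // abs(-3) - abs(-2) == 1: one v_reg slot is reserved for special temps.
    static_assert(-kVRegNonSpecialTempBaseReg - (-kVRegTempBaseReg) == 1,
                  "one special compiler temp slot (the Method* pointer)");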
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 4666d1e..f8706c4 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -168,11 +168,6 @@
 #define INVALID_REG (0xFF)
 #define INVALID_OFFSET (0xDEADF00FU)
 
-/* SSA encodings for special registers */
-#define SSA_METHOD_BASEREG (-2)
-/* First compiler temp basereg, grows smaller */
-#define SSA_CTEMP_BASEREG (SSA_METHOD_BASEREG - 1)
-
 #define MIR_IGNORE_NULL_CHECK           (1 << kMIRIgnoreNullCheck)
 #define MIR_NULL_CHECK_ONLY             (1 << kMIRNullCheckOnly)
 #define MIR_IGNORE_RANGE_CHECK          (1 << kMIRIgnoreRangeCheck)
@@ -195,7 +190,13 @@
  * name of compiler-introduced temporaries.
  */
 struct CompilerTemp {
-  int32_t s_reg;
+  int32_t v_reg;      // Virtual register number for temporary.
+  int32_t s_reg_low;  // SSA name for low Dalvik word.
+};
+
+enum CompilerTempType {
+  kCompilerTempVR,                // A virtual register temporary.
+  kCompilerTempSpecialMethodPtr,  // Temporary that keeps track of current method pointer.
 };
 
 // When debug option enabled, records effectiveness of null and range check elimination.
@@ -571,16 +572,82 @@
     return bad_loc;
   }
 
-  int GetMethodSReg() {
+  int GetMethodSReg() const {
     return method_sreg_;
   }
 
+  /**
+   * @brief Used to obtain the number of compiler temporaries being used.
+   * @return Returns the number of compiler temporaries.
+   */
+  size_t GetNumUsedCompilerTemps() const {
+    size_t total_num_temps = compiler_temps_.Size();
+    DCHECK_LE(num_non_special_compiler_temps_, total_num_temps);
+    return total_num_temps;
+  }
+
+  /**
+   * @brief Used to obtain the number of non-special compiler temporaries being used.
+   * @return Returns the number of non-special compiler temporaries.
+   */
+  size_t GetNumNonSpecialCompilerTemps() const {
+    return num_non_special_compiler_temps_;
+  }
+
+  /**
+   * @brief Used to set the total number of available non-special compiler temporaries.
+   * @details Fails if more temps are already in use than the new max would allow.
+   * @param new_max The new maximum number of non-special compiler temporaries.
+   * @return Returns true if the max was set and false if failed to set.
+   */
+  bool SetMaxAvailableNonSpecialCompilerTemps(size_t new_max) {
+    if (new_max < GetNumNonSpecialCompilerTemps()) {
+      return false;
+    } else {
+      max_available_non_special_compiler_temps_ = new_max;
+      return true;
+    }
+  }
+
+  /**
+   * @brief Provides the number of non-special compiler temps available.
+   * @details Even if this returns zero, special compiler temps are guaranteed to be available.
+   * @return Returns the number of available temps.
+   */
+  size_t GetNumAvailableNonSpecialCompilerTemps();
+
+  /**
+   * @brief Used to obtain an existing compiler temporary.
+   * @param index The index of the temporary, which must be strictly less than the
+   * number of temporaries.
+   * @return Returns the temporary that was asked for.
+   */
+  CompilerTemp* GetCompilerTemp(size_t index) const {
+    return compiler_temps_.Get(index);
+  }
+
+  /**
+   * @brief Used to obtain the maximum number of compiler temporaries that can be requested.
+   * @return Returns the maximum number of compiler temporaries, whether used or not.
+   */
+  size_t GetMaxPossibleCompilerTemps() const {
+    return max_available_special_compiler_temps_ + max_available_non_special_compiler_temps_;
+  }
+
+  /**
+   * @brief Used to obtain a new unique compiler temporary.
+   * @param ct_type Type of compiler temporary requested.
+   * @param wide Whether we should allocate a wide temporary.
+   * @return Returns the newly created compiler temporary.
+   */
+  CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide);
+
   bool MethodIsLeaf() {
     return attributes_ & METHOD_IS_LEAF;
   }
 
   RegLocation GetRegLocation(int index) {
-    DCHECK((index >= 0) && (index > num_ssa_regs_));
+    DCHECK((index >= 0) && (index < num_ssa_regs_));
     return reg_location_[index];
   }
 
@@ -727,7 +794,6 @@
 
   // TODO: make these private.
   RegLocation* reg_location_;                         // Map SSA names to location.
-  GrowableArray<CompilerTemp*> compiler_temps_;
   SafeMap<unsigned int, unsigned int> block_id_map_;  // Block collapse lookup cache.
 
   static const uint64_t oat_data_flow_attributes_[kMirOpLast];
@@ -836,6 +902,10 @@
   ArenaAllocator* arena_;
   int backward_branches_;
   int forward_branches_;
+  GrowableArray<CompilerTemp*> compiler_temps_;
+  size_t num_non_special_compiler_temps_;
+  size_t max_available_non_special_compiler_temps_;
+  size_t max_available_special_compiler_temps_;
 };
 
 }  // namespace art
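
A caller-side sketch of the compiler-temp API declared above; mg stands for any MIRGraph*,
and the request can fail once the backend's budget set via
SetMaxAvailableNonSpecialCompilerTemps is exhausted:

    // Request one wide (64-bit) non-special temporary.
    CompilerTemp* temp = mg->GetNewCompilerTemp(kCompilerTempVR, true /* wide */);
    if (temp != nullptr) {
      // temp->v_reg names the low word; per the allocator, the high word sits at
      // temp->v_reg + 1, and temp->s_reg_low is the SSA name of the low word.
    }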
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 0d53d4c..209ed3d 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -199,13 +199,94 @@
   return raw_use_counts_.Get(s_reg);
 }
 
+size_t MIRGraph::GetNumAvailableNonSpecialCompilerTemps() {
+  if (num_non_special_compiler_temps_ >= max_available_non_special_compiler_temps_) {
+    return 0;
+  } else {
+    return max_available_non_special_compiler_temps_ - num_non_special_compiler_temps_;
+  }
+}
+
+static const RegLocation temp_loc = {kLocCompilerTemp,
+                                     0, 1 /*defined*/, 0, 0, 0, 0, 0, 1 /*home*/,
+                                     kVectorNotUsed, INVALID_REG, INVALID_REG, INVALID_SREG,
+                                     INVALID_SREG};
+
+CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) {
+  // There is a limit to the number of non-special temps, so check that it is not exceeded.
+  if (ct_type == kCompilerTempVR) {
+    size_t available_temps = GetNumAvailableNonSpecialCompilerTemps();
+    if (available_temps <= 0 || (available_temps <= 1 && wide)) {
+      return nullptr;
+    }
+  }
+
+  CompilerTemp* compiler_temp = static_cast<CompilerTemp*>(arena_->Alloc(sizeof(CompilerTemp),
+                                                           ArenaAllocator::kAllocRegAlloc));
+
+  // Create the type of temp requested. Special temps need special handling because
+  // they have a specific virtual register assignment.
+  if (ct_type == kCompilerTempSpecialMethodPtr) {
+    DCHECK_EQ(wide, false);
+    compiler_temp->v_reg = static_cast<int>(kVRegMethodPtrBaseReg);
+    compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
+
+    // The MIR graph keeps track of the method pointer's sreg specially, so record it now.
+    method_sreg_ = compiler_temp->s_reg_low;
+  } else {
+    DCHECK_EQ(ct_type, kCompilerTempVR);
+
+    // The new non-special compiler temp must receive a unique v_reg with a negative value.
+    compiler_temp->v_reg =
+        static_cast<int>(kVRegNonSpecialTempBaseReg) - num_non_special_compiler_temps_;
+    compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
+    num_non_special_compiler_temps_++;
+
+    if (wide) {
+      // Ensure that the two registers are consecutive. Since the virtual registers used for
+      // temps grow in a negative fashion, we need the smaller to refer to the low part. Thus,
+      // we redefine the v_reg and s_reg_low.
+      compiler_temp->v_reg--;
+      int ssa_reg_high = compiler_temp->s_reg_low;
+      compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
+      int ssa_reg_low = compiler_temp->s_reg_low;
+
+      // If needed, initialize the register location for the high part.
+      // The low part is handled later in this method on a common path.
+      if (reg_location_ != nullptr) {
+        reg_location_[ssa_reg_high] = temp_loc;
+        reg_location_[ssa_reg_high].high_word = 1;
+        reg_location_[ssa_reg_high].s_reg_low = ssa_reg_low;
+        reg_location_[ssa_reg_high].wide = true;
+
+        // A new SSA needs new use counts.
+        use_counts_.Insert(0);
+        raw_use_counts_.Insert(0);
+      }
+
+      num_non_special_compiler_temps_++;
+    }
+  }
+
+  // Have we already allocated the register locations?
+  if (reg_location_ != nullptr) {
+    int ssa_reg_low = compiler_temp->s_reg_low;
+    reg_location_[ssa_reg_low] = temp_loc;
+    reg_location_[ssa_reg_low].s_reg_low = ssa_reg_low;
+    reg_location_[ssa_reg_low].wide = wide;
+
+    // A new SSA needs new use counts.
+    use_counts_.Insert(0);
+    raw_use_counts_.Insert(0);
+  }
+
+  compiler_temps_.Insert(compiler_temp);
+  return compiler_temp;
+}
 
 /* Do some MIR-level extended basic block optimizations */
 bool MIRGraph::BasicBlockOpt(BasicBlock* bb) {
   if (bb->block_type == kDead) {
     return true;
   }
-  int num_temps = 0;
   bool use_lvn = bb->use_lvn;
   UniquePtr<LocalValueNumbering> local_valnum;
   if (use_lvn) {
@@ -468,9 +549,6 @@
     bb = ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) ? NextDominatedBlock(bb) : NULL;
   }
 
-  if (num_temps > cu_->num_compiler_temps) {
-    cu_->num_compiler_temps = num_temps;
-  }
   return true;
 }
 
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
index c52ddf5..255892e 100644
--- a/compiler/dex/pass.h
+++ b/compiler/dex/pass.h
@@ -41,6 +41,7 @@
   kRepeatingPostOrderDFSTraversal,         /**< @brief Depth-First-Search / Repeating Post-Order. */
   kRepeatingReversePostOrderDFSTraversal,  /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */
   kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
+  kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
 };
 
 /**
@@ -50,20 +51,22 @@
  */
 class Pass {
  public:
-  Pass(const char *name, DataFlowAnalysisMode type, bool freed, const unsigned int f, const char *dump): pass_name_(name), traversal_type_(type), flags_(f), dump_cfg_folder_(dump) {
+  explicit Pass(const char* name, DataFlowAnalysisMode type = kAllNodes,
+                unsigned int flags = 0u, const char* dump = "")
+    : pass_name_(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) {
   }
 
-  Pass(const char *name, const char *dump): pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
+  Pass(const char* name, DataFlowAnalysisMode type, const char* dump)
+    : pass_name_(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) {
   }
 
-  explicit Pass(const char *name):pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_("") {
+  Pass(const char* name, const char* dump)
+    : pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
   }
 
-  Pass(const char *name, DataFlowAnalysisMode type, const char *dump):pass_name_(name), traversal_type_(type), flags_(false), dump_cfg_folder_(dump) {
+  virtual ~Pass() {
   }
 
-  virtual ~Pass() {}
-
   virtual const char* GetName() const {
     return pass_name_;
   }
@@ -76,14 +79,16 @@
     return (flags_ & flag);
   }
 
-  const char* GetDumpCFGFolder() const {return dump_cfg_folder_;}
+  const char* GetDumpCFGFolder() const {
+    return dump_cfg_folder_;
+  }
 
   /**
    * @brief Gate for the pass: determines whether to execute the pass or not considering a CompilationUnit
    * @param c_unit the CompilationUnit.
    * @return whether or not to execute the pass
    */
-  virtual bool Gate(const CompilationUnit *c_unit) const {
+  virtual bool Gate(const CompilationUnit* c_unit) const {
     // Unused parameter.
     UNUSED(c_unit);
 
@@ -95,7 +100,7 @@
    * @brief Start of the pass: called before the WalkBasicBlocks function
    * @param c_unit the considered CompilationUnit.
    */
-  virtual void Start(CompilationUnit *c_unit) const {
+  virtual void Start(CompilationUnit* c_unit) const {
     // Unused parameter.
     UNUSED(c_unit);
   }
@@ -104,7 +109,7 @@
    * @brief End of the pass: called after the WalkBasicBlocks function
    * @param c_unit the considered CompilationUnit.
    */
-  virtual void End(CompilationUnit *c_unit) const {
+  virtual void End(CompilationUnit* c_unit) const {
     // Unused parameter.
     UNUSED(c_unit);
   }
@@ -115,7 +120,7 @@
    * @param bb the BasicBlock.
    * @return whether or not there is a change when walking the BasicBlock
    */
-  virtual bool WalkBasicBlocks(CompilationUnit *c_unit, BasicBlock *bb) const {
+  virtual bool WalkBasicBlocks(CompilationUnit* c_unit, BasicBlock* bb) const {
     // Unused parameters.
     UNUSED(c_unit);
     UNUSED(bb);
diff --git a/compiler/dex/pass_driver.cc b/compiler/dex/pass_driver.cc
index 820dc5a..4f8739a 100644
--- a/compiler/dex/pass_driver.cc
+++ b/compiler/dex/pass_driver.cc
@@ -16,6 +16,8 @@
 
 #include <dlfcn.h>
 
+#include "base/logging.h"
+#include "base/macros.h"
 #include "bb_optimizations.h"
 #include "compiler_internals.h"
 #include "dataflow_iterator.h"
@@ -28,7 +30,8 @@
 namespace {  // anonymous namespace
 
 /**
- * @brief Helper function to create a single instance of a given Pass and can be shared across the threads
+ * @brief Helper function to create a single instance of a given Pass and can be shared across
+ * the threads.
  */
 template <typename PassType>
 const Pass* GetPassInstance() {
@@ -36,55 +39,58 @@
   return &pass;
 }
 
+void DoWalkBasicBlocks(CompilationUnit* c_unit, const Pass* pass, DataflowIterator* iterator) {
+  // Paranoid: Check the iterator before walking the BasicBlocks.
+  DCHECK(iterator != nullptr);
+
+  bool change = false;
+  for (BasicBlock* bb = iterator->Next(change); bb != nullptr; bb = iterator->Next(change)) {
+    change = pass->WalkBasicBlocks(c_unit, bb);
+  }
+}
+
+template <typename Iterator>
+inline void DoWalkBasicBlocks(CompilationUnit* c_unit, const Pass* pass) {
+  Iterator iterator(c_unit->mir_graph.get());
+  DoWalkBasicBlocks(c_unit, pass, &iterator);
+}
+
 }  // anonymous namespace
 
-PassDriver::PassDriver(CompilationUnit* cu, bool create_default_passes) : cu_(cu) {
-  dump_cfg_folder_ = "/sdcard/";
+PassDriver::PassDriver(CompilationUnit* cu, bool create_default_passes)
+    : cu_(cu), dump_cfg_folder_("/sdcard/") {
+  DCHECK(cu != nullptr);
 
   // If need be, create the default passes.
-  if (create_default_passes == true) {
+  if (create_default_passes) {
     CreatePasses();
   }
 }
 
 PassDriver::~PassDriver() {
-  // Clear the map: done to remove any chance of having a pointer after freeing below
-  pass_map_.clear();
 }
 
-void PassDriver::InsertPass(const Pass* new_pass, bool warn_override) {
-  assert(new_pass != 0);
+void PassDriver::InsertPass(const Pass* new_pass) {
+  DCHECK(new_pass != nullptr);
+  DCHECK(new_pass->GetName() != nullptr && new_pass->GetName()[0] != 0);
 
-  // Get name here to not do it all over the method.
-  const std::string& name = new_pass->GetName();
+  // It is an error to override an existing pass.
+  DCHECK(GetPass(new_pass->GetName()) == nullptr)
+      << "Pass name " << new_pass->GetName() << " already used.";
 
-  // Do we want to warn the user about squashing a pass?
-  if (warn_override == false) {
-    auto it = pass_map_.find(name);
-
-    if (it != pass_map_.end()) {
-      LOG(INFO) << "Pass name " << name << " already used, overwriting pass";
-    }
-  }
-
-  // Now add to map and list.
-  pass_map_.Put(name, new_pass);
+  // Now add to the list.
   pass_list_.push_back(new_pass);
 }
 
 void PassDriver::CreatePasses() {
   /*
-   * Create the pass list:
-   *   - These passes are immutable and are shared across the threads:
-   *    - This is achieved via:
-   *     - The UniquePtr used here.
-   *     - DISALLOW_COPY_AND_ASSIGN in the base Pass class.
+   * Create the pass list. These passes are immutable and are shared across the threads.
    *
    * Advantage is that there will be no race conditions here.
    * Disadvantage is the passes can't change their internal states depending on CompilationUnit:
    *   - This is not yet an issue: no current pass would require it.
    */
-  static const Pass* passes[] = {
+  static const Pass* const passes[] = {
       GetPassInstance<CodeLayout>(),
       GetPassInstance<SSATransformation>(),
       GetPassInstance<ConstantPropagation>(),
@@ -96,14 +102,10 @@
       GetPassInstance<BBOptimizations>(),
   };
 
-  // Get number of elements in the array.
-  unsigned int nbr = (sizeof(passes) / sizeof(passes[0]));
-
-  // Insert each pass into the map and into the list via the InsertPass method:
-  //   - Map is used for the lookup
-  //   - List is used for the pass walk
-  for (unsigned int i = 0; i < nbr; i++) {
-    InsertPass(passes[i]);
+  // Insert each pass into the list via the InsertPass method.
+  pass_list_.reserve(arraysize(passes));
+  for (const Pass* pass : passes) {
+    InsertPass(pass);
   }
 }
 
@@ -114,49 +116,37 @@
 }
 
 void PassDriver::DispatchPass(CompilationUnit* c_unit, const Pass* curPass) {
-  DataflowIterator* iterator = 0;
-
   LOG(DEBUG) << "Dispatching " << curPass->GetName();
 
-  MIRGraph* mir_graph = c_unit->mir_graph.get();
-  ArenaAllocator *arena = &(c_unit->arena);
-
-  // Let us start by getting the right iterator.
   DataFlowAnalysisMode mode = curPass->GetTraversal();
 
   switch (mode) {
     case kPreOrderDFSTraversal:
-      iterator = new (arena) PreOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<PreOrderDfsIterator>(c_unit, curPass);
       break;
     case kRepeatingPreOrderDFSTraversal:
-      iterator = new (arena) RepeatingPreOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(c_unit, curPass);
       break;
     case kRepeatingPostOrderDFSTraversal:
-      iterator = new (arena) RepeatingPostOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(c_unit, curPass);
       break;
     case kReversePostOrderDFSTraversal:
-      iterator = new (arena) ReversePostOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<ReversePostOrderDfsIterator>(c_unit, curPass);
       break;
     case kRepeatingReversePostOrderDFSTraversal:
-      iterator = new (arena) RepeatingReversePostOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(c_unit, curPass);
       break;
     case kPostOrderDOMTraversal:
-      iterator = new (arena) PostOrderDOMIterator(mir_graph);
+      DoWalkBasicBlocks<PostOrderDOMIterator>(c_unit, curPass);
       break;
     case kAllNodes:
-      iterator = new (arena) AllNodesIterator(mir_graph);
+      DoWalkBasicBlocks<AllNodesIterator>(c_unit, curPass);
+      break;
+    case kNoNodes:
       break;
     default:
       LOG(DEBUG) << "Iterator mode not handled in dispatcher: " << mode;
-      return;
-  }
-
-  // Paranoid: Check the iterator before walking the BasicBlocks.
-  assert(iterator != 0);
-
-  bool change = false;
-  for (BasicBlock *bb = iterator->Next(change); bb != 0; bb = iterator->Next(change)) {
-    change = curPass->WalkBasicBlocks(c_unit, bb);
+      break;
   }
 }
 
@@ -166,33 +156,34 @@
   curPass->End(c_unit);
 }
 
-bool PassDriver::RunPass(CompilationUnit* c_unit, const Pass* curPass, bool time_split) {
-  // Paranoid: c_unit or curPass cannot be 0, and the pass should have a name.
-  if (c_unit == 0 || curPass == 0 || (strcmp(curPass->GetName(), "") == 0)) {
-    return false;
-  }
+bool PassDriver::RunPass(CompilationUnit* c_unit, const Pass* pass, bool time_split) {
+  // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name.
+  DCHECK(c_unit != nullptr);
+  DCHECK(pass != nullptr);
+  DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0);
 
   // Do we perform a time split
-  if (time_split == true) {
-    c_unit->NewTimingSplit(curPass->GetName());
+  if (time_split) {
+    c_unit->NewTimingSplit(pass->GetName());
   }
 
   // Check the pass gate first.
-  bool shouldApplyPass = curPass->Gate(c_unit);
+  bool should_apply_pass = pass->Gate(c_unit);
 
-  if (shouldApplyPass == true) {
+  if (should_apply_pass) {
     // Applying the pass: first start, doWork, and end calls.
-    ApplyPass(c_unit, curPass);
+    ApplyPass(c_unit, pass);
 
     // Clean up if need be.
-    HandlePassFlag(c_unit, curPass);
+    HandlePassFlag(c_unit, pass);
 
     // Do we want to log it?
     if ((c_unit->enable_debug & (1 << kDebugDumpCFG)) != 0) {
       // Do we have a pass folder?
-      const std::string& passFolder = curPass->GetDumpCFGFolder();
+      const char* passFolder = pass->GetDumpCFGFolder();
+      DCHECK(passFolder != nullptr);
 
-      if (passFolder != "") {
+      if (passFolder[0] != 0) {
         // Create directory prefix.
         std::string prefix = GetDumpCFGFolder();
         prefix += passFolder;
@@ -204,19 +195,18 @@
   }
 
   // If the pass gate passed, we can declare success.
-  return shouldApplyPass;
+  return should_apply_pass;
 }
 
-bool PassDriver::RunPass(CompilationUnit* c_unit, const std::string& pass_name) {
-  // Paranoid: c_unit cannot be 0 and we need a pass name.
-  if (c_unit == 0 || pass_name == "") {
-    return false;
-  }
+bool PassDriver::RunPass(CompilationUnit* c_unit, const char* pass_name) {
+  // Paranoid: c_unit cannot be nullptr and we need a pass name.
+  DCHECK(c_unit != nullptr);
+  DCHECK(pass_name != nullptr && pass_name[0] != 0);
 
-  const Pass* curPass = GetPass(pass_name);
+  const Pass* cur_pass = GetPass(pass_name);
 
-  if (curPass != 0) {
-    return RunPass(c_unit, curPass);
+  if (cur_pass != nullptr) {
+    return RunPass(c_unit, cur_pass);
   }
 
   // Return false, we did not find the pass.
@@ -224,27 +214,26 @@
 }
 
 void PassDriver::Launch() {
-  for (const Pass *curPass : pass_list_) {
-    RunPass(cu_, curPass, true);
+  for (const Pass* cur_pass : pass_list_) {
+    RunPass(cu_, cur_pass, true);
   }
 }
 
 void PassDriver::PrintPassNames() const {
   LOG(INFO) << "Loop Passes are:";
 
-  for (const Pass *curPass : pass_list_) {
-    LOG(INFO) << "\t-" << curPass->GetName();
+  for (const Pass* cur_pass : pass_list_) {
+    LOG(INFO) << "\t-" << cur_pass->GetName();
   }
 }
 
-const Pass* PassDriver::GetPass(const std::string& name) const {
-  auto it = pass_map_.find(name);
-
-  if (it != pass_map_.end()) {
-    return it->second;
+const Pass* PassDriver::GetPass(const char* name) const {
+  for (const Pass* cur_pass : pass_list_) {
+    if (strcmp(name, cur_pass->GetName()) == 0) {
+      return cur_pass;
+    }
   }
-
-  return 0;
+  return nullptr;
 }
 
 }  // namespace art
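
Two idioms above carry most of this cleanup: GetPassInstance<PassType>() is a function-local
static singleton, and the templated DoWalkBasicBlocks replaces the old arena-allocated
iterators with a stack-allocated, type-parameterized walk. A condensed standalone sketch of
the singleton half (simplified stand-ins, not the ART classes):

    #include <cstdio>

    struct Pass {
      virtual ~Pass() {}
      virtual const char* GetName() const = 0;
    };

    struct CodeLayout : Pass {
      CodeLayout() {}
      const char* GetName() const { return "CodeLayout"; }
    };

    template <typename PassType>
    const Pass* GetPassInstance() {
      static const PassType pass;  // one immutable instance, shared by every driver
      return &pass;
    }

    int main() {
      // Both calls yield the same address: a single shared, stateless pass object.
      std::printf("%d\n", GetPassInstance<CodeLayout>() == GetPassInstance<CodeLayout>());
      return 0;
    }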
diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h
index d580460..c734d3e 100644
--- a/compiler/dex/pass_driver.h
+++ b/compiler/dex/pass_driver.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_DEX_PASS_DRIVER_H_
 #define ART_COMPILER_DEX_PASS_DRIVER_H_
 
-#include <list>
+#include <vector>
 #include "pass.h"
 #include "safe_map.h"
 
@@ -42,7 +42,7 @@
-   * @param new_pass the new Pass to insert in the map and list.
-   * @param warn_override warn if the name of the Pass is already used.
+   * @param new_pass the new Pass to insert in the list.
    */
-  void InsertPass(const Pass* new_pass, bool warn_override = true);
+  void InsertPass(const Pass* new_pass);
 
   /**
    * @brief Run a pass using the name as key.
@@ -50,7 +50,7 @@
    * @param pass_name the Pass name.
    * @return whether the pass was applied.
    */
-  bool RunPass(CompilationUnit* c_unit, const std::string& pass_name);
+  bool RunPass(CompilationUnit* c_unit, const char* pass_name);
 
   /**
    * @brief Run a pass using the Pass itself.
@@ -75,20 +75,17 @@
 
   void PrintPassNames() const;
 
-  const Pass* GetPass(const std::string& name) const;
+  const Pass* GetPass(const char* name) const;
 
-  const char *GetDumpCFGFolder() const {
+  const char* GetDumpCFGFolder() const {
     return dump_cfg_folder_;
   }
 
  protected:
   void CreatePasses();
 
-  /** @brief The Pass Map: contains name -> pass for quick lookup. */
-  SafeMap<std::string, const Pass*> pass_map_;
-
   /** @brief List of passes: provides the order to execute the passes. */
-  std::list<const Pass*> pass_list_;
+  std::vector<const Pass*> pass_list_;
 
   /** @brief The CompilationUnit on which to execute the passes on. */
   CompilationUnit* const cu_;
diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc
index 70b660b..e6cc2de 100644
--- a/compiler/dex/portable/mir_to_gbc.cc
+++ b/compiler/dex/portable/mir_to_gbc.cc
@@ -1522,7 +1522,7 @@
   reg_info.push_back(irb_->getInt32(cu_->num_ins));
   reg_info.push_back(irb_->getInt32(cu_->num_regs));
   reg_info.push_back(irb_->getInt32(cu_->num_outs));
-  reg_info.push_back(irb_->getInt32(cu_->num_compiler_temps));
+  reg_info.push_back(irb_->getInt32(mir_graph_->GetNumUsedCompilerTemps()));
   reg_info.push_back(irb_->getInt32(mir_graph_->GetNumSSARegs()));
   ::llvm::MDNode* reg_info_node = ::llvm::MDNode::get(*context_, reg_info);
   inst->setMetadata("RegInfo", reg_info_node);
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 32673db..0ed4576 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -154,6 +154,8 @@
     LIR* OpRegImm(OpKind op, int r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset);
     LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+    LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
     LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
     LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 07fc6c7..9d3968b 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -367,6 +367,16 @@
   return OpRegRegShift(op, r_dest_src1, r_src2, 0);
 }
 
+LIR* ArmMir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* ArmMir2Lir::OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
 LIR* ArmMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
   LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm";
   return NULL;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 7f19ea1..5e0fed7 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -252,7 +252,7 @@
 }
 
 void Mir2Lir::DumpPromotionMap() {
-  int num_regs = cu_->num_dalvik_registers + cu_->num_compiler_temps + 1;
+  int num_regs = cu_->num_dalvik_registers + mir_graph_->GetNumUsedCompilerTemps();
   for (int i = 0; i < num_regs; i++) {
     PromotionMap v_reg_map = promotion_map_[i];
     std::string buf;
@@ -289,7 +289,7 @@
   LOG(INFO) << "Outs         : " << cu_->num_outs;
   LOG(INFO) << "CoreSpills       : " << num_core_spills_;
   LOG(INFO) << "FPSpills       : " << num_fp_spills_;
-  LOG(INFO) << "CompilerTemps    : " << cu_->num_compiler_temps;
+  LOG(INFO) << "CompilerTemps    : " << mir_graph_->GetNumUsedCompilerTemps();
   LOG(INFO) << "Frame size       : " << frame_size_;
   LOG(INFO) << "code size is " << total_size_ <<
     " bytes, Dalvik size is " << insns_size * 2;
@@ -992,6 +992,7 @@
       data_offset_(0),
       total_size_(0),
       block_label_list_(NULL),
+      promotion_map_(NULL),
       current_dalvik_offset_(0),
       estimated_native_code_size_(0),
       reg_pool_(NULL),
@@ -1002,10 +1003,8 @@
       core_spill_mask_(0),
       fp_spill_mask_(0),
       first_lir_insn_(NULL),
-      last_lir_insn_(NULL) {
-  promotion_map_ = static_cast<PromotionMap*>
-      (arena_->Alloc((cu_->num_dalvik_registers  + cu_->num_compiler_temps + 1) *
-                      sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc));
+      last_lir_insn_(NULL),
+      slow_paths_(arena, 32, kGrowableArraySlowPaths) {
   // Reserve pointer id 0 for NULL.
   size_t null_idx = WrapPointer(NULL);
   DCHECK_EQ(null_idx, 0U);
@@ -1081,13 +1080,27 @@
   return result;
 }
 
+size_t Mir2Lir::GetMaxPossibleCompilerTemps() const {
+  // Choose a reasonably small value in order to contain stack growth.
+  // Backends that are smarter about spill region can return larger values.
+  const size_t max_compiler_temps = 10;
+  return max_compiler_temps;
+}
+
+size_t Mir2Lir::GetNumBytesForCompilerTempSpillRegion() {
+  // By default assume that the Mir2Lir will need one slot for each temporary.
+  // If the backend can better determine temps that have non-overlapping ranges and
+  // temps that do not need to be spilled, it can provide a smaller region.
+  return (mir_graph_->GetNumUsedCompilerTemps() * sizeof(uint32_t));
+}
+
 int Mir2Lir::ComputeFrameSize() {
   /* Figure out the frame size */
   static const uint32_t kAlignMask = kStackAlignment - 1;
-  uint32_t size = (num_core_spills_ + num_fp_spills_ +
-                   1 /* filler word */ + cu_->num_regs + cu_->num_outs +
-                   cu_->num_compiler_temps + 1 /* cur_method* */)
-                   * sizeof(uint32_t);
+  uint32_t size = ((num_core_spills_ + num_fp_spills_ +
+                   1 /* filler word */ + cu_->num_regs + cu_->num_outs)
+                   * sizeof(uint32_t)) +
+                   GetNumBytesForCompilerTempSpillRegion();
   /* Align and set */
   return (size + kAlignMask) & ~(kAlignMask);
 }
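
A worked example of the new frame computation, with illustrative counts and kStackAlignment assumed to be 16: for 2 core spills, 1 FP spill, 5 promoted core registers, 4 outs, and 2 used compiler temps,

    size    = (2 + 1 + 1 /* filler */ + 5 + 4) * 4 + (2 * 4 /* temp spill region */)
            = 52 + 8 = 60 bytes
    aligned = (60 + 15) & ~15 = 64 bytes

The compiler-temp contribution now comes from GetNumBytesForCompilerTempSpillRegion(), so backends that track temp lifetimes can shrink it.
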
@@ -1162,4 +1175,15 @@
   return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
 }
 
+LIR* Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                int offset, int check_value, LIR* target) {
+  // Handle this for architectures that can't compare to memory.
+  LoadWordDisp(base_reg, offset, temp_reg);
+  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
+  return branch;
+}
+
+void Mir2Lir::AddSlowPath(LIRSlowPath* slowpath) {
+  slow_paths_.Insert(slowpath);
+}
 }  // namespace art
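
OpCmpMemImmBranch above is the portable fallback: load the word, then compare-and-branch on a register. Targets whose ISA can compare an immediate directly against memory (x86) are expected to override it and fuse the two. A sketch of such an override; the opcode name here is an assumption for illustration, not taken from the x86 encoding map:

    LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
                                       int offset, int check_value, LIR* target) {
      // Compare [base_reg + offset] against check_value without a temp load.
      NewLIR3(kX86Cmp32MI, base_reg, offset, check_value);  // hypothetical opcode name
      return OpCondBranch(cond, target);
    }
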
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 522bacb..c59f3b8 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -21,6 +21,7 @@
 #include "mirror/array.h"
 #include "mirror/object-inl.h"
 #include "verifier/method_verifier.h"
+#include <functional>
 
 namespace art {
 
@@ -358,6 +359,34 @@
   }
 }
 
+//
+// Slow path to ensure a class is initialized for sget/sput.
+//
+class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath {
+ public:
+  StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont,
+           int storage_index, int r_base) :
+    LIRSlowPath(m2l, m2l->GetCurrentDexPc(), unresolved, cont), uninit_(uninit), storage_index_(storage_index),
+    r_base_(r_base) {
+  }
+
+  void Compile() {
+    LIR* unresolved_target = GenerateTargetLabel();
+    uninit_->target = unresolved_target;
+    m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage),
+                            storage_index_, true);
+    // Copy helper's result into r_base, a no-op on all but MIPS.
+    m2l_->OpRegCopy(r_base_,  m2l_->TargetReg(kRet0));
+
+    m2l_->OpUnconditionalBranch(cont_);
+  }
+
+ private:
+  LIR* const uninit_;
+  const int storage_index_;
+  const int r_base_;
+};
+
 void Mir2Lir::GenSput(uint32_t field_idx, RegLocation rl_src, bool is_long_or_double,
                       bool is_object) {
   int field_offset;
@@ -401,24 +430,20 @@
       // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
       if (!is_initialized) {
         // Check if r_base is NULL or a not yet initialized class.
-        // TUNING: fast path should fall through
+
+        // The slow path is invoked if r_base is NULL or the class it points
+        // to is not initialized.
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
         int r_tmp = TargetReg(kArg2);
         LockTemp(r_tmp);
-        // TODO: Fuse the compare of a constant with memory on X86 and avoid the load.
-        LoadWordDisp(r_base, mirror::Class::StatusOffset().Int32Value(), r_tmp);
-        LIR* initialized_branch = OpCmpImmBranch(kCondGe, r_tmp, mirror::Class::kStatusInitialized,
-                                                 NULL);
+        LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
+                                          mirror::Class::StatusOffset().Int32Value(),
+                                          mirror::Class::kStatusInitialized, NULL);
+        LIR* cont = NewLIR0(kPseudoTargetLabel);
 
-        LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
-        unresolved_branch->target = unresolved_target;
-        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage), storage_index,
-                             true);
-        // Copy helper's result into r_base, a no-op on all but MIPS.
-        OpRegCopy(r_base, TargetReg(kRet0));
-
-        LIR* initialized_target = NewLIR0(kPseudoTargetLabel);
-        initialized_branch->target = initialized_target;
+        AddSlowPath(new (arena_) StaticFieldSlowPath(this,
+                                                     unresolved_branch, uninit_branch, cont,
+                                                     storage_index, r_base));
 
         FreeTemp(r_tmp);
       }
@@ -495,24 +520,20 @@
       // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
       if (!is_initialized) {
         // Check if r_base is NULL or a not yet initialized class.
-        // TUNING: fast path should fall through
+
+        // The slow path is invoked if r_base is NULL or the class it points
+        // to is not initialized.
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
         int r_tmp = TargetReg(kArg2);
         LockTemp(r_tmp);
-        // TODO: Fuse the compare of a constant with memory on X86 and avoid the load.
-        LoadWordDisp(r_base, mirror::Class::StatusOffset().Int32Value(), r_tmp);
-        LIR* initialized_branch = OpCmpImmBranch(kCondGe, r_tmp, mirror::Class::kStatusInitialized,
-                                                 NULL);
+        LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
+                                          mirror::Class::StatusOffset().Int32Value(),
+                                          mirror::Class::kStatusInitialized, NULL);
+        LIR* cont = NewLIR0(kPseudoTargetLabel);
 
-        LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
-        unresolved_branch->target = unresolved_target;
-        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage), storage_index,
-                             true);
-        // Copy helper's result into r_base, a no-op on all but MIPS.
-        OpRegCopy(r_base, TargetReg(kRet0));
-
-        LIR* initialized_target = NewLIR0(kPseudoTargetLabel);
-        initialized_branch->target = initialized_target;
+        AddSlowPath(new (arena_) StaticFieldSlowPath(this,
+                                                     unresolved_branch, uninit_branch, cont,
+                                                     storage_index, r_base));
 
         FreeTemp(r_tmp);
       }
@@ -552,6 +573,16 @@
   }
 }
 
+// Generate code for all slow paths.
+void Mir2Lir::HandleSlowPaths() {
+  int n = slow_paths_.Size();
+  for (int i = 0; i < n; ++i) {
+    LIRSlowPath* slowpath = slow_paths_.Get(i);
+    slowpath->Compile();
+  }
+  slow_paths_.Reset();
+}
+
 void Mir2Lir::HandleSuspendLaunchPads() {
   int num_elems = suspend_launchpads_.Size();
   ThreadOffset helper_offset = QUICK_ENTRYPOINT_OFFSET(pTestSuspend);
@@ -820,32 +851,40 @@
         type_idx) || SLOW_TYPE_PATH) {
       // Slow path, at runtime test if type is null and if so initialize
       FlushAllRegs();
-      LIR* branch1 = OpCmpImmBranch(kCondEq, rl_result.low_reg, 0, NULL);
-      // Resolved, store and hop over following code
+      LIR* branch = OpCmpImmBranch(kCondEq, rl_result.low_reg, 0, NULL);
+      LIR* cont = NewLIR0(kPseudoTargetLabel);
+
+      // Object to generate the slow path for class resolution.
+      class SlowPath : public LIRSlowPath {
+       public:
+        SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, const int type_idx,
+                 const RegLocation& rl_method, const RegLocation& rl_result) :
+                   LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), type_idx_(type_idx),
+                   rl_method_(rl_method), rl_result_(rl_result) {
+        }
+
+        void Compile() {
+          GenerateTargetLabel();
+
+          m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx_,
+                                        rl_method_.low_reg, true);
+          m2l_->OpRegCopy(rl_result_.low_reg,  m2l_->TargetReg(kRet0));
+
+          m2l_->OpUnconditionalBranch(cont_);
+        }
+
+       private:
+        const int type_idx_;
+        const RegLocation rl_method_;
+        const RegLocation rl_result_;
+      };
+
+      // Add to list for future.
+      AddSlowPath(new (arena_) SlowPath(this, branch, cont,
+                                        type_idx, rl_method, rl_result));
+
       StoreValue(rl_dest, rl_result);
-      /*
-       * Because we have stores of the target value on two paths,
-       * clobber temp tracking for the destination using the ssa name
-       */
-      ClobberSReg(rl_dest.s_reg_low);
-      LIR* branch2 = OpUnconditionalBranch(0);
-      // TUNING: move slow path to end & remove unconditional branch
-      LIR* target1 = NewLIR0(kPseudoTargetLabel);
-      // Call out to helper, which will return resolved type in kArg0
-      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx,
-                              rl_method.low_reg, true);
-      RegLocation rl_result = GetReturn(false);
-      StoreValue(rl_dest, rl_result);
-      /*
-       * Because we have stores of the target value on two paths,
-       * clobber temp tracking for the destination using the ssa name
-       */
-      ClobberSReg(rl_dest.s_reg_low);
-      // Rejoin code paths
-      LIR* target2 = NewLIR0(kPseudoTargetLabel);
-      branch1->target = target1;
-      branch2->target = target2;
-    } else {
+    } else {
       // Fast path, we're done - just store result
       StoreValue(rl_dest, rl_result);
     }
@@ -861,36 +900,65 @@
     // slow path, resolve string if not in dex cache
     FlushAllRegs();
     LockCallTemps();  // Using explicit registers
-    LoadCurrMethodDirect(TargetReg(kArg2));
-    LoadWordDisp(TargetReg(kArg2),
-                 mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), TargetReg(kArg0));
+
+    // If the Method* is already in a register, we can save a copy.
+    RegLocation rl_method = mir_graph_->GetMethodLoc();
+    int r_method;
+    if (rl_method.location == kLocPhysReg) {
+      // A temp would conflict with register use below.
+      DCHECK(!IsTemp(rl_method.low_reg));
+      r_method = rl_method.low_reg;
+    } else {
+      r_method = TargetReg(kArg2);
+      LoadCurrMethodDirect(r_method);
+    }
+    LoadWordDisp(r_method, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(),
+                 TargetReg(kArg0));
+
     // Might call out to helper, which will return resolved string in kRet0
-    int r_tgt = CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(pResolveString));
     LoadWordDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
-    LoadConstant(TargetReg(kArg1), string_idx);
-    if (cu_->instruction_set == kThumb2) {
-      OpRegImm(kOpCmp, TargetReg(kRet0), 0);  // Is resolved?
+    if (cu_->instruction_set == kThumb2 ||
+        cu_->instruction_set == kMips) {
+      LoadConstant(TargetReg(kArg1), string_idx);
+      LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL);
+      LIR* cont = NewLIR0(kPseudoTargetLabel);
       GenBarrier();
-      // For testing, always force through helper
-      if (!EXERCISE_SLOWEST_STRING_PATH) {
-        OpIT(kCondEq, "T");
-      }
-      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));   // .eq
-      LIR* call_inst = OpReg(kOpBlx, r_tgt);    // .eq, helper(Method*, string_idx)
-      MarkSafepointPC(call_inst);
-      FreeTemp(r_tgt);
-    } else if (cu_->instruction_set == kMips) {
-      LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL);
-      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));   // .eq
-      LIR* call_inst = OpReg(kOpBlx, r_tgt);
-      MarkSafepointPC(call_inst);
-      FreeTemp(r_tgt);
-      LIR* target = NewLIR0(kPseudoTargetLabel);
-      branch->target = target;
+
+      // Object to generate the slow path for string resolution.
+      class SlowPath : public LIRSlowPath {
+       public:
+        SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, int r_method) :
+          LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), r_method_(r_method) {
+        }
+
+        void Compile() {
+          GenerateTargetLabel();
+
+          int r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(pResolveString));
+
+          m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);   // .eq
+          LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);
+          m2l_->MarkSafepointPC(call_inst);
+          m2l_->FreeTemp(r_tgt);
+
+          m2l_->OpUnconditionalBranch(cont_);
+        }
+
+       private:
+        int r_method_;
+      };
+
+      // Add to list for future.
+      AddSlowPath(new (arena_) SlowPath(this, fromfast, cont, r_method));
     } else {
       DCHECK_EQ(cu_->instruction_set, kX86);
-      CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pResolveString), TargetReg(kArg2),
+      LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL);
+      LoadConstant(TargetReg(kArg1), string_idx);
+      CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pResolveString), r_method,
                               TargetReg(kArg1), true);
+      LIR* target = NewLIR0(kPseudoTargetLabel);
+      branch->target = target;
     }
     GenBarrier();
     StoreValue(rl_dest, GetReturn(false));
@@ -974,6 +1042,9 @@
 // question with simple comparisons.
 void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
                                  RegLocation rl_src) {
+  // X86 has its own implementation.
+  DCHECK_NE(cu_->instruction_set, kX86);
+
   RegLocation object = LoadValue(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   int result_reg = rl_result.low_reg;
@@ -1192,37 +1263,90 @@
     LoadWordDisp(class_reg, offset_of_type, class_reg);
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) {
       // Need to test presence of type in dex cache at runtime
-      LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
-      // Not resolved
-      // Call out to helper, which will return resolved type in kArg0
-      // InitializeTypeFromCode(idx, method)
-      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx,
-                              TargetReg(kArg1), true);
-      OpRegCopy(class_reg, TargetReg(kRet0));  // Align usage with fast path
-      // Rejoin code paths
-      LIR* hop_target = NewLIR0(kPseudoTargetLabel);
-      hop_branch->target = hop_target;
+      LIR* hop_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL);
+      LIR* cont = NewLIR0(kPseudoTargetLabel);
+
+      // Slow path to initialize the type.  Executed if the type is NULL.
+      class SlowPath : public LIRSlowPath {
+       public:
+        SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, const int type_idx,
+                 const int class_reg) :
+                   LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), type_idx_(type_idx),
+                   class_reg_(class_reg) {
+        }
+
+        void Compile() {
+          GenerateTargetLabel();
+
+          // Call out to helper, which will return resolved type in kArg0
+          // InitializeTypeFromCode(idx, method)
+          m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx_,
+                                        m2l_->TargetReg(kArg1), true);
+          m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0));  // Align usage with fast path
+          m2l_->OpUnconditionalBranch(cont_);
+        }
+       private:
+        const int type_idx_;
+        const int class_reg_;
+      };
+
+      AddSlowPath(new (arena_) SlowPath(this, hop_branch, cont,
+                                        type_idx, class_reg));
     }
   }
   // At this point, class_reg (kArg2) has class
   LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
-  /* Null is OK - continue */
-  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
-  /* load object->klass_ */
-  DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
-  /* kArg1 now contains object->klass_ */
-  LIR* branch2 = NULL;
-  if (!type_known_abstract) {
-    branch2 = OpCmpBranch(kCondEq, TargetReg(kArg1), class_reg, NULL);
-  }
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg2),
-                          TargetReg(kArg1), true);
-  /* branch target here */
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch1->target = target;
-  if (branch2 != NULL) {
-    branch2->target = target;
+
+  // Slow path for the case where the classes are not equal.  In this case we need
+  // to call a helper function to do the check.
+  class SlowPath : public LIRSlowPath {
+   public:
+    SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, bool load):
+               LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), load_(load) {
+    }
+
+    void Compile() {
+      GenerateTargetLabel();
+
+      if (load_) {
+        m2l_->LoadWordDisp(m2l_->TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
+                           m2l_->TargetReg(kArg1));
+      }
+      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), m2l_->TargetReg(kArg2),
+                                    m2l_->TargetReg(kArg1), true);
+
+      m2l_->OpUnconditionalBranch(cont_);
+    }
+
+   private:
+    bool load_;
+  };
+
+  if (type_known_abstract) {
+    // Easier case, run slow path if target is non-null (slow path will load from target)
+    LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kArg0), 0, NULL);
+    LIR* cont = NewLIR0(kPseudoTargetLabel);
+    AddSlowPath(new (arena_) SlowPath(this, branch, cont, true));
+  } else {
+    // Harder, more common case.  We need to generate a forward branch over the load
+    // if the target is null.  If it's non-null we perform the load and branch to the
+    // slow path if the classes are not equal.
+
+    /* Null is OK - continue */
+    LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
+    /* load object->klass_ */
+    DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
+    LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
+                    TargetReg(kArg1));
+
+    LIR* branch2 = OpCmpBranch(kCondNe, TargetReg(kArg1), class_reg, NULL);
+    LIR* cont = NewLIR0(kPseudoTargetLabel);
+
+    // Add the slow path that will not perform load since this is already done.
+    AddSlowPath(new (arena_) SlowPath(this, branch2, cont, false));
+
+    // Set the null check to branch to the continuation.
+    branch1->target = cont;
   }
 }
 
@@ -1302,6 +1426,7 @@
 
 void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                             RegLocation rl_src1, RegLocation rl_src2) {
+  DCHECK_NE(cu_->instruction_set, kX86);
   OpKind op = kOpBkpt;
   bool is_div_rem = false;
   bool check_zero = false;
@@ -1380,15 +1505,9 @@
     } else {
       if (shift_op) {
         int t_reg = INVALID_REG;
-        if (cu_->instruction_set == kX86) {
-          // X86 doesn't require masking and must use ECX
-          t_reg = TargetReg(kCount);  // rCX
-          LoadValueDirectFixed(rl_src2, t_reg);
-        } else {
-          rl_src2 = LoadValue(rl_src2, kCoreReg);
-          t_reg = AllocTemp();
-          OpRegRegImm(kOpAnd, t_reg, rl_src2.low_reg, 31);
-        }
+        rl_src2 = LoadValue(rl_src2, kCoreReg);
+        t_reg = AllocTemp();
+        OpRegRegImm(kOpAnd, t_reg, rl_src2.low_reg, 31);
         rl_src1 = LoadValue(rl_src1, kCoreReg);
         rl_result = EvalLoc(rl_dest, kCoreReg, true);
         OpRegRegReg(op, rl_result.low_reg, rl_src1.low_reg, t_reg);
@@ -1411,9 +1530,6 @@
       }
       rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv);
       done = true;
-    } else if (cu_->instruction_set == kX86) {
-      rl_result = GenDivRem(rl_dest, rl_src1, rl_src2, op == kOpDiv, check_zero);
-      done = true;
     } else if (cu_->instruction_set == kThumb2) {
       if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
         // Use ARM SDIV instruction for division.  For remainder we also need to
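
The sget/sput rewrite earlier in this file has a wrinkle worth calling out: the slow path is reached from two fast-path branches, but the LIRSlowPath base class only patches one of them. Condensed from the code above (fast path first, then what StaticFieldSlowPath::Compile() does):

    LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);   // class == null?
    LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
                                           mirror::Class::kStatusInitialized, NULL);
    LIR* cont = NewLIR0(kPseudoTargetLabel);                             // rejoin point
    AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch,
                                                 cont, storage_index, r_base));

    // In StaticFieldSlowPath::Compile():
    //   LIR* label = GenerateTargetLabel();  // patches unresolved_branch->target
    //   uninit_->target = label;             // the second branch is patched by hand
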
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 4bc1a37..6aaad66 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -453,7 +453,7 @@
       if (cu->instruction_set != kX86) {
         if (direct_code == 0) {
           cg->LoadWordDisp(cg->TargetReg(kArg0),
-                           mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(),
+                           mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
                            cg->TargetReg(kInvokeTgt));
         }
         break;
@@ -506,7 +506,7 @@
     case 4:  // Get the compiled code address [uses kArg0, sets kInvokeTgt]
       if (cu->instruction_set != kX86) {
         cg->LoadWordDisp(cg->TargetReg(kArg0),
-                         mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(),
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
                          cg->TargetReg(kInvokeTgt));
         break;
       }
@@ -561,7 +561,7 @@
     case 5:  // Get the compiled code address [use kArg0, set kInvokeTgt]
       if (cu->instruction_set != kX86) {
         cg->LoadWordDisp(cg->TargetReg(kArg0),
-                         mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(),
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
                          cg->TargetReg(kInvokeTgt));
         break;
       }
@@ -811,42 +811,145 @@
     }
   }
 
+  // Logic below assumes that Method pointer is at offset zero from SP.
+  DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
+
+  // The first 3 arguments are passed via registers.
+  // TODO: For 64-bit, instead of hardcoding 4 for Method* size, we should either
+  // get size of uintptr_t or size of object reference according to model being used.
+  int outs_offset = 4 /* Method* */ + (3 * sizeof(uint32_t));
   int start_offset = SRegOffset(info->args[3].s_reg_low);
-  int outs_offset = 4 /* Method* */ + (3 * 4);
-  if (cu_->instruction_set != kThumb2) {
+  int regs_left_to_pass_via_stack = info->num_arg_words - 3;
+  DCHECK_GT(regs_left_to_pass_via_stack, 0);
+
+  if (cu_->instruction_set == kThumb2 && regs_left_to_pass_via_stack <= 16) {
+    // Use vldm/vstm pair using kArg3 as a temp
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+    OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset);
+    LIR* ld = OpVldm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+    // TUNING: loosen barrier
+    ld->u.m.def_mask = ENCODE_ALL;
+    SetMemRefType(ld, true /* is_load */, kDalvikReg);
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+    OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4));
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+    LIR* st = OpVstm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+    SetMemRefType(st, false /* is_load */, kDalvikReg);
+    st->u.m.def_mask = ENCODE_ALL;
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+  } else if (cu_->instruction_set == kX86) {
+    int current_src_offset = start_offset;
+    int current_dest_offset = outs_offset;
+
+    while (regs_left_to_pass_via_stack > 0) {
+      // This is based on the knowledge that the stack itself is 16-byte aligned.
+      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
+      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
+      size_t bytes_to_move;
+
+      /*
+       * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do
+       * a 128-bit move because we won't get another chance to align. If there are more than
+       * 4 registers left to move, consider a 128-bit move only if either src or dest is aligned.
+       * We do this because we could potentially do a smaller move to align.
+       */
+      if (regs_left_to_pass_via_stack == 4 ||
+          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
+        // Moving 128-bits via xmm register.
+        bytes_to_move = sizeof(uint32_t) * 4;
+
+        // Allocate a free xmm temp. Since we are working through the calling sequence,
+        // we expect to have an xmm temporary available.
+        int temp = AllocTempDouble();
+        CHECK_GT(temp, 0);
+
+        LIR* ld1 = nullptr;
+        LIR* ld2 = nullptr;
+        LIR* st1 = nullptr;
+        LIR* st2 = nullptr;
+
+        /*
+         * The logic is similar for both loads and stores. If we have 16-byte alignment,
+         * do an aligned move. If we have 8-byte alignment, then do the move in two
+         * parts. This approach prevents possible cache line splits. Finally, fall back
+         * to doing an unaligned move. In most cases we likely won't split the cache
+         * line but we cannot prove it and thus take a conservative approach.
+         */
+        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
+        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
+
+        if (src_is_16b_aligned) {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
+        } else if (src_is_8b_aligned) {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP);
+          ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1), kMovHi128FP);
+        } else {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP);
+        }
+
+        if (dest_is_16b_aligned) {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP);
+        } else if (dest_is_8b_aligned) {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP);
+          st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1), temp, kMovHi128FP);
+        } else {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP);
+        }
+
+        // TODO If we could keep track of aliasing information for memory accesses that are wider
+        // than 64-bit, we wouldn't need to set up a barrier.
+        if (ld1 != nullptr) {
+          if (ld2 != nullptr) {
+            // For 64-bit load we can actually set up the aliasing information.
+            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
+            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
+          } else {
+            // Set barrier for 128-bit load.
+            SetMemRefType(ld1, true /* is_load */, kDalvikReg);
+            ld1->u.m.def_mask = ENCODE_ALL;
+          }
+        }
+        if (st1 != nullptr) {
+          if (st2 != nullptr) {
+            // For 64-bit store we can actually set up the aliasing information.
+            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
+            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
+          } else {
+            // Set barrier for 128-bit store.
+            SetMemRefType(st1, false /* is_load */, kDalvikReg);
+            st1->u.m.def_mask = ENCODE_ALL;
+          }
+        }
+
+        // Free the temporary used for the data movement.
+        FreeTemp(temp);
+      } else {
+        // Moving 32-bits via general purpose register.
+        bytes_to_move = sizeof(uint32_t);
+
+        // Instead of allocating a new temp, simply reuse one of the registers being used
+        // for argument passing.
+        int temp = TargetReg(kArg3);
+
+        // Now load the argument VR and store to the outs.
+        LoadWordDisp(TargetReg(kSp), current_src_offset, temp);
+        StoreWordDisp(TargetReg(kSp), current_dest_offset, temp);
+      }
+
+      current_src_offset += bytes_to_move;
+      current_dest_offset += bytes_to_move;
+      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
+    }
+  } else {
     // Generate memcpy
     OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset);
     OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset);
     CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(pMemcpy), TargetReg(kArg0),
                                TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
-  } else {
-    if (info->num_arg_words >= 20) {
-      // Generate memcpy
-      OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset);
-      OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset);
-      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(pMemcpy), TargetReg(kArg0),
-                                 TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
-    } else {
-      // Use vldm/vstm pair using kArg3 as a temp
-      int regs_left = std::min(info->num_arg_words - 3, 16);
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-      OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset);
-      LIR* ld = OpVldm(TargetReg(kArg3), regs_left);
-      // TUNING: loosen barrier
-      ld->u.m.def_mask = ENCODE_ALL;
-      SetMemRefType(ld, true /* is_load */, kDalvikReg);
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-      OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4));
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-      LIR* st = OpVstm(TargetReg(kArg3), regs_left);
-      SetMemRefType(st, false /* is_load */, kDalvikReg);
-      st->u.m.def_mask = ENCODE_ALL;
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-    }
   }
 
   call_state = LoadArgRegs(info, call_state, next_call_insn,
@@ -1229,10 +1332,6 @@
     // TODO - add Mips implementation
     return false;
   }
-  if (cu_->instruction_set == kX86 && is_object) {
-    // TODO: fix X86, it exhausts registers for card marking.
-    return false;
-  }
   // Unused - RegLocation rl_src_unsafe = info->args[0];
   RegLocation rl_src_obj = info->args[1];  // Object
   RegLocation rl_src_offset = info->args[2];  // long low
@@ -1252,6 +1351,9 @@
     rl_value = LoadValue(rl_src_value, kCoreReg);
     StoreBaseIndexed(rl_object.low_reg, rl_offset.low_reg, rl_value.low_reg, 0, kWord);
   }
+
+  // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
+  FreeTemp(rl_offset.low_reg);
   if (is_volatile) {
     GenMemBarrier(kStoreLoad);
   }
@@ -1335,7 +1437,7 @@
   } else {
     if (fast_path) {
       call_inst = OpMem(kOpBlx, TargetReg(kArg0),
-                        mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value());
+                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
     } else {
       ThreadOffset trampoline(-1);
       switch (info->type) {
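
The 128-bit copy policy in the x86 argument-copy loop above is compact enough to state on its own. A minimal sketch of the per-iteration decision (the function name is illustrative, not from the source):

    #include <cstddef>
    #include <cstdint>

    // Use one 128-bit (xmm) move when exactly 4 words remain, or when more than
    // 4 remain and either side is already 16-byte aligned; otherwise move one
    // 32-bit word, which may bring the offsets into alignment next iteration.
    size_t BytesForNextMove(int words_left, uint32_t src_off, uint32_t dest_off) {
      bool src_aligned = (src_off & 0xF) == 0;
      bool dest_aligned = (dest_off & 0xF) == 0;
      if (words_left == 4 || (words_left > 4 && (src_aligned || dest_aligned))) {
        return 4 * sizeof(uint32_t);
      }
      return sizeof(uint32_t);
    }
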
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 65582dd..f7c2821 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -294,6 +294,38 @@
   }
 }
 
+void Mir2Lir::StoreFinalValue(RegLocation rl_dest, RegLocation rl_src) {
+  DCHECK_EQ(rl_src.location, kLocPhysReg);
+
+  if (rl_dest.location == kLocPhysReg) {
+    OpRegCopy(rl_dest.low_reg, rl_src.low_reg);
+  } else {
+    // Just re-assign the register.  Dest gets Src's reg.
+    rl_dest.low_reg = rl_src.low_reg;
+    rl_dest.location = kLocPhysReg;
+    Clobber(rl_src.low_reg);
+  }
+
+  // Dest is now live and dirty (until/if we flush it to home location)
+  MarkLive(rl_dest.low_reg, rl_dest.s_reg_low);
+  MarkDirty(rl_dest);
+
+  ResetDefLoc(rl_dest);
+  if (IsDirty(rl_dest.low_reg) &&
+      oat_live_out(rl_dest.s_reg_low)) {
+    LIR *def_start = last_lir_insn_;
+    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low),
+                  rl_dest.low_reg, kWord);
+    MarkClean(rl_dest);
+    LIR *def_end = last_lir_insn_;
+    if (!rl_dest.ref) {
+      // Exclude references from store elimination
+      MarkDef(rl_dest, def_start, def_end);
+    }
+  }
+}
+
 void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) {
   DCHECK_EQ(IsFpReg(rl_src.low_reg), IsFpReg(rl_src.high_reg));
   DCHECK(rl_dest.wide);
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index aca93f5..11b8f83 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -151,6 +151,8 @@
     LIR* OpRegImm(OpKind op, int r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset);
     LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+    LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
     LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
     LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index c5e2b36..21c971c 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -325,6 +325,16 @@
   return NewLIR2(opcode, r_dest_src1, r_src2);
 }
 
+LIR* MipsMir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* MipsMir2Lir::OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
 LIR* MipsMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
   LOG(FATAL) << "Unexpected use of OpCondRegReg for MIPS";
   return NULL;
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 1f4122d..ae54fb8 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -839,6 +839,8 @@
       next_bb = iter.Next();
     } while ((next_bb != NULL) && (next_bb->block_type == kDead));
   }
+  HandleSlowPaths();
+
   cu_->NewTimingSplit("Launchpads");
   HandleSuspendLaunchPads();
 
@@ -847,4 +849,15 @@
   HandleIntrinsicLaunchPads();
 }
 
+//
+// LIR Slow Path
+//
+
+LIR* Mir2Lir::LIRSlowPath::GenerateTargetLabel() {
+  LIR* target = m2l_->RawLIR(current_dex_pc_, kPseudoTargetLabel);
+  m2l_->AppendLIR(target);
+  fromfast_->target = target;
+  m2l_->SetCurrentDexPc(current_dex_pc_);
+  return target;
+}
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index bcd0eb1..3a68044 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -258,6 +258,63 @@
       bool first_in_pair;
     };
 
+    //
+    // Slow paths.  This object is used to generate a sequence of code that is executed in the
+    // slow path.  For example, resolving a string or class is slow and will only be executed
+    // once (after that it is resolved and doesn't need to be done again).  We want slow paths
+    // to be placed out-of-line, and not require a (probably mispredicted) conditional forward
+    // branch over them.
+    //
+    // If you want to create a slow path, declare a class derived from LIRSlowPath and provide
+    // the Compile() function that will be called near the end of the code generated by the
+    // method.
+    //
+    // The basic flow for a slow path is:
+    //
+    //     CMP reg, #value
+    //     BEQ fromfast
+    //   cont:
+    //     ...
+    //     fast path code
+    //     ...
+    //     more code
+    //     ...
+    //     RETURN
+    //
+    //   fromfast:
+    //     ...
+    //     slow path code
+    //     ...
+    //     B cont
+    //
+    // So we need two labels and two branches.  The first branch (called fromfast) is
+    // the conditional branch to the slow path code.  The second label (called cont) is used
+    // as an unconditional branch target for getting back to the code after the slow path
+    // has completed.
+    //
+
+    class LIRSlowPath {
+     public:
+      LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast,
+                  LIR* cont = nullptr) :
+        m2l_(m2l), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
+      }
+      virtual ~LIRSlowPath() {}
+      virtual void Compile() = 0;
+
+      static void* operator new(size_t size, ArenaAllocator* arena) {
+        return arena->Alloc(size, ArenaAllocator::kAllocData);
+      }
+
+     protected:
+      LIR* GenerateTargetLabel();
+
+      Mir2Lir* const m2l_;
+      const DexOffset current_dex_pc_;
+      LIR* const fromfast_;
+      LIR* const cont_;
+    };
+
     virtual ~Mir2Lir() {}
 
     int32_t s4FromSwitchData(const void* switch_data) {
@@ -310,6 +367,23 @@
     void InsertLIRBefore(LIR* current_lir, LIR* new_lir);
     void InsertLIRAfter(LIR* current_lir, LIR* new_lir);
 
+    /**
+     * @brief Provides the maximum number of compiler temporaries that the backend can/wants
+     * to place in a frame.
+     * @return Returns the maximum number of compiler temporaries.
+     */
+    size_t GetMaxPossibleCompilerTemps() const;
+
+    /**
+     * @brief Provides the number of bytes needed in frame for spilling of compiler temporaries.
+     * @return Returns the size in bytes for space needed for compiler temporary spill region.
+     */
+    size_t GetNumBytesForCompilerTempSpillRegion();
+
+    DexOffset GetCurrentDexPc() const {
+      return current_dalvik_offset_;
+    }
+
     int ComputeFrameSize();
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
@@ -457,6 +531,7 @@
     void HandleSuspendLaunchPads();
     void HandleIntrinsicLaunchPads();
     void HandleThrowLaunchPads();
+    void HandleSlowPaths();
     void GenBarrier();
     LIR* GenCheck(ConditionCode c_code, ThrowKind kind);
     LIR* GenImmedCheck(ConditionCode c_code, int reg, int imm_val,
@@ -497,8 +572,6 @@
                       RegLocation rl_src1, RegLocation rl_src2);
     void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                         RegLocation rl_src1, RegLocation rl_shift);
-    void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
-                       RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src, int lit);
     void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
@@ -507,8 +580,11 @@
                            RegLocation rl_src);
     void GenSuspendTest(int opt_flags);
     void GenSuspendTestAndBranch(int opt_flags, LIR* target);
+
     // This will be overridden by x86 implementation.
     virtual void GenConstWide(RegLocation rl_dest, int64_t value);
+    virtual void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
+                               RegLocation rl_src1, RegLocation rl_src2);
 
     // Shared by all targets - implemented in gen_invoke.cc.
     int CallHelperSetup(ThreadOffset helper_offset);
@@ -631,6 +707,18 @@
     void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
 
     /**
+     * @brief Used to do the final store to a destination as per bytecode semantics.
+     * @see StoreValue
+     * @param rl_dest The destination dalvik register location.
+     * @param rl_src The source register location. It must be kLocPhysReg
+     *
+     * This is used for x86 two operand computations, where we have computed the correct
+     * register value and now only need to record it as the destination's value.  This
+     * avoids an extra register copy that would result if StoreValue was called.
+     */
+    void StoreFinalValue(RegLocation rl_dest, RegLocation rl_src);
+
+    /**
      * @brief Used to do the final store in a wide destination as per bytecode semantics.
      * @see StoreValueWide
      * @param rl_dest The destination dalvik register location.
@@ -649,7 +737,19 @@
     void SpecialMIR2LIR(const InlineMethod& special);
     void MethodMIR2LIR();
 
-
+    // Routines that work for the generic case, but may be overridden by target.
+    /**
+     * @brief Compare memory to immediate, and branch if condition true.
+     * @param cond The condition code that when true will branch to the target.
+     * @param temp_reg A temporary register that can be used if compare to memory is not
+     * supported by the architecture.
+     * @param base_reg The register holding the base address.
+     * @param offset The offset from the base.
+     * @param check_value The immediate to compare to.
+     * @param target The branch target; may be null and patched up later.
+     * @return Returns the branch instruction that was generated.
+     */
+    virtual LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                   int offset, int check_value, LIR* target);
 
     // Required for target - codegen helpers.
     virtual bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -846,6 +946,27 @@
     virtual LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2) = 0;
 
     /**
+     * @brief Used to generate an LIR that does a load from mem to reg.
+     * @param r_dest The destination physical register.
+     * @param r_base The base physical register for memory operand.
+     * @param offset The displacement for memory operand.
+     * @param move_type Specification on the move desired (size, alignment, register kind).
+     * @return Returns the generated move LIR.
+     */
+    virtual LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) = 0;
+
+    /**
+     * @brief Used to generate an LIR that does a store from reg to mem.
+     * @param r_base The base physical register for memory operand.
+     * @param offset The displacement for memory operand.
+     * @param r_src The source physical register.
+     * @param move_type Specification on the move desired (size, alignment, register kind).
+     * @return Returns the generated move LIR.
+     */
+    virtual LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) = 0;
+
+    /**
      * @brief Used for generating a conditional register to register operation.
      * @param op The opcode kind.
      * @param cc The condition code that when true will perform the opcode.
@@ -920,9 +1041,12 @@
      */
     RegLocation ForceTempWide(RegLocation loc);
 
+    virtual void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                                    RegLocation rl_dest, RegLocation rl_src);
+
+    void AddSlowPath(LIRSlowPath* slowpath);
+
   private:
-    void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
-                            RegLocation rl_src);
     void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
                                     bool type_known_abstract, bool use_declaring_class,
                                     bool can_assume_type_is_in_dex_cache,
@@ -935,6 +1059,11 @@
       p->def_end = NULL;
     }
 
+    void SetCurrentDexPc(DexOffset dexpc) {
+      current_dalvik_offset_ = dexpc;
+    }
+
   public:
     // TODO: add accessors for these.
     LIR* literal_list_;                        // Constants.
@@ -990,6 +1119,8 @@
     unsigned int fp_spill_mask_;
     LIR* first_lir_insn_;
     LIR* last_lir_insn_;
+
+    GrowableArray<LIRSlowPath*> slow_paths_;
 };  // Class Mir2Lir
 
 }  // namespace art
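
The LIRSlowPath contract introduced in this header comes down to: construct with the fast-path branch and the continuation label, emit everything else in Compile(), and always branch back to cont_. A minimal sketch of a subclass, mirroring the call sites earlier in this change (pSomeHelper is a placeholder entrypoint name, not a real one):

    class ExampleSlowPath : public Mir2Lir::LIRSlowPath {
     public:
      ExampleSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, int imm)
          : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), imm_(imm) {}

      void Compile() {
        GenerateTargetLabel();  // emits the out-of-line label and patches fromfast_
        m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pSomeHelper), imm_, true);
        m2l_->OpUnconditionalBranch(cont_);  // rejoin the fast path
      }

     private:
      const int imm_;
    };

    // At a call site:
    //   LIR* fromfast = OpCmpImmBranch(kCondEq, reg, 0, NULL);
    //   LIR* cont = NewLIR0(kPseudoTargetLabel);
    //   AddSlowPath(new (arena_) ExampleSlowPath(this, fromfast, cont, value));
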
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 32c22f2..eb70d8c 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -132,9 +132,15 @@
     DCHECK_LT(v_reg, cu_->num_dalvik_registers);
     return v_reg;
   } else {
-    int pos = std::abs(v_reg) - std::abs(SSA_METHOD_BASEREG);
-    DCHECK_LE(pos, cu_->num_compiler_temps);
-    return cu_->num_dalvik_registers + pos;
+    /*
+     * It must be the case that the v_reg for a temporary is less than or equal to the
+     * base reg for temps. For that reason, "position" must be zero or positive.
+     */
+    unsigned int position = std::abs(v_reg) - std::abs(static_cast<int>(kVRegTempBaseReg));
+
+    // The temporaries are placed after dalvik registers in the promotion map
+    DCHECK_LT(position, mir_graph_->GetNumUsedCompilerTemps());
+    return cu_->num_dalvik_registers + position;
   }
 }
 
@@ -897,10 +903,12 @@
  * optimization is disabled.
  */
 void Mir2Lir::DoPromotion() {
-  int reg_bias = cu_->num_compiler_temps + 1;
   int dalvik_regs = cu_->num_dalvik_registers;
-  int num_regs = dalvik_regs + reg_bias;
+  int num_regs = dalvik_regs + mir_graph_->GetNumUsedCompilerTemps();
   const int promotion_threshold = 1;
+  // Allocate the promotion map - one entry for each Dalvik vReg or compiler temp
+  promotion_map_ = static_cast<PromotionMap*>
+      (arena_->Alloc(num_regs * sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc));
 
   // Allow target code to add any special registers
   AdjustSpillMask();
@@ -926,16 +934,13 @@
   for (int i = 0; i < dalvik_regs; i++) {
     core_regs[i].s_reg = FpRegs[i].s_reg = i;
   }
-  // Set ssa name for Method*
-  core_regs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();
-  FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistecy.
-  FpRegs[dalvik_regs + num_regs].s_reg = mir_graph_->GetMethodSReg();  // for consistency.
-  // Set ssa names for compiler_temps
-  for (int i = 1; i <= cu_->num_compiler_temps; i++) {
-    CompilerTemp* ct = mir_graph_->compiler_temps_.Get(i);
-    core_regs[dalvik_regs + i].s_reg = ct->s_reg;
-    FpRegs[dalvik_regs + i].s_reg = ct->s_reg;
-    FpRegs[num_regs + dalvik_regs + i].s_reg = ct->s_reg;
+
+  // Set ssa names for compiler temporaries
+  for (unsigned int ct_idx = 0; ct_idx < mir_graph_->GetNumUsedCompilerTemps(); ct_idx++) {
+    CompilerTemp* ct = mir_graph_->GetCompilerTemp(ct_idx);
+    core_regs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
+    FpRegs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
+    FpRegs[num_regs + dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
   }
 
   // Duplicate in upper half to represent possible fp double starting sregs.
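
A worked example of the new temp mapping, with illustrative counts: for num_dalvik_registers = 16 and two used compiler temps, DoPromotion() now allocates 16 + 2 = 18 promotion-map entries; a temp whose computed position is 0 lands in slot 16 and position 1 in slot 17, with the DCHECK_LT guarding position < GetNumUsedCompilerTemps(). Under the old scheme the map always reserved num_compiler_temps + 1 extra slots (the +1 for Method*), whether or not any temp was used.
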
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index c29d6c4..ae53ddb 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -297,6 +297,24 @@
   { kX86SqrtsdRR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0xF2, 0, 0x0F, 0x51, 0, 0, 0, 0 }, "SqrtsdRR", "!0r,!1r" },
   { kX86FstpdM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
 
+  EXT_0F_ENCODING_MAP(Movups,    0x0, 0x10, REG_DEF0),
+  { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
+  { kX86MovupsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  EXT_0F_ENCODING_MAP(Movaps,    0x0, 0x28, REG_DEF0),
+  { kX86MovapsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsMR", "[!0r+!1d],!2r" },
+  { kX86MovapsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  { kX86MovlpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRM", "!0r,[!1r+!2d]" },
+  { kX86MovlpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovlpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsMR", "[!0r+!1d],!2r" },
+  { kX86MovlpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  { kX86MovhpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRM", "!0r,[!1r+!2d]" },
+  { kX86MovhpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovhpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsMR", "[!0r+!1d],!2r" },
+  { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
   EXT_0F_ENCODING_MAP(Movdxr,    0x66, 0x6E, REG_DEF0),
   { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE01,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
   { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" },
@@ -863,6 +881,20 @@
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
+void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base,
+                                int displacement, uint8_t cl) {
+  DCHECK_EQ(cl, static_cast<uint8_t>(rCX));
+  EmitPrefix(entry);
+  code_buffer_.push_back(entry->skeleton.opcode);
+  DCHECK_NE(0x0F, entry->skeleton.opcode);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  DCHECK_LT(base, 8);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
 void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) {
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
@@ -1140,6 +1172,17 @@
           break;
         }
         default:
+          if (lir->flags.fixup == kFixupLoad) {
+            LIR* target_lir = lir->target;
+            DCHECK(target_lir != NULL);
+            CodeOffset target = target_lir->offset;
+            lir->operands[2] = target;
+            int new_size = GetInsnSize(lir);
+            if (new_size != lir->flags.size) {
+              lir->flags.size = new_size;
+              res = kRetryAll;
+            }
+          }
           break;
       }
     }
@@ -1230,6 +1273,9 @@
       case kShiftRegCl:  // lir operands - 0: reg, 1: cl
         EmitShiftRegCl(entry, lir->operands[0], lir->operands[1]);
         break;
+      case kShiftMemCl:  // lir operands - 0: base, 1: displacement, 2: cl
+        EmitShiftMemCl(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
       case kRegCond:  // lir operands - 0: reg, 1: condition
         EmitRegCond(entry, lir->operands[0], lir->operands[1]);
         break;
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 4267b5b..93875c9 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -84,10 +84,19 @@
 
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
-  int start_of_method_reg = AllocTemp();
-  // Materialize a pointer to the switch table
   // NewLIR0(kX86Bkpt);
-  NewLIR1(kX86StartOfMethod, start_of_method_reg);
+
+  // Materialize a pointer to the switch table
+  int start_of_method_reg;
+  if (base_of_code_ != nullptr) {
+    // We can use the saved value.
+    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+    rl_method = LoadValue(rl_method, kCoreReg);
+    start_of_method_reg = rl_method.low_reg;
+  } else {
+    start_of_method_reg = AllocTemp();
+    NewLIR1(kX86StartOfMethod, start_of_method_reg);
+  }
   int low_key = s4FromSwitchData(&table[2]);
   int keyReg;
   // Remove the bias, if necessary
@@ -142,7 +151,13 @@
   FlushAllRegs();   /* Everything to home location */
   LoadValueDirectFixed(rl_src, rX86_ARG0);
   // Materialize a pointer to the fill data image
-  NewLIR1(kX86StartOfMethod, rX86_ARG2);
+  if (base_of_code_ != nullptr) {
+    // We can use the saved value.
+    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+    LoadValueDirect(rl_method, rX86_ARG2);
+  } else {
+    NewLIR1(kX86StartOfMethod, rX86_ARG2);
+  }
   NewLIR2(kX86PcRelAdr, rX86_ARG1, WrapPointer(tab_rec));
   NewLIR2(kX86Add32RR, rX86_ARG1, rX86_ARG2);
   CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pHandleFillArrayData), rX86_ARG0,
@@ -211,6 +226,13 @@
 
   FlushIns(ArgLocs, rl_method);
 
+  if (base_of_code_ != nullptr) {
+    // We have been asked to save the address of the method start for later use.
+    NewLIR1(kX86StartOfMethod, rX86_ARG0);
+    int displacement = SRegOffset(base_of_code_->s_reg_low);
+    StoreBaseDisp(rX86_SP, displacement, rX86_ARG0, kWord);
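+    // Packed switches, fill-array-data and double literals reload the address from
+    // this slot instead of re-materializing it.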
+  }
+
   FreeTemp(rX86_ARG0);
   FreeTemp(rX86_ARG1);
   FreeTemp(rX86_ARG2);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 6280b64..4c1c171 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -180,6 +180,15 @@
       */
     void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
+    /**
+     * @brief Implement instanceof a final class with x86 specific code.
+     * @param use_declaring_class 'true' if we can use the class itself.
+     * @param type_idx Type index to use if use_declaring_class is 'false'.
+     * @param rl_dest Result to be set to 0 or 1.
+     * @param rl_src Object to be tested.
+     */
+    void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                            RegLocation rl_dest, RegLocation rl_src);
     // Single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
     LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target);
@@ -195,7 +204,11 @@
     LIR* OpRegCopyNoInsert(int r_dest, int r_src);
     LIR* OpRegImm(OpKind op, int r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset);
+    LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
+    LIR* OpRegMem(OpKind op, int r_dest, RegLocation value);
     LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+    LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
     LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
     LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
@@ -222,6 +235,16 @@
     int AllocTempDouble();
     void ResetDefLocWide(RegLocation rl);
 
+    /**
+     * @brief x86 specific codegen for int operations.
+     * @param opcode Operation to perform.
+     * @param rl_dest Destination for the result.
+     * @param rl_lhs Left hand operand.
+     * @param rl_rhs Right hand operand.
+     */
+    void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
+                       RegLocation rl_lhs, RegLocation rl_rhs);
+
   private:
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);
@@ -251,6 +274,7 @@
     void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm);
     void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
     void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
+    void EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl);
     void EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl);
     void EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition);
 
@@ -357,6 +381,7 @@
      * @param val Constant multiplier.
      */
     void GenImulRegImm(int dest, int src, int val);
+
     /*
      * Generate an imul of a memory location by a constant or a better sequence.
      * @param dest Destination Register.
@@ -365,6 +390,80 @@
      * @param val Constant multiplier.
      */
     void GenImulMemImm(int dest, int sreg, int displacement, int val);
+
+    /**
+     * @brief Compare memory to immediate, and branch if condition true.
+     * @param cond The condition code that when true will branch to the target.
+     * @param temp_reg A temporary register that can be used if comparing against memory
+     *        is not supported by the architecture.
+     * @param base_reg The register holding the base address.
+     * @param offset The offset from the base.
+     * @param check_value The immediate to compare to.
+     * @param target The branch target.
+     */
+    LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                           int offset, int check_value, LIR* target);
+    /**
+     * @brief Can this operation use core registers without temporaries?
+     * @param rl_lhs Left hand operand.
+     * @param rl_rhs Right hand operand.
+     * @returns 'true' if the operation can proceed without needing temporary regs.
+     */
+    bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs);
+
+    /**
+     * @brief Perform MIR analysis before compiling the method.
+     * @note Invokes Mir2Lir::Materialize after analysis.
+     */
+    void Materialize();
+
+    /**
+     * @brief Analyze the MIR before generating code, to prepare for code generation.
+     */
+    void AnalyzeMIR();
+
+    /**
+     * @brief Analyze one basic block.
+     * @param bb Basic block to analyze.
+     */
+    void AnalyzeBB(BasicBlock* bb);
+
+    /**
+     * @brief Analyze one extended MIR instruction.
+     * @param opcode MIR instruction opcode.
+     * @param bb Basic block containing instruction.
+     * @param mir Extended instruction to analyze.
+     */
+    void AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir);
+
+    /**
+     * @brief Analyze one MIR instruction.
+     * @param opcode MIR instruction opcode.
+     * @param bb Basic block containing instruction.
+     * @param mir Instruction to analyze.
+     */
+    void AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir);
+
+    /**
+     * @brief Analyze one MIR float/double instruction.
+     * @param opcode MIR instruction opcode.
+     * @param bb Basic block containing instruction.
+     * @param mir Instruction to analyze.
+     */
+    void AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir);
+
+    /**
+     * @brief Analyze one use of a double operand.
+     * @param rl_use Double RegLocation for the operand.
+     */
+    void AnalyzeDoubleUse(RegLocation rl_use);
+
+    // Information derived from analysis of MIR.
+
+    // Have we decided to compute a pointer to the code and store it in a temporary VR?
+    bool store_method_addr_;
+
+    // The compiler temporary for the code address of the method.
+    CompilerTemp* base_of_code_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index e665f70..a567a8a 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -157,8 +157,7 @@
       NewLIR2(kX86MovdxrRR, dest_lo, src_lo);
       dest_hi = AllocTempDouble();
       NewLIR2(kX86MovdxrRR, dest_hi, src_hi);
-      NewLIR2(kX86PsllqRI, dest_hi, 32);
-      NewLIR2(kX86OrpsRR, dest_lo, dest_hi);
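+      // punpckldq interleaves the low dwords of the two xmm registers, doing in one
+      // instruction what the psllq/orps pair did before.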
+      NewLIR2(kX86PunpckldqRR, dest_lo, dest_hi);
       FreeTemp(dest_hi);
     }
   } else {
@@ -831,7 +830,7 @@
 // Decrement register and branch on condition
 LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) {
   OpRegImm(kOpSub, reg, 1);
-  return OpCmpImmBranch(c_code, reg, 0, target);
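+  // The kOpSub above already set the condition flags, so branch on them directly
+  // instead of comparing the register against zero again.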
+  return OpCondBranch(c_code, target);
 }
 
 bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -1650,4 +1649,302 @@
   StoreFinalValueWide(rl_dest, rl_result);
 }
 
+// For final classes there are no sub-classes to check and so we can answer the instance-of
+// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
+void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                                    RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation object = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  int result_reg = rl_result.low_reg;
+
+  // SETcc only works with the low bytes of EAX, ECX, EDX and EBX (encodings 0-3).
+  if (result_reg == object.low_reg || result_reg >= 4) {
+    result_reg = AllocTypedTemp(false, kCoreReg);
+    DCHECK_LT(result_reg, 4);
+  }
+
+  // Assume that there is no match.
+  LoadConstant(result_reg, 0);
+  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.low_reg, 0, NULL);
+
+  int check_class = AllocTypedTemp(false, kCoreReg);
+
+  // If Method* is already in a register, we can save a copy.
+  RegLocation rl_method = mir_graph_->GetMethodLoc();
+  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
+    (sizeof(mirror::Class*) * type_idx);
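+  // offset_of_type is the slot for type_idx in the DexCache resolved-types array.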
+
+  if (rl_method.location == kLocPhysReg) {
+    if (use_declaring_class) {
+      LoadWordDisp(rl_method.low_reg,
+                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+                   check_class);
+    } else {
+      LoadWordDisp(rl_method.low_reg,
+                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                   check_class);
+      LoadWordDisp(check_class, offset_of_type, check_class);
+    }
+  } else {
+    LoadCurrMethodDirect(check_class);
+    if (use_declaring_class) {
+      LoadWordDisp(check_class,
+                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+                   check_class);
+    } else {
+      LoadWordDisp(check_class,
+                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                   check_class);
+      LoadWordDisp(check_class, offset_of_type, check_class);
+    }
+  }
+
+  // Compare the computed class to the class in the object.
+  DCHECK_EQ(object.location, kLocPhysReg);
+  OpRegMem(kOpCmp, check_class, object.low_reg,
+           mirror::Object::ClassOffset().Int32Value());
+
+  // Set the low byte of the result to 0 or 1 from the compare condition code.
+  NewLIR2(kX86Set8R, result_reg, kX86CondEq);
+
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  null_branchover->target = target;
+  FreeTemp(check_class);
+  if (IsTemp(result_reg)) {
+    OpRegCopy(rl_result.low_reg, result_reg);
+    FreeTemp(result_reg);
+  }
+  StoreValue(rl_dest, rl_result);
+}
+
+void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
+                               RegLocation rl_lhs, RegLocation rl_rhs) {
+  OpKind op = kOpBkpt;
+  bool is_div_rem = false;
+  bool unary = false;
+  bool shift_op = false;
+  bool is_two_addr = false;
+  RegLocation rl_result;
+  switch (opcode) {
+    case Instruction::NEG_INT:
+      op = kOpNeg;
+      unary = true;
+      break;
+    case Instruction::NOT_INT:
+      op = kOpMvn;
+      unary = true;
+      break;
+    case Instruction::ADD_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::ADD_INT:
+      op = kOpAdd;
+      break;
+    case Instruction::SUB_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SUB_INT:
+      op = kOpSub;
+      break;
+    case Instruction::MUL_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::MUL_INT:
+      op = kOpMul;
+      break;
+    case Instruction::DIV_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::DIV_INT:
+      op = kOpDiv;
+      is_div_rem = true;
+      break;
+    /* NOTE: returns in kArg1 */
+    case Instruction::REM_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::REM_INT:
+      op = kOpRem;
+      is_div_rem = true;
+      break;
+    case Instruction::AND_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::AND_INT:
+      op = kOpAnd;
+      break;
+    case Instruction::OR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::OR_INT:
+      op = kOpOr;
+      break;
+    case Instruction::XOR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::XOR_INT:
+      op = kOpXor;
+      break;
+    case Instruction::SHL_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SHL_INT:
+      shift_op = true;
+      op = kOpLsl;
+      break;
+    case Instruction::SHR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SHR_INT:
+      shift_op = true;
+      op = kOpAsr;
+      break;
+    case Instruction::USHR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::USHR_INT:
+      shift_op = true;
+      op = kOpLsr;
+      break;
+    default:
+      LOG(FATAL) << "Invalid word arith op: " << opcode;
+  }
+
+  // Can we convert to a two address instruction?
+  if (!is_two_addr &&
+      (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
+       mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
+    is_two_addr = true;
+  }
+
+  // Get the div/rem stuff out of the way.
+  if (is_div_rem) {
+    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
+    StoreValue(rl_dest, rl_result);
+    return;
+  }
+
+  if (unary) {
+    rl_lhs = LoadValue(rl_lhs, kCoreReg);
+    rl_result = UpdateLoc(rl_dest);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegReg(op, rl_result.low_reg, rl_lhs.low_reg);
+  } else {
+    if (shift_op) {
+      // X86 doesn't require masking and must use ECX.
+      int t_reg = TargetReg(kCount);  // rCX
+      LoadValueDirectFixed(rl_rhs, t_reg);
+      if (is_two_addr) {
+        // Can we do this directly into memory?
+        rl_result = UpdateLoc(rl_dest);
+        rl_rhs = LoadValue(rl_rhs, kCoreReg);
+        if (rl_result.location != kLocPhysReg) {
+          // Okay, we can do this into memory.
+          OpMemReg(op, rl_result, t_reg);
+          FreeTemp(t_reg);
+          return;
+        } else if (!IsFpReg(rl_result.low_reg)) {
+          // Can do this directly into the result register.
+          OpRegReg(op, rl_result.low_reg, t_reg);
+          FreeTemp(t_reg);
+          StoreFinalValue(rl_dest, rl_result);
+          return;
+        }
+      }
+      // Three address form, or we can't do it directly.
+      rl_lhs = LoadValue(rl_lhs, kCoreReg);
+      rl_result = EvalLoc(rl_dest, kCoreReg, true);
+      OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, t_reg);
+      FreeTemp(t_reg);
+    } else {
+      // Multiply is 3 operand only (sort of).
+      if (is_two_addr && op != kOpMul) {
+        // Can we do this directly into memory?
+        rl_result = UpdateLoc(rl_dest);
+        if (rl_result.location == kLocPhysReg) {
+          // Can we do this from memory directly?
+          rl_rhs = UpdateLoc(rl_rhs);
+          if (rl_rhs.location != kLocPhysReg) {
+            OpRegMem(op, rl_result.low_reg, rl_rhs);
+            StoreFinalValue(rl_dest, rl_result);
+            return;
+          } else if (!IsFpReg(rl_rhs.low_reg)) {
+            OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg);
+            StoreFinalValue(rl_dest, rl_result);
+            return;
+          }
+        }
+        rl_rhs = LoadValue(rl_rhs, kCoreReg);
+        if (rl_result.location != kLocPhysReg) {
+          // Okay, we can do this into memory.
+          OpMemReg(op, rl_result, rl_rhs.low_reg);
+          return;
+        } else if (!IsFpReg(rl_result.low_reg)) {
+          // Can do this directly into the result register.
+          OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg);
+          StoreFinalValue(rl_dest, rl_result);
+          return;
+        } else {
+          rl_lhs = LoadValue(rl_lhs, kCoreReg);
+          rl_result = EvalLoc(rl_dest, kCoreReg, true);
+          OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+        }
+      } else {
+        // Try to use reg/memory instructions.
+        rl_lhs = UpdateLoc(rl_lhs);
+        rl_rhs = UpdateLoc(rl_rhs);
+        // We can't optimize with FP registers.
+        if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
+          // Something is difficult, so fall back to the standard case.
+          rl_lhs = LoadValue(rl_lhs, kCoreReg);
+          rl_rhs = LoadValue(rl_rhs, kCoreReg);
+          rl_result = EvalLoc(rl_dest, kCoreReg, true);
+          OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+        } else {
+          // We can optimize by moving to result and using memory operands.
+          if (rl_rhs.location != kLocPhysReg) {
+            // Force LHS into result.
+            rl_result = EvalLoc(rl_dest, kCoreReg, true);
+            LoadValueDirect(rl_lhs, rl_result.low_reg);
+            OpRegMem(op, rl_result.low_reg, rl_rhs);
+          } else if (rl_lhs.location != kLocPhysReg) {
+            // RHS is in a register; LHS is in memory.
+            if (op != kOpSub) {
+              // Force RHS into result and operate on memory.
+              rl_result = EvalLoc(rl_dest, kCoreReg, true);
+              OpRegCopy(rl_result.low_reg, rl_rhs.low_reg);
+              OpRegMem(op, rl_result.low_reg, rl_lhs);
+            } else {
+              // Subtraction isn't commutative.
+              rl_lhs = LoadValue(rl_lhs, kCoreReg);
+              rl_rhs = LoadValue(rl_rhs, kCoreReg);
+              rl_result = EvalLoc(rl_dest, kCoreReg, true);
+              OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+            }
+          } else {
+            // Both are in registers.
+            rl_lhs = LoadValue(rl_lhs, kCoreReg);
+            rl_rhs = LoadValue(rl_rhs, kCoreReg);
+            rl_result = EvalLoc(rl_dest, kCoreReg, true);
+            OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+          }
+        }
+      }
+    }
+  }
+  StoreValue(rl_dest, rl_result);
+}
+
+bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
+  // If either operand lives in an FP register, the integer reg/mem forms can't be used.
+  if (rl_lhs.location == kLocPhysReg && IsFpReg(rl_lhs.low_reg)) {
+    return false;
+  }
+  if (rl_rhs.location == kLocPhysReg && IsFpReg(rl_rhs.low_reg)) {
+    return false;
+  }
+
+  // Everything will be fine :-).
+  return true;
+}
 }  // namespace art
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index f223548..a347d8b 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -808,4 +808,12 @@
              << ", orig: " << loc.orig_sreg;
 }
 
+void X86Mir2Lir::Materialize() {
+  // Perform target-specific MIR analysis before starting code generation.
+  AnalyzeMIR();
+
+  // Now continue with regular code generation.
+  Mir2Lir::Materialize();
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 97c04dc..e2744d0 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -16,6 +16,7 @@
 
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/dataflow_iterator-inl.h"
 #include "x86_lir.h"
 
 namespace art {
@@ -61,7 +62,7 @@
 }
 
 bool X86Mir2Lir::InexpensiveConstantDouble(int64_t value) {
-  return false;  // TUNING
+  return value == 0;
 }
 
 /*
@@ -210,6 +211,110 @@
     return NewLIR2(opcode, r_dest_src1, r_src2);
 }
 
+LIR* X86Mir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
+  DCHECK(!(X86_FPREG(r_base)));
+
+  X86OpCode opcode = kX86Nop;
+  switch (move_type) {
+    case kMov8GP:
+      CHECK(!X86_FPREG(r_dest));
+      opcode = kX86Mov8RM;
+      break;
+    case kMov16GP:
+      CHECK(!X86_FPREG(r_dest));
+      opcode = kX86Mov16RM;
+      break;
+    case kMov32GP:
+      CHECK(!X86_FPREG(r_dest));
+      opcode = kX86Mov32RM;
+      break;
+    case kMov32FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovssRM;
+      break;
+    case kMov64FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovsdRM;
+      break;
+    case kMovU128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovupsRM;
+      break;
+    case kMovA128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovapsRM;
+      break;
+    case kMovLo128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovlpsRM;
+      break;
+    case kMovHi128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovhpsRM;
+      break;
+    case kMov64GP:
+    case kMovLo64FP:
+    case kMovHi64FP:
+    default:
+      LOG(FATAL) << "Bad case in OpMovRegMem";
+      break;
+  }
+
+  return NewLIR3(opcode, r_dest, r_base, offset);
+}
+
+LIR* X86Mir2Lir::OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) {
+  DCHECK(!(X86_FPREG(r_base)));
+
+  X86OpCode opcode = kX86Nop;
+  switch (move_type) {
+    case kMov8GP:
+      CHECK(!X86_FPREG(r_src));
+      opcode = kX86Mov8MR;
+      break;
+    case kMov16GP:
+      CHECK(!X86_FPREG(r_src));
+      opcode = kX86Mov16MR;
+      break;
+    case kMov32GP:
+      CHECK(!X86_FPREG(r_src));
+      opcode = kX86Mov32MR;
+      break;
+    case kMov32FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovssMR;
+      break;
+    case kMov64FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovsdMR;
+      break;
+    case kMovU128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovupsMR;
+      break;
+    case kMovA128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovapsMR;
+      break;
+    case kMovLo128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovlpsMR;
+      break;
+    case kMovHi128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovhpsMR;
+      break;
+    case kMov64GP:
+    case kMovLo64FP:
+    case kMovHi64FP:
+    default:
+      LOG(FATAL) << "Bad case in OpMovMemReg";
+      break;
+  }
+
+  return NewLIR3(opcode, r_base, offset, r_src);
+}
+
 LIR* X86Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
   // The only conditional reg to reg operation supported is Cmov
   DCHECK_EQ(op, kOpCmov);
@@ -236,7 +341,57 @@
       LOG(FATAL) << "Bad case in OpRegMem " << op;
       break;
   }
-  return NewLIR3(opcode, r_dest, rBase, offset);
+  LIR* l = NewLIR3(opcode, r_dest, rBase, offset);
+  if (rBase == rX86_SP) {
+    AnnotateDalvikRegAccess(l, offset >> 2, true /* is_load */, false /* is_64bit */);
+  }
+  return l;
+}
+
+LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) {
+  DCHECK_NE(rl_dest.location, kLocPhysReg);
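+  // The destination must still live in the frame; we update it in place at
+  // [rX86_SP + displacement] without loading it into a register.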
+  int displacement = SRegOffset(rl_dest.s_reg_low);
+  X86OpCode opcode = kX86Nop;
+  switch (op) {
+    case kOpSub: opcode = kX86Sub32MR; break;
+    case kOpMov: opcode = kX86Mov32MR; break;
+    case kOpCmp: opcode = kX86Cmp32MR; break;
+    case kOpAdd: opcode = kX86Add32MR; break;
+    case kOpAnd: opcode = kX86And32MR; break;
+    case kOpOr:  opcode = kX86Or32MR; break;
+    case kOpXor: opcode = kX86Xor32MR; break;
+    case kOpLsl: opcode = kX86Sal32MC; break;
+    case kOpLsr: opcode = kX86Shr32MC; break;
+    case kOpAsr: opcode = kX86Sar32MC; break;
+    default:
+      LOG(FATAL) << "Bad case in OpMemReg " << op;
+      break;
+  }
+  LIR* l = NewLIR3(opcode, rX86_SP, displacement, r_value);
+  AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, false /* is_64bit */);
+  return l;
+}
+
+LIR* X86Mir2Lir::OpRegMem(OpKind op, int r_dest, RegLocation rl_value) {
+  DCHECK_NE(rl_value.location, kLocPhysReg);
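+  // The operand must still live in the frame; it is read directly from
+  // [rX86_SP + displacement] as the instruction's memory operand.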
+  int displacement = SRegOffset(rl_value.s_reg_low);
+  X86OpCode opcode = kX86Nop;
+  switch (op) {
+    case kOpSub: opcode = kX86Sub32RM; break;
+    case kOpMov: opcode = kX86Mov32RM; break;
+    case kOpCmp: opcode = kX86Cmp32RM; break;
+    case kOpAdd: opcode = kX86Add32RM; break;
+    case kOpAnd: opcode = kX86And32RM; break;
+    case kOpOr:  opcode = kX86Or32RM; break;
+    case kOpXor: opcode = kX86Xor32RM; break;
+    case kOpMul: opcode = kX86Imul32RM; break;
+    default:
+      LOG(FATAL) << "Bad case in OpRegMem " << op;
+      break;
+  }
+  LIR* l = NewLIR3(opcode, r_dest, rX86_SP, displacement);
+  AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */);
+  return l;
 }
 
 LIR* X86Mir2Lir::OpRegRegReg(OpKind op, int r_dest, int r_src1,
@@ -344,6 +499,27 @@
       DCHECK_EQ(r_dest_lo, r_dest_hi);
       if (value == 0) {
         return NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
+      } else if (base_of_code_ != nullptr) {
+        // We will load the value from the literal area.
+        LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
+        if (data_target == NULL) {
+          data_target = AddWideData(&literal_list_, val_lo, val_hi);
+        }
+
+        // Address the start of the method.
+        RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+        rl_method = LoadValue(rl_method, kCoreReg);
+
+        // Load the proper value from the literal area.
+        // We don't know the proper offset for the value, so pick one that will force
+        // a 4 byte offset.  We will fix this up in the assembler later to have the right
+        // value.
+        res = LoadBaseDisp(rl_method.low_reg, 256 /* bogus */, r_dest_lo, kDouble, INVALID_SREG);
+        res->target = data_target;
+        res->flags.fixup = kFixupLoad;
+        SetMemRefType(res, true, kLiteral);
+        // Redo after we assign target to ensure size is correct.
+        SetupResourceMasks(res);
       } else {
         if (val_lo == 0) {
           res = NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
@@ -604,4 +780,129 @@
   FreeTemp(tmp_reg);
 }
 
+LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                   int offset, int check_value, LIR* target) {
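+  // temp_reg is unused: x86 can compare an immediate directly against memory.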
+  NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg, offset,
+          check_value);
+  return OpCondBranch(cond, target);
+}
+
+void X86Mir2Lir::AnalyzeMIR() {
+  cu_->NewTimingSplit("X86 MIR Analysis");
+
+  // Assume we don't need a pointer to the base of the code until we see otherwise.
+  store_method_addr_ = false;
+
+  // Walk the MIR looking for interesting items.
+  PreOrderDfsIterator iter(mir_graph_);
+  BasicBlock* curr_bb = iter.Next();
+  while (curr_bb != NULL) {
+    AnalyzeBB(curr_bb);
+    curr_bb = iter.Next();
+  }
+
+  // Did we need a pointer to the method code?
+  if (store_method_addr_) {
+    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, false);
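+    // GenEntrySequence will materialize the method start address into this
+    // temp's home frame slot for the uses found above.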
+  } else {
+    base_of_code_ = nullptr;
+  }
+}
+
+void X86Mir2Lir::AnalyzeBB(BasicBlock* bb) {
+  if (bb->block_type == kDead) {
+    // Ignore dead blocks.
+    return;
+  }
+
+  for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
+    int opcode = mir->dalvikInsn.opcode;
+    if (opcode >= kMirOpFirst) {
+      AnalyzeExtendedMIR(opcode, bb, mir);
+    } else {
+      AnalyzeMIR(opcode, bb, mir);
+    }
+  }
+}
+
+void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) {
+  switch (opcode) {
+    // Instructions referencing doubles.
+    case kMirOpFusedCmplDouble:
+    case kMirOpFusedCmpgDouble:
+      AnalyzeFPInstruction(opcode, bb, mir);
+      break;
+    default:
+      // Ignore the rest.
+      break;
+  }
+}
+
+void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) {
+  // For now we are looking for one thing only:
+  // - Do we need a pointer to the code (used for packed switches and double literals)?
+
+  switch (opcode) {
+    // Instructions referencing doubles.
+    case Instruction::CMPL_DOUBLE:
+    case Instruction::CMPG_DOUBLE:
+    case Instruction::NEG_DOUBLE:
+    case Instruction::ADD_DOUBLE:
+    case Instruction::SUB_DOUBLE:
+    case Instruction::MUL_DOUBLE:
+    case Instruction::DIV_DOUBLE:
+    case Instruction::REM_DOUBLE:
+    case Instruction::ADD_DOUBLE_2ADDR:
+    case Instruction::SUB_DOUBLE_2ADDR:
+    case Instruction::MUL_DOUBLE_2ADDR:
+    case Instruction::DIV_DOUBLE_2ADDR:
+    case Instruction::REM_DOUBLE_2ADDR:
+      AnalyzeFPInstruction(opcode, bb, mir);
+      break;
+    // Packed switches and array fills need a pointer to the base of the method.
+    case Instruction::FILL_ARRAY_DATA:
+    case Instruction::PACKED_SWITCH:
+      store_method_addr_ = true;
+      break;
+    default:
+      // Other instructions are not interesting yet.
+      break;
+  }
+}
+
+void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) {
+  // Look at all the uses, and see if they are double constants.
+  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  int next_sreg = 0;
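+  // The DF_Ux bits mark which of the instruction's operands are uses; the matching
+  // DF_x_WIDE bits flag 64-bit (register pair) operands.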
+  if (attrs & DF_UA) {
+    if (attrs & DF_A_WIDE) {
+      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+      next_sreg += 2;
+    } else {
+      next_sreg++;
+    }
+  }
+  if (attrs & DF_UB) {
+    if (attrs & DF_B_WIDE) {
+      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+      next_sreg += 2;
+    } else {
+      next_sreg++;
+    }
+  }
+  if (attrs & DF_UC) {
+    if (attrs & DF_C_WIDE) {
+      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+    }
+  }
+}
+
+void X86Mir2Lir::AnalyzeDoubleUse(RegLocation rl_use) {
+  // If this is a double literal, we will want it in the literal pool.
+  if (rl_use.is_const) {
+    store_method_addr_ = true;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 7f35d06..6962ff7 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -357,6 +357,14 @@
   kX86PsllqRI,                  // left shift of floating point registers
   kX86SqrtsdRR,                 // sqrt of floating point register
   kX86FstpdM,                   // Store and pop top x87 fp stack
+  Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
+  kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
+  Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1
+  kX86MovapsMR, kX86MovapsAR,   // store aligned packed single FP values from xmm1 to m128
+  kX86MovlpsRM, kX86MovlpsRA,   // load packed single FP values from m64 to low quadword of xmm
+  kX86MovlpsMR, kX86MovlpsAR,   // store packed single FP values from low quadword of xmm to m64
+  kX86MovhpsRM, kX86MovhpsRA,   // load packed single FP values from m64 to high quadword of xmm
+  kX86MovhpsMR, kX86MovhpsAR,   // store packed single FP values from high quadword of xmm to m64
   Binary0fOpCode(kX86Movdxr),   // move into xmm from gpr
   kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR,  // move into reg from xmm
   kX86Set8R, kX86Set8M, kX86Set8A,  // set byte depending on condition operand
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index f211e3f..f8dc223 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -410,7 +410,8 @@
 
 void MIRGraph::InitRegLocations() {
   /* Allocate the location map */
-  RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(GetNumSSARegs() * sizeof(*loc),
+  int max_regs = GetNumSSARegs() + GetMaxPossibleCompilerTemps();
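+  // Size the map for the worst case so that compiler temps requested later do not
+  // require reallocating it.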
+  RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(max_regs * sizeof(*loc),
                                                              ArenaAllocator::kAllocRegAlloc));
   for (int i = 0; i < GetNumSSARegs(); i++) {
     loc[i] = fresh_loc;
@@ -418,13 +419,11 @@
     loc[i].is_const = is_constant_v_->IsBitSet(i);
   }
 
-  /* Patch up the locations for Method* and the compiler temps */
-  loc[method_sreg_].location = kLocCompilerTemp;
-  loc[method_sreg_].defined = true;
-  for (int i = 0; i < cu_->num_compiler_temps; i++) {
-    CompilerTemp* ct = compiler_temps_.Get(i);
-    loc[ct->s_reg].location = kLocCompilerTemp;
-    loc[ct->s_reg].defined = true;
+  /* Patch up the locations for the compiler temps */
+  GrowableArray<CompilerTemp*>::Iterator iter(&compiler_temps_);
+  for (CompilerTemp* ct = iter.Next(); ct != NULL; ct = iter.Next()) {
+    loc[ct->s_reg_low].location = kLocCompilerTemp;
+    loc[ct->s_reg_low].defined = true;
   }
 
   reg_location_ = loc;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d504a4e..9f48351 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -556,12 +556,15 @@
   }
 }
 
-void CompilerDriver::CompileOne(const mirror::ArtMethod* method, TimingLogger& timings) {
+void CompilerDriver::CompileOne(mirror::ArtMethod* method, TimingLogger& timings) {
   DCHECK(!Runtime::Current()->IsStarted());
   Thread* self = Thread::Current();
   jobject jclass_loader;
   const DexFile* dex_file;
   uint16_t class_def_idx;
+  uint32_t method_idx = method->GetDexMethodIndex();
+  uint32_t access_flags = method->GetAccessFlags();
+  InvokeType invoke_type = method->GetInvokeType();
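+  // Capture these while the thread still holds the mutator lock; the method must not
+  // be inspected after the transition to suspended below.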
   {
     ScopedObjectAccessUnchecked soa(self);
     ScopedLocalRef<jobject>
@@ -573,6 +576,7 @@
     dex_file = &mh.GetDexFile();
     class_def_idx = mh.GetClassDefIndex();
   }
+  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
   self->TransitionFromRunnableToSuspended(kNative);
 
   std::vector<const DexFile*> dex_files;
@@ -581,8 +585,6 @@
   UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
   PreCompile(jclass_loader, dex_files, *thread_pool.get(), timings);
 
-  uint32_t method_idx = method->GetDexMethodIndex();
-  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
   // Can we run DEX-to-DEX compiler on this class ?
   DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile;
   {
@@ -592,8 +594,8 @@
                                               soa.Decode<mirror::ClassLoader*>(jclass_loader));
     dex_to_dex_compilation_level = GetDexToDexCompilationlevel(class_loader, *dex_file, class_def);
   }
-  CompileMethod(code_item, method->GetAccessFlags(), method->GetInvokeType(),
-                class_def_idx, method_idx, jclass_loader, *dex_file, dex_to_dex_compilation_level);
+  CompileMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx, jclass_loader,
+                *dex_file, dex_to_dex_compilation_level);
 
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 
@@ -1008,8 +1010,8 @@
         ComputeCompilingMethodsClass(soa, dex_cache, mUnit);
     if (referrer_class != NULL) {
       mirror::Class* fields_class = resolved_field->GetDeclaringClass();
-      bool access_ok =
-          referrer_class->CanAccessResolvedField<false>(fields_class, resolved_field, field_idx);
+      bool access_ok = referrer_class->CanAccessResolvedField(fields_class, resolved_field,
+                                                              dex_cache.get(), field_idx);
       bool is_write_to_final_from_wrong_class = is_put && resolved_field->IsFinal() &&
           fields_class != referrer_class;
       if (access_ok && !is_write_to_final_from_wrong_class) {
@@ -1055,8 +1057,8 @@
         stats_->ResolvedLocalStaticField();
         return true;  // fast path
       } else {
-        bool access_ok =
-            referrer_class->CanAccessResolvedField<false>(fields_class, resolved_field, field_idx);
+        bool access_ok = referrer_class->CanAccessResolvedField(fields_class, resolved_field,
+                                                                dex_cache.get(), field_idx);
         bool is_write_to_final_from_wrong_class = is_put && resolved_field->IsFinal();
         if (access_ok && !is_write_to_final_from_wrong_class) {
           // We have the resolved field, we must make it into a index for the referrer
@@ -1170,7 +1172,7 @@
     if (no_guarantee_of_dex_cache_entry) {
       // See if the method is also declared in this dex cache.
       uint32_t dex_method_idx = MethodHelper(method).FindDexMethodIndexInOtherDexFile(
-          *referrer_class->GetDexCache()->GetDexFile());
+          *target_method->dex_file);
       if (dex_method_idx != DexFile::kDexNoIndex) {
         target_method->dex_method_index = dex_method_idx;
       } else {
@@ -1198,13 +1200,23 @@
         CHECK(!method->IsAbstract());
         *type = sharp_type;
         *direct_method = reinterpret_cast<uintptr_t>(method);
-        *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromCompiledCode());
+        if (compiler_backend_ == kQuick) {
+          *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCode());
+        } else {
+          CHECK_EQ(compiler_backend_, kPortable);
+          *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromPortableCompiledCode());
+        }
         target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
         target_method->dex_method_index = method->GetDexMethodIndex();
       } else if (!must_use_direct_pointers) {
         // Set the code and rely on the dex cache for the method.
         *type = sharp_type;
-        *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromCompiledCode());
+        if (compiler_backend_ == kQuick) {
+          *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCode());
+        } else {
+          CHECK_EQ(compiler_backend_, kPortable);
+          *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromPortableCompiledCode());
+        }
       } else {
         // Direct pointers were required but none were available.
         VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method);
@@ -1239,8 +1251,8 @@
     bool icce = resolved_method->CheckIncompatibleClassChange(*invoke_type);
     if (referrer_class != NULL && !icce) {
       mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-      if (referrer_class->CanAccessResolvedMethod<false>(methods_class, resolved_method,
-                                                         target_method->dex_method_index)) {
+      if (referrer_class->CanAccessResolvedMethod(methods_class, resolved_method, dex_cache.get(),
+                                                  target_method->dex_method_index)) {
         const bool enableFinalBasedSharpening = enable_devirtualization;
         // Sharpen a virtual call into a direct call when the target is known not to have been
         // overridden (ie is final).
@@ -1255,9 +1267,11 @@
 
         if (enableFinalBasedSharpening && (can_sharpen_virtual_based_on_type ||
                                             can_sharpen_super_based_on_type)) {
-          // Sharpen a virtual call into a direct call. The method_idx is into referrer's
-          // dex cache, check that this resolved method is where we expect it.
-          CHECK(referrer_class->GetDexCache()->GetResolvedMethod(target_method->dex_method_index) ==
+          // Sharpen a virtual call into a direct call. The method_idx is into the DexCache
+          // associated with target_method->dex_file.
+          CHECK(target_method->dex_file == mUnit->GetDexFile());
+          DCHECK(dex_cache.get() == mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
+          CHECK(dex_cache->GetResolvedMethod(target_method->dex_method_index) ==
                 resolved_method) << PrettyMethod(resolved_method);
           InvokeType orig_invoke_type = *invoke_type;
           GetCodeAndMethodForDirectCall(invoke_type, kDirect, false, referrer_class, resolved_method,
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index a8110e7..4307212 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -106,8 +106,8 @@
                   TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  // Compile a single Method
-  void CompileOne(const mirror::ArtMethod* method, TimingLogger& timings)
+  // Compile a single Method.
+  void CompileOne(mirror::ArtMethod* method, TimingLogger& timings)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   VerificationResults* GetVerificationResults() const {
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index a5eb94f..0d0c204 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -122,7 +122,11 @@
     EXPECT_TRUE(method != NULL) << "method_idx=" << i
                                 << " " << dex->GetMethodDeclaringClassDescriptor(dex->GetMethodId(i))
                                 << " " << dex->GetMethodName(dex->GetMethodId(i));
-    EXPECT_TRUE(method->GetEntryPointFromCompiledCode() != NULL) << "method_idx=" << i
+    EXPECT_TRUE(method->GetEntryPointFromQuickCompiledCode() != NULL) << "method_idx=" << i
+                                           << " "
+                                           << dex->GetMethodDeclaringClassDescriptor(dex->GetMethodId(i))
+                                           << " " << dex->GetMethodName(dex->GetMethodId(i));
+    EXPECT_TRUE(method->GetEntryPointFromPortableCompiledCode() != NULL) << "method_idx=" << i
                                            << " "
                                            << dex->GetMethodDeclaringClassDescriptor(dex->GetMethodId(i))
                                            << " " << dex->GetMethodName(dex->GetMethodId(i));
diff --git a/compiler/elf_fixup.cc b/compiler/elf_fixup.cc
index c571288..66c8da1 100644
--- a/compiler/elf_fixup.cc
+++ b/compiler/elf_fixup.cc
@@ -177,7 +177,7 @@
     if (elf_dyn_needs_fixup) {
       uint32_t d_ptr = elf_dyn.d_un.d_ptr;
       if (DEBUG_FIXUP) {
-        LOG(INFO) << StringPrintf("In %s moving Elf32_Dyn[%d] from 0x%08x to 0x%08x",
+        LOG(INFO) << StringPrintf("In %s moving Elf32_Dyn[%d] from 0x%08x to 0x%08" PRIxPTR,
                                   elf_file.GetFile().GetPath().c_str(), i,
                                   d_ptr, d_ptr + base_address);
       }
@@ -196,7 +196,7 @@
       continue;
     }
     if (DEBUG_FIXUP) {
-      LOG(INFO) << StringPrintf("In %s moving Elf32_Shdr[%d] from 0x%08x to 0x%08x",
+      LOG(INFO) << StringPrintf("In %s moving Elf32_Shdr[%d] from 0x%08x to 0x%08" PRIxPTR,
                                 elf_file.GetFile().GetPath().c_str(), i,
                                 sh.sh_addr, sh.sh_addr + base_address);
     }
@@ -213,7 +213,7 @@
     CHECK((ph.p_align == 0) || (0 == ((ph.p_vaddr - ph.p_offset) & (ph.p_align - 1))))
             << elf_file.GetFile().GetPath() << " i=" << i;
     if (DEBUG_FIXUP) {
-      LOG(INFO) << StringPrintf("In %s moving Elf32_Phdr[%d] from 0x%08x to 0x%08x",
+      LOG(INFO) << StringPrintf("In %s moving Elf32_Phdr[%d] from 0x%08x to 0x%08" PRIxPTR,
                                 elf_file.GetFile().GetPath().c_str(), i,
                                 ph.p_vaddr, ph.p_vaddr + base_address);
     }
@@ -238,7 +238,7 @@
     ::llvm::ELF::Elf32_Sym& symbol = elf_file.GetSymbol(section_type, i);
     if (symbol.st_value != 0) {
       if (DEBUG_FIXUP) {
-        LOG(INFO) << StringPrintf("In %s moving Elf32_Sym[%d] from 0x%08x to 0x%08x",
+        LOG(INFO) << StringPrintf("In %s moving Elf32_Sym[%d] from 0x%08x to 0x%08" PRIxPTR,
                                   elf_file.GetFile().GetPath().c_str(), i,
                                   symbol.st_value, symbol.st_value + base_address);
       }
@@ -255,7 +255,7 @@
       for (uint32_t i = 0; i < elf_file.GetRelNum(sh); i++) {
         llvm::ELF::Elf32_Rel& rel = elf_file.GetRel(sh, i);
         if (DEBUG_FIXUP) {
-          LOG(INFO) << StringPrintf("In %s moving Elf32_Rel[%d] from 0x%08x to 0x%08x",
+          LOG(INFO) << StringPrintf("In %s moving Elf32_Rel[%d] from 0x%08x to 0x%08" PRIxPTR,
                                     elf_file.GetFile().GetPath().c_str(), i,
                                     rel.r_offset, rel.r_offset + base_address);
         }
@@ -265,7 +265,7 @@
       for (uint32_t i = 0; i < elf_file.GetRelaNum(sh); i++) {
         llvm::ELF::Elf32_Rela& rela = elf_file.GetRela(sh, i);
         if (DEBUG_FIXUP) {
-          LOG(INFO) << StringPrintf("In %s moving Elf32_Rela[%d] from 0x%08x to 0x%08x",
+          LOG(INFO) << StringPrintf("In %s moving Elf32_Rela[%d] from 0x%08x to 0x%08" PRIxPTR,
                                     elf_file.GetFile().GetPath().c_str(), i,
                                     rela.r_offset, rela.r_offset + base_address);
         }
diff --git a/compiler/file_output_stream.cc b/compiler/file_output_stream.cc
index 0e4a294..3ee16f5 100644
--- a/compiler/file_output_stream.cc
+++ b/compiler/file_output_stream.cc
@@ -25,7 +25,7 @@
 
 FileOutputStream::FileOutputStream(File* file) : OutputStream(file->GetPath()), file_(file) {}
 
-bool FileOutputStream::WriteFully(const void* buffer, int64_t byte_count) {
+bool FileOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   return file_->WriteFully(buffer, byte_count);
 }
 
diff --git a/compiler/file_output_stream.h b/compiler/file_output_stream.h
index bde9e68..76b00fe 100644
--- a/compiler/file_output_stream.h
+++ b/compiler/file_output_stream.h
@@ -29,7 +29,7 @@
 
   virtual ~FileOutputStream() {}
 
-  virtual bool WriteFully(const void* buffer, int64_t byte_count);
+  virtual bool WriteFully(const void* buffer, size_t byte_count);
 
   virtual off_t Seek(off_t offset, Whence whence);
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 09bb70c..67cd51b 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -208,12 +208,12 @@
   DCHECK_LT(image_end_, image_->Size());
 }
 
-bool ImageWriter::IsImageOffsetAssigned(const mirror::Object* object) const {
+bool ImageWriter::IsImageOffsetAssigned(mirror::Object* object) const {
   DCHECK(object != nullptr);
   return object->GetLockWord().GetState() == LockWord::kForwardingAddress;
 }
 
-size_t ImageWriter::GetImageOffset(const mirror::Object* object) const {
+size_t ImageWriter::GetImageOffset(mirror::Object* object) const {
   DCHECK(object != nullptr);
   DCHECK(IsImageOffsetAssigned(object));
   LockWord lock_word = object->GetLockWord();
@@ -226,7 +226,7 @@
   size_t length = RoundUp(Runtime::Current()->GetHeap()->GetTotalMemory(), kPageSize);
   std::string error_msg;
   image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, PROT_READ | PROT_WRITE,
-                                    &error_msg));
+                                    true, &error_msg));
   if (UNLIKELY(image_.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
     return false;
@@ -281,7 +281,7 @@
   Runtime::Current()->GetHeap()->VisitObjects(ComputeEagerResolvedStringsCallback, this);
 }
 
-bool ImageWriter::IsImageClass(const Class* klass) {
+bool ImageWriter::IsImageClass(Class* klass) {
   return compiler_driver_.IsImageClass(ClassHelper(klass).GetDescriptor());
 }
 
@@ -447,7 +447,7 @@
   for (size_t i = 0; i < num_reference_fields; ++i) {
     mirror::ArtField* field = sirt_class->GetInstanceField(i);
     MemberOffset field_offset = field->GetOffset();
-    mirror::Object* value = obj->GetFieldObject<mirror::Object*>(field_offset, false);
+    mirror::Object* value = obj->GetFieldObject<mirror::Object>(field_offset, false);
     if (value != nullptr) {
       WalkFieldsInOrder(value);
     }
@@ -470,7 +470,7 @@
       for (size_t i = 0; i < num_static_fields; ++i) {
         mirror::ArtField* field = klass->GetStaticField(i);
         MemberOffset field_offset = field->GetOffset();
-        mirror::Object* value = sirt_obj->GetFieldObject<mirror::Object*>(field_offset, false);
+        mirror::Object* value = sirt_obj->GetFieldObject<mirror::Object>(field_offset, false);
         if (value != nullptr) {
           WalkFieldsInOrder(value);
         }
@@ -527,16 +527,16 @@
   const size_t heap_bytes_per_bitmap_byte = kBitsPerByte * gc::accounting::SpaceBitmap::kAlignment;
   const size_t bitmap_bytes = RoundUp(image_end_, heap_bytes_per_bitmap_byte) /
       heap_bytes_per_bitmap_byte;
-  ImageHeader image_header(reinterpret_cast<uint32_t>(image_begin_),
+  ImageHeader image_header(PointerToLowMemUInt32(image_begin_),
                            static_cast<uint32_t>(image_end_),
                            RoundUp(image_end_, kPageSize),
                            RoundUp(bitmap_bytes, kPageSize),
-                           reinterpret_cast<uint32_t>(GetImageAddress(image_roots.get())),
+                           PointerToLowMemUInt32(GetImageAddress(image_roots.get())),
                            oat_file_->GetOatHeader().GetChecksum(),
-                           reinterpret_cast<uint32_t>(oat_file_begin),
-                           reinterpret_cast<uint32_t>(oat_data_begin_),
-                           reinterpret_cast<uint32_t>(oat_data_end),
-                           reinterpret_cast<uint32_t>(oat_file_end));
+                           PointerToLowMemUInt32(oat_file_begin),
+                           PointerToLowMemUInt32(oat_data_begin_),
+                           PointerToLowMemUInt32(oat_data_end),
+                           PointerToLowMemUInt32(oat_file_end));
   memcpy(image_->Begin(), &image_header, sizeof(image_header));
 
   // Note that image_end_ is left at end of used space
@@ -578,7 +578,7 @@
   image_writer->FixupObject(obj, copy);
 }
 
-void ImageWriter::FixupObject(const Object* orig, Object* copy) {
+void ImageWriter::FixupObject(Object* orig, Object* copy) {
   DCHECK(orig != NULL);
   DCHECK(copy != NULL);
   copy->SetClass(down_cast<Class*>(GetImageAddress(orig->GetClass())));
@@ -594,12 +594,12 @@
   }
 }
 
-void ImageWriter::FixupClass(const Class* orig, Class* copy) {
+void ImageWriter::FixupClass(Class* orig, Class* copy) {
   FixupInstanceFields(orig, copy);
   FixupStaticFields(orig, copy);
 }
 
-void ImageWriter::FixupMethod(const ArtMethod* orig, ArtMethod* copy) {
+void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) {
   FixupInstanceFields(orig, copy);
 
   // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
@@ -607,43 +607,36 @@
 
   // The resolution method has a special trampoline to call.
   if (UNLIKELY(orig == Runtime::Current()->GetResolutionMethod())) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-    copy->SetEntryPointFromCompiledCode(GetOatAddress(portable_resolution_trampoline_offset_));
-#else
-    copy->SetEntryPointFromCompiledCode(GetOatAddress(quick_resolution_trampoline_offset_));
-#endif
+    copy->SetEntryPointFromPortableCompiledCode(GetOatAddress(portable_resolution_trampoline_offset_));
+    copy->SetEntryPointFromQuickCompiledCode(GetOatAddress(quick_resolution_trampoline_offset_));
   } else if (UNLIKELY(orig == Runtime::Current()->GetImtConflictMethod())) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-    copy->SetEntryPointFromCompiledCode(GetOatAddress(portable_imt_conflict_trampoline_offset_));
-#else
-    copy->SetEntryPointFromCompiledCode(GetOatAddress(quick_imt_conflict_trampoline_offset_));
-#endif
+    copy->SetEntryPointFromPortableCompiledCode(GetOatAddress(portable_imt_conflict_trampoline_offset_));
+    copy->SetEntryPointFromQuickCompiledCode(GetOatAddress(quick_imt_conflict_trampoline_offset_));
   } else {
     // We assume all methods have code. If they don't currently then we set them to the use the
     // resolution trampoline. Abstract methods never have code and so we need to make sure their
     // use results in an AbstractMethodError. We use the interpreter to achieve this.
     if (UNLIKELY(orig->IsAbstract())) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-      copy->SetEntryPointFromCompiledCode(GetOatAddress(portable_to_interpreter_bridge_offset_));
-#else
-      copy->SetEntryPointFromCompiledCode(GetOatAddress(quick_to_interpreter_bridge_offset_));
-#endif
+      copy->SetEntryPointFromPortableCompiledCode(GetOatAddress(portable_to_interpreter_bridge_offset_));
+      copy->SetEntryPointFromQuickCompiledCode(GetOatAddress(quick_to_interpreter_bridge_offset_));
       copy->SetEntryPointFromInterpreter(reinterpret_cast<EntryPointFromInterpreter*>
-      (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_))));
+          (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_))));
     } else {
       copy->SetEntryPointFromInterpreter(reinterpret_cast<EntryPointFromInterpreter*>
-      (const_cast<byte*>(GetOatAddress(interpreter_to_compiled_code_bridge_offset_))));
+          (const_cast<byte*>(GetOatAddress(interpreter_to_compiled_code_bridge_offset_))));
       // Use original code if it exists. Otherwise, set the code pointer to the resolution
       // trampoline.
-      const byte* code = GetOatAddress(orig->GetOatCodeOffset());
-      if (code != NULL) {
-        copy->SetEntryPointFromCompiledCode(code);
+      const byte* quick_code = GetOatAddress(orig->GetQuickOatCodeOffset());
+      if (quick_code != nullptr) {
+        copy->SetEntryPointFromQuickCompiledCode(quick_code);
       } else {
-#if defined(ART_USE_PORTABLE_COMPILER)
-        copy->SetEntryPointFromCompiledCode(GetOatAddress(portable_resolution_trampoline_offset_));
-#else
-        copy->SetEntryPointFromCompiledCode(GetOatAddress(quick_resolution_trampoline_offset_));
-#endif
+        copy->SetEntryPointFromQuickCompiledCode(GetOatAddress(quick_resolution_trampoline_offset_));
+      }
+      const byte* portable_code = GetOatAddress(orig->GetPortableOatCodeOffset());
+      if (portable_code != nullptr) {
+        copy->SetEntryPointFromPortableCompiledCode(portable_code);
+      } else {
+        copy->SetEntryPointFromPortableCompiledCode(GetOatAddress(portable_resolution_trampoline_offset_));
       }
       if (orig->IsNative()) {
         // The native method's pointer is set to a stub to lookup via dlsym.
@@ -667,14 +660,14 @@
   }
 }
 
-void ImageWriter::FixupObjectArray(const ObjectArray<Object>* orig, ObjectArray<Object>* copy) {
+void ImageWriter::FixupObjectArray(ObjectArray<Object>* orig, ObjectArray<Object>* copy) {
   for (int32_t i = 0; i < orig->GetLength(); ++i) {
-    const Object* element = orig->Get(i);
-    copy->SetPtrWithoutChecks(i, GetImageAddress(element));
+    Object* element = orig->Get(i);
+    copy->SetWithoutChecksAndWriteBarrier(i, GetImageAddress(element));
   }
 }
 
-void ImageWriter::FixupInstanceFields(const Object* orig, Object* copy) {
+void ImageWriter::FixupInstanceFields(Object* orig, Object* copy) {
   DCHECK(orig != NULL);
   DCHECK(copy != NULL);
   Class* klass = orig->GetClass();
@@ -682,13 +675,13 @@
   FixupFields(orig, copy, klass->GetReferenceInstanceOffsets(), false);
 }
 
-void ImageWriter::FixupStaticFields(const Class* orig, Class* copy) {
+void ImageWriter::FixupStaticFields(Class* orig, Class* copy) {
   DCHECK(orig != NULL);
   DCHECK(copy != NULL);
   FixupFields(orig, copy, orig->GetReferenceStaticOffsets(), true);
 }
 
-void ImageWriter::FixupFields(const Object* orig,
+void ImageWriter::FixupFields(Object* orig,
                               Object* copy,
                               uint32_t ref_offsets,
                               bool is_static) {
@@ -697,9 +690,10 @@
     while (ref_offsets != 0) {
       size_t right_shift = CLZ(ref_offsets);
       MemberOffset byte_offset = CLASS_OFFSET_FROM_CLZ(right_shift);
-      const Object* ref = orig->GetFieldObject<const Object*>(byte_offset, false);
-      // Use SetFieldPtr to avoid card marking since we are writing to the image.
-      copy->SetFieldPtr(byte_offset, GetImageAddress(ref), false);
+      Object* ref = orig->GetFieldObject<Object>(byte_offset, false);
+      // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
+      // image.
+      copy->SetFieldObjectWithoutWriteBarrier(byte_offset, GetImageAddress(ref), false);
       ref_offsets &= ~(CLASS_HIGH_BIT >> right_shift);
     }
   } else {
@@ -707,7 +701,7 @@
     // walk up the class inheritance hierarchy and find reference
     // offsets the hard way. In the static case, just consider this
     // class.
-    for (const Class *klass = is_static ? orig->AsClass() : orig->GetClass();
+    for (Class *klass = is_static ? orig->AsClass() : orig->GetClass();
          klass != NULL;
          klass = is_static ? NULL : klass->GetSuperClass()) {
       size_t num_reference_fields = (is_static
@@ -718,9 +712,10 @@
                            ? klass->GetStaticField(i)
                            : klass->GetInstanceField(i));
         MemberOffset field_offset = field->GetOffset();
-        const Object* ref = orig->GetFieldObject<const Object*>(field_offset, false);
-        // Use SetFieldPtr to avoid card marking since we are writing to the image.
-        copy->SetFieldPtr(field_offset, GetImageAddress(ref), false);
+        Object* ref = orig->GetFieldObject<Object>(field_offset, false);
+        // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
+        // image.
+        copy->SetFieldObjectWithoutWriteBarrier(field_offset, GetImageAddress(ref), false);
       }
     }
   }
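The non-referent branches above walk a 32-bit bitmap of reference offsets via CLZ. A self-contained sketch of that iteration pattern, with __builtin_clz standing in for ART's CLZ macro and the bit layout assumed (bit 31 = first field):

    #include <cstdint>

    // Visit each set bit from the high end down, clearing as we go.
    void VisitReferenceBits(uint32_t ref_offsets) {
      while (ref_offsets != 0) {
        int right_shift = __builtin_clz(ref_offsets);  // next set bit
        // ... fix up the field at CLASS_OFFSET_FROM_CLZ(right_shift) ...
        ref_offsets &= ~(0x80000000u >> right_shift);
      }
    }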
@@ -728,9 +723,10 @@
     // Fix-up referent, that isn't marked as an object field, for References.
     ArtField* field = orig->GetClass()->FindInstanceField("referent", "Ljava/lang/Object;");
     MemberOffset field_offset = field->GetOffset();
-    const Object* ref = orig->GetFieldObject<const Object*>(field_offset, false);
-    // Use SetFieldPtr to avoid card marking since we are writing to the image.
-    copy->SetFieldPtr(field_offset, GetImageAddress(ref), false);
+    Object* ref = orig->GetFieldObject<Object>(field_offset, false);
+    // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
+    // image.
+    copy->SetFieldObjectWithoutWriteBarrier(field_offset, GetImageAddress(ref), false);
   }
 }
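Background for the repeated WithoutWriteBarrier comments: a normal reference store dirties a card so the GC can rescan that region later, but the image writer stores into a buffer bound for a file rather than the live heap, so card marking would be pure overhead. A rough sketch of the barrier being skipped (card size and table layout assumed, not ART's actual implementation):

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kCardShift = 10;  // assume 1 KiB cards

    // What a store with a write barrier would add: dirty the card covering
    // the written slot so a concurrent/generational GC rescans it.
    void MarkCard(uint8_t* card_table, const uint8_t* heap_begin,
                  const uint8_t* written_addr) {
      card_table[static_cast<size_t>(written_addr - heap_begin) >> kCardShift] = 1;
    }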
 
@@ -786,17 +782,17 @@
   for (size_t i = 0; i < code_to_patch.size(); i++) {
     const CompilerDriver::CallPatchInformation* patch = code_to_patch[i];
     ArtMethod* target = GetTargetMethod(patch);
-    uint32_t code = reinterpret_cast<uint32_t>(class_linker->GetOatCodeFor(target));
-    uint32_t code_base = reinterpret_cast<uint32_t>(&oat_file_->GetOatHeader());
-    uint32_t code_offset = code - code_base;
-    SetPatchLocation(patch, reinterpret_cast<uint32_t>(GetOatAddress(code_offset)));
+    uintptr_t quick_code = reinterpret_cast<uintptr_t>(class_linker->GetQuickOatCodeFor(target));
+    uintptr_t code_base = reinterpret_cast<uintptr_t>(&oat_file_->GetOatHeader());
+    uintptr_t code_offset = quick_code - code_base;
+    SetPatchLocation(patch, PointerToLowMemUInt32(GetOatAddress(code_offset)));
   }
 
   const CallPatches& methods_to_patch = compiler_driver_.GetMethodsToPatch();
   for (size_t i = 0; i < methods_to_patch.size(); i++) {
     const CompilerDriver::CallPatchInformation* patch = methods_to_patch[i];
     ArtMethod* target = GetTargetMethod(patch);
-    SetPatchLocation(patch, reinterpret_cast<uint32_t>(GetImageAddress(target)));
+    SetPatchLocation(patch, PointerToLowMemUInt32(GetImageAddress(target)));
   }
 
   const std::vector<const CompilerDriver::TypePatchInformation*>& classes_to_patch =
@@ -804,7 +800,7 @@
   for (size_t i = 0; i < classes_to_patch.size(); i++) {
     const CompilerDriver::TypePatchInformation* patch = classes_to_patch[i];
     Class* target = GetTargetType(patch);
-    SetPatchLocation(patch, reinterpret_cast<uint32_t>(GetImageAddress(target)));
+    SetPatchLocation(patch, PointerToLowMemUInt32(GetImageAddress(target)));
   }
 
   // Update the image header with the new checksum after patching
@@ -815,18 +811,18 @@
 
 void ImageWriter::SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  const void* oat_code = class_linker->GetOatCodeFor(patch->GetDexFile(),
-                                                     patch->GetReferrerClassDefIdx(),
-                                                     patch->GetReferrerMethodIdx());
+  const void* quick_oat_code = class_linker->GetQuickOatCodeFor(patch->GetDexFile(),
+                                                                patch->GetReferrerClassDefIdx(),
+                                                                patch->GetReferrerMethodIdx());
   OatHeader& oat_header = const_cast<OatHeader&>(oat_file_->GetOatHeader());
   // TODO: make this Thumb2 specific
-  uint8_t* base = reinterpret_cast<uint8_t*>(reinterpret_cast<uint32_t>(oat_code) & ~0x1);
+  uint8_t* base = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(quick_oat_code) & ~0x1);
   uint32_t* patch_location = reinterpret_cast<uint32_t*>(base + patch->GetLiteralOffset());
   if (kIsDebugBuild) {
     if (patch->IsCall()) {
       const CompilerDriver::CallPatchInformation* cpatch = patch->AsCall();
       const DexFile::MethodId& id = cpatch->GetDexFile().GetMethodId(cpatch->GetTargetMethodIdx());
-      uint32_t expected = reinterpret_cast<uint32_t>(&id);
+      uintptr_t expected = reinterpret_cast<uintptr_t>(&id);
       uint32_t actual = *patch_location;
       CHECK(actual == expected || actual == value) << std::hex
           << "actual=" << actual
@@ -836,7 +832,7 @@
     if (patch->IsType()) {
       const CompilerDriver::TypePatchInformation* tpatch = patch->AsType();
       const DexFile::TypeId& id = tpatch->GetDexFile().GetTypeId(tpatch->GetTargetTypeIdx());
-      uint32_t expected = reinterpret_cast<uint32_t>(&id);
+      uintptr_t expected = reinterpret_cast<uintptr_t>(&id);
       uint32_t actual = *patch_location;
       CHECK(actual == expected || actual == value) << std::hex
           << "actual=" << actual
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 695f59b..a1504ee 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -66,17 +66,17 @@
   void AssignImageOffset(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetImageOffset(mirror::Object* object, size_t offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsImageOffsetAssigned(const mirror::Object* object) const;
-  size_t GetImageOffset(const mirror::Object* object) const;
+  bool IsImageOffsetAssigned(mirror::Object* object) const;
+  size_t GetImageOffset(mirror::Object* object) const;
 
-  mirror::Object* GetImageAddress(const mirror::Object* object) const {
+  mirror::Object* GetImageAddress(mirror::Object* object) const {
     if (object == NULL) {
       return NULL;
     }
     return reinterpret_cast<mirror::Object*>(image_begin_ + GetImageOffset(object));
   }
 
-  mirror::Object* GetLocalAddress(const mirror::Object* object) const {
+  mirror::Object* GetLocalAddress(mirror::Object* object) const {
     size_t offset = GetImageOffset(object);
     byte* dst = image_->Begin() + offset;
     return reinterpret_cast<mirror::Object*>(dst);
@@ -96,7 +96,7 @@
   }
 
   // Returns true if the class was in the original requested image classes list.
-  bool IsImageClass(const mirror::Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsImageClass(mirror::Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Debug aid that lists the requested image classes.
   void DumpImageClasses();
@@ -141,20 +141,20 @@
   void CopyAndFixupObjects();
   static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupClass(const mirror::Class* orig, mirror::Class* copy)
+  void FixupClass(mirror::Class* orig, mirror::Class* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupMethod(const mirror::ArtMethod* orig, mirror::ArtMethod* copy)
+  void FixupMethod(mirror::ArtMethod* orig, mirror::ArtMethod* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupObject(const mirror::Object* orig, mirror::Object* copy)
+  void FixupObject(mirror::Object* orig, mirror::Object* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupObjectArray(const mirror::ObjectArray<mirror::Object>* orig,
+  void FixupObjectArray(mirror::ObjectArray<mirror::Object>* orig,
                         mirror::ObjectArray<mirror::Object>* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupInstanceFields(const mirror::Object* orig, mirror::Object* copy)
+  void FixupInstanceFields(mirror::Object* orig, mirror::Object* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupStaticFields(const mirror::Class* orig, mirror::Class* copy)
+  void FixupStaticFields(mirror::Class* orig, mirror::Class* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupFields(const mirror::Object* orig, mirror::Object* copy, uint32_t ref_offsets,
+  void FixupFields(mirror::Object* orig, mirror::Object* copy, uint32_t ref_offsets,
                    bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 21dd11e..c77d319 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -58,11 +58,14 @@
       method = c->FindVirtualMethod(method_name, method_sig);
     }
     ASSERT_TRUE(method != NULL) << method_name << " " << method_sig;
-    if (method->GetEntryPointFromCompiledCode() != NULL) {
-      return;
+    if (method->GetEntryPointFromQuickCompiledCode() == nullptr) {
+      ASSERT_TRUE(method->GetEntryPointFromPortableCompiledCode() == nullptr);
+      CompileMethod(method);
+      ASSERT_TRUE(method->GetEntryPointFromQuickCompiledCode() != nullptr)
+          << method_name << " " << method_sig;
+      ASSERT_TRUE(method->GetEntryPointFromPortableCompiledCode() != nullptr)
+          << method_name << " " << method_sig;
     }
-    CompileMethod(method);
-    ASSERT_TRUE(method->GetEntryPointFromCompiledCode() != NULL) << method_name << " " << method_sig;
   }
 
   void SetUpForTest(bool direct, const char* method_name, const char* method_sig,
@@ -122,19 +125,19 @@
 int gJava_MyClassNatives_foo_calls = 0;
 void Java_MyClassNatives_foo(JNIEnv* env, jobject thisObj) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   Locks::mutator_lock_->AssertNotHeld(Thread::Current());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_foo_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
 }
 
 TEST_F(JniCompilerTest, CompileAndRunNoArgMethod) {
   TEST_DISABLED_FOR_PORTABLE();
-  SetUpForTest(false, "foo", "()V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_foo));
+  SetUpForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo));
 
   EXPECT_EQ(0, gJava_MyClassNatives_foo_calls);
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
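The same reordering repeats through the native stubs below: NumStackReferences walks the managed stack, which is only safe for a runnable thread under the mutator lock, so the kNative state check stays first and the reference count is read only after ScopedObjectAccess makes the thread runnable. The resulting shape, with a hypothetical stub name:

    // Check the state while still native, then become runnable before
    // touching the managed stack.
    void Java_MyClassNatives_example(JNIEnv* env, jobject thisObj) {
      EXPECT_EQ(kNative, Thread::Current()->GetState());
      ScopedObjectAccess soa(Thread::Current());  // kNative -> kRunnable
      EXPECT_EQ(1U, Thread::Current()->NumStackReferences());  // 1 = thisObj
    }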
@@ -150,9 +153,10 @@
 
   ScopedObjectAccess soa(Thread::Current());
   std::string reason;
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                            soa.Decode<mirror::ClassLoader*>(class_loader_));
   ASSERT_TRUE(
-      Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", soa.Decode<mirror::ClassLoader*>(class_loader_),
-                                                         &reason)) << reason;
+      Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", class_loader, &reason)) << reason;
 
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24);
   EXPECT_EQ(25, result);
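The SirtRef wrapper guards against GC: LoadNativeLibrary can suspend the calling thread, and a raw mirror::ClassLoader* decoded from a jobject would not be visible as a root across that window. SirtRef registers the reference in the thread's stack indirect reference table for its scope, so the pattern is simply:

    // Root the decoded ClassLoader across a call that may suspend; SirtRef
    // unregisters itself when it goes out of scope.
    SirtRef<mirror::ClassLoader> class_loader(
        soa.Self(), soa.Decode<mirror::ClassLoader*>(class_loader_));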
@@ -165,9 +169,10 @@
 
   ScopedObjectAccess soa(Thread::Current());
   std::string reason;
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                            soa.Decode<mirror::ClassLoader*>(class_loader_));
   ASSERT_TRUE(
-      Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", soa.Decode<mirror::ClassLoader*>(class_loader_),
-                                                         &reason)) << reason;
+      Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", class_loader, &reason)) << reason;
 
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42);
   EXPECT_EQ(43, result);
@@ -176,12 +181,13 @@
 int gJava_MyClassNatives_fooI_calls = 0;
 jint Java_MyClassNatives_fooI(JNIEnv* env, jobject thisObj, jint x) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooI_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x;
 }
 
@@ -202,12 +208,13 @@
 int gJava_MyClassNatives_fooII_calls = 0;
 jint Java_MyClassNatives_fooII(JNIEnv* env, jobject thisObj, jint x, jint y) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooII_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x - y;  // non-commutative operator
 }
 
@@ -229,12 +236,13 @@
 int gJava_MyClassNatives_fooJJ_calls = 0;
 jlong Java_MyClassNatives_fooJJ(JNIEnv* env, jobject thisObj, jlong x, jlong y) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooJJ_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x - y;  // non-commutative operator
 }
 
@@ -257,12 +265,13 @@
 int gJava_MyClassNatives_fooDD_calls = 0;
 jdouble Java_MyClassNatives_fooDD(JNIEnv* env, jobject thisObj, jdouble x, jdouble y) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooDD_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x - y;  // non-commutative operator
 }
 
@@ -286,12 +295,13 @@
 int gJava_MyClassNatives_fooJJ_synchronized_calls = 0;
 jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv* env, jobject thisObj, jlong x, jlong y) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooJJ_synchronized_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x | y;
 }
 
@@ -312,12 +322,13 @@
 jobject Java_MyClassNatives_fooIOO(JNIEnv* env, jobject thisObj, jint x, jobject y,
                             jobject z) {
   // 3 = this + y + z
-  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooIOO_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   switch (x) {
     case 1:
       return y;
@@ -363,12 +374,13 @@
 int gJava_MyClassNatives_fooSII_calls = 0;
 jint Java_MyClassNatives_fooSII(JNIEnv* env, jclass klass, jint x, jint y) {
   // 1 = klass
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSII_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x + y;
 }
 
@@ -386,12 +398,13 @@
 int gJava_MyClassNatives_fooSDD_calls = 0;
 jdouble Java_MyClassNatives_fooSDD(JNIEnv* env, jclass klass, jdouble x, jdouble y) {
   // 1 = klass
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSDD_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x - y;  // non-commutative operator
 }
 
@@ -415,12 +428,13 @@
 jobject Java_MyClassNatives_fooSIOO(JNIEnv* env, jclass klass, jint x, jobject y,
                              jobject z) {
   // 3 = klass + y + z
-  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSIOO_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   switch (x) {
     case 1:
       return y;
@@ -467,12 +481,13 @@
 int gJava_MyClassNatives_fooSSIOO_calls = 0;
 jobject Java_MyClassNatives_fooSSIOO(JNIEnv* env, jclass klass, jint x, jobject y, jobject z) {
   // 3 = klass + y + z
-  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSSIOO_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   switch (x) {
     case 1:
       return y;
diff --git a/compiler/leb128_encoder_test.cc b/compiler/leb128_encoder_test.cc
index c63dfa2..7af8518 100644
--- a/compiler/leb128_encoder_test.cc
+++ b/compiler/leb128_encoder_test.cc
@@ -14,14 +14,13 @@
  * limitations under the License.
  */
 
-#include "base/histogram-inl.h"
-#include "common_test.h"
 #include "leb128.h"
 #include "leb128_encoder.h"
 
-namespace art {
+#include "gtest/gtest.h"
+#include "base/histogram-inl.h"
 
-class Leb128Test : public CommonTest {};
+namespace art {
 
 struct DecodeUnsignedLeb128TestCase {
   uint32_t decoded;
@@ -92,7 +91,7 @@
     {(-1) << 31, {0x80, 0x80, 0x80, 0x80, 0x78}},
 };
 
-TEST_F(Leb128Test, UnsignedSinglesVector) {
+TEST(Leb128Test, UnsignedSinglesVector) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
     Leb128EncodingVector builder;
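For readers decoding the test vectors: (U)LEB128 stores seven payload bits per byte, least-significant group first, with bit 7 flagging a continuation byte. A reference encoder, written independently of ART's Leb128Encoder:

    #include <cstdint>
    #include <vector>

    // Encode an unsigned value as ULEB128. Example: 74565 (0x12345) splits
    // into 7-bit groups 0x45, 0x46, 0x04 and encodes as {0xC5, 0xC6, 0x04}.
    std::vector<uint8_t> EncodeUleb128(uint32_t value) {
      std::vector<uint8_t> out;
      do {
        uint8_t byte = value & 0x7F;
        value >>= 7;
        if (value != 0) {
          byte |= 0x80;  // more groups follow
        }
        out.push_back(byte);
      } while (value != 0);
      return out;
    }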
@@ -111,7 +110,7 @@
   }
 }
 
-TEST_F(Leb128Test, UnsignedSingles) {
+TEST(Leb128Test, UnsignedSingles) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
     uint8_t encoded_data[5];
@@ -130,7 +129,7 @@
   }
 }
 
-TEST_F(Leb128Test, UnsignedStreamVector) {
+TEST(Leb128Test, UnsignedStreamVector) {
   // Encode a number of entries.
   Leb128EncodingVector builder;
   for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
@@ -151,7 +150,7 @@
             static_cast<size_t>(encoded_data_ptr - &builder.GetData()[0]));
 }
 
-TEST_F(Leb128Test, UnsignedStream) {
+TEST(Leb128Test, UnsignedStream) {
   // Encode a number of entries.
   uint8_t encoded_data[5 * arraysize(uleb128_tests)];
   uint8_t* end = encoded_data;
@@ -173,7 +172,7 @@
   EXPECT_EQ(data_size, static_cast<size_t>(encoded_data_ptr - encoded_data));
 }
 
-TEST_F(Leb128Test, SignedSinglesVector) {
+TEST(Leb128Test, SignedSinglesVector) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(sleb128_tests); ++i) {
     Leb128EncodingVector builder;
@@ -192,7 +191,7 @@
   }
 }
 
-TEST_F(Leb128Test, SignedSingles) {
+TEST(Leb128Test, SignedSingles) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(sleb128_tests); ++i) {
     uint8_t encoded_data[5];
@@ -211,7 +210,7 @@
   }
 }
 
-TEST_F(Leb128Test, SignedStreamVector) {
+TEST(Leb128Test, SignedStreamVector) {
   // Encode a number of entries.
   Leb128EncodingVector builder;
   for (size_t i = 0; i < arraysize(sleb128_tests); ++i) {
@@ -232,7 +231,7 @@
             static_cast<size_t>(encoded_data_ptr - &builder.GetData()[0]));
 }
 
-TEST_F(Leb128Test, SignedStream) {
+TEST(Leb128Test, SignedStream) {
   // Encode a number of entries.
   uint8_t encoded_data[5 * arraysize(sleb128_tests)];
   uint8_t* end = encoded_data;
@@ -254,7 +253,7 @@
   EXPECT_EQ(data_size, static_cast<size_t>(encoded_data_ptr - encoded_data));
 }
 
-TEST_F(Leb128Test, Speed) {
+TEST(Leb128Test, Speed) {
   UniquePtr<Histogram<uint64_t> > enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
   UniquePtr<Histogram<uint64_t> > dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
   Leb128EncodingVector builder;
diff --git a/compiler/llvm/compiler_llvm.cc b/compiler/llvm/compiler_llvm.cc
index 94408bb..6563eb5 100644
--- a/compiler/llvm/compiler_llvm.cc
+++ b/compiler/llvm/compiler_llvm.cc
@@ -126,7 +126,7 @@
   MutexLock GUARD(Thread::Current(), next_cunit_id_lock_);
   LlvmCompilationUnit* cunit = new LlvmCompilationUnit(this, next_cunit_id_++);
   if (!bitcode_filename_.empty()) {
-    cunit->SetBitcodeFileName(StringPrintf("%s-%zu",
+    cunit->SetBitcodeFileName(StringPrintf("%s-%u",
                                            bitcode_filename_.c_str(),
                                            cunit->GetCompilationUnitId()));
   }
diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc
index 6423cd7..8f22a97 100644
--- a/compiler/llvm/gbc_expander.cc
+++ b/compiler/llvm/gbc_expander.cc
@@ -897,7 +897,7 @@
   } else {
     code_addr =
         irb_.LoadFromObjectOffset(callee_method_object_addr,
-                                  art::mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(),
+                                  art::mirror::ArtMethod::EntryPointFromPortableCompiledCodeOffset().Int32Value(),
                                   func_type->getPointerTo(), kTBAARuntimeInfo);
   }
 
@@ -1234,7 +1234,7 @@
 
   llvm::Value* code_addr =
     irb_.LoadFromObjectOffset(callee_method_object_addr,
-                              art::mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(),
+                              art::mirror::ArtMethod::EntryPointFromPortableCompiledCodeOffset().Int32Value(),
                               callee_method_type->getPointerTo(),
                               kTBAARuntimeInfo);
 
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index 038f5dc..d23706d 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -151,7 +151,7 @@
 void LlvmCompilationUnit::DumpBitcodeToFile() {
   std::string bitcode;
   DumpBitcodeToString(bitcode);
-  std::string filename(StringPrintf("%s/Art%u.bc", DumpDirectory().c_str(), cunit_id_));
+  std::string filename(StringPrintf("%s/Art%zu.bc", DumpDirectory().c_str(), cunit_id_));
   UniquePtr<File> output(OS::CreateEmptyFile(filename.c_str()));
   output->WriteFully(bitcode.data(), bitcode.size());
   LOG(INFO) << ".bc file written successfully: " << filename;
@@ -178,7 +178,7 @@
   const bool kDumpELF = false;
   if (kDumpELF) {
     // Dump the ELF image for debugging
-    std::string filename(StringPrintf("%s/Art%u.o", DumpDirectory().c_str(), cunit_id_));
+    std::string filename(StringPrintf("%s/Art%zu.o", DumpDirectory().c_str(), cunit_id_));
     UniquePtr<File> output(OS::CreateEmptyFile(filename.c_str()));
     output->WriteFully(elf_object_.data(), elf_object_.size());
     LOG(INFO) << ".o file written successfully: " << filename;
diff --git a/compiler/llvm/llvm_compilation_unit.h b/compiler/llvm/llvm_compilation_unit.h
index ced9f81..58aa6fd 100644
--- a/compiler/llvm/llvm_compilation_unit.h
+++ b/compiler/llvm/llvm_compilation_unit.h
@@ -101,10 +101,10 @@
 
  private:
   LlvmCompilationUnit(const CompilerLLVM* compiler_llvm,
-                      uint32_t cunit_id);
+                      size_t cunit_id);
 
   const CompilerLLVM* compiler_llvm_;
-  const uint32_t cunit_id_;
+  const size_t cunit_id_;
 
   UniquePtr< ::llvm::LLVMContext> context_;
   UniquePtr<IRBuilder> irb_;
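The %u to %zu changes above follow the cunit_id_ type change: printf-style format specifiers must match the argument's width, and on LP64 passing a size_t where "%u" expects unsigned int is undefined behavior. For example:

    #include <cstdio>

    size_t id = 42;
    std::printf("Art%zu.bc\n", id);  // 'z' is the length modifier for size_t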
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index fc45412..b3070b6 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -39,29 +39,42 @@
                                                             method->GetDexMethodIndex()));
 
     if (compiled_method == NULL) {
-      EXPECT_TRUE(oat_method.GetCode() == NULL) << PrettyMethod(method) << " "
-                                                << oat_method.GetCode();
-#if !defined(ART_USE_PORTABLE_COMPILER)
-      EXPECT_EQ(oat_method.GetFrameSizeInBytes(), kCompile ? kStackAlignment : 0);
+      EXPECT_TRUE(oat_method.GetQuickCode() == NULL) << PrettyMethod(method) << " "
+                                                     << oat_method.GetQuickCode();
+      EXPECT_TRUE(oat_method.GetPortableCode() == NULL) << PrettyMethod(method) << " "
+                                                        << oat_method.GetPortableCode();
+      EXPECT_EQ(oat_method.GetFrameSizeInBytes(), 0U);
       EXPECT_EQ(oat_method.GetCoreSpillMask(), 0U);
       EXPECT_EQ(oat_method.GetFpSpillMask(), 0U);
-#endif
     } else {
-      const void* oat_code = oat_method.GetCode();
-      EXPECT_TRUE(oat_code != NULL) << PrettyMethod(method);
-      uintptr_t oat_code_aligned = RoundDown(reinterpret_cast<uintptr_t>(oat_code), 2);
-      oat_code = reinterpret_cast<const void*>(oat_code_aligned);
-
-      const std::vector<uint8_t>& code = compiled_method->GetCode();
-      size_t code_size = code.size() * sizeof(code[0]);
-      EXPECT_EQ(0, memcmp(oat_code, &code[0], code_size))
-          << PrettyMethod(method) << " " << code_size;
-      CHECK_EQ(0, memcmp(oat_code, &code[0], code_size));
-#if !defined(ART_USE_PORTABLE_COMPILER)
-      EXPECT_EQ(oat_method.GetFrameSizeInBytes(), compiled_method->GetFrameSizeInBytes());
-      EXPECT_EQ(oat_method.GetCoreSpillMask(), compiled_method->GetCoreSpillMask());
-      EXPECT_EQ(oat_method.GetFpSpillMask(), compiled_method->GetFpSpillMask());
-#endif
+      const void* quick_oat_code = oat_method.GetQuickCode();
+      if (quick_oat_code != nullptr) {
+        EXPECT_EQ(oat_method.GetFrameSizeInBytes(), compiled_method->GetFrameSizeInBytes());
+        EXPECT_EQ(oat_method.GetCoreSpillMask(), compiled_method->GetCoreSpillMask());
+        EXPECT_EQ(oat_method.GetFpSpillMask(), compiled_method->GetFpSpillMask());
+        uintptr_t oat_code_aligned = RoundDown(reinterpret_cast<uintptr_t>(quick_oat_code), 2);
+        quick_oat_code = reinterpret_cast<const void*>(oat_code_aligned);
+        const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode();
+        EXPECT_TRUE(quick_code != nullptr);
+        size_t code_size = quick_code->size() * sizeof((*quick_code)[0]);
+        EXPECT_EQ(0, memcmp(quick_oat_code, &(*quick_code)[0], code_size))
+            << PrettyMethod(method) << " " << code_size;
+        CHECK_EQ(0, memcmp(quick_oat_code, &(*quick_code)[0], code_size));
+      } else {
+        const void* portable_oat_code = oat_method.GetPortableCode();
+        EXPECT_TRUE(portable_oat_code != nullptr) << PrettyMethod(method);
+        EXPECT_EQ(oat_method.GetFrameSizeInBytes(), 0U);
+        EXPECT_EQ(oat_method.GetCoreSpillMask(), 0U);
+        EXPECT_EQ(oat_method.GetFpSpillMask(), 0U);
+        uintptr_t oat_code_aligned = RoundDown(reinterpret_cast<uintptr_t>(portable_oat_code), 2);
+        portable_oat_code = reinterpret_cast<const void*>(oat_code_aligned);
+        const std::vector<uint8_t>* portable_code = compiled_method->GetPortableCode();
+        EXPECT_TRUE(portable_code != nullptr);
+        size_t code_size = portable_code->size() * sizeof((*portable_code)[0]);
+        EXPECT_EQ(0, memcmp(portable_oat_code, &(*portable_code)[0], code_size))
+            << PrettyMethod(method) << " " << code_size;
+        CHECK_EQ(0, memcmp(portable_oat_code, &(*portable_code)[0], code_size));
+      }
     }
   }
 };
@@ -70,12 +83,8 @@
   TimingLogger timings("CommonTest::WriteRead", false, false);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
-  // TODO: make selectable
-#if defined(ART_USE_PORTABLE_COMPILER)
-  CompilerBackend compiler_backend = kPortable;
-#else
-  CompilerBackend compiler_backend = kQuick;
-#endif
+  // TODO: make selectable.
+  CompilerBackend compiler_backend = kUsePortableCompiler ? kPortable : kQuick;
   InstructionSet insn_set = kIsTargetBuild ? kThumb2 : kX86;
 
   InstructionSetFeatures insn_features;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 7a902d8..7c5669a 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -39,7 +39,7 @@
 
 OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
                      uint32_t image_file_location_oat_checksum,
-                     uint32_t image_file_location_oat_begin,
+                     uintptr_t image_file_location_oat_begin,
                      const std::string& image_file_location,
                      const CompilerDriver* compiler,
                      TimingLogger* timings)
@@ -348,8 +348,8 @@
                                     bool __attribute__((unused)) is_native,
                                     InvokeType invoke_type,
                                     uint32_t method_idx, const DexFile& dex_file) {
-  // derived from CompiledMethod if available
-  uint32_t code_offset = 0;
+  // Derived from CompiledMethod if available.
+  uint32_t quick_code_offset = 0;
   uint32_t frame_size_in_bytes = kStackAlignment;
   uint32_t core_spill_mask = 0;
   uint32_t fp_spill_mask = 0;
@@ -358,36 +358,38 @@
   uint32_t gc_map_offset = 0;
 
   OatClass* oat_class = oat_classes_[oat_class_index];
-#if defined(ART_USE_PORTABLE_COMPILER)
-  size_t oat_method_offsets_offset =
-      oat_class->GetOatMethodOffsetsOffsetFromOatHeader(class_def_method_index);
-#endif
-
   CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
-  if (compiled_method != NULL) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-    compiled_method->AddOatdataOffsetToCompliledCodeOffset(
-        oat_method_offsets_offset + OFFSETOF_MEMBER(OatMethodOffsets, code_offset_));
-#else
-    const std::vector<uint8_t>& code = compiled_method->GetCode();
-    offset = compiled_method->AlignCode(offset);
-    DCHECK_ALIGNED(offset, kArmAlignment);
-    uint32_t code_size = code.size() * sizeof(code[0]);
-    CHECK_NE(code_size, 0U);
-    uint32_t thumb_offset = compiled_method->CodeDelta();
-    code_offset = offset + sizeof(code_size) + thumb_offset;
 
-    // Deduplicate code arrays
-    SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter = code_offsets_.find(&code);
-    if (code_iter != code_offsets_.end()) {
-      code_offset = code_iter->second;
+  if (compiled_method != NULL) {
+    const std::vector<uint8_t>* portable_code = compiled_method->GetPortableCode();
+    const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode();
+    if (portable_code != nullptr) {
+      CHECK(quick_code == nullptr);
+      size_t oat_method_offsets_offset =
+          oat_class->GetOatMethodOffsetsOffsetFromOatHeader(class_def_method_index);
+      compiled_method->AddOatdataOffsetToCompliledCodeOffset(
+          oat_method_offsets_offset + OFFSETOF_MEMBER(OatMethodOffsets, code_offset_));
     } else {
-      code_offsets_.Put(&code, code_offset);
-      offset += sizeof(code_size);  // code size is prepended before code
-      offset += code_size;
-      oat_header_->UpdateChecksum(&code[0], code_size);
+      CHECK(quick_code != nullptr);
+      offset = compiled_method->AlignCode(offset);
+      DCHECK_ALIGNED(offset, kArmAlignment);
+      uint32_t code_size = quick_code->size() * sizeof(uint8_t);
+      CHECK_NE(code_size, 0U);
+      uint32_t thumb_offset = compiled_method->CodeDelta();
+      quick_code_offset = offset + sizeof(code_size) + thumb_offset;
+
+      // Deduplicate code arrays
+      SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter =
+          code_offsets_.find(quick_code);
+      if (code_iter != code_offsets_.end()) {
+        quick_code_offset = code_iter->second;
+      } else {
+        code_offsets_.Put(quick_code, quick_code_offset);
+        offset += sizeof(code_size);  // code size is prepended before code
+        offset += code_size;
+        oat_header_->UpdateChecksum(&(*quick_code)[0], code_size);
+      }
     }
-#endif
     frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
     core_spill_mask = compiled_method->GetCoreSpillMask();
     fp_spill_mask = compiled_method->GetFpSpillMask();
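The deduplication above keys on the vector's address: the compiler side already canonicalizes identical method bodies to a shared std::vector (see the DedupeSet changes later in this diff), so the first occurrence writes the bytes and records its offset while later occurrences reuse it. A minimal sketch with std::map standing in for ART's SafeMap:

    #include <cstdint>
    #include <map>
    #include <vector>

    std::map<const std::vector<uint8_t>*, uint32_t> code_offsets;

    // Returns the offset a method body will occupy; only the first caller
    // for a given vector should actually write the bytes.
    uint32_t PlaceCode(const std::vector<uint8_t>* code, uint32_t offset) {
      auto it = code_offsets.find(code);
      if (it != code_offsets.end()) {
        return it->second;  // duplicate body: reuse the earlier copy
      }
      code_offsets.emplace(code, offset);
      return offset;
    }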
@@ -456,7 +458,7 @@
     }
 
     oat_class->method_offsets_[*method_offsets_index] =
-        OatMethodOffsets(code_offset,
+        OatMethodOffsets(quick_code_offset,
                          frame_size_in_bytes,
                          core_spill_mask,
                          fp_spill_mask,
@@ -483,9 +485,11 @@
     // Don't overwrite static method trampoline
     if (!method->IsStatic() || method->IsConstructor() ||
         method->GetDeclaringClass()->IsInitialized()) {
-      method->SetOatCodeOffset(code_offset);
+      // TODO: record portable code offsets: method->SetPortableOatCodeOffset(portable_code_offset);
+      method->SetQuickOatCodeOffset(quick_code_offset);
     } else {
-      method->SetEntryPointFromCompiledCode(NULL);
+      method->SetEntryPointFromPortableCompiledCode(nullptr);
+      method->SetEntryPointFromQuickCompiledCode(nullptr);
     }
     method->SetOatVmapTableOffset(vmap_table_offset);
     method->SetOatNativeGcMapOffset(gc_map_offset);
@@ -753,52 +757,52 @@
   if (compiled_method != NULL) {  // ie. not an abstract method
     const OatMethodOffsets method_offsets = oat_class->method_offsets_[*method_offsets_index];
     (*method_offsets_index)++;
-
-#if !defined(ART_USE_PORTABLE_COMPILER)
-    uint32_t aligned_offset = compiled_method->AlignCode(relative_offset);
-    uint32_t aligned_code_delta = aligned_offset - relative_offset;
-    if (aligned_code_delta != 0) {
-      off_t new_offset = out.Seek(aligned_code_delta, kSeekCurrent);
-      size_code_alignment_ += aligned_code_delta;
-      uint32_t expected_offset = file_offset + aligned_offset;
-      if (static_cast<uint32_t>(new_offset) != expected_offset) {
-        PLOG(ERROR) << "Failed to seek to align oat code. Actual: " << new_offset
-                    << " Expected: " << expected_offset << " File: " << out.GetLocation();
-        return 0;
+    const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode();
+    if (quick_code != nullptr) {
+      CHECK(compiled_method->GetPortableCode() == nullptr);
+      uint32_t aligned_offset = compiled_method->AlignCode(relative_offset);
+      uint32_t aligned_code_delta = aligned_offset - relative_offset;
+      if (aligned_code_delta != 0) {
+        off_t new_offset = out.Seek(aligned_code_delta, kSeekCurrent);
+        size_code_alignment_ += aligned_code_delta;
+        uint32_t expected_offset = file_offset + aligned_offset;
+        if (static_cast<uint32_t>(new_offset) != expected_offset) {
+          PLOG(ERROR) << "Failed to seek to align oat code. Actual: " << new_offset
+              << " Expected: " << expected_offset << " File: " << out.GetLocation();
+          return 0;
+        }
+        relative_offset += aligned_code_delta;
+        DCHECK_OFFSET();
       }
-      relative_offset += aligned_code_delta;
+      DCHECK_ALIGNED(relative_offset, kArmAlignment);
+      uint32_t code_size = quick_code->size() * sizeof(uint8_t);
+      CHECK_NE(code_size, 0U);
+
+      // Deduplicate code arrays
+      size_t code_offset = relative_offset + sizeof(code_size) + compiled_method->CodeDelta();
+      SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter =
+          code_offsets_.find(quick_code);
+      if (code_iter != code_offsets_.end() && code_offset != method_offsets.code_offset_) {
+        DCHECK(code_iter->second == method_offsets.code_offset_)
+              << PrettyMethod(method_idx, dex_file);
+      } else {
+        DCHECK(code_offset == method_offsets.code_offset_) << PrettyMethod(method_idx, dex_file);
+        if (!out.WriteFully(&code_size, sizeof(code_size))) {
+          ReportWriteFailure("method code size", method_idx, dex_file, out);
+          return 0;
+        }
+        size_code_size_ += sizeof(code_size);
+        relative_offset += sizeof(code_size);
+        DCHECK_OFFSET();
+        if (!out.WriteFully(&(*quick_code)[0], code_size)) {
+          ReportWriteFailure("method code", method_idx, dex_file, out);
+          return 0;
+        }
+        size_code_ += code_size;
+        relative_offset += code_size;
+      }
       DCHECK_OFFSET();
     }
-    DCHECK_ALIGNED(relative_offset, kArmAlignment);
-    const std::vector<uint8_t>& code = compiled_method->GetCode();
-    uint32_t code_size = code.size() * sizeof(code[0]);
-    CHECK_NE(code_size, 0U);
-
-    // Deduplicate code arrays
-    size_t code_offset = relative_offset + sizeof(code_size) + compiled_method->CodeDelta();
-    SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter = code_offsets_.find(&code);
-    if (code_iter != code_offsets_.end() && code_offset != method_offsets.code_offset_) {
-      DCHECK(code_iter->second == method_offsets.code_offset_)
-          << PrettyMethod(method_idx, dex_file);
-    } else {
-      DCHECK(code_offset == method_offsets.code_offset_) << PrettyMethod(method_idx, dex_file);
-      if (!out.WriteFully(&code_size, sizeof(code_size))) {
-        ReportWriteFailure("method code size", method_idx, dex_file, out);
-        return 0;
-      }
-      size_code_size_ += sizeof(code_size);
-      relative_offset += sizeof(code_size);
-      DCHECK_OFFSET();
-      if (!out.WriteFully(&code[0], code_size)) {
-        ReportWriteFailure("method code", method_idx, dex_file, out);
-        return 0;
-      }
-      size_code_ += code_size;
-      relative_offset += code_size;
-    }
-    DCHECK_OFFSET();
-#endif
-
     const std::vector<uint8_t>& mapping_table = compiled_method->GetMappingTable();
     size_t mapping_table_size = mapping_table.size() * sizeof(mapping_table[0]);
 
@@ -994,7 +998,6 @@
   delete compiled_methods_;
 }
 
-#if defined(ART_USE_PORTABLE_COMPILER)
 size_t OatWriter::OatClass::GetOatMethodOffsetsOffsetFromOatHeader(
     size_t class_def_method_index_) const {
   uint32_t method_offset = GetOatMethodOffsetsOffsetFromOatClass(class_def_method_index_);
@@ -1008,7 +1011,6 @@
     size_t class_def_method_index_) const {
   return oat_method_offsets_offsets_from_oat_class_[class_def_method_index_];
 }
-#endif
 
 size_t OatWriter::OatClass::SizeOf() const {
   return sizeof(status_)
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 64275e6..067c789 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -65,7 +65,7 @@
  public:
   OatWriter(const std::vector<const DexFile*>& dex_files,
             uint32_t image_file_location_oat_checksum,
-            uint32_t image_file_location_oat_begin,
+            uintptr_t image_file_location_oat_begin,
             const std::string& image_file_location,
             const CompilerDriver* compiler,
             TimingLogger* timings);
@@ -150,10 +150,8 @@
                       uint32_t num_non_null_compiled_methods,
                       mirror::Class::Status status);
     ~OatClass();
-#if defined(ART_USE_PORTABLE_COMPILER)
     size_t GetOatMethodOffsetsOffsetFromOatHeader(size_t class_def_method_index_) const;
     size_t GetOatMethodOffsetsOffsetFromOatClass(size_t class_def_method_index_) const;
-#endif
     size_t SizeOf() const;
     void UpdateChecksum(OatHeader& oat_header) const;
     bool Write(OatWriter* oat_writer, OutputStream& out, const size_t file_offset) const;
@@ -217,7 +215,7 @@
 
   // dependencies on the image.
   uint32_t image_file_location_oat_checksum_;
-  uint32_t image_file_location_oat_begin_;
+  uintptr_t image_file_location_oat_begin_;
   std::string image_file_location_;
 
   // data to write
diff --git a/compiler/output_stream.h b/compiler/output_stream.h
index 112dcfc..478a854 100644
--- a/compiler/output_stream.h
+++ b/compiler/output_stream.h
@@ -41,7 +41,7 @@
     return location_;
   }
 
-  virtual bool WriteFully(const void* buffer, int64_t byte_count) = 0;
+  virtual bool WriteFully(const void* buffer, size_t byte_count) = 0;
 
   virtual off_t Seek(off_t offset, Whence whence) = 0;
 
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index 638e0ec..7cc253c 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -62,7 +62,9 @@
 
   explicit DedupeSet(const char* set_name) {
     for (HashType i = 0; i < kShard; ++i) {
-      lock_name_[i] = StringPrintf("%s lock %d", set_name, i);
+      std::ostringstream oss;
+      oss << set_name << " lock " << i;
+      lock_name_[i] = oss.str();
       lock_[i].reset(new Mutex(lock_name_[i].c_str()));
     }
   }
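Streaming sidesteps a varargs pitfall here: HashType is a template parameter, and StringPrintf's "%d" is only correct when it happens to be int. operator<< resolves the proper overload for any integral HashType:

    #include <sstream>
    #include <string>

    template <typename HashType>
    std::string MakeLockName(const char* set_name, HashType i) {
      std::ostringstream oss;
      oss << set_name << " lock " << i;  // type-safe for any streamable type
      return oss.str();
    }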
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 2be3d56..fdd2bab 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -23,18 +23,6 @@
 
 namespace art {
 namespace mips {
-#if 0
-class DirectCallRelocation : public AssemblerFixup {
- public:
-  void Process(const MemoryRegion& region, int position) {
-    // Direct calls are relative to the following instruction on mips.
-    int32_t pointer = region.Load<int32_t>(position);
-    int32_t start = reinterpret_cast<int32_t>(region.start());
-    int32_t delta = start + position + sizeof(int32_t);
-    region.Store<int32_t>(position, pointer - delta);
-  }
-};
-#endif
 
 std::ostream& operator<<(std::ostream& os, const DRegister& rhs) {
   if (rhs >= D0 && rhs < kNumberOfDRegisters) {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 9095180..136d248 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -24,17 +24,6 @@
 namespace art {
 namespace x86 {
 
-class DirectCallRelocation : public AssemblerFixup {
- public:
-  void Process(const MemoryRegion& region, int position) {
-    // Direct calls are relative to the following instruction on x86.
-    int32_t pointer = region.Load<int32_t>(position);
-    int32_t start = reinterpret_cast<int32_t>(region.start());
-    int32_t delta = start + position + sizeof(int32_t);
-    region.Store<int32_t>(position, pointer - delta);
-  }
-};
-
 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
   return os << "XMM" << static_cast<int>(reg);
 }
@@ -1304,15 +1293,6 @@
 }
 
 
-void X86Assembler::Stop(const char* message) {
-  // Emit the message address as immediate operand in the test rax instruction,
-  // followed by the int3 instruction.
-  // Execution can be resumed with the 'cont' command in gdb.
-  testl(EAX, Immediate(reinterpret_cast<int32_t>(message)));
-  int3();
-}
-
-
 void X86Assembler::EmitOperand(int reg_or_opcode, const Operand& operand) {
   CHECK_GE(reg_or_opcode, 0);
   CHECK_LT(reg_or_opcode, 8);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 4ba03d1..0fa8e00 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -452,9 +452,6 @@
   void Align(int alignment, int offset);
   void Bind(Label* label);
 
-  // Debugging and bringup support.
-  void Stop(const char* message);
-
   //
   // Overridden common assembler high-level functionality
   //
diff --git a/compiler/vector_output_stream.h b/compiler/vector_output_stream.h
index a3f8226..09daa12 100644
--- a/compiler/vector_output_stream.h
+++ b/compiler/vector_output_stream.h
@@ -31,7 +31,7 @@
 
   virtual ~VectorOutputStream() {}
 
-  bool WriteFully(const void* buffer, int64_t byte_count) {
+  bool WriteFully(const void* buffer, size_t byte_count) {
     if (static_cast<size_t>(offset_) == vector_.size()) {
       const uint8_t* start = reinterpret_cast<const uint8_t*>(buffer);
       vector_.insert(vector_.end(), &start[0], &start[byte_count]);
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index 05dcd7b..6cd0538 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -33,7 +33,7 @@
   ifeq ($(ART_BUILD_NDEBUG),true)
     $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart-compiler,art/compiler,host,ndebug))
   endif
-  ifeq ($(ART_BUILD_NDEBUG),true)
+  ifeq ($(ART_BUILD_DEBUG),true)
     $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd-compiler,art/compiler,host,debug))
   endif
 endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 97df199..5ac01f2 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -180,7 +180,11 @@
 
   ~Dex2Oat() {
     delete runtime_;
-    VLOG(compiler) << "dex2oat took " << PrettyDuration(NanoTime() - start_ns_)
+    LogCompletionTime();
+  }
+
+  void LogCompletionTime() {
+    LOG(INFO) << "dex2oat took " << PrettyDuration(NanoTime() - start_ns_)
               << " (threads: " << thread_count_ << ")";
   }
 
@@ -287,13 +291,13 @@
     timings.NewSplit("dex2oat OatWriter");
     std::string image_file_location;
     uint32_t image_file_location_oat_checksum = 0;
-    uint32_t image_file_location_oat_data_begin = 0;
+    uintptr_t image_file_location_oat_data_begin = 0;
     if (!driver->IsImage()) {
       TimingLogger::ScopedSplit split("Loading image checksum", &timings);
       gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
       image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum();
       image_file_location_oat_data_begin =
-          reinterpret_cast<uint32_t>(image_space->GetImageHeader().GetOatDataBegin());
+          reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatDataBegin());
       image_file_location = image_space->GetImageFilename();
       if (host_prefix != NULL && StartsWith(image_file_location, host_prefix->c_str())) {
         image_file_location = image_file_location.substr(host_prefix->size());
@@ -679,11 +683,7 @@
   std::string android_root;
   std::vector<const char*> runtime_args;
   int thread_count = sysconf(_SC_NPROCESSORS_CONF);
-#if defined(ART_USE_PORTABLE_COMPILER)
-  CompilerBackend compiler_backend = kPortable;
-#else
-  CompilerBackend compiler_backend = kQuick;
-#endif
+  CompilerBackend compiler_backend = kUsePortableCompiler ? kPortable : kQuick;
 
   // Take the default set of instruction features from the build.
   InstructionSetFeatures instruction_set_features =
@@ -696,7 +696,7 @@
 #elif defined(__mips__)
   InstructionSet instruction_set = kMips;
 #else
-#error "Unsupported architecture"
+  InstructionSet instruction_set = kNone;
 #endif
 
 
@@ -773,6 +773,8 @@
         instruction_set = kMips;
       } else if (instruction_set_str == "x86") {
         instruction_set = kX86;
+      } else if (instruction_set_str == "x86_64") {
+        instruction_set = kX86_64;
       }
     } else if (option.starts_with("--instruction-set-features=")) {
       StringPiece str = option.substr(strlen("--instruction-set-features=")).data();
@@ -1189,6 +1191,7 @@
   // Everything was successfully written, do an explicit exit here to avoid running Runtime
   // destructors that take time (bug 10645725) unless we're a debug build or running on valgrind.
   if (!kIsDebugBuild || (RUNNING_ON_VALGRIND == 0)) {
+    dex2oat->LogCompletionTime();
     exit(EXIT_SUCCESS);
   }
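Because exit() bypasses ~Dex2Oat(), the fast path would otherwise lose the timing line the destructor normally emits; hence the explicit call first. The pattern, in short:

    // Skip slow Runtime teardown but keep the one log line that matters.
    dex2oat->LogCompletionTime();
    exit(EXIT_SUCCESS);  // no destructors run after this point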
 
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 68626f6..3e6e33f 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -16,6 +16,8 @@
 
 #include "disassembler_arm.h"
 
+#include <inttypes.h>
+
 #include <iostream>
 
 #include "base/logging.h"
@@ -711,7 +713,7 @@
                 if (Rn.r == 15 && U == 1) {
                   intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr);
                   lit_adr = RoundDown(lit_adr, 4) + 4 + (imm8 << 2);
-                  args << StringPrintf("  ; 0x%llx", *reinterpret_cast<int64_t*>(lit_adr));
+                  args << StringPrintf("  ; 0x%" PRIx64, *reinterpret_cast<int64_t*>(lit_adr));
                 }
               } else if (Rn.r == 13 && W == 1 && U == L) {  // VPUSH/VPOP
                 opcode << (L == 1 ? "vpop" : "vpush");
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 6c25e0a..903d755 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -246,6 +246,42 @@
         load = *instr == 0x10;
         store = !load;
         break;
+      case 0x12: case 0x13:
+        if (prefix[2] == 0x66) {
+          opcode << "movlpd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else if (prefix[0] == 0) {
+          opcode << "movlps";
+        }
+        has_modrm = true;
+        src_reg_file = dst_reg_file = SSE;
+        load = *instr == 0x12;
+        store = !load;
+        break;
+      case 0x16: case 0x17:
+        if (prefix[2] == 0x66) {
+          opcode << "movhpd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else if (prefix[0] == 0) {
+          opcode << "movhps";
+        }
+        has_modrm = true;
+        src_reg_file = dst_reg_file = SSE;
+        load = *instr == 0x16;
+        store = !load;
+        break;
+      case 0x28: case 0x29:
+        if (prefix[2] == 0x66) {
+          opcode << "movapd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else if (prefix[0] == 0) {
+          opcode << "movaps";
+        }
+        has_modrm = true;
+        src_reg_file = dst_reg_file = SSE;
+        load = *instr == 0x28;
+        store = !load;
+        break;
       case 0x2A:
         if (prefix[2] == 0x66) {
           opcode << "cvtpi2pd";
diff --git a/jdwpspy/Android.mk b/jdwpspy/Android.mk
deleted file mode 100644
index 97162f0..0000000
--- a/jdwpspy/Android.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-# Copyright (C) 2006 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-LOCAL_PATH:= $(call my-dir)
-
-include $(CLEAR_VARS)
-LOCAL_SRC_FILES:= Main.cpp Net.cpp
-LOCAL_C_INCLUDES += art/runtime
-LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-LOCAL_MODULE := jdwpspy
-include $(BUILD_HOST_EXECUTABLE)
-ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
diff --git a/jdwpspy/Common.h b/jdwpspy/Common.h
deleted file mode 100644
index 30a49fb..0000000
--- a/jdwpspy/Common.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * jdwpspy common stuff.
- */
-#ifndef ART_JDWPSPY_COMMON_H_
-#define ART_JDWPSPY_COMMON_H_
-
-#include <stdint.h>
-#include <stdio.h>
-#include <sys/types.h>
-
-typedef uint8_t u1;
-typedef uint16_t u2;
-typedef uint32_t u4;
-typedef uint64_t u8;
-
-#define NELEM(x) (sizeof(x) / sizeof((x)[0]))
-
-#ifndef _JDWP_MISC_INLINE
-# define INLINE extern inline
-#else
-# define INLINE
-#endif
-
-/*
- * Get 1 byte.  (Included to make the code more legible.)
- */
-INLINE u1 get1(unsigned const char* pSrc) {
-    return *pSrc;
-}
-
-/*
- * Get 2 big-endian bytes.
- */
-INLINE u2 get2BE(unsigned char const* pSrc) {
-    u2 result;
-
-    result = *pSrc++ << 8;
-    result |= *pSrc++;
-
-    return result;
-}
-
-/*
- * Get 4 big-endian bytes.
- */
-INLINE u4 get4BE(unsigned char const* pSrc) {
-    u4 result;
-
-    result = *pSrc++ << 24;
-    result |= *pSrc++ << 16;
-    result |= *pSrc++ << 8;
-    result |= *pSrc++;
-
-    return result;
-}
-
-/*
- * Get 8 big-endian bytes.
- */
-INLINE u8 get8BE(unsigned char const* pSrc) {
-    u8 result;
-
-    result = (u8) *pSrc++ << 56;
-    result |= (u8) *pSrc++ << 48;
-    result |= (u8) *pSrc++ << 40;
-    result |= (u8) *pSrc++ << 32;
-    result |= (u8) *pSrc++ << 24;
-    result |= (u8) *pSrc++ << 16;
-    result |= (u8) *pSrc++ << 8;
-    result |= (u8) *pSrc++;
-
-    return result;
-}
-
-
-/*
- * Start here.
- */
-int run(const char* connectHost, int connectPort, int listenPort);
-
-/*
- * Print a hex dump to the specified file pointer.
- *
- * "local" mode prints a hex dump starting from offset 0 (roughly equivalent
- * to "xxd -g1").
- *
- * "mem" mode shows the actual memory address, and will offset the start
- * so that the low nibble of the address is always zero.
- */
-enum HexDumpMode { kHexDumpLocal, kHexDumpMem };
-void printHexDump(const void* vaddr, size_t length);
-void printHexDump2(const void* vaddr, size_t length, const char* prefix);
-void printHexDumpEx(FILE* fp, const void* vaddr, size_t length,
-    HexDumpMode mode, const char* prefix);
-
-#endif  // ART_JDWPSPY_COMMON_H_
diff --git a/jdwpspy/Main.cpp b/jdwpspy/Main.cpp
deleted file mode 100644
index 0f68d52..0000000
--- a/jdwpspy/Main.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * JDWP spy.
- */
-#define _JDWP_MISC_INLINE
-#include "Common.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <ctype.h>
-
-static const char gHexDigit[] = "0123456789abcdef";
-
-/*
- * Print a hex dump.  Just hands control off to the fancy version.
- */
-void printHexDump(const void* vaddr, size_t length)
-{
-    printHexDumpEx(stdout, vaddr, length, kHexDumpLocal, "");
-}
-void printHexDump2(const void* vaddr, size_t length, const char* prefix)
-{
-    printHexDumpEx(stdout, vaddr, length, kHexDumpLocal, prefix);
-}
-
-/*
- * Print a hex dump in this format:
- *
-01234567: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef\n
- */
-void printHexDumpEx(FILE* fp, const void* vaddr, size_t length,
-    HexDumpMode mode, const char* prefix)
-{
-    const unsigned char* addr = reinterpret_cast<const unsigned char*>(vaddr);
-    char out[77];       /* exact fit */
-    unsigned int offset;    /* offset to show while printing */
-    char* hex;
-    char* asc;
-    int gap;
-
-    if (mode == kHexDumpLocal)
-        offset = 0;
-    else
-        offset = (int) addr;
-
-    memset(out, ' ', sizeof(out)-1);
-    out[8] = ':';
-    out[sizeof(out)-2] = '\n';
-    out[sizeof(out)-1] = '\0';
-
-    gap = (int) offset & 0x0f;
-    while (length) {
-        unsigned int lineOffset = offset & ~0x0f;
-        char* hex = out;
-        char* asc = out + 59;
-
-        for (int i = 0; i < 8; i++) {
-            *hex++ = gHexDigit[lineOffset >> 28];
-            lineOffset <<= 4;
-        }
-        hex++;
-        hex++;
-
-        int count = ((int)length > 16-gap) ? 16-gap : (int) length; /* cap length */
-        assert(count != 0);
-        assert(count+gap <= 16);
-
-        if (gap) {
-            /* only on first line */
-            hex += gap * 3;
-            asc += gap;
-        }
-
-        int i;
-        for (i = gap ; i < count+gap; i++) {
-            *hex++ = gHexDigit[*addr >> 4];
-            *hex++ = gHexDigit[*addr & 0x0f];
-            hex++;
-            if (isprint(*addr))
-                *asc++ = *addr;
-            else
-                *asc++ = '.';
-            addr++;
-        }
-        for ( ; i < 16; i++) {
-            /* erase extra stuff; only happens on last line */
-            *hex++ = ' ';
-            *hex++ = ' ';
-            hex++;
-            *asc++ = ' ';
-        }
-
-        fprintf(fp, "%s%s", prefix, out);
-
-        gap = 0;
-        length -= count;
-        offset += count;
-    }
-}
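
As a worked example of the layout documented above: dumped in kHexDumpLocal mode, the 14-byte handshake string "JDWP-Handshake" produces a single line of approximately this shape (the gap before the ASCII column is where bytes 14 and 15 would have gone):

    00000000: 4a 44 57 50 2d 48 61 6e 64 73 68 61 6b 65        JDWP-Handshake
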
-
-
-/*
- * Explain it.
- */
-static void usage(const char* progName)
-{
-    fprintf(stderr, "Usage: %s VM-port [debugger-listen-port]\n\n", progName);
-    fprintf(stderr,
-"When a debugger connects to the debugger-listen-port, jdwpspy will connect\n");
-    fprintf(stderr, "to the VM on the VM-port.\n");
-}
-
-/*
- * Parse args.
- */
-int main(int argc, char* argv[])
-{
-    if (argc < 2 || argc > 3) {
-        usage("jdwpspy");
-        return 2;
-    }
-
-    setvbuf(stdout, NULL, _IONBF, 0);
-
-    /* may want this to be host:port */
-    int connectPort = atoi(argv[1]);
-
-    int listenPort;
-    if (argc > 2)
-        listenPort = atoi(argv[2]);
-    else
-        listenPort = connectPort + 1;
-
-    int cc = run("localhost", connectPort, listenPort);
-
-    return (cc != 0);
-}
diff --git a/jdwpspy/Net.cpp b/jdwpspy/Net.cpp
deleted file mode 100644
index 38d4e26..0000000
--- a/jdwpspy/Net.cpp
+++ /dev/null
@@ -1,751 +0,0 @@
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * JDWP spy.  This is a rearranged version of the JDWP code from the VM.
- */
-#include "Common.h"
-#include "jdwp/jdwp_constants.h"
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <time.h>
-#include <errno.h>
-#include <assert.h>
-
-#include <iostream>
-#include <sstream>
-
-#define kInputBufferSize    (256*1024)
-
-#define kMagicHandshakeLen  14      /* "JDWP-Handshake" */
-#define kJDWPHeaderLen      11
-#define kJDWPFlagReply      0x80
-
-
-/*
- * Information about the remote end.
- */
-struct Peer {
-    char    label[2];           /* 'D' or 'V' */
-
-    int     sock;
-    unsigned char   inputBuffer[kInputBufferSize];
-    int     inputCount;
-
-    bool    awaitingHandshake;  /* waiting for "JDWP-Handshake" */
-};
-
-
-/*
- * Network state.
- */
-struct NetState {
-    /* listen here for connection from debugger */
-    int     listenSock;
-
-    /* connect here to contact VM */
-    in_addr vmAddr;
-    uint16_t vmPort;
-
-    Peer    dbg;
-    Peer    vm;
-};
-
-/*
- * Function names.
- */
-struct JdwpHandlerMap {
-    u1  cmdSet;
-    u1  cmd;
-    const char* descr;
-};
-
-/*
- * Map commands to names.
- *
- * Command sets 0-63 are incoming requests, 64-127 are outbound requests,
- * and 128-255 are vendor-defined.
- */
-static const JdwpHandlerMap gHandlerMap[] = {
-    /* VirtualMachine command set (1) */
-    { 1,    1,  "VirtualMachine.Version" },
-    { 1,    2,  "VirtualMachine.ClassesBySignature" },
-    { 1,    3,  "VirtualMachine.AllClasses" },
-    { 1,    4,  "VirtualMachine.AllThreads" },
-    { 1,    5,  "VirtualMachine.TopLevelThreadGroups" },
-    { 1,    6,  "VirtualMachine.Dispose" },
-    { 1,    7,  "VirtualMachine.IDSizes" },
-    { 1,    8,  "VirtualMachine.Suspend" },
-    { 1,    9,  "VirtualMachine.Resume" },
-    { 1,    10, "VirtualMachine.Exit" },
-    { 1,    11, "VirtualMachine.CreateString" },
-    { 1,    12, "VirtualMachine.Capabilities" },
-    { 1,    13, "VirtualMachine.ClassPaths" },
-    { 1,    14, "VirtualMachine.DisposeObjects" },
-    { 1,    15, "VirtualMachine.HoldEvents" },
-    { 1,    16, "VirtualMachine.ReleaseEvents" },
-    { 1,    17, "VirtualMachine.CapabilitiesNew" },
-    { 1,    18, "VirtualMachine.RedefineClasses" },
-    { 1,    19, "VirtualMachine.SetDefaultStratum" },
-    { 1,    20, "VirtualMachine.AllClassesWithGeneric"},
-    { 1,    21, "VirtualMachine.InstanceCounts"},
-
-    /* ReferenceType command set (2) */
-    { 2,    1,  "ReferenceType.Signature" },
-    { 2,    2,  "ReferenceType.ClassLoader" },
-    { 2,    3,  "ReferenceType.Modifiers" },
-    { 2,    4,  "ReferenceType.Fields" },
-    { 2,    5,  "ReferenceType.Methods" },
-    { 2,    6,  "ReferenceType.GetValues" },
-    { 2,    7,  "ReferenceType.SourceFile" },
-    { 2,    8,  "ReferenceType.NestedTypes" },
-    { 2,    9,  "ReferenceType.Status" },
-    { 2,    10, "ReferenceType.Interfaces" },
-    { 2,    11, "ReferenceType.ClassObject" },
-    { 2,    12, "ReferenceType.SourceDebugExtension" },
-    { 2,    13, "ReferenceType.SignatureWithGeneric" },
-    { 2,    14, "ReferenceType.FieldsWithGeneric" },
-    { 2,    15, "ReferenceType.MethodsWithGeneric" },
-    { 2,    16, "ReferenceType.Instances" },
-    { 2,    17, "ReferenceType.ClassFileVersion" },
-    { 2,    18, "ReferenceType.ConstantPool" },
-
-    /* ClassType command set (3) */
-    { 3,    1,  "ClassType.Superclass" },
-    { 3,    2,  "ClassType.SetValues" },
-    { 3,    3,  "ClassType.InvokeMethod" },
-    { 3,    4,  "ClassType.NewInstance" },
-
-    /* ArrayType command set (4) */
-    { 4,    1,  "ArrayType.NewInstance" },
-
-    /* InterfaceType command set (5) */
-
-    /* Method command set (6) */
-    { 6,    1,  "Method.LineTable" },
-    { 6,    2,  "Method.VariableTable" },
-    { 6,    3,  "Method.Bytecodes" },
-    { 6,    4,  "Method.IsObsolete" },
-    { 6,    5,  "Method.VariableTableWithGeneric" },
-
-    /* Field command set (8) */
-
-    /* ObjectReference command set (9) */
-    { 9,    1,  "ObjectReference.ReferenceType" },
-    { 9,    2,  "ObjectReference.GetValues" },
-    { 9,    3,  "ObjectReference.SetValues" },
-    { 9,    4,  "ObjectReference.UNUSED" },
-    { 9,    5,  "ObjectReference.MonitorInfo" },
-    { 9,    6,  "ObjectReference.InvokeMethod" },
-    { 9,    7,  "ObjectReference.DisableCollection" },
-    { 9,    8,  "ObjectReference.EnableCollection" },
-    { 9,    9,  "ObjectReference.IsCollected" },
-    { 9,    10, "ObjectReference.ReferringObjects" },
-
-    /* StringReference command set (10) */
-    { 10,   1,  "StringReference.Value" },
-
-    /* ThreadReference command set (11) */
-    { 11,   1,  "ThreadReference.Name" },
-    { 11,   2,  "ThreadReference.Suspend" },
-    { 11,   3,  "ThreadReference.Resume" },
-    { 11,   4,  "ThreadReference.Status" },
-    { 11,   5,  "ThreadReference.ThreadGroup" },
-    { 11,   6,  "ThreadReference.Frames" },
-    { 11,   7,  "ThreadReference.FrameCount" },
-    { 11,   8,  "ThreadReference.OwnedMonitors" },
-    { 11,   9,  "ThreadReference.CurrentContendedMonitor" },
-    { 11,   10, "ThreadReference.Stop" },
-    { 11,   11, "ThreadReference.Interrupt" },
-    { 11,   12, "ThreadReference.SuspendCount" },
-    { 11,   13, "ThreadReference.OwnedMonitorsStackDepthInfo" },
-    { 11,   14, "ThreadReference.ForceEarlyReturn" },
-
-    /* ThreadGroupReference command set (12) */
-    { 12,   1,  "ThreadGroupReference.Name" },
-    { 12,   2,  "ThreadGroupReference.Parent" },
-    { 12,   3,  "ThreadGroupReference.Children" },
-
-    /* ArrayReference command set (13) */
-    { 13,   1,  "ArrayReference.Length" },
-    { 13,   2,  "ArrayReference.GetValues" },
-    { 13,   3,  "ArrayReference.SetValues" },
-
-    /* ClassLoaderReference command set (14) */
-    { 14,   1,  "ArrayReference.VisibleClasses" },
-
-    /* EventRequest command set (15) */
-    { 15,   1,  "EventRequest.Set" },
-    { 15,   2,  "EventRequest.Clear" },
-    { 15,   3,  "EventRequest.ClearAllBreakpoints" },
-
-    /* StackFrame command set (16) */
-    { 16,   1,  "StackFrame.GetValues" },
-    { 16,   2,  "StackFrame.SetValues" },
-    { 16,   3,  "StackFrame.ThisObject" },
-    { 16,   4,  "StackFrame.PopFrames" },
-
-    /* ClassObjectReference command set (17) */
-    { 17,   1,  "ClassObjectReference.ReflectedType" },
-
-    /* Event command set (64) */
-    { 64,  100, "Event.Composite" },
-
-    /* DDMS */
-    { 199,  1,  "DDMS.Chunk" },
-};
-
-/*
- * Look up a command's name.
- */
-static const char* getCommandName(int cmdSet, int cmd)
-{
-    for (int i = 0; i < (int) NELEM(gHandlerMap); i++) {
-        if (gHandlerMap[i].cmdSet == cmdSet &&
-            gHandlerMap[i].cmd == cmd)
-        {
-            return gHandlerMap[i].descr;
-        }
-    }
-
-    return "?UNKNOWN?";
-}
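
The lookup above is a linear scan over the whole table, which is perfectly adequate at this size. For illustration only, an equivalent keyed on the packed (cmdSet, cmd) pair, not code from the original tree, would look like:

    #include <unordered_map>

    // Hash-map variant of getCommandName, keyed on (cmdSet << 8) | cmd.
    // The entries mirror gHandlerMap; only two are shown. Illustrative only.
    static const char* GetCommandName(int cmdSet, int cmd) {
      static const std::unordered_map<int, const char*> kNames = {
          {(1 << 8) | 7,  "VirtualMachine.IDSizes"},
          {(11 << 8) | 1, "ThreadReference.Name"},
          // ... remaining gHandlerMap entries ...
      };
      auto it = kNames.find((cmdSet << 8) | cmd);
      return it != kNames.end() ? it->second : "?UNKNOWN?";
    }
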
-
-
-void jdwpNetFree(NetState* netState);       /* fwd */
-
-/*
- * Allocate state structure and bind to the listen port.
- *
- * Returns the new state on success, or NULL on failure.
- */
-NetState* jdwpNetStartup(uint16_t listenPort, const char* connectHost, uint16_t connectPort) {
-    NetState* netState = new NetState;
-    memset(netState, 0, sizeof(*netState));
-    netState->listenSock = -1;
-    netState->dbg.sock = netState->vm.sock = -1;
-
-    strcpy(netState->dbg.label, "D");
-    strcpy(netState->vm.label, "V");
-
-    /*
-     * Set up a socket to listen for connections from the debugger.
-     */
-
-    netState->listenSock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
-    if (netState->listenSock < 0) {
-        fprintf(stderr, "Socket create failed: %s\n", strerror(errno));
-        goto fail;
-    }
-
-    /* allow immediate re-use if we die */
-    {
-        int one = 1;
-        if (setsockopt(netState->listenSock, SOL_SOCKET, SO_REUSEADDR, &one,
-                sizeof(one)) < 0)
-        {
-            fprintf(stderr, "setsockopt(SO_REUSEADDR) failed: %s\n",
-                strerror(errno));
-            goto fail;
-        }
-    }
-
-    sockaddr_in addr;
-    addr.sin_family = AF_INET;
-    addr.sin_port = htons(listenPort);
-    addr.sin_addr.s_addr = INADDR_ANY;
-
-    if (bind(netState->listenSock, (sockaddr*) &addr, sizeof(addr)) != 0)
-    {
-        fprintf(stderr, "attempt to bind to port %u failed: %s\n",
-            listenPort, strerror(errno));
-        goto fail;
-    }
-
-    fprintf(stderr, "+++ bound to port %u\n", listenPort);
-
-    if (listen(netState->listenSock, 5) != 0) {
-        fprintf(stderr, "Listen failed: %s\n", strerror(errno));
-        goto fail;
-    }
-
-    /*
-     * Do the hostname lookup for the VM.
-     */
-    hostent* pHost;
-
-    pHost = gethostbyname(connectHost);
-    if (pHost == NULL) {
-        fprintf(stderr, "Name lookup of '%s' failed: %s\n",
-            connectHost, strerror(h_errno));
-        goto fail;
-    }
-
-    netState->vmAddr = *((in_addr*) pHost->h_addr_list[0]);
-    netState->vmPort = connectPort;
-
-    fprintf(stderr, "+++ connect host resolved to %s\n",
-        inet_ntoa(netState->vmAddr));
-
-    return netState;
-
-fail:
-    jdwpNetFree(netState);
-    return NULL;
-}
-
-/*
- * Shut down JDWP listener.  Don't free state.
- *
- * Note that "netState" may be partially initialized if "startup" failed.
- */
-void jdwpNetShutdown(NetState* netState)
-{
-    int listenSock = netState->listenSock;
-    int dbgSock = netState->dbg.sock;
-    int vmSock = netState->vm.sock;
-
-    /* clear these out so it doesn't wake up and try to reuse them */
-    /* (important when multi-threaded) */
-    netState->listenSock = netState->dbg.sock = netState->vm.sock = -1;
-
-    if (listenSock >= 0) {
-        shutdown(listenSock, SHUT_RDWR);
-        close(listenSock);
-    }
-    if (dbgSock >= 0) {
-        shutdown(dbgSock, SHUT_RDWR);
-        close(dbgSock);
-    }
-    if (vmSock >= 0) {
-        shutdown(vmSock, SHUT_RDWR);
-        close(vmSock);
-    }
-}
-
-/*
- * Shut down JDWP listener and free its state.
- */
-void jdwpNetFree(NetState* netState)
-{
-    if (netState == NULL)
-        return;
-
-    jdwpNetShutdown(netState);
-    delete netState;
-}
-
-/*
- * Disable the TCP Nagle algorithm, which delays transmission of outbound
- * packets until the previous transmissions have been acked.  JDWP does a
- * lot of back-and-forth with small packets, so this may help.
- */
-static int setNoDelay(int fd)
-{
-    int cc, on = 1;
-
-    cc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
-    assert(cc == 0);
-    return cc;
-}
-
-/*
- * Accept a connection.  This will block waiting for somebody to show up.
- */
-bool jdwpAcceptConnection(NetState* netState)
-{
-    sockaddr_in addr;
-    socklen_t addrlen;
-    int sock;
-
-    if (netState->listenSock < 0)
-        return false;       /* you're not listening! */
-
-    assert(netState->dbg.sock < 0);     /* must not already be talking */
-
-    addrlen = sizeof(addr);
-    do {
-        sock = accept(netState->listenSock, (sockaddr*) &addr, &addrlen);
-        if (sock < 0 && errno != EINTR) {
-            fprintf(stderr, "accept failed: %s\n", strerror(errno));
-            return false;
-        }
-    } while (sock < 0);
-
-    fprintf(stderr, "+++ accepted connection from %s:%u\n",
-        inet_ntoa(addr.sin_addr), ntohs(addr.sin_port));
-
-    netState->dbg.sock = sock;
-    netState->dbg.awaitingHandshake = true;
-    netState->dbg.inputCount = 0;
-
-    setNoDelay(sock);
-
-    return true;
-}
-
-/*
- * Close the connections to the debugger and VM.
- *
- * Reset the state so we're ready to receive a new connection.
- */
-void jdwpCloseConnection(NetState* netState)
-{
-    if (netState->dbg.sock >= 0) {
-        fprintf(stderr, "+++ closing connection to debugger\n");
-        close(netState->dbg.sock);
-        netState->dbg.sock = -1;
-    }
-    if (netState->vm.sock >= 0) {
-        fprintf(stderr, "+++ closing connection to vm\n");
-        close(netState->vm.sock);
-        netState->vm.sock = -1;
-    }
-}
-
-/*
- * Figure out if we have a full packet in the buffer.
- */
-static bool haveFullPacket(Peer* pPeer)
-{
-    long length;
-
-    if (pPeer->awaitingHandshake)
-        return (pPeer->inputCount >= kMagicHandshakeLen);
-
-    if (pPeer->inputCount < 4)
-        return false;
-
-    length = get4BE(pPeer->inputBuffer);
-    return (pPeer->inputCount >= length);
-}
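
Both peers speak the same framing, which haveFullPacket above and dumpPacket below rely on: a 4-byte big-endian length (header included), a 4-byte id, a flags byte, then either cmdSet/cmd or a 2-byte error code. A self-contained sketch of that 11-byte header, with names invented here for illustration:

    #include <cstdint>

    // Sketch of the JDWP header layout; the field breakdown is taken from
    // dumpPacket below, but this struct is not a type from the original code.
    struct JdwpHeader {
      uint32_t length;   // total packet length, header included
      uint32_t id;
      uint8_t  flags;    // kJDWPFlagReply (0x80) set => reply packet
      uint8_t  cmd_set;  // request packets only
      uint8_t  cmd;      // request packets only
      uint16_t error;    // reply packets only
    };

    static JdwpHeader ParseJdwpHeader(const unsigned char* b) {
      JdwpHeader h = {};
      h.length = (uint32_t{b[0]} << 24) | (uint32_t{b[1]} << 16) |
                 (uint32_t{b[2]} << 8) | uint32_t{b[3]};
      h.id = (uint32_t{b[4]} << 24) | (uint32_t{b[5]} << 16) |
             (uint32_t{b[6]} << 8) | uint32_t{b[7]};
      h.flags = b[8];
      if ((h.flags & 0x80) != 0) {
        h.error = static_cast<uint16_t>((b[9] << 8) | b[10]);
      } else {
        h.cmd_set = b[9];
        h.cmd = b[10];
      }
      return h;
    }
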
-
-/*
- * Consume bytes from the buffer.
- *
- * This would be more efficient with a circular buffer.  However, we're
- * usually only going to find one packet, which is trivial to handle.
- */
-static void consumeBytes(Peer* pPeer, int count)
-{
-    assert(count > 0);
-    assert(count <= pPeer->inputCount);
-
-    if (count == pPeer->inputCount) {
-        pPeer->inputCount = 0;
-        return;
-    }
-
-    memmove(pPeer->inputBuffer, pPeer->inputBuffer + count,
-        pPeer->inputCount - count);
-    pPeer->inputCount -= count;
-}
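
A minimal sketch of the circular buffer the comment alludes to, assuming the same fixed capacity: consuming bytes just advances a read index instead of shifting the tail down with memmove.

    #include <cstddef>

    // Illustrative only; the original keeps the simpler flat buffer.
    struct RingBuffer {
      unsigned char data[256 * 1024];
      size_t read_pos = 0;   // index of the next unread byte
      size_t count = 0;      // bytes currently buffered

      void Consume(size_t n) {
        read_pos = (read_pos + n) % sizeof(data);
        count -= n;
      }
    };

Reads that wrap past the end of the array would need to be split in two, which is exactly the complexity the flat-buffer version sidesteps.
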
-
-/*
- * Get the current time.
- */
-static void getCurrentTime(int* pMin, int* pSec)
-{
-    time_t now;
-    tm* ptm;
-
-    now = time(NULL);
-    ptm = localtime(&now);
-    *pMin = ptm->tm_min;
-    *pSec = ptm->tm_sec;
-}
-
-/*
- * Dump the contents of a packet to stdout.
- */
-static void dumpPacket(const unsigned char* packetBuf, const char* srcName,
-    const char* dstName)
-{
-    const unsigned char* buf = packetBuf;
-    char prefix[3];
-    u4 length, id;
-    u1 flags, cmdSet=0, cmd=0;
-    art::JDWP::JdwpError error = art::JDWP::ERR_NONE;
-    bool reply;
-    int dataLen;
-
-    length = get4BE(buf+0);
-    id = get4BE(buf+4);
-    flags = get1(buf+8);
-    if ((flags & kJDWPFlagReply) != 0) {
-        reply = true;
-        error = static_cast<art::JDWP::JdwpError>(get2BE(buf+9));
-    } else {
-        reply = false;
-        cmdSet = get1(buf+9);
-        cmd = get1(buf+10);
-    }
-
-    buf += kJDWPHeaderLen;
-    dataLen = length - (buf - packetBuf);
-
-    if (!reply) {
-        prefix[0] = srcName[0];
-        prefix[1] = '>';
-    } else {
-        prefix[0] = dstName[0];
-        prefix[1] = '<';
-    }
-    prefix[2] = '\0';
-
-    int min, sec;
-    getCurrentTime(&min, &sec);
-
-    if (!reply) {
-        printf("%s REQUEST dataLen=%-5u id=0x%08x flags=0x%02x cmd=%d/%d [%02d:%02d]\n",
-            prefix, dataLen, id, flags, cmdSet, cmd, min, sec);
-        printf("%s   --> %s\n", prefix, getCommandName(cmdSet, cmd));
-    } else {
-        std::ostringstream ss;
-        ss << "TODO";  // get access to the operator<<, or regenerate it for jdwpspy?
-        printf("%s REPLY   dataLen=%-5u id=0x%08x flags=0x%02x err=%d (%s) [%02d:%02d]\n",
-            prefix, dataLen, id, flags, error, ss.str().c_str(), min,sec);
-    }
-    if (dataLen > 0)
-        printHexDump2(buf, dataLen, prefix);
-    printf("%s ----------\n", prefix);
-}
-
-/*
- * Handle a packet.  Returns "false" if we encounter a connection-fatal error.
- */
-static bool handlePacket(Peer* pDst, Peer* pSrc)
-{
-    const unsigned char* buf = pSrc->inputBuffer;
-    u4 length;
-    u1 flags;
-    int cc;
-
-    length = get4BE(buf+0);
-    flags = get1(buf+9);
-
-    assert((int) length <= pSrc->inputCount);
-
-    dumpPacket(buf, pSrc->label, pDst->label);
-
-    cc = write(pDst->sock, buf, length);
-    if (cc != (int) length) {
-        fprintf(stderr, "Failed sending packet: %s\n", strerror(errno));
-        return false;
-    }
-    /*printf("*** wrote %d bytes from %c to %c\n",
-        cc, pSrc->label[0], pDst->label[0]);*/
-
-    consumeBytes(pSrc, length);
-    return true;
-}
-
-/*
- * Handle incoming data.  If we have a full packet in the buffer, process it.
- */
-static bool handleIncoming(Peer* pWritePeer, Peer* pReadPeer)
-{
-    if (haveFullPacket(pReadPeer)) {
-        if (pReadPeer->awaitingHandshake) {
-            printf("Handshake [%c]: %.14s\n",
-                pReadPeer->label[0], pReadPeer->inputBuffer);
-            if (write(pWritePeer->sock, pReadPeer->inputBuffer,
-                    kMagicHandshakeLen) != kMagicHandshakeLen)
-            {
-                fprintf(stderr,
-                    "+++ [%c] handshake write failed\n", pReadPeer->label[0]);
-                goto fail;
-            }
-            consumeBytes(pReadPeer, kMagicHandshakeLen);
-            pReadPeer->awaitingHandshake = false;
-        } else {
-            if (!handlePacket(pWritePeer, pReadPeer))
-                goto fail;
-        }
-    } else {
-        /*printf("*** %c not full yet\n", pReadPeer->label[0]);*/
-    }
-
-    return true;
-
-fail:
-    return false;
-}
-
-/*
- * Process incoming data.  If no data is available, this will block until
- * some arrives.
- *
- * Returns "false" on error (indicating that the connection has been severed).
- */
-bool jdwpProcessIncoming(NetState* netState)
-{
-    int cc;
-
-    assert(netState->dbg.sock >= 0);
-    assert(netState->vm.sock >= 0);
-
-    while (!haveFullPacket(&netState->dbg) && !haveFullPacket(&netState->vm)) {
-        /* read some more */
-        int highFd;
-        fd_set readfds;
-
-        highFd = (netState->dbg.sock > netState->vm.sock) ?
-            netState->dbg.sock+1 : netState->vm.sock+1;
-        FD_ZERO(&readfds);
-        FD_SET(netState->dbg.sock, &readfds);
-        FD_SET(netState->vm.sock, &readfds);
-
-        errno = 0;
-        cc = select(highFd, &readfds, NULL, NULL, NULL);
-        if (cc < 0) {
-            if (errno == EINTR) {
-                fprintf(stderr, "+++ EINTR on select\n");
-                continue;
-            }
-            fprintf(stderr, "+++ select failed: %s\n", strerror(errno));
-            goto fail;
-        }
-
-        if (FD_ISSET(netState->dbg.sock, &readfds)) {
-            cc = read(netState->dbg.sock,
-                netState->dbg.inputBuffer + netState->dbg.inputCount,
-                sizeof(netState->dbg.inputBuffer) - netState->dbg.inputCount);
-            if (cc < 0) {
-                if (errno == EINTR) {
-                    fprintf(stderr, "+++ EINTR on read\n");
-                    continue;
-                }
-                fprintf(stderr, "+++ dbg read failed: %s\n", strerror(errno));
-                goto fail;
-            }
-            if (cc == 0) {
-                if (sizeof(netState->dbg.inputBuffer) ==
-                        netState->dbg.inputCount)
-                    fprintf(stderr, "+++ debugger sent huge message\n");
-                else
-                    fprintf(stderr, "+++ debugger disconnected\n");
-                goto fail;
-            }
-
-            /*printf("*** %d bytes from dbg\n", cc);*/
-            netState->dbg.inputCount += cc;
-        }
-
-        if (FD_ISSET(netState->vm.sock, &readfds)) {
-            cc = read(netState->vm.sock,
-                netState->vm.inputBuffer + netState->vm.inputCount,
-                sizeof(netState->vm.inputBuffer) - netState->vm.inputCount);
-            if (cc < 0) {
-                if (errno == EINTR) {
-                    fprintf(stderr, "+++ EINTR on read\n");
-                    continue;
-                }
-                fprintf(stderr, "+++ vm read failed: %s\n", strerror(errno));
-                goto fail;
-            }
-            if (cc == 0) {
-                if (sizeof(netState->vm.inputBuffer) ==
-                        netState->vm.inputCount)
-                    fprintf(stderr, "+++ vm sent huge message\n");
-                else
-                    fprintf(stderr, "+++ vm disconnected\n");
-                goto fail;
-            }
-
-            /*printf("*** %d bytes from vm\n", cc);*/
-            netState->vm.inputCount += cc;
-        }
-    }
-
-    if (!handleIncoming(&netState->dbg, &netState->vm))
-        goto fail;
-    if (!handleIncoming(&netState->vm, &netState->dbg))
-        goto fail;
-
-    return true;
-
-fail:
-    jdwpCloseConnection(netState);
-    return false;
-}
-
-/*
- * Connect to the VM.
- */
-bool jdwpConnectToVm(NetState* netState)
-{
-    sockaddr_in addr;
-    int sock = -1;
-
-    sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
-    if (sock < 0) {
-        fprintf(stderr, "Socket create failed: %s\n", strerror(errno));
-        goto fail;
-    }
-
-    addr.sin_family = AF_INET;
-    addr.sin_addr = netState->vmAddr;
-    addr.sin_port = htons(netState->vmPort);
-    if (connect(sock, (struct sockaddr*) &addr, sizeof(addr)) != 0) {
-        fprintf(stderr, "Connection to %s:%u failed: %s\n",
-            inet_ntoa(addr.sin_addr), ntohs(addr.sin_port), strerror(errno));
-        goto fail;
-    }
-    fprintf(stderr, "+++ connected to VM %s:%u\n",
-        inet_ntoa(addr.sin_addr), ntohs(addr.sin_port));
-
-    netState->vm.sock = sock;
-    netState->vm.awaitingHandshake = true;
-    netState->vm.inputCount = 0;
-
-    setNoDelay(netState->vm.sock);
-    return true;
-
-fail:
-    if (sock >= 0)
-        close(sock);
-    return false;
-}
-
-/*
- * Establish network connections and start things running.
- *
- * We wait for a new connection from the debugger.  When one arrives we
- * open a connection to the VM.  If one side or the other goes away, we
- * drop both ends and go back to listening.
- */
-int run(const char* connectHost, int connectPort, int listenPort)
-{
-    NetState* state;
-
-    state = jdwpNetStartup(listenPort, connectHost, connectPort);
-    if (state == NULL)
-        return -1;
-
-    while (true) {
-        if (!jdwpAcceptConnection(state))
-            break;
-
-        if (jdwpConnectToVm(state)) {
-            while (true) {
-                if (!jdwpProcessIncoming(state))
-                    break;
-            }
-        }
-
-        jdwpCloseConnection(state);
-    }
-
-    jdwpNetFree(state);
-
-    return 0;
-}
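
For context on how the tool was used: given a VM exporting JDWP on port 8000, running "jdwpspy 8000" listened on 8001 (Main.cpp defaults the listen port to VM-port plus one), the debugger attached to 8001, and every packet was hex-dumped as it was relayed in either direction.
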
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 9bde30d..53b07f9 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -49,6 +49,7 @@
 #include "runtime.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
+#include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier.h"
 #include "vmap_table.h"
 
@@ -162,12 +163,11 @@
         reinterpret_cast<const byte*>(oat_data) > oat_file_.End()) {
       return 0;  // Address not in oat file
     }
-    uint32_t begin_offset = reinterpret_cast<size_t>(oat_data) -
-                            reinterpret_cast<size_t>(oat_file_.Begin());
-    typedef std::set<uint32_t>::iterator It;
-    It it = offsets_.upper_bound(begin_offset);
+    uintptr_t begin_offset = reinterpret_cast<uintptr_t>(oat_data) -
+                             reinterpret_cast<uintptr_t>(oat_file_.Begin());
+    auto it = offsets_.upper_bound(begin_offset);
     CHECK(it != offsets_.end());
-    uint32_t end_offset = *it;
+    uintptr_t end_offset = *it;
     return end_offset - begin_offset;
   }
 
@@ -175,7 +175,7 @@
     return oat_file_.GetOatHeader().GetInstructionSet();
   }
 
-  const void* GetOatCode(mirror::ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const void* GetQuickOatCode(mirror::ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     MethodHelper mh(m);
     for (size_t i = 0; i < oat_dex_files_.size(); i++) {
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
@@ -193,7 +193,7 @@
           const OatFile::OatClass* oat_class = oat_dex_file->GetOatClass(class_def_index);
           CHECK(oat_class != NULL);
           size_t method_index = m->GetMethodIndex();
-          return oat_class->GetOatMethod(method_index).GetCode();
+          return oat_class->GetOatMethod(method_index).GetQuickCode();
         }
       }
     }
@@ -216,7 +216,7 @@
             << "': " << error_msg;
         continue;
       }
-      offsets_.insert(reinterpret_cast<uint32_t>(&dex_file->GetHeader()));
+      offsets_.insert(reinterpret_cast<uintptr_t>(&dex_file->GetHeader()));
       for (size_t class_def_index = 0; class_def_index < dex_file->NumClassDefs(); class_def_index++) {
         const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
         UniquePtr<const OatFile::OatClass> oat_class(oat_dex_file->GetOatClass(class_def_index));
@@ -240,7 +240,7 @@
     // If the last thing in the file is code for a method, there won't be an offset for the "next"
     // thing. Instead of having a special case in the upper_bound code, let's just add an entry
     // for the end of the file.
-    offsets_.insert(static_cast<uint32_t>(oat_file_.Size()));
+    offsets_.insert(oat_file_.Size());
   }
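
The sentinel makes the size computation in LookupOffset uniform: upper_bound always finds a "next" offset, even for the last item in the file. A self-contained illustration of the trick, with invented offsets:

    #include <cstdint>
    #include <iostream>
    #include <set>

    // Sketch of the sentinel pattern described in the comment above.
    int main() {
      std::set<uintptr_t> offsets = {0x1000, 0x1040, 0x10a0};
      offsets.insert(0x2000);  // sentinel: total file size

      uintptr_t begin = 0x1040;
      uintptr_t end = *offsets.upper_bound(begin);       // 0x10a0
      std::cout << "region size: " << (end - begin) << "\n";  // 96
      return 0;
    }
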
 
   void AddOffsets(const OatFile::OatMethod& oat_method) {
@@ -374,11 +374,17 @@
       }
     }
     {
+      const void* code = oat_method.GetQuickCode();
+      uint32_t code_size = oat_method.GetQuickCodeSize();
+      if (code == nullptr) {
+        code = oat_method.GetPortableCode();
+        code_size = oat_method.GetPortableCodeSize();
+      }
       indent1_os << StringPrintf("CODE: %p (offset=0x%08x size=%d)%s\n",
-                                 oat_method.GetCode(),
+                                 code,
                                  oat_method.GetCodeOffset(),
-                                 oat_method.GetCodeSize(),
-                                 oat_method.GetCode() != NULL ? "..." : "");
+                                 code_size,
+                                 code != nullptr ? "..." : "");
       Indenter indent2_filter(indent1_os.rdbuf(), kIndentChar, kIndentBy1Count);
       std::ostream indent2_os(&indent2_filter);
 
@@ -468,42 +474,60 @@
     }
   }
 
+  void DumpGcMapRegisters(std::ostream& os, const OatFile::OatMethod& oat_method,
+                          const DexFile::CodeItem* code_item,
+                          size_t num_regs, const uint8_t* reg_bitmap) {
+    bool first = true;
+    for (size_t reg = 0; reg < num_regs; reg++) {
+      if (((reg_bitmap[reg / 8] >> (reg % 8)) & 0x01) != 0) {
+        if (first) {
+          os << "  v" << reg << " (";
+          DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
+          os << ")";
+          first = false;
+        } else {
+          os << ", v" << reg << " (";
+          DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
+          os << ")";
+        }
+      }
+    }
+    if (first) {
+      os << "No registers in GC map\n";
+    } else {
+      os << "\n";
+    }
+  }
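
The bit test in the loop above picks register reg out of a packed bitmap: bit (reg % 8) of byte (reg / 8) marks that vreg as holding a reference. A standalone sketch with an invented bitmap:

    #include <cstdint>
    #include <cstdio>

    // Illustration of the bitmap walk in DumpGcMapRegisters; the bitmap
    // contents here are made up.
    int main() {
      const uint8_t reg_bitmap[] = {0x05};  // bits 0 and 2 set => v0, v2
      const size_t num_regs = 8;
      for (size_t reg = 0; reg < num_regs; reg++) {
        if (((reg_bitmap[reg / 8] >> (reg % 8)) & 0x01) != 0) {
          std::printf("v%zu holds a reference\n", reg);
        }
      }
      return 0;
    }
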
   void DumpGcMap(std::ostream& os, const OatFile::OatMethod& oat_method,
                  const DexFile::CodeItem* code_item) {
     const uint8_t* gc_map_raw = oat_method.GetNativeGcMap();
-    if (gc_map_raw == NULL) {
-      return;
+    if (gc_map_raw == nullptr) {
+      return;  // No GC map.
     }
-    NativePcOffsetToReferenceMap map(gc_map_raw);
-    const void* code = oat_method.GetCode();
-    for (size_t entry = 0; entry < map.NumEntries(); entry++) {
-      const uint8_t* native_pc = reinterpret_cast<const uint8_t*>(code) +
-                                 map.GetNativePcOffset(entry);
-      os << StringPrintf("%p", native_pc);
-      size_t num_regs = map.RegWidth() * 8;
-      const uint8_t* reg_bitmap = map.GetBitMap(entry);
-      bool first = true;
-      for (size_t reg = 0; reg < num_regs; reg++) {
-        if (((reg_bitmap[reg / 8] >> (reg % 8)) & 0x01) != 0) {
-          if (first) {
-            os << "  v" << reg << " (";
-            DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-            os << ")";
-            first = false;
-          } else {
-            os << ", v" << reg << " (";
-            DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-            os << ")";
-          }
-        }
+    const void* quick_code = oat_method.GetQuickCode();
+    if (quick_code != nullptr) {
+      NativePcOffsetToReferenceMap map(gc_map_raw);
+      for (size_t entry = 0; entry < map.NumEntries(); entry++) {
+        const uint8_t* native_pc = reinterpret_cast<const uint8_t*>(quick_code) +
+            map.GetNativePcOffset(entry);
+        os << StringPrintf("%p", native_pc);
+        DumpGcMapRegisters(os, oat_method, code_item, map.RegWidth() * 8, map.GetBitMap(entry));
       }
-      os << "\n";
+    } else {
+      const void* portable_code = oat_method.GetPortableCode();
+      CHECK(portable_code != nullptr);
+      verifier::DexPcToReferenceMap map(gc_map_raw);
+      for (size_t entry = 0; entry < map.NumEntries(); entry++) {
+        uint32_t dex_pc = map.GetDexPc(entry);
+        os << StringPrintf("0x%08x", dex_pc);
+        DumpGcMapRegisters(os, oat_method, code_item, map.RegWidth() * 8, map.GetBitMap(entry));
+      }
     }
   }
 
   void DumpMappingTable(std::ostream& os, const OatFile::OatMethod& oat_method) {
-    const void* code = oat_method.GetCode();
-    if (code == NULL) {
+    const void* quick_code = oat_method.GetQuickCode();
+    if (quick_code == nullptr) {
       return;
     }
     MappingTable table(oat_method.GetMappingTable());
@@ -645,31 +669,37 @@
 
   void DumpCode(std::ostream& os, verifier::MethodVerifier* verifier,
                 const OatFile::OatMethod& oat_method, const DexFile::CodeItem* code_item) {
-    const void* code = oat_method.GetCode();
-    size_t code_size = oat_method.GetCodeSize();
-    if (code == NULL || code_size == 0) {
+    const void* portable_code = oat_method.GetPortableCode();
+    const void* quick_code = oat_method.GetQuickCode();
+
+    size_t code_size = oat_method.GetQuickCodeSize();
+    if ((code_size == 0) || ((portable_code == nullptr) && (quick_code == nullptr))) {
       os << "NO CODE!\n";
       return;
-    }
-    const uint8_t* native_pc = reinterpret_cast<const uint8_t*>(code);
-    size_t offset = 0;
-    while (offset < code_size) {
-      DumpMappingAtOffset(os, oat_method, offset, false);
-      offset += disassembler_->Dump(os, native_pc + offset);
-      uint32_t dex_pc = DumpMappingAtOffset(os, oat_method, offset, true);
-      if (dex_pc != DexFile::kDexNoIndex) {
-        DumpGcMapAtNativePcOffset(os, oat_method, code_item, offset);
-        if (verifier != nullptr) {
-          DumpVRegsAtDexPc(os, verifier, oat_method, code_item, dex_pc);
+    } else if (quick_code != nullptr) {
+      const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code);
+      size_t offset = 0;
+      while (offset < code_size) {
+        DumpMappingAtOffset(os, oat_method, offset, false);
+        offset += disassembler_->Dump(os, quick_native_pc + offset);
+        uint32_t dex_pc = DumpMappingAtOffset(os, oat_method, offset, true);
+        if (dex_pc != DexFile::kDexNoIndex) {
+          DumpGcMapAtNativePcOffset(os, oat_method, code_item, offset);
+          if (verifier != nullptr) {
+            DumpVRegsAtDexPc(os, verifier, oat_method, code_item, dex_pc);
+          }
         }
       }
+    } else {
+      CHECK(portable_code != nullptr);
+      CHECK_EQ(code_size, 0U);  // TODO: disassembly of portable is currently not supported.
     }
   }
 
   const std::string host_prefix_;
   const OatFile& oat_file_;
   std::vector<const OatFile::OatDexFile*> oat_dex_files_;
-  std::set<uint32_t> offsets_;
+  std::set<uintptr_t> offsets_;
   UniquePtr<Disassembler> disassembler_;
 };
 
@@ -856,7 +886,7 @@
     if (descriptor[0] != 'L' && descriptor[0] != '[') {
       mirror::Class* type = fh.GetType();
       if (type->IsPrimitiveLong()) {
-        os << StringPrintf("%lld (0x%llx)\n", field->Get64(obj), field->Get64(obj));
+        os << StringPrintf("%" PRId64 " (0x%" PRIx64 ")\n", field->Get64(obj), field->Get64(obj));
       } else if (type->IsPrimitiveDouble()) {
         os << StringPrintf("%f (%a)\n", field->GetDouble(obj), field->GetDouble(obj));
       } else if (type->IsPrimitiveFloat()) {
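
The hunk above swaps the non-portable %lld/%llx pair for the <inttypes.h> width macros, which expand to the correct conversion specifier for int64_t on each platform. A minimal sketch of the macro usage:

    #include <cinttypes>
    #include <cstdio>

    int main() {
      const int64_t value = 0x0123456789abcdef;
      std::printf("%" PRId64 " (0x%" PRIx64 ")\n", value, value);
      // prints: 81985529216486895 (0x123456789abcdef)
      return 0;
    }
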
@@ -902,34 +932,34 @@
     return image_space_.Contains(object);
   }
 
-  const void* GetOatCodeBegin(mirror::ArtMethod* m)
+  const void* GetQuickOatCodeBegin(mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const void* code = m->GetEntryPointFromCompiledCode();
-    if (code == GetResolutionTrampoline(Runtime::Current()->GetClassLinker())) {
-      code = oat_dumper_->GetOatCode(m);
+    const void* quick_code = m->GetEntryPointFromQuickCompiledCode();
+    if (quick_code == GetQuickResolutionTrampoline(Runtime::Current()->GetClassLinker())) {
+      quick_code = oat_dumper_->GetQuickOatCode(m);
     }
     if (oat_dumper_->GetInstructionSet() == kThumb2) {
-      code = reinterpret_cast<void*>(reinterpret_cast<uint32_t>(code) & ~0x1);
+      quick_code = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(quick_code) & ~0x1);
     }
-    return code;
+    return quick_code;
   }
 
-  uint32_t GetOatCodeSize(mirror::ArtMethod* m)
+  uint32_t GetQuickOatCodeSize(mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const uint32_t* oat_code_begin = reinterpret_cast<const uint32_t*>(GetOatCodeBegin(m));
-    if (oat_code_begin == NULL) {
+    const uint32_t* oat_code_begin = reinterpret_cast<const uint32_t*>(GetQuickOatCodeBegin(m));
+    if (oat_code_begin == nullptr) {
       return 0;
     }
     return oat_code_begin[-1];
   }
 
-  const void* GetOatCodeEnd(mirror::ArtMethod* m)
+  const void* GetQuickOatCodeEnd(mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const uint8_t* oat_code_begin = reinterpret_cast<const uint8_t*>(GetOatCodeBegin(m));
+    const uint8_t* oat_code_begin = reinterpret_cast<const uint8_t*>(GetQuickOatCodeBegin(m));
     if (oat_code_begin == NULL) {
       return NULL;
     }
-    return oat_code_begin + GetOatCodeSize(m);
+    return oat_code_begin + GetQuickOatCodeSize(m);
   }
 
   static void Callback(mirror::Object* obj, void* arg)
@@ -1006,17 +1036,18 @@
     } else if (obj->IsArtMethod()) {
       mirror::ArtMethod* method = obj->AsArtMethod();
       if (method->IsNative()) {
-        DCHECK(method->GetNativeGcMap() == NULL) << PrettyMethod(method);
-        DCHECK(method->GetMappingTable() == NULL) << PrettyMethod(method);
+        // TODO: portable dumping.
+        DCHECK(method->GetNativeGcMap() == nullptr) << PrettyMethod(method);
+        DCHECK(method->GetMappingTable() == nullptr) << PrettyMethod(method);
         bool first_occurrence;
-        const void* oat_code = state->GetOatCodeBegin(method);
-        uint32_t oat_code_size = state->GetOatCodeSize(method);
-        state->ComputeOatSize(oat_code, &first_occurrence);
+        const void* quick_oat_code = state->GetQuickOatCodeBegin(method);
+        uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
+        state->ComputeOatSize(quick_oat_code, &first_occurrence);
         if (first_occurrence) {
-          state->stats_.native_to_managed_code_bytes += oat_code_size;
+          state->stats_.native_to_managed_code_bytes += quick_oat_code_size;
         }
-        if (oat_code != method->GetEntryPointFromCompiledCode()) {
-          indent_os << StringPrintf("OAT CODE: %p\n", oat_code);
+        if (quick_oat_code != method->GetEntryPointFromQuickCompiledCode()) {
+          indent_os << StringPrintf("OAT CODE: %p\n", quick_oat_code);
         }
       } else if (method->IsAbstract() || method->IsCalleeSaveMethod() ||
           method->IsResolutionMethod() || method->IsImtConflictMethod() ||
@@ -1050,33 +1081,34 @@
           state->stats_.vmap_table_bytes += vmap_table_bytes;
         }
 
-        const void* oat_code_begin = state->GetOatCodeBegin(method);
-        const void* oat_code_end = state->GetOatCodeEnd(method);
-        uint32_t oat_code_size = state->GetOatCodeSize(method);
-        state->ComputeOatSize(oat_code_begin, &first_occurrence);
+        // TODO: portable dumping.
+        const void* quick_oat_code_begin = state->GetQuickOatCodeBegin(method);
+        const void* quick_oat_code_end = state->GetQuickOatCodeEnd(method);
+        uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
+        state->ComputeOatSize(quick_oat_code_begin, &first_occurrence);
         if (first_occurrence) {
-          state->stats_.managed_code_bytes += oat_code_size;
+          state->stats_.managed_code_bytes += quick_oat_code_size;
           if (method->IsConstructor()) {
             if (method->IsStatic()) {
-              state->stats_.class_initializer_code_bytes += oat_code_size;
+              state->stats_.class_initializer_code_bytes += quick_oat_code_size;
             } else if (dex_instruction_bytes > kLargeConstructorDexBytes) {
-              state->stats_.large_initializer_code_bytes += oat_code_size;
+              state->stats_.large_initializer_code_bytes += quick_oat_code_size;
             }
           } else if (dex_instruction_bytes > kLargeMethodDexBytes) {
-            state->stats_.large_method_code_bytes += oat_code_size;
+            state->stats_.large_method_code_bytes += quick_oat_code_size;
           }
         }
-        state->stats_.managed_code_bytes_ignoring_deduplication += oat_code_size;
+        state->stats_.managed_code_bytes_ignoring_deduplication += quick_oat_code_size;
 
-        indent_os << StringPrintf("OAT CODE: %p-%p\n", oat_code_begin, oat_code_end);
+        indent_os << StringPrintf("OAT CODE: %p-%p\n", quick_oat_code_begin, quick_oat_code_end);
         indent_os << StringPrintf("SIZE: Dex Instructions=%zd GC=%zd Mapping=%zd\n",
                                   dex_instruction_bytes, gc_map_bytes, pc_mapping_table_bytes);
 
         size_t total_size = dex_instruction_bytes + gc_map_bytes + pc_mapping_table_bytes +
-            vmap_table_bytes + oat_code_size + object_bytes;
+            vmap_table_bytes + quick_oat_code_size + object_bytes;
 
         double expansion =
-            static_cast<double>(oat_code_size) / static_cast<double>(dex_instruction_bytes);
+            static_cast<double>(quick_oat_code_size) / static_cast<double>(dex_instruction_bytes);
         state->stats_.ComputeOutliers(total_size, expansion, method);
       }
     }
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 9e6d5f9..223ae7c 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -23,6 +23,7 @@
 	barrier.cc \
 	base/allocator.cc \
 	base/bit_vector.cc \
+	base/hex_dump.cc \
 	base/logging.cc \
 	base/mutex.cc \
 	base/stringpiece.cc \
@@ -65,6 +66,7 @@
 	gc/space/malloc_space.cc \
 	gc/space/rosalloc_space.cc \
 	gc/space/space.cc \
+	gc/space/zygote_space.cc \
 	hprof/hprof.cc \
 	image.cc \
 	indirect_reference_table.cc \
@@ -213,12 +215,13 @@
 else # TARGET_ARCH != x86
 ifeq ($(TARGET_ARCH),x86_64)
 LIBART_TARGET_SRC_FILES += \
-	arch/x86/context_x86.cc \
-	arch/x86/entrypoints_init_x86.cc \
-	arch/x86/jni_entrypoints_x86.S \
-	arch/x86/portable_entrypoints_x86.S \
-	arch/x86/quick_entrypoints_x86.S \
-	arch/x86/thread_x86.cc
+	arch/x86_64/context_x86_64.cc \
+	arch/x86_64/entrypoints_init_x86_64.cc \
+	arch/x86_64/jni_entrypoints_x86_64.S \
+	arch/x86_64/portable_entrypoints_x86_64.S \
+	arch/x86_64/quick_entrypoints_x86_64.S \
+	arch/x86_64/thread_x86_64.cc \
+	monitor_pool.cc
 LIBART_LDFLAGS += -Wl,--no-fatal-warnings
 else # TARGET_ARCH != x86_64
 ifeq ($(TARGET_ARCH),mips)
@@ -233,7 +236,11 @@
 ifeq ($(TARGET_ARCH),arm64)
 $(info TODOArm64: $(LOCAL_PATH)/Android.mk Add Arm64 specific runtime files)
 else
+ifeq ($(TARGET_ARCH),mips64)
+$(info TODOMips64: $(LOCAL_PATH)/Android.mk Add mips64 specific runtime files)
+else
 $(error unsupported TARGET_ARCH=$(TARGET_ARCH))
+endif # TARGET_ARCH != mips64
 endif # TARGET_ARCH != arm64
 endif # TARGET_ARCH != mips
 endif # TARGET_ARCH != x86_64
@@ -249,6 +256,16 @@
 	thread_linux.cc
 
 ifeq ($(HOST_ARCH),x86)
+ifneq ($(BUILD_HOST_64bit),)
+LIBART_HOST_SRC_FILES += \
+	arch/x86_64/context_x86_64.cc \
+	arch/x86_64/entrypoints_init_x86_64.cc \
+	arch/x86_64/jni_entrypoints_x86_64.S \
+	arch/x86_64/portable_entrypoints_x86_64.S \
+	arch/x86_64/quick_entrypoints_x86_64.S \
+	arch/x86_64/thread_x86_64.cc \
+	monitor_pool.cc
+else
 LIBART_HOST_SRC_FILES += \
 	arch/x86/context_x86.cc \
 	arch/x86/entrypoints_init_x86.cc \
@@ -256,6 +273,7 @@
 	arch/x86/portable_entrypoints_x86.S \
 	arch/x86/quick_entrypoints_x86.S \
 	arch/x86/thread_x86.cc
+endif
 else # HOST_ARCH != x86
 $(error unsupported HOST_ARCH=$(HOST_ARCH))
 endif # HOST_ARCH != x86
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 00651ff..020cae0 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -35,7 +35,7 @@
 
   virtual void Reset();
 
-  virtual void FillCalleeSaves(const StackVisitor& fr);
+  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   virtual void SetSP(uintptr_t new_sp) {
     SetGPR(SP, new_sp);
diff --git a/runtime/arch/arm/portable_entrypoints_arm.S b/runtime/arch/arm/portable_entrypoints_arm.S
index ac519d5..98d17dc 100644
--- a/runtime/arch/arm/portable_entrypoints_arm.S
+++ b/runtime/arch/arm/portable_entrypoints_arm.S
@@ -53,7 +53,7 @@
     mov    ip, #0                          @ set ip to 0
     str    ip, [sp]                        @ store NULL for method* at bottom of frame
     add    sp, #16                         @ first 4 args are not passed on stack for portable
-    ldr    ip, [r0, #METHOD_CODE_OFFSET]   @ get pointer to the code
+    ldr    ip, [r0, #METHOD_PORTABLE_CODE_OFFSET]  @ get pointer to the code
     blx    ip                              @ call the method
     mov    sp, r11                         @ restore the stack pointer
     ldr    ip, [sp, #24]                   @ load the result pointer
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 34de93f..0e5c60a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -302,7 +302,7 @@
     ldr    r3, [sp, #12]                   @ copy arg value for r3
     mov    ip, #0                          @ set ip to 0
     str    ip, [sp]                        @ store NULL for method* at bottom of frame
-    ldr    ip, [r0, #METHOD_CODE_OFFSET]   @ get pointer to the code
+    ldr    ip, [r0, #METHOD_QUICK_CODE_OFFSET]  @ get pointer to the code
     blx    ip                              @ call the method
     mov    sp, r11                         @ restore the stack pointer
     ldr    ip, [sp, #24]                   @ load the result pointer
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index 8c1efeb..df4a04a 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -28,4 +28,8 @@
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
+void Thread::CleanupCpu() {
+  // Do nothing.
+}
+
 }  // namespace art
diff --git a/runtime/arch/context.cc b/runtime/arch/context.cc
index 7075e42..5eaf809 100644
--- a/runtime/arch/context.cc
+++ b/runtime/arch/context.cc
@@ -22,6 +22,10 @@
 #include "mips/context_mips.h"
 #elif defined(__i386__)
 #include "x86/context_x86.h"
+#elif defined(__x86_64__)
+#include "x86_64/context_x86_64.h"
+#else
+#include "base/logging.h"
 #endif
 
 namespace art {
@@ -33,8 +37,11 @@
   return new mips::MipsContext();
 #elif defined(__i386__)
   return new x86::X86Context();
+#elif defined(__x86_64__)
+  return new x86_64::X86_64Context();
 #else
   UNIMPLEMENTED(FATAL);
+  return nullptr;
 #endif
 }
 
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index 91e0cd6..3d11178 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -20,6 +20,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include "locks.h"
+
 namespace art {
 
 class StackVisitor;
@@ -38,7 +40,8 @@
 
   // Read values from callee saves in the given frame. The frame also holds
   // the method that holds the layout.
-  virtual void FillCalleeSaves(const StackVisitor& fr) = 0;
+  virtual void FillCalleeSaves(const StackVisitor& fr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Set the stack pointer value
   virtual void SetSP(uintptr_t new_sp) = 0;
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index 5595f86..4145cd3 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -33,7 +33,7 @@
 
   virtual void Reset();
 
-  virtual void FillCalleeSaves(const StackVisitor& fr);
+  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   virtual void SetSP(uintptr_t new_sp) {
     SetGPR(SP, new_sp);
diff --git a/runtime/arch/mips/portable_entrypoints_mips.S b/runtime/arch/mips/portable_entrypoints_mips.S
index 9208a8a..7545ce0 100644
--- a/runtime/arch/mips/portable_entrypoints_mips.S
+++ b/runtime/arch/mips/portable_entrypoints_mips.S
@@ -61,5 +61,73 @@
     .cfi_adjust_cfa_offset -64
 END art_portable_proxy_invoke_handler
 
+    /*
+     * Invocation stub for portable code.
+     * On entry:
+     *   a0 = method pointer
+     *   a1 = argument array or NULL for no argument methods
+     *   a2 = size of argument array in bytes
+     *   a3 = (managed) thread pointer
+     *   [sp + 16] = JValue* result
+     *   [sp + 20] = result type char
+     */
+ENTRY art_portable_invoke_stub
+    GENERATE_GLOBAL_POINTER
+    sw    $a0, 0($sp)           # save out a0
+    addiu $sp, $sp, -16         # spill s0, s1, fp, ra
+    .cfi_adjust_cfa_offset 16
+    sw    $ra, 12($sp)
+    .cfi_rel_offset 31, 12
+    sw    $fp, 8($sp)
+    .cfi_rel_offset 30, 8
+    sw    $s1, 4($sp)
+    .cfi_rel_offset 17, 4
+    sw    $s0, 0($sp)
+    .cfi_rel_offset 16, 0
+    move  $fp, $sp              # save sp in fp
+    .cfi_def_cfa_register 30
+    move  $s1, $a3              # move managed thread pointer into s1
+    addiu $s0, $zero, SUSPEND_CHECK_INTERVAL  # reset s0 to suspend check interval
+    addiu $t0, $a2, 16          # create space for method pointer in frame
+    srl   $t0, $t0, 3           # shift the frame size right 3
+    sll   $t0, $t0, 3           # shift the frame size left 3 to align to 16 bytes
+    subu  $sp, $sp, $t0         # reserve stack space for argument array
+    addiu $a0, $sp, 4           # pass stack pointer + method ptr as dest for memcpy
+    jal   memcpy                # (dest, src, bytes)
+    addiu $sp, $sp, -16         # make space for argument slots for memcpy
+    addiu $sp, $sp, 16          # restore stack after memcpy
+    lw    $a0, 16($fp)          # restore method*
+    lw    $a1, 4($sp)           # copy arg value for a1
+    lw    $a2, 8($sp)           # copy arg value for a2
+    lw    $a3, 12($sp)          # copy arg value for a3
+    lw    $t9, METHOD_PORTABLE_CODE_OFFSET($a0)  # get pointer to the code
+    jalr  $t9                   # call the method
+    sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
+    move  $sp, $fp              # restore the stack
+    lw    $s0, 0($sp)
+    .cfi_restore 16
+    lw    $s1, 4($sp)
+    .cfi_restore 17
+    lw    $fp, 8($sp)
+    .cfi_restore 30
+    lw    $ra, 12($sp)
+    .cfi_restore 31
+    addiu $sp, $sp, 16
+    .cfi_adjust_cfa_offset -16
+    lw    $t0, 16($sp)          # get result pointer
+    lw    $t1, 20($sp)          # get result type char
+    li    $t2, 68               # put char 'D' into t2
+    beq   $t1, $t2, 1f          # branch if result type char == 'D'
+    li    $t3, 70               # put char 'F' into t3
+    beq   $t1, $t3, 1f          # branch if result type char == 'F'
+    sw    $v0, 0($t0)           # store the result
+    jr    $ra
+    sw    $v1, 4($t0)           # store the other half of the result
+1:
+    s.s   $f0, 0($t0)           # store floating point result
+    jr    $ra
+    s.s   $f1, 4($t0)           # store other half of floating point result
+END art_portable_invoke_stub
+
 UNIMPLEMENTED art_portable_resolution_trampoline
 UNIMPLEMENTED art_portable_to_interpreter_bridge
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 2d1e87a..c60bca0 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -449,7 +449,7 @@
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
     /*
-     * Common invocation stub for portable and quick.
+     * Invocation stub for quick code.
      * On entry:
      *   a0 = method pointer
      *   a1 = argument array or NULL for no argument methods
@@ -458,9 +458,6 @@
      *   [sp + 16] = JValue* result
      *   [sp + 20] = result type char
      */
-    .type art_portable_invoke_stub, %function
-    .global art_portable_invoke_stub
-art_portable_invoke_stub:
 ENTRY art_quick_invoke_stub
     GENERATE_GLOBAL_POINTER
     sw    $a0, 0($sp)           # save out a0
@@ -490,7 +487,7 @@
     lw    $a1, 4($sp)           # copy arg value for a1
     lw    $a2, 8($sp)           # copy arg value for a2
     lw    $a3, 12($sp)          # copy arg value for a3
-    lw    $t9, METHOD_CODE_OFFSET($a0)  # get pointer to the code
+    lw    $t9, METHOD_QUICK_CODE_OFFSET($a0)  # get pointer to the code
     jalr  $t9                   # call the method
     sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
     move  $sp, $fp              # restore the stack
@@ -518,7 +515,6 @@
     jr    $ra
     s.s   $f1, 4($t0)           # store other half of floating point result
 END art_quick_invoke_stub
-    .size art_portable_invoke_stub, .-art_portable_invoke_stub
 
     /*
      * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index bd54549..f5d211f 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -27,4 +27,8 @@
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
 }
 
+void Thread::CleanupCpu() {
+  // Do nothing.
+}
+
 }  // namespace art
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 66a51f7..d7dca64 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -23,7 +23,7 @@
 namespace art {
 namespace x86 {
 
-static const uint32_t gZero = 0;
+static const uintptr_t gZero = 0;
 
 void X86Context::Reset() {
   for (int i = 0; i < kNumberOfCpuRegisters; i++) {
@@ -55,8 +55,8 @@
 
 void X86Context::SmashCallerSaves() {
   // This needs to be 0 because we want a null/zero return value.
-  gprs_[EAX] = const_cast<uint32_t*>(&gZero);
-  gprs_[EDX] = const_cast<uint32_t*>(&gZero);
+  gprs_[EAX] = const_cast<uintptr_t*>(&gZero);
+  gprs_[EDX] = const_cast<uintptr_t*>(&gZero);
   gprs_[ECX] = NULL;
   gprs_[EBX] = NULL;
 }
@@ -89,7 +89,7 @@
       : "g"(&gprs[0])  // input.
       :);  // clobber.
 #else
-    UNIMPLEMENTED(FATAL);
+  UNIMPLEMENTED(FATAL);
 #endif
 }
 
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index d7d2210..598314d 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -33,7 +33,7 @@
 
   virtual void Reset();
 
-  virtual void FillCalleeSaves(const StackVisitor& fr);
+  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   virtual void SetSP(uintptr_t new_sp) {
     SetGPR(ESP, new_sp);
diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S
index 72047d5..2eb5ada 100644
--- a/runtime/arch/x86/jni_entrypoints_x86.S
+++ b/runtime/arch/x86/jni_entrypoints_x86.S
@@ -17,7 +17,7 @@
 #include "asm_support_x86.S"
 
     /*
-     * Portable resolution trampoline.
+     * JNI dlsym lookup stub.
      */
 DEFINE_FUNCTION art_jni_dlsym_lookup_stub
     subl LITERAL(4), %esp         // align stack
diff --git a/runtime/arch/x86/portable_entrypoints_x86.S b/runtime/arch/x86/portable_entrypoints_x86.S
index 48de7c1..4bd6173 100644
--- a/runtime/arch/x86/portable_entrypoints_x86.S
+++ b/runtime/arch/x86/portable_entrypoints_x86.S
@@ -46,7 +46,7 @@
     addl LITERAL(12), %esp        // pop arguments to memcpy
     mov 12(%ebp), %eax            // move method pointer into eax
     mov %eax, (%esp)              // push method pointer onto stack
-    call *METHOD_CODE_OFFSET(%eax) // call the method
+    call *METHOD_PORTABLE_CODE_OFFSET(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
     POP ebx                       // pop ebx
     POP ebp                       // pop ebp
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 74ec761..9c3eb30 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -275,7 +275,7 @@
     mov 4(%esp), %ecx             // copy arg1 into ecx
     mov 8(%esp), %edx             // copy arg2 into edx
     mov 12(%esp), %ebx            // copy arg3 into ebx
-    call *METHOD_CODE_OFFSET(%eax) // call the method
+    call *METHOD_QUICK_CODE_OFFSET(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
     CFI_DEF_CFA_REGISTER(esp)
     POP ebx                       // pop ebx
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 39bad58..235da99 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -40,8 +40,9 @@
 
 namespace art {
 
+static Mutex modify_ldt_lock("modify_ldt lock");
+
 void Thread::InitCpu() {
-  static Mutex modify_ldt_lock("modify_ldt lock");
   MutexLock mu(Thread::Current(), modify_ldt_lock);
 
   const uintptr_t base = reinterpret_cast<uintptr_t>(this);
@@ -113,7 +114,6 @@
   uint16_t table_indicator = 1 << 2;  // LDT
   uint16_t rpl = 3;  // Requested privilege level
   uint16_t selector = (entry_number << 3) | table_indicator | rpl;
-  // TODO: use our assembler to generate code
   __asm__ __volatile__("movw %w0, %%fs"
       :    // output
       : "q"(selector)  // input
@@ -124,7 +124,6 @@
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
-  // TODO: use our assembler to generate code
   CHECK_EQ(THREAD_SELF_OFFSET, OFFSETOF_MEMBER(Thread, self_));
   __asm__ __volatile__("movl %%fs:(%1), %0"
       : "=r"(self_check)  // output
@@ -138,4 +137,36 @@
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
+void Thread::CleanupCpu() {
+  MutexLock mu(Thread::Current(), modify_ldt_lock);
+
+  // Sanity check that reads from %fs point to this Thread*.
+  Thread* self_check;
+  __asm__ __volatile__("movl %%fs:(%1), %0"
+      : "=r"(self_check)  // output
+      : "r"(THREAD_SELF_OFFSET)  // input
+      :);  // clobber
+  CHECK_EQ(self_check, this);
+
+  // Extract the LDT entry number from the FS register.
+  uint16_t selector;
+  __asm__ __volatile__("movw %%fs, %w0"
+      : "=q"(selector)  // output
+      :  // input
+      :);  // clobber
+
+  // Free LDT entry.
+#if defined(__APPLE__)
+  i386_set_ldt(selector >> 3, 0, 1);
+#else
+  user_desc ldt_entry;
+  memset(&ldt_entry, 0, sizeof(ldt_entry));
+  ldt_entry.entry_number = selector >> 3;
+  ldt_entry.contents = MODIFY_LDT_CONTENTS_DATA;
+  ldt_entry.seg_not_present = 1;
+
+  syscall(__NR_modify_ldt, 1, &ldt_entry, sizeof(ldt_entry));
+#endif
+}
+
 }  // namespace art
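
The selector arithmetic above is worth pinning down: InitCpu packs the LDT
entry number, table indicator, and RPL into %fs, and CleanupCpu recovers the
entry number with selector >> 3. A standalone C++ sketch with hypothetical
values, not runtime code:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint16_t entry_number = 6;          // hypothetical LDT slot
      uint16_t table_indicator = 1 << 2;  // LDT, as in InitCpu
      uint16_t rpl = 3;                   // requested privilege level
      uint16_t selector =
          static_cast<uint16_t>((entry_number << 3) | table_indicator | rpl);
      // CleanupCpu's decode: shifting right by 3 drops the TI and RPL bits.
      assert((selector >> 3) == entry_number);
      return 0;
    }
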
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
new file mode 100644
index 0000000..b59c0cb
--- /dev/null
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_S_
+#define ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_S_
+
+#include "asm_support_x86_64.h"
+
+#if defined(__APPLE__)
+    // Mac OS' as(1) doesn't let you name macro parameters.
+    #define MACRO0(macro_name) .macro macro_name
+    #define MACRO1(macro_name, macro_arg1) .macro macro_name
+    #define MACRO2(macro_name, macro_arg1, macro_arg2) .macro macro_name
+    #define MACRO3(macro_name, macro_arg1, macro_arg2, macro_arg3) .macro macro_name
+    #define END_MACRO .endmacro
+
+    // Mac OS' as(1) uses $0, $1, and so on for macro arguments, and function names
+    // are mangled with an extra underscore prefix. The use of $x for arguments
+    // means that literals need to be represented with $$x in macros.
+    #define SYMBOL(name) _ ## name
+    #define PLT_SYMBOL(name) _ ## name
+    #define VAR(name,index) SYMBOL($index)
+    #define PLT_VAR(name, index) SYMBOL($index)
+    #define REG_VAR(name,index) %$index
+    #define CALL_MACRO(name,index) $index
+    #define LITERAL(value) $value
+    #define MACRO_LITERAL(value) $$value
+
+    // Mac OS' as(1) doesn't like cfi_* directives.
+    #define CFI_STARTPROC
+    #define CFI_ENDPROC
+    #define CFI_ADJUST_CFA_OFFSET(size)
+    #define CFI_DEF_CFA(reg,size)
+    #define CFI_DEF_CFA_REGISTER(reg)
+    #define CFI_RESTORE(reg)
+    #define CFI_REL_OFFSET(reg,size)
+
+    // Mac OS' as(1) doesn't support certain directives.
+    #define FUNCTION_TYPE(name)
+    #define SIZE(name)
+#else
+    // Regular gas(1) lets you name macro parameters.
+    #define MACRO0(macro_name) .macro macro_name
+    #define MACRO1(macro_name, macro_arg1) .macro macro_name macro_arg1
+    #define MACRO2(macro_name, macro_arg1, macro_arg2) .macro macro_name macro_arg1, macro_arg2
+    #define MACRO3(macro_name, macro_arg1, macro_arg2, macro_arg3) .macro macro_name macro_arg1, macro_arg2, macro_arg3
+    #define END_MACRO .endm
+
+    // Regular gas(1) uses \argument_name for macro arguments.
+    // We need to turn on alternate macro syntax so we can use &, or the preprocessor
+    // will screw us by inserting a space between the \ and the name. Even in this mode there's
+    // no special meaning to $, so literals are still just $x. Because altmacro makes % a
+    // special character, care needs to be taken when passing registers as macro arguments.
+    .altmacro
+    #define SYMBOL(name) name
+    #define PLT_SYMBOL(name) name@PLT
+    #define VAR(name,index) name&
+    #define PLT_VAR(name, index) name&@PLT
+    #define REG_VAR(name,index) %name
+    #define CALL_MACRO(name,index) name&
+    #define LITERAL(value) $value
+    #define MACRO_LITERAL(value) $value
+
+    // CFI support
+    #define CFI_STARTPROC .cfi_startproc
+    #define CFI_ENDPROC .cfi_endproc
+    #define CFI_ADJUST_CFA_OFFSET(size) .cfi_adjust_cfa_offset size
+    #define CFI_DEF_CFA(reg,size) .cfi_def_cfa reg,size
+    #define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
+    #define CFI_RESTORE(reg) .cfi_restore reg
+    #define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size
+
+    #define FUNCTION_TYPE(name) .type name&, @function
+    #define SIZE(name) .size name, .-name
+#endif
+
+    /* Cache alignment for function entry */
+MACRO0(ALIGN_FUNCTION_ENTRY)
+    .balign 16
+END_MACRO
+
+MACRO1(DEFINE_FUNCTION, c_name)
+    FUNCTION_TYPE(\c_name)
+    .globl VAR(c_name, 0)
+    ALIGN_FUNCTION_ENTRY
+VAR(c_name, 0):
+    CFI_STARTPROC
+END_MACRO
+
+MACRO1(END_FUNCTION, c_name)
+    CFI_ENDPROC
+    SIZE(\c_name)
+END_MACRO
+
+MACRO1(PUSH, reg)
+    pushq REG_VAR(reg, 0)
+    CFI_ADJUST_CFA_OFFSET(8)
+    CFI_REL_OFFSET(REG_VAR(reg, 0), 0)
+END_MACRO
+
+MACRO1(POP, reg)
+    popq REG_VAR(reg,0)
+    CFI_ADJUST_CFA_OFFSET(-8)
+    CFI_RESTORE(REG_VAR(reg,0))
+END_MACRO
+
+MACRO1(UNIMPLEMENTED,name)
+    FUNCTION_TYPE(\name)
+    .globl VAR(name, 0)
+    ALIGN_FUNCTION_ENTRY
+VAR(name, 0):
+    CFI_STARTPROC
+    int3
+    int3
+    CFI_ENDPROC
+    SIZE(\name)
+END_MACRO
+
+MACRO0(SETUP_GOT_NOSAVE)
+    call __x86.get_pc_thunk.bx
+    addl $_GLOBAL_OFFSET_TABLE_, %ebx
+END_MACRO
+
+MACRO0(SETUP_GOT)
+    PUSH  ebx
+    SETUP_GOT_NOSAVE
+END_MACRO
+
+MACRO0(UNDO_SETUP_GOT)
+    POP  ebx
+END_MACRO
+
+#endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_S_
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
new file mode 100644
index 0000000..d425ed8
--- /dev/null
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
+#define ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
+
+#include "asm_support.h"
+
+// Offset of field Thread::self_ verified in InitCpu
+#define THREAD_SELF_OFFSET 72
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
+// Offset of field Thread::exception_ verified in InitCpu
+#define THREAD_EXCEPTION_OFFSET 16
+// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
+#define THREAD_ID_OFFSET 112
+
+#endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
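
The constants above are verified at runtime by the CHECK_EQs in InitCpu
(thread_x86_64.cc, later in this change). The same idea can be checked at
compile time; this sketch uses a stand-in struct whose field placement is an
assumption for the demo, not the real Thread layout:

    #include <cstddef>
    #include <cstdint>

    // Hypothetical layout chosen so the offsets line up with
    // THREAD_CARD_TABLE_OFFSET (8) and THREAD_EXCEPTION_OFFSET (16).
    struct DemoThread {
      uint64_t padding_;     // bytes 0..7
      uint64_t card_table_;  // byte offset 8
      uint64_t exception_;   // byte offset 16
    };

    static_assert(offsetof(DemoThread, card_table_) == 8, "card_table_ offset");
    static_assert(offsetof(DemoThread, exception_) == 16, "exception_ offset");

    int main() { return 0; }
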
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
new file mode 100644
index 0000000..4d1131c
--- /dev/null
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "context_x86_64.h"
+
+#include "mirror/art_method.h"
+#include "mirror/object-inl.h"
+#include "stack.h"
+
+namespace art {
+namespace x86_64 {
+
+static const uintptr_t gZero = 0;
+
+void X86_64Context::Reset() {
+  for (int i = 0; i < kNumberOfCpuRegisters; i++) {
+    gprs_[i] = NULL;
+  }
+  gprs_[RSP] = &rsp_;
+  // Initialize registers with easy to spot debug values.
+  rsp_ = X86_64Context::kBadGprBase + RSP;
+  rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters;
+}
+
+void X86_64Context::FillCalleeSaves(const StackVisitor& fr) {
+  mirror::ArtMethod* method = fr.GetMethod();
+  uint32_t core_spills = method->GetCoreSpillMask();
+  size_t spill_count = __builtin_popcount(core_spills);
+  DCHECK_EQ(method->GetFpSpillMask(), 0u);
+  size_t frame_size = method->GetFrameSizeInBytes();
+  if (spill_count > 0) {
+    // Lowest number spill is farthest away, walk registers and fill into context.
+    int j = 2;  // Offset j to skip return address spill.
+    for (int i = 0; i < kNumberOfCpuRegisters; i++) {
+      if (((core_spills >> i) & 1) != 0) {
+        gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_size);
+        j++;
+      }
+    }
+  }
+}
+
+void X86_64Context::SmashCallerSaves() {
+  // This needs to be 0 because we want a null/zero return value.
+  gprs_[RAX] = const_cast<uintptr_t*>(&gZero);
+  gprs_[RDX] = const_cast<uintptr_t*>(&gZero);
+  gprs_[RCX] = nullptr;
+  gprs_[RBX] = nullptr;
+}
+
+void X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
+  CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
+  CHECK_NE(gprs_[reg], &gZero);
+  CHECK(gprs_[reg] != NULL);
+  *gprs_[reg] = value;
+}
+
+void X86_64Context::DoLongJump() {
+  UNIMPLEMENTED(FATAL);
+}
+
+}  // namespace x86_64
+}  // namespace art
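
A small sketch of the mask walk in FillCalleeSaves, with a made-up spill
mask; it shows how popcount yields the slot count and why the lowest-numbered
spilled register sits farthest from the top of the frame:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical mask: RBX (bit 3), RBP (bit 5), and R12 (bit 12) spilled.
      uint32_t core_spills = (1u << 3) | (1u << 5) | (1u << 12);
      int spill_count = __builtin_popcount(core_spills);  // 3
      printf("%d callee-save registers spilled\n", spill_count);
      int slot = 0;
      for (int i = 0; i < 16; i++) {  // walk registers low to high, as above
        if (((core_spills >> i) & 1) != 0) {
          printf("reg %d -> slot %d (0 = farthest from frame top)\n", i, slot++);
        }
      }
      return 0;
    }
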
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
new file mode 100644
index 0000000..3e49165
--- /dev/null
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_64_CONTEXT_X86_64_H_
+#define ART_RUNTIME_ARCH_X86_64_CONTEXT_X86_64_H_
+
+#include "arch/context.h"
+#include "base/logging.h"
+#include "registers_x86_64.h"
+
+namespace art {
+namespace x86_64 {
+
+class X86_64Context : public Context {
+ public:
+  X86_64Context() {
+    Reset();
+  }
+  virtual ~X86_64Context() {}
+
+  virtual void Reset();
+
+  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  virtual void SetSP(uintptr_t new_sp) {
+    SetGPR(RSP, new_sp);
+  }
+
+  virtual void SetPC(uintptr_t new_pc) {
+    rip_ = new_pc;
+  }
+
+  virtual uintptr_t GetGPR(uint32_t reg) {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
+    return *gprs_[reg];
+  }
+
+  virtual void SetGPR(uint32_t reg, uintptr_t value);
+
+  virtual void SmashCallerSaves();
+  virtual void DoLongJump();
+
+ private:
+  // Pointers to register locations, floating point registers are all caller save. Values are
+  // initialized to NULL or the special registers below.
+  uintptr_t* gprs_[kNumberOfCpuRegisters];
+  // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat
+  // special in that it cannot be encoded normally as a register operand to an instruction (except
+  // in 64bit addressing modes).
+  uintptr_t rsp_, rip_;
+};
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_X86_64_CONTEXT_X86_64_H_
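
The comment above is the key design point: gprs_ holds pointers to the saved
register slots, not register values, so writing a register updates the spill
slot in place. A minimal sketch of that indirection (the RBX index and slot
value are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      uintptr_t saved_rbx_slot = 0;  // pretend this lives in a stack frame
      uintptr_t* gprs[16] = {};      // null except where a register was saved
      gprs[3] = &saved_rbx_slot;     // 3 == RBX in registers_x86_64.h
      *gprs[3] = 42;                 // what SetGPR(RBX, 42) does after its checks
      assert(saved_rbx_slot == 42);
      return 0;
    }
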
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
new file mode 100644
index 0000000..589c7d9
--- /dev/null
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "entrypoints/portable/portable_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/entrypoint_utils.h"
+
+namespace art {
+
+// Interpreter entrypoints.
+extern "C" void artInterpreterToInterpreterBridge(Thread* self, MethodHelper& mh,
+                                                  const DexFile::CodeItem* code_item,
+                                                  ShadowFrame* shadow_frame, JValue* result);
+extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, MethodHelper& mh,
+                                                   const DexFile::CodeItem* code_item,
+                                                   ShadowFrame* shadow_frame, JValue* result);
+
+// Portable entrypoints.
+extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
+extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
+
+// Cast entrypoints.
+extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
+                                            const mirror::Class* ref_class);
+extern "C" void art_quick_check_cast(void*, void*);
+
+// DexCache entrypoints.
+extern "C" void* art_quick_initialize_static_storage(uint32_t, void*);
+extern "C" void* art_quick_initialize_type(uint32_t, void*);
+extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, void*);
+extern "C" void* art_quick_resolve_string(void*, uint32_t);
+
+// Field entrypoints.
+extern "C" int art_quick_set32_instance(uint32_t, void*, int32_t);
+extern "C" int art_quick_set32_static(uint32_t, int32_t);
+extern "C" int art_quick_set64_instance(uint32_t, void*, int64_t);
+extern "C" int art_quick_set64_static(uint32_t, int64_t);
+extern "C" int art_quick_set_obj_instance(uint32_t, void*, void*);
+extern "C" int art_quick_set_obj_static(uint32_t, void*);
+extern "C" int32_t art_quick_get32_instance(uint32_t, void*);
+extern "C" int32_t art_quick_get32_static(uint32_t);
+extern "C" int64_t art_quick_get64_instance(uint32_t, void*);
+extern "C" int64_t art_quick_get64_static(uint32_t);
+extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
+extern "C" void* art_quick_get_obj_static(uint32_t);
+
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
+extern "C" void art_quick_handle_fill_data(void*, void*);
+
+// Lock entrypoints.
+extern "C" void art_quick_lock_object(void*);
+extern "C" void art_quick_unlock_object(void*);
+
+// Math entrypoints.
+extern "C" double art_quick_fmod(double, double);
+extern "C" float art_quick_fmodf(float, float);
+extern "C" double art_quick_l2d(int64_t);
+extern "C" float art_quick_l2f(int64_t);
+extern "C" int64_t art_quick_d2l(double);
+extern "C" int64_t art_quick_f2l(float);
+extern "C" int32_t art_quick_idivmod(int32_t, int32_t);
+extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
+extern "C" int64_t art_quick_lmod(int64_t, int64_t);
+extern "C" int64_t art_quick_lmul(int64_t, int64_t);
+extern "C" uint64_t art_quick_lshl(uint64_t, uint32_t);
+extern "C" uint64_t art_quick_lshr(uint64_t, uint32_t);
+extern "C" uint64_t art_quick_lushr(uint64_t, uint32_t);
+
+// Intrinsic entrypoints.
+extern "C" int32_t art_quick_memcmp16(void*, void*, int32_t);
+extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
+extern "C" int32_t art_quick_string_compareto(void*, void*);
+extern "C" void* art_quick_memcpy(void*, const void*, size_t);
+
+// Invoke entrypoints.
+extern "C" void art_quick_imt_conflict_trampoline(mirror::ArtMethod*);
+extern "C" void art_quick_resolution_trampoline(mirror::ArtMethod*);
+extern "C" void art_quick_to_interpreter_bridge(mirror::ArtMethod*);
+extern "C" void art_quick_invoke_direct_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_interface_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_static_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_super_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
+
+// Thread entrypoints.
+extern void CheckSuspendFromCode(Thread* thread);
+extern "C" void art_quick_test_suspend();
+
+// Throw entrypoints.
+extern "C" void art_quick_deliver_exception(void*);
+extern "C" void art_quick_throw_array_bounds(int32_t index, int32_t limit);
+extern "C" void art_quick_throw_div_zero();
+extern "C" void art_quick_throw_no_such_method(int32_t method_idx);
+extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_stack_overflow(void*);
+
+extern void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
+
+void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
+                     PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
+  // Interpreter
+  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
+  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
+
+  // JNI
+  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
+
+  // Portable
+  ppoints->pPortableResolutionTrampoline = art_portable_resolution_trampoline;
+  ppoints->pPortableToInterpreterBridge = art_portable_to_interpreter_bridge;
+
+  // Alloc
+  ResetQuickAllocEntryPoints(qpoints);
+
+  // Cast
+  qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
+  qpoints->pCheckCast = art_quick_check_cast;
+
+  // DexCache
+  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
+  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
+  qpoints->pInitializeType = art_quick_initialize_type;
+  qpoints->pResolveString = art_quick_resolve_string;
+
+  // Field
+  qpoints->pSet32Instance = art_quick_set32_instance;
+  qpoints->pSet32Static = art_quick_set32_static;
+  qpoints->pSet64Instance = art_quick_set64_instance;
+  qpoints->pSet64Static = art_quick_set64_static;
+  qpoints->pSetObjInstance = art_quick_set_obj_instance;
+  qpoints->pSetObjStatic = art_quick_set_obj_static;
+  qpoints->pGet32Instance = art_quick_get32_instance;
+  qpoints->pGet64Instance = art_quick_get64_instance;
+  qpoints->pGetObjInstance = art_quick_get_obj_instance;
+  qpoints->pGet32Static = art_quick_get32_static;
+  qpoints->pGet64Static = art_quick_get64_static;
+  qpoints->pGetObjStatic = art_quick_get_obj_static;
+
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
+  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
+
+  // JNI
+  qpoints->pJniMethodStart = JniMethodStart;
+  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
+  qpoints->pJniMethodEnd = JniMethodEnd;
+  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
+  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
+  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
+
+  // Locks
+  qpoints->pLockObject = art_quick_lock_object;
+  qpoints->pUnlockObject = art_quick_unlock_object;
+
+  // Math
+  // qpoints->pCmpgDouble = NULL;  // Not needed on x86-64.
+  // qpoints->pCmpgFloat = NULL;  // Not needed on x86-64.
+  // qpoints->pCmplDouble = NULL;  // Not needed on x86-64.
+  // qpoints->pCmplFloat = NULL;  // Not needed on x86-64.
+  qpoints->pFmod = art_quick_fmod;
+  // qpoints->pSqrt = NULL;  // Not needed on x86-64.
+  qpoints->pL2d = art_quick_l2d;
+  qpoints->pFmodf = art_quick_fmodf;
+  qpoints->pL2f = art_quick_l2f;
+  // qpoints->pD2iz = NULL;  // Not needed on x86-64.
+  // qpoints->pF2iz = NULL;  // Not needed on x86-64.
+  qpoints->pIdivmod = art_quick_idivmod;
+  qpoints->pD2l = art_quick_d2l;
+  qpoints->pF2l = art_quick_f2l;
+  qpoints->pLdiv = art_quick_ldiv;
+  qpoints->pLmod = art_quick_lmod;
+  qpoints->pLmul = art_quick_lmul;
+  qpoints->pShlLong = art_quick_lshl;
+  qpoints->pShrLong = art_quick_lshr;
+  qpoints->pUshrLong = art_quick_lushr;
+
+  // Intrinsics
+  qpoints->pIndexOf = art_quick_indexof;
+  qpoints->pMemcmp16 = art_quick_memcmp16;
+  qpoints->pStringCompareTo = art_quick_string_compareto;
+  qpoints->pMemcpy = art_quick_memcpy;
+
+  // Invocation
+  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
+  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
+  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
+  qpoints->pInvokeDirectTrampolineWithAccessCheck = art_quick_invoke_direct_trampoline_with_access_check;
+  qpoints->pInvokeInterfaceTrampolineWithAccessCheck = art_quick_invoke_interface_trampoline_with_access_check;
+  qpoints->pInvokeStaticTrampolineWithAccessCheck = art_quick_invoke_static_trampoline_with_access_check;
+  qpoints->pInvokeSuperTrampolineWithAccessCheck = art_quick_invoke_super_trampoline_with_access_check;
+  qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
+
+  // Thread
+  qpoints->pCheckSuspend = CheckSuspendFromCode;
+  qpoints->pTestSuspend = art_quick_test_suspend;
+
+  // Throws
+  qpoints->pDeliverException = art_quick_deliver_exception;
+  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
+  qpoints->pThrowDivZero = art_quick_throw_div_zero;
+  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
+  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
+  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
+}
+
+}  // namespace art
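
InitEntryPoints follows a plain table-of-function-pointers pattern: fill the
struct once at startup, then have everything else indirect through it. A
reduced sketch with hypothetical names, not the runtime's types:

    #include <cstdio>

    struct DemoQuickEntryPoints {
      void (*pLockObject)(void*);
      void (*pUnlockObject)(void*);
    };

    static void DemoLock(void*) { puts("lock"); }
    static void DemoUnlock(void*) { puts("unlock"); }

    static void InitDemoEntryPoints(DemoQuickEntryPoints* qpoints) {
      qpoints->pLockObject = DemoLock;
      qpoints->pUnlockObject = DemoUnlock;
    }

    int main() {
      DemoQuickEntryPoints qpoints;
      InitDemoEntryPoints(&qpoints);
      qpoints.pLockObject(nullptr);    // dispatches through the table
      qpoints.pUnlockObject(nullptr);
      return 0;
    }
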
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
new file mode 100644
index 0000000..35fcccb
--- /dev/null
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "asm_support_x86_64.S"
+
+    /*
+     * Jni dlsym lookup stub.
+     */
+UNIMPLEMENTED art_jni_dlsym_lookup_stub
diff --git a/runtime/arch/x86_64/portable_entrypoints_x86_64.S b/runtime/arch/x86_64/portable_entrypoints_x86_64.S
new file mode 100644
index 0000000..2e9d19a
--- /dev/null
+++ b/runtime/arch/x86_64/portable_entrypoints_x86_64.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "asm_support_x86_64.S"
+
+    /*
+     * Portable invocation stub.
+     */
+UNIMPLEMENTED art_portable_invoke_stub
+
+UNIMPLEMENTED art_portable_proxy_invoke_handler
+
+UNIMPLEMENTED art_portable_resolution_trampoline
+
+UNIMPLEMENTED art_portable_to_interpreter_bridge
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
new file mode 100644
index 0000000..e01a31b
--- /dev/null
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -0,0 +1,401 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "asm_support_x86_64.S"
+
+// For x86-64, the CFA is rsp+8, the address above the pushed return address on the stack.
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     */
+MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
+    int3
+    int3
+END_MACRO
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     */
+MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
+    int3
+    int3
+END_MACRO
+
+MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
+    int3
+    int3
+END_MACRO
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
+     */
+MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
+    int3
+    int3
+END_MACRO
+
+MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
+    int3
+END_MACRO
+
+    /*
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_.
+     */
+MACRO0(DELIVER_PENDING_EXCEPTION)
+    int3
+    int3
+END_MACRO
+
+MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+    /*
+     * Called by managed code to create and deliver a NullPointerException.
+     */
+NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
+
+    /*
+     * Called by managed code to create and deliver an ArithmeticException.
+     */
+NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
+
+    /*
+     * Called by managed code to create and deliver a StackOverflowError.
+     */
+NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
+
+    /*
+     * Called by managed code, saves callee saves and then calls artThrowException
+     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
+
+    /*
+     * Called by managed code to create and deliver a NoSuchMethodError.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
+
+    /*
+     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
+     * index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
+
+    /*
+     * All generated callsites for interface invokes and invocation slow paths will load arguments
+     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
+     * the method_idx.  This wrapper will save arg1-arg3, load the caller's Method*, align the
+     * stack and call the appropriate C helper.
+     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/r1.
+     *
+     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
+     * of the target Method* in r0 and method->code_ in r1.
+     *
+     * If unsuccessful, the helper will return NULL/NULL. There will be a pending exception in the
+     * thread and we branch to another stub to deliver it.
+     *
+     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
+     * pointing back to the original caller.
+     */
+MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
+INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
+
+INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
+INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
+INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
+INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
+
+    /*
+     * Quick invocation stub.
+     */
+DEFINE_FUNCTION art_quick_invoke_stub
+    int3
+    int3
+END_FUNCTION art_quick_invoke_stub
+
+MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
+    int3
+    testl %eax, %eax               // eax == 0 ?
+    jz  1f                         // if eax == 0 goto 1
+    ret                            // return
+1:                                 // deliver exception on current thread
+    DELIVER_PENDING_EXCEPTION
+END_MACRO
+
+MACRO0(RETURN_IF_EAX_ZERO)
+    int3
+    testl %eax, %eax               // eax == 0 ?
+    jnz  1f                        // if eax != 0 goto 1
+    ret                            // return
+1:                                 // deliver exception on current thread
+    DELIVER_PENDING_EXCEPTION
+END_MACRO
+
+MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
+    int3
+    int3
+    DELIVER_PENDING_EXCEPTION
+END_MACRO
+
+// Generate the allocation entrypoints for each allocator.
+// TODO: use arch/quick_alloc_entrypoints.S. Currently we don't, as we need to use concatenation
+// macros to work around differences between OS/X's as and binutils as (OS/X lacks named arguments
+// to macros and the VAR macro won't concatenate arguments properly); this also breaks having
+// multi-line macros that use each other (hence using 1 macro per newline below).
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+
+TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+
+TWO_ARG_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
+
+DEFINE_FUNCTION art_quick_lock_object
+    int3
+    int3
+END_FUNCTION art_quick_lock_object
+
+DEFINE_FUNCTION art_quick_unlock_object
+    int3
+    int3
+END_FUNCTION art_quick_unlock_object
+
+DEFINE_FUNCTION art_quick_is_assignable
+    int3
+    int3
+END_FUNCTION art_quick_is_assignable
+
+DEFINE_FUNCTION art_quick_check_cast
+    int3
+    int3
+END_FUNCTION art_quick_check_cast
+
+    /*
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * eax = array, ecx = index, edx = value
+     */
+UNIMPLEMENTED art_quick_aput_obj_with_null_and_bound_check
+UNIMPLEMENTED art_quick_aput_obj_with_bound_check
+UNIMPLEMENTED art_quick_aput_obj
+UNIMPLEMENTED art_quick_memcpy
+
+NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
+
+UNIMPLEMENTED art_quick_fmod
+UNIMPLEMENTED art_quick_fmodf
+UNIMPLEMENTED art_quick_l2d
+UNIMPLEMENTED art_quick_l2f
+UNIMPLEMENTED art_quick_d2l
+UNIMPLEMENTED art_quick_f2l
+UNIMPLEMENTED art_quick_idivmod
+UNIMPLEMENTED art_quick_ldiv
+UNIMPLEMENTED art_quick_lmod
+UNIMPLEMENTED art_quick_lmul
+UNIMPLEMENTED art_quick_lshl
+UNIMPLEMENTED art_quick_lshr
+UNIMPLEMENTED art_quick_lushr
+UNIMPLEMENTED art_quick_set32_instance
+UNIMPLEMENTED art_quick_set64_instance
+UNIMPLEMENTED art_quick_set_obj_instance
+UNIMPLEMENTED art_quick_get32_instance
+UNIMPLEMENTED art_quick_get64_instance
+UNIMPLEMENTED art_quick_get_obj_instance
+UNIMPLEMENTED art_quick_set32_static
+UNIMPLEMENTED art_quick_set64_static
+UNIMPLEMENTED art_quick_set_obj_static
+UNIMPLEMENTED art_quick_get32_static
+UNIMPLEMENTED art_quick_get64_static
+UNIMPLEMENTED art_quick_get_obj_static
+UNIMPLEMENTED art_quick_proxy_invoke_handler
+
+    /*
+     * Called to resolve an imt conflict.
+     */
+UNIMPLEMENTED art_quick_imt_conflict_trampoline
+UNIMPLEMENTED art_quick_resolution_trampoline
+UNIMPLEMENTED art_quick_to_interpreter_bridge
+
+    /*
+     * Routine that intercepts method calls and returns.
+     */
+UNIMPLEMENTED art_quick_instrumentation_entry
+UNIMPLEMENTED art_quick_instrumentation_exit
+
+    /*
+     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+UNIMPLEMENTED art_quick_deoptimize
+
+UNIMPLEMENTED art_quick_indexof
+UNIMPLEMENTED art_quick_string_compareto
+UNIMPLEMENTED art_quick_memcmp16
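
The GENERATE_ALLOC_ENTRYPOINTS_* defines above rely on preprocessor token
pasting to mint one symbol per allocator suffix. A toy C++ illustration of
the ## mechanics; the entrypoint bodies are stand-ins, not the real stubs:

    #include <cstdio>

    // Pasting c_suffix onto the base name yields e.g.
    // art_quick_alloc_object_dlmalloc, mirroring the assembly macros above.
    #define GENERATE_ALLOC_ENTRYPOINT(c_suffix)              \
      void art_quick_alloc_object##c_suffix() {              \
        printf("art_quick_alloc_object" #c_suffix "\n");     \
      }

    GENERATE_ALLOC_ENTRYPOINT(_dlmalloc)
    GENERATE_ALLOC_ENTRYPOINT(_rosalloc)

    int main() {
      art_quick_alloc_object_dlmalloc();
      art_quick_alloc_object_rosalloc();
      return 0;
    }
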
diff --git a/runtime/arch/x86_64/registers_x86_64.cc b/runtime/arch/x86_64/registers_x86_64.cc
new file mode 100644
index 0000000..38f3494
--- /dev/null
+++ b/runtime/arch/x86_64/registers_x86_64.cc
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "registers_x86_64.h"
+
+#include <ostream>
+
+namespace art {
+namespace x86_64 {
+
+static const char* kRegisterNames[] = {
+  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
+  "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+};
+std::ostream& operator<<(std::ostream& os, const Register& rhs) {
+  if (rhs >= RAX && rhs <= R15) {
+    os << kRegisterNames[rhs];
+  } else {
+    os << "Register[" << static_cast<int>(rhs) << "]";
+  }
+  return os;
+}
+
+}  // namespace x86_64
+}  // namespace art
diff --git a/runtime/arch/x86_64/registers_x86_64.h b/runtime/arch/x86_64/registers_x86_64.h
new file mode 100644
index 0000000..9808d91
--- /dev/null
+++ b/runtime/arch/x86_64/registers_x86_64.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_64_REGISTERS_X86_64_H_
+#define ART_RUNTIME_ARCH_X86_64_REGISTERS_X86_64_H_
+
+#include <iosfwd>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "globals.h"
+
+namespace art {
+namespace x86_64 {
+
+enum Register {
+  RAX = 0,
+  RCX = 1,
+  RDX = 2,
+  RBX = 3,
+  RSP = 4,
+  RBP = 5,
+  RSI = 6,
+  RDI = 7,
+  R8  = 8,
+  R9  = 9,
+  R10 = 10,
+  R11 = 11,
+  R12 = 12,
+  R13 = 13,
+  R14 = 14,
+  R15 = 15,
+  kNumberOfCpuRegisters = 16,
+  kNoRegister = -1  // Signals an illegal register.
+};
+std::ostream& operator<<(std::ostream& os, const Register& rhs);
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_X86_64_REGISTERS_X86_64_H_
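
A usage sketch for the operator<< above, assuming the ART include paths are
set up (the include path shown is illustrative): in-range values print the
name-table entry, and anything else falls back to the bracketed form:

    #include <iostream>

    #include "arch/x86_64/registers_x86_64.h"

    int main() {
      std::cout << art::x86_64::RAX << "\n";  // prints "rax"
      std::cout << art::x86_64::R15 << "\n";  // prints "r15"
      std::cout << static_cast<art::x86_64::Register>(99) << "\n";  // "Register[99]"
      return 0;
    }
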
diff --git a/runtime/arch/x86_64/thread_x86_64.cc b/runtime/arch/x86_64/thread_x86_64.cc
new file mode 100644
index 0000000..9e45a72
--- /dev/null
+++ b/runtime/arch/x86_64/thread_x86_64.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "thread.h"
+
+#include "asm_support_x86_64.h"
+#include "base/macros.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+namespace art {
+
+static void arch_prctl(int code, void* val) {
+  syscall(__NR_arch_prctl, code, val);
+}
+
+void Thread::InitCpu() {
+  static Mutex modify_ldt_lock("modify_ldt lock");
+  MutexLock mu(Thread::Current(), modify_ldt_lock);
+  arch_prctl(ARCH_SET_GS, this);
+
+  // Allow easy indirection back to Thread*.
+  self_ = this;
+
+  // Sanity check that reads from %gs point to this Thread*.
+  Thread* self_check;
+  CHECK_EQ(THREAD_SELF_OFFSET, OFFSETOF_MEMBER(Thread, self_));
+  __asm__ __volatile__("movq %%gs:(%1), %0"
+      : "=r"(self_check)  // output
+      : "r"(THREAD_SELF_OFFSET)  // input
+      :);  // clobber
+  CHECK_EQ(self_check, this);
+
+  // Sanity check other offsets.
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
+  CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
+}
+
+void Thread::CleanupCpu() {
+  // Sanity check that reads from %gs point to this Thread*.
+  Thread* self_check;
+  CHECK_EQ(THREAD_SELF_OFFSET, OFFSETOF_MEMBER(Thread, self_));
+  __asm__ __volatile__("movq %%gs:(%1), %0"
+      : "=r"(self_check)  // output
+      : "r"(THREAD_SELF_OFFSET)  // input
+      :);  // clobber
+  CHECK_EQ(self_check, this);
+
+  // Do nothing.
+}
+
+}  // namespace art
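
A Linux x86-64-only sketch of the %gs-based self pointer InitCpu sets up
above; the struct and zero offset are illustrative, and only the
arch_prctl-plus-gs-load pattern matches the runtime code:

    #include <asm/prctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #include <cassert>
    #include <cstdint>

    struct DemoSelf { DemoSelf* self; };  // self pointer at offset 0

    int main() {
      static DemoSelf s;
      s.self = &s;
      syscall(__NR_arch_prctl, ARCH_SET_GS, &s);  // as in the wrapper above
      DemoSelf* check;
      uintptr_t offset = 0;  // offsetof(DemoSelf, self)
      __asm__ __volatile__("movq %%gs:(%1), %0"
          : "=r"(check)   // output
          : "r"(offset)   // input
          :);             // clobber
      assert(check == &s);
      return 0;
    }
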
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 06c7b53..4c42099 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -40,6 +40,7 @@
 
 // Offsets within java.lang.Method.
 #define METHOD_DEX_CACHE_METHODS_OFFSET 12
-#define METHOD_CODE_OFFSET 36
+#define METHOD_PORTABLE_CODE_OFFSET 40
+#define METHOD_QUICK_CODE_OFFSET 48
 
 #endif  // ART_RUNTIME_ASM_SUPPORT_H_
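
The old single METHOD_CODE_OFFSET splits into separate portable and quick
slots, which the call *METHOD_*_CODE_OFFSET(%eax) fixes earlier in this
change depend on. A stand-in layout illustrating the two constants; the real
mirror::ArtMethod layout is not reproduced here:

    #include <cstddef>
    #include <cstdint>

    struct DemoMethod {
      uint8_t other_fields_[40];  // placeholder for everything before offset 40
      uint64_t portable_code_;    // METHOD_PORTABLE_CODE_OFFSET == 40
      uint64_t quick_code_;       // METHOD_QUICK_CODE_OFFSET == 48
    };

    static_assert(offsetof(DemoMethod, portable_code_) == 40, "portable code");
    static_assert(offsetof(DemoMethod, quick_code_) == 48, "quick code");

    int main() { return 0; }
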
diff --git a/runtime/atomic.cc b/runtime/atomic.cc
index bac0a99..63f2cf8 100644
--- a/runtime/atomic.cc
+++ b/runtime/atomic.cc
@@ -24,7 +24,7 @@
 std::vector<Mutex*>* QuasiAtomic::gSwapMutexes = nullptr;
 
 Mutex* QuasiAtomic::GetSwapMutex(const volatile int64_t* addr) {
-  return (*gSwapMutexes)[(reinterpret_cast<unsigned>(addr) >> 3U) % kSwapMutexCount];
+  return (*gSwapMutexes)[(reinterpret_cast<uintptr_t>(addr) >> 3U) % kSwapMutexCount];
 }
 
 void QuasiAtomic::Startup() {
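
The point of the cast change: reinterpret_cast<unsigned> truncates a 64-bit
pointer before hashing, while uintptr_t is wide enough on every host. A
standalone sketch; the mutex count is illustrative, since the real
kSwapMutexCount is not shown in this hunk:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const size_t kSwapMutexCount = 32;  // illustrative value only
      int64_t slot = 0;
      const volatile int64_t* addr = &slot;
      // >> 3 drops the 8-byte alignment bits before taking the bucket index.
      size_t index = (reinterpret_cast<uintptr_t>(addr) >> 3U) % kSwapMutexCount;
      printf("swap-mutex index: %zu\n", index);
      return 0;
    }
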
diff --git a/runtime/atomic.h b/runtime/atomic.h
index b1e9870..2a47e46 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -26,6 +26,69 @@
 
 class Mutex;
 
+template<typename T>
+class Atomic {
+ public:
+  Atomic<T>() : value_(0) { }
+
+  explicit Atomic<T>(T value) : value_(value) { }
+
+  Atomic<T>& operator=(T desired) {
+    Store(desired);
+    return *this;
+  }
+
+  T Load() const {
+    return value_;
+  }
+
+  operator T() const {
+    return Load();
+  }
+
+  T FetchAndAdd(const T value) {
+    return __sync_fetch_and_add(&value_, value);  // Return old value.
+  }
+
+  T FetchAndSub(const T value) {
+    return __sync_fetch_and_sub(&value_, value);  // Return old value.
+  }
+
+  T operator++() {  // Prefix operator.
+    return __sync_add_and_fetch(&value_, 1);  // Return new value.
+  }
+
+  T operator++(int) {  // Postfix operator.
+    return __sync_fetch_and_add(&value_, 1);  // Return old value.
+  }
+
+  T operator--() {  // Prefix operator.
+    return __sync_sub_and_fetch(&value_, 1);  // Return new value.
+  }
+
+  T operator--(int) {  // Postfix operator.
+    return __sync_fetch_and_sub(&value_, 1);  // Return old value.
+  }
+
+  bool CompareAndSwap(T expected_value, T desired_value) {
+    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
+  }
+
+  volatile T* Address() {
+    return &value_;
+  }
+
+ private:
+  // Unsafe = operator for non-atomic operations on the value.
+  void Store(T desired) {
+    value_ = desired;
+  }
+
+  volatile T value_;
+};
+
+typedef Atomic<int32_t> AtomicInteger;
+
 // NOTE: Two "quasiatomic" operations on the exact same memory address
 // are guaranteed to operate atomically with respect to each other,
 // but no guarantees are made about quasiatomic operations mixed with
@@ -80,7 +143,7 @@
   static void MembarLoadStore() {
   #if defined(__arm__)
     __asm__ __volatile__("dmb ish" : : : "memory");
-  #elif defined(__i386__)
+  #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("" : : : "memory");
   #elif defined(__mips__)
     __asm__ __volatile__("sync" : : : "memory");
@@ -92,7 +155,7 @@
   static void MembarLoadLoad() {
   #if defined(__arm__)
     __asm__ __volatile__("dmb ish" : : : "memory");
-  #elif defined(__i386__)
+  #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("" : : : "memory");
   #elif defined(__mips__)
     __asm__ __volatile__("sync" : : : "memory");
@@ -104,7 +167,7 @@
   static void MembarStoreStore() {
   #if defined(__arm__)
     __asm__ __volatile__("dmb ishst" : : : "memory");
-  #elif defined(__i386__)
+  #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("" : : : "memory");
   #elif defined(__mips__)
     __asm__ __volatile__("sync" : : : "memory");
@@ -116,7 +179,7 @@
   static void MembarStoreLoad() {
   #if defined(__arm__)
     __asm__ __volatile__("dmb ish" : : : "memory");
-  #elif defined(__i386__)
+  #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("mfence" : : : "memory");
   #elif defined(__mips__)
     __asm__ __volatile__("sync" : : : "memory");
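
A usage sketch for the new Atomic<T>, assuming runtime/atomic.h is on the
include path; AtomicInteger keeps working unchanged as the typedef:

    #include "atomic.h"  // runtime/atomic.h from this change

    int main() {
      art::AtomicInteger counter(0);  // Atomic<int32_t>
      counter++;                      // __sync_fetch_and_add, returns old value
      counter.FetchAndAdd(4);         // counter is now 5
      bool swapped = counter.CompareAndSwap(5, 100);  // succeeds: 5 == 5
      return (swapped && counter.Load() == 100) ? 0 : 1;
    }
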
diff --git a/runtime/atomic_integer.h b/runtime/atomic_integer.h
deleted file mode 100644
index 651ca4a..0000000
--- a/runtime/atomic_integer.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_ATOMIC_INTEGER_H_
-#define ART_RUNTIME_ATOMIC_INTEGER_H_
-
-#include <stdint.h>
-
-namespace art {
-
-class AtomicInteger {
- public:
-  AtomicInteger() : value_(0) { }
-
-  explicit AtomicInteger(int32_t value) : value_(value) { }
-
-  AtomicInteger& operator=(int32_t desired) {
-    Store(desired);
-    return *this;
-  }
-
-  int32_t Load() const {
-    return value_;
-  }
-
-  operator int32_t() const {
-    return Load();
-  }
-
-  int32_t FetchAndAdd(const int32_t value) {
-    return __sync_fetch_and_add(&value_, value);  // Return old_value.
-  }
-
-  int32_t FetchAndSub(const int32_t value) {
-    return __sync_fetch_and_sub(&value_, value);  // Return old value.
-  }
-
-  int32_t operator++() {  // Prefix operator.
-    return __sync_add_and_fetch(&value_, 1);  // Return new value.
-  }
-
-  int32_t operator++(int32_t) {  // Postfix operator.
-    return __sync_fetch_and_add(&value_, 1);  // Return old value.
-  }
-
-  int32_t operator--() {  // Prefix operator.
-    return __sync_sub_and_fetch(&value_, 1);  // Return new value.
-  }
-
-  int32_t operator--(int32_t) {  // Postfix operator.
-    return __sync_fetch_and_sub(&value_, 1);  // Return old value.
-  }
-
-  bool CompareAndSwap(int32_t expected_value, int32_t desired_value) {
-    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
-  }
-
-  volatile int32_t* Address() {
-    return &value_;
-  }
-
- private:
-  // Unsafe = operator for non atomic operations on the integer.
-  void Store(int32_t desired) {
-    value_ = desired;
-  }
-
-  volatile int32_t value_;
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_ATOMIC_INTEGER_H_
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index 91fc143..69951c5 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -18,7 +18,7 @@
 
 #include <string>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "common_test.h"
 #include "mirror/object_array-inl.h"
 #include "thread_pool.h"
diff --git a/runtime/base/bit_vector_test.cc b/runtime/base/bit_vector_test.cc
index d99d059..3fc9b86 100644
--- a/runtime/base/bit_vector_test.cc
+++ b/runtime/base/bit_vector_test.cc
@@ -25,7 +25,7 @@
 
   BitVector bv(kBits, false, Allocator::GetMallocAllocator());
   EXPECT_EQ(1U, bv.GetStorageSize());
-  EXPECT_EQ(kWordSize, bv.GetSizeOf());
+  EXPECT_EQ(sizeof(uint32_t), bv.GetSizeOf());
   EXPECT_FALSE(bv.IsExpandable());
 
   EXPECT_EQ(0U, bv.NumSetBits());
@@ -70,7 +70,7 @@
 
   BitVector bv(0U, false, Allocator::GetNoopAllocator(), kWords, bits);
   EXPECT_EQ(kWords, bv.GetStorageSize());
-  EXPECT_EQ(kWords * kWordSize, bv.GetSizeOf());
+  EXPECT_EQ(kWords * sizeof(uint32_t), bv.GetSizeOf());
   EXPECT_EQ(bits, bv.GetRawStorage());
   EXPECT_EQ(0U, bv.NumSetBits());
 
diff --git a/runtime/base/hex_dump.cc b/runtime/base/hex_dump.cc
new file mode 100644
index 0000000..936c52b
--- /dev/null
+++ b/runtime/base/hex_dump.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hex_dump.h"
+
+#include "globals.h"
+
+#include <string.h>
+
+namespace art {
+
+void HexDump::Dump(std::ostream& os) const {
+  if (byte_count_ == 0) {
+    return;
+  }
+
+  if (address_ == NULL) {
+    os << "00000000:";
+    return;
+  }
+
+  static const char gHexDigit[] = "0123456789abcdef";
+  const unsigned char* addr = reinterpret_cast<const unsigned char*>(address_);
+  // 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
+  char out[(kBitsPerWord / 4) + /* offset */
+           1 + /* colon */
+           (16 * 3) + /* 16 hex digits and space */
+           2 + /* white space */
+           16 + /* 16 characters */
+           1 /* \0 */ ];
+  size_t offset;    /* offset to show while printing */
+
+  if (show_actual_addresses_) {
+    offset = reinterpret_cast<size_t>(addr);
+  } else {
+    offset = 0;
+  }
+  memset(out, ' ', sizeof(out)-1);
+  out[kBitsPerWord / 4] = ':';
+  out[sizeof(out)-1] = '\0';
+
+  size_t byte_count = byte_count_;
+  size_t gap = offset & 0x0f;
+  while (byte_count > 0) {
+    size_t line_offset = offset & ~0x0f;
+
+    char* hex = out;
+    char* asc = out + (kBitsPerWord / 4) + /* offset */ 1 + /* colon */
+        (16 * 3) + /* 16 hex digits and space */ 2 /* white space */;
+
+    for (int i = 0; i < (kBitsPerWord / 4); i++) {
+      *hex++ = gHexDigit[line_offset >> (kBitsPerWord - 4)];
+      line_offset <<= 4;
+    }
+    hex++;
+    hex++;
+
+    size_t count = std::min(byte_count, 16 - gap);
+    // CHECK_NE(count, 0U);
+    // CHECK_LE(count + gap, 16U);
+
+    if (gap) {
+      /* only on first line */
+      hex += gap * 3;
+      asc += gap;
+    }
+
+    size_t i;
+    for (i = gap; i < count + gap; i++) {
+      *hex++ = gHexDigit[*addr >> 4];
+      *hex++ = gHexDigit[*addr & 0x0f];
+      hex++;
+      if (*addr >= 0x20 && *addr < 0x7f /*isprint(*addr)*/) {
+        *asc++ = *addr;
+      } else {
+        *asc++ = '.';
+      }
+      addr++;
+    }
+    for (; i < 16; i++) {
+      /* erase extra stuff; only happens on last line */
+      *hex++ = ' ';
+      *hex++ = ' ';
+      hex++;
+      *asc++ = ' ';
+    }
+
+    os << prefix_ << out;
+
+    gap = 0;
+    byte_count -= count;
+    offset += count;
+    if (byte_count > 0) {
+      os << "\n";
+    }
+  }
+}
+
+}  // namespace art
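
The sizing of out[] above is worth checking once by hand. For a 32-bit word (assuming kBitsPerWord == 32 on such targets) the arithmetic works out to a 76-byte buffer holding a 75-character visible line:

#include <assert.h>

int main() {
  // Mirror of the out[] sizing in HexDump::Dump for kBitsPerWord == 32.
  const int offset_digits = 32 / 4;  // 8 hex digits of address/offset.
  const int line_bytes = offset_digits +
                         1 +         // colon
                         (16 * 3) +  // "xx " for each of 16 bytes
                         2 +         // gap before the ASCII column
                         16 +        // ASCII column
                         1;          // trailing '\0'
  assert(line_bytes == 76);
  // The sample line in the comment ("01234560: 00 11 ... 0123456789abcdef")
  // is line_bytes - 1 = 75 visible characters.
  return 0;
}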
diff --git a/runtime/base/hex_dump.h b/runtime/base/hex_dump.h
new file mode 100644
index 0000000..8769ece
--- /dev/null
+++ b/runtime/base/hex_dump.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_HEX_DUMP_H_
+#define ART_RUNTIME_BASE_HEX_DUMP_H_
+
+#include "macros.h"
+
+#include <ostream>
+
+namespace art {
+
+// Prints a hex dump in this format:
+//
+// 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
+// 01234568: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
+class HexDump {
+ public:
+  HexDump(const void* address, size_t byte_count, bool show_actual_addresses, const char* prefix)
+      : address_(address), byte_count_(byte_count), show_actual_addresses_(show_actual_addresses),
+        prefix_(prefix) {
+  }
+
+  void Dump(std::ostream& os) const;
+
+ private:
+  const void* const address_;
+  const size_t byte_count_;
+  const bool show_actual_addresses_;
+  const char* const prefix_;
+
+  DISALLOW_COPY_AND_ASSIGN(HexDump);
+};
+
+inline std::ostream& operator<<(std::ostream& os, const HexDump& rhs) {
+  rhs.Dump(os);
+  return os;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_HEX_DUMP_H_
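
The inline operator<< at the end of the header is what lets callers write os << HexDump(...) directly; all formatting stays inside Dump(). The same adapter shape works for any class exposing a Dump(std::ostream&) member — a generic sketch, with an illustrative Counter class rather than anything from ART:

#include <iostream>
#include <ostream>

// Keep formatting logic in the class; the adapter just forwards the stream.
class Counter {
 public:
  explicit Counter(int n) : n_(n) { }
  void Dump(std::ostream& os) const { os << "Counter(" << n_ << ")"; }
 private:
  int n_;
};

inline std::ostream& operator<<(std::ostream& os, const Counter& rhs) {
  rhs.Dump(os);
  return os;
}

int main() {
  std::cout << Counter(3) << "\n";  // Prints "Counter(3)".
  return 0;
}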
diff --git a/runtime/base/hex_dump_test.cc b/runtime/base/hex_dump_test.cc
new file mode 100644
index 0000000..d950961
--- /dev/null
+++ b/runtime/base/hex_dump_test.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hex_dump.h"
+
+#include "globals.h"
+
+#include "gtest/gtest.h"
+
+#include <stdint.h>
+
+namespace art {
+
+TEST(HexDump, OneLine) {
+  const char* test_text = "0123456789abcdef";
+  std::ostringstream oss;
+  oss << HexDump(test_text, strlen(test_text), false, "");
+  EXPECT_STREQ(oss.str().c_str(),
+               "00000000: 30 31 32 33 34 35 36 37 38 39 61 62 63 64 65 66  0123456789abcdef");
+}
+
+TEST(HexDump, MultiLine) {
+  const char* test_text = "0123456789abcdef0123456789ABCDEF";
+  std::ostringstream oss;
+  oss << HexDump(test_text, strlen(test_text), false, "");
+  EXPECT_STREQ(oss.str().c_str(),
+               "00000000: 30 31 32 33 34 35 36 37 38 39 61 62 63 64 65 66  0123456789abcdef\n"
+               "00000010: 30 31 32 33 34 35 36 37 38 39 41 42 43 44 45 46  0123456789ABCDEF");
+}
+
+uint64_t g16byte_aligned_number __attribute__ ((aligned(16)));  // NOLINT(whitespace/parens)
+TEST(HexDump, ShowActualAddresses) {
+  g16byte_aligned_number = 0x6162636465666768;
+  std::ostringstream oss;
+  oss << HexDump(&g16byte_aligned_number, 8, true, "");
+  // Compare ignoring pointer.
+  EXPECT_STREQ(oss.str().c_str() + (kBitsPerWord / 4),
+               ": 68 67 66 65 64 63 62 61                          hgfedcba        ");
+}
+
+TEST(HexDump, Prefix) {
+  const char* test_text = "0123456789abcdef";
+  std::ostringstream oss;
+  oss << HexDump(test_text, strlen(test_text), false, "test prefix: ");
+  EXPECT_STREQ(oss.str().c_str(),
+               "test prefix: 00000000: 30 31 32 33 34 35 36 37 38 39 61 62 63 64 65 66  "
+               "0123456789abcdef");
+}
+
+}  // namespace art
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 15554ac..46b8ff2 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -161,97 +161,4 @@
   }
 }
 
-HexDump::HexDump(const void* address, size_t byte_count, bool show_actual_addresses)
-    : address_(address), byte_count_(byte_count), show_actual_addresses_(show_actual_addresses) {
-}
-
-void HexDump::Dump(std::ostream& os) const {
-  if (byte_count_ == 0) {
-    return;
-  }
-
-  if (address_ == NULL) {
-    os << "00000000:";
-    return;
-  }
-
-  static const char gHexDigit[] = "0123456789abcdef";
-  const unsigned char* addr = reinterpret_cast<const unsigned char*>(address_);
-  // 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
-  char out[(kBitsPerWord / 4) + /* offset */
-           1 + /* colon */
-           (16 * 3) + /* 16 hex digits and space */
-           2 + /* white space */
-           16 + /* 16 characters*/
-           1 /* \0 */ ];
-  size_t offset;    /* offset to show while printing */
-
-  if (show_actual_addresses_) {
-    offset = reinterpret_cast<size_t>(addr);
-  } else {
-    offset = 0;
-  }
-  memset(out, ' ', sizeof(out)-1);
-  out[kBitsPerWord / 4] = ':';
-  out[sizeof(out)-1] = '\0';
-
-  size_t byte_count = byte_count_;
-  size_t gap = offset & 0x0f;
-  while (byte_count) {
-    size_t line_offset = offset & ~0x0f;
-
-    char* hex = out;
-    char* asc = out + (kBitsPerWord / 4) + /* offset */ 1 + /* colon */
-        (16 * 3) + /* 16 hex digits and space */ 2 /* white space */;
-
-    for (int i = 0; i < (kBitsPerWord / 4); i++) {
-      *hex++ = gHexDigit[line_offset >> (kBitsPerWord - 4)];
-      line_offset <<= 4;
-    }
-    hex++;
-    hex++;
-
-    size_t count = std::min(byte_count, 16 - gap);
-    CHECK_NE(count, 0U);
-    CHECK_LE(count + gap, 16U);
-
-    if (gap) {
-      /* only on first line */
-      hex += gap * 3;
-      asc += gap;
-    }
-
-    size_t i;
-    for (i = gap ; i < count + gap; i++) {
-      *hex++ = gHexDigit[*addr >> 4];
-      *hex++ = gHexDigit[*addr & 0x0f];
-      hex++;
-      if (*addr >= 0x20 && *addr < 0x7f /*isprint(*addr)*/) {
-        *asc++ = *addr;
-      } else {
-        *asc++ = '.';
-      }
-      addr++;
-    }
-    for (; i < 16; i++) {
-      /* erase extra stuff; only happens on last line */
-      *hex++ = ' ';
-      *hex++ = ' ';
-      hex++;
-      *asc++ = ' ';
-    }
-
-    os << out;
-
-    gap = 0;
-    byte_count -= count;
-    offset += count;
-  }
-}
-
-std::ostream& operator<<(std::ostream& os, const HexDump& rhs) {
-  rhs.Dump(os);
-  return os;
-}
-
 }  // namespace art
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 8e40da0..075d571 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -208,24 +208,6 @@
   DISALLOW_COPY_AND_ASSIGN(LogMessage);
 };
 
-// Prints a hex dump in this format:
-//
-// 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
-// 01234568: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
-class HexDump {
- public:
-  HexDump(const void* address, size_t byte_count, bool show_actual_addresses = false);
-  void Dump(std::ostream& os) const;
-
- private:
-  const void* address_;
-  size_t byte_count_;
-  bool show_actual_addresses_;
-
-  DISALLOW_COPY_AND_ASSIGN(HexDump);
-};
-std::ostream& operator<<(std::ostream& os, const HexDump& rhs);
-
 // A convenience to allow any class with a "Dump(std::ostream& os)" member function
 // but without an operator<< to be used as if it had an operator<<. Use like this:
 //
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 05e3a83..ff72d16 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -47,7 +47,7 @@
 
 struct AllMutexData {
   // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
-  AtomicInteger all_mutexes_guard;
+  Atomic<const BaseMutex*> all_mutexes_guard;
   // All created mutexes guarded by all_mutexes_guard_.
   std::set<BaseMutex*>* all_mutexes;
   AllMutexData() : all_mutexes(NULL) {}
@@ -57,12 +57,12 @@
 class ScopedAllMutexesLock {
  public:
   explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
-    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(0, reinterpret_cast<int32_t>(mutex))) {
+    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(0, mutex)) {
       NanoSleep(100);
     }
   }
   ~ScopedAllMutexesLock() {
-    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(reinterpret_cast<int32_t>(mutex_), 0)) {
+    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(mutex_, 0)) {
       NanoSleep(100);
     }
   }
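
ScopedAllMutexesLock is deliberately not a Mutex: mutex registration runs while the mutex machinery itself is being set up, so the guard is a raw CAS spin with a nanosecond backoff, as the comment in AllMutexData says. A self-contained sketch of that pattern — using the __sync builtin directly and a plain nanosleep() standing in for ART's NanoSleep():

#include <time.h>

// Spin guard in the style of ScopedAllMutexesLock: acquire by CAS-ing a
// word from 0 to a non-zero owner token; release by CAS-ing it back.
static volatile long g_guard = 0;

static void BackOff() {
  // Tiny sleep so contending threads don't burn a core while spinning.
  struct timespec ts = { 0, 100 };  // 100ns, like NanoSleep(100).
  nanosleep(&ts, 0);
}

class ScopedSpinGuard {
 public:
  explicit ScopedSpinGuard(long token) : token_(token) {
    while (!__sync_bool_compare_and_swap(&g_guard, 0L, token_)) {
      BackOff();
    }
  }
  ~ScopedSpinGuard() {
    while (!__sync_bool_compare_and_swap(&g_guard, token_, 0L)) {
      BackOff();
    }
  }
 private:
  const long token_;
};

int main() {
  // Any non-zero value works as a token; the diff uses the mutex pointer.
  ScopedSpinGuard guard(1);
  // ... guarded work ...
  return 0;
}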
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 1c1dcaf..63ed6cb 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -23,7 +23,7 @@
 #include <iosfwd>
 #include <string>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index f48c76d..87d1c06 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -102,11 +102,11 @@
   return fd_ >= 0;
 }
 
-bool FdFile::ReadFully(void* buffer, int64_t byte_count) {
+bool FdFile::ReadFully(void* buffer, size_t byte_count) {
   char* ptr = static_cast<char*>(buffer);
   while (byte_count > 0) {
-    int bytes_read = TEMP_FAILURE_RETRY(read(fd_, ptr, byte_count));
-    if (bytes_read <= 0) {
+    ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd_, ptr, byte_count));
+    if (bytes_read == -1) {
       return false;
     }
     byte_count -= bytes_read;  // Reduce the number of remaining bytes.
@@ -115,15 +115,15 @@
   return true;
 }
 
-bool FdFile::WriteFully(const void* buffer, int64_t byte_count) {
+bool FdFile::WriteFully(const void* buffer, size_t byte_count) {
   const char* ptr = static_cast<const char*>(buffer);
   while (byte_count > 0) {
-    int bytes_read = TEMP_FAILURE_RETRY(write(fd_, ptr, byte_count));
-    if (bytes_read < 0) {
+    ssize_t bytes_written = TEMP_FAILURE_RETRY(write(fd_, ptr, byte_count));
+    if (bytes_written == -1) {
       return false;
     }
-    byte_count -= bytes_read;  // Reduce the number of remaining bytes.
-    ptr += bytes_read;  // Move the buffer forward.
+    byte_count -= bytes_written;  // Reduce the number of remaining bytes.
+    ptr += bytes_written;  // Move the buffer forward.
   }
   return true;
 }
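
The type changes above follow the actual read(2)/write(2) signatures: both return ssize_t, and stuffing that into an int can truncate on large transfers. One subtlety a defensive variant also handles: read() returns 0 at end-of-file, so a read loop that only bails on -1 would spin forever when asked for more bytes than the file holds. A standalone sketch under that assumption (TEMP_FAILURE_RETRY comes from <unistd.h>; glibc wants -D_GNU_SOURCE for it):

#include <unistd.h>

// Full-read loop in the shape of FdFile::ReadFully, with the extra EOF
// check (bytes_read == 0) treated as failure.
static bool ReadFullyOrFail(int fd, void* buffer, size_t byte_count) {
  char* ptr = static_cast<char*>(buffer);
  while (byte_count > 0) {
    ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd, ptr, byte_count));
    if (bytes_read <= 0) {  // -1: error; 0: EOF before the request was met.
      return false;
    }
    byte_count -= bytes_read;  // bytes_read is positive here.
    ptr += bytes_read;
  }
  return true;
}

int main() {
  char buf[4];
  // Reads from stdin; succeeds only if 4 bytes arrive before EOF.
  return ReadFullyOrFail(0, buf, sizeof(buf)) ? 0 : 1;
}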
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 19e3511..01f4ca2 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -61,8 +61,8 @@
     return file_path_;
   }
   void DisableAutoClose();
-  bool ReadFully(void* buffer, int64_t byte_count);
-  bool WriteFully(const void* buffer, int64_t byte_count);
+  bool ReadFully(void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count);
 
  private:
   int fd_;
diff --git a/runtime/base/unix_file/mapped_file.cc b/runtime/base/unix_file/mapped_file.cc
index b63fdd3..bc23a74 100644
--- a/runtime/base/unix_file/mapped_file.cc
+++ b/runtime/base/unix_file/mapped_file.cc
@@ -101,7 +101,8 @@
       errno = EINVAL;
       return -errno;
     }
-    int64_t read_size = std::max(0LL, std::min(byte_count, file_size_ - offset));
+    int64_t read_size = std::max(static_cast<int64_t>(0),
+                                 std::min(byte_count, file_size_ - offset));
     if (read_size > 0) {
       memcpy(buf, data() + offset, read_size);
     }
@@ -136,7 +137,8 @@
       errno = EINVAL;
       return -errno;
     }
-    int64_t write_size = std::max(0LL, std::min(byte_count, file_size_ - offset));
+    int64_t write_size = std::max(static_cast<int64_t>(0),
+                                  std::min(byte_count, file_size_ - offset));
     if (write_size > 0) {
       memcpy(data() + offset, buf, write_size);
     }
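
Replacing 0LL with static_cast<int64_t>(0) looks cosmetic but fixes a template-deduction failure: std::min and std::max are declared as min(const T&, const T&), so both arguments must deduce the same T. On LP64 targets int64_t is long while 0LL is long long — distinct types even though both are 64 bits wide — so std::max(0LL, file_size_ - offset) does not compile there. A compact reproduction, assuming an LP64 toolchain:

#include <stdint.h>
#include <algorithm>

int main() {
  int64_t v = -5;
  // On LP64 Linux (int64_t == long) the next line fails to compile:
  //   int64_t clamped = std::max(0LL, v);  // deduces long long vs. long
  // Casting the literal to the operand's type fixes deduction:
  int64_t clamped = std::max(static_cast<int64_t>(0), v);
  return clamped == 0 ? 0 : 1;
}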
diff --git a/runtime/base/unix_file/mapped_file_test.cc b/runtime/base/unix_file/mapped_file_test.cc
index 3dda02f..49750f4 100644
--- a/runtime/base/unix_file/mapped_file_test.cc
+++ b/runtime/base/unix_file/mapped_file_test.cc
@@ -65,7 +65,7 @@
   ASSERT_TRUE(file.Open(good_path_, MappedFile::kReadOnlyMode));
   EXPECT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
-  EXPECT_EQ(kContent.size(), file.size());
+  EXPECT_EQ(kContent.size(), static_cast<uint64_t>(file.size()));
   EXPECT_EQ(0, file.Close());
   EXPECT_EQ(-1, file.Fd());
   EXPECT_FALSE(file.IsOpened());
@@ -86,7 +86,7 @@
   EXPECT_FALSE(file.IsMapped());
   EXPECT_TRUE(file.MapReadOnly());
   EXPECT_TRUE(file.IsMapped());
-  EXPECT_EQ(kContent.size(), file.size());
+  EXPECT_EQ(kContent.size(), static_cast<uint64_t>(file.size()));
   ASSERT_TRUE(file.data());
   EXPECT_EQ(0, memcmp(kContent.c_str(), file.data(), file.size()));
   EXPECT_EQ(0, file.Flush());
@@ -113,7 +113,7 @@
   ASSERT_TRUE(file.Open(new_path, MappedFile::kReadWriteMode));
   EXPECT_TRUE(file.MapReadWrite(kContent.size()));
   EXPECT_TRUE(file.IsMapped());
-  EXPECT_EQ(kContent.size(), file.size());
+  EXPECT_EQ(kContent.size(), static_cast<uint64_t>(file.size()));
   ASSERT_TRUE(file.data());
   memcpy(file.data(), kContent.c_str(), kContent.size());
   EXPECT_EQ(0, file.Close());
@@ -200,15 +200,16 @@
   // A zero-length write is a no-op.
   EXPECT_EQ(0, file.Write(kContent.c_str(), 0, 0));
   // But the file size is as given when mapped.
-  EXPECT_EQ(kContent.size(), file.GetLength());
+  EXPECT_EQ(kContent.size(), static_cast<uint64_t>(file.GetLength()));
 
   // Data written past the end are discarded.
   EXPECT_EQ(kContent.size() - 1,
-            file.Write(kContent.c_str(), kContent.size(), 1));
+            static_cast<uint64_t>(file.Write(kContent.c_str(), kContent.size(), 1)));
   EXPECT_EQ(0, memcmp(kContent.c_str(), file.data() + 1, kContent.size() - 1));
 
   // Data can be overwritten.
-  EXPECT_EQ(kContent.size(), file.Write(kContent.c_str(), kContent.size(), 0));
+  EXPECT_EQ(kContent.size(),
+            static_cast<uint64_t>(file.Write(kContent.c_str(), kContent.size(), 0)));
   EXPECT_EQ(0, memcmp(kContent.c_str(), file.data(), kContent.size()));
 }
 
diff --git a/runtime/base/unix_file/null_file_test.cc b/runtime/base/unix_file/null_file_test.cc
index 0f20acd..410fdfc 100644
--- a/runtime/base/unix_file/null_file_test.cc
+++ b/runtime/base/unix_file/null_file_test.cc
@@ -48,7 +48,7 @@
   NullFile f;
   // The length is always 0.
   ASSERT_EQ(0, f.GetLength());
-  ASSERT_EQ(content.size(), f.Write(content.data(), content.size(), 0));
+  ASSERT_EQ(content.size(), static_cast<uint64_t>(f.Write(content.data(), content.size(), 0)));
   ASSERT_EQ(0, f.GetLength());
 }
 
@@ -58,8 +58,8 @@
   // You can't write at a negative offset...
   ASSERT_EQ(-EINVAL, f.Write(content.data(), content.size(), -128));
   // But you can write anywhere else...
-  ASSERT_EQ(content.size(), f.Write(content.data(), content.size(), 0));
-  ASSERT_EQ(content.size(), f.Write(content.data(), content.size(), 128));
+  ASSERT_EQ(content.size(), static_cast<uint64_t>(f.Write(content.data(), content.size(), 0)));
+  ASSERT_EQ(content.size(), static_cast<uint64_t>(f.Write(content.data(), content.size(), 128)));
   // ...though the file will remain empty.
   ASSERT_EQ(0, f.GetLength());
 }
diff --git a/runtime/base/unix_file/random_access_file_test.h b/runtime/base/unix_file/random_access_file_test.h
index 9d8550d..3152788 100644
--- a/runtime/base/unix_file/random_access_file_test.h
+++ b/runtime/base/unix_file/random_access_file_test.h
@@ -71,7 +71,7 @@
     ASSERT_EQ(0, file->Read(buf, 123, 0));
 
     const std::string content("hello");
-    ASSERT_EQ(content.size(), file->Write(content.data(), content.size(), 0));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(), 0)));
 
     TestReadContent(content, file.get());
   }
@@ -83,21 +83,21 @@
     ASSERT_EQ(-EINVAL, file->Read(buf.get(), 0, -123));
 
     // Reading too much gets us just what's in the file.
-    ASSERT_EQ(content.size(), file->Read(buf.get(), buf_size, 0));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Read(buf.get(), buf_size, 0)));
     ASSERT_EQ(std::string(buf.get(), content.size()), content);
 
     // We only get as much as we ask for.
     const size_t short_request = 2;
     ASSERT_LT(short_request, content.size());
-    ASSERT_EQ(short_request, file->Read(buf.get(), short_request, 0));
+    ASSERT_EQ(short_request, static_cast<uint64_t>(file->Read(buf.get(), short_request, 0)));
     ASSERT_EQ(std::string(buf.get(), short_request),
               content.substr(0, short_request));
 
     // We don't have to start at the beginning.
     const int non_zero_offset = 2;
     ASSERT_GT(non_zero_offset, 0);
-    ASSERT_EQ(short_request,
-              file->Read(buf.get(), short_request, non_zero_offset));
+    ASSERT_EQ(short_request, static_cast<uint64_t>(file->Read(buf.get(), short_request,
+                                                              non_zero_offset)));
     ASSERT_EQ(std::string(buf.get(), short_request),
               content.substr(non_zero_offset, short_request));
 
@@ -109,8 +109,8 @@
   void TestSetLength() {
     const std::string content("hello");
     UniquePtr<RandomAccessFile> file(MakeTestFile());
-    ASSERT_EQ(content.size(), file->Write(content.data(), content.size(), 0));
-    ASSERT_EQ(content.size(), file->GetLength());
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(), 0)));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->GetLength()));
 
     // Can't give a file a negative length.
     ASSERT_EQ(-EINVAL, file->SetLength(-123));
@@ -143,20 +143,20 @@
     ASSERT_EQ(0, file->GetLength());
 
     // We can write data.
-    ASSERT_EQ(content.size(), file->Write(content.data(), content.size(), 0));
-    ASSERT_EQ(content.size(), file->GetLength());
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(), 0)));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->GetLength()));
     std::string new_content;
     ASSERT_TRUE(ReadString(file.get(), &new_content));
     ASSERT_EQ(new_content, content);
 
     // We can read it back.
     char buf[256];
-    ASSERT_EQ(content.size(), file->Read(buf, sizeof(buf), 0));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Read(buf, sizeof(buf), 0)));
     ASSERT_EQ(std::string(buf, content.size()), content);
 
     // We can append data past the end.
-    ASSERT_EQ(content.size(),
-    file->Write(content.data(), content.size(), file->GetLength() + 1));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(),
+                                                                file->GetLength() + 1)));
     int64_t new_length = 2*content.size() + 1;
     ASSERT_EQ(file->GetLength(), new_length);
     ASSERT_TRUE(ReadString(file.get(), &new_content));
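
The static_cast<uint64_t> calls sprinkled through these three test files all target one diagnostic: content.size() is an unsigned size_t while Write()/Read()/GetLength() return signed values, and EXPECT_EQ/ASSERT_EQ compare their arguments directly, so -Wsign-compare fires (fatal under -Werror). Casting the signed side once it is known non-negative is the conventional fix; schematically, with a hypothetical stand-in writer:

#include <stdint.h>
#include <string>

// Stand-in for RandomAccessFile::Write: returns bytes written, signed.
static int64_t FakeWrite(const std::string& s) {
  return static_cast<int64_t>(s.size());
}

int main() {
  const std::string content("hello");
  int64_t written = FakeWrite(content);
  if (written < 0) return 1;  // Error path: nothing to compare.
  // size_t vs. int64_t would draw -Wsign-compare; the cast is safe
  // because 'written' was just checked to be non-negative.
  bool equal = (content.size() == static_cast<uint64_t>(written));
  return equal ? 0 : 1;
}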
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 1b79ee0..960c26d 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -40,23 +40,23 @@
 static void JniAbort(const char* jni_function_name, const char* msg) {
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
-  mirror::ArtMethod* current_method = self->GetCurrentMethod(NULL);
+  mirror::ArtMethod* current_method = self->GetCurrentMethod(nullptr);
 
   std::ostringstream os;
   os << "JNI DETECTED ERROR IN APPLICATION: " << msg;
 
-  if (jni_function_name != NULL) {
+  if (jni_function_name != nullptr) {
     os << "\n    in call to " << jni_function_name;
   }
   // TODO: is this useful given that we're about to dump the calling thread's stack?
-  if (current_method != NULL) {
+  if (current_method != nullptr) {
     os << "\n    from " << PrettyMethod(current_method);
   }
   os << "\n";
   self->Dump(os);
 
   JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-  if (vm->check_jni_abort_hook != NULL) {
+  if (vm->check_jni_abort_hook != nullptr) {
     vm->check_jni_abort_hook(vm->check_jni_abort_hook_data, os.str());
   } else {
     // Ensure that we get a native stack trace for this thread.
@@ -118,10 +118,10 @@
   "Ljavax/",
   "Llibcore/",
   "Lorg/apache/harmony/",
-  NULL
+  nullptr
 };
 
-static bool ShouldTrace(JavaVMExt* vm, const mirror::ArtMethod* method)
+static bool ShouldTrace(JavaVMExt* vm, mirror::ArtMethod* method)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // If both "-Xcheck:jni" and "-Xjnitrace:" are enabled, we print trace messages
   // when a native method that matches the -Xjnitrace argument calls a JNI function
@@ -135,7 +135,7 @@
   if (VLOG_IS_ON(third_party_jni)) {
     // Return true if we're trying to log all third-party JNI activity and 'method' doesn't look
     // like part of Android.
-    for (size_t i = 0; gBuiltInPrefixes[i] != NULL; ++i) {
+    for (size_t i = 0; gBuiltInPrefixes[i] != nullptr; ++i) {
       if (StartsWith(class_name, gBuiltInPrefixes[i])) {
         return false;
       }
@@ -192,15 +192,16 @@
    *
    * Works for both static and instance fields.
    */
-  void CheckFieldType(jobject java_object, jfieldID fid, char prim, bool isStatic)
+  void CheckFieldType(jvalue value, jfieldID fid, char prim, bool isStatic)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtField* f = CheckFieldID(fid);
-    if (f == NULL) {
+    if (f == nullptr) {
       return;
     }
     mirror::Class* field_type = FieldHelper(f).GetType();
     if (!field_type->IsPrimitive()) {
-      if (java_object != NULL) {
+      jobject java_object = value.l;
+      if (java_object != nullptr) {
         mirror::Object* obj = soa_.Decode<mirror::Object*>(java_object);
         // If java_object is a weak global ref whose referent has been cleared,
         // obj will be NULL.  Otherwise, obj should always be non-NULL
@@ -242,7 +243,7 @@
   void CheckInstanceFieldID(jobject java_object, jfieldID fid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::Object* o = soa_.Decode<mirror::Object*>(java_object);
-    if (o == NULL || !Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
+    if (o == nullptr || !Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "field operation on invalid %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object);
@@ -250,12 +251,12 @@
     }
 
     mirror::ArtField* f = CheckFieldID(fid);
-    if (f == NULL) {
+    if (f == nullptr) {
       return;
     }
     mirror::Class* c = o->GetClass();
     FieldHelper fh(f);
-    if (c->FindInstanceField(fh.GetName(), fh.GetTypeDescriptor()) == NULL) {
+    if (c->FindInstanceField(fh.GetName(), fh.GetTypeDescriptor()) == nullptr) {
       JniAbortF(function_name_, "jfieldID %s not valid for an object of class %s",
                 PrettyField(f).c_str(), PrettyTypeOf(o).c_str());
     }
@@ -265,7 +266,7 @@
    * Verify that the pointer value is non-NULL.
    */
   void CheckNonNull(const void* ptr) {
-    if (ptr == NULL) {
+    if (ptr == nullptr) {
       JniAbortF(function_name_, "non-nullable argument was NULL");
     }
   }
@@ -277,7 +278,7 @@
   void CheckSig(jmethodID mid, const char* expectedType, bool isStatic)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = CheckMethodID(mid);
-    if (m == NULL) {
+    if (m == nullptr) {
       return;
     }
     if (*expectedType != MethodHelper(m).GetShorty()[0]) {
@@ -303,8 +304,8 @@
   void CheckStaticFieldID(jclass java_class, jfieldID fid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::Class* c = soa_.Decode<mirror::Class*>(java_class);
-    const mirror::ArtField* f = CheckFieldID(fid);
-    if (f == NULL) {
+    mirror::ArtField* f = CheckFieldID(fid);
+    if (f == nullptr) {
       return;
     }
     if (f->GetDeclaringClass() != c) {
@@ -324,8 +325,8 @@
    */
   void CheckStaticMethod(jclass java_class, jmethodID mid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const mirror::ArtMethod* m = CheckMethodID(mid);
-    if (m == NULL) {
+    mirror::ArtMethod* m = CheckMethodID(mid);
+    if (m == nullptr) {
       return;
     }
     mirror::Class* c = soa_.Decode<mirror::Class*>(java_class);
@@ -344,8 +345,8 @@
    */
   void CheckVirtualMethod(jobject java_object, jmethodID mid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const mirror::ArtMethod* m = CheckMethodID(mid);
-    if (m == NULL) {
+    mirror::ArtMethod* m = CheckMethodID(mid);
+    if (m == nullptr) {
       return;
     }
     mirror::Object* o = soa_.Decode<mirror::Object*>(java_object);
@@ -394,17 +395,18 @@
   void Check(bool entry, const char* fmt0, ...) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     va_list ap;
 
-    const mirror::ArtMethod* traceMethod = NULL;
+    mirror::ArtMethod* traceMethod = nullptr;
     if (has_method_ && (!soa_.Vm()->trace.empty() || VLOG_IS_ON(third_party_jni))) {
       // We need to guard some of the invocation interface's calls: a bad caller might
       // use DetachCurrentThread or GetEnv on a thread that's not yet attached.
       Thread* self = Thread::Current();
-      if ((flags_ & kFlag_Invocation) == 0 || self != NULL) {
-        traceMethod = self->GetCurrentMethod(NULL);
+      if ((flags_ & kFlag_Invocation) == 0 || self != nullptr) {
+        traceMethod = self->GetCurrentMethod(nullptr);
       }
     }
 
-    if (((flags_ & kFlag_ForceTrace) != 0) || (traceMethod != NULL && ShouldTrace(soa_.Vm(), traceMethod))) {
+    if (((flags_ & kFlag_ForceTrace) != 0) ||
+        (traceMethod != nullptr && ShouldTrace(soa_.Vm(), traceMethod))) {
       va_start(ap, fmt0);
       std::string msg;
       for (const char* fmt = fmt0; *fmt;) {
@@ -428,7 +430,7 @@
         } else if (ch == 'I' || ch == 'S') {  // jint, jshort
           StringAppendF(&msg, "%d", va_arg(ap, int));
         } else if (ch == 'J') {  // jlong
-          StringAppendF(&msg, "%lld", va_arg(ap, jlong));
+          StringAppendF(&msg, "%" PRId64, va_arg(ap, jlong));
         } else if (ch == 'Z') {  // jboolean
           StringAppendF(&msg, "%s", va_arg(ap, int) ? "true" : "false");
         } else if (ch == 'V') {  // void
@@ -442,7 +444,7 @@
         } else if (ch == 'L' || ch == 'a' || ch == 's') {  // jobject, jarray, jstring
           // For logging purposes, these are identical.
           jobject o = va_arg(ap, jobject);
-          if (o == NULL) {
+          if (o == nullptr) {
             msg += "NULL";
           } else {
             StringAppendF(&msg, "%p", o);
@@ -453,7 +455,7 @@
         } else if (ch == 'c') {  // jclass
           jclass jc = va_arg(ap, jclass);
           mirror::Class* c = reinterpret_cast<mirror::Class*>(Thread::Current()->DecodeJObject(jc));
-          if (c == NULL) {
+          if (c == nullptr) {
             msg += "NULL";
           } else if (c == kInvalidIndirectRefObject ||
               !Runtime::Current()->GetHeap()->IsValidObjectAddress(c)) {
@@ -488,7 +490,7 @@
           }
         } else if (ch == 'p') {  // void* ("pointer")
           void* p = va_arg(ap, void*);
-          if (p == NULL) {
+          if (p == nullptr) {
             msg += "NULL";
           } else {
             StringAppendF(&msg, "(void*) %p", p);
@@ -506,7 +508,7 @@
           }
         } else if (ch == 'u') {  // const char* (Modified UTF-8)
           const char* utf = va_arg(ap, const char*);
-          if (utf == NULL) {
+          if (utf == nullptr) {
             msg += "NULL";
           } else {
             StringAppendF(&msg, "\"%s\"", utf);
@@ -563,7 +565,7 @@
           }
         } else if (ch == 'z') {
           CheckLengthPositive(va_arg(ap, jsize));
-        } else if (strchr("BCISZbfmpEv", ch) != NULL) {
+        } else if (strchr("BCISZbfmpEv", ch) != nullptr) {
           va_arg(ap, uint32_t);  // Skip this argument.
         } else if (ch == 'D' || ch == 'F') {
           va_arg(ap, double);  // Skip this argument.
@@ -595,7 +597,7 @@
    */
   bool CheckInstance(InstanceKind kind, jobject java_object)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const char* what = NULL;
+    const char* what = nullptr;
     switch (kind) {
     case kClass:
       what = "jclass";
@@ -616,7 +618,7 @@
       LOG(FATAL) << "Unknown kind " << static_cast<int>(kind);
     }
 
-    if (java_object == NULL) {
+    if (java_object == nullptr) {
       JniAbortF(function_name_, "%s received null %s", function_name_, what);
       return false;
     }
@@ -670,7 +672,7 @@
    * Since we're dealing with objects, switch to "running" mode.
    */
   void CheckArray(jarray java_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (java_array == NULL) {
+    if (java_array == nullptr) {
       JniAbortF(function_name_, "jarray was NULL");
       return;
     }
@@ -692,29 +694,29 @@
   }
 
   mirror::ArtField* CheckFieldID(jfieldID fid) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (fid == NULL) {
+    if (fid == nullptr) {
       JniAbortF(function_name_, "jfieldID was NULL");
-      return NULL;
+      return nullptr;
     }
     mirror::ArtField* f = soa_.DecodeField(fid);
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f) || !f->IsArtField()) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "invalid jfieldID: %p", fid);
-      return NULL;
+      return nullptr;
     }
     return f;
   }
 
   mirror::ArtMethod* CheckMethodID(jmethodID mid) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (mid == NULL) {
+    if (mid == nullptr) {
       JniAbortF(function_name_, "jmethodID was NULL");
-      return NULL;
+      return nullptr;
     }
     mirror::ArtMethod* m = soa_.DecodeMethod(mid);
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(m) || !m->IsArtMethod()) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "invalid jmethodID: %p", mid);
-      return NULL;
+      return nullptr;
     }
     return m;
   }
@@ -727,7 +729,7 @@
    */
   void CheckObject(jobject java_object)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (java_object == NULL) {
+    if (java_object == nullptr) {
       return;
     }
 
@@ -752,7 +754,7 @@
 
   void CheckThread(int flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Thread* self = Thread::Current();
-    if (self == NULL) {
+    if (self == nullptr) {
       JniAbortF(function_name_, "a thread (tid %d) is making JNI calls without being attached", GetTid());
       return;
     }
@@ -813,7 +815,7 @@
 
   // Verifies that "bytes" points to valid Modified UTF-8 data.
   void CheckUtfString(const char* bytes, bool nullable) {
-    if (bytes == NULL) {
+    if (bytes == nullptr) {
       if (!nullable) {
         JniAbortF(function_name_, "non-nullable const char* was NULL");
         return;
@@ -821,9 +823,9 @@
       return;
     }
 
-    const char* errorKind = NULL;
+    const char* errorKind = nullptr;
     uint8_t utf8 = CheckUtfBytes(bytes, &errorKind);
-    if (errorKind != NULL) {
+    if (errorKind != nullptr) {
       JniAbortF(function_name_,
                 "input is not valid Modified UTF-8: illegal %s byte %#x\n"
                 "    string: '%s'", errorKind, utf8, bytes);
@@ -998,7 +1000,7 @@
     const uint16_t* pat = reinterpret_cast<const uint16_t*>(fullBuf);
     for (size_t i = sizeof(GuardedCopy) / 2; i < (kGuardLen / 2 - sizeof(GuardedCopy)) / 2; i++) {
       if (pat[i] != kGuardPattern) {
-        JniAbortF(functionName, "guard pattern(1) disturbed at %p +%d", fullBuf, i*2);
+        JniAbortF(functionName, "guard pattern(1) disturbed at %p +%zd", fullBuf, i*2);
       }
     }
 
@@ -1018,7 +1020,7 @@
     pat = reinterpret_cast<const uint16_t*>(fullBuf + offset);
     for (size_t i = 0; i < kGuardLen / 4; i++) {
       if (pat[i] != kGuardPattern) {
-        JniAbortF(functionName, "guard pattern(2) disturbed at %p +%d", fullBuf, offset + i*2);
+        JniAbortF(functionName, "guard pattern(2) disturbed at %p +%zd", fullBuf, offset + i*2);
       }
     }
 
@@ -1037,7 +1039,7 @@
 
  private:
   static uint8_t* DebugAlloc(size_t len) {
-    void* result = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+    void* result = mmap(nullptr, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
     if (result == MAP_FAILED) {
       PLOG(FATAL) << "GuardedCopy::create mmap(" << len << ") failed";
     }
@@ -1081,8 +1083,8 @@
   mirror::Array* a = soa.Decode<mirror::Array*>(java_array);
   size_t component_size = a->GetClass()->GetComponentSize();
   size_t byte_count = a->GetLength() * component_size;
-  void* result = GuardedCopy::Create(a->GetRawData(component_size), byte_count, true);
-  if (isCopy != NULL) {
+  void* result = GuardedCopy::Create(a->GetRawData(component_size, 0), byte_count, true);
+  if (isCopy != nullptr) {
     *isCopy = JNI_TRUE;
   }
   return result;
@@ -1100,7 +1102,7 @@
 
   if (mode != JNI_ABORT) {
     size_t len = GuardedCopy::FromData(dataBuf)->original_length;
-    memcpy(a->GetRawData(a->GetClass()->GetComponentSize()), dataBuf, len);
+    memcpy(a->GetRawData(a->GetClass()->GetComponentSize(), 0), dataBuf, len);
   }
   if (mode != JNI_COMMIT) {
     GuardedCopy::Destroy(dataBuf);
@@ -1223,7 +1225,7 @@
 
   static void DeleteGlobalRef(JNIEnv* env, jobject globalRef) {
     CHECK_JNI_ENTRY(kFlag_Default | kFlag_ExcepOkay, "EL", env, globalRef);
-    if (globalRef != NULL && GetIndirectRefKind(globalRef) != kGlobal) {
+    if (globalRef != nullptr && GetIndirectRefKind(globalRef) != kGlobal) {
       JniAbortF(__FUNCTION__, "DeleteGlobalRef on %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(globalRef)).c_str(), globalRef);
     } else {
@@ -1234,7 +1236,7 @@
 
   static void DeleteWeakGlobalRef(JNIEnv* env, jweak weakGlobalRef) {
     CHECK_JNI_ENTRY(kFlag_Default | kFlag_ExcepOkay, "EL", env, weakGlobalRef);
-    if (weakGlobalRef != NULL && GetIndirectRefKind(weakGlobalRef) != kWeakGlobal) {
+    if (weakGlobalRef != nullptr && GetIndirectRefKind(weakGlobalRef) != kWeakGlobal) {
       JniAbortF(__FUNCTION__, "DeleteWeakGlobalRef on %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(weakGlobalRef)).c_str(), weakGlobalRef);
     } else {
@@ -1245,7 +1247,7 @@
 
   static void DeleteLocalRef(JNIEnv* env, jobject localRef) {
     CHECK_JNI_ENTRY(kFlag_Default | kFlag_ExcepOkay, "EL", env, localRef);
-    if (localRef != NULL && GetIndirectRefKind(localRef) != kLocal && !IsSirtLocalRef(env, localRef)) {
+    if (localRef != nullptr && GetIndirectRefKind(localRef) != kLocal && !IsSirtLocalRef(env, localRef)) {
       JniAbortF(__FUNCTION__, "DeleteLocalRef on %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(localRef)).c_str(), localRef);
     } else {
@@ -1318,7 +1320,7 @@
     return CHECK_JNI_EXIT("f", baseEnv(env)->GetStaticFieldID(env, c, name, sig));
   }
 
-#define FIELD_ACCESSORS(_ctype, _jname, _type) \
+#define FIELD_ACCESSORS(_ctype, _jname, _jvalue_type, _type) \
     static _ctype GetStatic##_jname##Field(JNIEnv* env, jclass c, jfieldID fid) { \
         CHECK_JNI_ENTRY(kFlag_Default, "Ecf", env, c, fid); \
         sc.CheckStaticFieldID(c, fid); \
@@ -1333,7 +1335,9 @@
         CHECK_JNI_ENTRY(kFlag_Default, "Ecf" _type, env, c, fid, value); \
         sc.CheckStaticFieldID(c, fid); \
         /* "value" arg only used when type == ref */ \
-        sc.CheckFieldType((jobject)(uint32_t)value, fid, _type[0], true); \
+        jvalue java_type_value; \
+        java_type_value._jvalue_type = value; \
+        sc.CheckFieldType(java_type_value, fid, _type[0], true); \
         baseEnv(env)->SetStatic##_jname##Field(env, c, fid, value); \
         CHECK_JNI_EXIT_VOID(); \
     } \
@@ -1341,20 +1345,22 @@
         CHECK_JNI_ENTRY(kFlag_Default, "ELf" _type, env, obj, fid, value); \
         sc.CheckInstanceFieldID(obj, fid); \
         /* "value" arg only used when type == ref */ \
-        sc.CheckFieldType((jobject)(uint32_t) value, fid, _type[0], false); \
+        jvalue java_type_value; \
+        java_type_value._jvalue_type = value; \
+        sc.CheckFieldType(java_type_value, fid, _type[0], false); \
         baseEnv(env)->Set##_jname##Field(env, obj, fid, value); \
         CHECK_JNI_EXIT_VOID(); \
     }
 
-FIELD_ACCESSORS(jobject, Object, "L");
-FIELD_ACCESSORS(jboolean, Boolean, "Z");
-FIELD_ACCESSORS(jbyte, Byte, "B");
-FIELD_ACCESSORS(jchar, Char, "C");
-FIELD_ACCESSORS(jshort, Short, "S");
-FIELD_ACCESSORS(jint, Int, "I");
-FIELD_ACCESSORS(jlong, Long, "J");
-FIELD_ACCESSORS(jfloat, Float, "F");
-FIELD_ACCESSORS(jdouble, Double, "D");
+FIELD_ACCESSORS(jobject, Object, l, "L");
+FIELD_ACCESSORS(jboolean, Boolean, z, "Z");
+FIELD_ACCESSORS(jbyte, Byte, b, "B");
+FIELD_ACCESSORS(jchar, Char, c, "C");
+FIELD_ACCESSORS(jshort, Short, s, "S");
+FIELD_ACCESSORS(jint, Int, i, "I");
+FIELD_ACCESSORS(jlong, Long, j, "J");
+FIELD_ACCESSORS(jfloat, Float, f, "F");
+FIELD_ACCESSORS(jdouble, Double, d, "D");
 
 #define CALL(_ctype, _jname, _retdecl, _retasgn, _retok, _retsig) \
     /* Virtual... */ \
@@ -1484,11 +1490,11 @@
   static const jchar* GetStringChars(JNIEnv* env, jstring java_string, jboolean* isCopy) {
     CHECK_JNI_ENTRY(kFlag_CritOkay, "Esp", env, java_string, isCopy);
     const jchar* result = baseEnv(env)->GetStringChars(env, java_string, isCopy);
-    if (sc.ForceCopy() && result != NULL) {
+    if (sc.ForceCopy() && result != nullptr) {
       mirror::String* s = sc.soa().Decode<mirror::String*>(java_string);
       int byteCount = s->GetLength() * 2;
       result = (const jchar*) GuardedCopy::Create(result, byteCount, false);
-      if (isCopy != NULL) {
+      if (isCopy != nullptr) {
         *isCopy = JNI_TRUE;
       }
     }
@@ -1519,9 +1525,9 @@
   static const char* GetStringUTFChars(JNIEnv* env, jstring string, jboolean* isCopy) {
     CHECK_JNI_ENTRY(kFlag_CritOkay, "Esp", env, string, isCopy);
     const char* result = baseEnv(env)->GetStringUTFChars(env, string, isCopy);
-    if (sc.ForceCopy() && result != NULL) {
+    if (sc.ForceCopy() && result != nullptr) {
       result = (const char*) GuardedCopy::Create(result, strlen(result) + 1, false);
-      if (isCopy != NULL) {
+      if (isCopy != nullptr) {
         *isCopy = JNI_TRUE;
       }
     }
@@ -1578,7 +1584,7 @@
   ForceCopyGetChecker(ScopedCheck& sc, jboolean* isCopy) {
     force_copy = sc.ForceCopy();
     no_copy = 0;
-    if (force_copy && isCopy != NULL) {
+    if (force_copy && isCopy != nullptr) {
       // Capture this before the base call tramples on it.
       no_copy = *reinterpret_cast<uint32_t*>(isCopy);
     }
@@ -1586,7 +1592,7 @@
 
   template<typename ResultT>
   ResultT Check(JNIEnv* env, jarray array, jboolean* isCopy, ResultT result) {
-    if (force_copy && result != NULL) {
+    if (force_copy && result != nullptr) {
       result = reinterpret_cast<ResultT>(CreateGuardedPACopy(env, array, isCopy));
     }
     return result;
@@ -1690,7 +1696,7 @@
   static void* GetPrimitiveArrayCritical(JNIEnv* env, jarray array, jboolean* isCopy) {
     CHECK_JNI_ENTRY(kFlag_CritGet, "Eap", env, array, isCopy);
     void* result = baseEnv(env)->GetPrimitiveArrayCritical(env, array, isCopy);
-    if (sc.ForceCopy() && result != NULL) {
+    if (sc.ForceCopy() && result != nullptr) {
       result = CreateGuardedPACopy(env, array, isCopy);
     }
     return CHECK_JNI_EXIT("p", result);
@@ -1709,11 +1715,11 @@
   static const jchar* GetStringCritical(JNIEnv* env, jstring java_string, jboolean* isCopy) {
     CHECK_JNI_ENTRY(kFlag_CritGet, "Esp", env, java_string, isCopy);
     const jchar* result = baseEnv(env)->GetStringCritical(env, java_string, isCopy);
-    if (sc.ForceCopy() && result != NULL) {
+    if (sc.ForceCopy() && result != nullptr) {
       mirror::String* s = sc.soa().Decode<mirror::String*>(java_string);
       int byteCount = s->GetLength() * 2;
       result = (const jchar*) GuardedCopy::Create(result, byteCount, false);
-      if (isCopy != NULL) {
+      if (isCopy != nullptr) {
         *isCopy = JNI_TRUE;
       }
     }
@@ -1751,11 +1757,11 @@
 
   static jobject NewDirectByteBuffer(JNIEnv* env, void* address, jlong capacity) {
     CHECK_JNI_ENTRY(kFlag_Default, "EpJ", env, address, capacity);
-    if (address == NULL) {
+    if (address == nullptr) {
       JniAbortF(__FUNCTION__, "non-nullable address is NULL");
     }
     if (capacity < 0) {
-      JniAbortF(__FUNCTION__, "capacity must be non-negative: %lld", capacity);
+      JniAbortF(__FUNCTION__, "capacity must be non-negative: %" PRId64, capacity);
     }
     return CHECK_JNI_EXIT("L", baseEnv(env)->NewDirectByteBuffer(env, address, capacity));
   }
@@ -1779,10 +1785,10 @@
 };
 
 const JNINativeInterface gCheckNativeInterface = {
-  NULL,  // reserved0.
-  NULL,  // reserved1.
-  NULL,  // reserved2.
-  NULL,  // reserved3.
+  nullptr,  // reserved0.
+  nullptr,  // reserved1.
+  nullptr,  // reserved2.
+  nullptr,  // reserved3.
   CheckJNI::GetVersion,
   CheckJNI::DefineClass,
   CheckJNI::FindClass,
@@ -2057,9 +2063,9 @@
 };
 
 const JNIInvokeInterface gCheckInvokeInterface = {
-  NULL,  // reserved0
-  NULL,  // reserved1
-  NULL,  // reserved2
+  nullptr,  // reserved0
+  nullptr,  // reserved1
+  nullptr,  // reserved2
   CheckJII::DestroyJavaVM,
   CheckJII::AttachCurrentThread,
   CheckJII::DetachCurrentThread,
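
The FIELD_ACCESSORS rewrite is the substantive fix in this file: the old macro smuggled every field value through (jobject)(uint32_t)value, which mangles 64-bit jlong/jdouble values and is unportable besides. Routing the value through the jvalue union member matching the field type (l, z, b, c, s, i, j, f, d) keeps all the bits, and CheckFieldType reads value.l only after confirming the field is a reference type. The shape of that trick in isolation, with a local union mirroring jvalue so the sketch needs no JNI headers:

#include <stdint.h>
#include <stdio.h>

// Local mirror of the JNI jvalue union (same member names as jni.h).
union Value {
  uint8_t z;   // jboolean
  int8_t b;    // jbyte
  uint16_t c;  // jchar
  int16_t s;   // jshort
  int32_t i;   // jint
  int64_t j;   // jlong
  float f;     // jfloat
  double d;    // jdouble
  void* l;     // jobject
};

// Only the reference member is meaningful when the descriptor says 'L';
// for primitives, the bits viewed through 'l' could be anything.
static void CheckFieldType(Value value, char prim) {
  if (prim == 'L') {
    printf("reference field, object = %p\n", value.l);
  } else {
    printf("primitive field '%c', nothing to validate\n", prim);
  }
}

int main() {
  Value v;
  v.j = 0x123456789abcdef0LL;  // All 64 bits survive, unlike a cast
                               // through uint32_t.
  CheckFieldType(v, 'J');
  v.l = 0;
  CheckFieldType(v, 'L');
  return 0;
}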
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 0436435..66c24b5 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -28,7 +28,7 @@
 namespace art {
 
 inline mirror::String* ClassLinker::ResolveString(uint32_t string_idx,
-                                                  const mirror::ArtMethod* referrer) {
+                                                  mirror::ArtMethod* referrer) {
   mirror::String* resolved_string = referrer->GetDexCacheStrings()->Get(string_idx);
   if (UNLIKELY(resolved_string == NULL)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
@@ -40,7 +40,7 @@
 }
 
 inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx,
-                                               const mirror::ArtMethod* referrer) {
+                                               mirror::ArtMethod* referrer) {
   mirror::Class* resolved_type = referrer->GetDexCacheResolvedTypes()->Get(type_idx);
   if (UNLIKELY(resolved_type == NULL)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
@@ -53,7 +53,7 @@
   return resolved_type;
 }
 
-inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, const mirror::ArtField* referrer) {
+inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, mirror::ArtField* referrer) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
   mirror::DexCache* dex_cache_ptr = declaring_class->GetDexCache();
   mirror::Class* resolved_type = dex_cache_ptr->GetResolvedType(type_idx);
@@ -68,7 +68,7 @@
 }
 
 inline mirror::ArtMethod* ClassLinker::ResolveMethod(uint32_t method_idx,
-                                                     const mirror::ArtMethod* referrer,
+                                                     mirror::ArtMethod* referrer,
                                                      InvokeType type) {
   mirror::ArtMethod* resolved_method =
       referrer->GetDexCacheResolvedMethods()->Get(method_idx);
@@ -84,7 +84,7 @@
 }
 
 inline mirror::ArtField* ClassLinker::ResolveField(uint32_t field_idx,
-                                                   const mirror::ArtMethod* referrer,
+                                                   mirror::ArtMethod* referrer,
                                                    bool is_static) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
   mirror::ArtField* resolved_field =
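
Each of these inlined resolvers follows the same memoization shape: consult the dex cache first, and only take the UNLIKELY slow path on a miss, which then populates the cache for the next caller. A generic sketch of the pattern with hypothetical names (ART's real caches are fixed-size arrays indexed by dex index, not maps):

#include <stdint.h>
#include <map>
#include <string>

// Cache-then-resolve helper in the shape of ClassLinker::ResolveString.
class Resolver {
 public:
  const std::string* Resolve(uint32_t idx) {
    std::map<uint32_t, std::string>::iterator it = cache_.find(idx);
    if (it != cache_.end()) {  // Fast path: already resolved.
      return &it->second;
    }
    return SlowResolve(idx);   // Miss: do the real work once.
  }

 private:
  const std::string* SlowResolve(uint32_t idx) {
    // Stand-in for decoding from the dex file; the result is memoized.
    std::string value = "resolved#" + std::string(1, static_cast<char>('0' + idx % 10));
    return &cache_.insert(std::make_pair(idx, value)).first->second;
  }

  std::map<uint32_t, std::string> cache_;
};

int main() {
  Resolver r;
  const std::string* a = r.Resolve(7);
  const std::string* b = r.Resolve(7);  // Served from the cache.
  return (a == b) ? 0 : 1;
}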
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 344da3f..978c99b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -649,15 +649,9 @@
 
 const OatFile* ClassLinker::RegisterOatFile(const OatFile* oat_file) {
   WriterMutexLock mu(Thread::Current(), dex_lock_);
-  for (size_t i = 0; i < oat_files_.size(); ++i) {
-    if (UNLIKELY(oat_file->GetLocation() == oat_files_[i]->GetLocation())) {
-      VLOG(class_linker) << "Attempt to register oat file that's already registered: "
-          << oat_file->GetLocation();
-      for (size_t j = i; j < oat_files_.size(); ++j) {
-        CHECK_NE(oat_file, oat_files_[j]) << "Attempt to re-register dex file.";
-      }
-      delete oat_file;
-      return oat_files_[i];
+  if (kIsDebugBuild) {
+    for (size_t i = 0; i < oat_files_.size(); ++i) {
+      CHECK_NE(oat_file, oat_files_[i]) << oat_file->GetLocation();
     }
   }
   VLOG(class_linker) << "Registering " << oat_file->GetLocation();
@@ -718,11 +712,11 @@
     return nullptr;
   }
 
-  uint32_t expected_image_oat_offset = reinterpret_cast<uint32_t>(image_header.GetOatDataBegin());
+  uintptr_t expected_image_oat_offset = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
   uint32_t actual_image_oat_offset = oat_file->GetOatHeader().GetImageFileLocationOatDataBegin();
   if (expected_image_oat_offset != actual_image_oat_offset) {
-    *error_msg = StringPrintf("Failed to find oat file at '%s' with expected image oat offset %ud, "
-                              "found %ud", oat_location, expected_image_oat_offset,
+    *error_msg = StringPrintf("Failed to find oat file at '%s' with expected image oat offset %"
+                              PRIuPTR ", found %ud", oat_location, expected_image_oat_offset,
                               actual_image_oat_offset);
     return nullptr;
   }
@@ -826,20 +820,6 @@
       << oat_location << "': " << *error_msg;
   error_msg->clear();
 
-  {
-    // We might have registered an outdated OatFile in FindDexFileInOatLocation().
-    // Get rid of it as its MAP_PRIVATE mapping may not reflect changes we're about to do.
-    WriterMutexLock mu(Thread::Current(), dex_lock_);
-    for (size_t i = 0; i < oat_files_.size(); ++i) {
-      if (oat_location == oat_files_[i]->GetLocation()) {
-        VLOG(class_linker) << "De-registering old OatFile: " << oat_location;
-        delete oat_files_[i];
-        oat_files_.erase(oat_files_.begin() + i);
-        break;
-      }
-    }
-  }
-
   // Generate the output oat file for the dex file
   VLOG(class_linker) << "Generating oat file " << oat_location << " for " << dex_location;
   if (!GenerateOatFile(dex_location, scoped_flock.GetFile().Fd(), oat_location, error_msg)) {
@@ -878,7 +858,7 @@
   Runtime* runtime = Runtime::Current();
   const ImageHeader& image_header = runtime->GetHeap()->GetImageSpace()->GetImageHeader();
   uint32_t image_oat_checksum = image_header.GetOatChecksum();
-  uint32_t image_oat_data_begin = reinterpret_cast<uint32_t>(image_header.GetOatDataBegin());
+  uintptr_t image_oat_data_begin = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
   bool image_check = ((oat_file->GetOatHeader().GetImageFileLocationOatChecksum() == image_oat_checksum)
                       && (oat_file->GetOatHeader().GetImageFileLocationOatDataBegin() == image_oat_data_begin));
 
@@ -905,7 +885,7 @@
     ScopedObjectAccess soa(Thread::Current());
     mirror::String* oat_location = image_header.GetImageRoot(ImageHeader::kOatLocation)->AsString();
     std::string image_file(oat_location->ToModifiedUtf8());
-    *error_msg = StringPrintf("oat file '%s' mismatch (0x%x, %d) with '%s' (0x%x, %d)",
+    *error_msg = StringPrintf("oat file '%s' mismatch (0x%x, %d) with '%s' (0x%x, %" PRIdPTR ")",
                               oat_file->GetLocation().c_str(),
                               oat_file->GetOatHeader().GetImageFileLocationOatChecksum(),
                               oat_file->GetOatHeader().GetImageFileLocationOatDataBegin(),
@@ -1043,7 +1023,8 @@
     if (!method->IsNative()) {
       method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge);
       if (method != Runtime::Current()->GetResolutionMethod()) {
-        method->SetEntryPointFromCompiledCode(GetCompiledCodeToInterpreterBridge());
+        method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+        method->SetEntryPointFromPortableCompiledCode(GetPortableToInterpreterBridge());
       }
     }
   }
@@ -1592,7 +1573,7 @@
   return 0;
 }
 
-const OatFile::OatMethod ClassLinker::GetOatMethodFor(const mirror::ArtMethod* method) {
+const OatFile::OatMethod ClassLinker::GetOatMethodFor(mirror::ArtMethod* method) {
   // Although we overwrite the trampoline of non-static methods, we may get here via the resolution
   // method for direct methods (or virtual methods made direct).
   mirror::Class* declaring_class = method->GetDeclaringClass();
@@ -1628,35 +1609,68 @@
 }
 
 // Special case to get oat code without overwriting a trampoline.
-const void* ClassLinker::GetOatCodeFor(const mirror::ArtMethod* method) {
+const void* ClassLinker::GetQuickOatCodeFor(mirror::ArtMethod* method) {
   CHECK(!method->IsAbstract()) << PrettyMethod(method);
   if (method->IsProxyMethod()) {
-#if !defined(ART_USE_PORTABLE_COMPILER)
-    return reinterpret_cast<void*>(art_quick_proxy_invoke_handler);
-#else
-    return reinterpret_cast<void*>(art_portable_proxy_invoke_handler);
-#endif
+    return GetQuickProxyInvokeHandler();
   }
-  const void* result = GetOatMethodFor(method).GetCode();
-  if (result == NULL) {
-    // No code? You must mean to go into the interpreter.
-    result = GetCompiledCodeToInterpreterBridge();
+  const void* result = GetOatMethodFor(method).GetQuickCode();
+  if (result == nullptr) {
+    if (method->IsPortableCompiled()) {
+      // No code? Do we expect portable code?
+      result = GetQuickToPortableBridge();
+    } else {
+      // No code? You must mean to go into the interpreter.
+      result = GetQuickToInterpreterBridge();
+    }
   }
   return result;
 }
 
-const void* ClassLinker::GetOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx,
-                                       uint32_t method_idx) {
+const void* ClassLinker::GetPortableOatCodeFor(mirror::ArtMethod* method,
+                                               bool* have_portable_code) {
+  CHECK(!method->IsAbstract()) << PrettyMethod(method);
+  *have_portable_code = false;
+  if (method->IsProxyMethod()) {
+    return GetPortableProxyInvokeHandler();
+  }
+  const void* result = GetOatMethodFor(method).GetPortableCode();
+  if (result == nullptr) {
+    if (GetOatMethodFor(method).GetQuickCode() == nullptr) {
+      // No code? You must mean to go into the interpreter.
+      result = GetPortableToInterpreterBridge();
+    } else {
+      // No code? But there's quick code, so use a bridge.
+      result = GetPortableToQuickBridge();
+    }
+  } else {
+    *have_portable_code = true;
+  }
+  return result;
+}
+
+const void* ClassLinker::GetQuickOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx,
+                                            uint32_t method_idx) {
   UniquePtr<const OatFile::OatClass> oat_class(GetOatClass(dex_file, class_def_idx));
   CHECK(oat_class.get() != nullptr);
   uint32_t oat_method_idx = GetOatMethodIndexFromMethodIndex(dex_file, class_def_idx, method_idx);
-  return oat_class->GetOatMethod(oat_method_idx).GetCode();
+  return oat_class->GetOatMethod(oat_method_idx).GetQuickCode();
+}
+
+const void* ClassLinker::GetPortableOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx,
+                                               uint32_t method_idx) {
+  UniquePtr<const OatFile::OatClass> oat_class(GetOatClass(dex_file, class_def_idx));
+  CHECK(oat_class.get() != nullptr);
+  uint32_t oat_method_idx = GetOatMethodIndexFromMethodIndex(dex_file, class_def_idx, method_idx);
+  return oat_class->GetOatMethod(oat_method_idx).GetPortableCode();
 }
 
 // Returns true if the method must run with interpreter, false otherwise.
-static bool NeedsInterpreter(const mirror::ArtMethod* method, const void* code) {
-  if (code == NULL) {
+static bool NeedsInterpreter(mirror::ArtMethod* method, const void* quick_code,
+                             const void* portable_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if ((quick_code == nullptr) && (portable_code == nullptr)) {
     // No code: need interpreter.
+    DCHECK(!method->IsNative());
     return true;
   }
 #ifdef ART_SEA_IR_MODE
@@ -1704,13 +1718,26 @@
       // Only update static methods.
       continue;
     }
-    const void* code = oat_class->GetOatMethod(method_index).GetCode();
-    const bool enter_interpreter = NeedsInterpreter(method, code);
+    const void* portable_code = oat_class->GetOatMethod(method_index).GetPortableCode();
+    const void* quick_code = oat_class->GetOatMethod(method_index).GetQuickCode();
+    const bool enter_interpreter = NeedsInterpreter(method, quick_code, portable_code);
+    bool have_portable_code = false;
     if (enter_interpreter) {
       // Use interpreter entry point.
-      code = GetCompiledCodeToInterpreterBridge();
+      portable_code = GetPortableToInterpreterBridge();
+      quick_code = GetQuickToInterpreterBridge();
+    } else {
+      if (portable_code == nullptr) {
+        portable_code = GetPortableToQuickBridge();
+      } else {
+        have_portable_code = true;
+      }
+      if (quick_code == nullptr) {
+        quick_code = GetQuickToPortableBridge();
+      }
     }
-    runtime->GetInstrumentation()->UpdateMethodsCode(method, code);
+    runtime->GetInstrumentation()->UpdateMethodsCode(method, quick_code, portable_code,
+                                                     have_portable_code);
   }
   // Ignore virtual methods on the iterator.
 }
@@ -1719,7 +1746,8 @@
                      uint32_t method_index)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Method shouldn't have already been linked.
-  DCHECK(method->GetEntryPointFromCompiledCode() == NULL);
+  DCHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr);
+  DCHECK(method->GetEntryPointFromPortableCompiledCode() == nullptr);
   // Every kind of method should at least get an invoke stub from the oat_method.
   // Non-abstract methods also get their code pointers.
   const OatFile::OatMethod oat_method = oat_class->GetOatMethod(method_index);
@@ -1727,7 +1755,9 @@
 
   // Install entry point from interpreter.
   Runtime* runtime = Runtime::Current();
-  bool enter_interpreter = NeedsInterpreter(method.get(), method->GetEntryPointFromCompiledCode());
+  bool enter_interpreter = NeedsInterpreter(method.get(),
+                                            method->GetEntryPointFromQuickCompiledCode(),
+                                            method->GetEntryPointFromPortableCompiledCode());
   if (enter_interpreter) {
     method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge);
   } else {
@@ -1735,18 +1765,29 @@
   }
 
   if (method->IsAbstract()) {
-    method->SetEntryPointFromCompiledCode(GetCompiledCodeToInterpreterBridge());
+    method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+    method->SetEntryPointFromPortableCompiledCode(GetPortableToInterpreterBridge());
     return;
   }
 
+  bool have_portable_code = false;
   if (method->IsStatic() && !method->IsConstructor()) {
     // For static methods excluding the class initializer, install the trampoline.
     // It will be replaced by the proper entry point in ClassLinker::FixupStaticTrampolines
     // after the class is initialized (see ClassLinker::InitializeClass).
-    method->SetEntryPointFromCompiledCode(GetResolutionTrampoline(runtime->GetClassLinker()));
+    method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionTrampoline(runtime->GetClassLinker()));
+    method->SetEntryPointFromPortableCompiledCode(GetPortableResolutionTrampoline(runtime->GetClassLinker()));
   } else if (enter_interpreter) {
     // Point the compiled-code entry points at the interpreter if there is no code or we are in interpreter-only mode.
-    method->SetEntryPointFromCompiledCode(GetCompiledCodeToInterpreterBridge());
+    method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+    method->SetEntryPointFromPortableCompiledCode(GetPortableToInterpreterBridge());
+  } else if (method->GetEntryPointFromPortableCompiledCode() != nullptr) {
+    DCHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr);
+    have_portable_code = true;
+    method->SetEntryPointFromQuickCompiledCode(GetQuickToPortableBridge());
+  } else {
+    DCHECK(method->GetEntryPointFromQuickCompiledCode() != nullptr);
+    method->SetEntryPointFromPortableCompiledCode(GetPortableToQuickBridge());
   }
 
   if (method->IsNative()) {
@@ -1756,7 +1797,9 @@
 
   // Allow instrumentation its chance to hijack code.
   runtime->GetInstrumentation()->UpdateMethodsCode(method.get(),
-                                                   method->GetEntryPointFromCompiledCode());
+                                                   method->GetEntryPointFromQuickCompiledCode(),
+                                                   method->GetEntryPointFromPortableCompiledCode(),
+                                                   have_portable_code);
 }
 
 void ClassLinker::LoadClass(const DexFile& dex_file,
@@ -2096,10 +2139,11 @@
                                              const SirtRef<mirror::ClassLoader>& class_loader) {
   // Identify the underlying component type
   CHECK_EQ('[', descriptor[0]);
-  mirror::Class* component_type = FindClass(descriptor + 1, class_loader);
-  if (component_type == NULL) {
-    DCHECK(Thread::Current()->IsExceptionPending());
-    return NULL;
+  Thread* self = Thread::Current();
+  SirtRef<mirror::Class> component_type(self, FindClass(descriptor + 1, class_loader));
+  if (component_type.get() == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return nullptr;
   }
 
   // See if the component type is already loaded.  Array classes are
@@ -2134,7 +2178,6 @@
   //
   // Array classes are simple enough that we don't need to do a full
   // link step.
-  Thread* self = Thread::Current();
   SirtRef<mirror::Class> new_class(self, NULL);
   if (UNLIKELY(!init_done_)) {
     // Classes that were hand-created, i.e. not by FindSystemClass
@@ -2156,12 +2199,12 @@
       new_class.reset(GetClassRoot(kIntArrayClass));
     }
   }
-  if (new_class.get() == NULL) {
+  if (new_class.get() == nullptr) {
     new_class.reset(AllocClass(self, sizeof(mirror::Class)));
-    if (new_class.get() == NULL) {
-      return NULL;
+    if (new_class.get() == nullptr) {
+      return nullptr;
     }
-    new_class->SetComponentType(component_type);
+    new_class->SetComponentType(component_type.get());
   }
   ObjectLock<mirror::Class> lock(self, &new_class);  // Must hold lock on object when initializing.
   DCHECK(new_class->GetComponentType() != NULL);
@@ -2187,7 +2230,7 @@
 
   // Use the single, global copies of "interfaces" and "iftable"
   // (remember not to free them for arrays).
-  CHECK(array_iftable_ != NULL);
+  CHECK(array_iftable_ != nullptr);
   new_class->SetIfTable(array_iftable_);
 
   // Inherit access flags from the component type.
@@ -2202,7 +2245,7 @@
   new_class->SetAccessFlags(access_flags);
 
   mirror::Class* existing = InsertClass(descriptor, new_class.get(), Hash(descriptor));
-  if (existing == NULL) {
+  if (existing == nullptr) {
     return new_class.get();
   }
   // Another thread must have loaded the class after we
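
Rooting component_type in a SirtRef is the substantive fix in this hunk: AllocClass can trigger a collection, and with a moving collector a raw mirror::Class* held in a local would be left pointing at the old location. The SirtRef registers the reference with the thread so the GC can update it in place. The pattern in isolation, with the runtime's own types assumed and a hypothetical wrapper function:

    // Sketch: keep a heap reference alive and up to date across an
    // allocation that may move objects.
    mirror::Class* AllocWithRootedComponent(Thread* self, mirror::Class* raw_component) {
      SirtRef<mirror::Class> component(self, raw_component);  // GC-visible root.
      mirror::Class* klass = AllocClass(self, sizeof(mirror::Class));  // May GC.
      if (klass != nullptr) {
        klass->SetComponentType(component.get());  // Re-read through the root.
      }
      return klass;
    }
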
@@ -2823,15 +2866,15 @@
   return klass.get();
 }
 
-std::string ClassLinker::GetDescriptorForProxy(const mirror::Class* proxy_class) {
+std::string ClassLinker::GetDescriptorForProxy(mirror::Class* proxy_class) {
   DCHECK(proxy_class->IsProxyClass());
   mirror::String* name = proxy_class->GetName();
   DCHECK(name != NULL);
   return DotToDescriptor(name->ToModifiedUtf8().c_str());
 }
 
-mirror::ArtMethod* ClassLinker::FindMethodForProxy(const mirror::Class* proxy_class,
-                                                        const mirror::ArtMethod* proxy_method) {
+mirror::ArtMethod* ClassLinker::FindMethodForProxy(mirror::Class* proxy_class,
+                                                   mirror::ArtMethod* proxy_method) {
   DCHECK(proxy_class->IsProxyClass());
   DCHECK(proxy_method->IsProxyMethod());
   // Locate the dex cache of the original interface/Object
@@ -2912,7 +2955,8 @@
   method->SetCoreSpillMask(refs_and_args->GetCoreSpillMask());
   method->SetFpSpillMask(refs_and_args->GetFpSpillMask());
   method->SetFrameSizeInBytes(refs_and_args->GetFrameSizeInBytes());
-  method->SetEntryPointFromCompiledCode(GetProxyInvokeHandler());
+  method->SetEntryPointFromQuickCompiledCode(GetQuickProxyInvokeHandler());
+  method->SetEntryPointFromPortableCompiledCode(GetPortableProxyInvokeHandler());
   method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
 
   return method;
@@ -3195,7 +3239,7 @@
       klass->GetClassLoader() != klass->GetSuperClass()->GetClassLoader()) {
     SirtRef<mirror::Class> super(self, klass->GetSuperClass());
     for (int i = super->GetVTable()->GetLength() - 1; i >= 0; --i) {
-      const mirror::ArtMethod* method = klass->GetVTable()->Get(i);
+      mirror::ArtMethod* method = klass->GetVTable()->Get(i);
       if (method != super->GetVTable()->Get(i) &&
           !IsSameMethodSignatureInDifferentClassContexts(method, super.get(), klass.get())) {
         ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in superclass %s",
@@ -3209,7 +3253,7 @@
     SirtRef<mirror::Class> interface(self, klass->GetIfTable()->GetInterface(i));
     if (klass->GetClassLoader() != interface->GetClassLoader()) {
       for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-        const mirror::ArtMethod* method = klass->GetIfTable()->GetMethodArray(i)->Get(j);
+        mirror::ArtMethod* method = klass->GetIfTable()->GetMethodArray(i)->Get(j);
         if (!IsSameMethodSignatureInDifferentClassContexts(method, interface.get(),
                                                            method->GetDeclaringClass())) {
           ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in interface %s",
@@ -3226,9 +3270,9 @@
 
 // Returns true if classes referenced by the signature of the method are the
 // same classes in klass1 as they are in klass2.
-bool ClassLinker::IsSameMethodSignatureInDifferentClassContexts(const mirror::ArtMethod* method,
-                                                                const mirror::Class* klass1,
-                                                                const mirror::Class* klass2) {
+bool ClassLinker::IsSameMethodSignatureInDifferentClassContexts(mirror::ArtMethod* method,
+                                                                mirror::Class* klass1,
+                                                                mirror::Class* klass2) {
   if (klass1 == klass2) {
     return true;
   }
@@ -3810,23 +3854,24 @@
   explicit LinkFieldsComparator() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   }
   // No thread safety analysis as will be called from STL. Checked lock held in constructor.
-  bool operator()(const mirror::ArtField* field1, const mirror::ArtField* field2)
+  bool operator()(mirror::ArtField* field1, mirror::ArtField* field2)
       NO_THREAD_SAFETY_ANALYSIS {
     // First come reference fields, then 64-bit, and finally 32-bit
     FieldHelper fh1(field1);
     Primitive::Type type1 = fh1.GetTypeAsPrimitiveType();
     FieldHelper fh2(field2);
     Primitive::Type type2 = fh2.GetTypeAsPrimitiveType();
-    bool isPrimitive1 = type1 != Primitive::kPrimNot;
-    bool isPrimitive2 = type2 != Primitive::kPrimNot;
-    bool is64bit1 = isPrimitive1 && (type1 == Primitive::kPrimLong || type1 == Primitive::kPrimDouble);
-    bool is64bit2 = isPrimitive2 && (type2 == Primitive::kPrimLong || type2 == Primitive::kPrimDouble);
-    int order1 = (!isPrimitive1 ? 0 : (is64bit1 ? 1 : 2));
-    int order2 = (!isPrimitive2 ? 0 : (is64bit2 ? 1 : 2));
-    if (order1 != order2) {
-      return order1 < order2;
+    if (type1 != type2) {
+      bool is_primitive1 = type1 != Primitive::kPrimNot;
+      bool is_primitive2 = type2 != Primitive::kPrimNot;
+      bool is64bit1 = is_primitive1 && (type1 == Primitive::kPrimLong || type1 == Primitive::kPrimDouble);
+      bool is64bit2 = is_primitive2 && (type2 == Primitive::kPrimLong || type2 == Primitive::kPrimDouble);
+      int order1 = !is_primitive1 ? 0 : (is64bit1 ? 1 : 2);
+      int order2 = !is_primitive2 ? 0 : (is64bit2 ? 1 : 2);
+      if (order1 != order2) {
+        return order1 < order2;
+      }
     }
-
     // Same basic group? Then sort by name string.
     const char* name1 = fh1.GetName();
     const char* name2 = fh2.GetName();
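
The rewritten comparator keeps the same three-bucket ordering (references, then 64-bit primitives, then 32-bit primitives) but skips the bucket computation entirely when the two primitive types are equal, since equal types always land in the same bucket. The ordering key on its own, as a hypothetical helper:

    // Sketch of the field-layout ordering key used above: lower buckets
    // sort first. References lead so they form a dense prefix for the GC.
    static int FieldBucket(Primitive::Type type) {
      if (type == Primitive::kPrimNot) {
        return 0;  // Object reference.
      }
      if (type == Primitive::kPrimLong || type == Primitive::kPrimDouble) {
        return 1;  // 64-bit primitive.
      }
      return 2;    // 32-bit or narrower primitive.
    }
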
@@ -4016,14 +4061,14 @@
   size_t num_reference_fields =
       is_static ? klass->NumReferenceStaticFieldsDuringLinking()
                 : klass->NumReferenceInstanceFieldsDuringLinking();
-  const mirror::ObjectArray<mirror::ArtField>* fields =
+  mirror::ObjectArray<mirror::ArtField>* fields =
       is_static ? klass->GetSFields() : klass->GetIFields();
   // All of the fields that contain object references are guaranteed
   // to be at the beginning of the fields list.
   for (size_t i = 0; i < num_reference_fields; ++i) {
     // Note that byte_offset is the offset from the beginning of the
     // object, not the offset into the instance data.
-    const mirror::ArtField* field = fields->Get(i);
+    mirror::ArtField* field = fields->Get(i);
     MemberOffset byte_offset = field->GetOffsetDuringLinking();
     CHECK_EQ(byte_offset.Uint32Value() & (CLASS_OFFSET_ALIGNMENT - 1), 0U);
     if (CLASS_CAN_ENCODE_OFFSET(byte_offset.Uint32Value())) {
@@ -4058,7 +4103,7 @@
 }
 
 mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
-                                        const mirror::Class* referrer) {
+                                        mirror::Class* referrer) {
   Thread* self = Thread::Current();
   SirtRef<mirror::DexCache> dex_cache(self, referrer->GetDexCache());
   SirtRef<mirror::ClassLoader> class_loader(self, referrer->GetClassLoader());
@@ -4101,7 +4146,7 @@
                                               uint32_t method_idx,
                                               const SirtRef<mirror::DexCache>& dex_cache,
                                               const SirtRef<mirror::ClassLoader>& class_loader,
-                                              const mirror::ArtMethod* referrer,
+                                              mirror::ArtMethod* referrer,
                                               InvokeType type) {
   DCHECK(dex_cache.get() != NULL);
   // Check for hit in the dex cache.
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 8722de3..7e31356 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -121,7 +121,7 @@
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::String* ResolveString(uint32_t string_idx, const mirror::ArtMethod* referrer)
+  mirror::String* ResolveString(uint32_t string_idx, mirror::ArtMethod* referrer)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a String with the given index from the DexFile, storing the
@@ -133,17 +133,16 @@
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx,
-                             const mirror::Class* referrer)
+  mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx, mirror::Class* referrer)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::Class* ResolveType(uint16_t type_idx, const mirror::ArtMethod* referrer)
+  mirror::Class* ResolveType(uint16_t type_idx, mirror::ArtMethod* referrer)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Class* ResolveType(uint16_t type_idx, const mirror::ArtField* referrer)
+  mirror::Class* ResolveType(uint16_t type_idx, mirror::ArtField* referrer)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a type with the given ID from the DexFile, storing the
@@ -164,15 +163,15 @@
                                    uint32_t method_idx,
                                    const SirtRef<mirror::DexCache>& dex_cache,
                                    const SirtRef<mirror::ClassLoader>& class_loader,
-                                   const mirror::ArtMethod* referrer,
+                                   mirror::ArtMethod* referrer,
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtMethod* ResolveMethod(uint32_t method_idx, const mirror::ArtMethod* referrer,
+  mirror::ArtMethod* ResolveMethod(uint32_t method_idx, mirror::ArtMethod* referrer,
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtField* ResolveField(uint32_t field_idx, const mirror::ArtMethod* referrer,
+  mirror::ArtField* ResolveField(uint32_t field_idx, mirror::ArtMethod* referrer,
                                  bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -319,19 +318,23 @@
   mirror::Class* CreateProxyClass(ScopedObjectAccess& soa, jstring name, jobjectArray interfaces,
                                   jobject loader, jobjectArray methods, jobjectArray throws)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  std::string GetDescriptorForProxy(const mirror::Class* proxy_class)
+  std::string GetDescriptorForProxy(mirror::Class* proxy_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::ArtMethod* FindMethodForProxy(const mirror::Class* proxy_class,
-                                        const mirror::ArtMethod* proxy_method)
+  mirror::ArtMethod* FindMethodForProxy(mirror::Class* proxy_class,
+                                        mirror::ArtMethod* proxy_method)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get the oat code for a method when its class isn't yet initialized
-  const void* GetOatCodeFor(const mirror::ArtMethod* method)
+  const void* GetQuickOatCodeFor(mirror::ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const void* GetPortableOatCodeFor(mirror::ArtMethod* method, bool* have_portable_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get the oat code for a method from a method index.
-  const void* GetOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx, uint32_t method_idx)
+  const void* GetQuickOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx, uint32_t method_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const void* GetPortableOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx, uint32_t method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   pid_t GetClassesLockOwner();  // For SignalCatcher.
@@ -368,7 +371,7 @@
   mirror::ArtMethod* AllocArtMethod(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  const OatFile::OatMethod GetOatMethodFor(const mirror::ArtMethod* method)
+  const OatFile::OatMethod GetOatMethodFor(mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   OatFile& GetImageOatFile(gc::space::ImageSpace* space)
@@ -451,9 +454,9 @@
                                                 SirtRef<mirror::ClassLoader>& class_loader2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsSameMethodSignatureInDifferentClassContexts(const mirror::ArtMethod* method,
-                                                     const mirror::Class* klass1,
-                                                     const mirror::Class* klass2)
+  bool IsSameMethodSignatureInDifferentClassContexts(mirror::ArtMethod* method,
+                                                     mirror::Class* klass1,
+                                                     mirror::Class* klass2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkClass(Thread* self, const SirtRef<mirror::Class>& klass,
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 1744050..fb979c2 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -55,7 +55,7 @@
     AssertPrimitiveClass(descriptor, class_linker_->FindSystemClass(descriptor.c_str()));
   }
 
-  void AssertPrimitiveClass(const std::string& descriptor, const mirror::Class* primitive)
+  void AssertPrimitiveClass(const std::string& descriptor, mirror::Class* primitive)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ClassHelper primitive_ch(primitive);
     ASSERT_TRUE(primitive != NULL);
@@ -212,7 +212,7 @@
       }
     }
     EXPECT_EQ(klass->IsInterface(), klass->GetVTable() == NULL);
-    const mirror::IfTable* iftable = klass->GetIfTable();
+    mirror::IfTable* iftable = klass->GetIfTable();
     for (int i = 0; i < klass->GetIfTableCount(); i++) {
       mirror::Class* interface = iftable->GetInterface(i);
       ASSERT_TRUE(interface != NULL);
@@ -469,20 +469,23 @@
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_resolved_types_),             "dexCacheResolvedTypes"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_strings_),                    "dexCacheStrings"));
 
+    // alphabetical 64-bit
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_interpreter_),            "entryPointFromInterpreter"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_jni_),                    "entryPointFromJni"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_portable_compiled_code_), "entryPointFromPortableCompiledCode"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_quick_compiled_code_),    "entryPointFromQuickCompiledCode"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, gc_map_),                                  "gcMap"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_mapping_table_),                     "quickMappingTable"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_vmap_table_),                        "quickVmapTable"));
+
     // alphabetical 32-bit
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, access_flags_),                   "accessFlags"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, code_item_offset_),               "codeItemOffset"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, core_spill_mask_),                "coreSpillMask"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_compiled_code_), "entryPointFromCompiledCode"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_interpreter_),   "entryPointFromInterpreter"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, fp_spill_mask_),                  "fpSpillMask"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, frame_size_in_bytes_),            "frameSizeInBytes"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, gc_map_),                         "gcMap"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, mapping_table_),                  "mappingTable"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, method_dex_index_),               "methodDexIndex"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_code_item_offset_),           "dexCodeItemOffset"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_method_index_),               "dexMethodIndex"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, method_index_),                   "methodIndex"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, native_method_),                  "nativeMethod"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, vmap_table_),                     "vmapTable"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_core_spill_mask_),          "quickCoreSpillMask"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_fp_spill_mask_),            "quickFpSpillMask"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_frame_size_in_bytes_),      "quickFrameSizeInBytes"));
   };
 };
 
@@ -762,10 +765,10 @@
   mirror::Class* array_class = class_linker_->FindSystemClass("[Ljava/lang/String;");
   mirror::ObjectArray<mirror::String>* array =
       mirror::ObjectArray<mirror::String>::Alloc(soa.Self(), array_class, 0);
-  uint32_t array_offset = reinterpret_cast<uint32_t>(array);
-  uint32_t data_offset =
-      array_offset + mirror::ObjectArray<mirror::String>::DataOffset(sizeof(mirror::String*)).Uint32Value();
-  if (sizeof(mirror::String*) == sizeof(int32_t)) {
+  uintptr_t data_offset =
+      reinterpret_cast<uintptr_t>(array->GetRawData(sizeof(mirror::HeapReference<mirror::String>),
+                                                    0));
+  if (sizeof(mirror::HeapReference<mirror::String>) == sizeof(int32_t)) {
     EXPECT_TRUE(IsAligned<4>(data_offset));  // Check 4 byte alignment.
   } else {
     EXPECT_TRUE(IsAligned<8>(data_offset));  // Check 8 byte alignment.
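
The alignment check now goes through uintptr_t and Array::GetRawData instead of reinterpret_cast<uint32_t>(array), which truncates the pointer (and on LP64 targets does not even compile, since the destination integer is narrower than a pointer). The hazard in isolation:

    #include <cstdint>

    // Sketch: test pointer alignment via uintptr_t, which is guaranteed
    // wide enough to hold a pointer; uint32_t is not on 64-bit targets.
    bool Is8ByteAligned(const void* p) {
      uintptr_t address = reinterpret_cast<uintptr_t>(p);
      return (address & 7) == 0;
    }
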
diff --git a/runtime/common_test.h b/runtime/common_test.h
index 5e6354e..ddaf52a 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -48,6 +48,7 @@
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "thread.h"
+#include "utils.h"
 #include "UniquePtr.h"
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
@@ -160,11 +161,7 @@
 
 #if defined(__arm__)
 
-
-#include <signal.h>
-#include <asm/sigcontext.h>
-#include <asm-generic/ucontext.h>
-
+#include <sys/ucontext.h>
 
 // A signal handler called when we have an illegal instruction.  We record the fact in
 // a global boolean and then increment the PC in the signal context to return to
@@ -266,11 +263,6 @@
 
 class CommonTest : public testing::Test {
  public:
-  static void MakeExecutable(const mirror::ByteArray* code_array) {
-    CHECK(code_array != NULL);
-    MakeExecutable(code_array->GetData(), code_array->GetLength());
-  }
-
   static void MakeExecutable(const std::vector<uint8_t>& code) {
     CHECK_NE(code.size(), 0U);
     MakeExecutable(&code[0], code.size());
@@ -284,31 +276,39 @@
                                      const uint8_t* mapping_table,
                                      const uint8_t* vmap_table,
                                      const uint8_t* gc_map) {
-      return OatFile::OatMethod(NULL,
-                                reinterpret_cast<uint32_t>(code),
-                                frame_size_in_bytes,
-                                core_spill_mask,
-                                fp_spill_mask,
-                                reinterpret_cast<uint32_t>(mapping_table),
-                                reinterpret_cast<uint32_t>(vmap_table),
-                                reinterpret_cast<uint32_t>(gc_map));
+    const byte* base = nullptr;  // Base of data in oat file, i.e. 0.
+    uint32_t code_offset = PointerToLowMemUInt32(code);
+    uint32_t mapping_table_offset = PointerToLowMemUInt32(mapping_table);
+    uint32_t vmap_table_offset = PointerToLowMemUInt32(vmap_table);
+    uint32_t gc_map_offset = PointerToLowMemUInt32(gc_map);
+    return OatFile::OatMethod(base,
+                              code_offset,
+                              frame_size_in_bytes,
+                              core_spill_mask,
+                              fp_spill_mask,
+                              mapping_table_offset,
+                              vmap_table_offset,
+                              gc_map_offset);
   }
 
   void MakeExecutable(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(method != NULL);
+    CHECK(method != nullptr);
 
-    const CompiledMethod* compiled_method = NULL;
+    const CompiledMethod* compiled_method = nullptr;
     if (!method->IsAbstract()) {
-      const mirror::DexCache* dex_cache = method->GetDeclaringClass()->GetDexCache();
+      mirror::DexCache* dex_cache = method->GetDeclaringClass()->GetDexCache();
       const DexFile& dex_file = *dex_cache->GetDexFile();
       compiled_method =
           compiler_driver_->GetCompiledMethod(MethodReference(&dex_file,
                                                               method->GetDexMethodIndex()));
     }
-    if (compiled_method != NULL) {
-      const std::vector<uint8_t>& code = compiled_method->GetCode();
-      MakeExecutable(code);
-      const void* method_code = CompiledMethod::CodePointer(&code[0],
+    if (compiled_method != nullptr) {
+      const std::vector<uint8_t>* code = compiled_method->GetQuickCode();
+      if (code == nullptr) {
+        code = compiled_method->GetPortableCode();
+      }
+      MakeExecutable(*code);
+      const void* method_code = CompiledMethod::CodePointer(&(*code)[0],
                                                             compiled_method->GetInstructionSet());
       LOG(INFO) << "MakeExecutable " << PrettyMethod(method) << " code=" << method_code;
       OatFile::OatMethod oat_method = CreateOatMethod(method_code,
@@ -321,9 +321,9 @@
       oat_method.LinkMethod(method);
       method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
     } else {
-      const void* method_code;
       // No code? You must mean to go into the interpreter.
-      method_code = GetCompiledCodeToInterpreterBridge();
+      const void* method_code = kUsePortableCompiler ? GetPortableToInterpreterBridge()
+                                                     : GetQuickToInterpreterBridge();
       OatFile::OatMethod oat_method = CreateOatMethod(method_code,
                                                       kStackAlignment,
                                                       0,
@@ -334,6 +334,14 @@
       oat_method.LinkMethod(method);
       method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge);
     }
+    // Create bridges to transition between the different kinds of compiled code.
+    if (method->GetEntryPointFromPortableCompiledCode() == nullptr) {
+      method->SetEntryPointFromPortableCompiledCode(GetPortableToQuickBridge());
+    } else {
+      CHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr);
+      method->SetEntryPointFromQuickCompiledCode(GetQuickToPortableBridge());
+      method->SetIsPortableCompiled();
+    }
   }
 
   static void MakeExecutable(const void* code_start, size_t code_length) {
@@ -419,11 +427,7 @@
     std::string max_heap_string(StringPrintf("-Xmx%zdm", gc::Heap::kDefaultMaximumSize / MB));
 
     // TODO: make selectable
-#if defined(ART_USE_PORTABLE_COMPILER)
-    CompilerBackend compiler_backend = kPortable;
-#else
-    CompilerBackend compiler_backend = kQuick;
-#endif
+    CompilerBackend compiler_backend = kUsePortableCompiler ? kPortable : kQuick;
 
     verification_results_.reset(new VerificationResults);
     method_inliner_map_.reset(compiler_backend == kQuick ? new DexFileToMethodInlinerMap : nullptr);
@@ -464,6 +468,8 @@
       instruction_set = kMips;
 #elif defined(__i386__)
       instruction_set = kX86;
+#elif defined(__x86_64__)
+      instruction_set = kX86_64;
 #endif
 
       for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
@@ -644,7 +650,9 @@
     image_reservation_.reset(MemMap::MapAnonymous("image reservation",
                                                   reinterpret_cast<byte*>(ART_BASE_ADDRESS),
                                                   (size_t)100 * 1024 * 1024,  // 100MB
-                                                  PROT_NONE, &error_msg));
+                                                  PROT_NONE,
+                                                  false /* no need for 4gb flag with fixed mmap */,
+                                                  &error_msg));
     CHECK(image_reservation_.get() != nullptr) << error_msg;
   }
 
@@ -737,11 +745,12 @@
 // MCLinker link LLVM ELF output because we no longer just have code
 // blobs in memory. We'll need to dlopen to load and relocate
 // temporary output to resurrect these tests.
-#if defined(ART_USE_PORTABLE_COMPILER)
-#define TEST_DISABLED_FOR_PORTABLE() printf("WARNING: TEST DISABLED FOR PORTABLE\n"); return
-#else
-#define TEST_DISABLED_FOR_PORTABLE()
-#endif
+#define TEST_DISABLED_FOR_PORTABLE() \
+  if (kUsePortableCompiler) { \
+    printf("WARNING: TEST DISABLED FOR PORTABLE\n"); \
+    return; \
+  }
+
 }  // namespace art
 
 namespace std {
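
Replacing the ART_USE_PORTABLE_COMPILER #ifdef blocks with tests of the kUsePortableCompiler constant means both arms of every selection are always parsed and type-checked, and the dead arm is folded away by the compiler. A sketch of the idiom; the real constant is assumed to be defined once, centrally, from the build flag:

    // Sketch: compile-time feature flag as a constant instead of #ifdef.
    #if defined(ART_USE_PORTABLE_COMPILER)
    static constexpr bool kUsePortableCompiler = true;
    #else
    static constexpr bool kUsePortableCompiler = false;
    #endif

    const char* BackendName() {
      return kUsePortableCompiler ? "portable" : "quick";
    }
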
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index dd832df..24d16c4 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -33,7 +33,7 @@
 
 namespace art {
 
-static void AddReferrerLocation(std::ostream& os, const mirror::Class* referrer)
+static void AddReferrerLocation(std::ostream& os, mirror::Class* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (referrer != NULL) {
     ClassHelper kh(referrer);
@@ -46,7 +46,7 @@
 }
 
 static void ThrowException(const ThrowLocation* throw_location, const char* exception_descriptor,
-                           const mirror::Class* referrer, const char* fmt, va_list* args = NULL)
+                           mirror::Class* referrer, const char* fmt, va_list* args = NULL)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::ostringstream msg;
   if (args != NULL) {
@@ -68,7 +68,7 @@
 
 // AbstractMethodError
 
-void ThrowAbstractMethodError(const mirror::ArtMethod* method) {
+void ThrowAbstractMethodError(mirror::ArtMethod* method) {
   ThrowException(NULL, "Ljava/lang/AbstractMethodError;", NULL,
                  StringPrintf("abstract method \"%s\"",
                               PrettyMethod(method).c_str()).c_str());
@@ -89,8 +89,7 @@
 
 // ArrayStoreException
 
-void ThrowArrayStoreException(const mirror::Class* element_class,
-                              const mirror::Class* array_class) {
+void ThrowArrayStoreException(mirror::Class* element_class, mirror::Class* array_class) {
   ThrowException(NULL, "Ljava/lang/ArrayStoreException;", NULL,
                  StringPrintf("%s cannot be stored in an array of type %s",
                               PrettyDescriptor(element_class).c_str(),
@@ -99,7 +98,7 @@
 
 // ClassCastException
 
-void ThrowClassCastException(const mirror::Class* dest_type, const mirror::Class* src_type) {
+void ThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type) {
   ThrowException(NULL, "Ljava/lang/ClassCastException;", NULL,
                  StringPrintf("%s cannot be cast to %s",
                               PrettyDescriptor(src_type).c_str(),
@@ -120,7 +119,7 @@
 
 // ClassFormatError
 
-void ThrowClassFormatError(const mirror::Class* referrer, const char* fmt, ...) {
+void ThrowClassFormatError(mirror::Class* referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException(NULL, "Ljava/lang/ClassFormatError;", referrer, fmt, &args);
@@ -136,7 +135,7 @@
 }
 
 void ThrowIllegalAccessErrorClassForMethodDispatch(mirror::Class* referrer, mirror::Class* accessed,
-                                                   const mirror::ArtMethod* called,
+                                                   mirror::ArtMethod* called,
                                                    InvokeType type) {
   std::ostringstream msg;
   msg << "Illegal class access ('" << PrettyDescriptor(referrer) << "' attempting to access '"
@@ -159,7 +158,7 @@
   ThrowException(NULL, "Ljava/lang/IllegalAccessError;", referrer, msg.str().c_str());
 }
 
-void ThrowIllegalAccessErrorFinalField(const mirror::ArtMethod* referrer,
+void ThrowIllegalAccessErrorFinalField(mirror::ArtMethod* referrer,
                                        mirror::ArtField* accessed) {
   std::ostringstream msg;
   msg << "Final field '" << PrettyField(accessed, false) << "' cannot be written to by method '"
@@ -187,7 +186,7 @@
 
 void ThrowIncompatibleClassChangeError(InvokeType expected_type, InvokeType found_type,
                                        mirror::ArtMethod* method,
-                                       const mirror::ArtMethod* referrer) {
+                                       mirror::ArtMethod* referrer) {
   std::ostringstream msg;
   msg << "The method '" << PrettyMethod(method) << "' was expected to be of type "
       << expected_type << " but instead was found to be of type " << found_type;
@@ -196,9 +195,9 @@
                  msg.str().c_str());
 }
 
-void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(const mirror::ArtMethod* interface_method,
+void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(mirror::ArtMethod* interface_method,
                                                                 mirror::Object* this_object,
-                                                                const mirror::ArtMethod* referrer) {
+                                                                mirror::ArtMethod* referrer) {
   // Referrer is calling interface_method on this_object; however, the interface_method isn't
   // implemented by this_object.
   CHECK(this_object != NULL);
@@ -212,8 +211,8 @@
                  msg.str().c_str());
 }
 
-void ThrowIncompatibleClassChangeErrorField(const mirror::ArtField* resolved_field, bool is_static,
-                                            const mirror::ArtMethod* referrer) {
+void ThrowIncompatibleClassChangeErrorField(mirror::ArtField* resolved_field, bool is_static,
+                                            mirror::ArtMethod* referrer) {
   std::ostringstream msg;
   msg << "Expected '" << PrettyField(resolved_field) << "' to be a "
       << (is_static ? "static" : "instance") << " field" << " rather than a "
@@ -222,7 +221,7 @@
                  msg.str().c_str());
 }
 
-void ThrowIncompatibleClassChangeError(const mirror::Class* referrer, const char* fmt, ...) {
+void ThrowIncompatibleClassChangeError(mirror::Class* referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException(NULL, "Ljava/lang/IncompatibleClassChangeError;", referrer, fmt, &args);
@@ -240,7 +239,7 @@
 
 // LinkageError
 
-void ThrowLinkageError(const mirror::Class* referrer, const char* fmt, ...) {
+void ThrowLinkageError(mirror::Class* referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException(NULL, "Ljava/lang/LinkageError;", referrer, fmt, &args);
@@ -486,7 +485,7 @@
 
 // VerifyError
 
-void ThrowVerifyError(const mirror::Class* referrer, const char* fmt, ...) {
+void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException(NULL, "Ljava/lang/VerifyError;", referrer, fmt, &args);
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 7f13891..792cdef 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -33,7 +33,7 @@
 
 // AbstractMethodError
 
-void ThrowAbstractMethodError(const mirror::ArtMethod* method)
+void ThrowAbstractMethodError(mirror::ArtMethod* method)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 // ArithmeticException
@@ -47,8 +47,7 @@
 
 // ArrayStoreException
 
-void ThrowArrayStoreException(const mirror::Class* element_class,
-                              const mirror::Class* array_class)
+void ThrowArrayStoreException(mirror::Class* element_class, mirror::Class* array_class)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 // ClassCircularityError
@@ -58,7 +57,7 @@
 
 // ClassCastException
 
-void ThrowClassCastException(const mirror::Class* dest_type, const mirror::Class* src_type)
+void ThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowClassCastException(const ThrowLocation* throw_location, const char* msg)
@@ -66,7 +65,7 @@
 
 // ClassFormatError
 
-void ThrowClassFormatError(const mirror::Class* referrer, const char* fmt, ...)
+void ThrowClassFormatError(mirror::Class* referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -76,7 +75,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowIllegalAccessErrorClassForMethodDispatch(mirror::Class* referrer, mirror::Class* accessed,
-                                                   const mirror::ArtMethod* called,
+                                                   mirror::ArtMethod* called,
                                                    InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -86,8 +85,7 @@
 void ThrowIllegalAccessErrorField(mirror::Class* referrer, mirror::ArtField* accessed)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIllegalAccessErrorFinalField(const mirror::ArtMethod* referrer,
-                                       mirror::ArtField* accessed)
+void ThrowIllegalAccessErrorFinalField(mirror::ArtMethod* referrer, mirror::ArtField* accessed)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowIllegalAccessError(mirror::Class* referrer, const char* fmt, ...)
@@ -102,20 +100,19 @@
 // IncompatibleClassChangeError
 
 void ThrowIncompatibleClassChangeError(InvokeType expected_type, InvokeType found_type,
-                                       mirror::ArtMethod* method,
-                                       const mirror::ArtMethod* referrer)
+                                       mirror::ArtMethod* method, mirror::ArtMethod* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(const mirror::ArtMethod* interface_method,
+void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(mirror::ArtMethod* interface_method,
                                                                 mirror::Object* this_object,
-                                                                const mirror::ArtMethod* referrer)
+                                                                mirror::ArtMethod* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIncompatibleClassChangeErrorField(const mirror::ArtField* resolved_field, bool is_static,
-                                            const mirror::ArtMethod* referrer)
+void ThrowIncompatibleClassChangeErrorField(mirror::ArtField* resolved_field, bool is_static,
+                                            mirror::ArtMethod* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIncompatibleClassChangeError(const mirror::Class* referrer, const char* fmt, ...)
+void ThrowIncompatibleClassChangeError(mirror::Class* referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -126,7 +123,7 @@
 
 // LinkageError
 
-void ThrowLinkageError(const mirror::Class* referrer, const char* fmt, ...)
+void ThrowLinkageError(mirror::Class* referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -185,7 +182,7 @@
 
 // VerifyError
 
-void ThrowVerifyError(const mirror::Class* referrer, const char* fmt, ...)
+void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 9194d73..99e7867 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -101,7 +101,7 @@
   virtual ~DebugInstrumentationListener() {}
 
   virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
-                             const mirror::ArtMethod* method, uint32_t dex_pc)
+                             mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (method->IsNative()) {
      // TODO: posting location events is a suspension point and native method entry stubs aren't.
@@ -111,7 +111,7 @@
   }
 
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method,
+                            mirror::ArtMethod* method,
                             uint32_t dex_pc, const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (method->IsNative()) {
@@ -122,7 +122,7 @@
   }
 
   virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc)
+                            mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // We're not registered to listen to this kind of event, so complain.
     LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method)
@@ -130,7 +130,7 @@
   }
 
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          const mirror::ArtMethod* method, uint32_t new_dex_pc)
+                          mirror::ArtMethod* method, uint32_t new_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc);
   }
@@ -303,7 +303,7 @@
  *
  * Null objects are tagged JT_OBJECT.
  */
-static JDWP::JdwpTag TagFromObject(const mirror::Object* o)
+static JDWP::JdwpTag TagFromObject(mirror::Object* o)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return (o == NULL) ? JDWP::JT_OBJECT : TagFromClass(o->GetClass());
 }
@@ -1056,16 +1056,16 @@
     size_t width = GetTagWidth(tag);
     uint8_t* dst = expandBufAddSpace(pReply, count * width);
     if (width == 8) {
-      const uint64_t* src8 = reinterpret_cast<uint64_t*>(a->GetRawData(sizeof(uint64_t)));
+      const uint64_t* src8 = reinterpret_cast<uint64_t*>(a->GetRawData(sizeof(uint64_t), 0));
       for (int i = 0; i < count; ++i) JDWP::Write8BE(&dst, src8[offset + i]);
     } else if (width == 4) {
-      const uint32_t* src4 = reinterpret_cast<uint32_t*>(a->GetRawData(sizeof(uint32_t)));
+      const uint32_t* src4 = reinterpret_cast<uint32_t*>(a->GetRawData(sizeof(uint32_t), 0));
       for (int i = 0; i < count; ++i) JDWP::Write4BE(&dst, src4[offset + i]);
     } else if (width == 2) {
-      const uint16_t* src2 = reinterpret_cast<uint16_t*>(a->GetRawData(sizeof(uint16_t)));
+      const uint16_t* src2 = reinterpret_cast<uint16_t*>(a->GetRawData(sizeof(uint16_t), 0));
       for (int i = 0; i < count; ++i) JDWP::Write2BE(&dst, src2[offset + i]);
     } else {
-      const uint8_t* src = reinterpret_cast<uint8_t*>(a->GetRawData(sizeof(uint8_t)));
+      const uint8_t* src = reinterpret_cast<uint8_t*>(a->GetRawData(sizeof(uint8_t), 0));
       memcpy(dst, &src[offset * width], count * width);
     }
   } else {
@@ -1081,10 +1081,13 @@
   return JDWP::ERR_NONE;
 }
 
-template <typename T> void CopyArrayData(mirror::Array* a, JDWP::Request& src, int offset, int count) {
+template <typename T>
+static void CopyArrayData(mirror::Array* a, JDWP::Request& src, int offset, int count)
+    NO_THREAD_SAFETY_ANALYSIS {
+  // TODO: fix when annotalysis correctly handles non-member functions.
   DCHECK(a->GetClass()->IsPrimitiveArray());
 
-  T* dst = &(reinterpret_cast<T*>(a->GetRawData(sizeof(T)))[offset * sizeof(T)]);
+  T* dst = reinterpret_cast<T*>(a->GetRawData(sizeof(T), offset));
   for (int i = 0; i < count; ++i) {
     *dst++ = src.ReadValue(sizeof(T));
   }
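
The old body computed &(reinterpret_cast<T*>(raw)[offset * sizeof(T)]): indexing a T* already scales by sizeof(T), so the element offset was scaled twice. GetRawData(sizeof(T), offset) now returns the correctly offset element address. The bug pattern on its own, as a hypothetical helper:

    // Sketch of the double-scaling bug fixed above.
    template <typename T>
    T* ElementAt(void* raw, int offset) {
      T* base = static_cast<T*>(raw);
      // return &base[offset * sizeof(T)];  // BUG: scales the offset twice.
      return &base[offset];                 // Correct: one implicit scaling.
    }
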
@@ -1928,7 +1931,7 @@
         JDWP::FrameId frame_id(GetFrameId());
         JDWP::JdwpLocation location;
         SetLocation(location, GetMethod(), GetDexPc());
-        VLOG(jdwp) << StringPrintf("    Frame %3zd: id=%3lld ", depth_, frame_id) << location;
+        VLOG(jdwp) << StringPrintf("    Frame %3zd: id=%3" PRIu64 " ", depth_, frame_id) << location;
         expandBufAdd8BE(buf_, frame_id);
         expandBufAddLocation(buf_, location);
       }
@@ -2285,7 +2288,7 @@
   visitor.WalkStack();
 }
 
-void Dbg::PostLocationEvent(const mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
+void Dbg::PostLocationEvent(mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
                             int event_flags, const JValue* return_value) {
   mirror::Class* c = m->GetDeclaringClass();
 
@@ -2340,7 +2343,7 @@
 }
 
 void Dbg::UpdateDebugger(Thread* thread, mirror::Object* this_object,
-                         const mirror::ArtMethod* m, uint32_t dex_pc) {
+                         mirror::ArtMethod* m, uint32_t dex_pc) {
   if (!IsDebuggerActive() || dex_pc == static_cast<uint32_t>(-2) /* fake method exit */) {
     return;
   }
@@ -2632,7 +2635,7 @@
       if (!m->IsRuntimeMethod()) {
         ++single_step_control_->stack_depth;
         if (single_step_control_->method == NULL) {
-          const mirror::DexCache* dex_cache = m->GetDeclaringClass()->GetDexCache();
+          mirror::DexCache* dex_cache = m->GetDeclaringClass()->GetDexCache();
           single_step_control_->method = m;
           *line_number_ = -1;
           if (dex_cache != NULL) {
@@ -2701,7 +2704,7 @@
     uint32_t last_pc;
   };
   single_step_control->dex_pcs.clear();
-  const mirror::ArtMethod* m = single_step_control->method;
+  mirror::ArtMethod* m = single_step_control->method;
   if (!m->IsNative()) {
     DebugCallbackContext context(single_step_control, line_number);
     MethodHelper mh(m);
@@ -3064,7 +3067,7 @@
   // Run through and find all chunks.  [Currently just find the first.]
   ScopedByteArrayRO contents(env, dataArray.get());
   if (length != request_length) {
-    LOG(WARNING) << StringPrintf("bad chunk found (len=%u pktLen=%d)", length, request_length);
+    LOG(WARNING) << StringPrintf("bad chunk found (len=%u pktLen=%zd)", length, request_length);
     return false;
   }
 
@@ -3456,7 +3459,7 @@
             Flush();
         }
     }
-    const mirror::Object* obj = reinterpret_cast<const mirror::Object*>(start);
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
 
     // Determine the type of this chunk.
     // OLD-TODO: if context.merge, see if this chunk is different from the last chunk.
@@ -3499,8 +3502,8 @@
     *p_++ = length - 1;
   }
 
-  uint8_t ExamineObject(const mirror::Object* o, bool is_native_heap)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+  uint8_t ExamineObject(mirror::Object* o, bool is_native_heap)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     if (o == NULL) {
       return HPSG_STATE(SOLIDITY_FREE, 0);
     }
@@ -3753,7 +3756,7 @@
               << PrettyClass(record->type);
 
     for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) {
-      const mirror::ArtMethod* m = record->stack[stack_frame].method;
+      mirror::ArtMethod* m = record->stack[stack_frame].method;
       if (m == NULL) {
         break;
       }
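
The format-string fixes in this file switch 64-bit values to the <inttypes.h> PRI macros and size-typed values to %z conversions: "%lld" assumes long long has the same width as uint64_t, which is not guaranteed across ABIs. For example:

    #include <cinttypes>
    #include <cstddef>
    #include <cstdio>

    // Sketch: portable formatting for fixed-width and size types.
    void PrintFrame(size_t depth, uint64_t frame_id) {
      // PRIu64 expands to the right conversion for uint64_t on this ABI.
      std::printf("Frame %3zu: id=%3" PRIu64 "\n", depth, frame_id);
    }
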
diff --git a/runtime/debugger.h b/runtime/debugger.h
index a3f8b9c..328c9cd 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -366,7 +366,7 @@
     kMethodEntry    = 0x04,
     kMethodExit     = 0x08,
   };
-  static void PostLocationEvent(const mirror::ArtMethod* method, int pcOffset,
+  static void PostLocationEvent(mirror::ArtMethod* method, int pcOffset,
                                 mirror::Object* thisPtr, int eventFlags,
                                 const JValue* return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -382,7 +382,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void UpdateDebugger(Thread* thread, mirror::Object* this_object,
-                             const mirror::ArtMethod* method, uint32_t new_dex_pc)
+                             mirror::ArtMethod* method, uint32_t new_dex_pc)
       LOCKS_EXCLUDED(Locks::breakpoint_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index 3b2135c..a459308 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -44,7 +44,7 @@
 inline const DexFile::TryItem* DexFile::GetTryItems(const CodeItem& code_item, uint32_t offset) {
   const uint16_t* insns_end_ = &code_item.insns_[code_item.insns_size_in_code_units_];
   return reinterpret_cast<const TryItem*>
-      (RoundUp(reinterpret_cast<uint32_t>(insns_end_), 4)) + offset;
+      (RoundUp(reinterpret_cast<uintptr_t>(insns_end_), 4)) + offset;
 }
 
 static inline bool DexFileStringEquals(const DexFile* df1, uint32_t sidx1,
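
GetTryItems now rounds the end-of-insns pointer through uintptr_t rather than uint32_t; the try items sit immediately after the insns_ array, aligned up to four bytes, and a 32-bit round-trip would truncate the pointer on 64-bit hosts. The layout computation in isolation, as a hypothetical helper:

    #include <cstdint>

    // Sketch: locate a 4-byte-aligned table that trails a uint16_t array,
    // as the dex CodeItem does for its try items.
    const void* AlignedTableAfter(const uint16_t* insns, uint32_t count) {
      uintptr_t end = reinterpret_cast<uintptr_t>(insns + count);
      uintptr_t aligned = (end + 3) & ~static_cast<uintptr_t>(3);  // Round up.
      return reinterpret_cast<const void*>(aligned);
    }
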
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 429c516..eaba7eb 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -284,6 +284,27 @@
   }
 }
 
+DexFile::DexFile(const byte* base, size_t size,
+                 const std::string& location,
+                 uint32_t location_checksum,
+                 MemMap* mem_map)
+    : begin_(base),
+      size_(size),
+      location_(location),
+      location_checksum_(location_checksum),
+      mem_map_(mem_map),
+      modification_lock("DEX modification lock"),
+      header_(reinterpret_cast<const Header*>(base)),
+      string_ids_(reinterpret_cast<const StringId*>(base + header_->string_ids_off_)),
+      type_ids_(reinterpret_cast<const TypeId*>(base + header_->type_ids_off_)),
+      field_ids_(reinterpret_cast<const FieldId*>(base + header_->field_ids_off_)),
+      method_ids_(reinterpret_cast<const MethodId*>(base + header_->method_ids_off_)),
+      proto_ids_(reinterpret_cast<const ProtoId*>(base + header_->proto_ids_off_)),
+      class_defs_(reinterpret_cast<const ClassDef*>(base + header_->class_defs_off_)) {
+  CHECK(begin_ != nullptr) << GetLocation();
+  CHECK_GT(size_, 0U) << GetLocation();
+}
+
 DexFile::~DexFile() {
   // We don't call DeleteGlobalRef on dex_object_ because we're only called by DestroyJavaVM, and
   // that's only called after DetachCurrentThread, which means there's no JNIEnv. We could
@@ -292,25 +313,12 @@
 }
 
 bool DexFile::Init(std::string* error_msg) {
-  InitMembers();
   if (!CheckMagicAndVersion(error_msg)) {
     return false;
   }
   return true;
 }
 
-void DexFile::InitMembers() {
-  const byte* b = begin_;
-  header_ = reinterpret_cast<const Header*>(b);
-  const Header* h = header_;
-  string_ids_ = reinterpret_cast<const StringId*>(b + h->string_ids_off_);
-  type_ids_ = reinterpret_cast<const TypeId*>(b + h->type_ids_off_);
-  field_ids_ = reinterpret_cast<const FieldId*>(b + h->field_ids_off_);
-  method_ids_ = reinterpret_cast<const MethodId*>(b + h->method_ids_off_);
-  proto_ids_ = reinterpret_cast<const ProtoId*>(b + h->proto_ids_off_);
-  class_defs_ = reinterpret_cast<const ClassDef*>(b + h->class_defs_off_);
-}
-
 bool DexFile::CheckMagicAndVersion(std::string* error_msg) const {
   CHECK(header_->magic_ != NULL) << GetLocation();
   if (!IsMagicValid(header_->magic_)) {
@@ -613,7 +621,7 @@
   return Signature(this, *proto_id);
 }
 
-int32_t DexFile::GetLineNumFromPC(const mirror::ArtMethod* method, uint32_t rel_pc) const {
+int32_t DexFile::GetLineNumFromPC(mirror::ArtMethod* method, uint32_t rel_pc) const {
   // For a native method, the line number should be -2 to indicate it is native. Note that
   // "line number == -2" is how libcore tells that a StackTraceElement refers to a native method.
   if (method->GetCodeItemOffset() == 0) {
@@ -856,6 +864,13 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const DexFile& dex_file) {
+  os << StringPrintf("[DexFile: %s dex-checksum=%08x location-checksum=%08x %p-%p]",
+                     dex_file.GetLocation().c_str(),
+                     dex_file.GetHeader().checksum_, dex_file.GetLocationChecksum(),
+                     dex_file.Begin(), dex_file.Begin() + dex_file.Size());
+  return os;
+}
 std::string Signature::ToString() const {
   if (dex_file_ == nullptr) {
     CHECK(proto_id_ == nullptr);
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 69593cd..46df455 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -794,7 +794,7 @@
   // Returns -2 for native methods (as expected in exception traces).
   //
   // This is used by the runtime; therefore use art::Method, not art::DexFile::Method.
-  int32_t GetLineNumFromPC(const mirror::ArtMethod* method, uint32_t rel_pc) const
+  int32_t GetLineNumFromPC(mirror::ArtMethod* method, uint32_t rel_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void DecodeDebugInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
@@ -849,30 +849,11 @@
   DexFile(const byte* base, size_t size,
           const std::string& location,
           uint32_t location_checksum,
-          MemMap* mem_map)
-      : begin_(base),
-        size_(size),
-        location_(location),
-        location_checksum_(location_checksum),
-        mem_map_(mem_map),
-        modification_lock("DEX modification lock"),
-        header_(0),
-        string_ids_(0),
-        type_ids_(0),
-        field_ids_(0),
-        method_ids_(0),
-        proto_ids_(0),
-        class_defs_(0) {
-    CHECK(begin_ != NULL) << GetLocation();
-    CHECK_GT(size_, 0U) << GetLocation();
-  }
+          MemMap* mem_map);
 
   // Top-level initializer that calls other Init methods.
   bool Init(std::string* error_msg);
 
-  // Caches pointers into to the various file sections.
-  void InitMembers();
-
   // Returns true if the header magic and version numbers are of the expected values.
   bool CheckMagicAndVersion(std::string* error_msg) const;
 
@@ -903,26 +884,27 @@
   Mutex modification_lock;
 
   // Points to the header section.
-  const Header* header_;
+  const Header* const header_;
 
   // Points to the base of the string identifier list.
-  const StringId* string_ids_;
+  const StringId* const string_ids_;
 
   // Points to the base of the type identifier list.
-  const TypeId* type_ids_;
+  const TypeId* const type_ids_;
 
   // Points to the base of the field identifier list.
-  const FieldId* field_ids_;
+  const FieldId* const field_ids_;
 
   // Points to the base of the method identifier list.
-  const MethodId* method_ids_;
+  const MethodId* const method_ids_;
 
   // Points to the base of the prototype identifier list.
-  const ProtoId* proto_ids_;
+  const ProtoId* const proto_ids_;
 
   // Points to the base of the class definition list.
-  const ClassDef* class_defs_;
+  const ClassDef* const class_defs_;
 };
+std::ostream& operator<<(std::ostream& os, const DexFile& dex_file);
 
 // Iterate over a dex file's ProtoId's parameters
 class DexFileParameterIterator {
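
The header change above makes every section pointer `T* const`, so it can no longer be patched by a separate InitMembers() pass; the pointers must be computed in the constructor's initializer list, which is why the constructor body moves out of line. A minimal sketch of the idiom with a made-up two-section header (types and offsets are illustrative, not the real dex layout):

    #include <cstddef>
    #include <cstdint>

    struct Header {
      uint32_t string_ids_off_;
      uint32_t type_ids_off_;
    };

    class File {
     public:
      explicit File(const uint8_t* base)
          : begin_(base),
            header_(reinterpret_cast<const Header*>(base)),
            // header_ is already initialized at this point, so the section
            // pointers can be derived directly in the initializer list.
            string_ids_(base + header_->string_ids_off_),
            type_ids_(base + header_->type_ids_off_) {}

      const uint8_t* Begin() const { return begin_; }

     private:
      const uint8_t* const begin_;
      const Header* const header_;
      const uint8_t* const string_ids_;  // immutable after construction
      const uint8_t* const type_ids_;
    };

    int main() {
      alignas(Header) uint8_t buf[64] = {};
      reinterpret_cast<Header*>(buf)->string_ids_off_ = 8;
      reinterpret_cast<Header*>(buf)->type_ids_off_ = 16;
      File f(buf);
      return f.Begin() == buf ? 0 : 1;
    }
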
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 2f7c38a..261c217 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -646,7 +646,8 @@
       reservation_name += file_->GetPath();
       std::string error_msg;
       UniquePtr<MemMap> reserve(MemMap::MapAnonymous(reservation_name.c_str(),
-                                                     NULL, GetLoadedSize(), PROT_NONE, &error_msg));
+                                                     NULL, GetLoadedSize(), PROT_NONE, false,
+                                                     &error_msg));
       CHECK(reserve.get() != NULL) << file_->GetPath() << ": " << error_msg;
       base_address_ = reserve->Begin();
       segments_.push_back(reserve.release());
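
Every MemMap::MapAnonymous call site in this change gains a new bool argument (passed as `false`) ahead of error_msg; the diff never shows the parameter's declaration. A sketch of what such a helper looks like over plain mmap, assuming the bool requests a low-address mapping (the name `low_4gb` and its meaning are assumptions, plausible given the x86_64 stanzas added elsewhere in this change):

    #include <sys/mman.h>
    #include <cerrno>
    #include <cstddef>
    #include <cstdio>
    #include <cstring>
    #include <string>

    // Hypothetical stand-in for MemMap::MapAnonymous; `low_4gb` is an assumed
    // name and meaning for the new bool the call sites pass as `false`.
    static void* MapAnonymous(const char* name, size_t byte_count, int prot,
                              bool low_4gb, std::string* error_msg) {
      int flags = MAP_PRIVATE | MAP_ANONYMOUS;
    #ifdef MAP_32BIT
      if (low_4gb) {
        flags |= MAP_32BIT;  // Linux/x86-64: keep the mapping below 4 GiB
      }
    #else
      (void)low_4gb;
    #endif
      void* addr = mmap(nullptr, byte_count, prot, flags, -1, 0);
      if (addr == MAP_FAILED) {
        *error_msg = std::string("mmap of ") + name + " failed: " + strerror(errno);
        return nullptr;
      }
      return addr;
    }

    int main() {
      std::string error;
      void* p = MapAnonymous("demo", 4096, PROT_READ | PROT_WRITE, false, &error);
      if (p == nullptr) {
        std::fprintf(stderr, "%s\n", error.c_str());
        return 1;
      }
      munmap(p, 4096);
    }
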
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index f6e8ca3..20532f4 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -282,7 +282,7 @@
 };
 
 template<FindFieldType type, bool access_check>
-static inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, const mirror::ArtMethod* referrer,
+static inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
                                                   Thread* self, size_t expected_size) {
   bool is_primitive;
   bool is_set;
@@ -311,8 +311,8 @@
       return nullptr;
     }
     mirror::Class* referring_class = referrer->GetDeclaringClass();
-    if (UNLIKELY(!referring_class->CanAccessResolvedField<true>(fields_class, resolved_field,
-                                                                field_idx))) {
+    if (UNLIKELY(!referring_class->CheckResolvedFieldAccess(fields_class, resolved_field,
+                                                            field_idx))) {
       DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
       return nullptr;  // Failure.
     }
@@ -321,8 +321,7 @@
       return nullptr;  // Failure.
     } else {
       FieldHelper fh(resolved_field);
-      if (UNLIKELY(fh.IsPrimitiveType() != is_primitive ||
-                   fh.FieldSize() != expected_size)) {
+      if (UNLIKELY(fh.IsPrimitiveType() != is_primitive || fh.FieldSize() != expected_size)) {
         ThrowLocation throw_location = self->GetCurrentLocationForThrow();
         DCHECK(throw_location.GetMethod() == referrer);
         self->ThrowNewExceptionF(throw_location, "Ljava/lang/NoSuchFieldError;",
@@ -358,7 +357,7 @@
 #define EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL(_type, _access_check) \
 template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE \
 mirror::ArtField* FindFieldFromCode<_type, _access_check>(uint32_t field_idx, \
-                                                          const mirror::ArtMethod* referrer, \
+                                                          mirror::ArtMethod* referrer, \
                                                           Thread* self, size_t expected_size) \
 
 #define EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(_type) \
@@ -381,11 +380,12 @@
 static inline mirror::ArtMethod* FindMethodFromCode(uint32_t method_idx, mirror::Object* this_object,
                                                     mirror::ArtMethod* referrer, Thread* self) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  SirtRef<mirror::Object> sirt_this(self, this_object);
   mirror::ArtMethod* resolved_method = class_linker->ResolveMethod(method_idx, referrer, type);
   if (UNLIKELY(resolved_method == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
-  } else if (UNLIKELY(this_object == nullptr && type != kStatic)) {
+  } else if (UNLIKELY(sirt_this.get() == nullptr && type != kStatic)) {
     // Maintain interpreter-like semantics where NullPointerException is thrown
     // after potential NoSuchMethodError from class linker.
     ThrowLocation throw_location = self->GetCurrentLocationForThrow();
@@ -402,8 +402,8 @@
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
     mirror::Class* referring_class = referrer->GetDeclaringClass();
     bool can_access_resolved_method =
-        referring_class->CanAccessResolvedMethod<true, type>(methods_class, resolved_method,
-                                                             method_idx);
+        referring_class->CheckResolvedMethodAccess<type>(methods_class, resolved_method,
+                                                         method_idx);
     if (UNLIKELY(!can_access_resolved_method)) {
       DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
       return nullptr;  // Failure.
@@ -414,7 +414,7 @@
     case kDirect:
       return resolved_method;
     case kVirtual: {
-      mirror::ObjectArray<mirror::ArtMethod>* vtable = this_object->GetClass()->GetVTable();
+      mirror::ObjectArray<mirror::ArtMethod>* vtable = sirt_this->GetClass()->GetVTable();
       uint16_t vtable_index = resolved_method->GetMethodIndex();
       if (access_check &&
           (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength()))) {
@@ -451,16 +451,16 @@
     }
     case kInterface: {
       uint32_t imt_index = resolved_method->GetDexMethodIndex() % ClassLinker::kImtSize;
-      mirror::ObjectArray<mirror::ArtMethod>* imt_table = this_object->GetClass()->GetImTable();
+      mirror::ObjectArray<mirror::ArtMethod>* imt_table = sirt_this->GetClass()->GetImTable();
       mirror::ArtMethod* imt_method = imt_table->Get(imt_index);
       if (!imt_method->IsImtConflictMethod()) {
         return imt_method;
       } else {
         mirror::ArtMethod* interface_method =
-            this_object->GetClass()->FindVirtualMethodForInterface(resolved_method);
+            sirt_this->GetClass()->FindVirtualMethodForInterface(resolved_method);
         if (UNLIKELY(interface_method == nullptr)) {
-          ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(resolved_method, this_object,
-                                                                     referrer);
+          ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(resolved_method,
+                                                                     sirt_this.get(), referrer);
           return nullptr;  // Failure.
         } else {
           return interface_method;
@@ -495,7 +495,7 @@
 
 // Fast path field resolution that can't initialize classes or throw exceptions.
 static inline mirror::ArtField* FindFieldFast(uint32_t field_idx,
-                                              const mirror::ArtMethod* referrer,
+                                              mirror::ArtMethod* referrer,
                                               FindFieldType type, size_t expected_size)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* resolved_field =
@@ -551,7 +551,7 @@
 // Fast path method resolution that can't throw exceptions.
 static inline mirror::ArtMethod* FindMethodFast(uint32_t method_idx,
                                                 mirror::Object* this_object,
-                                                const mirror::ArtMethod* referrer,
+                                                mirror::ArtMethod* referrer,
                                                 bool access_check, InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   bool is_direct = type == kStatic || type == kDirect;
@@ -592,7 +592,7 @@
 }
 
 static inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
-                                                    const mirror::ArtMethod* referrer,
+                                                    mirror::ArtMethod* referrer,
                                                     Thread* self, bool can_run_clinit,
                                                     bool verify_access)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -629,7 +629,7 @@
 
 extern void ThrowStackOverflowError(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-static inline mirror::String* ResolveStringFromCode(const mirror::ArtMethod* referrer,
+static inline mirror::String* ResolveStringFromCode(mirror::ArtMethod* referrer,
                                                     uint32_t string_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -718,21 +718,21 @@
   return reinterpret_cast<void*>(art_portable_to_interpreter_bridge);
 }
 
+static inline const void* GetPortableToQuickBridge() {
+  // TODO: portable to quick bridge. Bug: 8196384
+  return GetPortableToInterpreterBridge();
+}
+
 extern "C" void art_quick_to_interpreter_bridge(mirror::ArtMethod*);
 static inline const void* GetQuickToInterpreterBridge() {
   return reinterpret_cast<void*>(art_quick_to_interpreter_bridge);
 }
 
-// Return address of interpreter stub.
-static inline const void* GetCompiledCodeToInterpreterBridge() {
-#if defined(ART_USE_PORTABLE_COMPILER)
-  return GetPortableToInterpreterBridge();
-#else
+static inline const void* GetQuickToPortableBridge() {
+  // TODO: quick to portable bridge. Bug: 8196384
   return GetQuickToInterpreterBridge();
-#endif
 }
 
-
 static inline const void* GetPortableResolutionTrampoline(ClassLinker* class_linker) {
   return class_linker->GetPortableResolutionTrampoline();
 }
@@ -741,15 +741,6 @@
   return class_linker->GetQuickResolutionTrampoline();
 }
 
-// Return address of resolution trampoline stub for defined compiler.
-static inline const void* GetResolutionTrampoline(ClassLinker* class_linker) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-  return GetPortableResolutionTrampoline(class_linker);
-#else
-  return GetQuickResolutionTrampoline(class_linker);
-#endif
-}
-
 static inline const void* GetPortableImtConflictTrampoline(ClassLinker* class_linker) {
   return class_linker->GetPortableImtConflictTrampoline();
 }
@@ -758,15 +749,6 @@
   return class_linker->GetQuickImtConflictTrampoline();
 }
 
-// Return address of imt conflict trampoline stub for defined compiler.
-static inline const void* GetImtConflictTrampoline(ClassLinker* class_linker) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-  return GetPortableImtConflictTrampoline(class_linker);
-#else
-  return GetQuickImtConflictTrampoline(class_linker);
-#endif
-}
-
 extern "C" void art_portable_proxy_invoke_handler();
 static inline const void* GetPortableProxyInvokeHandler() {
   return reinterpret_cast<void*>(art_portable_proxy_invoke_handler);
@@ -777,14 +759,6 @@
   return reinterpret_cast<void*>(art_quick_proxy_invoke_handler);
 }
 
-static inline const void* GetProxyInvokeHandler() {
-#if defined(ART_USE_PORTABLE_COMPILER)
-  return GetPortableProxyInvokeHandler();
-#else
-  return GetQuickProxyInvokeHandler();
-#endif
-}
-
 extern "C" void* art_jni_dlsym_lookup_stub(JNIEnv*, jobject);
 static inline void* GetJniDlsymLookupStub() {
   return reinterpret_cast<void*>(art_jni_dlsym_lookup_stub);
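
The substantive change in this file is in FindMethodFromCode: `this_object` is wrapped in a SirtRef before ResolveMethod runs, because resolution can allocate, and an allocation can trigger a moving GC that would leave a raw pointer stale. A toy illustration of the rooting idea (the real SirtRef registers its slot with the Thread; this sketch fakes that with a global root list and a "resolver" that relocates objects):

    #include <vector>

    struct Object { int data; };

    // Toy root table standing in for the thread-local SIRT: a collector can
    // find and update every registered slot when objects move.
    static std::vector<Object**> g_roots;

    template <typename T>
    class Handle {
     public:
      explicit Handle(T* obj) : obj_(obj) { g_roots.push_back(&obj_); }
      ~Handle() { g_roots.pop_back(); }
      T* get() const { return obj_; }
      T* operator->() const { return obj_; }
     private:
      T* obj_;
    };

    // Stand-in for a resolution step that may move objects: it relocates every
    // rooted object and fixes up the rooted slots; exactly the fix-up a raw
    // Object* held by the caller would miss.
    static void ResolveMayMoveObjects() {
      for (Object** root : g_roots) {
        Object* moved = new Object(**root);
        delete *root;
        *root = moved;
      }
    }

    int main() {
      Handle<Object> sirt_this(new Object{42});
      ResolveMayMoveObjects();   // any raw copy of the pointer is now stale
      int v = sirt_this->data;   // the handle still sees the live object
      delete sirt_this.get();
      return v == 42 ? 0 : 1;
    }
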
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index 0df00c2..8a2ce51 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -45,15 +45,15 @@
     }
   }
   uint16_t arg_offset = (code_item == NULL) ? 0 : code_item->registers_size_ - code_item->ins_size_;
-#if defined(ART_USE_PORTABLE_COMPILER)
-  ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
-  arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
-  method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result, mh.GetShorty()[0]);
-#else
-  method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
-                 (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
-                 result, mh.GetShorty()[0]);
-#endif
+  if (kUsePortableCompiler) {
+    ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
+    arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
+    method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result, mh.GetShorty()[0]);
+  } else {
+    method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
+                   (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
+                   result, mh.GetShorty()[0]);
+  }
 }
 
 }  // namespace art
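
Replacing `#if defined(ART_USE_PORTABLE_COMPILER)` with `if (kUsePortableCompiler)` keeps both invoke paths compiling under every configuration while the optimizer still drops the dead branch. A minimal sketch of the idiom (the flag's real definition lives elsewhere in the runtime; `false` here is only for the demo):

    #include <cstdio>

    static constexpr bool kUsePortableCompiler = false;  // stand-in definition

    static void InvokeViaArgArray() { std::puts("portable path"); }
    static void InvokeViaVRegs()    { std::puts("quick path"); }

    int main() {
      if (kUsePortableCompiler) {  // constant-folded; both arms type-checked
        InvokeViaArgArray();
      } else {
        InvokeViaVRegs();
      }
    }
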
diff --git a/runtime/entrypoints/portable/portable_cast_entrypoints.cc b/runtime/entrypoints/portable/portable_cast_entrypoints.cc
index d343c5d..a553a22 100644
--- a/runtime/entrypoints/portable/portable_cast_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_cast_entrypoints.cc
@@ -20,16 +20,16 @@
 
 namespace art {
 
-extern "C" int32_t art_portable_is_assignable_from_code(const mirror::Class* dest_type,
-                                                        const mirror::Class* src_type)
+extern "C" int32_t art_portable_is_assignable_from_code(mirror::Class* dest_type,
+                                                        mirror::Class* src_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(dest_type != NULL);
   DCHECK(src_type != NULL);
   return dest_type->IsAssignableFrom(src_type) ? 1 : 0;
 }
 
-extern "C" void art_portable_check_cast_from_code(const mirror::Class* dest_type,
-                                                  const mirror::Class* src_type)
+extern "C" void art_portable_check_cast_from_code(mirror::Class* dest_type,
+                                                  mirror::Class* src_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(dest_type->IsClass()) << PrettyClass(dest_type);
   DCHECK(src_type->IsClass()) << PrettyClass(src_type);
@@ -38,8 +38,8 @@
   }
 }
 
-extern "C" void art_portable_check_put_array_element_from_code(const mirror::Object* element,
-                                                               const mirror::Object* array)
+extern "C" void art_portable_check_put_array_element_from_code(mirror::Object* element,
+                                                               mirror::Object* array)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (element == NULL) {
     return;
diff --git a/runtime/entrypoints/portable/portable_field_entrypoints.cc b/runtime/entrypoints/portable/portable_field_entrypoints.cc
index 095e99e..0b54b9c 100644
--- a/runtime/entrypoints/portable/portable_field_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_field_entrypoints.cc
@@ -65,13 +65,13 @@
                                                          mirror::Object* new_value)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     field->SetObj(field->GetDeclaringClass(), new_value);
     return 0;
   }
   field = FindFieldFromCode<StaticObjectWrite, true>(field_idx, referrer, Thread::Current(),
-                                                     sizeof(mirror::Object*));
+                                                     sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     field->SetObj(field->GetDeclaringClass(), new_value);
     return 0;
@@ -113,12 +113,12 @@
                                                                  mirror::ArtMethod* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(field->GetDeclaringClass());
   }
   field = FindFieldFromCode<StaticObjectRead, true>(field_idx, referrer, Thread::Current(),
-                                                    sizeof(mirror::Object*));
+                                                    sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(field->GetDeclaringClass());
   }
@@ -167,13 +167,13 @@
                                                            mirror::Object* new_value)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     field->SetObj(obj, new_value);
     return 0;
   }
   field = FindFieldFromCode<InstanceObjectWrite, true>(field_idx, referrer, Thread::Current(),
-                                                       sizeof(mirror::Object*));
+                                                       sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     field->SetObj(obj, new_value);
     return 0;
@@ -218,12 +218,12 @@
                                                                    mirror::Object* obj)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(obj);
   }
   field = FindFieldFromCode<InstanceObjectRead, true>(field_idx, referrer, Thread::Current(),
-                                                      sizeof(mirror::Object*));
+                                                      sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(obj);
   }
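
The expected-size argument switches from sizeof(mirror::Object*) to sizeof(mirror::HeapReference<mirror::Object>) because an in-heap reference slot need not be pointer-sized: with compressed references it stays 32 bits even on a 64-bit host. A toy sketch of such a reference type (the real mirror::HeapReference layout is an assumption beyond what the diff shows):

    #include <cassert>
    #include <cstdint>

    struct Object { int x; };

    // Toy compressed reference: a 32-bit offset from the heap base.
    struct HeapReference {
      uint32_t ref_;
      Object* AsPtr(uint8_t* heap_base) const {
        return reinterpret_cast<Object*>(heap_base + ref_);
      }
    };

    int main() {
      static_assert(sizeof(HeapReference) == 4, "in-heap slot is 4 bytes");
      // On LP64 a raw pointer slot would be 8 bytes, so the two sizes differ:
      assert(sizeof(Object*) >= sizeof(HeapReference));
      return 0;
    }
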
diff --git a/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc b/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
index 8cf4eed..1005d0e 100644
--- a/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
@@ -44,7 +44,7 @@
     return;  // Error
   }
   uint32_t size_in_bytes = payload->element_count * payload->element_width;
-  memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
+  memcpy(array->GetRawData(payload->element_width, 0), payload->data, size_in_bytes);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
index 47ccbb1..d34b097 100644
--- a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
@@ -34,7 +34,7 @@
     }
   }
   DCHECK(!thread->IsExceptionPending());
-  const void* code = method->GetEntryPointFromCompiledCode();
+  const void* code = method->GetEntryPointFromPortableCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
   if (UNLIKELY(code == NULL)) {
diff --git a/runtime/entrypoints/portable/portable_throw_entrypoints.cc b/runtime/entrypoints/portable/portable_throw_entrypoints.cc
index 2a0df9b..1fdb832 100644
--- a/runtime/entrypoints/portable/portable_throw_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_throw_entrypoints.cc
@@ -75,7 +75,7 @@
   ThrowLocation throw_location;
   mirror::Throwable* exception = self->GetException(&throw_location);
   // Check for special deoptimization exception.
-  if (UNLIKELY(reinterpret_cast<int32_t>(exception) == -1)) {
+  if (UNLIKELY(reinterpret_cast<intptr_t>(exception) == -1)) {
     return -1;
   }
   mirror::Class* exception_type = exception->GetClass();
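
The cast fix is 64-bit hygiene: reinterpret_cast<int32_t>(pointer) will not even compile on LP64, since reinterpret_cast may not truncate a pointer, whereas intptr_t is pointer-sized on every target. A sketch of the sentinel test being performed:

    #include <cstdint>

    struct Throwable {};

    // -1 is used as an in-band marker for "deoptimize", not a real object.
    static bool IsDeoptimizationSentinel(Throwable* exception) {
      return reinterpret_cast<intptr_t>(exception) == -1;
    }

    int main() {
      Throwable* sentinel = reinterpret_cast<Throwable*>(static_cast<intptr_t>(-1));
      return IsDeoptimizationSentinel(sentinel) ? 0 : 1;
    }
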
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index 2162dcc..55fd301 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -47,6 +47,11 @@
 #define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 0
 #define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 0
 #define PORTABLE_STACK_ARG_SKIP 4
+#elif defined(__x86_64__)
+// TODO: implement and check these.
+#define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 16
+#define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 96
+#define PORTABLE_STACK_ARG_SKIP 0
 #else
 #error "Unsupported architecture"
 #define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 0
@@ -387,43 +392,42 @@
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type));
   }
-  const void* code = NULL;
+  const void* code = nullptr;
   if (LIKELY(!thread->IsExceptionPending())) {
     // Ensure that the called method's class is initialized.
     SirtRef<mirror::Class> called_class(thread, called->GetDeclaringClass());
     linker->EnsureInitialized(called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
-      code = called->GetEntryPointFromCompiledCode();
+      code = called->GetEntryPointFromPortableCompiledCode();
       // TODO: remove this after we solve the link issue.
-      {  // for lazy link.
-        if (code == NULL) {
-          code = linker->GetOatCodeFor(called);
-        }
+      if (code == nullptr) {
+        bool have_portable_code;
+        code = linker->GetPortableOatCodeFor(called, &have_portable_code);
       }
     } else if (called_class->IsInitializing()) {
       if (invoke_type == kStatic) {
         // Class is still initializing, go to oat and grab code (trampoline must be left in place
         // until class is initialized to stop races between threads).
-        code = linker->GetOatCodeFor(called);
+        bool have_portable_code;
+        code = linker->GetPortableOatCodeFor(called, &have_portable_code);
       } else {
         // No trampoline for non-static methods.
-        code = called->GetEntryPointFromCompiledCode();
+        code = called->GetEntryPointFromPortableCompiledCode();
         // TODO: remove this after we solve the link issue.
-        {  // for lazy link.
-          if (code == NULL) {
-            code = linker->GetOatCodeFor(called);
-          }
+        if (code == nullptr) {
+          bool have_portable_code;
+          code = linker->GetPortableOatCodeFor(called, &have_portable_code);
         }
       }
     } else {
       DCHECK(called_class->IsErroneous());
     }
   }
-  if (LIKELY(code != NULL)) {
+  if (LIKELY(code != nullptr)) {
     // Expect class to at least be initializing.
     DCHECK(called->GetDeclaringClass()->IsInitializing());
     // Don't want infinite recursion.
-    DCHECK(code != GetResolutionTrampoline(linker));
+    DCHECK(code != GetPortableResolutionTrampoline(linker));
     // Set up entry into main method
     *called_addr = called;
   }
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index ae53d6c..a6ab69b 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -20,8 +20,7 @@
 namespace art {
 
 // Assignable test for code; won't throw. Null and equality tests have already been performed.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class)
+extern "C" uint32_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(klass != NULL);
   DCHECK(ref_class != NULL);
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 003047a..ab428a5 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -26,7 +26,7 @@
 namespace art {
 
 extern "C" mirror::Class* artInitializeStaticStorageFromCode(uint32_t type_idx,
-                                                             const mirror::ArtMethod* referrer,
+                                                             mirror::ArtMethod* referrer,
                                                              Thread* self,
                                                              mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -38,7 +38,7 @@
 }
 
 extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx,
-                                                    const mirror::ArtMethod* referrer,
+                                                    mirror::ArtMethod* referrer,
                                                     Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
@@ -47,7 +47,7 @@
 }
 
 extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx,
-                                                                   const mirror::ArtMethod* referrer,
+                                                                   mirror::ArtMethod* referrer,
                                                                    Thread* self,
                                                                    mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 0a533bd..93ff7aa 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -26,7 +26,7 @@
 namespace art {
 
 extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx,
-                                           const mirror::ArtMethod* referrer,
+                                           mirror::ArtMethod* referrer,
                                            Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
@@ -43,7 +43,7 @@
 }
 
 extern "C" uint64_t artGet64StaticFromCode(uint32_t field_idx,
-                                           const mirror::ArtMethod* referrer,
+                                           mirror::ArtMethod* referrer,
                                            Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
@@ -60,17 +60,17 @@
 }
 
 extern "C" mirror::Object* artGetObjStaticFromCode(uint32_t field_idx,
-                                                   const mirror::ArtMethod* referrer,
+                                                   mirror::ArtMethod* referrer,
                                                    Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
-                                       sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(field->GetDeclaringClass());
   }
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticObjectRead, true>(field_idx, referrer, self,
-                                                    sizeof(mirror::Object*));
+                                                    sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(field->GetDeclaringClass());
   }
@@ -78,7 +78,7 @@
 }
 
 extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             const mirror::ArtMethod* referrer, Thread* self,
+                                             mirror::ArtMethod* referrer, Thread* self,
                                              mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
@@ -101,7 +101,7 @@
 }
 
 extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             const mirror::ArtMethod* referrer, Thread* self,
+                                             mirror::ArtMethod* referrer, Thread* self,
                                              mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
@@ -124,18 +124,18 @@
 }
 
 extern "C" mirror::Object* artGetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                                     const mirror::ArtMethod* referrer,
+                                                     mirror::ArtMethod* referrer,
                                                      Thread* self,
                                                      mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL && obj != NULL)) {
     return field->GetObj(obj);
   }
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstanceObjectRead, true>(field_idx, referrer, self,
-                                                      sizeof(mirror::Object*));
+                                                      sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     if (UNLIKELY(obj == NULL)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
@@ -148,7 +148,7 @@
 }
 
 extern "C" int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                      const mirror::ArtMethod* referrer, Thread* self,
+                                      mirror::ArtMethod* referrer, Thread* self,
                                       mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
@@ -166,7 +166,7 @@
   return -1;  // failure
 }
 
-extern "C" int artSet64StaticFromCode(uint32_t field_idx, const mirror::ArtMethod* referrer,
+extern "C" int artSet64StaticFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
                                       uint64_t new_value, Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
@@ -185,11 +185,11 @@
 }
 
 extern "C" int artSetObjStaticFromCode(uint32_t field_idx, mirror::Object* new_value,
-                                       const mirror::ArtMethod* referrer, Thread* self,
+                                       mirror::ArtMethod* referrer, Thread* self,
                                        mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     if (LIKELY(!FieldHelper(field).IsPrimitiveType())) {
       field->SetObj(field->GetDeclaringClass(), new_value);
@@ -198,7 +198,7 @@
   }
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticObjectWrite, true>(field_idx, referrer, self,
-                                                     sizeof(mirror::Object*));
+                                                     sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     field->SetObj(field->GetDeclaringClass(), new_value);
     return 0;  // success
@@ -207,7 +207,7 @@
 }
 
 extern "C" int artSet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint32_t new_value,
-                                        const mirror::ArtMethod* referrer, Thread* self,
+                                        mirror::ArtMethod* referrer, Thread* self,
                                         mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
@@ -261,18 +261,18 @@
 
 extern "C" int artSetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
                                          mirror::Object* new_value,
-                                         const mirror::ArtMethod* referrer, Thread* self,
+                                         mirror::ArtMethod* referrer, Thread* self,
                                          mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL && obj != NULL)) {
     field->SetObj(obj, new_value);
     return 0;  // success
   }
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstanceObjectWrite, true>(field_idx, referrer, self,
-                                                       sizeof(mirror::Object*));
+                                                       sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     if (UNLIKELY(obj == NULL)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
diff --git a/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc b/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
index ca0c92e..8dac750 100644
--- a/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
@@ -56,7 +56,7 @@
     return -1;  // Error
   }
   uint32_t size_in_bytes = payload->element_count * payload->element_width;
-  memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
+  memcpy(array->GetRawData(payload->element_width, 0), payload->data, size_in_bytes);
   return 0;  // Success
 }
 
diff --git a/runtime/entrypoints/quick/quick_invoke_entrypoints.cc b/runtime/entrypoints/quick/quick_invoke_entrypoints.cc
index 5a1b3e8..c081768 100644
--- a/runtime/entrypoints/quick/quick_invoke_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_invoke_entrypoints.cc
@@ -124,21 +124,23 @@
       return 0;  // Failure.
     }
   }
-  const void* code = method->GetEntryPointFromCompiledCode();
+  const void* code = method->GetEntryPointFromQuickCompiledCode();
 
-#ifndef NDEBUG
   // When we return, the caller will branch to this address, so it had better not be 0!
-  if (UNLIKELY(code == NULL)) {
+  if (kIsDebugBuild && UNLIKELY(code == nullptr)) {
       MethodHelper mh(method);
       LOG(FATAL) << "Code was NULL in method: " << PrettyMethod(method)
                  << " location: " << mh.GetDexFile().GetLocation();
   }
-#endif
-
+#ifdef __LP64__
+  UNIMPLEMENTED(FATAL);
+  return 0;
+#else
   uint32_t method_uint = reinterpret_cast<uint32_t>(method);
   uint64_t code_uint = reinterpret_cast<uint32_t>(code);
   uint64_t result = ((code_uint << 32) | method_uint);
   return result;
+#endif
 }
 
 template<InvokeType type, bool access_check>
@@ -156,21 +158,23 @@
     }
   }
   DCHECK(!self->IsExceptionPending());
-  const void* code = method->GetEntryPointFromCompiledCode();
+  const void* code = method->GetEntryPointFromQuickCompiledCode();
 
-#ifndef NDEBUG
   // When we return, the caller will branch to this address, so it had better not be 0!
-  if (UNLIKELY(code == NULL)) {
+  if (kIsDebugBuild && UNLIKELY(code == NULL)) {
       MethodHelper mh(method);
       LOG(FATAL) << "Code was NULL in method: " << PrettyMethod(method)
                  << " location: " << mh.GetDexFile().GetLocation();
   }
-#endif
-
+#ifdef __LP64__
+  UNIMPLEMENTED(FATAL);
+  return 0;
+#else
   uint32_t method_uint = reinterpret_cast<uint32_t>(method);
   uint64_t code_uint = reinterpret_cast<uint32_t>(code);
   uint64_t result = ((code_uint << 32) | method_uint);
   return result;
+#endif
 }
 
 // Explicit template declarations of artInvokeCommon for all invoke types.
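
On 32-bit targets these stubs return two pointers packed in one uint64_t, the code pointer in the high word and the Method* in the low word, for the assembly caller to split; that is why the new `#ifdef __LP64__` arm is UNIMPLEMENTED, as two 64-bit pointers no longer fit. A small check of the packing arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t method_uint = 0x12345678u;  // stand-ins for the two pointers
      uint32_t code_uint = 0x9abcdef0u;
      uint64_t result = (static_cast<uint64_t>(code_uint) << 32) | method_uint;
      assert(static_cast<uint32_t>(result) == method_uint);      // low word
      assert(static_cast<uint32_t>(result >> 32) == code_uint);  // high word
      return 0;
    }
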
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index b589384..9f30190 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -97,6 +97,12 @@
 #define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__LR_OFFSET 28
 #define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 32
 #define QUICK_STACK_ARG_SKIP 16
+#elif defined(__x86_64__)
+// TODO: implement and check these.
+#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 8
+#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__LR_OFFSET 56
+#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 64
+#define QUICK_STACK_ARG_SKIP 32
 #else
 #error "Unsupported architecture"
 #define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 0
@@ -567,15 +573,15 @@
     SirtRef<mirror::Class> called_class(soa.Self(), called->GetDeclaringClass());
     linker->EnsureInitialized(called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
-      code = called->GetEntryPointFromCompiledCode();
+      code = called->GetEntryPointFromQuickCompiledCode();
     } else if (called_class->IsInitializing()) {
       if (invoke_type == kStatic) {
         // Class is still initializing, go to oat and grab code (trampoline must be left in place
         // until class is initialized to stop races between threads).
-        code = linker->GetOatCodeFor(called);
+        code = linker->GetQuickOatCodeFor(called);
       } else {
         // No trampoline for non-static methods.
-        code = called->GetEntryPointFromCompiledCode();
+        code = called->GetEntryPointFromQuickCompiledCode();
       }
     } else {
       DCHECK(called_class->IsErroneous());
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index 978faeb..f7b621f 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -76,7 +76,7 @@
     method_f_ = my_klass_->FindVirtualMethod("f", "()I");
     ASSERT_TRUE(method_f_ != NULL);
     method_f_->SetFrameSizeInBytes(kStackAlignment);
-    method_f_->SetEntryPointFromCompiledCode(CompiledMethod::CodePointer(&fake_code_[sizeof(code_size)], kThumb2));
+    method_f_->SetEntryPointFromQuickCompiledCode(CompiledMethod::CodePointer(&fake_code_[sizeof(code_size)], kThumb2));
     method_f_->SetMappingTable(&fake_mapping_data_.GetData()[0]);
     method_f_->SetVmapTable(&fake_vmap_table_data_.GetData()[0]);
     method_f_->SetNativeGcMap(&fake_gc_map_[0]);
@@ -84,7 +84,7 @@
     method_g_ = my_klass_->FindVirtualMethod("g", "(I)V");
     ASSERT_TRUE(method_g_ != NULL);
     method_g_->SetFrameSizeInBytes(kStackAlignment);
-    method_g_->SetEntryPointFromCompiledCode(CompiledMethod::CodePointer(&fake_code_[sizeof(code_size)], kThumb2));
+    method_g_->SetEntryPointFromQuickCompiledCode(CompiledMethod::CodePointer(&fake_code_[sizeof(code_size)], kThumb2));
     method_g_->SetMappingTable(&fake_mapping_data_.GetData()[0]);
     method_g_->SetVmapTable(&fake_vmap_table_data_.GetData()[0]);
     method_g_->SetNativeGcMap(&fake_gc_map_[0]);
@@ -105,6 +105,7 @@
 };
 
 TEST_F(ExceptionTest, FindCatchHandler) {
+  ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = dex_->GetCodeItem(method_f_->GetCodeItemOffset());
 
   ASSERT_TRUE(code_item != NULL);
@@ -151,51 +152,51 @@
   ASSERT_EQ(kStackAlignment, 16U);
   ASSERT_EQ(sizeof(uintptr_t), sizeof(uint32_t));
 
-#if !defined(ART_USE_PORTABLE_COMPILER)
-  // Create two fake stack frames with mapping data created in SetUp. We map offset 3 in the code
-  // to dex pc 3.
-  const uint32_t dex_pc = 3;
+  if (!kUsePortableCompiler) {
+    // Create two fake stack frames with mapping data created in SetUp. We map offset 3 in the code
+    // to dex pc 3.
+    const uint32_t dex_pc = 3;
 
-  // Create/push fake 16byte stack frame for method g
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(method_f_->ToNativePc(dex_pc));  // return pc
+    // Create/push fake 16byte stack frame for method g
+    fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
+    fake_stack.push_back(method_f_->ToNativePc(dex_pc));  // return pc
 
-  // Create/push fake 16byte stack frame for method f
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(0xEBAD6070);  // return pc
+    // Create/push fake 16byte stack frame for method f
+    fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
+    fake_stack.push_back(0xEBAD6070);  // return pc
 
-  // Pull Method* of NULL to terminate the trace
-  fake_stack.push_back(0);
+    // Push a NULL Method* to terminate the trace
+    fake_stack.push_back(0);
 
-  // Push null values which will become null incoming arguments.
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
+    // Push null values which will become null incoming arguments.
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
 
-  // Set up thread to appear as if we called out of method_g_ at pc dex 3
-  thread->SetTopOfStack(&fake_stack[0], method_g_->ToNativePc(dex_pc));  // return pc
-#else
-  // Create/push fake 20-byte shadow frame for method g
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
-  fake_stack.push_back(3);
-  fake_stack.push_back(0);
+    // Set up thread to appear as if we called out of method_g_ at pc dex 3
+    thread->SetTopOfStack(&fake_stack[0], method_g_->ToNativePc(dex_pc));  // return pc
+  } else {
+    // Create/push fake 20-byte shadow frame for method g
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
+    fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
+    fake_stack.push_back(3);
+    fake_stack.push_back(0);
 
-  // Create/push fake 20-byte shadow frame for method f
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
-  fake_stack.push_back(3);
-  fake_stack.push_back(0);
+    // Create/push fake 20-byte shadow frame for method f
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
+    fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
+    fake_stack.push_back(3);
+    fake_stack.push_back(0);
 
-  thread->PushShadowFrame(reinterpret_cast<ShadowFrame*>(&fake_stack[5]));
-  thread->PushShadowFrame(reinterpret_cast<ShadowFrame*>(&fake_stack[0]));
-#endif
+    thread->PushShadowFrame(reinterpret_cast<ShadowFrame*>(&fake_stack[5]));
+    thread->PushShadowFrame(reinterpret_cast<ShadowFrame*>(&fake_stack[0]));
+  }
 
   jobject internal = thread->CreateInternalStackTrace(soa);
   ASSERT_TRUE(internal != NULL);
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index 02e01b8..ea8f89c 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -19,7 +19,7 @@
 
 #include <string>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "UniquePtr.h"
@@ -165,7 +165,7 @@
   void Init() {
     std::string error_msg;
     mem_map_.reset(MemMap::MapAnonymous(name_.c_str(), NULL, capacity_ * sizeof(T),
-                                        PROT_READ | PROT_WRITE, &error_msg));
+                                        PROT_READ | PROT_WRITE, false, &error_msg));
     CHECK(mem_map_.get() != NULL) << "couldn't allocate mark stack.\n" << error_msg;
     byte* addr = mem_map_->Begin();
     CHECK(addr != NULL);
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index e099137..714e6f7 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -57,7 +57,7 @@
   std::string error_msg;
   UniquePtr<MemMap> mem_map(MemMap::MapAnonymous("card table", NULL,
                                                  capacity + 256, PROT_READ | PROT_WRITE,
-                                                 &error_msg));
+                                                 false, &error_msg));
   CHECK(mem_map.get() != NULL) << "couldn't allocate card table: " << error_msg;
   // All zeros is the correct initial value; all clean. Anonymous mmaps are initialized to zero, so
   // we don't clear the card table, which avoids unnecessary pages being allocated.
@@ -72,11 +72,11 @@
   byte* biased_begin = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(cardtable_begin) -
       (reinterpret_cast<uintptr_t>(heap_begin) >> kCardShift));
   if (((uintptr_t)biased_begin & 0xff) != kCardDirty) {
-    int delta = kCardDirty - (reinterpret_cast<int>(biased_begin) & 0xff);
+    int delta = kCardDirty - (reinterpret_cast<uintptr_t>(biased_begin) & 0xff);
     offset = delta + (delta < 0 ? 0x100 : 0);
     biased_begin += offset;
   }
-  CHECK_EQ(reinterpret_cast<int>(biased_begin) & 0xff, kCardDirty);
+  CHECK_EQ(reinterpret_cast<uintptr_t>(biased_begin) & 0xff, kCardDirty);
 
   return new CardTable(mem_map.release(), biased_begin, offset);
 }
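
The card-table fix swaps reinterpret_cast<int> for uintptr_t so the bias arithmetic is not truncated on 64-bit hosts. The bias itself deserves a worked example: the base is nudged until its low byte equals kCardDirty, which lets a compiled write barrier dirty a card by storing the low byte of the base register itself (that motivation is background on this barrier style, not something the diff states):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t kCardDirty = 0x70;
      // Pretend biased base: card_table_begin - (heap_begin >> kCardShift).
      uintptr_t biased_begin = 0x100000ABu;
      int offset = 0;
      if ((biased_begin & 0xff) != kCardDirty) {
        int delta = kCardDirty - static_cast<int>(biased_begin & 0xff);
        offset = delta + (delta < 0 ? 0x100 : 0);  // here: -0x3B + 0x100 = 0xC5
        biased_begin += offset;                    // low byte is now 0x70
      }
      assert((biased_begin & 0xff) == kCardDirty);
      return offset == 0xC5 ? 0 : 1;  // the table keeps offset to undo the bias
    }
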
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 6d9dde7..0225f29 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -82,9 +82,9 @@
     if (ref != nullptr) {
       Object* new_ref = visitor_(ref, arg_);
       if (new_ref != ref) {
-        // Use SetFieldPtr to avoid card mark as an optimization which reduces dirtied pages and
-        // improves performance.
-        obj->SetFieldPtr(offset, new_ref, true);
+        // Use SetFieldObjectWithoutWriteBarrier to avoid card mark as an optimization which
+        // reduces dirtied pages and improves performance.
+        obj->SetFieldObjectWithoutWriteBarrier(offset, new_ref, true);
       }
     }
   }
@@ -122,9 +122,8 @@
 class AddToReferenceArrayVisitor {
  public:
   explicit AddToReferenceArrayVisitor(ModUnionTableReferenceCache* mod_union_table,
-                                      std::vector<Object**>* references)
-    : mod_union_table_(mod_union_table),
-      references_(references) {
+                                      std::vector<mirror::HeapReference<Object>*>* references)
+    : mod_union_table_(mod_union_table), references_(references) {
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
@@ -133,19 +132,19 @@
     // Only add the reference if it is non null and fits our criteria.
     if (ref != nullptr && mod_union_table_->AddReference(obj, ref)) {
       // Push the address of the reference.
-      references_->push_back(obj->GetFieldObjectAddr(offset));
+      references_->push_back(obj->GetFieldObjectReferenceAddr(offset));
     }
   }
 
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
-  std::vector<Object**>* const references_;
+  std::vector<mirror::HeapReference<Object>*>* const references_;
 };
 
 class ModUnionReferenceVisitor {
  public:
   explicit ModUnionReferenceVisitor(ModUnionTableReferenceCache* const mod_union_table,
-                                    std::vector<Object**>* references)
+                                    std::vector<mirror::HeapReference<Object>*>* references)
     : mod_union_table_(mod_union_table),
       references_(references) {
   }
@@ -160,7 +159,7 @@
   }
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
-  std::vector<Object**>* const references_;
+  std::vector<mirror::HeapReference<Object>*>* const references_;
 };
 
 class CheckReferenceVisitor {
@@ -173,7 +172,7 @@
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
   // TODO: Fixme when annotalysis works with visitors.
-  void operator()(const Object* obj, const Object* ref,
+  void operator()(Object* obj, Object* ref,
                   const MemberOffset& /* offset */, bool /* is_static */) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
     Heap* heap = mod_union_table_->GetHeap();
@@ -219,8 +218,8 @@
 void ModUnionTableReferenceCache::Verify() {
   // Start by checking that everything in the mod union table is marked.
   for (const auto& ref_pair : references_) {
-    for (Object** ref : ref_pair.second) {
-      CHECK(heap_->IsLiveObjectLocked(*ref));
+    for (mirror::HeapReference<Object>* ref : ref_pair.second) {
+      CHECK(heap_->IsLiveObjectLocked(ref->AsMirrorPtr()));
     }
   }
 
@@ -231,8 +230,8 @@
     const byte* card = ref_pair.first;
     if (*card == CardTable::kCardClean) {
       std::set<const Object*> reference_set;
-      for (Object** obj_ptr : ref_pair.second) {
-        reference_set.insert(*obj_ptr);
+      for (mirror::HeapReference<Object>* obj_ptr : ref_pair.second) {
+        reference_set.insert(obj_ptr->AsMirrorPtr());
       }
       ModUnionCheckReferences visitor(this, reference_set);
       uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
@@ -255,8 +254,8 @@
     uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
     uintptr_t end = start + CardTable::kCardSize;
     os << reinterpret_cast<void*>(start) << "-" << reinterpret_cast<void*>(end) << "->{";
-    for (Object** ref : ref_pair.second) {
-      os << reinterpret_cast<const void*>(*ref) << ",";
+    for (mirror::HeapReference<Object>* ref : ref_pair.second) {
+      os << reinterpret_cast<const void*>(ref->AsMirrorPtr()) << ",";
     }
     os << "},";
   }
@@ -266,7 +265,7 @@
   Heap* heap = GetHeap();
   CardTable* card_table = heap->GetCardTable();
 
-  std::vector<Object**> cards_references;
+  std::vector<mirror::HeapReference<Object>*> cards_references;
   ModUnionReferenceVisitor add_visitor(this, &cards_references);
 
   for (const auto& card : cleared_cards_) {
@@ -294,13 +293,13 @@
   cleared_cards_.clear();
   size_t count = 0;
   for (const auto& ref : references_) {
-    for (const auto& obj_ptr : ref.second) {
-      Object* obj = *obj_ptr;
+    for (mirror::HeapReference<Object>* obj_ptr : ref.second) {
+      Object* obj = obj_ptr->AsMirrorPtr();
       if (obj != nullptr) {
         Object* new_obj = visitor(obj, arg);
         // Avoid dirtying pages in the image unless necessary.
         if (new_obj != obj) {
-          *obj_ptr = new_obj;
+          obj_ptr->Assign(new_obj);
         }
       }
     }
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index 5a99f1b..a89dbd1 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -112,20 +112,23 @@
 
   // Exclusive lock is required since verify uses SpaceBitmap::VisitMarkedRange and
   // VisitMarkedRange can't know if the callback will modify the bitmap or not.
-  void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void Verify()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Function that tells whether or not to add a reference to the table.
   virtual bool AddReference(const mirror::Object* obj, const mirror::Object* ref) = 0;
 
-  void Dump(std::ostream& os);
+  void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
   // Cleared card array, used to update the mod-union table.
   ModUnionTable::CardSet cleared_cards_;
 
   // Maps from dirty cards to their corresponding alloc space references.
-  SafeMap<const byte*, std::vector<mirror::Object**>, std::less<const byte*>,
-    GcAllocator<std::pair<const byte*, std::vector<mirror::Object**> > > > references_;
+  SafeMap<const byte*, std::vector<mirror::HeapReference<mirror::Object>*>, std::less<const byte*>,
+      GcAllocator<std::pair<const byte*, std::vector<mirror::HeapReference<mirror::Object>*> > > >
+      references_;
 };
 
 // Card caching implementation. Keeps track of which cards we cleared and only this information.
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 01c70fa..d6d1b3e 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -37,9 +37,11 @@
     old_word = *address;
     // Fast path: The bit is already set.
     if ((old_word & mask) != 0) {
+      DCHECK(Test(obj));
       return true;
     }
   } while (!__sync_bool_compare_and_swap(address, old_word, old_word | mask));
+  DCHECK(Test(obj));
   return false;
 }
 
@@ -56,6 +58,15 @@
 void SpaceBitmap::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end,
                                    const Visitor& visitor) const {
   DCHECK_LT(visit_begin, visit_end);
+#ifdef __LP64__
+  // TODO: make the optimized code below work in the 64bit case.
+  for (uintptr_t i = visit_begin; i < visit_end; i += kAlignment) {
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(i);
+    if (Test(obj)) {
+      visitor(obj);
+    }
+  }
+#else
   const size_t bit_index_start = (visit_begin - heap_begin_) / kAlignment;
   const size_t bit_index_end = (visit_end - heap_begin_ - 1) / kAlignment;
 
@@ -114,6 +125,7 @@
     visitor(obj);
     edge_word ^= static_cast<size_t>(kWordHighBitMask) >> shift;
   }
+#endif
 }
 
 inline bool SpaceBitmap::Modify(const mirror::Object* obj, bool do_set) {
@@ -130,6 +142,7 @@
   } else {
     *address = old_word & ~mask;
   }
+  DCHECK_EQ(Test(obj), do_set);
   return (old_word & mask) != 0;
 }
 
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 99800fc..a080bee 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -64,7 +64,7 @@
   size_t bitmap_size = OffsetToIndex(RoundUp(heap_capacity, kAlignment * kBitsPerWord)) * kWordSize;
   std::string error_msg;
   UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), NULL, bitmap_size,
-                                                 PROT_READ | PROT_WRITE, &error_msg));
+                                                 PROT_READ | PROT_WRITE, false, &error_msg));
   if (UNLIKELY(mem_map.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate bitmap " << name << ": " << error_msg;
     return NULL;
@@ -128,9 +128,9 @@
 //
 // The callback is not permitted to increase the max of either bitmap.
 void SpaceBitmap::SweepWalk(const SpaceBitmap& live_bitmap,
-                           const SpaceBitmap& mark_bitmap,
-                           uintptr_t sweep_begin, uintptr_t sweep_end,
-                           SpaceBitmap::SweepCallback* callback, void* arg) {
+                            const SpaceBitmap& mark_bitmap,
+                            uintptr_t sweep_begin, uintptr_t sweep_end,
+                            SpaceBitmap::SweepCallback* callback, void* arg) {
   CHECK(live_bitmap.bitmap_begin_ != NULL);
   CHECK(mark_bitmap.bitmap_begin_ != NULL);
   CHECK_EQ(live_bitmap.heap_begin_, mark_bitmap.heap_begin_);
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 2d6cde5..aa074eb 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -72,8 +72,8 @@
   }
 
   // Pack the bits in backwards so they come out in address order when using CLZ.
-  static word OffsetToMask(uintptr_t offset_) {
-    return static_cast<uintptr_t>(kWordHighBitMask) >> ((offset_ / kAlignment) % kBitsPerWord);
+  static word OffsetToMask(uintptr_t offset) {
+    return static_cast<uintptr_t>(kWordHighBitMask) >> ((offset / kAlignment) % kBitsPerWord);
   }
 
   inline bool Set(const mirror::Object* obj) {
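
The OffsetToMask cleanup above (dropping the member-style offset_ name for a plain parameter) is cosmetic, but the bit packing it documents is load-bearing: putting the lowest offset in the highest bit lets a count-leading-zeros scan visit set bits in ascending address order, which is what VisitMarkedRange relies on. A small illustrative sketch; kAlignment, the word width, and the GCC/Clang __builtin_clzl builtin are assumptions:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    constexpr uintptr_t kWordHighBitMask = uintptr_t{1} << (sizeof(uintptr_t) * 8 - 1);
    constexpr size_t kAlignment = 8;  // assumed object alignment

    int main() {
      // Set the bits for offsets 0, 16 and 24 within a single bitmap word.
      uintptr_t word = (kWordHighBitMask >> (0 / kAlignment)) |
                       (kWordHighBitMask >> (16 / kAlignment)) |
                       (kWordHighBitMask >> (24 / kAlignment));
      while (word != 0) {
        const int shift = __builtin_clzl(word);  // lowest remaining offset
        std::printf("offset %zu\n", static_cast<size_t>(shift) * kAlignment);
        word ^= kWordHighBitMask >> shift;       // clear it and continue
      }
      return 0;  // prints offsets 0, 16, 24 in ascending address order
    }
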
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 8ae61a3..6c9e6f2 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -904,7 +904,7 @@
     uint32_t from_vec = *from_vecp;
     if (from_vec != 0) {
       *to_vecp |= from_vec;
-      *from_vecp = 0;  // clear the from free bit map.
+      *from_vecp = 0;  // clear the bulk free bit map.
     }
     DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
   }
@@ -1468,6 +1468,8 @@
 
 void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   Thread* self = Thread::Current();
+  // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
+  WriterMutexLock wmu(self, bulk_free_lock_);
   for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
     Run* thread_local_run = reinterpret_cast<Run*>(thread->rosalloc_runs_[idx]);
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 4eb13315..7480975 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -456,7 +456,9 @@
   // and the footprint.
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // The reader-writer lock to allow one bulk free at a time while
-  // allowing multiple individual frees at the same time.
+  // allowing multiple individual frees at the same time. Also, this
+  // is used to avoid race conditions between BulkFree() and
+  // RevokeThreadLocalRuns() on the bulk free bitmaps.
   ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   // The page release mode.
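
The expanded comment states the locking discipline that the rosalloc.cc hunk enforces: individual frees may run concurrently on the reader side, while BulkFree() and now RevokeThreadLocalRuns() take the lock exclusively so they cannot race on the bulk free bitmaps. A sketch of that pattern with std::shared_mutex standing in for ART's ReaderWriterMutex; all names are illustrative:

    #include <cstddef>
    #include <mutex>
    #include <shared_mutex>

    class Allocator {
     public:
      // Many threads may free individual objects concurrently (reader side).
      void Free(void* /*ptr*/) {
        std::shared_lock<std::shared_mutex> reader(bulk_free_lock_);
        // ... release one object ...
      }

      // Bulk free is exclusive: it owns the bulk free bitmaps while it runs.
      void BulkFree(void** /*ptrs*/, size_t /*count*/) {
        std::unique_lock<std::shared_mutex> writer(bulk_free_lock_);
        // ... mark objects in the bulk free bitmaps, then sweep them ...
      }

      // Taking the writer side here serializes against BulkFree(), which is
      // what the new WriterMutexLock in RevokeThreadLocalRuns() achieves.
      void RevokeThreadLocalRuns() {
        std::unique_lock<std::shared_mutex> writer(bulk_free_lock_);
        // ... merge thread-local runs back into the shared runs ...
      }

     private:
      std::shared_mutex bulk_free_lock_;
    };
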
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 25e8966..ae04074 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -151,10 +151,11 @@
          space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) {
       accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
       accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-      if (live_bitmap != mark_bitmap) {
+      if (live_bitmap != nullptr && live_bitmap != mark_bitmap) {
         heap_->GetLiveBitmap()->ReplaceBitmap(live_bitmap, mark_bitmap);
         heap_->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-        space->AsMallocSpace()->SwapBitmaps();
+        CHECK(space->IsContinuousMemMapAllocSpace());
+        space->AsContinuousMemMapAllocSpace()->SwapBitmaps();
       }
     }
   }
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 9c1c5dc..d148ae5 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -118,7 +118,7 @@
     while (ref_offsets != 0) {
       size_t right_shift = CLZ(ref_offsets);
       MemberOffset field_offset = CLASS_OFFSET_FROM_CLZ(right_shift);
-      mirror::Object* ref = obj->GetFieldObject<mirror::Object*>(field_offset, false);
+      mirror::Object* ref = obj->GetFieldObject<mirror::Object>(field_offset, false);
       visitor(obj, ref, field_offset, is_static);
       ref_offsets &= ~(CLASS_HIGH_BIT >> right_shift);
     }
@@ -127,17 +127,17 @@
     // walk up the class inheritance hierarchy and find reference
     // offsets the hard way. In the static case, just consider this
     // class.
-    for (const mirror::Class* klass = is_static ? obj->AsClass() : obj->GetClass();
-         klass != NULL;
-         klass = is_static ? NULL : klass->GetSuperClass()) {
+    for (mirror::Class* klass = is_static ? obj->AsClass() : obj->GetClass();
+         klass != nullptr;
+         klass = is_static ? nullptr : klass->GetSuperClass()) {
       size_t num_reference_fields = (is_static
                                      ? klass->NumReferenceStaticFields()
                                      : klass->NumReferenceInstanceFields());
       for (size_t i = 0; i < num_reference_fields; ++i) {
         mirror::ArtField* field = (is_static ? klass->GetStaticField(i)
-                                   : klass->GetInstanceField(i));
+                                             : klass->GetInstanceField(i));
         MemberOffset field_offset = field->GetOffset();
-        mirror::Object* ref = obj->GetFieldObject<mirror::Object*>(field_offset, false);
+        mirror::Object* ref = obj->GetFieldObject<mirror::Object>(field_offset, false);
         visitor(obj, ref, field_offset, is_static);
       }
     }
@@ -150,7 +150,7 @@
   const size_t length = static_cast<size_t>(array->GetLength());
   for (size_t i = 0; i < length; ++i) {
     mirror::Object* element = array->GetWithoutChecks(static_cast<int32_t>(i));
-    const size_t width = sizeof(mirror::Object*);
+    const size_t width = sizeof(mirror::HeapReference<mirror::Object>);
     MemberOffset offset(i * width + mirror::Array::DataOffset(width).Int32Value());
     visitor(array, element, offset, false);
   }
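
The sizeof change in the final hunk is the crux of the HeapReference migration: object arrays store compressed 32-bit references, so element offsets must be computed from the reference width, not the native pointer width. A compile-time illustration; the HeapReference stand-in and its 32-bit payload are assumptions consistent with the hunks above:

    #include <cstdint>

    namespace sketch {

    struct Object {};

    // Stand-in for a compressed heap reference: a 32-bit payload regardless
    // of the native pointer width.
    template <typename T>
    struct HeapReference {
      uint32_t reference_;
    };

    // The element stride of an object array is the reference size; on LP64,
    // sizeof(Object*) == 8 would double every computed offset.
    static_assert(sizeof(HeapReference<Object>) == 4,
                  "array element stride is 4 bytes");

    }  // namespace sketch
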
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 5d450a7..862d06f 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -89,7 +89,8 @@
 void MarkSweep::ImmuneSpace(space::ContinuousSpace* space) {
   // Bind live to mark bitmap if necessary.
   if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
-    BindLiveToMarkBitmap(space);
+    CHECK(space->IsContinuousMemMapAllocSpace());
+    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
   }
 
   // Add the space to the immune region.
@@ -143,11 +144,6 @@
       mark_stack_(NULL),
       immune_begin_(NULL),
       immune_end_(NULL),
-      soft_reference_list_(NULL),
-      weak_reference_list_(NULL),
-      finalizer_reference_list_(NULL),
-      phantom_reference_list_(NULL),
-      cleared_reference_list_(NULL),
       live_stack_freeze_size_(0),
       gc_barrier_(new Barrier(0)),
       large_object_lock_("mark sweep large object lock", kMarkSweepLargeObjectLock),
@@ -161,11 +157,6 @@
   mark_stack_ = heap_->mark_stack_.get();
   DCHECK(mark_stack_ != nullptr);
   SetImmuneRange(nullptr, nullptr);
-  soft_reference_list_ = nullptr;
-  weak_reference_list_ = nullptr;
-  finalizer_reference_list_ = nullptr;
-  phantom_reference_list_ = nullptr;
-  cleared_reference_list_ = nullptr;
   class_count_ = 0;
   array_count_ = 0;
   other_count_ = 0;
@@ -347,7 +338,8 @@
     timings_.EndSplit();
 
     // Unbind the live and mark bitmaps.
-    UnBindBitmaps();
+    TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+    GetHeap()->UnBindBitmaps();
   }
 }
 
@@ -589,14 +581,6 @@
   timings_.EndSplit();
 }
 
-void MarkSweep::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsMallocSpace());
-  space::MallocSpace* alloc_space = space->AsMallocSpace();
-  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
-  GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-}
-
 class ScanObjectVisitor {
  public:
   explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
@@ -893,14 +877,8 @@
 // recursively marks until the mark stack is emptied.
 void MarkSweep::RecursiveMark() {
   TimingLogger::ScopedSplit split("RecursiveMark", &timings_);
-  // RecursiveMark will build the lists of known instances of the Reference classes.
-  // See DelayReferenceReferent for details.
-  CHECK(soft_reference_list_ == NULL);
-  CHECK(weak_reference_list_ == NULL);
-  CHECK(finalizer_reference_list_ == NULL);
-  CHECK(phantom_reference_list_ == NULL);
-  CHECK(cleared_reference_list_ == NULL);
-
+  // RecursiveMark will build the lists of known instances of the Reference classes. See
+  // DelayReferenceReferent for details.
   if (kUseRecursiveMark) {
     const bool partial = GetGcType() == kGcTypePartial;
     ScanObjectVisitor scan_visitor(this);
@@ -1146,13 +1124,13 @@
   DCHECK(mark_stack_->IsEmpty());
   TimingLogger::ScopedSplit("Sweep", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* malloc_space = space->AsMallocSpace();
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
       TimingLogger::ScopedSplit split(
-          malloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", &timings_);
       size_t freed_objects = 0;
       size_t freed_bytes = 0;
-      malloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+      alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
       heap_->RecordFree(freed_objects, freed_bytes);
       freed_objects_.FetchAndAdd(freed_objects);
       freed_bytes_.FetchAndAdd(freed_bytes);
@@ -1278,23 +1256,6 @@
   return heap_->GetMarkBitmap()->Test(object);
 }
 
-void MarkSweep::UnBindBitmaps() {
-  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
-  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* alloc_space = space->AsMallocSpace();
-      if (alloc_space->temp_bitmap_.get() != NULL) {
-        // At this point, the temp_bitmap holds our old mark bitmap.
-        accounting::SpaceBitmap* new_bitmap = alloc_space->temp_bitmap_.release();
-        GetHeap()->GetMarkBitmap()->ReplaceBitmap(alloc_space->mark_bitmap_.get(), new_bitmap);
-        CHECK_EQ(alloc_space->mark_bitmap_.release(), alloc_space->live_bitmap_.get());
-        alloc_space->mark_bitmap_.reset(new_bitmap);
-        DCHECK(alloc_space->temp_bitmap_.get() == NULL);
-      }
-    }
-  }
-}
-
 void MarkSweep::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
   // Can't enqueue references if we hold the mutator lock.
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index e2eafb5..bfedac7 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_MARK_SWEEP_H_
 #define ART_RUNTIME_GC_COLLECTOR_MARK_SWEEP_H_
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "barrier.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -118,12 +118,6 @@
   // the image. Mark that portion of the heap as immune.
   virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void BindLiveToMarkBitmap(space::ContinuousSpace* space)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  void UnBindBitmaps()
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   // Builds a mark stack with objects on dirty cards and recursively mark until it empties.
   void RecursiveMarkDirtyObjects(bool paused, byte minimum_age)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -151,10 +145,6 @@
   void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  mirror::Object* GetClearedReferences() {
-    return cleared_reference_list_;
-  }
-
   // Blackens an object.
   void ScanObject(mirror::Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -346,12 +336,6 @@
   mirror::Object* immune_begin_;
   mirror::Object* immune_end_;
 
-  mirror::Object* soft_reference_list_;
-  mirror::Object* weak_reference_list_;
-  mirror::Object* finalizer_reference_list_;
-  mirror::Object* phantom_reference_list_;
-  mirror::Object* cleared_reference_list_;
-
   // Parallel finger.
   AtomicInteger atomic_finger_;
   // Number of classes scanned, if kCountScannedTypes.
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 99c726d..03307f5 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -67,7 +67,8 @@
 void SemiSpace::ImmuneSpace(space::ContinuousSpace* space) {
   // Bind live to mark bitmap if necessary.
   if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
-    BindLiveToMarkBitmap(space);
+    CHECK(space->IsContinuousMemMapAllocSpace());
+    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
   }
   // Add the space to the immune region.
   if (immune_begin_ == nullptr) {
@@ -98,12 +99,13 @@
 
 void SemiSpace::BindBitmaps() {
   timings_.StartSplit("BindBitmaps");
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetLiveBitmap() != nullptr) {
       if (space == to_space_) {
-        BindLiveToMarkBitmap(to_space_);
+        CHECK(to_space_->IsContinuousMemMapAllocSpace());
+        to_space_->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
       } else if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
                  || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect
                  // Add the main free list space and the non-moving
@@ -119,7 +121,6 @@
   if (generational_ && !whole_heap_collection_) {
     // We won't collect the large object space if a bump pointer space only collection.
     is_large_object_space_immune_ = true;
-    GetHeap()->GetLargeObjectsSpace()->CopyLiveToMarked();
   }
   timings_.EndSplit();
 }
@@ -180,8 +181,7 @@
       VLOG(heap) << "Bump pointer space only collection";
     }
   }
-  Thread* self = Thread::Current();
-  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  Locks::mutator_lock_->AssertExclusiveHeld(self_);
   TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
   // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
   // wrong space.
@@ -209,7 +209,7 @@
   // the live stack during the recursive mark.
   timings_.NewSplit("SwapStacks");
   heap_->SwapStacks();
-  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   MarkRoots();
   // Mark roots of immune spaces.
   UpdateAndMarkModUnion();
@@ -287,6 +287,11 @@
 
   if (is_large_object_space_immune_) {
     DCHECK(generational_ && !whole_heap_collection_);
+    // Delay copying the live set to the marked set from BindBitmaps()
+    // until here, since the large objects on the allocation stack may
+    // only have been added to the live set above in MarkAllocStackAsLive().
+    GetHeap()->GetLargeObjectsSpace()->CopyLiveToMarked();
+
     // When the large object space is immune, we need to scan the
     // large object space as roots as they contain references to their
     // classes (primitive array classes) that could move though they
@@ -305,10 +310,9 @@
 
 void SemiSpace::ReclaimPhase() {
   TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
-  Thread* self = Thread::Current();
-  ProcessReferences(self);
+  ProcessReferences(self_);
   {
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     SweepSystemWeaks();
   }
   // Record freed memory.
@@ -329,7 +333,7 @@
   timings_.EndSplit();
 
   {
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     // Reclaim unmarked objects.
     Sweep(false);
     // Swap the live and mark bitmaps for each space which we modified. This is an
@@ -339,7 +343,8 @@
     SwapBitmaps();
     timings_.EndSplit();
     // Unbind the live and mark bitmaps.
-    UnBindBitmaps();
+    TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+    GetHeap()->UnBindBitmaps();
   }
   // Release the memory used by the from space.
   if (kResetFromSpace) {
@@ -497,7 +502,7 @@
           MarkStackPush(obj);
         }
       } else {
-        DCHECK(!to_space_->HasAddress(obj)) << "Marking object in to_space_";
+        CHECK(!to_space_->HasAddress(obj)) << "Marking object in to_space_";
         if (MarkLargeObject(obj)) {
           MarkStackPush(obj);
         }
@@ -530,14 +535,6 @@
   timings_.EndSplit();
 }
 
-void SemiSpace::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsMallocSpace());
-  space::MallocSpace* alloc_space = space->AsMallocSpace();
-  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
-  GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-}
-
 mirror::Object* SemiSpace::MarkedForwardingAddressCallback(Object* object, void* arg) {
   return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
 }
@@ -548,7 +545,7 @@
   timings_.EndSplit();
 }
 
-bool SemiSpace::ShouldSweepSpace(space::MallocSpace* space) const {
+bool SemiSpace::ShouldSweepSpace(space::ContinuousSpace* space) const {
   return space != from_space_ && space != to_space_ && !IsImmuneSpace(space);
 }
 
@@ -556,16 +553,16 @@
   DCHECK(mark_stack_->IsEmpty());
   TimingLogger::ScopedSplit("Sweep", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* malloc_space = space->AsMallocSpace();
-      if (!ShouldSweepSpace(malloc_space)) {
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
+      if (!ShouldSweepSpace(alloc_space)) {
         continue;
       }
       TimingLogger::ScopedSplit split(
-          malloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
       size_t freed_objects = 0;
       size_t freed_bytes = 0;
-      malloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+      alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
       heap_->RecordFree(freed_objects, freed_bytes);
       freed_objects_.FetchAndAdd(freed_objects);
       freed_bytes_.FetchAndAdd(freed_bytes);
@@ -603,9 +600,9 @@
     if (new_address != ref) {
       DCHECK(new_address != nullptr);
       // Don't need to mark the card since we are updating the object address and not changing the
-      // actual objects its pointing to. Using SetFieldPtr is better in this case since it does not
-      // dirty cards and use additional memory.
-      obj->SetFieldPtr(offset, new_address, false);
+      // actual object it is pointing to. Using SetFieldObjectWithoutWriteBarrier is better in this
+      // case since it does not dirty cards or use additional memory.
+      obj->SetFieldObjectWithoutWriteBarrier(offset, new_address, false);
     }
   }, kMovingClasses);
   mirror::Class* klass = obj->GetClass();
@@ -660,20 +657,6 @@
   return heap_->GetMarkBitmap()->Test(obj) ? obj : nullptr;
 }
 
-void SemiSpace::UnBindBitmaps() {
-  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
-  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* alloc_space = space->AsMallocSpace();
-      if (alloc_space->HasBoundBitmaps()) {
-        alloc_space->UnBindBitmaps();
-        heap_->GetMarkBitmap()->ReplaceBitmap(alloc_space->GetLiveBitmap(),
-                                              alloc_space->GetMarkBitmap());
-      }
-    }
-  }
-}
-
 void SemiSpace::SetToSpace(space::ContinuousMemMapAllocSpace* to_space) {
   DCHECK(to_space != nullptr);
   to_space_ = to_space;
@@ -686,7 +669,6 @@
 
 void SemiSpace::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
-  // Can't enqueue references if we hold the mutator lock.
   Heap* heap = GetHeap();
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
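
The SetFieldObjectWithoutWriteBarrier change rests on the invariant the comment spells out: rewriting a field to the forwarding address of the object it already references adds no new inter-space edge, so the card table can stay clean. A simplified sketch of that update step; the types and forwarding table are illustrative, not ART's:

    #include <unordered_map>

    struct Obj {};

    using ForwardingTable = std::unordered_map<Obj*, Obj*>;

    // Rewrites *field to the forwarded copy of its current referent. A plain
    // store suffices: the logical referent is unchanged, so no card needs to
    // be dirtied for the concurrent/generational collectors to stay correct.
    void UpdateField(Obj** field, const ForwardingTable& forwarding) {
      auto it = forwarding.find(*field);
      if (it != forwarding.end() && it->second != *field) {
        *field = it->second;
      }
    }
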
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index bf129a3..685b33c 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
 #define ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "barrier.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -114,9 +114,6 @@
   // the image. Mark that portion of the heap as immune.
   virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void BindLiveToMarkBitmap(space::ContinuousSpace* space)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   void UnBindBitmaps()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -171,7 +168,7 @@
   void ResizeMarkStack(size_t new_size);
 
   // Returns true if we should sweep the space.
-  virtual bool ShouldSweepSpace(space::MallocSpace* space) const;
+  virtual bool ShouldSweepSpace(space::ContinuousSpace* space) const;
 
   // Returns how many threads we should use for the current GC phase based on if we are paused,
   // whether or not we care about pauses.
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index c562e8c..9e3adb4 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -38,12 +38,12 @@
   // know what was allocated since the last GC. A side-effect of binding the allocation space mark
   // and live bitmap is that marking the objects will place them in the live bitmap.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace() &&
+    if (space->IsContinuousMemMapAllocSpace() &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
-      BindLiveToMarkBitmap(space);
+      DCHECK(space->IsContinuousMemMapAllocSpace());
+      space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
     }
   }
-
   GetHeap()->GetLargeObjectsSpace()->CopyLiveToMarked();
 }
 
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 4bc9ad2..98c27fb 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -34,6 +34,8 @@
   kCollectorTypeSS,
   // A generational variant of kCollectorTypeSS.
   kCollectorTypeGSS,
+  // Heap trimming collector, doesn't do any actual collecting.
+  kCollectorTypeHeapTrim,
 };
 std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 06793bf..b1bbfc6 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -44,6 +44,7 @@
 #include "gc/space/large_object_space.h"
 #include "gc/space/rosalloc_space-inl.h"
 #include "gc/space/space-inl.h"
+#include "gc/space/zygote_space.h"
 #include "heap-inl.h"
 #include "image.h"
 #include "invoke_arg_array_builder.h"
@@ -101,7 +102,7 @@
       finalizer_reference_queue_(this),
       phantom_reference_queue_(this),
       cleared_references_(this),
-      is_gc_running_(false),
+      collector_type_running_(kCollectorTypeNone),
       last_gc_type_(collector::kGcTypeNone),
       next_gc_type_(collector::kGcTypePartial),
       capacity_(capacity),
@@ -230,7 +231,7 @@
     std::string error_str;
     post_zygote_non_moving_space_mem_map_.reset(
         MemMap::MapAnonymous("post zygote non-moving space", nullptr, 64 * MB,
-                             PROT_READ | PROT_WRITE, &error_str));
+                             PROT_READ | PROT_WRITE, true, &error_str));
     CHECK(post_zygote_non_moving_space_mem_map_.get() != nullptr) << error_str;
     heap_begin = std::min(post_zygote_non_moving_space_mem_map_->Begin(), heap_begin);
     heap_end = std::max(post_zygote_non_moving_space_mem_map_->End(), heap_end);
@@ -334,7 +335,9 @@
   ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
   ++disable_moving_gc_count_;
-  // TODO: Wait for compacting GC to complete if we ever have a concurrent compacting GC.
+  if (IsCompactingGC(collector_type_running_)) {
+    WaitForGcToCompleteLocked(self);
+  }
 }
 
 void Heap::DecrementDisableMovingGC(Thread* self) {
@@ -485,7 +488,6 @@
     DCHECK(it != alloc_spaces_.end());
     alloc_spaces_.erase(it);
   }
-  delete space;
 }
 
 void Heap::RegisterGCAllocation(size_t bytes) {
@@ -605,7 +607,7 @@
 };
 
 mirror::Object* Heap::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) {
-  SoftReferenceArgs* args  = reinterpret_cast<SoftReferenceArgs*>(arg);
+  SoftReferenceArgs* args = reinterpret_cast<SoftReferenceArgs*>(arg);
   // TODO: Not preserve all soft references.
   return args->recursive_mark_callback_(obj, args->arg_);
 }
@@ -651,15 +653,15 @@
 bool Heap::IsEnqueued(mirror::Object* ref) const {
   // Since the references are stored as cyclic lists, once enqueued the pending next
   // will always be non-null.
-  return ref->GetFieldObject<mirror::Object*>(GetReferencePendingNextOffset(), false) != nullptr;
+  return ref->GetFieldObject<mirror::Object>(GetReferencePendingNextOffset(), false) != nullptr;
 }
 
-bool Heap::IsEnqueuable(const mirror::Object* ref) const {
+bool Heap::IsEnqueuable(mirror::Object* ref) const {
   DCHECK(ref != nullptr);
   const mirror::Object* queue =
-      ref->GetFieldObject<mirror::Object*>(GetReferenceQueueOffset(), false);
+      ref->GetFieldObject<mirror::Object>(GetReferenceQueueOffset(), false);
   const mirror::Object* queue_next =
-      ref->GetFieldObject<mirror::Object*>(GetReferenceQueueNextOffset(), false);
+      ref->GetFieldObject<mirror::Object>(GetReferenceQueueNextOffset(), false);
   return queue != nullptr && queue_next == nullptr;
 }
 
@@ -718,7 +720,7 @@
 
 void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation) {
   std::ostringstream oss;
-  int64_t total_bytes_free = GetFreeMemory();
+  size_t total_bytes_free = GetFreeMemory();
   oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
       << " free bytes";
   // If the allocation failed due to fragmentation, print out the largest continuous allocation.
@@ -743,13 +745,25 @@
 }
 
 void Heap::Trim() {
+  Thread* self = Thread::Current();
+  {
+    // Need to do this before acquiring the locks since we don't want to get suspended while
+    // holding any locks.
+    ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+    // Pretend we are doing a GC to prevent background compaction from deleting the space we are
+    // trimming.
+    MutexLock mu(self, *gc_complete_lock_);
+    // Ensure there is only one GC at a time.
+    WaitForGcToCompleteLocked(self);
+    collector_type_running_ = kCollectorTypeHeapTrim;
+  }
   uint64_t start_ns = NanoTime();
   // Trim the managed spaces.
   uint64_t total_alloc_space_allocated = 0;
   uint64_t total_alloc_space_size = 0;
   uint64_t managed_reclaimed = 0;
   for (const auto& space : continuous_spaces_) {
-    if (space->IsMallocSpace() && !space->IsZygoteSpace()) {
+    if (space->IsMallocSpace()) {
       gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
       total_alloc_space_size += alloc_space->Size();
       managed_reclaimed += alloc_space->Trim();
@@ -760,6 +774,8 @@
   const float managed_utilization = static_cast<float>(total_alloc_space_allocated) /
       static_cast<float>(total_alloc_space_size);
   uint64_t gc_heap_end_ns = NanoTime();
+  // We never move things in the native heap, so we can finish the GC at this point.
+  FinishGC(self, collector::kGcTypeNone);
   // Trim the native heap.
   dlmalloc_trim(0);
   size_t native_reclaimed = 0;
@@ -789,7 +805,7 @@
   return FindSpaceFromObject(obj, true) != nullptr;
 }
 
-bool Heap::IsLiveObjectLocked(const mirror::Object* obj, bool search_allocation_stack,
+bool Heap::IsLiveObjectLocked(mirror::Object* obj, bool search_allocation_stack,
                               bool search_live_stack, bool sorted) {
   if (UNLIKELY(!IsAligned<kObjectAlignment>(obj))) {
     return false;
@@ -858,7 +874,7 @@
   return false;
 }
 
-void Heap::VerifyObjectImpl(const mirror::Object* obj) {
+void Heap::VerifyObjectImpl(mirror::Object* obj) {
   if (Thread::Current() == NULL ||
       Runtime::Current()->GetThreadList()->GetLockOwner() == Thread::Current()->GetTid()) {
     return;
@@ -871,9 +887,9 @@
   // to run
   const byte* raw_addr =
       reinterpret_cast<const byte*>(c) + mirror::Object::ClassOffset().Int32Value();
-  const mirror::Class* c_c = *reinterpret_cast<mirror::Class* const *>(raw_addr);
+  mirror::Class* c_c = reinterpret_cast<mirror::HeapReference<mirror::Class> const *>(raw_addr)->AsMirrorPtr();
   raw_addr = reinterpret_cast<const byte*>(c_c) + mirror::Object::ClassOffset().Int32Value();
-  const mirror::Class* c_c_c = *reinterpret_cast<mirror::Class* const *>(raw_addr);
+  mirror::Class* c_c_c = reinterpret_cast<mirror::HeapReference<mirror::Class> const *>(raw_addr)->AsMirrorPtr();
   return c_c == c_c_c;
 }
 
@@ -894,7 +910,7 @@
   }
 }
 
-void Heap::VerifyObjectBody(const mirror::Object* obj) {
+void Heap::VerifyObjectBody(mirror::Object* obj) {
   CHECK(IsAligned<kObjectAlignment>(obj)) << "Object isn't aligned: " << obj;
   // Ignore early dawn of the universe verifications.
   if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.Load()) < 10 * KB)) {
@@ -902,7 +918,7 @@
   }
   const byte* raw_addr = reinterpret_cast<const byte*>(obj) +
       mirror::Object::ClassOffset().Int32Value();
-  const mirror::Class* c = *reinterpret_cast<mirror::Class* const *>(raw_addr);
+  mirror::Class* c = reinterpret_cast<mirror::HeapReference<mirror::Class> const *>(raw_addr)->AsMirrorPtr();
   if (UNLIKELY(c == NULL)) {
     LOG(FATAL) << "Null class in object: " << obj;
   } else if (UNLIKELY(!IsAligned<kObjectAlignment>(c))) {
@@ -933,7 +949,7 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-void Heap::RecordFree(int64_t freed_objects, int64_t freed_bytes) {
+void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
   DCHECK_LE(freed_bytes, num_bytes_allocated_.Load());
   num_bytes_allocated_.FetchAndSub(freed_bytes);
   if (Runtime::Current()->HasStatsEnabled()) {
@@ -1043,9 +1059,9 @@
       : classes_(classes), use_is_assignable_from_(use_is_assignable_from), counts_(counts) {
   }
 
-  void operator()(const mirror::Object* o) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void operator()(mirror::Object* o) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     for (size_t i = 0; i < classes_.size(); ++i) {
-      const mirror::Class* instance_class = o->GetClass();
+      mirror::Class* instance_class = o->GetClass();
       if (use_is_assignable_from_) {
         if (instance_class != NULL && classes_[i]->IsAssignableFrom(instance_class)) {
           ++counts_[i];
@@ -1087,11 +1103,11 @@
       : class_(c), max_count_(max_count), instances_(instances) {
   }
 
-  void operator()(const mirror::Object* o) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const mirror::Class* instance_class = o->GetClass();
+  void operator()(mirror::Object* o) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::Class* instance_class = o->GetClass();
     if (instance_class == class_) {
       if (max_count_ == 0 || instances_.size() < max_count_) {
-        instances_.push_back(const_cast<mirror::Object*>(o));
+        instances_.push_back(o);
       }
     }
   }
@@ -1174,18 +1190,30 @@
     return;
   }
   uint64_t start_time = NanoTime();
-  int32_t before_size  = GetTotalMemory();
-  int32_t before_allocated = num_bytes_allocated_.Load();
+  uint32_t before_size = GetTotalMemory();
+  uint32_t before_allocated = num_bytes_allocated_.Load();
   ThreadList* tl = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
+  const bool copying_transition =
+      IsCompactingGC(background_collector_type_) || IsCompactingGC(post_zygote_collector_type_);
   // Busy wait until we can GC (StartGC can fail if we have a non-zero
   // compacting_gc_disable_count_, this should rarely occur).
-  bool copying_transition =
-      IsCompactingGC(background_collector_type_) || IsCompactingGC(post_zygote_collector_type_);
-  while (!StartGC(self, copying_transition)) {
-    usleep(100);
+  for (;;) {
+    {
+      ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+      MutexLock mu(self, *gc_complete_lock_);
+      // Ensure there is only one GC at a time.
+      WaitForGcToCompleteLocked(self);
+      // GC can be disabled if someone has used GetPrimitiveArrayCritical but not yet released it.
+      if (!copying_transition || disable_moving_gc_count_ == 0) {
+        // TODO: Not hard code in semi-space collector?
+        collector_type_running_ = copying_transition ? kCollectorTypeSS : collector_type;
+        break;
+      }
+    }
+    usleep(1000);
   }
   tl->SuspendAll();
   switch (collector_type) {
@@ -1198,8 +1226,10 @@
       DCHECK(allocator_mem_map_.get() == nullptr);
       allocator_mem_map_.reset(main_space_->ReleaseMemMap());
       madvise(main_space_->Begin(), main_space_->Size(), MADV_DONTNEED);
-      // RemoveSpace deletes the removed space.
-      RemoveSpace(main_space_);
+      // RemoveSpace does not delete the removed space.
+      space::Space* old_space = main_space_;
+      RemoveSpace(old_space);
+      delete old_space;
       break;
     }
     case kCollectorTypeMS:
@@ -1349,7 +1379,7 @@
     }
   }
 
-  virtual bool ShouldSweepSpace(space::MallocSpace* space) const {
+  virtual bool ShouldSweepSpace(space::ContinuousSpace* space) const {
     // Don't sweep any spaces since we probably blasted the internal accounting of the free list
     // allocator.
     return false;
@@ -1389,7 +1419,19 @@
   }
 };
 
+void Heap::UnBindBitmaps() {
+  for (const auto& space : GetContinuousSpaces()) {
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
+      if (alloc_space->HasBoundBitmaps()) {
+        alloc_space->UnBindBitmaps();
+      }
+    }
+  }
+}
+
 void Heap::PreZygoteFork() {
+  CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
   static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
   Thread* self = Thread::Current();
   MutexLock mu(self, zygote_creation_lock_);
@@ -1398,7 +1440,6 @@
     return;
   }
   VLOG(heap) << "Starting PreZygoteFork";
-  CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
   // Trim the pages at the end of the non moving space.
   non_moving_space_->Trim();
   non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
@@ -1424,30 +1465,28 @@
     non_moving_space_->SetLimit(target_space.Limit());
     VLOG(heap) << "Zygote size " << non_moving_space_->Size() << " bytes";
   }
+  // Save the old space so that we can remove it after we complete creating the zygote space.
+  space::MallocSpace* old_alloc_space = non_moving_space_;
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
-  // the remaining available heap memory.
-  space::MallocSpace* zygote_space = non_moving_space_;
-  main_space_ = non_moving_space_->CreateZygoteSpace("alloc space", low_memory_mode_);
+  // the remaining available space.
+  // Remove the old space before creating the zygote space since creating the zygote space sets
+  // the old alloc space's bitmaps to nullptr.
+  RemoveSpace(old_alloc_space);
+  space::ZygoteSpace* zygote_space = old_alloc_space->CreateZygoteSpace("alloc space",
+                                                                        low_memory_mode_,
+                                                                        &main_space_);
+  delete old_alloc_space;
+  CHECK(zygote_space != nullptr) << "Failed creating zygote space";
+  AddSpace(zygote_space, false);
+  CHECK(main_space_ != nullptr);
   if (main_space_->IsRosAllocSpace()) {
     rosalloc_space_ = main_space_->AsRosAllocSpace();
   } else if (main_space_->IsDlMallocSpace()) {
     dlmalloc_space_ = main_space_->AsDlMallocSpace();
   }
   main_space_->SetFootprintLimit(main_space_->Capacity());
-  // Change the GC retention policy of the zygote space to only collect when full.
-  zygote_space->SetGcRetentionPolicy(space::kGcRetentionPolicyFullCollect);
   AddSpace(main_space_);
   have_zygote_space_ = true;
-  // Remove the zygote space from alloc_spaces_ array since not doing so causes crashes in
-  // GetObjectsAllocated. This happens because the bin packing blows away the internal accounting
-  // stored in between objects.
-  if (zygote_space->IsAllocSpace()) {
-    // TODO: Refactor zygote spaces to be a new space type to avoid more of these types of issues.
-    auto it = std::find(alloc_spaces_.begin(), alloc_spaces_.end(), zygote_space->AsAllocSpace());
-    CHECK(it != alloc_spaces_.end());
-    alloc_spaces_.erase(it);
-    zygote_space->InvalidateAllocator();
-  }
   // Create the zygote space mod union table.
   accounting::ModUnionTable* mod_union_table =
       new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space);
@@ -1532,11 +1571,22 @@
   if (self->IsHandlingStackOverflow()) {
     LOG(WARNING) << "Performing GC on a thread that is handling a stack overflow.";
   }
-  gc_complete_lock_->AssertNotHeld(self);
-  const bool compacting_gc = IsCompactingGC(collector_type_);
-  if (!StartGC(self, compacting_gc)) {
-    return collector::kGcTypeNone;
+  bool compacting_gc;
+  {
+    gc_complete_lock_->AssertNotHeld(self);
+    ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+    MutexLock mu(self, *gc_complete_lock_);
+    // Ensure there is only one GC at a time.
+    WaitForGcToCompleteLocked(self);
+    compacting_gc = IsCompactingGC(collector_type_);
+    // GC can be disabled if someone has used GetPrimitiveArrayCritical.
+    if (compacting_gc && disable_moving_gc_count_ != 0) {
+      LOG(WARNING) << "Skipping GC due to disable moving GC count " << disable_moving_gc_count_;
+      return collector::kGcTypeNone;
+    }
+    collector_type_running_ = collector_type_;
   }
+
   if (gc_cause == kGcCauseForAlloc && runtime->HasStatsEnabled()) {
     ++runtime->GetStats()->gc_for_alloc_count;
     ++self->GetStats()->gc_for_alloc_count;
@@ -1581,20 +1631,14 @@
   CHECK(collector != nullptr)
       << "Could not find garbage collector with concurrent=" << concurrent_gc_
       << " and type=" << gc_type;
-
   ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), collector->GetName()).c_str());
-
   collector->Run(gc_cause, clear_soft_references);
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
-
   // Enqueue cleared references.
-  Locks::mutator_lock_->AssertNotHeld(self);
   EnqueueClearedReferences();
-
   // Grow the heap so that we know when to perform the next GC.
   GrowForUtilization(gc_type, collector->GetDurationNs());
-
   if (CareAboutPauseTimes()) {
     const size_t duration = collector->GetDurationNs();
     std::vector<uint64_t> pauses = collector->GetPauseTimes();
@@ -1636,25 +1680,12 @@
   return gc_type;
 }
 
-bool Heap::StartGC(Thread* self, bool is_compacting) {
-  MutexLock mu(self, *gc_complete_lock_);
-  // Ensure there is only one GC at a time.
-  WaitForGcToCompleteLocked(self);
-  // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
-  //       Not doing at the moment to ensure soft references are cleared.
-  // GC can be disabled if someone has a used GetPrimitiveArrayCritical.
-  if (is_compacting && disable_moving_gc_count_ != 0) {
-    LOG(WARNING) << "Skipping GC due to disable moving GC count " << disable_moving_gc_count_;
-    return false;
-  }
-  is_gc_running_ = true;
-  return true;
-}
-
 void Heap::FinishGC(Thread* self, collector::GcType gc_type) {
   MutexLock mu(self, *gc_complete_lock_);
-  is_gc_running_ = false;
-  last_gc_type_ = gc_type;
+  collector_type_running_ = kCollectorTypeNone;
+  if (gc_type != collector::kGcTypeNone) {
+    last_gc_type_ = gc_type;
+  }
   // Wake anyone who may have been waiting for the GC to complete.
   gc_complete_cond_->Broadcast(self);
 }
@@ -1687,7 +1718,7 @@
 
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for smarter
   // analysis on visitors.
-  void operator()(const mirror::Object* obj, const mirror::Object* ref,
+  void operator()(mirror::Object* obj, mirror::Object* ref,
                   const MemberOffset& offset, bool /* is_static */) const
       NO_THREAD_SAFETY_ANALYSIS {
     if (ref == nullptr || IsLive(ref)) {
@@ -1782,7 +1813,7 @@
     }
   }
 
-  bool IsLive(const mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+  bool IsLive(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
     return heap_->IsLiveObjectLocked(obj, true, false, true);
   }
 
@@ -1867,7 +1898,7 @@
 
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
-  void operator()(const mirror::Object* obj, const mirror::Object* ref, const MemberOffset& offset,
+  void operator()(mirror::Object* obj, mirror::Object* ref, const MemberOffset& offset,
                   bool is_static) const NO_THREAD_SAFETY_ANALYSIS {
     // Filter out class references since changing an object's class does not mark the card as dirty.
     // Also handles large objects, since the only reference they hold is a class reference.
@@ -1895,13 +1926,13 @@
 
           // Print which field of the object is dead.
           if (!obj->IsObjectArray()) {
-            const mirror::Class* klass = is_static ? obj->AsClass() : obj->GetClass();
+            mirror::Class* klass = is_static ? obj->AsClass() : obj->GetClass();
             CHECK(klass != NULL);
-            const mirror::ObjectArray<mirror::ArtField>* fields = is_static ? klass->GetSFields()
-                                                                            : klass->GetIFields();
+            mirror::ObjectArray<mirror::ArtField>* fields = is_static ? klass->GetSFields()
+                                                                      : klass->GetIFields();
             CHECK(fields != NULL);
             for (int32_t i = 0; i < fields->GetLength(); ++i) {
-              const mirror::ArtField* cur = fields->Get(i);
+              mirror::ArtField* cur = fields->Get(i);
               if (cur->GetOffset().Int32Value() == offset.Int32Value()) {
                 LOG(ERROR) << (is_static ? "Static " : "") << "field in the live stack is "
                           << PrettyField(cur);
@@ -1909,7 +1940,7 @@
               }
             }
           } else {
-            const mirror::ObjectArray<mirror::Object>* object_array =
+            mirror::ObjectArray<mirror::Object>* object_array =
                 obj->AsObjectArray<mirror::Object>();
             for (int32_t i = 0; i < object_array->GetLength(); ++i) {
               if (object_array->Get(i) == ref) {
@@ -2086,7 +2117,7 @@
 collector::GcType Heap::WaitForGcToCompleteLocked(Thread* self) {
   collector::GcType last_gc_type = collector::kGcTypeNone;
   uint64_t wait_start = NanoTime();
-  while (is_gc_running_) {
+  while (collector_type_running_ != kCollectorTypeNone) {
     ATRACE_BEGIN("GC: Wait For Completion");
     // We must wait, change thread state then sleep on gc_complete_cond_;
     gc_complete_cond_->Wait(self);
@@ -2247,26 +2278,28 @@
 mirror::Object* Heap::GetReferenceReferent(mirror::Object* reference) {
   DCHECK(reference != NULL);
   DCHECK_NE(reference_referent_offset_.Uint32Value(), 0U);
-  return reference->GetFieldObject<mirror::Object*>(reference_referent_offset_, true);
+  return reference->GetFieldObject<mirror::Object>(reference_referent_offset_, true);
 }
 
 void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
   ScopedObjectAccess soa(self);
   JValue result;
   ArgArray arg_array(NULL, 0);
-  arg_array.Append(reinterpret_cast<uint32_t>(object));
+  arg_array.Append(object);
   soa.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self,
       arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
 }
 
 void Heap::EnqueueClearedReferences() {
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
   if (!cleared_references_.IsEmpty()) {
     // When a runtime isn't started there are no reference queues to care about so ignore.
     if (LIKELY(Runtime::Current()->IsStarted())) {
-      ScopedObjectAccess soa(Thread::Current());
+      ScopedObjectAccess soa(self);
       JValue result;
       ArgArray arg_array(NULL, 0);
-      arg_array.Append(reinterpret_cast<uint32_t>(cleared_references_.GetList()));
+      arg_array.Append(cleared_references_.GetList());
       soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
           arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
     }
@@ -2444,8 +2477,8 @@
   } while (!native_bytes_allocated_.CompareAndSwap(expected_size, new_size));
 }
 
-int64_t Heap::GetTotalMemory() const {
-  int64_t ret = 0;
+size_t Heap::GetTotalMemory() const {
+  size_t ret = 0;
   for (const auto& space : continuous_spaces_) {
     // Currently don't include the image space.
     if (!space->IsImageSpace()) {
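
Taken together, the heap.cc hunks retire StartGC() and the is_gc_running_ flag in favor of a single collector_type_running_ enum guarded by gc_complete_lock_, so real collections, heap transitions, and Trim() all serialize through one gate. A condition-variable sketch of that protocol with simplified, non-ART types (the enum values mirror collector_type.h above):

    #include <condition_variable>
    #include <mutex>

    enum CollectorType {
      kCollectorTypeNone,
      kCollectorTypeMS,
      kCollectorTypeSS,
      kCollectorTypeGSS,
      kCollectorTypeHeapTrim,
    };

    class GcGate {
     public:
      // Blocks until no collector is running, then claims the gate, like the
      // WaitForGcToCompleteLocked() loops added above.
      void Start(CollectorType type) {
        std::unique_lock<std::mutex> lock(gc_complete_lock_);
        gc_complete_cond_.wait(lock, [this] {
          return collector_type_running_ == kCollectorTypeNone;
        });
        collector_type_running_ = type;
      }

      // Mirrors FinishGC(): clear the running type and wake all waiters.
      void Finish() {
        {
          std::lock_guard<std::mutex> lock(gc_complete_lock_);
          collector_type_running_ = kCollectorTypeNone;
        }
        gc_complete_cond_.notify_all();
      }

     private:
      std::mutex gc_complete_lock_;
      std::condition_variable gc_complete_cond_;
      CollectorType collector_type_running_ = kCollectorTypeNone;
    };

In these terms, Trim() corresponds to Start(kCollectorTypeHeapTrim) followed by Finish() once the managed spaces are trimmed, since nothing in the native heap moves.
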
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 52138d1..499d27c 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -21,7 +21,7 @@
 #include <string>
 #include <vector>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/timing_logger.h"
 #include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/card_table.h"
@@ -204,14 +204,14 @@
   void ChangeCollector(CollectorType collector_type);
 
   // The given reference is believed to be to an object in the Java heap; check its soundness.
-  void VerifyObjectImpl(const mirror::Object* o);
-  void VerifyObject(const mirror::Object* o) {
+  void VerifyObjectImpl(mirror::Object* o);
+  void VerifyObject(mirror::Object* o) {
     if (o != nullptr && this != nullptr && verify_object_mode_ > kNoHeapVerification) {
       VerifyObjectImpl(o);
     }
   }
   // Check that c.getClass() == c.getClass().getClass().
-  bool VerifyClassClass(const mirror::Class* c) const;
+  bool VerifyClassClass(const mirror::Class* c) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Check sanity of all live references.
   void VerifyHeap() LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
@@ -232,9 +232,9 @@
 
   // Returns true if 'obj' is a live heap object, false otherwise (including for invalid addresses).
   // Requires the heap lock to be held.
-  bool IsLiveObjectLocked(const mirror::Object* obj, bool search_allocation_stack = true,
+  bool IsLiveObjectLocked(mirror::Object* obj, bool search_allocation_stack = true,
                           bool search_live_stack = true, bool sorted = false)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Returns true if there is any chance that the object (obj) will move.
   bool IsMovableObject(const mirror::Object* obj) const;
@@ -358,7 +358,7 @@
 
   // Freed bytes can be negative in cases where we copy objects from a compacted space to a
   // free-list backed space.
-  void RecordFree(int64_t freed_objects, int64_t freed_bytes);
+  void RecordFree(size_t freed_objects, size_t freed_bytes);
 
   // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
   // The call is not needed if NULL is stored in the field.
@@ -411,16 +411,16 @@
   // consume. For a regular VM this would relate to the -Xmx option and would return -1 if no Xmx
   // were specified. Android apps start with a growth limit (small heap size) which is
   // cleared/extended for large apps.
-  int64_t GetMaxMemory() const {
+  size_t GetMaxMemory() const {
     return growth_limit_;
   }
 
   // Implements java.lang.Runtime.totalMemory, returning the amount of memory consumed by an
   // application.
-  int64_t GetTotalMemory() const;
+  size_t GetTotalMemory() const;
 
   // Implements java.lang.Runtime.freeMemory.
-  int64_t GetFreeMemory() const {
+  size_t GetFreeMemory() const {
     return GetTotalMemory() - num_bytes_allocated_;
   }
 
@@ -467,6 +467,9 @@
   void MarkAllocStackAsLive(accounting::ObjectStack* stack)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Unbind any bound bitmaps.
+  void UnBindBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   // DEPRECATED: Should remove in "near" future when support for multiple image spaces is added.
   // Assumes there is only one image space.
   space::ImageSpace* GetImageSpace() const;
@@ -534,7 +537,6 @@
   void Compact(space::ContinuousMemMapAllocSpace* target_space,
                space::ContinuousMemMapAllocSpace* source_space);
 
-  bool StartGC(Thread* self, bool is_compacting) LOCKS_EXCLUDED(gc_complete_lock_);
   void FinishGC(Thread* self, collector::GcType gc_type) LOCKS_EXCLUDED(gc_complete_lock_);
 
   static ALWAYS_INLINE bool AllocatorHasAllocationStack(AllocatorType allocator_type) {
@@ -548,7 +550,8 @@
   static bool IsCompactingGC(CollectorType collector_type) {
     return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS;
   }
-  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const;
+  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   ALWAYS_INLINE void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
                                        mirror::Object* obj);
 
@@ -594,8 +597,8 @@
   }
   void EnqueueClearedReferences();
   // Returns true if the reference object has not yet been enqueued.
-  bool IsEnqueuable(const mirror::Object* ref) const;
-  bool IsEnqueued(mirror::Object* ref) const;
+  bool IsEnqueuable(mirror::Object* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsEnqueued(mirror::Object* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj, RootVisitor mark_visitor,
                               void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -611,14 +614,6 @@
   void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
   bool IsGCRequestPending() const;
 
-  size_t RecordAllocationInstrumented(size_t size, mirror::Object* object)
-      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  size_t RecordAllocationUninstrumented(size_t size, mirror::Object* object)
-      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Sometimes CollectGarbageInternal decides to run a different Gc than you requested. Returns
   // which type of Gc was actually ran.
   collector::GcType CollectGarbageInternal(collector::GcType gc_plan, GcCause gc_cause,
@@ -650,7 +645,7 @@
 
   // No thread safety analysis since we call this everywhere and it is impossible to find a proper
   // lock ordering for it.
-  void VerifyObjectBody(const mirror::Object *obj) NO_THREAD_SAFETY_ANALYSIS;
+  void VerifyObjectBody(mirror::Object *obj) NO_THREAD_SAFETY_ANALYSIS;
 
   static void VerificationCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
@@ -734,11 +729,6 @@
   // If we have a zygote space.
   bool have_zygote_space_;
 
-  // Number of pinned primitive arrays in the movable space.
-  // Block all GC until this hits zero, or we hit the timeout!
-  size_t number_gc_blockers_;
-  static constexpr size_t KGCBlockTimeout = 30000;
-
   // Guards access to the state of GC, associated conditional variable is used to signal when a GC
   // completes.
   Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -752,7 +742,7 @@
   ReferenceQueue cleared_references_;
 
   // True while the garbage collector is running.
-  volatile bool is_gc_running_ GUARDED_BY(gc_complete_lock_);
+  volatile CollectorType collector_type_running_ GUARDED_BY(gc_complete_lock_);
 
   // Last Gc type we ran. Used by WaitForConcurrentGc to know which Gc was waited on.
   volatile collector::GcType last_gc_type_ GUARDED_BY(gc_complete_lock_);
@@ -792,13 +782,13 @@
   size_t total_objects_freed_ever_;
 
   // Number of bytes allocated.  Adjusted after each allocation and free.
-  AtomicInteger num_bytes_allocated_;
+  Atomic<size_t> num_bytes_allocated_;
 
   // Bytes which are allocated and managed by native code but still need to be accounted for.
-  AtomicInteger native_bytes_allocated_;
+  Atomic<size_t> native_bytes_allocated_;
 
   // Data structure GC overhead.
-  AtomicInteger gc_memory_overhead_;
+  Atomic<size_t> gc_memory_overhead_;
 
   // Heap verification flags.
   const bool verify_missing_card_marks_;
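
The counters above move from the 32-bit AtomicInteger to Atomic<size_t>, widening them to the native word size so large heaps cannot overflow them. A stand-in sketch of the Load/FetchAndAdd/FetchAndSub/CompareAndSwap shape the heap.cc call sites rely on; this is illustrative, not ART's atomic.h:

    #include <atomic>
    #include <cstddef>

    template <typename T>
    class Atomic {
     public:
      explicit Atomic(T value = 0) : value_(value) {}

      T Load() const { return value_.load(std::memory_order_relaxed); }
      // Both return the previous value, like their heap.cc uses.
      T FetchAndAdd(T n) { return value_.fetch_add(n, std::memory_order_relaxed); }
      T FetchAndSub(T n) { return value_.fetch_sub(n, std::memory_order_relaxed); }
      // Returns true if the swap from expected to desired succeeded.
      bool CompareAndSwap(T expected, T desired) {
        return value_.compare_exchange_strong(expected, desired,
                                              std::memory_order_relaxed);
      }

     private:
      std::atomic<T> value_;
    };

    // e.g. Atomic<size_t> num_bytes_allocated_;
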
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index d006349..2d73a71 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -52,8 +52,7 @@
     ref->SetFieldObject(pending_next_offset, ref, false);
     list_ = ref;
   } else {
-    mirror::Object* head =
-        list_->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+    mirror::Object* head = list_->GetFieldObject<mirror::Object>(pending_next_offset, false);
     ref->SetFieldObject(pending_next_offset, head, false);
     list_->SetFieldObject(pending_next_offset, ref, false);
   }
@@ -62,7 +61,7 @@
 mirror::Object* ReferenceQueue::DequeuePendingReference() {
   DCHECK(!IsEmpty());
   MemberOffset pending_next_offset = heap_->GetReferencePendingNextOffset();
-  mirror::Object* head = list_->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+  mirror::Object* head = list_->GetFieldObject<mirror::Object>(pending_next_offset, false);
   DCHECK(head != nullptr);
   mirror::Object* ref;
   // Note: the following code is thread-safe because it is only called from ProcessReferences which
@@ -71,7 +70,7 @@
     ref = list_;
     list_ = nullptr;
   } else {
-    mirror::Object* next = head->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+    mirror::Object* next = head->GetFieldObject<mirror::Object>(pending_next_offset, false);
     list_->SetFieldObject(pending_next_offset, next, false);
     ref = head;
   }
@@ -84,11 +83,11 @@
   os << "Reference starting at list_=" << list_ << "\n";
   while (cur != nullptr) {
     mirror::Object* pending_next =
-        cur->GetFieldObject<mirror::Object*>(heap_->GetReferencePendingNextOffset(), false);
+        cur->GetFieldObject<mirror::Object>(heap_->GetReferencePendingNextOffset(), false);
     os << "PendingNext=" << pending_next;
     if (cur->GetClass()->IsFinalizerReferenceClass()) {
       os << " Zombie=" <<
-          cur->GetFieldObject<mirror::Object*>(heap_->GetFinalizerReferenceZombieOffset(), false);
+          cur->GetFieldObject<mirror::Object>(heap_->GetFinalizerReferenceZombieOffset(), false);
     }
     os << "\n";
     cur = pending_next;
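
The list that Dump() walks above is a cyclic singly linked list: list_ designates the tail, the tail's pending next is the head, and a lone element points at itself, which is why a non-null pending next means already enqueued (compare IsEnqueued() in heap.cc above). A compact sketch of both queue operations using plain pointers; clearing pending_next on dequeue is an assumption consistent with that invariant:

    #include <cassert>
    #include <cstddef>

    struct Ref {
      Ref* pending_next = nullptr;  // non-null iff currently enqueued
    };

    struct RefQueue {
      Ref* list_ = nullptr;  // tail of the cycle; tail->pending_next is the head

      void Enqueue(Ref* ref) {
        if (list_ == nullptr) {
          ref->pending_next = ref;  // single element: a self-cycle
          list_ = ref;
        } else {
          ref->pending_next = list_->pending_next;  // becomes the new head
          list_->pending_next = ref;
        }
      }

      Ref* Dequeue() {
        assert(list_ != nullptr);
        Ref* head = list_->pending_next;
        Ref* ref;
        if (head == list_) {  // last element left
          ref = list_;
          list_ = nullptr;
        } else {
          list_->pending_next = head->pending_next;  // unlink the head
          ref = head;
        }
        ref->pending_next = nullptr;  // restore the "not enqueued" state
        return ref;
      }
    };
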
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 89589c3..3f3069e 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -21,7 +21,7 @@
 #include <string>
 #include <vector>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/timing_logger.h"
 #include "globals.h"
 #include "gtest/gtest.h"
@@ -83,7 +83,7 @@
  private:
   // Lock, used for parallel GC reference enqueuing. It allows for multiple threads simultaneously
   // calling AtomicEnqueueIfNotEnqueued.
-  Mutex lock_;
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // The heap contains the reference offsets.
   Heap* const heap_;
   // The actual reference list. Not a root since it will be nullptr when the GC is not running.
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index 4dc17df..a314d74 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -29,7 +29,7 @@
   capacity = RoundUp(capacity, kPageSize);
   std::string error_msg;
   UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
-                                                 PROT_READ | PROT_WRITE, &error_msg));
+                                                 PROT_READ | PROT_WRITE, true, &error_msg));
   if (mem_map.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
         << PrettySize(capacity) << " with message " << error_msg;
@@ -69,7 +69,7 @@
   return ret;
 }
 
-size_t BumpPointerSpace::AllocationSize(const mirror::Object* obj) {
+size_t BumpPointerSpace::AllocationSize(mirror::Object* obj) {
   return AllocationSizeNonvirtual(obj);
 }
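AllocationSize can defer to obj->SizeOf() because a bump pointer space keeps no per-allocation metadata at all; allocation is a single pointer increment. A self-contained sketch of that core, with alignment and thread-local buffers omitted:

#include <cstddef>
#include <cstdint>

struct BumpRegion {
  uint8_t* pos;  // Next free byte.
  uint8_t* end;  // One past the last usable byte.
};

void* BumpAlloc(BumpRegion* region, size_t num_bytes) {
  if (static_cast<size_t>(region->end - region->pos) < num_bytes) {
    return nullptr;  // Out of space: the caller must grow the space or GC.
  }
  void* result = region->pos;
  region->pos += num_bytes;  // No header is written; the size is recomputed
  return result;             // from the object itself (obj->SizeOf()).
}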
 
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 3e25b6b..d73fe3b 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -49,8 +49,7 @@
   mirror::Object* AllocNonvirtualWithoutAccounting(size_t num_bytes);
 
   // Return the storage space required by obj.
-  virtual size_t AllocationSize(const mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual size_t AllocationSize(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // No-ops unless we support free lists.
   virtual size_t Free(Thread*, mirror::Object*) {
@@ -60,7 +59,7 @@
     return 0;
   }
 
-  size_t AllocationSizeNonvirtual(const mirror::Object* obj)
+  size_t AllocationSizeNonvirtual(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return obj->SizeOf();
   }
@@ -135,7 +134,6 @@
   byte* AllocBlock(size_t bytes) EXCLUSIVE_LOCKS_REQUIRED(block_lock_);
   void RevokeThreadLocalBuffersLocked(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(block_lock_);
 
-  size_t InternalAllocationSize(const mirror::Object* obj);
   mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 981af53..931ed21 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -228,7 +228,7 @@
   return dlmalloc_space->MoreCore(increment);
 }
 
-size_t DlMallocSpace::AllocationSize(const mirror::Object* obj) {
+size_t DlMallocSpace::AllocationSize(mirror::Object* obj) {
   return AllocationSizeNonvirtual(obj);
 }
 
@@ -287,6 +287,7 @@
 }
 
 void DlMallocSpace::Clear() {
+  // TODO: Delete and create new mspace here.
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   GetLiveBitmap()->Clear();
   GetMarkBitmap()->Clear();
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 671d2b2..4507c36 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -48,13 +48,15 @@
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-  virtual size_t AllocationSize(const mirror::Object* obj);
-  virtual size_t Free(Thread* self, mirror::Object* ptr);
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
+  virtual size_t AllocationSize(mirror::Object* obj);
+  virtual size_t Free(Thread* self, mirror::Object* ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
 
-  size_t AllocationSizeNonvirtual(const mirror::Object* obj) {
+  size_t AllocationSizeNonvirtual(mirror::Object* obj) {
     void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
     return mspace_usable_size(obj_ptr) + kChunkOverhead;
   }
@@ -97,10 +99,6 @@
 
   virtual void Clear();
 
-  virtual void InvalidateAllocator() {
-    mspace_for_alloc_ = nullptr;
-  }
-
   virtual bool IsDlMallocSpace() const {
     return true;
   }
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 4777cc6..ebad8dd 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -35,7 +35,7 @@
 namespace gc {
 namespace space {
 
-AtomicInteger ImageSpace::bitmap_index_(0);
+Atomic<uint32_t> ImageSpace::bitmap_index_(0);
 
 ImageSpace::ImageSpace(const std::string& name, MemMap* mem_map,
                        accounting::SpaceBitmap* live_bitmap)
@@ -171,7 +171,7 @@
   byte* current = Begin() + RoundUp(sizeof(ImageHeader), kObjectAlignment);
   while (current < End()) {
     DCHECK_ALIGNED(current, kObjectAlignment);
-    const mirror::Object* obj = reinterpret_cast<const mirror::Object*>(current);
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(current);
     CHECK(live_bitmap_->Test(obj));
     CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
     current += RoundUp(obj->SizeOf(), kObjectAlignment);
@@ -227,7 +227,7 @@
     *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
     return nullptr;
   }
-  size_t bitmap_index = bitmap_index_.FetchAndAdd(1);
+  uint32_t bitmap_index = bitmap_index_.FetchAndAdd(1);
   std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_file_name,
                                        bitmap_index));
   UniquePtr<accounting::SpaceBitmap> bitmap(
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 78a83c9..9e19774 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -29,10 +29,6 @@
 // An image space is a space backed with a memory mapped image.
 class ImageSpace : public MemMapSpace {
  public:
-  bool CanAllocateInto() const {
-    return false;
-  }
-
   SpaceType GetType() const {
     return kSpaceTypeImageSpace;
   }
@@ -75,6 +71,10 @@
 
   void Dump(std::ostream& os) const;
 
+  // Sweeping image spaces is a NOP.
+  void Sweep(bool /* swap_bitmaps */, size_t* /* freed_objects */, size_t* /* freed_bytes */) {
+  }
+
  private:
   // Tries to initialize an ImageSpace from the given image path,
   // returning NULL on error.
@@ -94,7 +94,7 @@
 
   friend class Space;
 
-  static AtomicInteger bitmap_index_;
+  static Atomic<uint32_t> bitmap_index_;
 
   UniquePtr<accounting::SpaceBitmap> live_bitmap_;
 
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 7fcfed4..987a655 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -60,7 +60,7 @@
                                            size_t* bytes_allocated) {
   std::string error_msg;
   MemMap* mem_map = MemMap::MapAnonymous("large object space allocation", NULL, num_bytes,
-                                         PROT_READ | PROT_WRITE, &error_msg);
+                                         PROT_READ | PROT_WRITE, true, &error_msg);
   if (UNLIKELY(mem_map == NULL)) {
     LOG(WARNING) << "Large object allocation failed: " << error_msg;
     return NULL;
@@ -92,9 +92,9 @@
   return allocation_size;
 }
 
-size_t LargeObjectMapSpace::AllocationSize(const mirror::Object* obj) {
+size_t LargeObjectMapSpace::AllocationSize(mirror::Object* obj) {
   MutexLock mu(Thread::Current(), lock_);
-  MemMaps::iterator found = mem_maps_.find(const_cast<mirror::Object*>(obj));
+  MemMaps::iterator found = mem_maps_.find(obj);
   CHECK(found != mem_maps_.end()) << "Attempted to get size of a large object which is not live";
   return found->second->Size();
 }
@@ -134,7 +134,7 @@
   CHECK_EQ(size % kAlignment, 0U);
   std::string error_msg;
   MemMap* mem_map = MemMap::MapAnonymous(name.c_str(), requested_begin, size,
-                                         PROT_READ | PROT_WRITE, &error_msg);
+                                         PROT_READ | PROT_WRITE, true, &error_msg);
   CHECK(mem_map != NULL) << "Failed to allocate large object space mem map: " << error_msg;
   return new FreeListSpace(name, mem_map, mem_map->Begin(), mem_map->End());
 }
@@ -244,7 +244,7 @@
   return mem_map_->HasAddress(obj);
 }
 
-size_t FreeListSpace::AllocationSize(const mirror::Object* obj) {
+size_t FreeListSpace::AllocationSize(mirror::Object* obj) {
   AllocationHeader* header = GetAllocationHeader(obj);
   DCHECK(Contains(obj));
   DCHECK(!header->IsFree());
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index cd7c383..5274c8d 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -92,7 +92,7 @@
   static LargeObjectMapSpace* Create(const std::string& name);
 
   // Return the storage space required by obj.
-  size_t AllocationSize(const mirror::Object* obj);
+  size_t AllocationSize(mirror::Object* obj);
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* ptr);
   void Walk(DlMallocSpace::WalkCallback, void* arg) LOCKS_EXCLUDED(lock_);
@@ -118,8 +118,7 @@
   virtual ~FreeListSpace();
   static FreeListSpace* Create(const std::string& name, byte* requested_begin, size_t capacity);
 
-  size_t AllocationSize(const mirror::Object* obj)
-      EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  size_t AllocationSize(mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(lock_);
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* obj);
   bool Contains(const mirror::Object* obj) const;
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 2b2b26e..f90e6c7 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -19,6 +19,8 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
+#include "gc/space/space-inl.h"
+#include "gc/space/zygote_space.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
@@ -33,22 +35,27 @@
 size_t MallocSpace::bitmap_index_ = 0;
 
 MallocSpace::MallocSpace(const std::string& name, MemMap* mem_map,
-                         byte* begin, byte* end, byte* limit, size_t growth_limit)
+                         byte* begin, byte* end, byte* limit, size_t growth_limit,
+                         bool create_bitmaps)
     : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
       recent_free_pos_(0), lock_("allocation space lock", kAllocSpaceLock),
       growth_limit_(growth_limit) {
-  size_t bitmap_index = bitmap_index_++;
-  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
-  live_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
-  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
+  if (create_bitmaps) {
+    size_t bitmap_index = bitmap_index_++;
+    static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
+    CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
+    CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
+    live_bitmap_.reset(accounting::SpaceBitmap::Create(
+        StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+        Begin(), Capacity()));
+    DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #"
+        << bitmap_index;
+    mark_bitmap_.reset(accounting::SpaceBitmap::Create(
+        StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+        Begin(), Capacity()));
+    DCHECK(mark_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #"
+        << bitmap_index;
+  }
   for (auto& freed : recent_freed_objects_) {
     freed.first = nullptr;
     freed.second = nullptr;
@@ -80,7 +87,7 @@
 
   std::string error_msg;
   MemMap* mem_map = MemMap::MapAnonymous(name.c_str(), requested_begin, *capacity,
-                                         PROT_READ | PROT_WRITE, &error_msg);
+                                         PROT_READ | PROT_WRITE, true, &error_msg);
   if (mem_map == nullptr) {
     LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
                << PrettySize(*capacity) << ": " << error_msg;
@@ -88,14 +95,6 @@
   return mem_map;
 }
 
-void MallocSpace::SwapBitmaps() {
-  live_bitmap_.swap(mark_bitmap_);
-  // Swap names to get more descriptive diagnostics.
-  std::string temp_name(live_bitmap_->GetName());
-  live_bitmap_->SetName(mark_bitmap_->GetName());
-  mark_bitmap_->SetName(temp_name);
-}
-
 mirror::Class* MallocSpace::FindRecentFreedObject(const mirror::Object* obj) {
   size_t pos = recent_free_pos_;
   // Start at the most recently freed object and work our way back since there may be duplicates
@@ -154,29 +153,8 @@
   return original_end;
 }
 
-// Returns the old mark bitmap.
-accounting::SpaceBitmap* MallocSpace::BindLiveToMarkBitmap() {
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
-  temp_bitmap_.reset(mark_bitmap);
-  mark_bitmap_.reset(live_bitmap);
-  return mark_bitmap;
-}
-
-bool MallocSpace::HasBoundBitmaps() const {
-  return temp_bitmap_.get() != nullptr;
-}
-
-void MallocSpace::UnBindBitmaps() {
-  CHECK(HasBoundBitmaps());
-  // At this point, the temp_bitmap holds our old mark bitmap.
-  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
-  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
-  mark_bitmap_.reset(new_bitmap);
-  DCHECK(temp_bitmap_.get() == NULL);
-}
-
-MallocSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode) {
+ZygoteSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode,
+                                            MallocSpace** out_malloc_space) {
   // For RosAlloc, revoke thread local runs before creating a new
   // alloc space so that we won't mix thread local runs from different
   // alloc spaces.
@@ -220,15 +198,23 @@
   if (capacity - initial_size > 0) {
     CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
   }
-  MallocSpace* alloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator,
-                                            end_, end, limit_, growth_limit);
+  *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, end_, end,
+                                     limit_, growth_limit);
   SetLimit(End());
   live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
   CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
   mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
   CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  VLOG(heap) << "zygote space creation done";
-  return alloc_space;
+
+  // Create the actual zygote space.
+  ZygoteSpace* zygote_space = ZygoteSpace::Create("Zygote space", ReleaseMemMap(),
+                                                  live_bitmap_.release(), mark_bitmap_.release());
+  if (UNLIKELY(zygote_space == nullptr)) {
+    VLOG(heap) << "Failed creating zygote space from space " << GetName();
+  } else {
+    VLOG(heap) << "zygote space creation done";
+  }
+  return zygote_space;
 }
 
 void MallocSpace::Dump(std::ostream& os) const {
@@ -239,24 +225,16 @@
       << ",name=\"" << GetName() << "\"]";
 }
 
-struct SweepCallbackContext {
-  bool swap_bitmaps;
-  Heap* heap;
-  space::MallocSpace* space;
-  Thread* self;
-  size_t freed_objects;
-  size_t freed_bytes;
-};
-
-static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
+void MallocSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
   SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
-  space::AllocSpace* space = context->space;
+  DCHECK(context->space->IsMallocSpace());
+  space::MallocSpace* space = context->space->AsMallocSpace();
   Thread* self = context->self;
   Locks::heap_bitmap_lock_->AssertExclusiveHeld(self);
   // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
   // the bitmaps as an optimization.
   if (!context->swap_bitmaps) {
-    accounting::SpaceBitmap* bitmap = context->space->GetLiveBitmap();
+    accounting::SpaceBitmap* bitmap = space->GetLiveBitmap();
     for (size_t i = 0; i < num_ptrs; ++i) {
       bitmap->Clear(ptrs[i]);
     }
@@ -268,54 +246,6 @@
   context->freed_bytes += space->FreeList(self, num_ptrs, ptrs);
 }
 
-static void ZygoteSweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
-  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
-  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
-  accounting::CardTable* card_table = context->heap->GetCardTable();
-  // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
-  // the bitmaps as an optimization.
-  if (!context->swap_bitmaps) {
-    accounting::SpaceBitmap* bitmap = context->space->GetLiveBitmap();
-    for (size_t i = 0; i < num_ptrs; ++i) {
-      bitmap->Clear(ptrs[i]);
-    }
-  }
-  // We don't free any actual memory to avoid dirtying the shared zygote pages.
-  for (size_t i = 0; i < num_ptrs; ++i) {
-    // Need to mark the card since this will update the mod-union table next GC cycle.
-    card_table->MarkCard(ptrs[i]);
-  }
-}
-
-void MallocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes) {
-  DCHECK(freed_objects != nullptr);
-  DCHECK(freed_bytes != nullptr);
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = GetMarkBitmap();
-  // If the bitmaps are bound then sweeping this space clearly won't do anything.
-  if (live_bitmap == mark_bitmap) {
-    return;
-  }
-  SweepCallbackContext scc;
-  scc.swap_bitmaps = swap_bitmaps;
-  scc.heap = Runtime::Current()->GetHeap();
-  scc.self = Thread::Current();
-  scc.space = this;
-  scc.freed_objects = 0;
-  scc.freed_bytes = 0;
-  if (swap_bitmaps) {
-    std::swap(live_bitmap, mark_bitmap);
-  }
-  // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
-  accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
-                                     reinterpret_cast<uintptr_t>(Begin()),
-                                     reinterpret_cast<uintptr_t>(End()),
-                                     IsZygoteSpace() ? &ZygoteSweepCallback : &SweepCallback,
-                                     reinterpret_cast<void*>(&scc));
-  *freed_objects += scc.freed_objects;
-  *freed_bytes += scc.freed_bytes;
-}
-
 }  // namespace space
 }  // namespace gc
 }  // namespace art
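CreateZygoteSpace now consumes the receiver: its mem map and both bitmaps move into the returned ZygoteSpace, and the remaining free memory comes back as a fresh MallocSpace through the out parameter. An outline of the caller contract, condensed from the space test later in this patch (the heap registration calls are assumptions standing in for whatever bookkeeping the caller does):

gc::Heap* heap = Runtime::Current()->GetHeap();
heap->RemoveSpace(space);  // Stop tracking the space about to be split.
MallocSpace* remainder = nullptr;
ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
                                                     heap->IsLowMemoryMode(),
                                                     &remainder);
delete space;              // The old object is an empty shell after the split.
heap->AddSpace(zygote_space);  // Assumed registration, as in the test.
heap->AddSpace(remainder);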
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 7681b6d..f17bcd2 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -31,6 +31,8 @@
 
 namespace space {
 
+class ZygoteSpace;
+
 // TODO: Remove define macro
 #define CHECK_MEMORY_CALL(call, args, what) \
   do { \
@@ -41,19 +43,13 @@
     } \
   } while (false)
 
-// const bool kUseRosAlloc = true;
-
 // A common parent of DlMallocSpace and RosAllocSpace.
 class MallocSpace : public ContinuousMemMapAllocSpace {
  public:
   typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
 
   SpaceType GetType() const {
-    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
-      return kSpaceTypeZygoteSpace;
-    } else {
-      return kSpaceTypeAllocSpace;
-    }
+    return kSpaceTypeMallocSpace;
   }
 
   // Allocate num_bytes without allowing the underlying space to grow.
@@ -62,9 +58,11 @@
   // Allocate num_bytes allowing the underlying space to grow.
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
   // Return the storage space required by obj.
-  virtual size_t AllocationSize(const mirror::Object* obj) = 0;
-  virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
+  virtual size_t AllocationSize(mirror::Object* obj) = 0;
+  virtual size_t Free(Thread* self, mirror::Object* ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
 #ifndef NDEBUG
   virtual void CheckMoreCoreForPrecondition() {}  // to be overridden in the debug build.
@@ -109,51 +107,27 @@
     return GetMemMap()->Size();
   }
 
-  accounting::SpaceBitmap* GetLiveBitmap() const {
-    return live_bitmap_.get();
-  }
-
-  accounting::SpaceBitmap* GetMarkBitmap() const {
-    return mark_bitmap_.get();
-  }
-
   void Dump(std::ostream& os) const;
 
   void SetGrowthLimit(size_t growth_limit);
 
-  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
-  void SwapBitmaps();
-
   virtual MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
                                       byte* begin, byte* end, byte* limit, size_t growth_limit) = 0;
 
-  // Turn ourself into a zygote space and return a new alloc space
-  // which has our unused memory.  When true, the low memory mode
-  // argument specifies that the heap wishes the created space to be
-  // more aggressive in releasing unused pages.
-  MallocSpace* CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode);
-
+  // Splits this space into a zygote space and a new malloc space which gets our unused memory.
+  // When true, the low memory mode argument specifies that the heap wishes the created space to
+  // be more aggressive in releasing unused pages. Invalidates the space it's called on.
+  ZygoteSpace* CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode,
+                                 MallocSpace** out_malloc_space) NO_THREAD_SAFETY_ANALYSIS;
   virtual uint64_t GetBytesAllocated() = 0;
   virtual uint64_t GetObjectsAllocated() = 0;
 
-  // Returns the old mark bitmap.
-  accounting::SpaceBitmap* BindLiveToMarkBitmap();
-  bool HasBoundBitmaps() const;
-  void UnBindBitmaps();
-
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
-  // Used to ensure that failure happens when you free / allocate into an invalidated space. If we
-  // don't do this we may get heap corruption instead of a segfault at null.
-  virtual void InvalidateAllocator() = 0;
-
-  // Sweep the references in the malloc space.
-  void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
-
  protected:
   MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
-              byte* limit, size_t growth_limit);
+              byte* limit, size_t growth_limit, bool create_bitmaps = true);
 
   static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
                               size_t* growth_limit, size_t* capacity, byte* requested_begin);
@@ -164,11 +138,13 @@
   virtual void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size,
                                 bool low_memory_mode) = 0;
 
-  void RegisterRecentFree(mirror::Object* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void RegisterRecentFree(mirror::Object* ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+    return &SweepCallback;
+  }
 
   // Recent allocation buffer.
   static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
@@ -190,9 +166,10 @@
   // one time by a call to ClearGrowthLimit.
   size_t growth_limit_;
 
-  friend class collector::MarkSweep;
-
  private:
+  static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   DISALLOW_COPY_AND_ASSIGN(MallocSpace);
 };
 
@@ -232,13 +209,14 @@
     return result;
   }
 
-  virtual size_t AllocationSize(const mirror::Object* obj) {
-    size_t result = BaseMallocSpaceType::AllocationSize(reinterpret_cast<const mirror::Object*>(
-        reinterpret_cast<const byte*>(obj) - kValgrindRedZoneBytes));
+  virtual size_t AllocationSize(mirror::Object* obj) {
+    size_t result = BaseMallocSpaceType::AllocationSize(reinterpret_cast<mirror::Object*>(
+        reinterpret_cast<byte*>(obj) - kValgrindRedZoneBytes));
     return result - 2 * kValgrindRedZoneBytes;
   }
 
-  virtual size_t Free(Thread* self, mirror::Object* ptr) {
+  virtual size_t Free(Thread* self, mirror::Object* ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     void* obj_after_rdz = reinterpret_cast<void*>(ptr);
     void* obj_with_rdz = reinterpret_cast<byte*>(obj_after_rdz) - kValgrindRedZoneBytes;
     // Make redzones undefined.
@@ -249,7 +227,8 @@
     return freed - 2 * kValgrindRedZoneBytes;
   }
 
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     size_t freed = 0;
     for (size_t i = 0; i < num_ptrs; i++) {
       freed += Free(self, ptrs[i]);
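The Valgrind wrapper above brackets every object with a red zone on each side, so user-visible pointers and sizes differ from the allocator's by kValgrindRedZoneBytes at each end; that is why AllocationSize subtracts twice the red zone and Free shifts the pointer back before delegating. The arithmetic in isolation (sizes only, no Valgrind client calls):

#include <cstddef>
#include <cstdint>

constexpr size_t kRedZoneBytes = 8;  // Stand-in for kValgrindRedZoneBytes.

// The allocator hands out [red zone | object | red zone]; callers only ever
// see the object pointer in the middle.
uint8_t* ObjectFromAllocation(uint8_t* allocation) {
  return allocation + kRedZoneBytes;
}

uint8_t* AllocationFromObject(uint8_t* object) {
  return object - kRedZoneBytes;
}

size_t ObjectSizeFromAllocationSize(size_t allocation_size) {
  return allocation_size - 2 * kRedZoneBytes;  // Strip both red zones.
}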
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index e5993f6..86e441e 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -220,7 +220,7 @@
   return rosalloc_space->MoreCore(increment);
 }
 
-size_t RosAllocSpace::AllocationSize(const mirror::Object* obj) {
+size_t RosAllocSpace::AllocationSize(mirror::Object* obj) {
   return AllocationSizeNonvirtual(obj);
 }
 
@@ -312,6 +312,7 @@
 }
 
 void RosAllocSpace::Clear() {
+  // TODO: Delete and create new rosalloc here.
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   GetLiveBitmap()->Clear();
   GetMarkBitmap()->Clear();
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 6720976..4cd5a6d 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -47,13 +47,15 @@
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-  virtual size_t AllocationSize(const mirror::Object* obj);
-  virtual size_t Free(Thread* self, mirror::Object* ptr);
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
+  virtual size_t AllocationSize(mirror::Object* obj);
+  virtual size_t Free(Thread* self, mirror::Object* ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
 
-  size_t AllocationSizeNonvirtual(const mirror::Object* obj)
+  size_t AllocationSizeNonvirtual(mirror::Object* obj)
       NO_THREAD_SAFETY_ANALYSIS {
     // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held.
     void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
@@ -95,10 +97,6 @@
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
-  virtual void InvalidateAllocator() {
-    rosalloc_for_alloc_ = NULL;
-  }
-
   virtual bool IsRosAllocSpace() const {
     return true;
   }
diff --git a/runtime/gc/space/space-inl.h b/runtime/gc/space/space-inl.h
index 0c1d7a2..02a63f6 100644
--- a/runtime/gc/space/space-inl.h
+++ b/runtime/gc/space/space-inl.h
@@ -32,7 +32,7 @@
 }
 
 inline MallocSpace* Space::AsMallocSpace() {
-  DCHECK(GetType() == kSpaceTypeAllocSpace || GetType() == kSpaceTypeZygoteSpace);
+  DCHECK(IsMallocSpace());
   DCHECK(IsDlMallocSpace() || IsRosAllocSpace());
   return down_cast<MallocSpace*>(down_cast<MemMapSpace*>(this));
 }
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index f8ba6b3..32a00bc 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -17,6 +17,9 @@
 #include "space.h"
 
 #include "base/logging.h"
+#include "gc/accounting/heap_bitmap.h"
+#include "runtime.h"
+#include "thread-inl.h"
 
 namespace art {
 namespace gc {
@@ -41,6 +44,69 @@
     mark_objects_(new accounting::ObjectSet("large marked objects")) {
 }
 
+void ContinuousMemMapAllocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects,
+                                       size_t* freed_bytes) {
+  DCHECK(freed_objects != nullptr);
+  DCHECK(freed_bytes != nullptr);
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = GetMarkBitmap();
+  // If the bitmaps are bound then sweeping this space clearly won't do anything.
+  if (live_bitmap == mark_bitmap) {
+    return;
+  }
+  SweepCallbackContext scc;
+  scc.swap_bitmaps = swap_bitmaps;
+  scc.heap = Runtime::Current()->GetHeap();
+  scc.self = Thread::Current();
+  scc.space = this;
+  scc.freed_objects = 0;
+  scc.freed_bytes = 0;
+  if (swap_bitmaps) {
+    std::swap(live_bitmap, mark_bitmap);
+  }
+  // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
+  accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
+                                     reinterpret_cast<uintptr_t>(Begin()),
+                                     reinterpret_cast<uintptr_t>(End()),
+                                     GetSweepCallback(),
+                                     reinterpret_cast<void*>(&scc));
+  *freed_objects += scc.freed_objects;
+  *freed_bytes += scc.freed_bytes;
+}
+
+// Replace the mark bitmap with the live bitmap, stashing the old mark bitmap in temp_bitmap_.
+void ContinuousMemMapAllocSpace::BindLiveToMarkBitmap() {
+  CHECK(!HasBoundBitmaps());
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  if (live_bitmap != mark_bitmap_.get()) {
+    accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
+    Runtime::Current()->GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
+    temp_bitmap_.reset(mark_bitmap);
+    mark_bitmap_.reset(live_bitmap);
+  }
+}
+
+bool ContinuousMemMapAllocSpace::HasBoundBitmaps() const {
+  return temp_bitmap_.get() != nullptr;
+}
+
+void ContinuousMemMapAllocSpace::UnBindBitmaps() {
+  CHECK(HasBoundBitmaps());
+  // At this point, the temp_bitmap holds our old mark bitmap.
+  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
+  Runtime::Current()->GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap_.get(), new_bitmap);
+  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
+  mark_bitmap_.reset(new_bitmap);
+  DCHECK(temp_bitmap_.get() == nullptr);
+}
+
+void ContinuousMemMapAllocSpace::SwapBitmaps() {
+  live_bitmap_.swap(mark_bitmap_);
+  // Swap names to get more descriptive diagnostics.
+  std::string temp_name(live_bitmap_->GetName());
+  live_bitmap_->SetName(mark_bitmap_->GetName());
+  mark_bitmap_->SetName(temp_name);
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
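Sweep above is a set-difference walk: after the optional swap, anything set in the live bitmap but clear in the mark bitmap is unreachable, and SweepWalk hands runs of such objects to the space's callback. A toy version over plain bitsets (SpaceBitmap's real layout is word-packed and passes object pointers, not indices):

#include <cstddef>
#include <vector>

using Bitmap = std::vector<bool>;  // One bit per object slot.

// Visit every index set in 'live' but clear in 'mark': these are the dead
// objects the sweep callback frees (or, for a zygote space, merely re-marks).
template <typename Callback>
void SweepWalk(const Bitmap& live, const Bitmap& mark, Callback&& callback) {
  for (size_t i = 0; i < live.size(); ++i) {
    if (live[i] && !mark[i]) {
      callback(i);
    }
  }
}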
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 5292344..98e6f65 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -44,6 +44,7 @@
 
 class AllocSpace;
 class BumpPointerSpace;
+class ContinuousMemMapAllocSpace;
 class ContinuousSpace;
 class DiscontinuousSpace;
 class MallocSpace;
@@ -51,6 +52,7 @@
 class RosAllocSpace;
 class ImageSpace;
 class LargeObjectSpace;
+class ZygoteSpace;
 
 static constexpr bool kDebugSpaces = kIsDebugBuild;
 
@@ -68,7 +70,7 @@
 
 enum SpaceType {
   kSpaceTypeImageSpace,
-  kSpaceTypeAllocSpace,
+  kSpaceTypeMallocSpace,
   kSpaceTypeZygoteSpace,
   kSpaceTypeBumpPointerSpace,
   kSpaceTypeLargeObjectSpace,
@@ -91,11 +93,6 @@
     return gc_retention_policy_;
   }
 
-  // Does the space support allocation?
-  virtual bool CanAllocateInto() const {
-    return true;
-  }
-
   // Is the given object contained within this space?
   virtual bool Contains(const mirror::Object* obj) const = 0;
 
@@ -111,7 +108,7 @@
   // Is this a dlmalloc backed allocation space?
   bool IsMallocSpace() const {
     SpaceType type = GetType();
-    return type == kSpaceTypeAllocSpace || type == kSpaceTypeZygoteSpace;
+    return type == kSpaceTypeMallocSpace;
   }
   MallocSpace* AsMallocSpace();
 
@@ -120,20 +117,24 @@
   }
   virtual DlMallocSpace* AsDlMallocSpace() {
     LOG(FATAL) << "Unreachable";
-    return NULL;
+    return nullptr;
   }
   virtual bool IsRosAllocSpace() const {
     return false;
   }
   virtual RosAllocSpace* AsRosAllocSpace() {
     LOG(FATAL) << "Unreachable";
-    return NULL;
+    return nullptr;
   }
 
   // Is this the space allocated into by the Zygote and no-longer in use?
   bool IsZygoteSpace() const {
     return GetType() == kSpaceTypeZygoteSpace;
   }
+  virtual ZygoteSpace* AsZygoteSpace() {
+    LOG(FATAL) << "Unreachable";
+    return nullptr;
+  }
 
   // Is this space a bump pointer space?
   bool IsBumpPointerSpace() const {
@@ -141,7 +142,7 @@
   }
   virtual BumpPointerSpace* AsBumpPointerSpace() {
     LOG(FATAL) << "Unreachable";
-    return NULL;
+    return nullptr;
   }
 
   // Does this space hold large objects and implement the large object space abstraction?
@@ -168,6 +169,14 @@
     return nullptr;
   }
 
+  virtual bool IsContinuousMemMapAllocSpace() const {
+    return false;
+  }
+  virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace() {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+
   virtual ~Space() {}
 
  protected:
@@ -181,6 +190,15 @@
   std::string name_;
 
  protected:
+  struct SweepCallbackContext {
+    bool swap_bitmaps;
+    Heap* heap;
+    space::Space* space;
+    Thread* self;
+    size_t freed_objects;
+    size_t freed_bytes;
+  };
+
   // When should objects within this space be reclaimed? Not constant as we vary it in the case
   // of Zygote forking.
   GcRetentionPolicy gc_retention_policy_;
@@ -205,7 +223,7 @@
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
 
   // Return the storage space required by obj.
-  virtual size_t AllocationSize(const mirror::Object* obj) = 0;
+  virtual size_t AllocationSize(mirror::Object* obj) = 0;
 
   // Returns how many bytes were freed.
   virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
@@ -378,22 +396,53 @@
   virtual bool IsAllocSpace() const {
     return true;
   }
-
   virtual AllocSpace* AsAllocSpace() {
     return this;
   }
 
+  virtual bool IsContinuousMemMapAllocSpace() const {
+    return true;
+  }
+  virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace() {
+    return this;
+  }
+
+  bool HasBoundBitmaps() const EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void BindLiveToMarkBitmap()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void UnBindBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
+  void SwapBitmaps();
+
   virtual void Clear() {
     LOG(FATAL) << "Unimplemented";
   }
 
+  virtual accounting::SpaceBitmap* GetLiveBitmap() const {
+    return live_bitmap_.get();
+  }
+  virtual accounting::SpaceBitmap* GetMarkBitmap() const {
+    return mark_bitmap_.get();
+  }
+
+  virtual void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
+  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+
  protected:
+  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+
   ContinuousMemMapAllocSpace(const std::string& name, MemMap* mem_map, byte* begin,
                              byte* end, byte* limit, GcRetentionPolicy gc_retention_policy)
       : MemMapSpace(name, mem_map, begin, end, limit, gc_retention_policy) {
   }
 
  private:
+  friend class gc::Heap;
   DISALLOW_COPY_AND_ASSIGN(ContinuousMemMapAllocSpace);
 };
 
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index b1be9d8..9989ffe 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -16,6 +16,7 @@
 
 #include "dlmalloc_space.h"
 #include "large_object_space.h"
+#include "zygote_space.h"
 
 #include "common_test.h"
 #include "globals.h"
@@ -162,6 +163,7 @@
   EXPECT_TRUE(ptr5 == NULL);
 
   // Release some memory.
+  ScopedObjectAccess soa(self);
   size_t free3 = space->AllocationSize(ptr3);
   EXPECT_EQ(free3, ptr3_bytes_allocated);
   EXPECT_EQ(free3, space->Free(self, ptr3));
@@ -179,7 +181,16 @@
 
   // Make sure that the zygote space isn't directly at the start of the space.
   space->Alloc(self, 1U * MB, &dummy);
-  space = space->CreateZygoteSpace("alloc space", Runtime::Current()->GetHeap()->IsLowMemoryMode());
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  space::Space* old_space = space;
+  heap->RemoveSpace(old_space);
+  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
+                                                              heap->IsLowMemoryMode(),
+                                                              &space);
+  delete old_space;
+  // Add the zygote space.
+  AddSpace(zygote_space);
 
   // Make the space findable to the heap; it will also be deleted when the runtime is cleaned up.
   AddSpace(space);
@@ -247,6 +258,7 @@
   EXPECT_TRUE(ptr5 == NULL);
 
   // Release some memory.
+  ScopedObjectAccess soa(self);
   size_t free3 = space->AllocationSize(ptr3);
   EXPECT_EQ(free3, ptr3_bytes_allocated);
   space->Free(self, ptr3);
@@ -344,30 +356,36 @@
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->Alloc(self, 16, &allocation_size);
-    EXPECT_TRUE(lots_of_objects[i] != NULL);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
     InstallClass(lots_of_objects[i], 16);
     EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
-  // Release memory and check pointers are NULL
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    EXPECT_TRUE(lots_of_objects[i] == NULL);
+  // Release memory and check pointers are NULL.
+  {
+    ScopedObjectAccess soa(self);
+    space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+    for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+      EXPECT_TRUE(lots_of_objects[i] == nullptr);
+    }
   }
 
   // Succeeds, fits by adjusting the max allowed footprint.
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->AllocWithGrowth(self, 1024, &allocation_size);
-    EXPECT_TRUE(lots_of_objects[i] != NULL);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
     InstallClass(lots_of_objects[i], 1024);
     EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
   // Release memory and check pointers are NULL
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    EXPECT_TRUE(lots_of_objects[i] == NULL);
+  {
+    ScopedObjectAccess soa(self);
+    space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+    for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+      EXPECT_TRUE(lots_of_objects[i] == nullptr);
+    }
   }
 }
 
@@ -481,28 +499,30 @@
       break;
     }
 
-    // Free some objects
-    for (size_t i = 0; i < last_object; i += free_increment) {
-      mirror::Object* object = lots_of_objects.get()[i];
-      if (object == NULL) {
-        continue;
+    {
+      // Free some objects
+      ScopedObjectAccess soa(self);
+      for (size_t i = 0; i < last_object; i += free_increment) {
+        mirror::Object* object = lots_of_objects.get()[i];
+        if (object == NULL) {
+          continue;
+        }
+        size_t allocation_size = space->AllocationSize(object);
+        if (object_size > 0) {
+          EXPECT_GE(allocation_size, static_cast<size_t>(object_size));
+        } else {
+          EXPECT_GE(allocation_size, 8u);
+        }
+        space->Free(self, object);
+        lots_of_objects.get()[i] = NULL;
+        amount_allocated -= allocation_size;
+        footprint = space->GetFootprint();
+        EXPECT_GE(space->Size(), footprint);  // invariant
       }
-      size_t allocation_size = space->AllocationSize(object);
-      if (object_size > 0) {
-        EXPECT_GE(allocation_size, static_cast<size_t>(object_size));
-      } else {
-        EXPECT_GE(allocation_size, 8u);
-      }
-      space->Free(self, object);
-      lots_of_objects.get()[i] = NULL;
-      amount_allocated -= allocation_size;
-      footprint = space->GetFootprint();
-      EXPECT_GE(space->Size(), footprint);  // invariant
+
+      free_increment >>= 1;
     }
-
-    free_increment >>= 1;
   }
-
   // The space has become empty here before allocating a large object
   // below. For RosAlloc, revoke thread-local runs, which are kept
   // even when empty for a performance reason, so that they won't
@@ -530,8 +550,10 @@
   EXPECT_LE(space->Size(), growth_limit);
 
   // Clean up
-  space->Free(self, large_object);
-
+  {
+    ScopedObjectAccess soa(self);
+    space->Free(self, large_object);
+  }
   // Sanity check footprint
   footprint = space->GetFootprint();
   EXPECT_LE(footprint, growth_limit);
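The ScopedObjectAccess scopes threaded through the test exist because Free, FreeList and AllocationSize are now annotated SHARED_LOCKS_REQUIRED(Locks::mutator_lock_): they read the object header, so the mutator lock must be held. The pattern in outline:

{
  ScopedObjectAccess soa(self);              // Acquires mutator_lock_ (shared).
  size_t size = space->AllocationSize(obj);  // Legal while the lock is held.
  space->Free(self, obj);
}  // mutator_lock_ is released when soa leaves scope.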
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
new file mode 100644
index 0000000..a303765
--- /dev/null
+++ b/runtime/gc/space/zygote_space.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "zygote_space.h"
+
+#include "gc/accounting/card_table-inl.h"
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+class CountObjectsAllocated {
+ public:
+  explicit CountObjectsAllocated(size_t* objects_allocated)
+      : objects_allocated_(objects_allocated) {}
+
+  void operator()(mirror::Object* obj) const {
+    ++*objects_allocated_;
+  }
+
+ private:
+  size_t* const objects_allocated_;
+};
+
+ZygoteSpace* ZygoteSpace::Create(const std::string& name, MemMap* mem_map,
+                                 accounting::SpaceBitmap* live_bitmap,
+                                 accounting::SpaceBitmap* mark_bitmap) {
+  DCHECK(live_bitmap != nullptr);
+  DCHECK(mark_bitmap != nullptr);
+  size_t objects_allocated = 0;
+  CountObjectsAllocated visitor(&objects_allocated);
+  ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(mem_map->Begin()),
+                                reinterpret_cast<uintptr_t>(mem_map->End()), visitor);
+  ZygoteSpace* zygote_space = new ZygoteSpace(name, mem_map, objects_allocated);
+  CHECK(zygote_space->live_bitmap_.get() == nullptr);
+  CHECK(zygote_space->mark_bitmap_.get() == nullptr);
+  zygote_space->live_bitmap_.reset(live_bitmap);
+  zygote_space->mark_bitmap_.reset(mark_bitmap);
+  return zygote_space;
+}
+
+ZygoteSpace::ZygoteSpace(const std::string& name, MemMap* mem_map, size_t objects_allocated)
+    : ContinuousMemMapAllocSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
+                                 kGcRetentionPolicyFullCollect),
+      objects_allocated_(objects_allocated) {
+}
+
+void ZygoteSpace::Dump(std::ostream& os) const {
+  os << GetType()
+      << " begin=" << reinterpret_cast<void*>(Begin())
+      << ",end=" << reinterpret_cast<void*>(End())
+      << ",size=" << PrettySize(Size())
+      << ",name=\"" << GetName() << "\"]";
+}
+
+void ZygoteSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
+  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
+  DCHECK(context->space->IsZygoteSpace());
+  ZygoteSpace* zygote_space = context->space->AsZygoteSpace();
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
+  accounting::CardTable* card_table = context->heap->GetCardTable();
+  // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
+  // the bitmaps as an optimization.
+  if (!context->swap_bitmaps) {
+    accounting::SpaceBitmap* bitmap = zygote_space->GetLiveBitmap();
+    for (size_t i = 0; i < num_ptrs; ++i) {
+      bitmap->Clear(ptrs[i]);
+    }
+  }
+  // We don't free any actual memory to avoid dirtying the shared zygote pages.
+  for (size_t i = 0; i < num_ptrs; ++i) {
+    // Need to mark the card since this will update the mod-union table next GC cycle.
+    card_table->MarkCard(ptrs[i]);
+  }
+  zygote_space->objects_allocated_.FetchAndSub(num_ptrs);
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
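SweepCallback above deliberately frees nothing: releasing zygote objects would dirty pages that are shared copy-on-write with every forked application. Instead it clears live bits and dirties the card for each dead object so the next collection revisits that region via the mod-union table. A sketch of the card-marking step (the 128-byte card size and 0x70 dirty value are assumptions about CardTable's constants):

#include <cstdint>

constexpr unsigned kCardShift = 7;    // Assumed: one card covers 128 bytes.
constexpr uint8_t kCardDirty = 0x70;  // Assumed dirty-card marker value.

// One byte per card; a dirty byte means "this region may contain references
// the GC must re-scan".
void MarkCard(uint8_t* card_table, const void* heap_begin, const void* addr) {
  uintptr_t offset = reinterpret_cast<uintptr_t>(addr) -
                     reinterpret_cast<uintptr_t>(heap_begin);
  card_table[offset >> kCardShift] = kCardDirty;
}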
diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h
new file mode 100644
index 0000000..e0035b3
--- /dev/null
+++ b/runtime/gc/space/zygote_space.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
+
+#include "malloc_space.h"
+#include "mem_map.h"
+
+namespace art {
+namespace gc {
+
+namespace accounting {
+class SpaceBitmap;
+}
+
+namespace space {
+
+// A zygote space is a space which you cannot allocate into or free from.
+class ZygoteSpace : public ContinuousMemMapAllocSpace {
+ public:
+  // Creates a zygote space from an existing mem map, taking ownership of the given bitmaps.
+  static ZygoteSpace* Create(const std::string& name, MemMap* mem_map,
+                             accounting::SpaceBitmap* live_bitmap,
+                             accounting::SpaceBitmap* mark_bitmap)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void Dump(std::ostream& os) const;
+  virtual SpaceType GetType() const {
+    return kSpaceTypeZygoteSpace;
+  }
+  virtual ZygoteSpace* AsZygoteSpace() {
+    return this;
+  }
+  virtual mirror::Object* AllocWithGrowth(Thread* /*self*/, size_t /*num_bytes*/,
+                                          size_t* /*bytes_allocated*/) {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+  virtual size_t AllocationSize(mirror::Object* obj) {
+    LOG(FATAL) << "Unimplemented";
+    return 0;
+  }
+  virtual size_t Free(Thread* self, mirror::Object* ptr) {
+    LOG(FATAL) << "Unimplemented";
+    return 0;
+  }
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+    LOG(FATAL) << "Unimplemented";
+    return 0;
+  }
+  virtual uint64_t GetBytesAllocated() {
+    return Size();
+  }
+  virtual uint64_t GetObjectsAllocated() {
+    return objects_allocated_;
+  }
+
+ protected:
+  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+    return &SweepCallback;
+  }
+
+ private:
+  ZygoteSpace(const std::string& name, MemMap* mem_map, size_t objects_allocated);
+  static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg);
+
+  AtomicInteger objects_allocated_;
+
+  friend class Space;
+  DISALLOW_COPY_AND_ASSIGN(ZygoteSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
diff --git a/runtime/globals.h b/runtime/globals.h
index b1ccbdc..8c3ae56 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -36,7 +36,7 @@
 static constexpr size_t kBitsPerByte = 8;
 static constexpr size_t kBitsPerByteLog2 = 3;
 static constexpr int kBitsPerWord = kWordSize * kBitsPerByte;
-static constexpr size_t kWordHighBitMask = 1 << (kBitsPerWord - 1);
+static constexpr size_t kWordHighBitMask = static_cast<size_t>(1) << (kBitsPerWord - 1);
 
 // Required stack alignment
 static constexpr size_t kStackAlignment = 16;
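The kWordHighBitMask change fixes real undefined behavior on 64-bit builds: in 1 << (kBitsPerWord - 1) the literal 1 has type int, so a shift count of 63 exceeds the operand's width and the expression cannot produce the intended 64-bit mask. Widening the operand first makes the shift well defined:

#include <cstddef>

constexpr size_t kBits = sizeof(size_t) * 8;

// Undefined on 64-bit targets (an int shifted by 63):
//   constexpr size_t broken = 1 << (kBits - 1);

// Well defined: the operand is a size_t before the shift.
constexpr size_t kHighBit = static_cast<size_t>(1) << (kBits - 1);

static_assert(kHighBit == ~(~static_cast<size_t>(0) >> 1),
              "only the top bit is set");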
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 9f899e8..24d403d 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -167,14 +167,8 @@
   hprof_basic_long = 11,
 };
 
-typedef uint32_t HprofId;
-typedef HprofId HprofStringId;
-typedef HprofId HprofObjectId;
-typedef HprofId HprofClassObjectId;
-typedef std::set<mirror::Class*> ClassSet;
-typedef std::set<mirror::Class*>::iterator ClassSetIterator;
-typedef SafeMap<std::string, size_t> StringMap;
-typedef SafeMap<std::string, size_t>::iterator StringMapIterator;
+typedef uint32_t HprofStringId;
+typedef uint32_t HprofClassObjectId;
 
 // Represents a top-level hprof record, whose serialized format is:
 // U1  TAG: denoting the type of the record
@@ -183,11 +177,8 @@
 // U1* BODY: as many bytes as specified in the above uint32_t field
 class HprofRecord {
  public:
-  HprofRecord() {
-    dirty_ = false;
-    alloc_length_ = 128;
+  HprofRecord() : alloc_length_(128), fp_(nullptr), tag_(0), time_(0), length_(0), dirty_(false) {
     body_ = reinterpret_cast<unsigned char*>(malloc(alloc_length_));
-    fp_ = NULL;
   }
 
   ~HprofRecord() {
@@ -233,7 +224,7 @@
 
   int AddU1(uint8_t value) {
     int err = GuaranteeRecordAppend(1);
-    if (err != 0) {
+    if (UNLIKELY(err != 0)) {
       return err;
     }
 
@@ -253,13 +244,30 @@
     return AddU8List(&value, 1);
   }
 
-  int AddId(HprofObjectId value) {
-    return AddU4((uint32_t) value);
+  int AddObjectId(const mirror::Object* value) {
+    return AddU4(PointerToLowMemUInt32(value));
+  }
+
+  // The ID for the synthetic object generated to account for class static overhead.
+  int AddClassStaticsId(const mirror::Class* value) {
+    return AddU4(1 | PointerToLowMemUInt32(value));
+  }
+
+  int AddJniGlobalRefId(jobject value) {
+    return AddU4(PointerToLowMemUInt32(value));
+  }
+
+  int AddClassId(HprofClassObjectId value) {
+    return AddU4(value);
+  }
+
+  int AddStringId(HprofStringId value) {
+    return AddU4(value);
   }
 
   int AddU1List(const uint8_t* values, size_t numValues) {
     int err = GuaranteeRecordAppend(numValues);
-    if (err != 0) {
+    if (UNLIKELY(err != 0)) {
       return err;
     }
 
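The new Add*Id helpers above make the 32-bit ID scheme explicit: an object's hprof ID is the low 32 bits of its pointer, and the synthetic object that accounts for a class's statics reuses the class pointer with bit 0 set; since heap objects are at least 8-byte aligned, a tagged ID can never collide with a real one. A sketch of the tagging trick (PointerToLowMemUInt32 is assumed to truncate pointers known to sit in the low 4 GiB):

#include <cstdint>

uint32_t PointerToLowMemUInt32(const void* p) {
  // Assumption: the runtime keeps these objects below 4 GiB, so the
  // truncation loses no information.
  return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(p));
}

uint32_t ObjectId(const void* obj) {
  return PointerToLowMemUInt32(obj);
}

uint32_t ClassStaticsId(const void* klass) {
  // Bit 0 is always clear in an aligned object's address, so OR-ing it in
  // yields a distinct ID for the synthetic statics "object".
  return 1u | PointerToLowMemUInt32(klass);
}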
@@ -270,7 +278,7 @@
 
   int AddU2List(const uint16_t* values, size_t numValues) {
     int err = GuaranteeRecordAppend(numValues * 2);
-    if (err != 0) {
+    if (UNLIKELY(err != 0)) {
       return err;
     }
 
@@ -285,7 +293,7 @@
 
   int AddU4List(const uint32_t* values, size_t numValues) {
     int err = GuaranteeRecordAppend(numValues * 4);
-    if (err != 0) {
+    if (UNLIKELY(err != 0)) {
       return err;
     }
 
@@ -317,8 +325,16 @@
     return 0;
   }
 
-  int AddIdList(const HprofObjectId* values, size_t numValues) {
-    return AddU4List((const uint32_t*) values, numValues);
+  int AddIdList(mirror::ObjectArray<mirror::Object>* values)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    int32_t length = values->GetLength();
+    for (int32_t i = 0; i < length; ++i) {
+      int err = AddObjectId(values->GetWithoutChecks(i));
+      if (UNLIKELY(err != 0)) {
+        return err;
+      }
+    }
+    return 0;
   }
 
   int AddUtf8String(const char* str) {
@@ -510,12 +526,11 @@
     HprofRecord* rec = &current_record_;
     uint32_t nextSerialNumber = 1;
 
-    for (ClassSetIterator it = classes_.begin(); it != classes_.end(); ++it) {
-      const mirror::Class* c = *it;
-      CHECK(c != NULL);
+    for (mirror::Class* c : classes_) {
+      CHECK(c != nullptr);
 
       int err = current_record_.StartNewRecord(header_fp_, HPROF_TAG_LOAD_CLASS, HPROF_TIME);
-      if (err != 0) {
+      if (UNLIKELY(err != 0)) {
         return err;
       }
 
@@ -525,9 +540,9 @@
       // U4: stack trace serial number
       // ID: class name string ID
       rec->AddU4(nextSerialNumber++);
-      rec->AddId((HprofClassObjectId) c);
+      rec->AddObjectId(c);
       rec->AddU4(HPROF_NULL_STACK_TRACE);
-      rec->AddId(LookupClassNameId(c));
+      rec->AddStringId(LookupClassNameId(c));
     }
 
     return 0;
@@ -536,9 +551,9 @@
   int WriteStringTable() {
     HprofRecord* rec = &current_record_;
 
-    for (StringMapIterator it = strings_.begin(); it != strings_.end(); ++it) {
-      const std::string& string = (*it).first;
-      size_t id = (*it).second;
+    for (const auto& p : strings_) {
+      const std::string& string = p.first;
+      HprofStringId id = p.second;
 
       int err = current_record_.StartNewRecord(header_fp_, HPROF_TAG_STRING, HPROF_TIME);
       if (err != 0) {
@@ -573,24 +588,26 @@
 
   int MarkRootObject(const mirror::Object* obj, jobject jniObj);
 
-  HprofClassObjectId LookupClassId(mirror::Class* c)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (c == NULL) {
-      // c is the superclass of java.lang.Object or a primitive
-      return (HprofClassObjectId)0;
+  HprofClassObjectId LookupClassId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (c == nullptr) {
+      // c is the superclass of java.lang.Object or a primitive.
+      return 0;
     }
 
-    std::pair<ClassSetIterator, bool> result = classes_.insert(c);
-    const mirror::Class* present = *result.first;
+    {
+      auto result = classes_.insert(c);
+      const mirror::Class* present = *result.first;
+      CHECK_EQ(present, c);
+    }
 
     // Make sure that we've assigned a string ID for this class' name
     LookupClassNameId(c);
 
-    CHECK_EQ(present, c);
-    return (HprofStringId) present;
+    HprofClassObjectId result = PointerToLowMemUInt32(c);
+    return result;
   }
 
-  HprofStringId LookupStringId(mirror::String* string) {
+  HprofStringId LookupStringId(mirror::String* string) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return LookupStringId(string->ToModifiedUtf8());
   }
 
@@ -599,7 +616,7 @@
   }
 
   HprofStringId LookupStringId(const std::string& string) {
-    StringMapIterator it = strings_.find(string);
+    auto it = strings_.find(string);
     if (it != strings_.end()) {
       return it->second;
     }
@@ -608,8 +625,7 @@
     return id;
   }
 
-  HprofStringId LookupClassNameId(const mirror::Class* c)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  HprofStringId LookupClassNameId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return LookupStringId(PrettyDescriptor(c));
   }
 
@@ -675,9 +691,9 @@
   char* body_data_ptr_;
   size_t body_data_size_;
 
-  ClassSet classes_;
-  size_t next_string_id_;
-  StringMap strings_;
+  std::set<mirror::Class*> classes_;
+  HprofStringId next_string_id_;
+  SafeMap<std::string, HprofStringId> strings_;
 
   DISALLOW_COPY_AND_ASSIGN(Hprof);
 };
@@ -685,11 +701,8 @@
 #define OBJECTS_PER_SEGMENT     ((size_t)128)
 #define BYTES_PER_SEGMENT       ((size_t)4096)
 
-// The static field-name for the synthetic object generated to account
-// for class static overhead.
+// The static field-name for the synthetic object generated to account for class static overhead.
 #define STATIC_OVERHEAD_NAME    "$staticOverhead"
-// The ID for the synthetic object generated to account for class static overhead.
-#define CLASS_STATICS_ID(c) ((HprofObjectId)(((uint32_t)(c)) | 1))
 
 static HprofBasicType SignatureToBasicTypeAndSize(const char* sig, size_t* sizeOut) {
   char c = sig[0];
@@ -765,15 +778,15 @@
   case HPROF_ROOT_DEBUGGER:
   case HPROF_ROOT_VM_INTERNAL:
     rec->AddU1(heapTag);
-    rec->AddId((HprofObjectId)obj);
+    rec->AddObjectId(obj);
     break;
 
   // ID: object ID
   // ID: JNI global ref ID
   case HPROF_ROOT_JNI_GLOBAL:
     rec->AddU1(heapTag);
-    rec->AddId((HprofObjectId)obj);
-    rec->AddId((HprofId)jniObj);
+    rec->AddObjectId(obj);
+    rec->AddJniGlobalRefId(jniObj);
     break;
 
   // ID: object ID
@@ -783,7 +796,7 @@
   case HPROF_ROOT_JNI_MONITOR:
   case HPROF_ROOT_JAVA_FRAME:
     rec->AddU1(heapTag);
-    rec->AddId((HprofObjectId)obj);
+    rec->AddObjectId(obj);
     rec->AddU4(gc_thread_serial_number_);
     rec->AddU4((uint32_t)-1);
     break;
@@ -793,7 +806,7 @@
   case HPROF_ROOT_NATIVE_STACK:
   case HPROF_ROOT_THREAD_BLOCK:
     rec->AddU1(heapTag);
-    rec->AddId((HprofObjectId)obj);
+    rec->AddObjectId(obj);
     rec->AddU4(gc_thread_serial_number_);
     break;
 
@@ -802,7 +815,7 @@
   // U4: stack trace serial number
   case HPROF_ROOT_THREAD_OBJECT:
     rec->AddU1(heapTag);
-    rec->AddId((HprofObjectId)obj);
+    rec->AddObjectId(obj);
     rec->AddU4(gc_thread_serial_number_);
     rec->AddU4((uint32_t)-1);    // xxx
     break;
@@ -859,7 +872,7 @@
       nameId = LookupStringId("<ILLEGAL>");
       break;
     }
-    rec->AddId(nameId);
+    rec->AddStringId(nameId);
     current_heap_ = desiredHeap;
   }
 
@@ -875,11 +888,11 @@
       // obj is a ClassObject.
       size_t sFieldCount = thisClass->NumStaticFields();
       if (sFieldCount != 0) {
-        int byteLength = sFieldCount*sizeof(JValue);  // TODO bogus; fields are packed
+        int byteLength = sFieldCount * sizeof(JValue);  // TODO bogus; fields are packed
         // Create a byte array to reflect the allocation of the
         // StaticField array at the end of this class.
         rec->AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
-        rec->AddId(CLASS_STATICS_ID(obj));
+        rec->AddClassStaticsId(thisClass);
         rec->AddU4(StackTraceSerialNumber(obj));
         rec->AddU4(byteLength);
         rec->AddU1(hprof_basic_byte);
@@ -889,14 +902,14 @@
       }
 
       rec->AddU1(HPROF_CLASS_DUMP);
-      rec->AddId(LookupClassId(thisClass));
+      rec->AddClassId(LookupClassId(thisClass));
       rec->AddU4(StackTraceSerialNumber(thisClass));
-      rec->AddId(LookupClassId(thisClass->GetSuperClass()));
-      rec->AddId((HprofObjectId)thisClass->GetClassLoader());
-      rec->AddId((HprofObjectId)0);    // no signer
-      rec->AddId((HprofObjectId)0);    // no prot domain
-      rec->AddId((HprofId)0);           // reserved
-      rec->AddId((HprofId)0);           // reserved
+      rec->AddClassId(LookupClassId(thisClass->GetSuperClass()));
+      rec->AddObjectId(thisClass->GetClassLoader());
+      rec->AddObjectId(nullptr);    // no signer
+      rec->AddObjectId(nullptr);    // no prot domain
+      rec->AddObjectId(nullptr);    // reserved
+      rec->AddObjectId(nullptr);    // reserved
       if (thisClass->IsClassClass()) {
         // ClassObjects have their static fields appended, so aren't all the same size.
         // But they're at least this size.
@@ -916,9 +929,9 @@
         rec->AddU2((uint16_t)0);
       } else {
         rec->AddU2((uint16_t)(sFieldCount+1));
-        rec->AddId(LookupStringId(STATIC_OVERHEAD_NAME));
+        rec->AddStringId(LookupStringId(STATIC_OVERHEAD_NAME));
         rec->AddU1(hprof_basic_object);
-        rec->AddId(CLASS_STATICS_ID(obj));
+        rec->AddClassStaticsId(thisClass);
 
         for (size_t i = 0; i < sFieldCount; ++i) {
           mirror::ArtField* f = thisClass->GetStaticField(i);
@@ -926,7 +939,7 @@
 
           size_t size;
           HprofBasicType t = SignatureToBasicTypeAndSize(fh.GetTypeDescriptor(), &size);
-          rec->AddId(LookupStringId(fh.GetName()));
+          rec->AddStringId(LookupStringId(fh.GetName()));
           rec->AddU1(t);
           if (size == 1) {
             rec->AddU1(static_cast<uint8_t>(f->Get32(thisClass)));
@@ -949,24 +962,24 @@
         mirror::ArtField* f = thisClass->GetInstanceField(i);
         fh.ChangeField(f);
         HprofBasicType t = SignatureToBasicTypeAndSize(fh.GetTypeDescriptor(), NULL);
-        rec->AddId(LookupStringId(fh.GetName()));
+        rec->AddStringId(LookupStringId(fh.GetName()));
         rec->AddU1(t);
       }
     } else if (c->IsArrayClass()) {
-      const mirror::Array* aobj = obj->AsArray();
+      mirror::Array* aobj = obj->AsArray();
       uint32_t length = aobj->GetLength();
 
       if (obj->IsObjectArray()) {
         // obj is an object array.
         rec->AddU1(HPROF_OBJECT_ARRAY_DUMP);
 
-        rec->AddId((HprofObjectId)obj);
+        rec->AddObjectId(obj);
         rec->AddU4(StackTraceSerialNumber(obj));
         rec->AddU4(length);
-        rec->AddId(LookupClassId(c));
+        rec->AddClassId(LookupClassId(c));
 
         // Dump the elements, which are always objects or NULL.
-        rec->AddIdList((const HprofObjectId*)aobj->GetRawData(sizeof(mirror::Object*)), length);
+        rec->AddIdList(aobj->AsObjectArray<mirror::Object>());
       } else {
         size_t size;
         HprofBasicType t = PrimitiveToBasicTypeAndSize(c->GetComponentType()->GetPrimitiveType(), &size);
@@ -974,28 +987,28 @@
         // obj is a primitive array.
         rec->AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
 
-        rec->AddId((HprofObjectId)obj);
+        rec->AddObjectId(obj);
         rec->AddU4(StackTraceSerialNumber(obj));
         rec->AddU4(length);
         rec->AddU1(t);
 
         // Dump the raw, packed element values.
         if (size == 1) {
-          rec->AddU1List((const uint8_t*)aobj->GetRawData(sizeof(uint8_t)), length);
+          rec->AddU1List((const uint8_t*)aobj->GetRawData(sizeof(uint8_t), 0), length);
         } else if (size == 2) {
-          rec->AddU2List((const uint16_t*)aobj->GetRawData(sizeof(uint16_t)), length);
+          rec->AddU2List((const uint16_t*)aobj->GetRawData(sizeof(uint16_t), 0), length);
         } else if (size == 4) {
-          rec->AddU4List((const uint32_t*)aobj->GetRawData(sizeof(uint32_t)), length);
+          rec->AddU4List((const uint32_t*)aobj->GetRawData(sizeof(uint32_t), 0), length);
         } else if (size == 8) {
-          rec->AddU8List((const uint64_t*)aobj->GetRawData(sizeof(uint64_t)), length);
+          rec->AddU8List((const uint64_t*)aobj->GetRawData(sizeof(uint64_t), 0), length);
         }
       }
     } else {
       // obj is an instance object.
       rec->AddU1(HPROF_INSTANCE_DUMP);
-      rec->AddId((HprofObjectId)obj);
+      rec->AddObjectId(obj);
       rec->AddU4(StackTraceSerialNumber(obj));
-      rec->AddId(LookupClassId(c));
+      rec->AddClassId(LookupClassId(c));
 
       // Reserve some space for the length of the instance data, which we won't
       // know until we're done writing it.
@@ -1004,7 +1017,7 @@
 
       // Write the instance data: fields for this class, followed by superclass fields,
       // and so on. Don't write the klass or monitor fields of Object.class.
-      const mirror::Class* sclass = c;
+      mirror::Class* sclass = c;
       FieldHelper fh;
       while (!sclass->IsObjectClass()) {
         int ifieldCount = sclass->NumInstanceFields();
@@ -1019,10 +1032,9 @@
             rec->AddU2(f->Get32(obj));
           } else if (size == 4) {
             rec->AddU4(f->Get32(obj));
-          } else if (size == 8) {
-            rec->AddU8(f->Get64(obj));
           } else {
-            CHECK(false);
+            CHECK_EQ(size, 8U);
+            rec->AddU8(f->Get64(obj));
           }
         }
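
The hprof hunks above replace untyped AddId casts with typed AddObjectId/AddClassId/AddStringId helpers, with IDs taken from the low 32 bits of the object address via PointerToLowMemUInt32. A minimal standalone sketch of that mapping, using a stand-in Object type in place of mirror::Object (the low-4GiB assertion reflects an assumed runtime guarantee, not something this diff states):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Object {};  // Stand-in for mirror::Object.

    // Truncate a pointer to a 32-bit HPROF ID, as PointerToLowMemUInt32 does;
    // only valid while the heap is guaranteed to live below 4 GiB.
    static uint32_t PointerToLowMemUInt32(const void* p) {
      uintptr_t value = reinterpret_cast<uintptr_t>(p);
      assert(value == static_cast<uint32_t>(value));
      return static_cast<uint32_t>(value);
    }

    // Mirrors the new AddIdList loop: one 4-byte ID per array element.
    static void AddIdList(const std::vector<Object*>& values,
                          std::vector<uint32_t>* out) {
      for (Object* o : values) {
        out->push_back(o != nullptr ? PointerToLowMemUInt32(o) : 0u);
      }
    }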
 
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index aee7447..ac83601 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -29,6 +29,7 @@
   kArm,
   kThumb2,
   kX86,
+  kX86_64,
   kMips
 };
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 0b11543..59ffdc1 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -18,7 +18,7 @@
 
 #include <sys/uio.h>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "debugger.h"
@@ -68,10 +68,21 @@
   return true;
 }
 
-static void UpdateEntrypoints(mirror::ArtMethod* method, const void* code) {
-  method->SetEntryPointFromCompiledCode(code);
+static void UpdateEntrypoints(mirror::ArtMethod* method, const void* quick_code,
+                              const void* portable_code, bool have_portable_code)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  method->SetEntryPointFromPortableCompiledCode(portable_code);
+  method->SetEntryPointFromQuickCompiledCode(quick_code);
+  bool portable_enabled = method->IsPortableCompiled();
+  if (have_portable_code && !portable_enabled) {
+    method->SetIsPortableCompiled();
+  } else if (portable_enabled) {
+    method->ClearIsPortableCompiled();
+  }
   if (!method->IsResolutionMethod()) {
-    if (code == GetCompiledCodeToInterpreterBridge()) {
+    if (quick_code == GetQuickToInterpreterBridge()) {
+      DCHECK(portable_code == GetPortableToInterpreterBridge());
+      DCHECK(!method->IsNative()) << PrettyMethod(method);
       method->SetEntryPointFromInterpreter(art::interpreter::artInterpreterToInterpreterBridge);
     } else {
       method->SetEntryPointFromInterpreter(art::artInterpreterToCompiledCodeBridge);
@@ -84,37 +95,47 @@
     // Do not change stubs for these methods.
     return;
   }
-  const void* new_code;
+  const void* new_portable_code;
+  const void* new_quick_code;
   bool uninstall = !entry_exit_stubs_installed_ && !interpreter_stubs_installed_;
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   bool is_class_initialized = method->GetDeclaringClass()->IsInitialized();
+  bool have_portable_code = false;
   if (uninstall) {
     if ((forced_interpret_only_ || IsDeoptimized(method)) && !method->IsNative()) {
-      new_code = GetCompiledCodeToInterpreterBridge();
+      new_portable_code = GetPortableToInterpreterBridge();
+      new_quick_code = GetQuickToInterpreterBridge();
     } else if (is_class_initialized || !method->IsStatic() || method->IsConstructor()) {
-      new_code = class_linker->GetOatCodeFor(method);
+      new_portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code);
+      new_quick_code = class_linker->GetQuickOatCodeFor(method);
     } else {
-      new_code = GetResolutionTrampoline(class_linker);
+      new_portable_code = GetPortableResolutionTrampoline(class_linker);
+      new_quick_code = GetQuickResolutionTrampoline(class_linker);
     }
   } else {  // !uninstall
     if ((interpreter_stubs_installed_ || IsDeoptimized(method)) && !method->IsNative()) {
-      new_code = GetCompiledCodeToInterpreterBridge();
+      new_portable_code = GetPortableToInterpreterBridge();
+      new_quick_code = GetQuickToInterpreterBridge();
     } else {
       // Do not overwrite resolution trampoline. When the trampoline initializes the method's
       // class, all its static methods' code will be set to the instrumentation entry point.
       // For more details, see ClassLinker::FixupStaticTrampolines.
       if (is_class_initialized || !method->IsStatic() || method->IsConstructor()) {
         // Do not overwrite interpreter to prevent from posting method entry/exit events twice.
-        new_code = class_linker->GetOatCodeFor(method);
-        if (entry_exit_stubs_installed_ && new_code != GetCompiledCodeToInterpreterBridge()) {
-          new_code = GetQuickInstrumentationEntryPoint();
+        new_portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code);
+        new_quick_code = class_linker->GetQuickOatCodeFor(method);
+        if (entry_exit_stubs_installed_ && new_quick_code != GetQuickToInterpreterBridge()) {
+          DCHECK(new_portable_code != GetPortableToInterpreterBridge());
+          new_portable_code = GetPortableToInterpreterBridge();
+          new_quick_code = GetQuickInstrumentationEntryPoint();
         }
       } else {
-        new_code = GetResolutionTrampoline(class_linker);
+        new_portable_code = GetPortableResolutionTrampoline(class_linker);
+        new_quick_code = GetQuickResolutionTrampoline(class_linker);
       }
     }
   }
-  UpdateEntrypoints(method, new_code);
+  UpdateEntrypoints(method, new_quick_code, new_portable_code, have_portable_code);
 }
 
 // Places the instrumentation exit pc as the return PC for every quick frame. This also allows
@@ -470,23 +491,38 @@
   }
 }
 
-void Instrumentation::UpdateMethodsCode(mirror::ArtMethod* method, const void* code) const {
-  const void* new_code;
+void Instrumentation::UpdateMethodsCode(mirror::ArtMethod* method, const void* quick_code,
+                                        const void* portable_code, bool have_portable_code) const {
+  const void* new_portable_code;
+  const void* new_quick_code;
+  bool new_have_portable_code;
   if (LIKELY(!instrumentation_stubs_installed_)) {
-    new_code = code;
+    new_portable_code = portable_code;
+    new_quick_code = quick_code;
+    new_have_portable_code = have_portable_code;
   } else {
     if ((interpreter_stubs_installed_ || IsDeoptimized(method)) && !method->IsNative()) {
-      new_code = GetCompiledCodeToInterpreterBridge();
-    } else if (code == GetResolutionTrampoline(Runtime::Current()->GetClassLinker()) ||
-               code == GetCompiledCodeToInterpreterBridge()) {
-      new_code = code;
+      new_portable_code = GetPortableToInterpreterBridge();
+      new_quick_code = GetQuickToInterpreterBridge();
+      new_have_portable_code = false;
+    } else if (quick_code == GetQuickResolutionTrampoline(Runtime::Current()->GetClassLinker()) ||
+               quick_code == GetQuickToInterpreterBridge()) {
+      DCHECK((portable_code == GetPortableResolutionTrampoline(Runtime::Current()->GetClassLinker())) ||
+             (portable_code == GetPortableToInterpreterBridge()));
+      new_portable_code = portable_code;
+      new_quick_code = quick_code;
+      new_have_portable_code = have_portable_code;
     } else if (entry_exit_stubs_installed_) {
-      new_code = GetQuickInstrumentationEntryPoint();
+      new_quick_code = GetQuickInstrumentationEntryPoint();
+      new_portable_code = GetPortableToInterpreterBridge();
+      new_have_portable_code = false;
     } else {
-      new_code = code;
+      new_portable_code = portable_code;
+      new_quick_code = quick_code;
+      new_have_portable_code = have_portable_code;
     }
   }
-  UpdateEntrypoints(method, new_code);
+  UpdateEntrypoints(method, new_quick_code, new_portable_code, new_have_portable_code);
 }
 
 void Instrumentation::Deoptimize(mirror::ArtMethod* method) {
@@ -499,7 +535,8 @@
   CHECK(!already_deoptimized) << "Method " << PrettyMethod(method) << " is already deoptimized";
 
   if (!interpreter_stubs_installed_) {
-    UpdateEntrypoints(method, GetCompiledCodeToInterpreterBridge());
+    UpdateEntrypoints(method, GetQuickToInterpreterBridge(), GetPortableToInterpreterBridge(),
+                      false);
 
     // Install instrumentation exit stub and instrumentation frames. We may already have installed
     // these previously so it will only cover the newly created frames.
@@ -522,10 +559,15 @@
   if (!interpreter_stubs_installed_) {
     // Restore its code or resolution trampoline.
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    if (method->IsStatic() && !method->IsConstructor() && !method->GetDeclaringClass()->IsInitialized()) {
-      UpdateEntrypoints(method, GetResolutionTrampoline(class_linker));
+    if (method->IsStatic() && !method->IsConstructor() &&
+        !method->GetDeclaringClass()->IsInitialized()) {
+      UpdateEntrypoints(method, GetQuickResolutionTrampoline(class_linker),
+                        GetPortableResolutionTrampoline(class_linker), false);
     } else {
-      UpdateEntrypoints(method, class_linker->GetOatCodeFor(method));
+      bool have_portable_code = false;
+      const void* quick_code = class_linker->GetQuickOatCodeFor(method);
+      const void* portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code);
+      UpdateEntrypoints(method, quick_code, portable_code, have_portable_code);
     }
 
     // If there is no deoptimized method left, we can restore the stack of each thread.
@@ -582,21 +624,21 @@
   ConfigureStubs(false, false);
 }
 
-const void* Instrumentation::GetQuickCodeFor(const mirror::ArtMethod* method) const {
+const void* Instrumentation::GetQuickCodeFor(mirror::ArtMethod* method) const {
   Runtime* runtime = Runtime::Current();
   if (LIKELY(!instrumentation_stubs_installed_)) {
-    const void* code = method->GetEntryPointFromCompiledCode();
+    const void* code = method->GetEntryPointFromQuickCompiledCode();
     DCHECK(code != NULL);
     if (LIKELY(code != GetQuickResolutionTrampoline(runtime->GetClassLinker()) &&
                code != GetQuickToInterpreterBridge())) {
       return code;
     }
   }
-  return runtime->GetClassLinker()->GetOatCodeFor(method);
+  return runtime->GetClassLinker()->GetQuickOatCodeFor(method);
 }
 
 void Instrumentation::MethodEnterEventImpl(Thread* thread, mirror::Object* this_object,
-                                           const mirror::ArtMethod* method,
+                                           mirror::ArtMethod* method,
                                            uint32_t dex_pc) const {
   auto it = method_entry_listeners_.begin();
   bool is_end = (it == method_entry_listeners_.end());
@@ -610,7 +652,7 @@
 }
 
 void Instrumentation::MethodExitEventImpl(Thread* thread, mirror::Object* this_object,
-                                          const mirror::ArtMethod* method,
+                                          mirror::ArtMethod* method,
                                           uint32_t dex_pc, const JValue& return_value) const {
   auto it = method_exit_listeners_.begin();
   bool is_end = (it == method_exit_listeners_.end());
@@ -624,7 +666,7 @@
 }
 
 void Instrumentation::MethodUnwindEvent(Thread* thread, mirror::Object* this_object,
-                                        const mirror::ArtMethod* method,
+                                        mirror::ArtMethod* method,
                                         uint32_t dex_pc) const {
   if (have_method_unwind_listeners_) {
     for (InstrumentationListener* listener : method_unwind_listeners_) {
@@ -634,7 +676,7 @@
 }
 
 void Instrumentation::DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
-                                          const mirror::ArtMethod* method,
+                                          mirror::ArtMethod* method,
                                           uint32_t dex_pc) const {
   // TODO: STL copy-on-write collection? The copy below is due to the debug listener having an
   // action where it can remove itself as a listener and break the iterator. The copy only works
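
Across these instrumentation.cc hunks, every path that used to pick a single code pointer now picks a (quick, portable) pair, with the portable side falling back to the interpreter bridge whenever the quick side is redirected to the instrumentation entry. A simplified decision function capturing that branch structure (the enum and parameter names are illustrative, not the ART API, and the static/constructor/initialized check is collapsed into class_initialized):

    #include <utility>

    enum class EntryPoint {
      kOatCode, kToInterpreterBridge, kResolutionTrampoline, kInstrumentationEntry
    };

    // Returns {quick, portable}, mirroring InstallStubsFor's branches above.
    static std::pair<EntryPoint, EntryPoint> SelectEntrypoints(
        bool interpret_only, bool is_native, bool class_initialized,
        bool entry_exit_stubs_installed) {
      if (interpret_only && !is_native) {
        return {EntryPoint::kToInterpreterBridge, EntryPoint::kToInterpreterBridge};
      }
      if (!class_initialized) {
        // Leave the resolution trampoline in place; FixupStaticTrampolines
        // instruments static methods once the class initializes.
        return {EntryPoint::kResolutionTrampoline, EntryPoint::kResolutionTrampoline};
      }
      if (entry_exit_stubs_installed) {
        // Quick code is routed through the instrumentation entry; portable
        // code has no such stub and drops to the interpreter bridge instead.
        return {EntryPoint::kInstrumentationEntry, EntryPoint::kToInterpreterBridge};
      }
      return {EntryPoint::kOatCode, EntryPoint::kOatCode};
    }
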
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 41b545d..f01add1 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_INSTRUMENTATION_H_
 #define ART_RUNTIME_INSTRUMENTATION_H_
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/macros.h"
 #include "locks.h"
 
@@ -55,26 +55,26 @@
 
   // Call-back for when a method is entered.
   virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
-                             const mirror::ArtMethod* method,
+                             mirror::ArtMethod* method,
                              uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when a method is exited.
   // TODO: it's likely passing the return value would be useful; however, we may need to get and
   //       parse the shorty to determine what kind of register holds the result.
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc,
+                            mirror::ArtMethod* method, uint32_t dex_pc,
                             const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when a method is popped due to an exception throw. A method will either cause a
   // MethodExited call-back or a MethodUnwind call-back when its activation is removed.
   virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc)
+                            mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when the dex pc moves in a method.
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          const mirror::ArtMethod* method, uint32_t new_dex_pc)
+                          mirror::ArtMethod* method, uint32_t new_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back when an exception is caught.
@@ -171,13 +171,14 @@
   void ResetQuickAllocEntryPoints();
 
   // Update the code of a method respecting any installed stubs.
-  void UpdateMethodsCode(mirror::ArtMethod* method, const void* code) const
+  void UpdateMethodsCode(mirror::ArtMethod* method, const void* quick_code,
+                         const void* portable_code, bool have_portable_code) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get the quick code for the given method. More efficient than asking the class linker as it
   // will short-cut to GetCode if instrumentation and static method resolution stubs aren't
   // installed.
-  const void* GetQuickCodeFor(const mirror::ArtMethod* method) const
+  const void* GetQuickCodeFor(mirror::ArtMethod* method) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ForceInterpretOnly() {
@@ -218,7 +219,7 @@
   // Inform listeners that a method has been entered. A dex PC is provided as we may install
   // listeners into executing code and get method enter events for methods already on the stack.
   void MethodEnterEvent(Thread* thread, mirror::Object* this_object,
-                        const mirror::ArtMethod* method, uint32_t dex_pc) const
+                        mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(HasMethodEntryListeners())) {
       MethodEnterEventImpl(thread, this_object, method, dex_pc);
@@ -227,7 +228,7 @@
 
   // Inform listeners that a method has been exited.
   void MethodExitEvent(Thread* thread, mirror::Object* this_object,
-                       const mirror::ArtMethod* method, uint32_t dex_pc,
+                       mirror::ArtMethod* method, uint32_t dex_pc,
                        const JValue& return_value) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(HasMethodExitListeners())) {
@@ -237,12 +238,12 @@
 
   // Inform listeners that a method has been exited due to an exception.
   void MethodUnwindEvent(Thread* thread, mirror::Object* this_object,
-                         const mirror::ArtMethod* method, uint32_t dex_pc) const
+                         mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Inform listeners that the dex pc has moved (only supported by the interpreter).
   void DexPcMovedEvent(Thread* thread, mirror::Object* this_object,
-                       const mirror::ArtMethod* method, uint32_t dex_pc) const
+                       mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(HasDexPcListeners())) {
       DexPcMovedEventImpl(thread, this_object, method, dex_pc);
@@ -289,14 +290,14 @@
   }
 
   void MethodEnterEventImpl(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc) const
+                            mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void MethodExitEventImpl(Thread* thread, mirror::Object* this_object,
-                           const mirror::ArtMethod* method,
+                           mirror::ArtMethod* method,
                            uint32_t dex_pc, const JValue& return_value) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
-                           const mirror::ArtMethod* method, uint32_t dex_pc) const
+                           mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Have we hijacked ArtMethod::code_ so that it calls instrumentation/interpreter code?
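
Since the listener callbacks now take mirror::ArtMethod* without const, every listener implementation has to be updated in lock-step. A self-contained sketch of an updated listener, with the interface reduced to one callback over stand-in forward declarations (purely illustrative, not the full ART interface):

    #include <cstdint>
    #include <iostream>

    class Thread;                                    // Stand-ins for the ART types.
    namespace mirror { class Object; class ArtMethod; }

    struct Listener {
      virtual ~Listener() {}
      virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
                                 mirror::ArtMethod* method, uint32_t dex_pc) = 0;
    };

    struct TraceListener : Listener {
      void MethodEntered(Thread*, mirror::Object*, mirror::ArtMethod*,
                         uint32_t dex_pc) override {
        std::cout << "method entered at dex pc " << dex_pc << "\n";
      }
    };
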
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index f574a0f..02a9aa6 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -356,7 +356,7 @@
     DCHECK_LT(shorty_pos + 1, mh.GetShortyLength());
     switch (shorty[shorty_pos + 1]) {
       case 'L': {
-        Object* o = reinterpret_cast<Object*>(args[arg_pos]);
+        Object* o = reinterpret_cast<StackReference<Object>*>(&args[arg_pos])->AsMirrorPtr();
         shadow_frame->SetVRegReference(cur_reg, o);
         break;
       }
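
The change above reads an argument slot through StackReference<Object>::AsMirrorPtr() instead of reinterpret_casting the raw uint32_t. A standalone simplification of such a compressed 32-bit reference (the real ART template carries more machinery; the 4 GiB assertion is an assumption about the heap placement):

    #include <cassert>
    #include <cstdint>

    template <typename T>
    class StackReference {
     public:
      static StackReference FromMirrorPtr(T* p) {
        uintptr_t value = reinterpret_cast<uintptr_t>(p);
        assert(value == static_cast<uint32_t>(value));  // Heap below 4 GiB.
        return StackReference(static_cast<uint32_t>(value));
      }
      T* AsMirrorPtr() const {
        return reinterpret_cast<T*>(static_cast<uintptr_t>(ref_));
      }
      uint32_t AsVRegValue() const { return ref_; }  // Raw bits for a vreg slot.
     private:
      explicit StackReference(uint32_t ref) : ref_(ref) {}
      uint32_t ref_;
    };
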
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 0f94ccd..0b959fb 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -25,15 +25,16 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Assign register 'src_reg' from shadow_frame to register 'dest_reg' into new_shadow_frame.
-static inline void AssignRegister(ShadowFrame& new_shadow_frame, const ShadowFrame& shadow_frame,
-                                  size_t dest_reg, size_t src_reg) {
+static inline void AssignRegister(ShadowFrame* new_shadow_frame, const ShadowFrame& shadow_frame,
+                                  size_t dest_reg, size_t src_reg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // If both register locations contain the same value, the register probably holds a reference.
   int32_t src_value = shadow_frame.GetVReg(src_reg);
   mirror::Object* o = shadow_frame.GetVRegReference<false>(src_reg);
-  if (src_value == reinterpret_cast<int32_t>(o)) {
-    new_shadow_frame.SetVRegReference(dest_reg, o);
+  if (src_value == reinterpret_cast<intptr_t>(o)) {
+    new_shadow_frame->SetVRegReference(dest_reg, o);
   } else {
-    new_shadow_frame.SetVReg(dest_reg, src_value);
+    new_shadow_frame->SetVReg(dest_reg, src_value);
   }
 }
 
@@ -84,7 +85,7 @@
       ++dest_reg;
       ++arg_offset;
     }
-    for (size_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) {
+    for (uint32_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) {
       DCHECK_LT(shorty_pos + 1, mh.GetShortyLength());
       const size_t src_reg = (is_range) ? vregC + arg_offset : arg[arg_offset];
       switch (shorty[shorty_pos + 1]) {
@@ -131,18 +132,18 @@
       const uint16_t first_src_reg = inst->VRegC_3rc();
       for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
           ++dest_reg, ++src_reg) {
-        AssignRegister(*new_shadow_frame, shadow_frame, dest_reg, src_reg);
+        AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg);
       }
     } else {
       DCHECK_LE(num_ins, 5U);
       uint16_t regList = inst->Fetch16(2);
       uint16_t count = num_ins;
       if (count == 5) {
-        AssignRegister(*new_shadow_frame, shadow_frame, first_dest_reg + 4U, (inst_data >> 8) & 0x0f);
+        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + 4U, (inst_data >> 8) & 0x0f);
         --count;
        }
       for (size_t arg_index = 0; arg_index < count; ++arg_index, regList >>= 4) {
-        AssignRegister(*new_shadow_frame, shadow_frame, first_dest_reg + arg_index, regList & 0x0f);
+        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, regList & 0x0f);
       }
     }
   }
@@ -289,7 +290,7 @@
     CHECK(field.get() != NULL);
     ArtMethod* c = jlr_Field->FindDeclaredDirectMethod("<init>", "(Ljava/lang/reflect/ArtField;)V");
     uint32_t args[1];
-    args[0] = reinterpret_cast<uint32_t>(found);
+    args[0] = StackReference<mirror::Object>::FromMirrorPtr(found).AsVRegValue();
     EnterInterpreterFromInvoke(self, c, field.get(), args, NULL);
     result->SetL(field.get());
   } else if (name == "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)" ||
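
AssignRegister's heuristic works because a shadow frame stores each vreg twice, as a raw 32-bit value and as a reference slot; when the two agree, the vreg is treated as a reference so it stays visible to the GC. A compact model of that check, with ShadowFrame reduced to two parallel arrays (names are illustrative):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Object {};

    // Minimal model: each vreg has a raw 32-bit slot and a reference slot.
    struct MiniShadowFrame {
      std::vector<uint32_t> raw;
      std::vector<Object*> refs;
      void SetVReg(size_t i, uint32_t v) { raw[i] = v; refs[i] = nullptr; }
      void SetVRegReference(size_t i, Object* o) {
        raw[i] = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o));
        refs[i] = o;
      }
    };

    static void AssignRegister(MiniShadowFrame* dst, const MiniShadowFrame& src,
                               size_t dest_reg, size_t src_reg) {
      uint32_t src_value = src.raw[src_reg];
      Object* o = src.refs[src_reg];
      // If both slots encode the same value, the vreg probably holds a reference.
      if (src_value == static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o))) {
        dst->SetVRegReference(dest_reg, o);
      } else {
        dst->SetVReg(dest_reg, src_value);
      }
    }
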
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 4481210..768ca33 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -218,7 +218,7 @@
       shadow_frame.SetVRegLong(vregA, static_cast<int64_t>(obj->GetField64(field_offset, is_volatile)));
       break;
     case Primitive::kPrimNot:
-      shadow_frame.SetVRegReference(vregA, obj->GetFieldObject<mirror::Object*>(field_offset, is_volatile));
+      shadow_frame.SetVRegReference(vregA, obj->GetFieldObject<mirror::Object>(field_offset, is_volatile));
       break;
     default:
       LOG(FATAL) << "Unreachable: " << field_type;
@@ -529,10 +529,10 @@
     oss << PrettyMethod(shadow_frame.GetMethod())
         << StringPrintf("\n0x%x: ", dex_pc)
         << inst->DumpString(&mh.GetDexFile()) << "\n";
-    for (size_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
+    for (uint32_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
       uint32_t raw_value = shadow_frame.GetVReg(i);
       Object* ref_value = shadow_frame.GetVRegReference(i);
-      oss << StringPrintf(" vreg%d=0x%08X", i, raw_value);
+      oss << StringPrintf(" vreg%u=0x%08X", i, raw_value);
       if (ref_value != NULL) {
         if (ref_value->GetClass()->IsStringClass() &&
             ref_value->AsString()->GetCharArray() != NULL) {
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index ca03885..e8504b7 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -568,7 +568,7 @@
         HANDLE_PENDING_EXCEPTION();
       } else {
         uint32_t size_in_bytes = payload->element_count * payload->element_width;
-        memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
+        memcpy(array->GetRawData(payload->element_width, 0), payload->data, size_in_bytes);
         ADVANCE(3);
       }
     }
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 7631736..e5d15b1 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -483,7 +483,7 @@
           break;
         }
         uint32_t size_in_bytes = payload->element_count * payload->element_width;
-        memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
+        memcpy(array->GetRawData(payload->element_width, 0), payload->data, size_in_bytes);
         inst = inst->Next_3xx();
         break;
       }
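
Array::GetRawData now takes an explicit element index alongside the component size; the calls in both interpreter back-ends pass 0 to keep the old behavior. A hedged sketch of the address computation the two-argument form implies (the free function and header layout here are assumptions for illustration; the real GetRawData is a member on mirror::Array):

    #include <cstddef>
    #include <cstdint>

    // Illustrative only: an array laid out as [header][e0][e1]...
    static void* GetRawData(void* array_base, size_t header_size,
                            size_t component_size, int32_t index) {
      return static_cast<uint8_t*>(array_base) + header_size +
             static_cast<size_t>(index) * component_size;
    }
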
diff --git a/runtime/invoke_arg_array_builder.h b/runtime/invoke_arg_array_builder.h
index f615e8e..6ecce40 100644
--- a/runtime/invoke_arg_array_builder.h
+++ b/runtime/invoke_arg_array_builder.h
@@ -78,6 +78,10 @@
     num_bytes_ += 4;
   }
 
+  void Append(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Append(StackReference<mirror::Object>::FromMirrorPtr(obj).AsVRegValue());
+  }
+
   void AppendWide(uint64_t value) {
     // For ARM and MIPS portable, align wide values to 8 bytes (ArgArray starts at offset of 4).
 #if defined(ART_USE_PORTABLE_COMPILER) && (defined(__arm__) || defined(__mips__))
@@ -93,8 +97,8 @@
   void BuildArgArray(const ScopedObjectAccess& soa, mirror::Object* receiver, va_list ap)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Set receiver if non-null (method is not static)
-    if (receiver != NULL) {
-      Append(reinterpret_cast<int32_t>(receiver));
+    if (receiver != nullptr) {
+      Append(receiver);
     }
     for (size_t i = 1; i < shorty_len_; ++i) {
       switch (shorty_[i]) {
@@ -112,7 +116,7 @@
           break;
         }
         case 'L':
-          Append(reinterpret_cast<int32_t>(soa.Decode<mirror::Object*>(va_arg(ap, jobject))));
+          Append(soa.Decode<mirror::Object*>(va_arg(ap, jobject)));
           break;
         case 'D': {
           JValue value;
@@ -131,8 +135,8 @@
   void BuildArgArray(const ScopedObjectAccess& soa, mirror::Object* receiver, jvalue* args)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Set receiver if non-null (method is not static)
-    if (receiver != NULL) {
-      Append(reinterpret_cast<int32_t>(receiver));
+    if (receiver != nullptr) {
+      Append(receiver);
     }
     for (size_t i = 1, args_offset = 0; i < shorty_len_; ++i, ++args_offset) {
       switch (shorty_[i]) {
@@ -153,7 +157,7 @@
           Append(args[args_offset].i);
           break;
         case 'L':
-          Append(reinterpret_cast<int32_t>(soa.Decode<mirror::Object*>(args[args_offset].l)));
+          Append(soa.Decode<mirror::Object*>(args[args_offset].l));
           break;
         case 'D':
         case 'J':
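
The new Append(mirror::Object*) overload routes the pointer through StackReference<>::AsVRegValue() rather than a reinterpret_cast. This is also why the test file below switches its literal zeros to 0U: once a pointer overload exists, a plain 0 is ambiguous between Append(uint32_t) and Append(mirror::Object*). A minimal model of the overload pair (MiniArgArray is illustrative):

    #include <cstdint>
    #include <vector>

    struct Object {};  // Stand-in for mirror::Object.

    struct MiniArgArray {
      std::vector<uint32_t> words;
      void Append(uint32_t value) { words.push_back(value); }
      void Append(Object* obj) {
        // Same encoding StackReference<>::AsVRegValue() yields: low 32 pointer bits.
        Append(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(obj)));
      }
    };

    // Usage: arr.Append(0U) selects the integer overload unambiguously;
    // arr.Append(obj) selects the pointer overload.
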
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index ebc844e..334dca4 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_JDWP_JDWP_H_
 #define ART_RUNTIME_JDWP_JDWP_H_
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/mutex.h"
 #include "jdwp/jdwp_bits.h"
 #include "jdwp/jdwp_constants.h"
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 6522a62..a514e69 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -21,6 +21,7 @@
 #include <string>
 
 #include "atomic.h"
+#include "base/hex_dump.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/stringprintf.h"
@@ -1705,7 +1706,7 @@
   }
   if (i == arraysize(gHandlers)) {
     LOG(ERROR) << "Command not implemented: " << DescribeCommand(request);
-    LOG(ERROR) << HexDump(request.data(), request.size());
+    LOG(ERROR) << HexDump(request.data(), request.size(), false, "");
     result = ERR_NOT_IMPLEMENTED;
   }
 
@@ -1729,7 +1730,7 @@
   size_t respLen = expandBufGetLength(pReply) - kJDWPHeaderLen;
   VLOG(jdwp) << "REPLY: " << GetCommandName(request) << " " << result << " (length=" << respLen << ")";
   if (false) {
-    VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen);
+    VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen, false, "");
   }
 
   VLOG(jdwp) << "----------";
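
Judging from the two call sites, HexDump's extended signature reads as HexDump(address, byte_count, show_actual_addresses, prefix); the parameter names are an inference from this diff, not confirmed by it. A self-contained dumper with the same two knobs:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Minimal dumper: print real addresses vs. offsets, plus a per-line prefix
    // (both knobs assumed to match the new HexDump arguments above).
    static void HexDumpTo(FILE* out, const void* address, size_t byte_count,
                          bool show_actual_addresses, const char* prefix) {
      const uint8_t* p = static_cast<const uint8_t*>(address);
      for (size_t i = 0; i < byte_count; i += 16) {
        if (show_actual_addresses) {
          std::fprintf(out, "%s%p:", prefix, static_cast<const void*>(p + i));
        } else {
          std::fprintf(out, "%s%08zx:", prefix, i);
        }
        for (size_t j = i; j < i + 16 && j < byte_count; ++j) {
          std::fprintf(out, " %02x", p[j]);
        }
        std::fprintf(out, "\n");
      }
    }
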
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 30b4ee8..deea5f6 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -22,7 +22,7 @@
 #include <utility>
 #include <vector>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
@@ -590,7 +590,7 @@
   }
 
   // See section 11.3 "Linking Native Methods" of the JNI spec.
-  void* FindNativeMethod(const ArtMethod* m, std::string& detail)
+  void* FindNativeMethod(ArtMethod* m, std::string& detail)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string jni_short_name(JniShortName(m));
     std::string jni_long_name(JniLongName(m));
@@ -2215,7 +2215,7 @@
     if (is_copy != nullptr) {
       *is_copy = JNI_FALSE;
     }
-    return array->GetRawData(array->GetClass()->GetComponentSize());
+    return array->GetRawData(array->GetClass()->GetComponentSize(), 0);
   }
 
   static void ReleasePrimitiveArrayCritical(JNIEnv* env, jarray array, void* elements, jint mode) {
@@ -2518,10 +2518,10 @@
 
   static jobject NewDirectByteBuffer(JNIEnv* env, void* address, jlong capacity) {
     if (capacity < 0) {
-      JniAbortF("NewDirectByteBuffer", "negative buffer capacity: %lld", capacity);
+      JniAbortF("NewDirectByteBuffer", "negative buffer capacity: %" PRId64, capacity);
     }
     if (address == NULL && capacity != 0) {
-      JniAbortF("NewDirectByteBuffer", "non-zero capacity for NULL pointer: %lld", capacity);
+      JniAbortF("NewDirectByteBuffer", "non-zero capacity for NULL pointer: %" PRId64, capacity);
     }
 
     // At the moment, the Java side is limited to 32 bits.
@@ -2644,7 +2644,7 @@
     ScopedObjectAccess soa(env);
     Array* array = soa.Decode<Array*>(java_array);
     size_t component_size = array->GetClass()->GetComponentSize();
-    void* array_data = array->GetRawData(component_size);
+    void* array_data = array->GetRawData(component_size, 0);
     gc::Heap* heap = Runtime::Current()->GetHeap();
     bool is_copy = array_data != reinterpret_cast<void*>(elements);
     size_t bytes = array->GetLength() * component_size;
@@ -2944,10 +2944,6 @@
   if (vm->check_jni) {
     SetCheckJniEnabled(true);
   }
-  // The JniEnv local reference values must be at a consistent offset or else cross-compilation
-  // errors will ensue.
-  CHECK_EQ(JNIEnvExt::LocalRefCookieOffset().Int32Value(), 12);
-  CHECK_EQ(JNIEnvExt::SegmentStateOffset().Int32Value(), 16);
 }
 
 JNIEnvExt::~JNIEnvExt() {
@@ -3217,7 +3213,8 @@
   }
 }
 
-bool JavaVMExt::LoadNativeLibrary(const std::string& path, ClassLoader* class_loader,
+bool JavaVMExt::LoadNativeLibrary(const std::string& path,
+                                  const SirtRef<ClassLoader>& class_loader,
                                   std::string* detail) {
   detail->clear();
 
@@ -3233,18 +3230,18 @@
     library = libraries->Get(path);
   }
   if (library != NULL) {
-    if (library->GetClassLoader() != class_loader) {
+    if (library->GetClassLoader() != class_loader.get()) {
       // The library will be associated with class_loader. The JNI
       // spec says we can't load the same library into more than one
       // class loader.
       StringAppendF(detail, "Shared library \"%s\" already opened by "
           "ClassLoader %p; can't open in ClassLoader %p",
-          path.c_str(), library->GetClassLoader(), class_loader);
+          path.c_str(), library->GetClassLoader(), class_loader.get());
       LOG(WARNING) << detail;
       return false;
     }
     VLOG(jni) << "[Shared library \"" << path << "\" already loaded in "
-              << "ClassLoader " << class_loader << "]";
+              << "ClassLoader " << class_loader.get() << "]";
     if (!library->CheckOnLoadResult()) {
       StringAppendF(detail, "JNI_OnLoad failed on a previous attempt "
           "to load \"%s\"", path.c_str());
@@ -3285,18 +3282,19 @@
     MutexLock mu(self, libraries_lock);
     library = libraries->Get(path);
     if (library == NULL) {  // We won the race to get libraries_lock.
-      library = new SharedLibrary(path, handle, class_loader);
+      library = new SharedLibrary(path, handle, class_loader.get());
       libraries->Put(path, library);
       created_library = true;
     }
   }
   if (!created_library) {
     LOG(INFO) << "WOW: we lost a race to add shared library: "
-        << "\"" << path << "\" ClassLoader=" << class_loader;
+        << "\"" << path << "\" ClassLoader=" << class_loader.get();
     return library->CheckOnLoadResult();
   }
 
-  VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader << "]";
+  VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader.get()
+      << "]";
 
   bool was_successful = false;
   void* sym = dlsym(handle, "JNI_OnLoad");
@@ -3311,7 +3309,7 @@
     typedef int (*JNI_OnLoadFn)(JavaVM*, void*);
     JNI_OnLoadFn jni_on_load = reinterpret_cast<JNI_OnLoadFn>(sym);
     SirtRef<ClassLoader> old_class_loader(self, self->GetClassLoaderOverride());
-    self->SetClassLoaderOverride(class_loader);
+    self->SetClassLoaderOverride(class_loader.get());
 
     int version = 0;
     {
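
Taking the class loader as const SirtRef<ClassLoader>& means the caller must already hold a GC-visible stack reference: dlopen and JNI_OnLoad can block or suspend the thread, and a raw ClassLoader* could become stale across that window. A stand-in model of what such a handle provides (the real SirtRef registers itself in the thread's stack indirect reference table; this sketch only shows the shape):

    class Thread;  // Stand-in.

    template <typename T>
    class SirtRef {
     public:
      SirtRef(Thread* self, T* object) : self_(self), object_(object) {
        // The real constructor links this slot into the thread's stack
        // indirect reference table so the GC can find (and, with a moving
        // collector, update) object_.
      }
      T* get() const { return object_; }

     private:
      Thread* const self_;
      T* object_;
    };
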
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 96f7ae0..cd3c5cb 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -25,6 +25,7 @@
 #include "reference_table.h"
 #include "root_visitor.h"
 #include "runtime.h"
+#include "sirt_ref.h"
 
 #include <iosfwd>
 #include <string>
@@ -72,7 +73,7 @@
    * Returns 'true' on success. On failure, sets 'detail' to a
    * human-readable description of the error.
    */
-  bool LoadNativeLibrary(const std::string& path, mirror::ClassLoader* class_loader,
+  bool LoadNativeLibrary(const std::string& path, const SirtRef<mirror::ClassLoader>& class_loader,
                          std::string* detail)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 9b278f8..fed734e 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -131,7 +131,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
     }
 
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
@@ -148,11 +148,11 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
+    arg_array.Append(0U);
     result.SetB(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'B');
     EXPECT_EQ(0, result.GetB());
@@ -184,11 +184,11 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
+    arg_array.Append(0U);
     result.SetI(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
@@ -221,7 +221,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
@@ -264,12 +264,12 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
-    arg_array.Append(0);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
     result.SetI(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
@@ -310,13 +310,13 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
     result.SetI(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
@@ -361,14 +361,14 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
     result.SetI(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
@@ -417,15 +417,15 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
     result.SetI(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
@@ -480,7 +480,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
@@ -547,7 +547,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
@@ -603,7 +603,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
@@ -668,7 +668,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
@@ -1492,8 +1492,8 @@
   } while (false)
 
 
-#if !defined(ART_USE_PORTABLE_COMPILER)
 TEST_F(JniInternalTest, GetPrimitiveField_SetPrimitiveField) {
+  TEST_DISABLED_FOR_PORTABLE();
   Thread::Current()->TransitionFromSuspendedToRunnable();
   LoadDex("AllFields");
   bool started = runtime_->Start();
@@ -1524,6 +1524,7 @@
 }
 
 TEST_F(JniInternalTest, GetObjectField_SetObjectField) {
+  TEST_DISABLED_FOR_PORTABLE();
   Thread::Current()->TransitionFromSuspendedToRunnable();
   LoadDex("AllFields");
   runtime_->Start();
@@ -1553,7 +1554,6 @@
   env_->SetObjectField(o, i_fid, s2);
   ASSERT_TRUE(env_->IsSameObject(s2, env_->GetObjectField(o, i_fid)));
 }
-#endif
 
 TEST_F(JniInternalTest, NewLocalRef_NULL) {
   EXPECT_TRUE(env_->NewLocalRef(NULL) == NULL);
@@ -1756,7 +1756,7 @@
   ASSERT_TRUE(method != NULL);
 
   ArgArray arg_array(NULL, 0);
-  arg_array.Append(0);
+  arg_array.Append(0U);
   JValue result;
 
   // Start runtime.
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index 8b9a3cd..414b3bb 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_LOCK_WORD_INL_H_
 
 #include "lock_word.h"
+#include "monitor_pool.h"
 
 namespace art {
 
@@ -33,7 +34,8 @@
 
 inline Monitor* LockWord::FatLockMonitor() const {
   DCHECK_EQ(GetState(), kFatLocked);
-  return reinterpret_cast<Monitor*>(value_ << kStateSize);
+  MonitorId mon_id = static_cast<MonitorId>(value_ & ~(kStateMask << kStateShift));
+  return MonitorPool::MonitorFromMonitorId(mon_id);
 }
 
 inline size_t LockWord::ForwardingAddress() const {
@@ -46,8 +48,7 @@
 }
 
 inline LockWord::LockWord(Monitor* mon)
-    : value_(((reinterpret_cast<uintptr_t>(mon) >> kStateSize) | (kStateFat << kStateShift)) &
-             0xFFFFFFFFU) {
+    : value_(mon->GetMonitorId() | (kStateFat << kStateShift)) {
   DCHECK_EQ(FatLockMonitor(), mon);
 }
 
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index d24a3bb..ab86eaa 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -42,7 +42,7 @@
  *
  *  |33|222222222211111111110000000000|
  *  |10|987654321098765432109876543210|
- *  |01| Monitor* >> kStateSize       |
+ *  |01| MonitorId                    |
  *
  * When the lock word is in hash state and its bits are formatted as follows:
  *
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 0a3e1a1..393ea68 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -79,7 +79,7 @@
 #endif
 
 MemMap* MemMap::MapAnonymous(const char* name, byte* addr, size_t byte_count, int prot,
-                             std::string* error_msg) {
+                             bool low_4gb, std::string* error_msg) {
   if (byte_count == 0) {
     return new MemMap(name, NULL, 0, NULL, 0, prot);
   }
@@ -101,7 +101,11 @@
   ScopedFd fd(-1);
   int flags = MAP_PRIVATE | MAP_ANONYMOUS;
 #endif
-
+#ifdef __LP64__
+  if (low_4gb) {
+    flags |= MAP_32BIT;
+  }
+#endif
   byte* actual = reinterpret_cast<byte*>(mmap(addr, page_aligned_byte_count, prot, flags, fd.get(), 0));
   if (actual == MAP_FAILED) {
     std::string maps;
@@ -120,7 +124,7 @@
   CHECK_NE(0, prot);
   CHECK_NE(0, flags & (MAP_SHARED | MAP_PRIVATE));
   if (byte_count == 0) {
-    return new MemMap("file", NULL, 0, NULL, 0, prot);
+    return new MemMap(filename, NULL, 0, NULL, 0, prot);
   }
   // Adjust 'offset' to be page-aligned as required by mmap.
   int page_offset = start % kPageSize;
@@ -153,7 +157,7 @@
                               maps.c_str());
     return NULL;
   }
-  return new MemMap("file", actual + page_offset, byte_count, actual, page_aligned_byte_count,
+  return new MemMap(filename, actual + page_offset, byte_count, actual, page_aligned_byte_count,
                     prot);
 }
 
@@ -267,4 +271,11 @@
   return false;
 }
 
+std::ostream& operator<<(std::ostream& os, const MemMap& mem_map) {
+  os << StringPrintf("[MemMap: %s prot=%x %p-%p]",
+                     mem_map.GetName().c_str(), mem_map.GetProtect(),
+                     mem_map.BaseBegin(), mem_map.BaseEnd());
+  return os;
+}
+
 }  // namespace art
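
MAP_32BIT is Linux-specific and, on x86-64, places the mapping in the low 2 GiB of the address space, which trivially satisfies the low_4gb contract. A runnable sketch of the same pattern; the extra defined(MAP_32BIT) guard here is a defensive addition for portability, not part of the patch:

    #include <cstdio>
    #include <sys/mman.h>

    int main() {
      int flags = MAP_PRIVATE | MAP_ANONYMOUS;
    #if defined(__LP64__) && defined(MAP_32BIT)
      flags |= MAP_32BIT;  // Keep the mapping below 4 GiB on 64-bit x86 Linux.
    #endif
      void* p = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, flags, -1, 0);
      if (p == MAP_FAILED) {
        std::perror("mmap");
        return 1;
      }
      std::printf("mapped at %p\n", p);
      munmap(p, 4096);
      return 0;
    }
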
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 2c65833..e39c10e 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -39,7 +39,7 @@
   //
   // On success, returns a MemMap instance. On failure, returns NULL.
   static MemMap* MapAnonymous(const char* ashmem_name, byte* addr, size_t byte_count, int prot,
-                              std::string* error_msg);
+                              bool low_4gb, std::string* error_msg);
 
   // Map part of a file, taking care of non-page aligned offsets.  The
   // "start" offset is absolute, not relative.
@@ -62,6 +62,10 @@
   // Releases the memory mapping
   ~MemMap();
 
+  const std::string& GetName() const {
+    return name_;
+  }
+
   bool Protect(int prot);
 
   int GetProtect() const {
@@ -80,6 +84,18 @@
     return Begin() + Size();
   }
 
+  void* BaseBegin() const {
+    return base_begin_;
+  }
+
+  size_t BaseSize() const {
+    return base_size_;
+  }
+
+  void* BaseEnd() const {
+    return reinterpret_cast<byte*>(BaseBegin()) + BaseSize();
+  }
+
   bool HasAddress(const void* addr) const {
     return Begin() <= addr && addr < End();
   }
@@ -102,6 +118,7 @@
 
   friend class MemMapTest;  // To allow access to base_begin_ and base_size_.
 };
+std::ostream& operator<<(std::ostream& os, const MemMap& mem_map);
 
 }  // namespace art
 
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index cf2c9d0..6cb59b4 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -23,76 +23,111 @@
 
 class MemMapTest : public testing::Test {
  public:
-  byte* BaseBegin(MemMap* mem_map) {
+  static byte* BaseBegin(MemMap* mem_map) {
     return reinterpret_cast<byte*>(mem_map->base_begin_);
   }
-  size_t BaseSize(MemMap* mem_map) {
+  static size_t BaseSize(MemMap* mem_map) {
     return mem_map->base_size_;
   }
+
+  static void RemapAtEndTest(bool low_4gb) {
+    std::string error_msg;
+    // Cast the page size to size_t.
+    const size_t page_size = static_cast<size_t>(kPageSize);
+    // Map a two-page memory region.
+    MemMap* m0 = MemMap::MapAnonymous("MemMapTest_RemapAtEndTest_map0",
+                                      nullptr,
+                                      2 * page_size,
+                                      PROT_READ | PROT_WRITE,
+                                      low_4gb,
+                                      &error_msg);
+    // Check its state and write to it.
+    byte* base0 = m0->Begin();
+    ASSERT_TRUE(base0 != nullptr) << error_msg;
+    size_t size0 = m0->Size();
+    EXPECT_EQ(m0->Size(), 2 * page_size);
+    EXPECT_EQ(BaseBegin(m0), base0);
+    EXPECT_EQ(BaseSize(m0), size0);
+    memset(base0, 42, 2 * page_size);
+    // Remap the latter half into a second MemMap.
+    MemMap* m1 = m0->RemapAtEnd(base0 + page_size,
+                                "MemMapTest_RemapAtEndTest_map1",
+                                PROT_READ | PROT_WRITE,
+                                &error_msg);
+    // Check the states of the two maps.
+    EXPECT_EQ(m0->Begin(), base0) << error_msg;
+    EXPECT_EQ(m0->Size(), page_size);
+    EXPECT_EQ(BaseBegin(m0), base0);
+    EXPECT_EQ(BaseSize(m0), page_size);
+    byte* base1 = m1->Begin();
+    size_t size1 = m1->Size();
+    EXPECT_EQ(base1, base0 + page_size);
+    EXPECT_EQ(size1, page_size);
+    EXPECT_EQ(BaseBegin(m1), base1);
+    EXPECT_EQ(BaseSize(m1), size1);
+    // Write to the second region.
+    memset(base1, 43, page_size);
+    // Check the contents of the two regions.
+    for (size_t i = 0; i < page_size; ++i) {
+      EXPECT_EQ(base0[i], 42);
+    }
+    for (size_t i = 0; i < page_size; ++i) {
+      EXPECT_EQ(base1[i], 43);
+    }
+    // Unmap the first region.
+    delete m0;
+    // Make sure the second region is still accessible after the first
+    // region is unmapped.
+    for (size_t i = 0; i < page_size; ++i) {
+      EXPECT_EQ(base1[i], 43);
+    }
+    delete m1;
+  }
 };
 
 TEST_F(MemMapTest, MapAnonymousEmpty) {
   std::string error_msg;
   UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
-                                             NULL,
+                                             nullptr,
                                              0,
                                              PROT_READ,
+                                             false,
                                              &error_msg));
-  ASSERT_TRUE(map.get() != NULL) << error_msg;
+  ASSERT_TRUE(map.get() != nullptr) << error_msg;
+  ASSERT_TRUE(error_msg.empty());
+  map.reset(MemMap::MapAnonymous("MapAnonymousEmpty",
+                                 nullptr,
+                                 kPageSize,
+                                 PROT_READ | PROT_WRITE,
+                                 false,
+                                 &error_msg));
+  ASSERT_TRUE(map.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
 }
 
-TEST_F(MemMapTest, RemapAtEnd) {
+#ifdef __LP64__
+TEST_F(MemMapTest, MapAnonymousEmpty32bit) {
   std::string error_msg;
-  // Cast the page size to size_t.
-  const size_t page_size = static_cast<size_t>(kPageSize);
-  // Map a two-page memory region.
-  MemMap* m0 = MemMap::MapAnonymous("MemMapTest_RemapAtEndTest_map0",
-                                    NULL,
-                                    2 * page_size,
-                                    PROT_READ | PROT_WRITE,
-                                    &error_msg);
-  // Check its state and write to it.
-  byte* base0 = m0->Begin();
-  ASSERT_TRUE(base0 != NULL) << error_msg;
-  size_t size0 = m0->Size();
-  EXPECT_EQ(m0->Size(), 2 * page_size);
-  EXPECT_EQ(BaseBegin(m0), base0);
-  EXPECT_EQ(BaseSize(m0), size0);
-  memset(base0, 42, 2 * page_size);
-  // Remap the latter half into a second MemMap.
-  MemMap* m1 = m0->RemapAtEnd(base0 + page_size,
-                              "MemMapTest_RemapAtEndTest_map1",
-                              PROT_READ | PROT_WRITE,
-                              &error_msg);
-  // Check the states of the two maps.
-  EXPECT_EQ(m0->Begin(), base0) << error_msg;
-  EXPECT_EQ(m0->Size(), page_size);
-  EXPECT_EQ(BaseBegin(m0), base0);
-  EXPECT_EQ(BaseSize(m0), page_size);
-  byte* base1 = m1->Begin();
-  size_t size1 = m1->Size();
-  EXPECT_EQ(base1, base0 + page_size);
-  EXPECT_EQ(size1, page_size);
-  EXPECT_EQ(BaseBegin(m1), base1);
-  EXPECT_EQ(BaseSize(m1), size1);
-  // Write to the second region.
-  memset(base1, 43, page_size);
-  // Check the contents of the two regions.
-  for (size_t i = 0; i < page_size; ++i) {
-    EXPECT_EQ(base0[i], 42);
-  }
-  for (size_t i = 0; i < page_size; ++i) {
-    EXPECT_EQ(base1[i], 43);
-  }
-  // Unmap the first region.
-  delete m0;
-  // Make sure the second region is still accessible after the first
-  // region is unmapped.
-  for (size_t i = 0; i < page_size; ++i) {
-    EXPECT_EQ(base1[i], 43);
-  }
-  delete m1;
+  UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
+                                             nullptr,
+                                             kPageSize,
+                                             PROT_READ | PROT_WRITE,
+                                             true,
+                                             &error_msg));
+  ASSERT_TRUE(map.get() != nullptr) << error_msg;
+  ASSERT_TRUE(error_msg.empty());
+  ASSERT_LT(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), 1ULL << 32);
 }
+#endif
+
+TEST_F(MemMapTest, RemapAtEnd) {
+  RemapAtEndTest(false);
+}
+
+#ifdef __LP64__
+TEST_F(MemMapTest, RemapAtEnd32bit) {
+  RemapAtEndTest(true);
+}
+#endif
 
 }  // namespace art
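
For context on the low_4gb flag exercised by the new MapAnonymousEmpty32bit and RemapAtEnd32bit tests above: one plausible way to satisfy a below-4GB anonymous mapping on Linux/x86-64 is the MAP_32BIT mmap flag. This is only a hedged, self-contained sketch of that idea; how MemMap actually implements the flag is not shown in this diff, and non-x86-64 targets would presumably need a fallback such as probing candidate addresses below 4GB.

#include <sys/mman.h>
#include <cstdint>
#include <cstdio>

int main() {
  const size_t page_size = 4096;
#ifdef MAP_32BIT  // x86-64 Linux only; other targets need another strategy
  void* addr = mmap(nullptr, page_size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
#else
  void* addr = MAP_FAILED;
#endif
  if (addr != MAP_FAILED) {
    // The whole point: the returned address fits in 32 bits.
    printf("mapped at %p, below 4GB: %d\n", addr,
           reinterpret_cast<uintptr_t>(addr) < (1ULL << 32) ? 1 : 0);
    munmap(addr, page_size);
  }
  return 0;
}
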
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index bd81bd5..b2725e5 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -27,7 +27,7 @@
 namespace art {
 namespace mirror {
 
-inline size_t Array::SizeOf() const {
+inline size_t Array::SizeOf() {
   // This is safe from overflow because the array was already allocated, so we know it's sane.
   size_t component_size = GetClass()->GetComponentSize();
   int32_t component_count = GetLength();
@@ -64,9 +64,10 @@
   explicit SetLengthVisitor(int32_t length) : length_(length) {
   }
 
-  void operator()(mirror::Object* obj) const {
-    mirror::Array* array = obj->AsArray();
-    DCHECK(array->IsArrayInstance());
+  void operator()(Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Avoid AsArray as object is not yet in live bitmap or allocation stack.
+    Array* array = down_cast<Array*>(obj);
+    // DCHECK(array->IsArrayInstance());
     array->SetLength(length_);
   }
 
@@ -116,6 +117,114 @@
   }
 }
 
+// Similar to memmove except that elements are aligned appropriately for T, count is in T-sized
+// units, and copies are guaranteed not to tear when T is smaller than 64 bits.
+template<typename T>
+static inline void ArrayBackwardCopy(T* d, const T* s, int32_t count) {
+  d += count;
+  s += count;
+  for (int32_t i = 0; i < count; ++i) {
+    d--;
+    s--;
+    *d = *s;
+  }
+}
+
+template<class T>
+void PrimitiveArray<T>::Memmove(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos,
+                                int32_t count) {
+  if (UNLIKELY(count == 0)) {
+    return;
+  }
+  DCHECK_GE(dst_pos, 0);
+  DCHECK_GE(src_pos, 0);
+  DCHECK_GT(count, 0);
+  DCHECK(src != nullptr);
+  DCHECK_LT(dst_pos, GetLength());
+  DCHECK_LE(dst_pos, GetLength() - count);
+  DCHECK_LT(src_pos, src->GetLength());
+  DCHECK_LE(src_pos, src->GetLength() - count);
+
+  // Note: we can't rely on standard libc functions like memcpy(3) and memmove(3) for non-byte
+  // copies in our implementation, because they may copy byte-by-byte and tear the elements.
+  if (LIKELY(src != this) || (dst_pos < src_pos) || (dst_pos - src_pos >= count)) {
+    // Forward copy ok.
+    Memcpy(dst_pos, src, src_pos, count);
+  } else {
+    // Backward copy necessary.
+    void* dst_raw = GetRawData(sizeof(T), dst_pos);
+    const void* src_raw = src->GetRawData(sizeof(T), src_pos);
+    if (sizeof(T) == sizeof(uint8_t)) {
+      // TUNING: use memmove here?
+      uint8_t* d = reinterpret_cast<uint8_t*>(dst_raw);
+      const uint8_t* s = reinterpret_cast<const uint8_t*>(src_raw);
+      ArrayBackwardCopy<uint8_t>(d, s, count);
+    } else if (sizeof(T) == sizeof(uint16_t)) {
+      uint16_t* d = reinterpret_cast<uint16_t*>(dst_raw);
+      const uint16_t* s = reinterpret_cast<const uint16_t*>(src_raw);
+      ArrayBackwardCopy<uint16_t>(d, s, count);
+    } else if (sizeof(T) == sizeof(uint32_t)) {
+      uint32_t* d = reinterpret_cast<uint32_t*>(dst_raw);
+      const uint32_t* s = reinterpret_cast<const uint32_t*>(src_raw);
+      ArrayBackwardCopy<uint32_t>(d, s, count);
+    } else {
+      DCHECK_EQ(sizeof(T), sizeof(uint64_t));
+      uint64_t* d = reinterpret_cast<uint64_t*>(dst_raw);
+      const uint64_t* s = reinterpret_cast<const uint64_t*>(src_raw);
+      ArrayBackwardCopy<uint64_t>(d, s, count);
+    }
+  }
+}
+
+// Similar to memcpy except that elements are aligned appropriately for T, count is in T-sized
+// units, and copies are guaranteed not to tear when T is smaller than 64 bits.
+template<typename T>
+static inline void ArrayForwardCopy(T* d, const T* s, int32_t count) {
+  for (int32_t i = 0; i < count; ++i) {
+    *d = *s;
+    d++;
+    s++;
+  }
+}
+
+template<class T>
+void PrimitiveArray<T>::Memcpy(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos,
+                               int32_t count) {
+  if (UNLIKELY(count == 0)) {
+    return;
+  }
+  DCHECK_GE(dst_pos, 0);
+  DCHECK_GE(src_pos, 0);
+  DCHECK_GT(count, 0);
+  DCHECK(src != nullptr);
+  DCHECK_LT(dst_pos, GetLength());
+  DCHECK_LE(dst_pos, GetLength() - count);
+  DCHECK_LT(src_pos, src->GetLength());
+  DCHECK_LE(src_pos, src->GetLength() - count);
+
+  // Note: we can't rely on standard libc functions like memcpy(3) and memmove(3) for non-byte
+  // copies in our implementation, because they may copy byte-by-byte and tear the elements.
+  void* dst_raw = GetRawData(sizeof(T), dst_pos);
+  const void* src_raw = src->GetRawData(sizeof(T), src_pos);
+  if (sizeof(T) == sizeof(uint8_t)) {
+    memcpy(dst_raw, src_raw, count);
+  } else if (sizeof(T) == sizeof(uint16_t)) {
+    uint16_t* d = reinterpret_cast<uint16_t*>(dst_raw);
+    const uint16_t* s = reinterpret_cast<const uint16_t*>(src_raw);
+    ArrayForwardCopy<uint16_t>(d, s, count);
+  } else if (sizeof(T) == sizeof(uint32_t)) {
+    uint32_t* d = reinterpret_cast<uint32_t*>(dst_raw);
+    const uint32_t* s = reinterpret_cast<const uint32_t*>(src_raw);
+    ArrayForwardCopy<uint32_t>(d, s, count);
+  } else {
+    DCHECK_EQ(sizeof(T), sizeof(uint64_t));
+    uint64_t* d = reinterpret_cast<uint64_t*>(dst_raw);
+    const uint64_t* s = reinterpret_cast<const uint64_t*>(src_raw);
+    ArrayForwardCopy<uint64_t>(d, s, count);
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
 
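The overlap test in Memmove above (copy forward unless the destination starts inside the source range) is easiest to see in isolation. A minimal sketch with a hypothetical ElementMove helper; the pointer-comparison form of the test is an illustrative simplification of the index arithmetic in the real code, and assumes both pointers address the same array.

#include <cstdint>
#include <cstdio>

// Copy count elements, choosing the direction so that no source element is
// read after the loop has already overwritten it.
template <typename T>
void ElementMove(T* dst, const T* src, int32_t count) {
  if (dst <= src || dst >= src + count) {
    for (int32_t i = 0; i < count; ++i) {  // no harmful overlap: copy forward
      dst[i] = src[i];
    }
  } else {
    for (int32_t i = count - 1; i >= 0; --i) {  // dst inside src: copy backward
      dst[i] = src[i];
    }
  }
}

int main() {
  uint32_t a[6] = {0, 1, 2, 3, 4, 5};
  ElementMove(&a[2], &a[0], 4);  // overlapping shift right by two elements
  for (uint32_t v : a) {
    printf("%u ", v);  // prints: 0 1 0 1 2 3
  }
  printf("\n");
  return 0;
}

Shifting a[0..3] into a[2..5] overlaps, so the backward loop runs; a forward loop would have propagated a[0] and a[1] into the already-copied region and produced 0 1 0 1 0 1.
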
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index 00b88db..ca0d1f3 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -103,11 +103,11 @@
   return new_array;
 }
 
-void Array::ThrowArrayIndexOutOfBoundsException(int32_t index) const {
+void Array::ThrowArrayIndexOutOfBoundsException(int32_t index) {
   art::ThrowArrayIndexOutOfBoundsException(index, GetLength());
 }
 
-void Array::ThrowArrayStoreException(Object* object) const {
+void Array::ThrowArrayStoreException(Object* object) {
   art::ThrowArrayStoreException(object->GetClass(), this->GetClass());
 }
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 207573f..6e366a0 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -50,15 +50,15 @@
   static Array* CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t SizeOf() const;
+  size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetLength() const {
+  int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Array, length_), false);
   }
 
-  void SetLength(int32_t length) {
+  void SetLength(int32_t length) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK_GE(length, 0);
-    SetField32(OFFSET_OF_OBJECT_MEMBER(Array, length_), length, false);
+    SetField32(OFFSET_OF_OBJECT_MEMBER(Array, length_), length, false, false);
   }
 
   static MemberOffset LengthOffset() {
@@ -74,20 +74,22 @@
     }
   }
 
-  void* GetRawData(size_t component_size) {
-    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(component_size).Int32Value();
+  void* GetRawData(size_t component_size, int32_t index)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(component_size).Int32Value() +
+        (index * component_size);
     return reinterpret_cast<void*>(data);
   }
 
-  const void* GetRawData(size_t component_size) const {
-    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(component_size).Int32Value();
-    return reinterpret_cast<const void*>(data);
+  const void* GetRawData(size_t component_size, int32_t index) const {
+    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(component_size).Int32Value() +
+        (index * component_size);
+    return reinterpret_cast<const void*>(data);
   }
 
   // Returns true if the index is valid. If not, throws an ArrayIndexOutOfBoundsException and
   // returns false.
-  bool CheckIsValidIndex(int32_t index) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool CheckIsValidIndex(int32_t index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(static_cast<uint32_t>(index) >= static_cast<uint32_t>(GetLength()))) {
       ThrowArrayIndexOutOfBoundsException(index);
       return false;
@@ -96,11 +98,10 @@
   }
 
  protected:
-  void ThrowArrayStoreException(Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ThrowArrayStoreException(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  void ThrowArrayIndexOutOfBoundsException(int32_t index) const
+  void ThrowArrayIndexOutOfBoundsException(int32_t index)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // The number of array elements.
@@ -119,17 +120,15 @@
   static PrimitiveArray<T>* Alloc(Thread* self, size_t length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const T* GetData() const {
-    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(sizeof(T)).Int32Value();
-    return reinterpret_cast<T*>(data);
+  const T* GetData() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return reinterpret_cast<const T*>(GetRawData(sizeof(T), 0));
   }
 
-  T* GetData() {
-    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(sizeof(T)).Int32Value();
-    return reinterpret_cast<T*>(data);
+  T* GetData() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return reinterpret_cast<T*>(GetRawData(sizeof(T), 0));
   }
 
-  T Get(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  T Get(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(!CheckIsValidIndex(i))) {
       DCHECK(Thread::Current()->IsExceptionPending());
       return T(0);
@@ -137,7 +136,7 @@
     return GetWithoutChecks(i);
   }
 
-  T GetWithoutChecks(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  T GetWithoutChecks(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(CheckIsValidIndex(i));
     return GetData()[i];
   }
@@ -155,6 +154,22 @@
     GetData()[i] = value;
   }
 
+  /*
+   * Works like memmove(), except we guarantee not to allow tearing of array values (i.e. by using
+   * smaller-than-element-size copies). Arguments are assumed to be within the bounds of the array
+   * and the arrays to be non-null.
+   */
+  void Memmove(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos, int32_t count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  /*
+   * Works like memcpy(), except we guarantee not to allow tearing of array values (i.e. by using
+   * smaller-than-element-size copies). Arguments are assumed to be within the bounds of the array
+   * and the arrays to be non-null.
+   */
+  void Memcpy(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos, int32_t count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static void SetArrayClass(Class* array_class) {
     CHECK(array_class_ == NULL);
     CHECK(array_class != NULL);
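
The unsigned-compare trick in CheckIsValidIndex above is worth spelling out: casting a negative int32_t index to uint32_t yields a value far above any valid length, so a single comparison covers both bounds. A small standalone sketch (IsValidIndex is a hypothetical stand-in):

#include <cstdint>
#include <cstdio>

// One unsigned comparison: a negative index wraps to a huge uint32_t value,
// so it fails the same test as an index that is merely too large.
static bool IsValidIndex(int32_t index, int32_t length) {
  return static_cast<uint32_t>(index) < static_cast<uint32_t>(length);
}

int main() {
  printf("%d %d %d\n",
         IsValidIndex(-1, 10) ? 1 : 0,   // 0: negative index rejected
         IsValidIndex(3, 10) ? 1 : 0,    // 1: in bounds
         IsValidIndex(10, 10) ? 1 : 0);  // 0: one past the end rejected
  return 0;
}
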
diff --git a/runtime/mirror/art_field-inl.h b/runtime/mirror/art_field-inl.h
index d8c278c..530226b 100644
--- a/runtime/mirror/art_field-inl.h
+++ b/runtime/mirror/art_field-inl.h
@@ -29,8 +29,8 @@
 namespace art {
 namespace mirror {
 
-inline Class* ArtField::GetDeclaringClass() const {
-  Class* result = GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(ArtField, declaring_class_), false);
+inline Class* ArtField::GetDeclaringClass() {
+  Class* result = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(ArtField, declaring_class_), false);
   DCHECK(result != NULL);
   DCHECK(result->IsLoaded() || result->IsErroneous());
   return result;
@@ -40,106 +40,106 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(ArtField, declaring_class_), new_declaring_class, false);
 }
 
-inline uint32_t ArtField::GetAccessFlags() const {
+inline uint32_t ArtField::GetAccessFlags() {
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
   return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, access_flags_), false);
 }
 
-inline MemberOffset ArtField::GetOffset() const {
+inline MemberOffset ArtField::GetOffset() {
   DCHECK(GetDeclaringClass()->IsResolved() || GetDeclaringClass()->IsErroneous());
   return MemberOffset(GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_), false));
 }
 
-inline MemberOffset ArtField::GetOffsetDuringLinking() const {
+inline MemberOffset ArtField::GetOffsetDuringLinking() {
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
   return MemberOffset(GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_), false));
 }
 
-inline uint32_t ArtField::Get32(const Object* object) const {
+inline uint32_t ArtField::Get32(Object* object) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   return object->GetField32(GetOffset(), IsVolatile());
 }
 
-inline void ArtField::Set32(Object* object, uint32_t new_value) const {
+inline void ArtField::Set32(Object* object, uint32_t new_value) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   object->SetField32(GetOffset(), new_value, IsVolatile());
 }
 
-inline uint64_t ArtField::Get64(const Object* object) const {
+inline uint64_t ArtField::Get64(Object* object) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   return object->GetField64(GetOffset(), IsVolatile());
 }
 
-inline void ArtField::Set64(Object* object, uint64_t new_value) const {
+inline void ArtField::Set64(Object* object, uint64_t new_value) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   object->SetField64(GetOffset(), new_value, IsVolatile());
 }
 
-inline Object* ArtField::GetObj(const Object* object) const {
+inline Object* ArtField::GetObj(Object* object) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
-  return object->GetFieldObject<Object*>(GetOffset(), IsVolatile());
+  return object->GetFieldObject<Object>(GetOffset(), IsVolatile());
 }
 
-inline void ArtField::SetObj(Object* object, const Object* new_value) const {
+inline void ArtField::SetObj(Object* object, Object* new_value) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   object->SetFieldObject(GetOffset(), new_value, IsVolatile());
 }
 
-inline bool ArtField::GetBoolean(const Object* object) const {
+inline bool ArtField::GetBoolean(Object* object) {
   DCHECK_EQ(Primitive::kPrimBoolean, FieldHelper(this).GetTypeAsPrimitiveType())
       << PrettyField(this);
   return Get32(object);
 }
 
-inline void ArtField::SetBoolean(Object* object, bool z) const {
+inline void ArtField::SetBoolean(Object* object, bool z) {
   DCHECK_EQ(Primitive::kPrimBoolean, FieldHelper(this).GetTypeAsPrimitiveType())
       << PrettyField(this);
   Set32(object, z);
 }
 
-inline int8_t ArtField::GetByte(const Object* object) const {
+inline int8_t ArtField::GetByte(Object* object) {
   DCHECK_EQ(Primitive::kPrimByte, FieldHelper(this).GetTypeAsPrimitiveType())
       << PrettyField(this);
   return Get32(object);
 }
 
-inline void ArtField::SetByte(Object* object, int8_t b) const {
+inline void ArtField::SetByte(Object* object, int8_t b) {
   DCHECK_EQ(Primitive::kPrimByte, FieldHelper(this).GetTypeAsPrimitiveType())
       << PrettyField(this);
   Set32(object, b);
 }
 
-inline uint16_t ArtField::GetChar(const Object* object) const {
+inline uint16_t ArtField::GetChar(Object* object) {
   DCHECK_EQ(Primitive::kPrimChar, FieldHelper(this).GetTypeAsPrimitiveType())
       << PrettyField(this);
   return Get32(object);
 }
 
-inline void ArtField::SetChar(Object* object, uint16_t c) const {
+inline void ArtField::SetChar(Object* object, uint16_t c) {
   DCHECK_EQ(Primitive::kPrimChar, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   Set32(object, c);
 }
 
-inline int16_t ArtField::GetShort(const Object* object) const {
+inline int16_t ArtField::GetShort(Object* object) {
   DCHECK_EQ(Primitive::kPrimShort, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   return Get32(object);
 }
 
-inline void ArtField::SetShort(Object* object, int16_t s) const {
+inline void ArtField::SetShort(Object* object, int16_t s) {
   DCHECK_EQ(Primitive::kPrimShort, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   Set32(object, s);
 }
 
-inline int32_t ArtField::GetInt(const Object* object) const {
+inline int32_t ArtField::GetInt(Object* object) {
 #ifndef NDEBUG
   Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
   CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
@@ -147,7 +147,7 @@
   return Get32(object);
 }
 
-inline void ArtField::SetInt(Object* object, int32_t i) const {
+inline void ArtField::SetInt(Object* object, int32_t i) {
 #ifndef NDEBUG
   Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
   CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
@@ -155,7 +155,7 @@
   Set32(object, i);
 }
 
-inline int64_t ArtField::GetLong(const Object* object) const {
+inline int64_t ArtField::GetLong(Object* object) {
 #ifndef NDEBUG
   Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
   CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
@@ -163,7 +163,7 @@
   return Get64(object);
 }
 
-inline void ArtField::SetLong(Object* object, int64_t j) const {
+inline void ArtField::SetLong(Object* object, int64_t j) {
 #ifndef NDEBUG
   Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
   CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
@@ -171,7 +171,7 @@
   Set64(object, j);
 }
 
-inline float ArtField::GetFloat(const Object* object) const {
+inline float ArtField::GetFloat(Object* object) {
   DCHECK_EQ(Primitive::kPrimFloat, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   JValue bits;
@@ -179,7 +179,7 @@
   return bits.GetF();
 }
 
-inline void ArtField::SetFloat(Object* object, float f) const {
+inline void ArtField::SetFloat(Object* object, float f) {
   DCHECK_EQ(Primitive::kPrimFloat, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   JValue bits;
@@ -187,7 +187,7 @@
   Set32(object, bits.GetI());
 }
 
-inline double ArtField::GetDouble(const Object* object) const {
+inline double ArtField::GetDouble(Object* object) {
   DCHECK_EQ(Primitive::kPrimDouble, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   JValue bits;
@@ -195,7 +195,7 @@
   return bits.GetD();
 }
 
-inline void ArtField::SetDouble(Object* object, double d) const {
+inline void ArtField::SetDouble(Object* object, double d) {
   DCHECK_EQ(Primitive::kPrimDouble, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   JValue bits;
@@ -203,13 +203,13 @@
   Set64(object, bits.GetJ());
 }
 
-inline Object* ArtField::GetObject(const Object* object) const {
+inline Object* ArtField::GetObject(Object* object) {
   DCHECK_EQ(Primitive::kPrimNot, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   return GetObj(object);
 }
 
-inline void ArtField::SetObject(Object* object, const Object* l) const {
+inline void ArtField::SetObject(Object* object, Object* l) {
   DCHECK_EQ(Primitive::kPrimNot, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   SetObj(object, l);
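
These accessors now fetch declaring_class_ via GetFieldObject<Class> because, as the art_field.h hunk below shows, the field becomes a HeapReference<Class> rather than a raw Class*. A rough sketch of what such a wrapper provides, assuming objects live below 4GB; the real ART type is more involved than this.

#include <cstdint>
#include <cstdio>

template <typename T>
class HeapReference {
 public:
  T* AsMirrorPtr() const {
    return reinterpret_cast<T*>(static_cast<uintptr_t>(reference_));
  }
  void Assign(T* ptr) {
    // Only valid for pointers below 4GB -- the guarantee the low_4gb
    // MemMap flag elsewhere in this change exists to provide.
    reference_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(ptr));
  }

 private:
  uint32_t reference_ = 0;  // always 4 bytes, whatever the pointer width
};

int main() {
  int dummy = 7;
  if (reinterpret_cast<uintptr_t>(&dummy) <= UINT32_MAX) {
    HeapReference<int> ref;
    ref.Assign(&dummy);
    printf("round-tripped value: %d\n", *ref.AsMirrorPtr());
  }
  printf("sizeof(HeapReference<int>)=%zu, sizeof(int*)=%zu\n",
         sizeof(HeapReference<int>), sizeof(int*));
  return 0;
}

The point of the fixed 4-byte representation is that mirror object layouts, and the field offsets baked into compiled code, stay identical on 32- and 64-bit targets.
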
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 62bcf06..b33fe4b 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -30,98 +30,74 @@
 // C++ mirror of java.lang.reflect.ArtField
 class MANAGED ArtField : public Object {
  public:
-  Class* GetDeclaringClass() const;
+  Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDeclaringClass(Class *new_declaring_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t GetAccessFlags() const;
+  uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetAccessFlags(uint32_t new_access_flags) {
+  void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, access_flags_), new_access_flags, false);
   }
 
-  bool IsPublic() const {
+  bool IsPublic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPublic) != 0;
   }
 
-  bool IsStatic() const {
+  bool IsStatic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccStatic) != 0;
   }
 
-  bool IsFinal() const {
+  bool IsFinal() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
-  uint32_t GetDexFieldIndex() const {
+  uint32_t GetDexFieldIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, field_dex_idx_), false);
   }
 
-  void SetDexFieldIndex(uint32_t new_idx) {
+  void SetDexFieldIndex(uint32_t new_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, field_dex_idx_), new_idx, false);
   }
 
-  // Offset to field within an Object
-  MemberOffset GetOffset() const;
+  // Offset to field within an Object.
+  MemberOffset GetOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static MemberOffset OffsetOffset() {
     return MemberOffset(OFFSETOF_MEMBER(ArtField, offset_));
   }
 
-  MemberOffset GetOffsetDuringLinking() const;
+  MemberOffset GetOffsetDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetOffset(MemberOffset num_bytes);
+  void SetOffset(MemberOffset num_bytes) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // field access, null object for static fields
-  bool GetBoolean(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetBoolean(Object* object, bool z) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int8_t GetByte(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetByte(Object* object, int8_t b) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  uint16_t GetChar(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetChar(Object* object, uint16_t c) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int16_t GetShort(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetShort(Object* object, int16_t s) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int32_t GetInt(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetInt(Object* object, int32_t i) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int64_t GetLong(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetLong(Object* object, int64_t j) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  float GetFloat(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetFloat(Object* object, float f) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  double GetDouble(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetDouble(Object* object, double d) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  Object* GetObject(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetObject(Object* object, const Object* l) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool GetBoolean(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetBoolean(Object* object, bool z) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int8_t GetByte(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetByte(Object* object, int8_t b) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint16_t GetChar(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetChar(Object* object, uint16_t c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int16_t GetShort(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetShort(Object* object, int16_t s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int32_t GetInt(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetInt(Object* object, int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int64_t GetLong(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetLong(Object* object, int64_t j) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  float GetFloat(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetFloat(Object* object, float f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  double GetDouble(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetDouble(Object* object, double d) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* GetObject(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetObject(Object* object, Object* l) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // raw field accesses
-  uint32_t Get32(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void Set32(Object* object, uint32_t new_value) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  uint64_t Get64(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void Set64(Object* object, uint64_t new_value) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  Object* GetObj(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetObj(Object* object, const Object* new_value) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Raw field accesses.
+  uint32_t Get32(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Set32(Object* object, uint32_t new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint64_t Get64(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Set64(Object* object, uint64_t new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* GetObj(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetObj(Object* object, Object* new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangReflectArtField() {
     DCHECK(java_lang_reflect_ArtField_ != NULL);
@@ -133,14 +109,14 @@
   static void VisitRoots(RootVisitor* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsVolatile() const {
+  bool IsVolatile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccVolatile) != 0;
   }
 
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of
-  Class* declaring_class_;
+  HeapReference<Class> declaring_class_;
 
   uint32_t access_flags_;
 
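Throughout this header, const qualifiers give way to SHARED_LOCKS_REQUIRED(Locks::mutator_lock_), an annotation Clang's -Wthread-safety analysis can enforce statically. A minimal sketch of the mechanism with assumed macro definitions; ART defines its own macros, and the attribute spellings below are the modern Clang ones, not necessarily ART's.

#include <cstdio>

#if defined(__clang__)
#define CAPABILITY(x) __attribute__((capability(x)))
#define SHARED_LOCK_FUNCTION(...) __attribute__((acquire_shared_capability(__VA_ARGS__)))
#define UNLOCK_FUNCTION(...) __attribute__((release_generic_capability(__VA_ARGS__)))
#define SHARED_LOCKS_REQUIRED(...) __attribute__((requires_shared_capability(__VA_ARGS__)))
#else  // other compilers simply ignore the annotations
#define CAPABILITY(x)
#define SHARED_LOCK_FUNCTION(...)
#define UNLOCK_FUNCTION(...)
#define SHARED_LOCKS_REQUIRED(...)
#endif

// A stand-in for Locks::mutator_lock_: a capability that can be held shared.
struct CAPABILITY("mutex") ReaderWriterMutex {
  void SharedLock() SHARED_LOCK_FUNCTION() {}
  void SharedUnlock() UNLOCK_FUNCTION() {}
};

ReaderWriterMutex mutator_lock;

class Mirror {
 public:
  // As in this change: not const, but checkable; -Wthread-safety warns on any
  // caller that cannot be shown to hold mutator_lock at least shared.
  int GetLength() SHARED_LOCKS_REQUIRED(mutator_lock) { return length_; }

 private:
  int length_ = 0;
};

int main() {
  Mirror m;
  mutator_lock.SharedLock();
  printf("%d\n", m.GetLength());  // fine: shared lock is held here
  mutator_lock.SharedUnlock();
  // Calling m.GetLength() here would draw a -Wthread-safety warning.
  return 0;
}
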
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 088f616..8ef3be8 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -27,8 +27,9 @@
 namespace art {
 namespace mirror {
 
-inline Class* ArtMethod::GetDeclaringClass() const {
-  Class* result = GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, declaring_class_), false);
+inline Class* ArtMethod::GetDeclaringClass() {
+  Class* result = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, declaring_class_),
+                                        false);
   DCHECK(result != NULL) << this;
   DCHECK(result->IsIdxLoaded() || result->IsErroneous()) << this;
   return result;
@@ -38,44 +39,44 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(ArtMethod, declaring_class_), new_declaring_class, false);
 }
 
-inline uint32_t ArtMethod::GetAccessFlags() const {
+inline uint32_t ArtMethod::GetAccessFlags() {
   DCHECK(GetDeclaringClass()->IsIdxLoaded() || GetDeclaringClass()->IsErroneous());
   return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, access_flags_), false);
 }
 
-inline uint16_t ArtMethod::GetMethodIndex() const {
+inline uint16_t ArtMethod::GetMethodIndex() {
   DCHECK(GetDeclaringClass()->IsResolved() || GetDeclaringClass()->IsErroneous());
   return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_), false);
 }
 
-inline uint32_t ArtMethod::GetDexMethodIndex() const {
+inline uint32_t ArtMethod::GetDexMethodIndex() {
 #ifdef ART_SEA_IR_MODE
   // TODO: Re-add this check for (PORTABLE + SMALL + ) SEA IR when PORTABLE is fixed!
   // DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
 #else
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
 #endif
-  return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_dex_index_), false);
+  return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_method_index_), false);
 }
 
-inline ObjectArray<String>* ArtMethod::GetDexCacheStrings() const {
-  return GetFieldObject<ObjectArray<String>*>(
+inline ObjectArray<String>* ArtMethod::GetDexCacheStrings() {
+  return GetFieldObject<ObjectArray<String> >(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_strings_), false);
 }
 
-inline ObjectArray<ArtMethod>* ArtMethod::GetDexCacheResolvedMethods() const {
-  return GetFieldObject<ObjectArray<ArtMethod>*>(
+inline ObjectArray<ArtMethod>* ArtMethod::GetDexCacheResolvedMethods() {
+  return GetFieldObject<ObjectArray<ArtMethod> >(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_methods_), false);
 }
 
-inline ObjectArray<Class>* ArtMethod::GetDexCacheResolvedTypes() const {
-  return GetFieldObject<ObjectArray<Class>*>(
+inline ObjectArray<Class>* ArtMethod::GetDexCacheResolvedTypes() {
+  return GetFieldObject<ObjectArray<Class> >(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_), false);
 }
 
-inline uint32_t ArtMethod::GetCodeSize() const {
+inline uint32_t ArtMethod::GetCodeSize() {
   DCHECK(!IsRuntimeMethod() && !IsProxyMethod()) << PrettyMethod(this);
-  uintptr_t code = reinterpret_cast<uintptr_t>(GetEntryPointFromCompiledCode());
+  uintptr_t code = reinterpret_cast<uintptr_t>(GetEntryPointFromQuickCompiledCode());
   if (code == 0) {
     return 0;
   }
@@ -106,7 +107,7 @@
   }
 }
 
-inline void ArtMethod::AssertPcIsWithinCode(uintptr_t pc) const {
+inline void ArtMethod::AssertPcIsWithinQuickCode(uintptr_t pc) {
   if (!kIsDebugBuild) {
     return;
   }
@@ -116,34 +117,44 @@
   if (pc == GetQuickInstrumentationExitPc()) {
     return;
   }
-  const void* code = GetEntryPointFromCompiledCode();
-  if (code == GetCompiledCodeToInterpreterBridge() || code == GetQuickInstrumentationEntryPoint()) {
+  const void* code = GetEntryPointFromQuickCompiledCode();
+  if (code == GetQuickToInterpreterBridge() || code == GetQuickInstrumentationEntryPoint()) {
     return;
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  if (code == GetResolutionTrampoline(class_linker)) {
+  if (code == GetQuickResolutionTrampoline(class_linker)) {
     return;
   }
-  DCHECK(IsWithinCode(pc))
+  DCHECK(IsWithinQuickCode(pc))
       << PrettyMethod(this)
       << " pc=" << std::hex << pc
       << " code=" << code
       << " size=" << GetCodeSize();
 }
 
-inline uint32_t ArtMethod::GetOatCodeOffset() const {
+inline uint32_t ArtMethod::GetQuickOatCodeOffset() {
   DCHECK(!Runtime::Current()->IsStarted());
-  return reinterpret_cast<uint32_t>(GetEntryPointFromCompiledCode());
+  return PointerToLowMemUInt32(GetEntryPointFromQuickCompiledCode());
 }
 
-inline void ArtMethod::SetOatCodeOffset(uint32_t code_offset) {
+inline uint32_t ArtMethod::GetPortableOatCodeOffset() {
   DCHECK(!Runtime::Current()->IsStarted());
-  SetEntryPointFromCompiledCode(reinterpret_cast<void*>(code_offset));
+  return PointerToLowMemUInt32(GetEntryPointFromPortableCompiledCode());
 }
 
-inline uint32_t ArtMethod::GetOatMappingTableOffset() const {
+inline void ArtMethod::SetQuickOatCodeOffset(uint32_t code_offset) {
   DCHECK(!Runtime::Current()->IsStarted());
-  return reinterpret_cast<uint32_t>(GetMappingTable());
+  SetEntryPointFromQuickCompiledCode(reinterpret_cast<void*>(code_offset));
+}
+
+inline void ArtMethod::SetPortableOatCodeOffset(uint32_t code_offset) {
+  DCHECK(!Runtime::Current()->IsStarted());
+  SetEntryPointFromPortableCompiledCode(reinterpret_cast<void*>(code_offset));
+}
+
+inline uint32_t ArtMethod::GetOatMappingTableOffset() {
+  DCHECK(!Runtime::Current()->IsStarted());
+  return PointerToLowMemUInt32(GetMappingTable());
 }
 
 inline void ArtMethod::SetOatMappingTableOffset(uint32_t mapping_table_offset) {
@@ -151,9 +162,9 @@
   SetMappingTable(reinterpret_cast<const uint8_t*>(mapping_table_offset));
 }
 
-inline uint32_t ArtMethod::GetOatVmapTableOffset() const {
+inline uint32_t ArtMethod::GetOatVmapTableOffset() {
   DCHECK(!Runtime::Current()->IsStarted());
-  return reinterpret_cast<uint32_t>(GetVmapTable());
+  return PointerToLowMemUInt32(GetVmapTable());
 }
 
 inline void ArtMethod::SetOatVmapTableOffset(uint32_t vmap_table_offset) {
@@ -166,16 +177,16 @@
   SetNativeGcMap(reinterpret_cast<uint8_t*>(gc_map_offset));
 }
 
-inline uint32_t ArtMethod::GetOatNativeGcMapOffset() const {
+inline uint32_t ArtMethod::GetOatNativeGcMapOffset() {
   DCHECK(!Runtime::Current()->IsStarted());
-  return reinterpret_cast<uint32_t>(GetNativeGcMap());
+  return PointerToLowMemUInt32(GetNativeGcMap());
 }
 
-inline bool ArtMethod::IsRuntimeMethod() const {
+inline bool ArtMethod::IsRuntimeMethod() {
   return GetDexMethodIndex() == DexFile::kDexNoIndex;
 }
 
-inline bool ArtMethod::IsCalleeSaveMethod() const {
+inline bool ArtMethod::IsCalleeSaveMethod() {
   if (!IsRuntimeMethod()) {
     return false;
   }
@@ -190,14 +201,14 @@
   return result;
 }
 
-inline bool ArtMethod::IsResolutionMethod() const {
+inline bool ArtMethod::IsResolutionMethod() {
   bool result = this == Runtime::Current()->GetResolutionMethod();
   // Check that if we do think it is phony it looks like the resolution method.
   DCHECK(!result || IsRuntimeMethod());
   return result;
 }
 
-inline bool ArtMethod::IsImtConflictMethod() const {
+inline bool ArtMethod::IsImtConflictMethod() {
   bool result = this == Runtime::Current()->GetImtConflictMethod();
   // Check that if we do think it is phony it looks like the imt conflict method.
   DCHECK(!result || IsRuntimeMethod());
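
The switch from reinterpret_cast<uint32_t> to PointerToLowMemUInt32 in the offset getters above matters on 64-bit hosts, where casting a pointer to uint32_t is no longer a value-preserving (or even well-formed) conversion. The helper's name comes from this diff; the body below is only a guess at its intent.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Name from the diff; this body is an illustrative guess, not ART's code.
static inline uint32_t PointerToLowMemUInt32(const void* p) {
  uintptr_t value = reinterpret_cast<uintptr_t>(p);
  assert(value <= UINT32_MAX && "pointer must lie in the low 4GB");
  return static_cast<uint32_t>(value);
}

int main() {
  // During oat file writing the entry-point slots temporarily hold small
  // file offsets, so the "pointer" really is a low 32-bit value.
  uint32_t offset = 0x1000;
  const void* stuffed =
      reinterpret_cast<const void*>(static_cast<uintptr_t>(offset));
  printf("0x%x\n", PointerToLowMemUInt32(stuffed));
  return 0;
}
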
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index f4a076c..575ea03 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -47,7 +47,7 @@
   }
 }
 
-InvokeType ArtMethod::GetInvokeType() const {
+InvokeType ArtMethod::GetInvokeType() {
   // TODO: kSuper?
   if (GetDeclaringClass()->IsInterface()) {
     return kInterface;
@@ -100,11 +100,11 @@
   return num_registers;
 }
 
-bool ArtMethod::IsProxyMethod() const {
+bool ArtMethod::IsProxyMethod() {
   return GetDeclaringClass()->IsProxyClass();
 }
 
-ArtMethod* ArtMethod::FindOverriddenMethod() const {
+ArtMethod* ArtMethod::FindOverriddenMethod() {
   if (IsStatic()) {
     return NULL;
   }
@@ -147,13 +147,16 @@
   return result;
 }
 
-uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc) const {
+uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc) {
   const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
   return pc - reinterpret_cast<uintptr_t>(code);
 }
 
-uint32_t ArtMethod::ToDexPc(const uintptr_t pc) const {
-#if !defined(ART_USE_PORTABLE_COMPILER)
+uint32_t ArtMethod::ToDexPc(const uintptr_t pc) {
+  if (IsPortableCompiled()) {
+    // Portable doesn't use the machine pc; we just use the dex pc instead.
+    return static_cast<uint32_t>(pc);
+  }
   MappingTable table(GetMappingTable());
   if (table.TotalSize() == 0) {
     DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
@@ -176,16 +179,12 @@
     }
   }
   LOG(FATAL) << "Failed to find Dex offset for PC offset " << reinterpret_cast<void*>(sought_offset)
-             << "(PC " << reinterpret_cast<void*>(pc) << ", code=" << code
-             << ") in " << PrettyMethod(this);
+                     << "(PC " << reinterpret_cast<void*>(pc) << ", code=" << code
+                     << ") in " << PrettyMethod(this);
   return DexFile::kDexNoIndex;
-#else
-  // Compiler LLVM doesn't use the machine pc, we just use dex pc instead.
-  return static_cast<uint32_t>(pc);
-#endif
 }
 
-uintptr_t ArtMethod::ToNativePc(const uint32_t dex_pc) const {
+uintptr_t ArtMethod::ToNativePc(const uint32_t dex_pc) {
   MappingTable table(GetMappingTable());
   if (table.TotalSize() == 0) {
     DCHECK_EQ(dex_pc, 0U);
@@ -213,7 +212,7 @@
 }
 
 uint32_t ArtMethod::FindCatchBlock(Class* exception_type, uint32_t dex_pc,
-                                   bool* has_no_move_exception) const {
+                                   bool* has_no_move_exception) {
   MethodHelper mh(this);
   const DexFile::CodeItem* code_item = mh.GetCodeItem();
   // Default to handler not found.
@@ -265,16 +264,21 @@
     }
   } else {
     const bool kLogInvocationStartAndReturn = false;
-    if (GetEntryPointFromCompiledCode() != NULL) {
+    bool have_quick_code = GetEntryPointFromQuickCompiledCode() != nullptr;
+    bool have_portable_code = GetEntryPointFromPortableCompiledCode() != nullptr;
+    if (LIKELY(have_quick_code || have_portable_code)) {
       if (kLogInvocationStartAndReturn) {
-        LOG(INFO) << StringPrintf("Invoking '%s' code=%p", PrettyMethod(this).c_str(), GetEntryPointFromCompiledCode());
+        LOG(INFO) << StringPrintf("Invoking '%s' %s code=%p", PrettyMethod(this).c_str(),
+                                  have_quick_code ? "quick" : "portable",
+                                  have_quick_code ? GetEntryPointFromQuickCompiledCode()
+                                                  : GetEntryPointFromPortableCompiledCode());
       }
-#ifdef ART_USE_PORTABLE_COMPILER
-      (*art_portable_invoke_stub)(this, args, args_size, self, result, result_type);
-#else
-      (*art_quick_invoke_stub)(this, args, args_size, self, result, result_type);
-#endif
-      if (UNLIKELY(reinterpret_cast<int32_t>(self->GetException(NULL)) == -1)) {
+      if (!IsPortableCompiled()) {
+        (*art_quick_invoke_stub)(this, args, args_size, self, result, result_type);
+      } else {
+        (*art_portable_invoke_stub)(this, args, args_size, self, result, result_type);
+      }
+      if (UNLIKELY(reinterpret_cast<intptr_t>(self->GetException(NULL)) == -1)) {
         // Unusual case where we were running LLVM generated code and an
         // exception was thrown to force the activations to be removed from the
         // stack. Continue execution in the interpreter.
@@ -285,11 +289,13 @@
         interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
       }
       if (kLogInvocationStartAndReturn) {
-        LOG(INFO) << StringPrintf("Returned '%s' code=%p", PrettyMethod(this).c_str(), GetEntryPointFromCompiledCode());
+        LOG(INFO) << StringPrintf("Returned '%s' %s code=%p", PrettyMethod(this).c_str(),
+                                  have_quick_code ? "quick" : "portable",
+                                  have_quick_code ? GetEntryPointFromQuickCompiledCode()
+                                                  : GetEntryPointFromPortableCompiledCode());
       }
     } else {
-      LOG(INFO) << "Not invoking '" << PrettyMethod(this)
-          << "' code=" << reinterpret_cast<const void*>(GetEntryPointFromCompiledCode());
+      LOG(INFO) << "Not invoking '" << PrettyMethod(this) << "' code=null";
       if (result != NULL) {
         result->SetJ(0);
       }
@@ -300,9 +306,10 @@
   self->PopManagedStackFragment(fragment);
 }
 
-bool ArtMethod::IsRegistered() const {
-  void* native_method = GetFieldPtr<void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, native_method_), false);
-  CHECK(native_method != NULL);
+bool ArtMethod::IsRegistered() {
+  void* native_method =
+      GetFieldPtr<void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_), false);
+  CHECK(native_method != nullptr);
   void* jni_stub = GetJniDlsymLookupStub();
   return native_method != jni_stub;
 }
@@ -323,7 +330,7 @@
     // around JNI bugs that include not giving Object** SIRT references to native methods. Direct
     // the native method to runtime support and store the target somewhere runtime support will
     // find it.
-#if defined(__i386__)
+#if defined(__i386__) || defined(__x86_64__)
     UNIMPLEMENTED(FATAL);
 #else
     SetNativeMethod(reinterpret_cast<void*>(art_work_around_app_jni_bugs));
@@ -340,7 +347,7 @@
 }
 
 void ArtMethod::SetNativeMethod(const void* native_method) {
-  SetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, native_method_),
+  SetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_),
       native_method, false);
 }
 
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 95ca4c9..bfa7cbe 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -45,7 +45,7 @@
 // C++ mirror of java.lang.reflect.Method and java.lang.reflect.Constructor
 class MANAGED ArtMethod : public Object {
  public:
-  Class* GetDeclaringClass() const;
+  Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDeclaringClass(Class *new_declaring_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -53,41 +53,37 @@
     return MemberOffset(OFFSETOF_MEMBER(ArtMethod, declaring_class_));
   }
 
-  static MemberOffset EntryPointFromCompiledCodeOffset() {
-    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, entry_point_from_compiled_code_));
-  }
+  uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t GetAccessFlags() const;
-
-  void SetAccessFlags(uint32_t new_access_flags) {
+  void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, access_flags_), new_access_flags, false);
   }
 
   // Approximate what kind of method call would be used for this method.
-  InvokeType GetInvokeType() const;
+  InvokeType GetInvokeType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if the method is declared public.
-  bool IsPublic() const {
+  bool IsPublic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPublic) != 0;
   }
 
   // Returns true if the method is declared private.
-  bool IsPrivate() const {
+  bool IsPrivate() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPrivate) != 0;
   }
 
   // Returns true if the method is declared static.
-  bool IsStatic() const {
+  bool IsStatic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccStatic) != 0;
   }
 
   // Returns true if the method is a constructor.
-  bool IsConstructor() const {
+  bool IsConstructor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccConstructor) != 0;
   }
 
   // Returns true if the method is static, private, or a constructor.
-  bool IsDirect() const {
+  bool IsDirect() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return IsDirect(GetAccessFlags());
   }
 
@@ -96,55 +92,70 @@
   }
 
   // Returns true if the method is declared synchronized.
-  bool IsSynchronized() const {
+  bool IsSynchronized() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t synchronized = kAccSynchronized | kAccDeclaredSynchronized;
     return (GetAccessFlags() & synchronized) != 0;
   }
 
-  bool IsFinal() const {
+  bool IsFinal() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
-  bool IsMiranda() const {
+  bool IsMiranda() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
 
-  bool IsNative() const {
+  bool IsNative() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccNative) != 0;
   }
 
-  bool IsFastNative() const {
+  bool IsFastNative() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t mask = kAccFastNative | kAccNative;
     return (GetAccessFlags() & mask) == mask;
   }
 
-  bool IsAbstract() const {
+  bool IsAbstract() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccAbstract) != 0;
   }
 
-  bool IsSynthetic() const {
+  bool IsSynthetic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
-  bool IsProxyMethod() const;
+  bool IsProxyMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsPreverified() const {
+  bool IsPreverified() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPreverified) != 0;
   }
 
-  void SetPreverified() {
+  void SetPreverified() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(!IsPreverified());
     SetAccessFlags(GetAccessFlags() | kAccPreverified);
   }
 
+  bool IsPortableCompiled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccPortableCompiled) != 0;
+  }
+
+  void SetIsPortableCompiled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(!IsPortableCompiled());
+    SetAccessFlags(GetAccessFlags() | kAccPortableCompiled);
+  }
+
+  void ClearIsPortableCompiled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(IsPortableCompiled());
+    SetAccessFlags(GetAccessFlags() & ~kAccPortableCompiled);
+  }
+
   bool CheckIncompatibleClassChange(InvokeType type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint16_t GetMethodIndex() const;
+  uint16_t GetMethodIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t GetVtableIndex() const {
+  size_t GetVtableIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetMethodIndex();
   }
 
-  void SetMethodIndex(uint16_t new_method_index) {
+  void SetMethodIndex(uint16_t new_method_index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_), new_method_index, false);
   }
 
@@ -152,24 +163,24 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
   }
 
-  uint32_t GetCodeItemOffset() const {
-    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, code_item_offset_), false);
+  uint32_t GetCodeItemOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_code_item_offset_), false);
   }
 
   void SetCodeItemOffset(uint32_t new_code_off) {
-    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, code_item_offset_), new_code_off, false);
+    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_code_item_offset_), new_code_off, false);
   }
 
   // Number of 32bit registers that would be required to hold all the arguments
   static size_t NumArgRegisters(const StringPiece& shorty);
 
-  uint32_t GetDexMethodIndex() const;
+  uint32_t GetDexMethodIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDexMethodIndex(uint32_t new_idx) {
-    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_dex_index_), new_idx, false);
+    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_method_index_), new_idx, false);
   }
 
-  ObjectArray<String>* GetDexCacheStrings() const;
+  ObjectArray<String>* GetDexCacheStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetDexCacheStrings(ObjectArray<String>* new_dex_cache_strings)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -185,41 +196,62 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_);
   }
 
-  ObjectArray<ArtMethod>* GetDexCacheResolvedMethods() const;
+  ObjectArray<ArtMethod>* GetDexCacheResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetDexCacheResolvedMethods(ObjectArray<ArtMethod>* new_dex_cache_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<Class>* GetDexCacheResolvedTypes() const;
+  ObjectArray<Class>* GetDexCacheResolvedTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetDexCacheResolvedTypes(ObjectArray<Class>* new_dex_cache_types)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find the method that this method overrides
-  ArtMethod* FindOverriddenMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* FindOverriddenMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result, char result_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  EntryPointFromInterpreter* GetEntryPointFromInterpreter() const {
-    return GetFieldPtr<EntryPointFromInterpreter*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_), false);
+  EntryPointFromInterpreter* GetEntryPointFromInterpreter() {
+    return GetFieldPtr<EntryPointFromInterpreter*>(
+               OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_), false);
   }
 
   void SetEntryPointFromInterpreter(EntryPointFromInterpreter* entry_point_from_interpreter) {
-    SetFieldPtr<EntryPointFromInterpreter*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_), entry_point_from_interpreter, false);
+    SetFieldPtr<EntryPointFromInterpreter*>(
+        OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_),
+        entry_point_from_interpreter, false);
   }
 
-  const void* GetEntryPointFromCompiledCode() const {
-    return GetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_compiled_code_), false);
+  static MemberOffset EntryPointFromPortableCompiledCodeOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, entry_point_from_portable_compiled_code_));
   }
 
-  void SetEntryPointFromCompiledCode(const void* entry_point_from_compiled_code) {
-    SetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_compiled_code_), entry_point_from_compiled_code, false);
+  const void* GetEntryPointFromPortableCompiledCode() {
+    return GetFieldPtr<const void*>(EntryPointFromPortableCompiledCodeOffset(), false);
   }
 
-  uint32_t GetCodeSize() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetEntryPointFromPortableCompiledCode(const void* entry_point_from_portable_compiled_code) {
+    SetFieldPtr<const void*>(EntryPointFromPortableCompiledCodeOffset(),
+        entry_point_from_portable_compiled_code, false);
+  }
 
-  bool IsWithinCode(uintptr_t pc) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uintptr_t code = reinterpret_cast<uintptr_t>(GetEntryPointFromCompiledCode());
+  static MemberOffset EntryPointFromQuickCompiledCodeOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, entry_point_from_quick_compiled_code_));
+  }
+
+  const void* GetEntryPointFromQuickCompiledCode() {
+    return GetFieldPtr<const void*>(EntryPointFromQuickCompiledCodeOffset(), false);
+  }
+
+  void SetEntryPointFromQuickCompiledCode(const void* entry_point_from_quick_compiled_code) {
+    SetFieldPtr<const void*>(EntryPointFromQuickCompiledCodeOffset(),
+        entry_point_from_quick_compiled_code, false);
+  }
+
+  uint32_t GetCodeSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  bool IsWithinQuickCode(uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uintptr_t code = reinterpret_cast<uintptr_t>(GetEntryPointFromQuickCompiledCode());
     if (code == 0) {
       return pc == 0;
     }
@@ -231,45 +263,44 @@
     return (code <= pc && pc <= code + GetCodeSize());
   }
 
-  void AssertPcIsWithinCode(uintptr_t pc) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void AssertPcIsWithinQuickCode(uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t GetOatCodeOffset() const;
-
-  void SetOatCodeOffset(uint32_t code_offset);
-
-  static MemberOffset GetEntryPointFromCompiledCodeOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_compiled_code_);
-  }
+  uint32_t GetQuickOatCodeOffset();
+  uint32_t GetPortableOatCodeOffset();
+  void SetQuickOatCodeOffset(uint32_t code_offset);
+  void SetPortableOatCodeOffset(uint32_t code_offset);
 
   // Callers should wrap the uint8_t* in a MappingTable instance for convenient access.
-  const uint8_t* GetMappingTable() const {
-    return GetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, mapping_table_), false);
+  const uint8_t* GetMappingTable() {
+    return GetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_mapping_table_),
+        false);
   }
 
   void SetMappingTable(const uint8_t* mapping_table) {
-    SetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, mapping_table_),
-                                 mapping_table, false);
+    SetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_mapping_table_),
+                                mapping_table, false);
   }
 
-  uint32_t GetOatMappingTableOffset() const;
+  uint32_t GetOatMappingTableOffset();
 
   void SetOatMappingTableOffset(uint32_t mapping_table_offset);
 
   // Callers should wrap the uint8_t* in a VmapTable instance for convenient access.
-  const uint8_t* GetVmapTable() const {
-    return GetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, vmap_table_), false);
+  const uint8_t* GetVmapTable() {
+    return GetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_vmap_table_),
+        false);
   }
 
   void SetVmapTable(const uint8_t* vmap_table) {
-    SetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, vmap_table_), vmap_table, false);
+    SetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_vmap_table_), vmap_table,
+        false);
   }
 
-  uint32_t GetOatVmapTableOffset() const;
+  uint32_t GetOatVmapTableOffset();
 
   void SetOatVmapTableOffset(uint32_t vmap_table_offset);
 
-  const uint8_t* GetNativeGcMap() const {
+  const uint8_t* GetNativeGcMap() {
     return GetFieldPtr<uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, gc_map_), false);
   }
   void SetNativeGcMap(const uint8_t* data) {
@@ -278,31 +309,30 @@
 
   // When building the oat need a convenient place to stuff the offset of the native GC map.
   void SetOatNativeGcMapOffset(uint32_t gc_map_offset);
-  uint32_t GetOatNativeGcMapOffset() const;
+  uint32_t GetOatNativeGcMapOffset();
 
-  size_t GetFrameSizeInBytes() const {
+  size_t GetFrameSizeInBytes() {
     DCHECK_EQ(sizeof(size_t), sizeof(uint32_t));
-    size_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, frame_size_in_bytes_), false);
+    size_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_frame_size_in_bytes_), false);
     DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
     return result;
   }
 
   void SetFrameSizeInBytes(size_t new_frame_size_in_bytes) {
-    DCHECK_EQ(sizeof(size_t), sizeof(uint32_t));
-    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, frame_size_in_bytes_),
+    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_frame_size_in_bytes_),
                new_frame_size_in_bytes, false);
   }
 
-  size_t GetReturnPcOffsetInBytes() const {
+  size_t GetReturnPcOffsetInBytes() {
     return GetFrameSizeInBytes() - kPointerSize;
   }
 
-  size_t GetSirtOffsetInBytes() const {
+  size_t GetSirtOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(IsNative());
     return kPointerSize;
   }
 
-  bool IsRegistered() const;
+  bool IsRegistered();
 
   void RegisterNative(Thread* self, const void* native_method, bool is_fast)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
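
The GetReturnPcOffsetInBytes() arithmetic in this hunk deserves a concrete reading: the quick frame spills the return PC in its last pointer-sized slot, so the offset is simply the frame size minus one pointer. A minimal standalone sketch under that assumption (kPointerSize here is generic; the DCHECK above pins ART to 4-byte size_t at this point in time):

#include <cassert>
#include <cstddef>

constexpr std::size_t kPointerSize = sizeof(void*);

// Mirrors GetReturnPcOffsetInBytes(): the return PC occupies the last
// pointer-sized slot of the quick frame.
std::size_t ReturnPcOffset(std::size_t frame_size_in_bytes) {
  assert(frame_size_in_bytes >= kPointerSize);
  return frame_size_in_bytes - kPointerSize;
}
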
@@ -310,10 +340,10 @@
   void UnregisterNative(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static MemberOffset NativeMethodOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, native_method_);
+    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_);
   }
 
-  const void* GetNativeMethod() const {
+  const void* GetNativeMethod() {
     return reinterpret_cast<const void*>(GetField32(NativeMethodOffset(), false));
   }
 
@@ -323,47 +353,47 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
   }
 
-  uint32_t GetCoreSpillMask() const {
-    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, core_spill_mask_), false);
+  uint32_t GetCoreSpillMask() {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_core_spill_mask_), false);
   }
 
   void SetCoreSpillMask(uint32_t core_spill_mask) {
     // Computed during compilation
-    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, core_spill_mask_), core_spill_mask, false);
+    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_core_spill_mask_), core_spill_mask, false);
   }
 
-  uint32_t GetFpSpillMask() const {
-    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, fp_spill_mask_), false);
+  uint32_t GetFpSpillMask() {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_fp_spill_mask_), false);
   }
 
   void SetFpSpillMask(uint32_t fp_spill_mask) {
     // Computed during compilation
-    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, fp_spill_mask_), fp_spill_mask, false);
+    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_fp_spill_mask_), fp_spill_mask, false);
   }
 
   // Is this a CalleeSaveMethod or ResolutionMethod and therefore doesn't adhere to the normal
   // conventions for a method of managed code? Returns false for Proxy methods.
-  bool IsRuntimeMethod() const;
+  bool IsRuntimeMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Is this a hand-crafted method used for something like describing callee saves?
-  bool IsCalleeSaveMethod() const;
+  bool IsCalleeSaveMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsResolutionMethod() const;
+  bool IsResolutionMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsImtConflictMethod() const;
+  bool IsImtConflictMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uintptr_t NativePcOffset(const uintptr_t pc) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uintptr_t NativePcOffset(const uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Converts a native PC to a dex PC.
-  uint32_t ToDexPc(const uintptr_t pc) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t ToDexPc(const uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Converts a dex PC to a native PC.
-  uintptr_t ToNativePc(const uint32_t dex_pc) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uintptr_t ToNativePc(const uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find the catch block for the given exception type and dex_pc. When a catch block is found,
   // indicates whether the found catch block is responsible for clearing the exception or whether
   // a move-exception instruction is present.
-  uint32_t FindCatchBlock(Class* exception_type, uint32_t dex_pc, bool* has_no_move_exception) const
+  uint32_t FindCatchBlock(Class* exception_type, uint32_t dex_pc, bool* has_no_move_exception)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void SetClass(Class* java_lang_reflect_ArtMethod);
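
For context on the quick_core_spill_mask_/quick_fp_spill_mask_ renames later in this file: each mask is a plain bit map over machine registers, bit N meaning register N is spilled in the frame. A hypothetical helper, not ART code (the ARM register numbers in the trailing comment are illustrative):

#include <cstdint>
#include <initializer_list>

// Bit N set in the mask means machine register N is spilled in the frame.
uint32_t MakeSpillMask(std::initializer_list<int> regs) {
  uint32_t mask = 0;
  for (int reg : regs) {
    mask |= 1u << reg;
  }
  return mask;
}

// e.g. MakeSpillMask({4, 5, 14}) marks r4, r5 and lr (r14) as spilled on ARM.
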
@@ -379,65 +409,83 @@
 
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  // The class we are a part of
-  Class* declaring_class_;
+  // The class we are a part of.
+  HeapReference<Class> declaring_class_;
 
-  // short cuts to declaring_class_->dex_cache_ member for fast compiled code access
-  ObjectArray<ArtMethod>* dex_cache_resolved_methods_;
+  // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
+  HeapReference<ObjectArray<ArtMethod> > dex_cache_resolved_methods_;
 
-  // short cuts to declaring_class_->dex_cache_ member for fast compiled code access
-  ObjectArray<Class>* dex_cache_resolved_types_;
+  // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
+  HeapReference<ObjectArray<Class> > dex_cache_resolved_types_;
 
-  // short cuts to declaring_class_->dex_cache_ member for fast compiled code access
-  ObjectArray<String>* dex_cache_strings_;
+  // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
+  HeapReference<ObjectArray<String> > dex_cache_strings_;
 
-  // Access flags; low 16 bits are defined by spec.
-  uint32_t access_flags_;
+  // Method dispatch from the interpreter invokes this pointer which may cause a bridge into
+  // compiled code.
+  uint64_t entry_point_from_interpreter_;
 
-  // Offset to the CodeItem.
-  uint32_t code_item_offset_;
+  // Pointer to JNI function registered to this method, or a function to resolve the JNI function.
+  uint64_t entry_point_from_jni_;
 
-  // Architecture-dependent register spill mask
-  uint32_t core_spill_mask_;
+  // Method dispatch from portable compiled code invokes this pointer which may cause bridging into
+  // quick compiled code or the interpreter.
+  uint64_t entry_point_from_portable_compiled_code_;
 
-  // Compiled code associated with this method for callers from managed code.
-  // May be compiled managed code or a bridge for invoking a native method.
-  // TODO: Break apart this into portable and quick.
-  const void* entry_point_from_compiled_code_;
+  // Method dispatch from quick compiled code invokes this pointer which may cause bridging into
+  // portable compiled code or the interpreter.
+  uint64_t entry_point_from_quick_compiled_code_;
 
-  // Called by the interpreter to execute this method.
-  EntryPointFromInterpreter* entry_point_from_interpreter_;
+  // Pointer to a data structure created by the compiler and used by the garbage collector to
+  // determine which registers hold live references to objects within the heap. Keyed by native PC
+  // offsets for the quick compiler and by dex PCs for the portable compiler.
+  uint64_t gc_map_;
 
-  // Architecture-dependent register spill mask
-  uint32_t fp_spill_mask_;
+  // --- Quick compiler meta-data. ---
+  // TODO: merge and place in native heap, such as done with the code size.
 
-  // Total size in bytes of the frame
-  size_t frame_size_in_bytes_;
-
-  // Garbage collection map of native PC offsets (quick) or dex PCs (portable) to reference bitmaps.
-  const uint8_t* gc_map_;
-
-  // Mapping from native pc to dex pc
-  const uint32_t* mapping_table_;
-
-  // Index into method_ids of the dex file associated with this method
-  uint32_t method_dex_index_;
-
-  // For concrete virtual methods, this is the offset of the method in Class::vtable_.
-  //
-  // For abstract methods in an interface class, this is the offset of the method in
-  // "iftable_->Get(n)->GetMethodArray()".
-  //
-  // For static and direct methods this is the index in the direct methods table.
-  uint32_t method_index_;
-
-  // The target native method registered with this method
-  const void* native_method_;
+  // Pointer to a data structure created by the quick compiler to map between dex PCs and native
+  // PCs, and vice-versa.
+  uint64_t quick_mapping_table_;
 
   // When a dex register is promoted into a machine register, the spill mask holds which machine
   // registers hold dex registers. The first promoted register's corresponding dex register is
   // vmap_table_[1], the Nth is vmap_table_[N], and vmap_table_[0] holds the length of the table.
-  const uint16_t* vmap_table_;
+  uint64_t quick_vmap_table_;
+
+  // --- End of quick compiler meta-data. ---
+
+  // Access flags; low 16 bits are defined by spec.
+  uint32_t access_flags_;
+
+  /* Dex file fields. The defining dex file is available via declaring_class_->dex_cache_ */
+
+  // Offset to the CodeItem.
+  uint32_t dex_code_item_offset_;
+
+  // Index into method_ids of the dex file associated with this method.
+  uint32_t dex_method_index_;
+
+  /* End of dex file fields. */
+
+  // Entry within a dispatch table for this method. For static/direct methods the index is into
+  // declaringClass.directMethods, for virtual methods it is into the vtable, and for interface
+  // methods it is into the ifTable.
+  uint32_t method_index_;
+
+  // --- Quick compiler meta-data. ---
+  // TODO: merge and place in native heap, such as done with the code size.
+
+  // Bit map of spilled machine registers.
+  uint32_t quick_core_spill_mask_;
+
+  // Bit map of spilled floating point machine registers.
+  uint32_t quick_fp_spill_mask_;
+
+  // Fixed frame size for this method when executed.
+  uint32_t quick_frame_size_in_bytes_;
+
+  // --- End of quick compiler meta-data. ---
 
   static Class* java_lang_reflect_ArtMethod_;
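
The quick_vmap_table_ layout documented above can be read back as follows; this is a hedged sketch with an invented helper name, but the index convention (length at slot 0, promoted registers from slot 1) is exactly the one in the comment:

#include <cassert>
#include <cstddef>
#include <cstdint>

// vmap_table[0] holds the entry count; entry i (1-based) names the dex
// register backed by the i-th promoted machine register.
std::uint16_t DexRegisterForPromotedRegister(const std::uint16_t* vmap_table, std::size_t i) {
  const std::uint16_t length = vmap_table[0];
  assert(i >= 1 && i <= length);
  return vmap_table[i];
}
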
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index cd44ebc..a5f743b 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -33,63 +33,61 @@
 namespace art {
 namespace mirror {
 
-inline size_t Class::GetObjectSize() const {
+inline uint32_t Class::GetObjectSize() {
   DCHECK(!IsVariableSize()) << " class=" << PrettyTypeOf(this);
-  DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
   return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, object_size_), false);
 }
 
-inline Class* Class::GetSuperClass() const {
+inline Class* Class::GetSuperClass() {
   // Can only get super class for loaded classes (hack for when runtime is
   // initializing)
   DCHECK(IsLoaded() || !Runtime::Current()->IsStarted()) << IsLoaded();
-  return GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_), false);
+  return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_), false);
 }
 
-inline ClassLoader* Class::GetClassLoader() const {
-  return GetFieldObject<ClassLoader*>(OFFSET_OF_OBJECT_MEMBER(Class, class_loader_), false);
+inline ClassLoader* Class::GetClassLoader() {
+  return GetFieldObject<ClassLoader>(OFFSET_OF_OBJECT_MEMBER(Class, class_loader_), false);
 }
 
-inline DexCache* Class::GetDexCache() const {
-  return GetFieldObject<DexCache*>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), false);
+inline DexCache* Class::GetDexCache() {
+  return GetFieldObject<DexCache>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), false);
 }
 
-inline ObjectArray<ArtMethod>* Class::GetDirectMethods() const {
+inline ObjectArray<ArtMethod>* Class::GetDirectMethods() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod>*>(
+  return GetFieldObject<ObjectArray<ArtMethod> >(
       OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_), false);
 }
 
 inline void Class::SetDirectMethods(ObjectArray<ArtMethod>* new_direct_methods)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtMethod>*>(
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtMethod> >(
       OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_), false));
   DCHECK_NE(0, new_direct_methods->GetLength());
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_),
                  new_direct_methods, false);
 }
 
-inline ArtMethod* Class::GetDirectMethod(int32_t i) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtMethod* Class::GetDirectMethod(int32_t i) {
   return GetDirectMethods()->Get(i);
 }
 
 inline void Class::SetDirectMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectArray<ArtMethod>* direct_methods =
-      GetFieldObject<ObjectArray<ArtMethod>*>(
+      GetFieldObject<ObjectArray<ArtMethod> >(
           OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_), false);
   direct_methods->Set(i, f);
 }
 
 // Returns the number of static, private, and constructor methods.
-inline size_t Class::NumDirectMethods() const {
+inline uint32_t Class::NumDirectMethods() {
   return (GetDirectMethods() != NULL) ? GetDirectMethods()->GetLength() : 0;
 }
 
-inline ObjectArray<ArtMethod>* Class::GetVirtualMethods() const {
+inline ObjectArray<ArtMethod>* Class::GetVirtualMethods() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod>*>(
+  return GetFieldObject<ObjectArray<ArtMethod> >(
       OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_), false);
 }
 
@@ -101,18 +99,16 @@
                  new_virtual_methods, false);
 }
 
-inline size_t Class::NumVirtualMethods() const {
+inline uint32_t Class::NumVirtualMethods() {
   return (GetVirtualMethods() != NULL) ? GetVirtualMethods()->GetLength() : 0;
 }
 
-inline ArtMethod* Class::GetVirtualMethod(uint32_t i) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtMethod* Class::GetVirtualMethod(uint32_t i) {
   DCHECK(IsResolved() || IsErroneous());
   return GetVirtualMethods()->Get(i);
 }
 
-inline ArtMethod* Class::GetVirtualMethodDuringLinking(uint32_t i) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtMethod* Class::GetVirtualMethodDuringLinking(uint32_t i) {
   DCHECK(IsLoaded() || IsErroneous());
   return GetVirtualMethods()->Get(i);
 }
@@ -120,35 +116,34 @@
 inline void Class::SetVirtualMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectArray<ArtMethod>* virtual_methods =
-      GetFieldObject<ObjectArray<ArtMethod>*>(
+      GetFieldObject<ObjectArray<ArtMethod> >(
           OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_), false);
   virtual_methods->Set(i, f);
 }
 
-inline ObjectArray<ArtMethod>* Class::GetVTable() const {
+inline ObjectArray<ArtMethod>* Class::GetVTable() {
   DCHECK(IsResolved() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod>*>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), false);
+  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), false);
 }
 
-inline ObjectArray<ArtMethod>* Class::GetVTableDuringLinking() const {
+inline ObjectArray<ArtMethod>* Class::GetVTableDuringLinking() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod>*>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), false);
+  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), false);
 }
 
-inline void Class::SetVTable(ObjectArray<ArtMethod>* new_vtable)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline void Class::SetVTable(ObjectArray<ArtMethod>* new_vtable) {
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), new_vtable, false);
 }
 
-inline ObjectArray<ArtMethod>* Class::GetImTable() const {
-  return GetFieldObject<ObjectArray<ArtMethod>*>(OFFSET_OF_OBJECT_MEMBER(Class, imtable_), false);
+inline ObjectArray<ArtMethod>* Class::GetImTable() {
+  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, imtable_), false);
 }
 
 inline void Class::SetImTable(ObjectArray<ArtMethod>* new_imtable) {
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, imtable_), new_imtable, false);
 }
 
-inline bool Class::Implements(const Class* klass) const {
+inline bool Class::Implements(Class* klass) {
   DCHECK(klass != NULL);
   DCHECK(klass->IsInterface()) << PrettyClass(this);
   // All interfaces implemented directly and by our superclass, and
@@ -183,13 +178,13 @@
 // Don't forget about primitive types.
 //   Object[]         = int[] --> false
 //
-inline bool Class::IsArrayAssignableFromArray(const Class* src) const {
+inline bool Class::IsArrayAssignableFromArray(Class* src) {
   DCHECK(IsArrayClass())  << PrettyClass(this);
   DCHECK(src->IsArrayClass()) << PrettyClass(src);
   return GetComponentType()->IsAssignableFrom(src->GetComponentType());
 }
 
-inline bool Class::IsAssignableFromArray(const Class* src) const {
+inline bool Class::IsAssignableFromArray(Class* src) {
   DCHECK(!IsInterface()) << PrettyClass(this);  // handled first in IsAssignableFrom
   DCHECK(src->IsArrayClass()) << PrettyClass(src);
   if (!IsArrayClass()) {
@@ -203,14 +198,15 @@
   return IsArrayAssignableFromArray(src);
 }
 
-template <bool throw_on_failure>
-inline bool Class::CanAccessResolvedField(Class* access_to, ArtField* field,
-                                          uint32_t field_idx) {
+template <bool throw_on_failure, bool use_referrers_cache>
+inline bool Class::ResolvedFieldAccessTest(Class* access_to, ArtField* field,
+                                           uint32_t field_idx, DexCache* dex_cache) {
+  DCHECK_EQ(use_referrers_cache, dex_cache == nullptr);
   if (UNLIKELY(!this->CanAccess(access_to))) {
     // The referrer class can't access the field's declaring class but may still be able
     // to access the field if the FieldId specifies an accessible subclass of the declaring
     // class rather than the declaring class itself.
-    DexCache* referrer_dex_cache = this->GetDexCache();
+    DexCache* referrer_dex_cache = use_referrers_cache ? this->GetDexCache() : dex_cache;
     uint32_t class_idx = referrer_dex_cache->GetDexFile()->GetFieldId(field_idx).class_idx_;
     // The referenced class has already been resolved with the field, get it from the dex cache.
     Class* dex_access_to = referrer_dex_cache->GetResolvedType(class_idx);
@@ -233,15 +229,16 @@
   return false;
 }
 
-template <bool throw_on_failure, InvokeType throw_invoke_type>
-inline bool Class::CanAccessResolvedMethod(Class* access_to, ArtMethod* method,
-                                           uint32_t method_idx) {
+template <bool throw_on_failure, bool use_referrers_cache, InvokeType throw_invoke_type>
+inline bool Class::ResolvedMethodAccessTest(Class* access_to, ArtMethod* method,
+                                            uint32_t method_idx, DexCache* dex_cache) {
   COMPILE_ASSERT(throw_on_failure || throw_invoke_type == kStatic, non_default_throw_invoke_type);
+  DCHECK_EQ(use_referrers_cache, dex_cache == nullptr);
   if (UNLIKELY(!this->CanAccess(access_to))) {
     // The referrer class can't access the method's declaring class but may still be able
     // to access the method if the MethodId specifies an accessible subclass of the declaring
     // class rather than the declaring class itself.
-    DexCache* referrer_dex_cache = this->GetDexCache();
+    DexCache* referrer_dex_cache = use_referrers_cache ? this->GetDexCache() : dex_cache;
     uint32_t class_idx = referrer_dex_cache->GetDexFile()->GetMethodId(method_idx).class_idx_;
     // The referenced class has already been resolved with the method, get it from the dex cache.
     Class* dex_access_to = referrer_dex_cache->GetResolvedType(class_idx);
@@ -265,10 +262,32 @@
   return false;
 }
 
-inline bool Class::IsSubClass(const Class* klass) const {
+inline bool Class::CanAccessResolvedField(Class* access_to, ArtField* field,
+                                          DexCache* dex_cache, uint32_t field_idx) {
+  return ResolvedFieldAccessTest<false, false>(access_to, field, field_idx, dex_cache);
+}
+
+inline bool Class::CheckResolvedFieldAccess(Class* access_to, ArtField* field,
+                                            uint32_t field_idx) {
+  return ResolvedFieldAccessTest<true, true>(access_to, field, field_idx, nullptr);
+}
+
+inline bool Class::CanAccessResolvedMethod(Class* access_to, ArtMethod* method,
+                                           DexCache* dex_cache, uint32_t method_idx) {
+  return ResolvedMethodAccessTest<false, false, kStatic>(access_to, method, method_idx, dex_cache);
+}
+
+template <InvokeType throw_invoke_type>
+inline bool Class::CheckResolvedMethodAccess(Class* access_to, ArtMethod* method,
+                                             uint32_t method_idx) {
+  return ResolvedMethodAccessTest<true, true, throw_invoke_type>(access_to, method, method_idx,
+                                                                 nullptr);
+}
+
+inline bool Class::IsSubClass(Class* klass) {
   DCHECK(!IsInterface()) << PrettyClass(this);
   DCHECK(!IsArrayClass()) << PrettyClass(this);
-  const Class* current = this;
+  Class* current = this;
   do {
     if (current == klass) {
       return true;
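
The ResolvedFieldAccessTest/ResolvedMethodAccessTest pair introduced above folds two call sites into one template: a compile-time use_referrers_cache flag selects between the referrer's own dex cache and an explicitly supplied one, and a DCHECK ties the flag to the nullptr convention. A toy reduction of the pattern (types and names invented):

#include <cassert>

struct DexCache { int id; };

struct Referrer {
  DexCache own_cache{1};

  // use_referrers_cache == true must pair with dex_cache == nullptr, exactly
  // the invariant the DCHECK_EQ above enforces.
  template <bool use_referrers_cache>
  const DexCache* PickCache(const DexCache* dex_cache) {
    assert(use_referrers_cache == (dex_cache == nullptr));
    return use_referrers_cache ? &own_cache : dex_cache;
  }
};

// Usage: referrer.PickCache<true>(nullptr) or referrer.PickCache<false>(&cache).
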
@@ -278,7 +297,7 @@
   return false;
 }
 
-inline ArtMethod* Class::FindVirtualMethodForInterface(ArtMethod* method) const {
+inline ArtMethod* Class::FindVirtualMethodForInterface(ArtMethod* method) {
   Class* declaring_class = method->GetDeclaringClass();
   DCHECK(declaring_class != NULL) << PrettyClass(this);
   DCHECK(declaring_class->IsInterface()) << PrettyMethod(method);
@@ -293,21 +312,19 @@
   return NULL;
 }
 
-inline ArtMethod* Class::FindVirtualMethodForVirtual(ArtMethod* method) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtMethod* Class::FindVirtualMethodForVirtual(ArtMethod* method) {
   DCHECK(!method->GetDeclaringClass()->IsInterface() || method->IsMiranda());
   // The argument method may be from a super class.
   // Use the index to a potentially overridden one for this instance's class.
   return GetVTable()->Get(method->GetMethodIndex());
 }
 
-inline ArtMethod* Class::FindVirtualMethodForSuper(ArtMethod* method) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtMethod* Class::FindVirtualMethodForSuper(ArtMethod* method) {
   DCHECK(!method->GetDeclaringClass()->IsInterface());
   return GetSuperClass()->GetVTable()->Get(method->GetMethodIndex());
 }
 
-inline ArtMethod* Class::FindVirtualMethodForVirtualOrInterface(ArtMethod* method) const {
+inline ArtMethod* Class::FindVirtualMethodForVirtualOrInterface(ArtMethod* method) {
   if (method->IsDirect()) {
     return method;
   }
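
FindVirtualMethodForVirtual above is plain vtable dispatch: the argument method's index selects the slot, and this class's vtable yields the possibly-overriding implementation. A toy sketch under that reading:

#include <vector>

struct Method {
  int method_index;  // slot in the declaring hierarchy's vtable
};

struct Klass {
  std::vector<Method*> vtable;

  // Returns this class's implementation of a possibly-inherited method.
  Method* FindVirtualMethodForVirtual(const Method* method) const {
    return vtable[method->method_index];
  }
};
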
@@ -317,11 +334,11 @@
   return FindVirtualMethodForVirtual(method);
 }
 
-inline IfTable* Class::GetIfTable() const {
-  return GetFieldObject<IfTable*>(OFFSET_OF_OBJECT_MEMBER(Class, iftable_), false);
+inline IfTable* Class::GetIfTable() {
+  return GetFieldObject<IfTable>(OFFSET_OF_OBJECT_MEMBER(Class, iftable_), false);
 }
 
-inline int32_t Class::GetIfTableCount() const {
+inline int32_t Class::GetIfTableCount() {
   IfTable* iftable = GetIfTable();
   if (iftable == NULL) {
     return 0;
@@ -333,59 +350,58 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, iftable_), new_iftable, false);
 }
 
-inline ObjectArray<ArtField>* Class::GetIFields() const {
+inline ObjectArray<ArtField>* Class::GetIFields() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtField>*>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), false);
+  return GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), false);
 }
 
 inline void Class::SetIFields(ObjectArray<ArtField>* new_ifields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>*>(
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField> >(
       OFFSET_OF_OBJECT_MEMBER(Class, ifields_), false));
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), new_ifields, false);
 }
 
-inline ObjectArray<ArtField>* Class::GetSFields() const {
+inline ObjectArray<ArtField>* Class::GetSFields() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtField>*>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), false);
+  return GetFieldObject<ObjectArray<ArtField> >(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), false);
 }
 
 inline void Class::SetSFields(ObjectArray<ArtField>* new_sfields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>*>(
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField> >(
       OFFSET_OF_OBJECT_MEMBER(Class, sfields_), false));
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), new_sfields, false);
 }
 
-inline size_t Class::NumStaticFields() const {
+inline uint32_t Class::NumStaticFields() {
   return (GetSFields() != NULL) ? GetSFields()->GetLength() : 0;
 }
 
-inline ArtField* Class::GetStaticField(uint32_t i) const  // TODO: uint16_t
+inline ArtField* Class::GetStaticField(uint32_t i)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return GetSFields()->Get(i);
 }
 
 inline void Class::SetStaticField(uint32_t i, ArtField* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtField>* sfields= GetFieldObject<ObjectArray<ArtField>*>(
+  ObjectArray<ArtField>* sfields = GetFieldObject<ObjectArray<ArtField> >(
       OFFSET_OF_OBJECT_MEMBER(Class, sfields_), false);
   sfields->Set(i, f);
 }
 
-inline size_t Class::NumInstanceFields() const {
+inline uint32_t Class::NumInstanceFields() {
   return (GetIFields() != NULL) ? GetIFields()->GetLength() : 0;
 }
 
-inline ArtField* Class::GetInstanceField(uint32_t i) const  // TODO: uint16_t
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtField* Class::GetInstanceField(uint32_t i) {  // TODO: uint16_t
   DCHECK_NE(NumInstanceFields(), 0U);
   return GetIFields()->Get(i);
 }
 
 inline void Class::SetInstanceField(uint32_t i, ArtField* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtField>* ifields= GetFieldObject<ObjectArray<ArtField>*>(
+  ObjectArray<ArtField>* ifields = GetFieldObject<ObjectArray<ArtField> >(
       OFFSET_OF_OBJECT_MEMBER(Class, ifields_), false);
   ifields->Set(i, f);
 }
@@ -395,7 +411,7 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), klass, false);
 }
 
-inline uint32_t Class::GetAccessFlags() const {
+inline uint32_t Class::GetAccessFlags() {
   // Check that the class is loaded, or that this is java.lang.String, which has a
   // circularity issue while loading the names of its members.
   DCHECK(IsLoaded() || IsErroneous() ||
@@ -405,8 +421,8 @@
   return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_), false);
 }
 
-inline String* Class::GetName() const {
-  return GetFieldObject<String*>(OFFSET_OF_OBJECT_MEMBER(Class, name_), false);
+inline String* Class::GetName() {
+  return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Class, name_), false);
 }
 inline void Class::SetName(String* name) {
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, name_), name, false);
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index bd965fa..8051c9b 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -125,7 +125,7 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), new_dex_cache, false);
 }
 
-void Class::SetClassSize(size_t new_class_size) {
+void Class::SetClassSize(uint32_t new_class_size) {
   if (kIsDebugBuild && (new_class_size < GetClassSize())) {
     DumpClass(LOG(ERROR), kDumpClassFullDetail);
     CHECK_GE(new_class_size, GetClassSize()) << " class=" << PrettyTypeOf(this);
@@ -177,7 +177,7 @@
   return name;
 }
 
-void Class::DumpClass(std::ostream& os, int flags) const {
+void Class::DumpClass(std::ostream& os, int flags) {
   if ((flags & kDumpClassFullDetail) == 0) {
     os << PrettyClass(this);
     if ((flags & kDumpClassClassLoader) != 0) {
@@ -281,9 +281,9 @@
   }
 }
 
-bool Class::IsInSamePackage(const Class* that) const {
-  const Class* klass1 = this;
-  const Class* klass2 = that;
+bool Class::IsInSamePackage(Class* that) {
+  Class* klass1 = this;
+  Class* klass2 = that;
   if (klass1 == klass2) {
     return true;
   }
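
When the classes differ, IsInSamePackage falls through to comparing descriptor prefixes via the static overload. A hedged sketch of that comparison on JVM-style descriptors such as "Ljava/lang/String;" (helper name invented; descriptors without a '/' are treated as the default package):

#include <cstddef>
#include <string>

// Two descriptors share a package iff the text before the last '/' matches;
// descriptors with no '/' both land in the default package.
bool SamePackage(const std::string& d1, const std::string& d2) {
  auto pkg = [](const std::string& d) {
    const std::size_t pos = d.rfind('/');
    return pos == std::string::npos ? std::string() : d.substr(0, pos);
  };
  return pkg(d1) == pkg(d2);
}
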
@@ -307,7 +307,7 @@
                          ClassHelper(klass2).GetDescriptor());
 }
 
-bool Class::IsClassClass() const {
+bool Class::IsClassClass() {
   Class* java_lang_Class = GetClass()->GetClass();
   return this == java_lang_Class;
 }
@@ -316,17 +316,17 @@
   return this == String::GetJavaLangString();
 }
 
-bool Class::IsThrowableClass() const {
+bool Class::IsThrowableClass() {
   return WellKnownClasses::ToClass(WellKnownClasses::java_lang_Throwable)->IsAssignableFrom(this);
 }
 
-bool Class::IsArtFieldClass() const {
+bool Class::IsArtFieldClass() {
   Class* java_lang_Class = GetClass();
   Class* java_lang_reflect_ArtField = java_lang_Class->GetInstanceField(0)->GetClass();
   return this == java_lang_reflect_ArtField;
 }
 
-bool Class::IsArtMethodClass() const {
+bool Class::IsArtMethodClass() {
   return this == ArtMethod::GetJavaLangReflectArtMethod();
 }
 
@@ -334,7 +334,7 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, class_loader_), new_class_loader, false);
 }
 
-ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature) const {
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(name, signature);
   if (method != NULL) {
@@ -352,7 +352,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
+ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
   if (method != NULL) {
@@ -370,7 +370,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) const {
+ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) {
   MethodHelper mh;
   for (size_t i = 0; i < NumDirectMethods(); ++i) {
     ArtMethod* method = GetDirectMethod(i);
@@ -382,7 +382,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) const {
+ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) {
   MethodHelper mh;
   for (size_t i = 0; i < NumDirectMethods(); ++i) {
     ArtMethod* method = GetDirectMethod(i);
@@ -394,7 +394,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
+ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
   if (GetDexCache() == dex_cache) {
     for (size_t i = 0; i < NumDirectMethods(); ++i) {
       ArtMethod* method = GetDirectMethod(i);
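
The FindDeclared* family in class.cc is a linear scan of the declared methods comparing name and signature, and the Find* family repeats that scan up the superclass chain. A compact toy version of the scan:

#include <string>
#include <vector>

struct Method {
  std::string name;
  std::string signature;
};

// Linear scan over a class's declared methods, as in FindDeclaredDirectMethod.
const Method* FindDeclaredMethod(const std::vector<Method>& methods,
                                 const std::string& name,
                                 const std::string& signature) {
  for (const Method& m : methods) {
    if (m.name == name && m.signature == signature) {
      return &m;
    }
  }
  return nullptr;
}
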
@@ -406,8 +406,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDirectMethod(const StringPiece& name, const StringPiece& signature) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindDirectMethod(const StringPiece& name, const StringPiece& signature) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature);
     if (method != NULL) {
       return method;
@@ -416,8 +416,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDirectMethod(const StringPiece& name, const Signature& signature) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindDirectMethod(const StringPiece& name, const Signature& signature) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature);
     if (method != NULL) {
       return method;
@@ -426,8 +426,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(dex_cache, dex_method_idx);
     if (method != NULL) {
       return method;
@@ -436,7 +436,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) const {
+ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) {
   MethodHelper mh;
   for (size_t i = 0; i < NumVirtualMethods(); ++i) {
     ArtMethod* method = GetVirtualMethod(i);
@@ -449,7 +449,7 @@
 }
 
 ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name,
-                                            const Signature& signature) const {
+                                            const Signature& signature) {
   MethodHelper mh;
   for (size_t i = 0; i < NumVirtualMethods(); ++i) {
     ArtMethod* method = GetVirtualMethod(i);
@@ -461,7 +461,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
+ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
   if (GetDexCache() == dex_cache) {
     for (size_t i = 0; i < NumVirtualMethods(); ++i) {
       ArtMethod* method = GetVirtualMethod(i);
@@ -473,8 +473,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const StringPiece& signature) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const StringPiece& signature) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature);
     if (method != NULL) {
       return method;
@@ -483,8 +483,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const Signature& signature) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const Signature& signature) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature);
     if (method != NULL) {
       return method;
@@ -493,8 +493,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
     if (method != NULL) {
       return method;
@@ -503,7 +503,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindClassInitializer() const {
+ArtMethod* Class::FindClassInitializer() {
   for (size_t i = 0; i < NumDirectMethods(); ++i) {
     ArtMethod* method = GetDirectMethod(i);
     if (method->IsConstructor() && method->IsStatic()) {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index d751363..cbec476 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -120,7 +120,7 @@
     kStatusMax = 10,
   };
 
-  Status GetStatus() const {
+  Status GetStatus() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Status), sizeof(uint32_t));
     return static_cast<Status>(GetField32(OFFSET_OF_OBJECT_MEMBER(Class, status_), true));
   }
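
Most of this change trades const member functions for SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) annotations. Assuming the macro wraps Clang's thread-safety attributes, which is the usual arrangement but an assumption here, this is roughly what the annotation buys under -Wthread-safety:

#if defined(__clang__)
#define CAPABILITY(x) __attribute__((capability(x)))
#define SHARED_LOCKS_REQUIRED(...) __attribute__((shared_locks_required(__VA_ARGS__)))
#else
#define CAPABILITY(x)
#define SHARED_LOCKS_REQUIRED(...)
#endif

struct CAPABILITY("mutex") ReaderWriterMutex {};
ReaderWriterMutex mutator_lock_;  // stand-in for Locks::mutator_lock_

int guarded_field = 0;

// Clang warns at any call site that cannot prove mutator_lock_ is held in at
// least shared (reader) mode; with the macro empty, other compilers ignore it.
int ReadGuardedField() SHARED_LOCKS_REQUIRED(mutator_lock_) {
  return guarded_field;
}
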
@@ -132,107 +132,107 @@
   }
 
   // Returns true if the class has failed to link.
-  bool IsErroneous() const {
+  bool IsErroneous() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() == kStatusError;
   }
 
   // Returns true if the class has at least reached the kStatusIdx stage of loading.
-  bool IsIdxLoaded() const {
+  bool IsIdxLoaded() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusIdx;
   }
 
   // Returns true if the class has been loaded.
-  bool IsLoaded() const {
+  bool IsLoaded() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusLoaded;
   }
 
   // Returns true if the class has been linked.
-  bool IsResolved() const {
+  bool IsResolved() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusResolved;
   }
 
   // Returns true if the class was compile-time verified.
-  bool IsCompileTimeVerified() const {
+  bool IsCompileTimeVerified() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusRetryVerificationAtRuntime;
   }
 
   // Returns true if the class has been verified.
-  bool IsVerified() const {
+  bool IsVerified() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusVerified;
   }
 
   // Returns true if the class is initializing.
-  bool IsInitializing() const {
+  bool IsInitializing() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusInitializing;
   }
 
   // Returns true if the class is initialized.
-  bool IsInitialized() const {
+  bool IsInitialized() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() == kStatusInitialized;
   }
 
-  uint32_t GetAccessFlags() const;
+  uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetAccessFlags(uint32_t new_access_flags) {
+  void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_), new_access_flags, false);
   }
 
   // Returns true if the class is an interface.
-  bool IsInterface() const {
+  bool IsInterface() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccInterface) != 0;
   }
 
   // Returns true if the class is declared public.
-  bool IsPublic() const {
+  bool IsPublic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPublic) != 0;
   }
 
   // Returns true if the class is declared final.
-  bool IsFinal() const {
+  bool IsFinal() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
-  bool IsFinalizable() const {
+  bool IsFinalizable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsFinalizable) != 0;
   }
 
-  void SetFinalizable() {
+  void SetFinalizable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_), false);
     SetAccessFlags(flags | kAccClassIsFinalizable);
   }
 
   // Returns true if the class is abstract.
-  bool IsAbstract() const {
+  bool IsAbstract() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccAbstract) != 0;
   }
 
   // Returns true if the class is an annotation.
-  bool IsAnnotation() const {
+  bool IsAnnotation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccAnnotation) != 0;
   }
 
   // Returns true if the class is synthetic.
-  bool IsSynthetic() const {
+  bool IsSynthetic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
-  bool IsReferenceClass() const {
+  bool IsReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsReference) != 0;
   }
 
-  bool IsWeakReferenceClass() const {
+  bool IsWeakReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsWeakReference) != 0;
   }
 
-  bool IsSoftReferenceClass() const {
+  bool IsSoftReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccReferenceFlagsMask) == kAccClassIsReference;
   }
 
-  bool IsFinalizerReferenceClass() const {
+  bool IsFinalizerReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsFinalizerReference) != 0;
   }
 
-  bool IsPhantomReferenceClass() const {
+  bool IsPhantomReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsPhantomReference) != 0;
   }
 
@@ -241,7 +241,7 @@
   // For array classes, where all the classes are final due to there being no sub-classes, an
   // Object[] may be assigned to by a String[] but a String[] may not be assigned to by other
   // types as the component is final.
-  bool CannotBeAssignedFromOtherTypes() const {
+  bool CannotBeAssignedFromOtherTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (!IsArrayClass()) {
       return IsFinal();
     } else {
@@ -254,12 +254,12 @@
     }
   }
 
-  String* GetName() const;  // Returns the cached name.
+  String* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);  // Returns the cached name.
   void SetName(String* name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);  // Sets the cached name.
   // Computes the name, then sets the cached value.
   String* ComputeName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsProxyClass() const {
+  bool IsProxyClass() {
     // Read access flags without using the getter, as whether something is a proxy can be checked
     // in any loaded state.
     // TODO: switch to a check if the super class is java.lang.reflect.Proxy?
@@ -267,91 +267,91 @@
     return (access_flags & kAccClassIsProxy) != 0;
   }
 
-  Primitive::Type GetPrimitiveType() const {
+  Primitive::Type GetPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
     return static_cast<Primitive::Type>(
         GetField32(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), false));
   }
 
-  void SetPrimitiveType(Primitive::Type new_type) {
+  void SetPrimitiveType(Primitive::Type new_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), new_type, false);
   }
 
   // Returns true if the class is a primitive type.
-  bool IsPrimitive() const {
+  bool IsPrimitive() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() != Primitive::kPrimNot;
   }
 
-  bool IsPrimitiveBoolean() const {
+  bool IsPrimitiveBoolean() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimBoolean;
   }
 
-  bool IsPrimitiveByte() const {
+  bool IsPrimitiveByte() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimByte;
   }
 
-  bool IsPrimitiveChar() const {
+  bool IsPrimitiveChar() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimChar;
   }
 
-  bool IsPrimitiveShort() const {
+  bool IsPrimitiveShort() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimShort;
   }
 
-  bool IsPrimitiveInt() const {
+  bool IsPrimitiveInt() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimInt;
   }
 
-  bool IsPrimitiveLong() const {
+  bool IsPrimitiveLong() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimLong;
   }
 
-  bool IsPrimitiveFloat() const {
+  bool IsPrimitiveFloat() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimFloat;
   }
 
-  bool IsPrimitiveDouble() const {
+  bool IsPrimitiveDouble() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimDouble;
   }
 
-  bool IsPrimitiveVoid() const {
+  bool IsPrimitiveVoid() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimVoid;
   }
 
-  bool IsPrimitiveArray() const {
+  bool IsPrimitiveArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return IsArrayClass() && GetComponentType()->IsPrimitive();
   }
 
   // Depth of class from java.lang.Object
-  size_t Depth() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    size_t depth = 0;
+  uint32_t Depth() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uint32_t depth = 0;
     for (Class* klass = this; klass->GetSuperClass() != NULL; klass = klass->GetSuperClass()) {
       depth++;
     }
     return depth;
   }
 
-  bool IsArrayClass() const {
+  bool IsArrayClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetComponentType() != NULL;
   }
 
-  bool IsClassClass() const;
+  bool IsClassClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsStringClass() const;
 
-  bool IsThrowableClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsThrowableClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsArtFieldClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsArtFieldClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsArtMethodClass() const;
+  bool IsArtMethodClass();
 
   static MemberOffset ComponentTypeOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, component_type_);
   }
 
-  Class* GetComponentType() const {
-    return GetFieldObject<Class*>(ComponentTypeOffset(), false);
+  Class* GetComponentType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<Class>(ComponentTypeOffset(), false);
   }
 
   void SetComponentType(Class* new_component_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -360,18 +360,18 @@
     SetFieldObject(ComponentTypeOffset(), new_component_type, false);
   }
 
-  size_t GetComponentSize() const {
+  size_t GetComponentSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return Primitive::ComponentSize(GetComponentType()->GetPrimitiveType());
   }
 
-  bool IsObjectClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool IsObjectClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return !IsPrimitive() && GetSuperClass() == NULL;
   }
-  bool IsInstantiable() const {
+  bool IsInstantiable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (!IsPrimitive() && !IsInterface() && !IsAbstract()) || ((IsAbstract()) && IsArrayClass());
   }
 
-  bool IsObjectArrayClass() const {
+  bool IsObjectArrayClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetComponentType() != NULL && !GetComponentType()->IsPrimitive();
   }
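
Depth() above is a plain walk up the superclass chain, counting links until the root (whose super class is null). A toy equivalent:

#include <cstdint>

struct Klass {
  const Klass* super_class = nullptr;
};

// Number of superclass links between k and the root class.
std::uint32_t Depth(const Klass* k) {
  std::uint32_t depth = 0;
  for (; k->super_class != nullptr; k = k->super_class) {
    ++depth;
  }
  return depth;
}
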
 
@@ -385,48 +385,44 @@
   Object* AllocNonMovableObject(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsVariableSize() const {
+  bool IsVariableSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Classes and arrays vary in size, and so the object_size_ field cannot
     // be used to get their instance size
     return IsClassClass() || IsArrayClass();
   }
 
-  size_t SizeOf() const {
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
+  uint32_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, class_size_), false);
   }
 
-  size_t GetClassSize() const {
-    DCHECK_EQ(sizeof(size_t), sizeof(uint32_t));
+  uint32_t GetClassSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, class_size_), false);
   }
 
-  void SetClassSize(size_t new_class_size)
+  void SetClassSize(uint32_t new_class_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t GetObjectSize() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t GetObjectSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetObjectSize(size_t new_object_size) {
+  void SetObjectSize(uint32_t new_object_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!IsVariableSize());
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     return SetField32(OFFSET_OF_OBJECT_MEMBER(Class, object_size_), new_object_size, false);
   }
 
   // Returns true if this class is in the same package as that class.
-  bool IsInSamePackage(const Class* that) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsInSamePackage(Class* that) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static bool IsInSamePackage(const StringPiece& descriptor1, const StringPiece& descriptor2);
 
   // Returns true if this class can access that class.
-  bool CanAccess(Class* that) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool CanAccess(Class* that) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return that->IsPublic() || this->IsInSamePackage(that);
   }
 
   // Can this class access a member in the provided class with the provided member access flags?
   // Note that access to the class isn't checked in case the declaring class is protected and the
   // method has been exposed by a public sub-class.
-  bool CanAccessMember(Class* access_to, uint32_t member_flags) const
+  bool CanAccessMember(Class* access_to, uint32_t member_flags)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Classes can access all of their own members
     if (this == access_to) {
@@ -453,26 +449,32 @@
   // Can this class access a resolved field?
   // Note that access to the field's class is checked and this may require looking up the class
   // referenced by the FieldId in the DexFile in case the declaring class is inaccessible.
-  template <bool throw_on_failure>
   bool CanAccessResolvedField(Class* access_to, ArtField* field,
-                              uint32_t field_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+                              DexCache* dex_cache, uint32_t field_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool CheckResolvedFieldAccess(Class* access_to, ArtField* field,
+                                uint32_t field_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Can this class access a resolved method?
   // Note that access to the method's class is checked and this may require looking up the class
   // referenced by the MethodId in the DexFile in case the declaring class is inaccessible.
-  template <bool throw_on_failure, InvokeType throw_invoke_type = kStatic>
   bool CanAccessResolvedMethod(Class* access_to, ArtMethod* resolved_method,
-                               uint32_t method_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  bool IsSubClass(const Class* klass) const
+                               DexCache* dex_cache, uint32_t method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  template <InvokeType throw_invoke_type>
+  bool CheckResolvedMethodAccess(Class* access_to, ArtMethod* resolved_method,
+                                 uint32_t method_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  bool IsSubClass(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Can src be assigned to this class? For example, String can be assigned to Object (by an
   // upcast); however, an Object cannot be assigned to a String as a potentially exception-throwing
   // downcast would be necessary. Similarly for interfaces, a class that implements (or an interface
   // that extends) another can be assigned to its parent, but not vice-versa. All Classes may assign
   // to themselves. Classes for primitive types may not assign to each other.
-  inline bool IsAssignableFrom(const Class* src) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  inline bool IsAssignableFrom(Class* src) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(src != NULL);
     if (this == src) {
       // Can always assign to things of the same type.
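
For plain reference types, the contract spelled out above reduces to walking src's superclass chain looking for this class; the interface and array branches handle the rest. A toy sketch of just that case (String to Object succeeds, the reverse fails):

struct Klass {
  const Klass* super_class = nullptr;
};

// dst->IsAssignableFrom(src) for plain classes: scan src's superclass chain.
bool IsAssignableFrom(const Klass* dst, const Klass* src) {
  for (; src != nullptr; src = src->super_class) {
    if (src == dst) {
      return true;
    }
  }
  return false;
}
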
@@ -489,18 +491,18 @@
     }
   }
 
-  Class* GetSuperClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Class* GetSuperClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSuperClass(Class *new_super_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // super class is assigned once, except during class linker initialization
-    Class* old_super_class = GetFieldObject<Class*>(
-        OFFSET_OF_OBJECT_MEMBER(Class, super_class_), false);
-    DCHECK(old_super_class == NULL || old_super_class == new_super_class);
-    DCHECK(new_super_class != NULL);
+    // Super class is assigned once, except during class linker initialization.
+    Class* old_super_class = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_),
+                                                   false);
+    DCHECK(old_super_class == nullptr || old_super_class == new_super_class);
+    DCHECK(new_super_class != nullptr);
     SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, super_class_), new_super_class, false);
   }
 
-  bool HasSuperClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool HasSuperClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetSuperClass() != NULL;
   }
 
@@ -508,7 +510,7 @@
     return MemberOffset(OFFSETOF_MEMBER(Class, super_class_));
   }
 
-  ClassLoader* GetClassLoader() const;
+  ClassLoader* GetClassLoader() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetClassLoader(ClassLoader* new_cl) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -522,46 +524,43 @@
     kDumpClassInitialized = (1 << 2),
   };
 
-  void DumpClass(std::ostream& os, int flags) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DumpClass(std::ostream& os, int flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  DexCache* GetDexCache() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDexCache(DexCache* new_dex_cache) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetDirectMethods() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ObjectArray<ArtMethod>* GetDirectMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDirectMethods(ObjectArray<ArtMethod>* new_direct_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* GetDirectMethod(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* GetDirectMethod(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDirectMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns the number of static, private, and constructor methods.
-  size_t NumDirectMethods() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t NumDirectMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetVirtualMethods() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ObjectArray<ArtMethod>* GetVirtualMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetVirtualMethods(ObjectArray<ArtMethod>* new_virtual_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns the number of non-inherited virtual methods.
-  size_t NumVirtualMethods() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t NumVirtualMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* GetVirtualMethod(uint32_t i) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* GetVirtualMethod(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* GetVirtualMethodDuringLinking(uint32_t i) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* GetVirtualMethodDuringLinking(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetVirtualMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetVTable() const;
+  ObjectArray<ArtMethod>* GetVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetVTableDuringLinking() const;
+  ObjectArray<ArtMethod>* GetVTableDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetVTable(ObjectArray<ArtMethod>* new_vtable)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -570,7 +569,7 @@
     return OFFSET_OF_OBJECT_MEMBER(Class, vtable_);
   }
 
-  ObjectArray<ArtMethod>* GetImTable() const;
+  ObjectArray<ArtMethod>* GetImTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetImTable(ObjectArray<ArtMethod>* new_imtable)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -581,105 +580,102 @@
 
   // Given a method implemented by this class but potentially from a super class, return the
   // specific implementation method for this class.
-  ArtMethod* FindVirtualMethodForVirtual(ArtMethod* method) const
+  ArtMethod* FindVirtualMethodForVirtual(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Given a method implemented by this class' super class, return the specific implementation
   // method for this class.
-  ArtMethod* FindVirtualMethodForSuper(ArtMethod* method) const
+  ArtMethod* FindVirtualMethodForSuper(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Given a method implemented by this class, but potentially from a
   // super class or interface, return the specific implementation
   // method for this class.
-  ArtMethod* FindVirtualMethodForInterface(ArtMethod* method) const
+  ArtMethod* FindVirtualMethodForInterface(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE;
 
-  ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method) const
+  ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature) const
+  ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+  ArtMethod* FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) const
+  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) const
+  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+  ArtMethod* FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDirectMethod(const StringPiece& name, const StringPiece& signature) const
+  ArtMethod* FindDirectMethod(const StringPiece& name, const StringPiece& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDirectMethod(const StringPiece& name, const Signature& signature) const
+  ArtMethod* FindDirectMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+  ArtMethod* FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) const
+  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature) const
+  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+  ArtMethod* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindVirtualMethod(const StringPiece& name, const StringPiece& signature) const
+  ArtMethod* FindVirtualMethod(const StringPiece& name, const StringPiece& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindVirtualMethod(const StringPiece& name, const Signature& signature) const
+  ArtMethod* FindVirtualMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+  ArtMethod* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindClassInitializer() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* FindClassInitializer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetIfTableCount() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int32_t GetIfTableCount() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  IfTable* GetIfTable() const;
+  IfTable* GetIfTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetIfTable(IfTable* new_iftable) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get instance fields of the class (See also GetSFields).
-  ObjectArray<ArtField>* GetIFields() const;
+  ObjectArray<ArtField>* GetIFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetIFields(ObjectArray<ArtField>* new_ifields) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t NumInstanceFields() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t NumInstanceFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtField* GetInstanceField(uint32_t i) const  // TODO: uint16_t
+  ArtField* GetInstanceField(uint32_t i)  // TODO: uint16_t
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetInstanceField(uint32_t i, ArtField* f)  // TODO: uint16_t
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns the number of instance fields containing reference types.
-  size_t NumReferenceInstanceFields() const {
+  uint32_t NumReferenceInstanceFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsResolved() || IsErroneous());
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_instance_fields_), false);
   }
 
-  size_t NumReferenceInstanceFieldsDuringLinking() const {
+  uint32_t NumReferenceInstanceFieldsDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsLoaded() || IsErroneous());
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_instance_fields_), false);
   }
 
-  void SetNumReferenceInstanceFields(size_t new_num) {
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
+  void SetNumReferenceInstanceFields(uint32_t new_num) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_instance_fields_), new_num, false);
   }
 
-  uint32_t GetReferenceInstanceOffsets() const {
+  uint32_t GetReferenceInstanceOffsets() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsResolved() || IsErroneous());
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, reference_instance_offsets_), false);
   }
@@ -693,39 +689,39 @@
   }
 
   // Returns the number of static fields containing reference types.
-  size_t NumReferenceStaticFields() const {
+  uint32_t NumReferenceStaticFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsResolved() || IsErroneous());
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_static_fields_), false);
   }
 
-  size_t NumReferenceStaticFieldsDuringLinking() const {
+  uint32_t NumReferenceStaticFieldsDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsLoaded() || IsErroneous());
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_static_fields_), false);
   }
 
-  void SetNumReferenceStaticFields(size_t new_num) {
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
+  void SetNumReferenceStaticFields(uint32_t new_num) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_static_fields_), new_num, false);
   }
 
   // Gets the static fields of the class.
-  ObjectArray<ArtField>* GetSFields() const;
+  ObjectArray<ArtField>* GetSFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSFields(ObjectArray<ArtField>* new_sfields) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t NumStaticFields() const;
+  uint32_t NumStaticFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtField* GetStaticField(uint32_t i) const;  // TODO: uint16_t
+  // TODO: uint16_t
+  ArtField* GetStaticField(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetStaticField(uint32_t i, ArtField* f);  // TODO: uint16_t
+  // TODO: uint16_t
+  void SetStaticField(uint32_t i, ArtField* f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t GetReferenceStaticOffsets() const {
+  uint32_t GetReferenceStaticOffsets() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, reference_static_offsets_), false);
   }
 
-  void SetReferenceStaticOffsets(uint32_t new_reference_offsets);
+  void SetReferenceStaticOffsets(uint32_t new_reference_offsets)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find a static or instance field using the JLS resolution order
   ArtField* FindField(const StringPiece& name, const StringPiece& type)
@@ -761,33 +757,33 @@
   ArtField* FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex_field_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  pid_t GetClinitThreadId() const {
+  pid_t GetClinitThreadId() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsIdxLoaded() || IsErroneous());
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_), false);
   }
 
-  void SetClinitThreadId(pid_t new_clinit_thread_id) {
+  void SetClinitThreadId(pid_t new_clinit_thread_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_), new_clinit_thread_id, false);
   }
 
-  Class* GetVerifyErrorClass() const {
+  Class* GetVerifyErrorClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // DCHECK(IsErroneous());
-    return GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), false);
+    return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), false);
   }
 
-  uint16_t GetDexClassDefIndex() const {
+  uint16_t GetDexClassDefIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_class_def_idx_), false);
   }
 
-  void SetDexClassDefIndex(uint16_t class_def_idx) {
+  void SetDexClassDefIndex(uint16_t class_def_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_class_def_idx_), class_def_idx, false);
   }
 
-  uint16_t GetDexTypeIndex() const {
+  uint16_t GetDexTypeIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), false);
   }
 
-  void SetDexTypeIndex(uint16_t type_idx) {
+  void SetDexTypeIndex(uint16_t type_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx, false);
   }
 
@@ -808,28 +804,34 @@
  private:
   void SetVerifyErrorClass(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool Implements(const Class* klass) const
+  template <bool throw_on_failure, bool use_referrers_cache>
+  bool ResolvedFieldAccessTest(Class* access_to, ArtField* field,
+                               uint32_t field_idx, DexCache* dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsArrayAssignableFromArray(const Class* klass) const
+  template <bool throw_on_failure, bool use_referrers_cache, InvokeType throw_invoke_type>
+  bool ResolvedMethodAccessTest(Class* access_to, ArtMethod* resolved_method,
+                                uint32_t method_idx, DexCache* dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsAssignableFromArray(const Class* klass) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  bool Implements(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsArrayAssignableFromArray(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsAssignableFromArray(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void CheckObjectAlloc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // defining class loader, or NULL for the "bootstrap" system loader
-  ClassLoader* class_loader_;
+  HeapReference<ClassLoader> class_loader_;
 
   // For array classes, the component class object for instanceof/checkcast
   // (for String[][][], this will be String[][]). NULL for non-array classes.
-  Class* component_type_;
+  HeapReference<Class> component_type_;
 
   // DexCache of resolved constant pool entries (will be NULL for classes generated by the
   // runtime such as arrays and primitive classes).
-  DexCache* dex_cache_;
+  HeapReference<DexCache> dex_cache_;
 
   // static, private, and <init> methods
-  ObjectArray<ArtMethod>* direct_methods_;
+  HeapReference<ObjectArray<ArtMethod> > direct_methods_;
 
   // instance fields
   //
@@ -841,7 +843,7 @@
   // All instance fields that refer to objects are guaranteed to be at
   // the beginning of the field list.  num_reference_instance_fields_
   // specifies the number of reference fields.
-  ObjectArray<ArtField>* ifields_;
+  HeapReference<ObjectArray<ArtField> > ifields_;
 
   // The interface table (iftable_) contains pairs of an interface class and an array of the
   // interface methods. There is one pair per interface supported by this class.  That means one
@@ -854,38 +856,38 @@
   //
   // For every interface a concrete class implements, we create an array of the concrete vtable_
   // methods for the methods in the interface.
-  IfTable* iftable_;
+  HeapReference<IfTable> iftable_;
 
   // Interface method table (imt), for quick "invoke-interface".
-  ObjectArray<ArtMethod>* imtable_;
+  HeapReference<ObjectArray<ArtMethod> > imtable_;
 
-  // descriptor for the class such as "java.lang.Class" or "[C". Lazily initialized by ComputeName
-  String* name_;
+  // Descriptor for the class such as "java.lang.Class" or "[C". Lazily initialized by ComputeName.
+  HeapReference<String> name_;
 
   // Static fields
-  ObjectArray<ArtField>* sfields_;
+  HeapReference<ObjectArray<ArtField> > sfields_;
 
   // The superclass, or NULL if this is java.lang.Object, an interface or primitive type.
-  Class* super_class_;
+  HeapReference<Class> super_class_;
 
   // If class verify fails, we must return same error on subsequent tries.
-  Class* verify_error_class_;
+  HeapReference<Class> verify_error_class_;
 
   // Virtual methods defined in this class; invoked through vtable.
-  ObjectArray<ArtMethod>* virtual_methods_;
+  HeapReference<ObjectArray<ArtMethod> > virtual_methods_;
 
   // Virtual method table (vtable), for use by "invoke-virtual".  The vtable from the superclass is
   // copied in, and virtual methods from our class either replace those from the super or are
   // appended. For abstract classes, methods may be created in the vtable that aren't in
   // virtual_methods_ for miranda methods.
-  ObjectArray<ArtMethod>* vtable_;
+  HeapReference<ObjectArray<ArtMethod> > vtable_;
 
   // Access flags; low 16 bits are defined by VM spec.
   uint32_t access_flags_;
 
   // Total size of the Class instance; used when allocating storage on gc heap.
   // See also object_size_.
-  size_t class_size_;
+  uint32_t class_size_;
 
   // Tid used to check for recursive <clinit> invocation.
   pid_t clinit_thread_id_;
@@ -899,15 +901,15 @@
   int32_t dex_type_idx_;
 
   // Number of instance fields that are object refs.
-  size_t num_reference_instance_fields_;
+  uint32_t num_reference_instance_fields_;
 
   // Number of static fields that are object refs.
-  size_t num_reference_static_fields_;
+  uint32_t num_reference_static_fields_;
 
   // Total object size; used when allocating storage on gc heap.
   // (For interfaces and abstract classes this will be zero.)
   // See also class_size_.
-  size_t object_size_;
+  uint32_t object_size_;
 
   // Primitive type value, or Primitive::kPrimNot (0); set for generated primitive classes.
   Primitive::Type primitive_type_;
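
The class.h hunks above make two systematic substitutions: every raw mirror pointer field becomes a HeapReference<> wrapper, and every size_t count or size becomes uint32_t, so a Class has the same field layout on 32-bit and 64-bit hosts. The wrapper itself is defined in the new runtime/mirror/object_reference.h added at the end of this change (not reproduced in full); the following is only a minimal sketch of the shape the accessors rely on, with all names and the plain-word encoding being assumptions.

#include <stdint.h>

// Minimal sketch, NOT the real runtime/mirror/object_reference.h. Assumes the
// reference is stored as a plain 32-bit word holding the object address.
template<class MirrorType>
class HeapReferenceSketch {
 public:
  MirrorType* AsMirrorPtr() const {
    return reinterpret_cast<MirrorType*>(static_cast<uintptr_t>(reference_));
  }
  void Assign(MirrorType* other) {
    reference_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(other));
  }
  static HeapReferenceSketch FromMirrorPtr(MirrorType* p) {
    HeapReferenceSketch ref;
    ref.Assign(p);
    return ref;
  }
  // Always 32 bits wide, so mirror object layouts match the managed side
  // regardless of host pointer width; public so CAS helpers can use the raw word.
  uint32_t reference_;
};
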
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index 415cb67..69accf5 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -32,9 +32,9 @@
 class MANAGED ClassLoader : public Object {
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  Object* packages_;
-  ClassLoader* parent_;
-  Object* proxyCache_;
+  HeapReference<Object> packages_;
+  HeapReference<ClassLoader> parent_;
+  HeapReference<Object> proxyCache_;
 
   friend struct art::ClassLoaderOffsets;  // for verifying offset information
   DISALLOW_IMPLICIT_CONSTRUCTORS(ClassLoader);
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index da26be5..f59c3a2 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -22,7 +22,7 @@
 namespace art {
 namespace mirror {
 
-inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx) const
+inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ArtMethod* method = GetResolvedMethods()->Get(method_idx);
   // Hide resolution trampoline methods from the caller
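
The hunk above drops const from GetResolvedMethod, but the rest of the body is cut off by the hunk context. Per the retained comment, slots that merely hold the resolution trampoline must be hidden from callers. A standalone sketch of that pattern follows; the IsRuntimeMethod() sentinel test and the helper name are assumptions for illustration, not the elided body.

// Hypothetical helper, not code from this change.
ArtMethod* GetResolvedOrNullSketch(ObjectArray<ArtMethod>* resolved_methods,
                                   uint32_t method_idx) {
  ArtMethod* method = resolved_methods->Get(method_idx);
  // A slot still holding the shared resolution trampoline means "not yet
  // resolved", so report nullptr rather than leak the runtime-internal method.
  if (method != nullptr && method->IsRuntimeMethod()) {
    return nullptr;
  }
  return method;
}
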
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index a5fe598..99529f0 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -52,8 +52,8 @@
 
   void Fixup(ArtMethod* trampoline) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  String* GetLocation() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<String*>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), false);
+  String* GetLocation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), false);
   }
 
   static MemberOffset StringsOffset() {
@@ -68,24 +68,23 @@
     return OFFSET_OF_OBJECT_MEMBER(DexCache, resolved_methods_);
   }
 
-  size_t NumStrings() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t NumStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStrings()->GetLength();
   }
 
-  size_t NumResolvedTypes() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t NumResolvedTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetResolvedTypes()->GetLength();
   }
 
-  size_t NumResolvedMethods() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t NumResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetResolvedMethods()->GetLength();
   }
 
-  size_t NumResolvedFields() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t NumResolvedFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetResolvedFields()->GetLength();
   }
 
-  String* GetResolvedString(uint32_t string_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  String* GetResolvedString(uint32_t string_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStrings()->Get(string_idx);
   }
 
@@ -94,8 +93,7 @@
     GetStrings()->Set(string_idx, resolved);
   }
 
-  Class* GetResolvedType(uint32_t type_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  Class* GetResolvedType(uint32_t type_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetResolvedTypes()->Get(type_idx);
   }
 
@@ -104,16 +102,14 @@
     GetResolvedTypes()->Set(type_idx, resolved);
   }
 
-  ArtMethod* GetResolvedMethod(uint32_t method_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* GetResolvedMethod(uint32_t method_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetResolvedMethod(uint32_t method_idx, ArtMethod* resolved)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     GetResolvedMethods()->Set(method_idx, resolved);
   }
 
-  ArtField* GetResolvedField(uint32_t field_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ArtField* GetResolvedField(uint32_t field_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetResolvedFields()->Get(field_idx);
   }
 
@@ -122,28 +118,24 @@
     GetResolvedFields()->Set(field_idx, resolved);
   }
 
-  ObjectArray<String>* GetStrings() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<String>* >(StringsOffset(), false);
+  ObjectArray<String>* GetStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject< ObjectArray<String> >(StringsOffset(), false);
   }
 
-  ObjectArray<Class>* GetResolvedTypes() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<Class>* >(
+  ObjectArray<Class>* GetResolvedTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<Class> >(
         OFFSET_OF_OBJECT_MEMBER(DexCache, resolved_types_), false);
   }
 
-  ObjectArray<ArtMethod>* GetResolvedMethods() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<ArtMethod>* >(ResolvedMethodsOffset(), false);
+  ObjectArray<ArtMethod>* GetResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject< ObjectArray<ArtMethod> >(ResolvedMethodsOffset(), false);
   }
 
-  ObjectArray<ArtField>* GetResolvedFields() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<ArtField>* >(ResolvedFieldsOffset(), false);
+  ObjectArray<ArtField>* GetResolvedFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<ArtField> >(ResolvedFieldsOffset(), false);
   }
 
-  const DexFile* GetDexFile() const {
+  const DexFile* GetDexFile() {
     return GetFieldPtr<const DexFile*>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_), false);
   }
 
@@ -152,13 +144,13 @@
   }
 
  private:
-  Object* dex_;
-  String* location_;
-  ObjectArray<ArtField>* resolved_fields_;
-  ObjectArray<ArtMethod>* resolved_methods_;
-  ObjectArray<Class>* resolved_types_;
-  ObjectArray<String>* strings_;
-  uint32_t dex_file_;
+  HeapReference<Object> dex_;
+  HeapReference<String> location_;
+  HeapReference<ObjectArray<ArtField> > resolved_fields_;
+  HeapReference<ObjectArray<ArtMethod> > resolved_methods_;
+  HeapReference<ObjectArray<Class> > resolved_types_;
+  HeapReference<ObjectArray<String> > strings_;
+  uint64_t dex_file_;
 
   friend struct art::DexCacheOffsets;  // for verifying offset information
   DISALLOW_IMPLICIT_CONSTRUCTORS(DexCache);
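
Beyond the const removals, two representational changes land in dex_cache.h: GetFieldObject now takes the mirror type itself as its template argument (the read goes through a HeapReference<T> rather than reinterpreting a 32-bit word), and the native dex_file_ slot widens from uint32_t to uint64_t so a host pointer still fits on LP64 builds. The call-site change, using the location_ field from this file:

// Before this change the template argument was the pointer type:
//   String* s = GetFieldObject<String*>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), false);
// After, the mirror type is the argument and a HeapReference<String> is read internally:
String* s = GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), false);
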
diff --git a/runtime/mirror/iftable.h b/runtime/mirror/iftable.h
index 421893d..be83d03 100644
--- a/runtime/mirror/iftable.h
+++ b/runtime/mirror/iftable.h
@@ -24,7 +24,7 @@
 
 class MANAGED IfTable : public ObjectArray<Object> {
  public:
-  Class* GetInterface(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  Class* GetInterface(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Class* interface = Get((i * kMax) + kInterface)->AsClass();
     DCHECK(interface != NULL);
     return interface;
@@ -32,15 +32,14 @@
 
   void SetInterface(int32_t i, Class* interface) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetMethodArray(int32_t i) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ObjectArray<ArtMethod>* GetMethodArray(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ObjectArray<ArtMethod>* method_array =
         down_cast<ObjectArray<ArtMethod>*>(Get((i * kMax) + kMethodArray));
     DCHECK(method_array != NULL);
     return method_array;
   }
 
-  size_t GetMethodArrayCount(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t GetMethodArrayCount(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ObjectArray<ArtMethod>* method_array =
         down_cast<ObjectArray<ArtMethod>*>(Get((i * kMax) + kMethodArray));
     if (method_array == NULL) {
@@ -56,7 +55,7 @@
     Set((i * kMax) + kMethodArray, new_ma);
   }
 
-  size_t Count() const {
+  size_t Count() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetLength() / kMax;
   }
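
IfTable flattens its (interface, method array) pairs into one ObjectArray<Object>, which is why every accessor above indexes with (i * kMax) + slot. Assuming the slot constants kInterface = 0, kMethodArray = 1, kMax = 2 (consistent with the code shown, though their definitions are outside these hunks), the layout works out as:

// Flattened storage:  [iface0, methods0, iface1, methods1, ...]
// Pair i's interface: Get(i * kMax + kInterface)    // even slots
// Pair i's methods:   Get(i * kMax + kMethodArray)  // odd slots
// Count():            GetLength() / kMax            // two slots per interface
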
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 9161bc5..afa4112 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -32,19 +32,18 @@
 namespace art {
 namespace mirror {
 
-inline Class* Object::GetClass() const {
-  return GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(Object, klass_), false);
+inline Class* Object::GetClass() {
+  return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Object, klass_), false);
 }
 
 inline void Object::SetClass(Class* new_klass) {
-  // new_klass may be NULL prior to class linker initialization
-  // We don't mark the card since the class is guaranteed to be referenced from another location.
-  // Proxy classes are held live by the class loader, and other classes are roots of the class
-  // linker.
-  SetFieldPtr(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false, false);
+  // new_klass may be NULL prior to class linker initialization.
+  // We don't mark the card since this occurs as part of object allocation. Not all objects,
+  // such as large objects, have backing cards.
+  SetFieldObjectWithoutWriteBarrier(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false, false);
 }
 
-inline LockWord Object::GetLockWord() const {
+inline LockWord Object::GetLockWord() {
   return LockWord(GetField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), true));
 }
 
@@ -85,19 +84,19 @@
   Monitor::Wait(self, this, ms, ns, true, kTimedWaiting);
 }
 
-inline bool Object::VerifierInstanceOf(const Class* klass) const {
+inline bool Object::VerifierInstanceOf(Class* klass) {
   DCHECK(klass != NULL);
   DCHECK(GetClass() != NULL);
   return klass->IsInterface() || InstanceOf(klass);
 }
 
-inline bool Object::InstanceOf(const Class* klass) const {
+inline bool Object::InstanceOf(Class* klass) {
   DCHECK(klass != NULL);
   DCHECK(GetClass() != NULL);
   return klass->IsAssignableFrom(GetClass());
 }
 
-inline bool Object::IsClass() const {
+inline bool Object::IsClass() {
   Class* java_lang_Class = GetClass()->GetClass();
   return GetClass() == java_lang_Class;
 }
@@ -107,12 +106,7 @@
   return down_cast<Class*>(this);
 }
 
-inline const Class* Object::AsClass() const {
-  DCHECK(IsClass());
-  return down_cast<const Class*>(this);
-}
-
-inline bool Object::IsObjectArray() const {
+inline bool Object::IsObjectArray() {
   return IsArrayInstance() && !GetClass()->GetComponentType()->IsPrimitive();
 }
 
@@ -122,17 +116,11 @@
   return down_cast<ObjectArray<T>*>(this);
 }
 
-template<class T>
-inline const ObjectArray<T>* Object::AsObjectArray() const {
-  DCHECK(IsObjectArray());
-  return down_cast<const ObjectArray<T>*>(this);
-}
-
-inline bool Object::IsArrayInstance() const {
+inline bool Object::IsArrayInstance() {
   return GetClass()->IsArrayClass();
 }
 
-inline bool Object::IsArtField() const {
+inline bool Object::IsArtField() {
   return GetClass()->IsArtFieldClass();
 }
 
@@ -141,12 +129,7 @@
   return down_cast<ArtField*>(this);
 }
 
-inline const ArtField* Object::AsArtField() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(IsArtField());
-  return down_cast<const ArtField*>(this);
-}
-
-inline bool Object::IsArtMethod() const {
+inline bool Object::IsArtMethod() {
   return GetClass()->IsArtMethodClass();
 }
 
@@ -155,12 +138,7 @@
   return down_cast<ArtMethod*>(this);
 }
 
-inline const ArtMethod* Object::AsArtMethod() const {
-  DCHECK(IsArtMethod());
-  return down_cast<const ArtMethod*>(this);
-}
-
-inline bool Object::IsReferenceInstance() const {
+inline bool Object::IsReferenceInstance() {
   return GetClass()->IsReferenceClass();
 }
 
@@ -169,11 +147,6 @@
   return down_cast<Array*>(this);
 }
 
-inline const Array* Object::AsArray() const {
-  DCHECK(IsArrayInstance());
-  return down_cast<const Array*>(this);
-}
-
 inline BooleanArray* Object::AsBooleanArray() {
   DCHECK(GetClass()->IsArrayClass());
   DCHECK(GetClass()->GetComponentType()->IsPrimitiveBoolean());
@@ -186,6 +159,13 @@
   return down_cast<ByteArray*>(this);
 }
 
+inline ByteArray* Object::AsByteSizedArray() {
+  DCHECK(GetClass()->IsArrayClass());
+  DCHECK(GetClass()->GetComponentType()->IsPrimitiveByte() ||
+         GetClass()->GetComponentType()->IsPrimitiveBoolean());
+  return down_cast<ByteArray*>(this);
+}
+
 inline CharArray* Object::AsCharArray() {
   DCHECK(GetClass()->IsArrayClass());
   DCHECK(GetClass()->GetComponentType()->IsPrimitiveChar());
@@ -198,6 +178,13 @@
   return down_cast<ShortArray*>(this);
 }
 
+inline ShortArray* Object::AsShortSizedArray() {
+  DCHECK(GetClass()->IsArrayClass());
+  DCHECK(GetClass()->GetComponentType()->IsPrimitiveShort() ||
+         GetClass()->GetComponentType()->IsPrimitiveChar());
+  return down_cast<ShortArray*>(this);
+}
+
 inline IntArray* Object::AsIntArray() {
   DCHECK(GetClass()->IsArrayClass());
   DCHECK(GetClass()->GetComponentType()->IsPrimitiveInt() ||
@@ -222,23 +209,23 @@
   return down_cast<Throwable*>(this);
 }
 
-inline bool Object::IsWeakReferenceInstance() const {
+inline bool Object::IsWeakReferenceInstance() {
   return GetClass()->IsWeakReferenceClass();
 }
 
-inline bool Object::IsSoftReferenceInstance() const {
+inline bool Object::IsSoftReferenceInstance() {
   return GetClass()->IsSoftReferenceClass();
 }
 
-inline bool Object::IsFinalizerReferenceInstance() const {
+inline bool Object::IsFinalizerReferenceInstance() {
   return GetClass()->IsFinalizerReferenceClass();
 }
 
-inline bool Object::IsPhantomReferenceInstance() const {
+inline bool Object::IsPhantomReferenceInstance() {
   return GetClass()->IsPhantomReferenceClass();
 }
 
-inline size_t Object::SizeOf() const {
+inline size_t Object::SizeOf() {
   size_t result;
   if (IsArrayInstance()) {
     result = AsArray()->SizeOf();
@@ -253,13 +240,13 @@
   return result;
 }
 
-inline uint32_t Object::GetField32(MemberOffset field_offset, bool is_volatile) const {
+inline uint32_t Object::GetField32(MemberOffset field_offset, bool is_volatile) {
   VerifyObject(this);
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
   const int32_t* word_addr = reinterpret_cast<const int32_t*>(raw_addr);
   if (UNLIKELY(is_volatile)) {
     int32_t result = *(reinterpret_cast<volatile int32_t*>(const_cast<int32_t*>(word_addr)));
-    QuasiAtomic::MembarLoadLoad();
+    QuasiAtomic::MembarLoadLoad();  // Ensure volatile loads don't re-order.
     return result;
   } else {
     return *word_addr;
@@ -276,7 +263,7 @@
   if (UNLIKELY(is_volatile)) {
     QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
     *word_addr = new_value;
-    QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any loads.
+    QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any volatile loads.
   } else {
     *word_addr = new_value;
   }
@@ -289,28 +276,31 @@
   return __sync_bool_compare_and_swap(addr, old_value, new_value);
 }
 
-inline uint64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) const {
+inline uint64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) {
   VerifyObject(this);
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
   const int64_t* addr = reinterpret_cast<const int64_t*>(raw_addr);
   if (UNLIKELY(is_volatile)) {
     uint64_t result = QuasiAtomic::Read64(addr);
-    QuasiAtomic::MembarLoadLoad();
+    QuasiAtomic::MembarLoadLoad();  // Ensure volatile loads don't re-order.
     return result;
   } else {
     return *addr;
   }
 }
 
-inline void Object::SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile) {
-  VerifyObject(this);
+inline void Object::SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile,
+                               bool this_is_valid) {
+  if (this_is_valid) {
+    VerifyObject(this);
+  }
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   int64_t* addr = reinterpret_cast<int64_t*>(raw_addr);
   if (UNLIKELY(is_volatile)) {
     QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
     QuasiAtomic::Write64(addr, new_value);
     if (!QuasiAtomic::LongAtomicsUseMutexes()) {
-      QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any loads.
+      QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any volatile loads.
     } else {
       // Fence from the mutex is enough.
     }
@@ -319,12 +309,69 @@
   }
 }
 
-inline void Object::WriteBarrierField(const Object* dst, MemberOffset field_offset,
-                                      const Object* new_value) {
-  Runtime::Current()->GetHeap()->WriteBarrierField(dst, field_offset, new_value);
+inline bool Object::CasField64(MemberOffset field_offset, uint64_t old_value, uint64_t new_value) {
+  VerifyObject(this);
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  volatile uint64_t* addr = reinterpret_cast<volatile uint64_t*>(raw_addr);
+  return __sync_bool_compare_and_swap(addr, old_value, new_value);
 }
 
-inline void Object::VerifyObject(const Object* obj) {
+template<class T>
+inline T* Object::GetFieldObject(MemberOffset field_offset, bool is_volatile) {
+  VerifyObject(this);
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr);
+  HeapReference<T> objref = *objref_addr;
+
+  if (UNLIKELY(is_volatile)) {
+    QuasiAtomic::MembarLoadLoad();  // Ensure loads don't re-order.
+  }
+  T* result = objref.AsMirrorPtr();
+  VerifyObject(result);
+  return result;
+}
+
+inline void Object::SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset, Object* new_value,
+                                                      bool is_volatile, bool this_is_valid) {
+  if (this_is_valid) {
+    VerifyObject(this);
+  }
+  VerifyObject(new_value);
+  HeapReference<Object> objref(HeapReference<Object>::FromMirrorPtr(new_value));
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  HeapReference<Object>* objref_addr = reinterpret_cast<HeapReference<Object>*>(raw_addr);
+  if (UNLIKELY(is_volatile)) {
+    QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
+    objref_addr->Assign(new_value);
+    QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any loads.
+  } else {
+    objref_addr->Assign(new_value);
+  }
+}
+
+inline void Object::SetFieldObject(MemberOffset field_offset, Object* new_value, bool is_volatile,
+                                   bool this_is_valid) {
+  SetFieldObjectWithoutWriteBarrier(field_offset, new_value, is_volatile, this_is_valid);
+  if (new_value != nullptr) {
+    CheckFieldAssignment(field_offset, new_value);
+    Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
+  }
+}
+
+inline bool Object::CasFieldObject(MemberOffset field_offset, Object* old_value, Object* new_value) {
+  VerifyObject(this);
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  volatile uint32_t* addr = reinterpret_cast<volatile uint32_t*>(raw_addr);
+  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  bool success = __sync_bool_compare_and_swap(addr, old_ref.reference_, new_ref.reference_);
+  if (success) {
+    Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
+  }
+  return success;
+}
+
+inline void Object::VerifyObject(Object* obj) {
   if (kIsDebugBuild) {
     Runtime::Current()->GetHeap()->VerifyObject(obj);
   }
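
All the field accessors in object-inl.h follow one barrier protocol for is_volatile accesses: StoreStore before a volatile write, StoreLoad after it, and LoadLoad after a volatile read. A generic sketch of that shape, using GCC's __sync_synchronize() full fence as a conservative stand-in for the narrower QuasiAtomic barriers named in the comments:

#include <stdint.h>

// Sketch only; a full fence is stronger than each named barrier requires.
uint32_t LoadVolatile32(const volatile uint32_t* addr) {
  uint32_t value = *addr;
  __sync_synchronize();  // MembarLoadLoad: keep later loads after this load.
  return value;
}

void StoreVolatile32(volatile uint32_t* addr, uint32_t value) {
  __sync_synchronize();  // MembarStoreStore: earlier stores complete first.
  *addr = value;
  __sync_synchronize();  // MembarStoreLoad: publish before later volatile loads.
}
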
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index bdb3250..1251852 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -52,7 +52,7 @@
   Class* c = src->GetClass();
   if (c->IsArrayClass()) {
     if (!c->GetComponentType()->IsPrimitive()) {
-      const ObjectArray<Object>* array = dest->AsObjectArray<Object>();
+      ObjectArray<Object>* array = dest->AsObjectArray<Object>();
       heap->WriteBarrierArray(dest, 0, array->GetLength());
     }
   } else {
@@ -139,14 +139,15 @@
   return 0;
 }
 
-void Object::CheckFieldAssignmentImpl(MemberOffset field_offset, const Object* new_value) {
-  const Class* c = GetClass();
+void Object::CheckFieldAssignmentImpl(MemberOffset field_offset, Object* new_value) {
+  Class* c = GetClass();
   if (Runtime::Current()->GetClassLinker() == NULL ||
+      !Runtime::Current()->IsStarted() ||
       !Runtime::Current()->GetHeap()->IsObjectValidationEnabled() ||
       !c->IsResolved()) {
     return;
   }
-  for (const Class* cur = c; cur != NULL; cur = cur->GetSuperClass()) {
+  for (Class* cur = c; cur != NULL; cur = cur->GetSuperClass()) {
     ObjectArray<ArtField>* fields = cur->GetIFields();
     if (fields != NULL) {
       size_t num_ref_ifields = cur->NumReferenceInstanceFields();
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 058aee7..6fe8b73 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -21,6 +21,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "cutils/atomic-inline.h"
+#include "object_reference.h"
 #include "offsets.h"
 
 namespace art {
@@ -51,17 +52,13 @@
 class String;
 class Throwable;
 
-// Classes shared with the managed side of the world need to be packed so that they don't have
-// extra platform specific padding.
-#define MANAGED PACKED(4)
-
 // Fields within mirror objects aren't accessed directly so that the appropriate amount of
 // handshaking is done with GC (for example, read and write barriers). This macro is used to
 // compute an offset for the Set/Get methods defined in Object that can safely access fields.
 #define OFFSET_OF_OBJECT_MEMBER(type, field) \
     MemberOffset(OFFSETOF_MEMBER(type, field))
 
-const bool kCheckFieldAssignments = false;
+constexpr bool kCheckFieldAssignments = false;
 
 // C++ mirror of java.lang.Object
 class MANAGED Object {
@@ -70,19 +67,17 @@
     return OFFSET_OF_OBJECT_MEMBER(Object, klass_);
   }
 
-  Class* GetClass() const;
+  Class* GetClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetClass(Class* new_klass);
+  void SetClass(Class* new_klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // The verifier treats all interfaces as java.lang.Object and relies on runtime checks in
   // invoke-interface to detect incompatible interface types.
-  bool VerifierInstanceOf(const Class* klass) const
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool VerifierInstanceOf(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool InstanceOf(const Class* klass) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool InstanceOf(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t SizeOf() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Object* Clone(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -92,9 +87,9 @@
     return OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
   }
 
-  LockWord GetLockWord() const;
+  LockWord GetLockWord();
   void SetLockWord(LockWord new_val);
-  bool CasLockWord(LockWord old_val, LockWord new_val);
+  bool CasLockWord(LockWord old_val, LockWord new_val) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint32_t GetLockOwnerThreadId();
 
   void MonitorEnter(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -111,111 +106,113 @@
 
   void Wait(Thread* self, int64_t timeout, int32_t nanos) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsClass() const;
+  bool IsClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  Class* AsClass();
+  Class* AsClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const Class* AsClass() const;
-
-  bool IsObjectArray() const;
+  bool IsObjectArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<class T>
-  ObjectArray<T>* AsObjectArray();
+  ObjectArray<T>* AsObjectArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<class T>
-  const ObjectArray<T>* AsObjectArray() const;
+  bool IsArrayInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsArrayInstance() const;
+  Array* AsArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  Array* AsArray();
+  BooleanArray* AsBooleanArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ByteArray* AsByteArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ByteArray* AsByteSizedArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const Array* AsArray() const;
+  CharArray* AsCharArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ShortArray* AsShortArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ShortArray* AsShortSizedArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  BooleanArray* AsBooleanArray();
-  ByteArray* AsByteArray();
-  CharArray* AsCharArray();
-  ShortArray* AsShortArray();
-  IntArray* AsIntArray();
-  LongArray* AsLongArray();
+  IntArray* AsIntArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  LongArray* AsLongArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  String* AsString();
+  String* AsString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Throwable* AsThrowable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsArtMethod() const;
+  bool IsArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* AsArtMethod();
+  ArtMethod* AsArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const ArtMethod* AsArtMethod() const;
-
-  bool IsArtField() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ArtField* AsArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const ArtField* AsArtField() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsReferenceInstance() const;
+  bool IsWeakReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsWeakReferenceInstance() const;
+  bool IsSoftReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsSoftReferenceInstance() const;
+  bool IsFinalizerReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsFinalizerReferenceInstance() const;
+  bool IsPhantomReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsPhantomReferenceInstance() const;
+  // Accessors for Java type fields.
+  template<class T> T* GetFieldObject(MemberOffset field_offset, bool is_volatile)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset, Object* new_value,
+                                         bool is_volatile, bool this_is_valid = true)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetFieldObject(MemberOffset field_offset, Object* new_value, bool is_volatile,
+                      bool this_is_valid = true)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool CasFieldObject(MemberOffset field_offset, Object* old_value, Object* new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Accessors for Java type fields
-  template<class T>
-  T GetFieldObject(MemberOffset field_offset, bool is_volatile) const {
-    T result = reinterpret_cast<T>(GetField32(field_offset, is_volatile));
-    VerifyObject(result);
-    return result;
-  }
-
-  void SetFieldObject(MemberOffset field_offset, const Object* new_value, bool is_volatile,
-                      bool this_is_valid = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    VerifyObject(new_value);
-    SetField32(field_offset, reinterpret_cast<uint32_t>(new_value), is_volatile, this_is_valid);
-    if (new_value != NULL) {
-      CheckFieldAssignment(field_offset, new_value);
-      WriteBarrierField(this, field_offset, new_value);
-    }
-  }
-
-  Object** GetFieldObjectAddr(MemberOffset field_offset) ALWAYS_INLINE {
+  HeapReference<Object>* GetFieldObjectReferenceAddr(MemberOffset field_offset) ALWAYS_INLINE {
     VerifyObject(this);
-    return reinterpret_cast<Object**>(reinterpret_cast<byte*>(this) + field_offset.Int32Value());
+    return reinterpret_cast<HeapReference<Object>*>(reinterpret_cast<byte*>(this) +
+        field_offset.Int32Value());
   }
 
-  uint32_t GetField32(MemberOffset field_offset, bool is_volatile) const;
+  uint32_t GetField32(MemberOffset field_offset, bool is_volatile);
 
   void SetField32(MemberOffset field_offset, uint32_t new_value, bool is_volatile,
                   bool this_is_valid = true);
 
-  bool CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value);
+  bool CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint64_t GetField64(MemberOffset field_offset, bool is_volatile) const;
+  uint64_t GetField64(MemberOffset field_offset, bool is_volatile);
 
-  void SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile);
+  void SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile,
+                  bool this_is_valid = true);
+
+  bool CasField64(MemberOffset field_offset, uint64_t old_value, uint64_t new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<typename T>
-  void SetFieldPtr(MemberOffset field_offset, T new_value, bool is_volatile, bool this_is_valid = true) {
+  void SetFieldPtr(MemberOffset field_offset, T new_value, bool is_volatile,
+                   bool this_is_valid = true) {
+#ifndef __LP64__
     SetField32(field_offset, reinterpret_cast<uint32_t>(new_value), is_volatile, this_is_valid);
+#else
+    SetField64(field_offset, reinterpret_cast<uint64_t>(new_value), is_volatile, this_is_valid);
+#endif
   }
 
  protected:
   // Accessors for non-Java type fields
   template<class T>
-  T GetFieldPtr(MemberOffset field_offset, bool is_volatile) const {
+  T GetFieldPtr(MemberOffset field_offset, bool is_volatile) {
+#ifndef __LP64__
     return reinterpret_cast<T>(GetField32(field_offset, is_volatile));
+#else
+    return reinterpret_cast<T>(GetField64(field_offset, is_volatile));
+#endif
   }
 
  private:
-  static void VerifyObject(const Object* obj) ALWAYS_INLINE;
+  static void VerifyObject(Object* obj) ALWAYS_INLINE;
   // Verify the type correctness of stores to fields.
-  void CheckFieldAssignmentImpl(MemberOffset field_offset, const Object* new_value)
+  void CheckFieldAssignmentImpl(MemberOffset field_offset, Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void CheckFieldAssignment(MemberOffset field_offset, const Object* new_value)
+  void CheckFieldAssignment(MemberOffset field_offset, Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (kCheckFieldAssignments) {
       CheckFieldAssignmentImpl(field_offset, new_value);
@@ -225,11 +222,9 @@
   // Generate an identity hash code.
   static int32_t GenerateIdentityHashCode();
 
-  // Write barrier called post update to a reference bearing field.
-  static void WriteBarrierField(const Object* dst, MemberOffset offset, const Object* new_value);
-
-  Class* klass_;
-
+  // The Class representing the type of the object.
+  HeapReference<Class> klass_;
+  // Monitor and hash code information.
   uint32_t monitor_;
 
   friend class art::ImageWriter;
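
GetFieldPtr and SetFieldPtr now branch on __LP64__: native (non-Java) pointer fields occupy a 32-bit slot on ILP32 builds and a 64-bit slot on LP64 builds, selected at compile time; this is the same motivation behind widening DexCache's dex_file_ to uint64_t earlier in the change. The branch reduces to:

#include <stdint.h>

// Standalone sketch of the #ifndef __LP64__ branches above; hypothetical helper.
template<typename T>
void StoreNativePointer(void* slot, T value) {
#ifndef __LP64__
  *reinterpret_cast<uint32_t*>(slot) = reinterpret_cast<uint32_t>(value);
#else
  *reinterpret_cast<uint64_t*>(slot) = reinterpret_cast<uint64_t>(value);
#endif
}
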
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 6a50dfe..c342479 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -25,6 +25,7 @@
 #include "runtime.h"
 #include "sirt_ref.h"
 #include "thread.h"
+#include <string>
 
 namespace art {
 namespace mirror {
@@ -32,8 +33,8 @@
 template<class T>
 inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class,
                                              int32_t length, gc::AllocatorType allocator_type) {
-  Array* array = Array::Alloc<true>(self, object_array_class, length, sizeof(Object*),
-                                    allocator_type);
+  Array* array = Array::Alloc<true>(self, object_array_class, length,
+                                    sizeof(HeapReference<Object>), allocator_type);
   if (UNLIKELY(array == nullptr)) {
     return nullptr;
   } else {
@@ -49,12 +50,12 @@
 }
 
 template<class T>
-inline T* ObjectArray<T>::Get(int32_t i) const {
+inline T* ObjectArray<T>::Get(int32_t i) {
   if (UNLIKELY(!CheckIsValidIndex(i))) {
     DCHECK(Thread::Current()->IsExceptionPending());
     return NULL;
   }
-  return GetWithoutChecks(i);
+  return GetFieldObject<T>(OffsetOfElement(i), false);
 }
 
 template<class T>
@@ -72,7 +73,7 @@
 template<class T>
 inline void ObjectArray<T>::Set(int32_t i, T* object) {
   if (LIKELY(CheckIsValidIndex(i) && CheckAssignable(object))) {
-    SetWithoutChecks(i, object);
+    SetFieldObject(OffsetOfElement(i), object, false);
   } else {
     DCHECK(Thread::Current()->IsExceptionPending());
   }
@@ -82,72 +83,123 @@
 inline void ObjectArray<T>::SetWithoutChecks(int32_t i, T* object) {
   DCHECK(CheckIsValidIndex(i));
   DCHECK(CheckAssignable(object));
-  MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
-  SetFieldObject(data_offset, object, false);
+  SetFieldObject(OffsetOfElement(i), object, false);
 }
 
 template<class T>
-inline void ObjectArray<T>::SetPtrWithoutChecks(int32_t i, T* object) {
+inline void ObjectArray<T>::SetWithoutChecksAndWriteBarrier(int32_t i, T* object) {
   DCHECK(CheckIsValidIndex(i));
-  // TODO enable this check. It fails when writing the image in ImageWriter::FixupObjectArray.
+  // TODO: enable this check. It fails when writing the image in ImageWriter::FixupObjectArray.
   // DCHECK(CheckAssignable(object));
-  MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
-  SetFieldPtr(data_offset, object, false);
+  SetFieldObjectWithoutWriteBarrier(OffsetOfElement(i), object, false);
 }
 
 template<class T>
-inline T* ObjectArray<T>::GetWithoutChecks(int32_t i) const {
+inline T* ObjectArray<T>::GetWithoutChecks(int32_t i) {
   DCHECK(CheckIsValidIndex(i));
-  MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
-  return GetFieldObject<T*>(data_offset, false);
+  return GetFieldObject<T>(OffsetOfElement(i), false);
 }
 
 template<class T>
-inline void ObjectArray<T>::Copy(const ObjectArray<T>* src, int src_pos,
-                                 ObjectArray<T>* dst, int dst_pos,
-                                 size_t length) {
-  if (src->CheckIsValidIndex(src_pos) &&
-      src->CheckIsValidIndex(src_pos + length - 1) &&
-      dst->CheckIsValidIndex(dst_pos) &&
-      dst->CheckIsValidIndex(dst_pos + length - 1)) {
-    MemberOffset src_offset(DataOffset(sizeof(Object*)).Int32Value() + src_pos * sizeof(Object*));
-    MemberOffset dst_offset(DataOffset(sizeof(Object*)).Int32Value() + dst_pos * sizeof(Object*));
-    Class* array_class = dst->GetClass();
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    if (array_class == src->GetClass()) {
-      // No need for array store checks if arrays are of the same type
-      for (size_t i = 0; i < length; i++) {
-        Object* object = src->GetFieldObject<Object*>(src_offset, false);
-        heap->VerifyObject(object);
-        // directly set field, we do a bulk write barrier at the end
-        dst->SetField32(dst_offset, reinterpret_cast<uint32_t>(object), false, true);
-        src_offset = MemberOffset(src_offset.Uint32Value() + sizeof(Object*));
-        dst_offset = MemberOffset(dst_offset.Uint32Value() + sizeof(Object*));
-      }
+inline void ObjectArray<T>::AssignableMemmove(int32_t dst_pos, ObjectArray<T>* src,
+                                              int32_t src_pos, int32_t count) {
+  if (kIsDebugBuild) {
+    for (int i = 0; i < count; ++i) {
+      // The Get will perform the VerifyObject.
+      src->GetWithoutChecks(src_pos + i);
+    }
+  }
+  // Perform the memmove using int memmove then perform the write barrier.
+  CHECK_EQ(sizeof(HeapReference<T>), sizeof(uint32_t));
+  IntArray* dstAsIntArray = reinterpret_cast<IntArray*>(this);
+  IntArray* srcAsIntArray = reinterpret_cast<IntArray*>(src);
+  dstAsIntArray->Memmove(dst_pos, srcAsIntArray, src_pos, count);
+  Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
+  if (kIsDebugBuild) {
+    for (int i = 0; i < count; ++i) {
+      // The Get will perform the VerifyObject.
+      GetWithoutChecks(dst_pos + i);
+    }
+  }
+}
+
+template<class T>
+inline void ObjectArray<T>::AssignableMemcpy(int32_t dst_pos, ObjectArray<T>* src,
+                                             int32_t src_pos, int32_t count) {
+  if (kIsDebugBuild) {
+    for (int i = 0; i < count; ++i) {
+      // The Get will perform the VerifyObject.
+      src->GetWithoutChecks(src_pos + i);
+    }
+  }
+  // Perform the memcpy using int memcpy then perform the write barrier.
+  CHECK_EQ(sizeof(HeapReference<T>), sizeof(uint32_t));
+  IntArray* dstAsIntArray = reinterpret_cast<IntArray*>(this);
+  IntArray* srcAsIntArray = reinterpret_cast<IntArray*>(src);
+  dstAsIntArray->Memcpy(dst_pos, srcAsIntArray, src_pos, count);
+  Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
+  if (kIsDebugBuild) {
+    for (int i = 0; i < count; ++i) {
+      // The Get will perform the VerifyObject.
+      GetWithoutChecks(dst_pos + i);
+    }
+  }
+}
+
+template<class T>
+inline void ObjectArray<T>::AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src,
+                                                     int32_t src_pos, int32_t count,
+                                                     bool throw_exception) {
+  DCHECK_NE(this, src)
+      << "This case should be handled with memmove that handles overlaps correctly";
+  // We want to avoid redundant IsAssignableFrom checks where possible, so we cache a class that
+  // we know is assignable to the destination array's component type.
+  Class* dst_class = GetClass()->GetComponentType();
+  Class* lastAssignableElementClass = dst_class;
+
+  Object* o = nullptr;
+  int i = 0;
+  for (; i < count; ++i) {
+    // The following get operations force the objects to be verified.
+    o = src->GetWithoutChecks(src_pos + i);
+    if (o == nullptr) {
+      // Null is always assignable.
+      SetWithoutChecks(dst_pos + i, nullptr);
     } else {
-      Class* element_class = array_class->GetComponentType();
-      CHECK(!element_class->IsPrimitive());
-      for (size_t i = 0; i < length; i++) {
-        Object* object = src->GetFieldObject<Object*>(src_offset, false);
-        if (object != NULL && !object->InstanceOf(element_class)) {
-          dst->ThrowArrayStoreException(object);
-          return;
-        }
-        heap->VerifyObject(object);
-        // directly set field, we do a bulk write barrier at the end
-        dst->SetField32(dst_offset, reinterpret_cast<uint32_t>(object), false, true);
-        src_offset = MemberOffset(src_offset.Uint32Value() + sizeof(Object*));
-        dst_offset = MemberOffset(dst_offset.Uint32Value() + sizeof(Object*));
+      // TODO: use the underlying class reference to avoid uncompression when not necessary.
+      Class* o_class = o->GetClass();
+      if (LIKELY(lastAssignableElementClass == o_class)) {
+        SetWithoutChecks(dst_pos + i, o);
+      } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
+        lastAssignableElementClass = o_class;
+        SetWithoutChecks(dst_pos + i, o);
+      } else {
+        // Can't put this element into the array; break to perform the write barrier and throw
+        // the exception.
+        break;
       }
     }
-    heap->WriteBarrierArray(dst, dst_pos, length);
-  } else {
-    DCHECK(Thread::Current()->IsExceptionPending());
+  }
+  Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
+  if (UNLIKELY(i != count)) {
+    std::string actualSrcType(PrettyTypeOf(o));
+    std::string dstType(PrettyTypeOf(this));
+    Thread* self = Thread::Current();
+    ThrowLocation throw_location = self->GetCurrentLocationForThrow();
+    if (throw_exception) {
+      self->ThrowNewExceptionF(throw_location, "Ljava/lang/ArrayStoreException;",
+                               "source[%d] of type %s cannot be stored in destination array of type %s",
+                               src_pos + i, actualSrcType.c_str(), dstType.c_str());
+    } else {
+      LOG(FATAL) << StringPrintf("source[%d] of type %s cannot be stored in destination array of type %s",
+                                 src_pos + i, actualSrcType.c_str(), dstType.c_str());
+    }
   }
 }
 
 template<class T>
 inline ObjectArray<T>* ObjectArray<T>::CopyOf(Thread* self, int32_t new_length) {
+  DCHECK_GE(new_length, 0);
   // We may get copied by a compacting GC.
   SirtRef<ObjectArray<T> > sirt_this(self, this);
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -155,11 +207,17 @@
       heap->GetCurrentNonMovingAllocator();
   ObjectArray<T>* new_array = Alloc(self, GetClass(), new_length, allocator_type);
   if (LIKELY(new_array != nullptr)) {
-    Copy(sirt_this.get(), 0, new_array, 0, std::min(sirt_this->GetLength(), new_length));
+    new_array->AssignableMemcpy(0, sirt_this.get(), 0, std::min(sirt_this->GetLength(), new_length));
   }
   return new_array;
 }
 
+template<class T>
+inline MemberOffset ObjectArray<T>::OffsetOfElement(int32_t i) {
+  return MemberOffset(DataOffset(sizeof(HeapReference<Object>)).Int32Value() +
+                      (i * sizeof(HeapReference<Object>)));
+}
+
 }  // namespace mirror
 }  // namespace art
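Aside: the single-entry class cache in AssignableCheckingMemcpy is the load-bearing
optimization here - typical arraycopy payloads are long runs of identically-typed
elements, so the expensive IsAssignableFrom walk runs once per distinct class rather
than once per element. A minimal standalone sketch of the idea (Klass and Obj are toy
stand-ins, not ART types):

    #include <cstddef>

    struct Klass {
      const Klass* super;  // toy single-inheritance chain
      bool IsAssignableFrom(const Klass* src) const {
        for (const Klass* k = src; k != nullptr; k = k->super) {
          if (k == this) return true;
        }
        return false;
      }
    };

    struct Obj {
      const Klass* klass;
    };

    // Copies src[0..count) into dst, stopping at the first element not assignable
    // to dst_class. Returns the number of elements copied; when the result is less
    // than count, the caller throws ArrayStoreException for that index.
    inline size_t CheckedCopy(Obj** dst, Obj* const* src, size_t count,
                              const Klass* dst_class) {
      const Klass* last_assignable = dst_class;  // cache of the last class proven OK
      size_t i = 0;
      for (; i < count; ++i) {
        Obj* o = src[i];
        if (o == nullptr) {
          dst[i] = nullptr;  // null is assignable to any reference type
        } else if (o->klass == last_assignable ||
                   dst_class->IsAssignableFrom(o->klass)) {
          last_assignable = o->klass;  // fast path for the next same-typed element
          dst[i] = o;
        } else {
          break;
        }
      }
      return i;
    }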
 
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 5da8845..347494e 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -33,7 +33,7 @@
   static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  T* Get(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  T* Get(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if the object can be stored into the array. If not, throws
   // an ArrayStoreException and returns false.
@@ -44,22 +44,30 @@
   // Set element without bound and element type checks, to be used in limited
   // circumstances, such as during boot image writing
   void SetWithoutChecks(int32_t i, T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetWithoutChecksAndWriteBarrier(int32_t i, T* object)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Set element without bound and element type checks, to be used in limited circumstances, such
-  // as during boot image writing. Does not do write barrier.
-  void SetPtrWithoutChecks(int32_t i, T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  T* GetWithoutChecks(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  T* GetWithoutChecks(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Copy src into this array (dealing with overlaps as memmove does) without assignability checks.
+  void AssignableMemmove(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
+                         int32_t count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void Copy(const ObjectArray<T>* src, int src_pos,
-                   ObjectArray<T>* dst, int dst_pos,
-                   size_t length)
+  // Copy src into this array assuming no overlap and without assignability checks.
+  void AssignableMemcpy(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
+                        int32_t count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Copy src into this array with assignability checks.
+  void AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
+                                int32_t count, bool throw_exception)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ObjectArray<T>* CopyOf(Thread* self, int32_t new_length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
+  static MemberOffset OffsetOfElement(int32_t i);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(ObjectArray);
 };
 
diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h
new file mode 100644
index 0000000..b30890f
--- /dev/null
+++ b/runtime/mirror/object_reference.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
+#define ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
+
+#include "locks.h"
+
+namespace art {
+namespace mirror {
+
+class Object;
+
+// Classes shared with the managed side of the world need to be packed so that they don't have
+// extra platform specific padding.
+#define MANAGED PACKED(4)
+
+// Value type representing a reference to a mirror::Object of type MirrorType.
+template<bool kPoisonReferences, class MirrorType>
+class MANAGED ObjectReference {
+ public:
+  MirrorType* AsMirrorPtr() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return UnCompress();
+  }
+
+  void Assign(MirrorType* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    reference_ = Compress(other);
+  }
+
+  void Clear() {
+    reference_ = 0;
+  }
+
+  uint32_t AsVRegValue() const {
+    return reference_;
+  }
+
+ protected:
+  ObjectReference<kPoisonReferences, MirrorType>(MirrorType* mirror_ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : reference_(Compress(mirror_ptr)) {
+  }
+
+  // Compress reference to its bit representation.
+  static uint32_t Compress(MirrorType* mirror_ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uintptr_t as_bits = reinterpret_cast<uintptr_t>(mirror_ptr);
+    return static_cast<uint32_t>(kPoisonReferences ? -as_bits : as_bits);
+  }
+
+  // Uncompress an encoded reference from its bit representation.
+  MirrorType* UnCompress() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uintptr_t as_bits = kPoisonReferences ? -reference_ : reference_;
+    return reinterpret_cast<MirrorType*>(as_bits);
+  }
+
+  friend class Object;
+
+  // The encoded reference to a mirror::Object.
+  uint32_t reference_;
+};
+
+// References between objects within the managed heap.
+template<class MirrorType>
+class MANAGED HeapReference : public ObjectReference<false, MirrorType> {
+ public:
+  static HeapReference<MirrorType> FromMirrorPtr(MirrorType* mirror_ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return HeapReference<MirrorType>(mirror_ptr);
+  }
+ private:
+  HeapReference<MirrorType>(MirrorType* mirror_ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : ObjectReference<false, MirrorType>(mirror_ptr) {}
+};
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
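Aside: with kPoisonReferences the stored word is the two's-complement negation of the
pointer bits, so any code path that dereferences the compressed slot without going
through UnCompress lands on a wild address and faults early. A self-contained
round-trip check of the same arithmetic (Ref32 is a toy stand-in; like ART, it
assumes heap addresses fit in 32 bits):

    #include <cassert>
    #include <cstdint>

    template <bool kPoison>
    struct Ref32 {
      uint32_t bits;

      static Ref32 FromBits(uintptr_t as_bits) {
        return Ref32{static_cast<uint32_t>(kPoison ? -as_bits : as_bits)};
      }
      uintptr_t ToBits() const {
        return kPoison ? -bits : bits;  // unsigned negate stays within 32 bits
      }
    };

    int main() {
      const uintptr_t addr = 0x12345678u;  // simulated 32-bit heap address
      assert(Ref32<false>::FromBits(addr).ToBits() == addr);  // plain round trip
      assert(Ref32<true>::FromBits(addr).ToBits() == addr);   // poisoned round trip
      assert(Ref32<true>::FromBits(addr).bits != addr);       // stored form is poisoned
      return 0;
    }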
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 3637181..2af32da 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -77,7 +77,7 @@
   EXPECT_EQ(CLASS_COMPONENT_TYPE_OFFSET, Class::ComponentTypeOffset().Int32Value());
 
   EXPECT_EQ(ARRAY_LENGTH_OFFSET, Array::LengthOffset().Int32Value());
-  EXPECT_EQ(OBJECT_ARRAY_DATA_OFFSET, Array::DataOffset(sizeof(Object*)).Int32Value());
+  EXPECT_EQ(OBJECT_ARRAY_DATA_OFFSET, Array::DataOffset(sizeof(HeapReference<Object>)).Int32Value());
 
   EXPECT_EQ(STRING_VALUE_OFFSET, String::ValueOffset().Int32Value());
   EXPECT_EQ(STRING_COUNT_OFFSET, String::CountOffset().Int32Value());
@@ -85,7 +85,8 @@
   EXPECT_EQ(STRING_DATA_OFFSET, Array::DataOffset(sizeof(uint16_t)).Int32Value());
 
   EXPECT_EQ(METHOD_DEX_CACHE_METHODS_OFFSET, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
-  EXPECT_EQ(METHOD_CODE_OFFSET, ArtMethod::EntryPointFromCompiledCodeOffset().Int32Value());
+  EXPECT_EQ(METHOD_PORTABLE_CODE_OFFSET, ArtMethod::EntryPointFromPortableCompiledCodeOffset().Int32Value());
+  EXPECT_EQ(METHOD_QUICK_CODE_OFFSET, ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
 }
 
 TEST_F(ObjectTest, IsInSamePackage) {
@@ -295,7 +296,7 @@
   uint32_t field_idx = dex_file->GetIndexForFieldId(*field_id);
 
   ArtField* field = FindFieldFromCode<StaticObjectRead, true>(field_idx, clinit, Thread::Current(),
-                                                              sizeof(Object*));
+                                                              sizeof(HeapReference<Object>));
   Object* s0 = field->GetObj(klass);
   EXPECT_TRUE(s0 != NULL);
 
diff --git a/runtime/mirror/proxy.h b/runtime/mirror/proxy.h
index 18a84dc..ff019c6 100644
--- a/runtime/mirror/proxy.h
+++ b/runtime/mirror/proxy.h
@@ -29,24 +29,28 @@
 // has the static fields used to implement reflection on proxy objects.
 class MANAGED SynthesizedProxyClass : public Class {
  public:
-  ObjectArray<Class>* GetInterfaces() {
-    return interfaces_;
+  ObjectArray<Class>* GetInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<Class> >(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
+                                                                       interfaces_),
+                                               false);
   }
 
-  ObjectArray<ObjectArray<Class> >* GetThrows() {
-    return throws_;
+  ObjectArray<ObjectArray<Class> >* GetThrows() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<ObjectArray<Class> > >(
+        OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass, throws_), false);
   }
 
  private:
-  ObjectArray<Class>* interfaces_;
-  ObjectArray<ObjectArray<Class> >* throws_;
+  HeapReference<ObjectArray<Class> > interfaces_;
+  HeapReference<ObjectArray<ObjectArray<Class> > > throws_;
   DISALLOW_IMPLICIT_CONSTRUCTORS(SynthesizedProxyClass);
 };
 
 // C++ mirror of java.lang.reflect.Proxy.
 class MANAGED Proxy : public Object {
  private:
-  Object* h_;
+  HeapReference<Object> h_;
 
   friend struct art::ProxyOffsets;  // for verifying offset information
   DISALLOW_IMPLICIT_CONSTRUCTORS(Proxy);
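Aside: this hunk is the template for the whole change - raw Object* fields become
packed HeapReference<> slots, and every read funnels through GetFieldObject() so that
verification and any future read-barrier or moving-GC logic have a single choke
point. The shape of the pattern in miniature (ToyObject, ToyElement and ToyString
are illustrative, not ART types):

    #include <cstddef>
    #include <cstdint>

    template <typename T>
    struct HeapRef {      // packed 32-bit slot, analogous to HeapReference<T>
      uint32_t bits;
    };

    struct ToyString;     // opaque, stands in for mirror::String

    struct ToyObject {
      template <typename T>
      T* GetFieldObject(size_t offset) {
        auto* slot = reinterpret_cast<HeapRef<T>*>(
            reinterpret_cast<uint8_t*>(this) + offset);
        // The real runtime hooks verification and barriers in right here.
        return reinterpret_cast<T*>(static_cast<uintptr_t>(slot->bits));
      }
    };

    struct ToyElement : ToyObject {
      HeapRef<ToyString> name_;  // the field is a slot, never a raw pointer
      ToyString* GetName() {     // all reads go through the accessor
        return GetFieldObject<ToyString>(offsetof(ToyElement, name_));
      }
    };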
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index d1be4dc..73d2673 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -29,24 +29,23 @@
 // C++ mirror of java.lang.StackTraceElement
 class MANAGED StackTraceElement : public Object {
  public:
-  const String* GetDeclaringClass() const {
-    return GetFieldObject<const String*>(
-        OFFSET_OF_OBJECT_MEMBER(StackTraceElement, declaring_class_), false);
+  String* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, declaring_class_),
+                                  false);
   }
 
-  const String* GetMethodName() const {
-    return GetFieldObject<const String*>(
-        OFFSET_OF_OBJECT_MEMBER(StackTraceElement, method_name_), false);
+  String* GetMethodName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, method_name_),
+                                  false);
   }
 
-  const String* GetFileName() const {
-    return GetFieldObject<const String*>(
-        OFFSET_OF_OBJECT_MEMBER(StackTraceElement, file_name_), false);
+  String* GetFileName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, file_name_),
+                                  false);
   }
 
-  int32_t GetLineNumber() const {
-    return GetField32(
-        OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_), false);
+  int32_t GetLineNumber() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_), false);
   }
 
   static StackTraceElement* Alloc(Thread* self,
@@ -63,9 +62,9 @@
 
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  String* declaring_class_;
-  String* file_name_;
-  String* method_name_;
+  HeapReference<String> declaring_class_;
+  HeapReference<String> file_name_;
+  HeapReference<String> method_name_;
   int32_t line_number_;
 
   static Class* GetStackTraceElement() {
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 1f756a1..10ae066 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -29,23 +29,19 @@
 namespace art {
 namespace mirror {
 
-const CharArray* String::GetCharArray() const {
-  return GetFieldObject<const CharArray*>(ValueOffset(), false);
-}
-
 CharArray* String::GetCharArray() {
-  return GetFieldObject<CharArray*>(ValueOffset(), false);
+  return GetFieldObject<CharArray>(ValueOffset(), false);
 }
 
 void String::ComputeHashCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   SetHashCode(ComputeUtf16Hash(GetCharArray(), GetOffset(), GetLength()));
 }
 
-int32_t String::GetUtfLength() const {
+int32_t String::GetUtfLength() {
   return CountUtf8Bytes(GetCharArray()->GetData() + GetOffset(), GetLength());
 }
 
-int32_t String::FastIndexOf(int32_t ch, int32_t start) const {
+int32_t String::FastIndexOf(int32_t ch, int32_t start) {
   int32_t count = GetLength();
   if (start < 0) {
     start = 0;
@@ -97,13 +93,13 @@
   return result;
 }
 
-int32_t String::GetLength() const {
+int32_t String::GetLength() {
   int32_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_), false);
   DCHECK(result >= 0 && result <= GetCharArray()->GetLength());
   return result;
 }
 
-uint16_t String::CharAt(int32_t index) const {
+uint16_t String::CharAt(int32_t index) {
   // TODO: do we need this? Equals is the only caller, and could
   // bounds check itself.
   DCHECK_GE(count_, 0);  // ensures the unsigned comparison is safe.
@@ -179,7 +175,7 @@
   return string;
 }
 
-bool String::Equals(const String* that) const {
+bool String::Equals(String* that) {
   if (this == that) {
     // Quick reference equality test
     return true;
@@ -201,7 +197,7 @@
   }
 }
 
-bool String::Equals(const uint16_t* that_chars, int32_t that_offset, int32_t that_length) const {
+bool String::Equals(const uint16_t* that_chars, int32_t that_offset, int32_t that_length) {
   if (this->GetLength() != that_length) {
     return false;
   } else {
@@ -214,7 +210,7 @@
   }
 }
 
-bool String::Equals(const char* modified_utf8) const {
+bool String::Equals(const char* modified_utf8) {
   for (int32_t i = 0; i < GetLength(); ++i) {
     uint16_t ch = GetUtf16FromUtf8(&modified_utf8);
     if (ch == '\0' || ch != CharAt(i)) {
@@ -224,7 +220,7 @@
   return *modified_utf8 == '\0';
 }
 
-bool String::Equals(const StringPiece& modified_utf8) const {
+bool String::Equals(const StringPiece& modified_utf8) {
   const char* p = modified_utf8.data();
   for (int32_t i = 0; i < GetLength(); ++i) {
     uint16_t ch = GetUtf16FromUtf8(&p);
@@ -236,7 +232,7 @@
 }
 
 // Create a modified UTF-8 encoded std::string from a java/lang/String object.
-std::string String::ToModifiedUtf8() const {
+std::string String::ToModifiedUtf8() {
   const uint16_t* chars = GetCharArray()->GetData() + GetOffset();
   size_t byte_count = GetUtfLength();
   std::string result(byte_count, static_cast<char>(0));
@@ -259,9 +255,9 @@
 }
 #endif
 
-int32_t String::CompareTo(String* rhs) const {
+int32_t String::CompareTo(String* rhs) {
   // Quick test for comparison of a string with itself.
-  const String* lhs = this;
+  String* lhs = this;
   if (lhs == rhs) {
     return 0;
   }
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 4bbcb9c..a82b26c 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -44,24 +44,23 @@
     return OFFSET_OF_OBJECT_MEMBER(String, offset_);
   }
 
-  const CharArray* GetCharArray() const;
-  CharArray* GetCharArray();
+  CharArray* GetCharArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetOffset() const {
+  int32_t GetOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     int32_t result = GetField32(OffsetOffset(), false);
     DCHECK_LE(0, result);
     return result;
   }
 
-  int32_t GetLength() const;
+  int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   int32_t GetHashCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ComputeHashCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetUtfLength() const;
+  int32_t GetUtfLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint16_t CharAt(int32_t index) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint16_t CharAt(int32_t index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   String* Intern() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -78,29 +77,28 @@
                                        const char* utf8_data_in)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool Equals(const char* modified_utf8) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool Equals(const char* modified_utf8) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // TODO: do we need this overload? give it a more intention-revealing name.
-  bool Equals(const StringPiece& modified_utf8) const
+  bool Equals(const StringPiece& modified_utf8)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool Equals(const String* that) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool Equals(String* that) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Compare UTF-16 code point values not in a locale-sensitive manner
   int Compare(int32_t utf16_length, const char* utf8_data_in);
 
   // TODO: do we need this overload? give it a more intention-revealing name.
   bool Equals(const uint16_t* that_chars, int32_t that_offset,
-              int32_t that_length) const
+              int32_t that_length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Create a modified UTF-8 encoded std::string from a java/lang/String object.
-  std::string ToModifiedUtf8() const;
+  std::string ToModifiedUtf8() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t FastIndexOf(int32_t ch, int32_t start) const;
+  int32_t FastIndexOf(int32_t ch, int32_t start) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t CompareTo(String* other) const;
+  int32_t CompareTo(String* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangString() {
     DCHECK(java_lang_String_ != NULL);
@@ -123,7 +121,7 @@
     SetField32(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count, false);
   }
 
-  void SetOffset(int32_t new_offset) {
+  void SetOffset(int32_t new_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LE(0, new_offset);
     DCHECK_GE(GetLength(), new_offset);
     SetField32(OFFSET_OF_OBJECT_MEMBER(String, offset_), new_offset, false);
@@ -138,7 +136,7 @@
   void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  CharArray* array_;
+  HeapReference<CharArray> array_;
 
   int32_t count_;
 
@@ -155,8 +153,8 @@
 
 class MANAGED StringClass : public Class {
  private:
-  CharArray* ASCII_;
-  Object* CASE_INSENSITIVE_ORDER_;
+  HeapReference<CharArray> ASCII_;
+  HeapReference<Object> CASE_INSENSITIVE_ORDER_;
   uint32_t REPLACEMENT_CHAR_;
   int64_t serialVersionUID_;
   friend struct art::StringClassOffsets;  // for verifying offset information
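Aside: the recurring const-to-SHARED_LOCKS_REQUIRED rewrites in these headers are
what let clang's -Wthread-safety prove that heap references are only read with the
mutator lock held (the reads are no longer logically const once a moving collector
can update the slot). A minimal self-contained model of the annotation machinery -
toy macros and a toy lock, not ART's base/mutex.h:

    // Compile with: clang++ -std=c++11 -Wthread-safety -c toy_lock.cc
    #define LOCKABLE __attribute__((lockable))
    #define SHARED_LOCKS_REQUIRED(...) __attribute__((shared_locks_required(__VA_ARGS__)))
    #define SHARED_LOCK_FUNCTION(...) __attribute__((shared_lock_function(__VA_ARGS__)))
    #define UNLOCK_FUNCTION(...) __attribute__((unlock_function(__VA_ARGS__)))

    struct LOCKABLE ToyMutex {
      void SharedLock() SHARED_LOCK_FUNCTION() {}
      void Unlock() UNLOCK_FUNCTION() {}
    };

    ToyMutex toy_mutator_lock;
    int heap_word = 0;

    // Annotated like the String/Throwable getters above: any caller that doesn't
    // hold the lock gets a -Wthread-safety diagnostic at compile time.
    int ReadHeapWord() SHARED_LOCKS_REQUIRED(toy_mutator_lock) {
      return heap_word;
    }

    int Caller() {
      toy_mutator_lock.SharedLock();
      int v = ReadHeapWord();  // ok: lock held shared
      toy_mutator_lock.Unlock();
      return v;
    }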
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index b55db72..2318b74 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -33,22 +33,22 @@
 Class* Throwable::java_lang_Throwable_ = NULL;
 
 void Throwable::SetCause(Throwable* cause) {
-  CHECK(cause != NULL);
+  CHECK(cause != nullptr);
   CHECK(cause != this);
-  Throwable* current_cause = GetFieldObject<Throwable*>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_),
-                                                        false);
+  Throwable* current_cause = GetFieldObject<Throwable>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_),
+                                                       false);
   CHECK(current_cause == NULL || current_cause == this);
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_), cause, false);
 }
 
-bool Throwable::IsCheckedException() const {
+bool Throwable::IsCheckedException() {
   if (InstanceOf(WellKnownClasses::ToClass(WellKnownClasses::java_lang_Error))) {
     return false;
   }
   return !InstanceOf(WellKnownClasses::ToClass(WellKnownClasses::java_lang_RuntimeException));
 }
 
-std::string Throwable::Dump() const {
+std::string Throwable::Dump() {
   std::string result(PrettyTypeOf(this));
   result += ": ";
   String* msg = GetDetailMessage();
@@ -74,7 +74,7 @@
                              source_file, line_number);
     }
   }
-  Throwable* cause = GetFieldObject<Throwable*>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_), false);
+  Throwable* cause = GetFieldObject<Throwable>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_), false);
   if (cause != NULL && cause != this) {  // Constructor makes cause == this by default.
     result += "Caused by: ";
     result += cause->Dump();
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index 5a90599..bc9848a 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -33,16 +33,16 @@
   void SetDetailMessage(String* new_detail_message) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_), new_detail_message, false);
   }
-  String* GetDetailMessage() const {
-    return GetFieldObject<String*>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_), false);
+  String* GetDetailMessage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_), false);
   }
-  std::string Dump() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  std::string Dump() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // This is a runtime version of initCause, you shouldn't use it if initCause may have been
   // overridden. Also it asserts rather than throwing exceptions. Currently this is only used
   // in cases like the verifier where the checks cannot fail and initCause isn't overridden.
   void SetCause(Throwable* cause) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsCheckedException() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsCheckedException() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangThrowable() {
     DCHECK(java_lang_Throwable_ != NULL);
@@ -55,16 +55,16 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  Object* GetStackState() const {
-    return GetFieldObject<Object*>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), true);
+  Object* GetStackState() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), true);
   }
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  Throwable* cause_;
-  String* detail_message_;
-  Object* stack_state_;  // Note this is Java volatile:
-  Object* stack_trace_;
-  Object* suppressed_exceptions_;
+  HeapReference<Throwable> cause_;
+  HeapReference<String> detail_message_;
+  HeapReference<Object> stack_state_;  // Note this is Java volatile:
+  HeapReference<Object> stack_trace_;
+  HeapReference<Object> suppressed_exceptions_;
 
   static Class* java_lang_Throwable_;
 
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index 4e365be..0addd51 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -47,6 +47,7 @@
 static const uint32_t kAccClassIsProxy = 0x00040000;  // class (dex only)
 static const uint32_t kAccPreverified = 0x00080000;  // method (dex only)
 static const uint32_t kAccFastNative = 0x0080000;  // method (dex only)
+static const uint32_t kAccPortableCompiled = 0x0100000;  // method (dex only)
 
 // Special runtime-only flags.
 // Note: if only kAccClassIsReference is set, we have a soft reference.
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 4186693..72220e0 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -79,7 +79,7 @@
   is_sensitive_thread_hook_ = is_sensitive_thread_hook;
 }
 
-Monitor::Monitor(Thread* owner, mirror::Object* obj, int32_t hash_code)
+Monitor::Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
     : monitor_lock_("a monitor lock", kMonitorLock),
       monitor_contenders_("monitor contenders", monitor_lock_),
       num_waiters_(0),
@@ -89,10 +89,11 @@
       wait_set_(NULL),
       hash_code_(hash_code),
       locking_method_(NULL),
-      locking_dex_pc_(0) {
+      locking_dex_pc_(0),
+      monitor_id_(MonitorPool::CreateMonitorId(self, this)) {
   // We should only inflate a lock if the owner is ourselves or suspended. This avoids a race
   // with the owner unlocking the thin-lock.
-  CHECK(owner == nullptr || owner == Thread::Current() || owner->IsSuspended());
+  CHECK(owner == nullptr || owner == self || owner->IsSuspended());
   // The identity hash code is set for the life time of the monitor.
 }
 
@@ -145,6 +146,7 @@
 }
 
 Monitor::~Monitor() {
+  MonitorPool::ReleaseMonitorId(monitor_id_);
   // Deflated monitors have a null object.
 }
 
@@ -219,7 +221,7 @@
     // Contended.
     const bool log_contention = (lock_profiling_threshold_ != 0);
     uint64_t wait_start_ms = log_contention ? 0 : MilliTime();
-    const mirror::ArtMethod* owners_method = locking_method_;
+    mirror::ArtMethod* owners_method = locking_method_;
     uint32_t owners_dex_pc = locking_dex_pc_;
     monitor_lock_.Unlock(self);  // Let go of locks in order.
     {
@@ -411,7 +413,7 @@
   if (ms < 0 || ns < 0 || ns > 999999) {
     ThrowLocation throw_location = self->GetCurrentLocationForThrow();
     self->ThrowNewExceptionF(throw_location, "Ljava/lang/IllegalArgumentException;",
-                             "timeout arguments out of range: ms=%lld ns=%d", ms, ns);
+                             "timeout arguments out of range: ms=%" PRId64 " ns=%d", ms, ns);
     monitor_lock_.Unlock(self);
     return;
   }
@@ -430,7 +432,7 @@
   int prev_lock_count = lock_count_;
   lock_count_ = 0;
   owner_ = NULL;
-  const mirror::ArtMethod* saved_method = locking_method_;
+  mirror::ArtMethod* saved_method = locking_method_;
   locking_method_ = NULL;
   uintptr_t saved_dex_pc = locking_dex_pc_;
   locking_dex_pc_ = 0;
@@ -611,7 +613,7 @@
   DCHECK(self != NULL);
   DCHECK(obj != NULL);
   // Allocate and acquire a new monitor.
-  UniquePtr<Monitor> m(new Monitor(owner, obj, hash_code));
+  UniquePtr<Monitor> m(new Monitor(self, owner, obj, hash_code));
   if (m->Install(self)) {
     VLOG(monitor) << "monitor: thread " << owner->GetThreadId()
                     << " created monitor " << m.get() << " for object " << obj;
@@ -1008,7 +1010,7 @@
   return owner_ != nullptr;
 }
 
-void Monitor::TranslateLocation(const mirror::ArtMethod* method, uint32_t dex_pc,
+void Monitor::TranslateLocation(mirror::ArtMethod* method, uint32_t dex_pc,
                                 const char** source_file, uint32_t* line_number) const {
   // If method is null, location is unknown
   if (method == NULL) {
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 16e9410..85a8c48 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -24,7 +24,7 @@
 #include <list>
 #include <vector>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/mutex.h"
 #include "root_visitor.h"
 #include "sirt_ref.h"
@@ -40,6 +40,8 @@
 class Thread;
 class StackVisitor;
 
+typedef uint32_t MonitorId;
+
 class Monitor {
  public:
   // The default number of spins that are done before thread suspension is used to forcibly inflate
@@ -108,6 +110,10 @@
     return hash_code_.Load() != 0;
   }
 
+  MonitorId GetMonitorId() const {
+    return monitor_id_;
+  }
+
   static void InflateThinLocked(Thread* self, SirtRef<mirror::Object>& obj, LockWord lock_word,
                                 uint32_t hash_code) NO_THREAD_SAFETY_ANALYSIS;
 
@@ -115,7 +121,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  explicit Monitor(Thread* owner, mirror::Object* obj, int32_t hash_code)
+  explicit Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Install the monitor into its object, may fail if another thread installs a different monitor
@@ -162,7 +168,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Translates the provided method and pc into its declaring class' source file and line number.
-  void TranslateLocation(const mirror::ArtMethod* method, uint32_t pc,
+  void TranslateLocation(mirror::ArtMethod* method, uint32_t pc,
                          const char** source_file, uint32_t* line_number) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -195,9 +201,12 @@
   // Method and dex pc where the lock owner acquired the lock, used when lock
   // sampling is enabled. locking_method_ may be null if the lock is currently
   // unlocked, or if the lock is acquired by the system when the stack is empty.
-  const mirror::ArtMethod* locking_method_ GUARDED_BY(monitor_lock_);
+  mirror::ArtMethod* locking_method_ GUARDED_BY(monitor_lock_);
   uint32_t locking_dex_pc_ GUARDED_BY(monitor_lock_);
 
+  // A denser encoding of this monitor, suitable for storing in the lock word.
+  MonitorId monitor_id_;
+
   friend class MonitorInfo;
   friend class MonitorList;
   friend class mirror::Object;
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
new file mode 100644
index 0000000..eadd7a6
--- /dev/null
+++ b/runtime/monitor_pool.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "monitor_pool.h"
+
+#include "base/logging.h"
+#include "base/mutex-inl.h"
+#include "monitor.h"
+
+namespace art {
+
+MonitorPool::MonitorPool() : allocated_ids_lock_("allocated monitor ids lock") {
+}
+
+Monitor* MonitorPool::LookupMonitorFromTable(MonitorId mon_id) {
+  ReaderMutexLock mu(Thread::Current(), allocated_ids_lock_);
+  return table_.Get(mon_id);
+}
+
+MonitorId MonitorPool::AllocMonitorIdFromTable(Thread* self, Monitor* mon) {
+  WriterMutexLock mu(self, allocated_ids_lock_);
+  for (size_t i = 0; i < allocated_ids_.size(); ++i) {
+    if (!allocated_ids_[i]) {
+      allocated_ids_.set(i);
+      MonitorId mon_id = i + 1;  // Zero is reserved to mean "invalid".
+      table_.Put(mon_id, mon);
+      return mon_id;
+    }
+  }
+  LOG(FATAL) << "Out of internal monitor ids";
+  return 0;
+}
+
+void MonitorPool::ReleaseMonitorIdFromTable(MonitorId mon_id) {
+  WriterMutexLock mu(Thread::Current(), allocated_ids_lock_);
+  DCHECK(table_.Get(mon_id) != nullptr);
+  table_.erase(mon_id);
+  --mon_id;  // Zero is reserved to mean "invalid".
+  DCHECK(allocated_ids_[mon_id]) << mon_id;
+  allocated_ids_.reset(mon_id);
+}
+
+}  // namespace art
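Aside: the table-based allocator above keeps two invariants worth making explicit -
ids are 1-based because a lock word of all zeroes must keep meaning "no monitor",
and the bitset index is always id - 1. The same logic with the locking stripped
out, as a toy (not ART code):

    #include <bitset>
    #include <cassert>
    #include <cstdint>

    class ToyIdAllocator {
     public:
      static constexpr uint32_t kMaxIds = 0xFFFF;

      // Returns a fresh 1-based id, or 0 when exhausted (the runtime LOG(FATAL)s
      // instead of returning).
      uint32_t Alloc() {
        for (size_t i = 0; i < used_.size(); ++i) {
          if (!used_[i]) {
            used_.set(i);
            return static_cast<uint32_t>(i) + 1;  // zero is reserved for "invalid"
          }
        }
        return 0;
      }

      void Release(uint32_t id) {
        assert(id != 0 && used_[id - 1]);
        used_.reset(id - 1);
      }

     private:
      std::bitset<kMaxIds> used_;
    };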
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
new file mode 100644
index 0000000..32f3f4e
--- /dev/null
+++ b/runtime/monitor_pool.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MONITOR_POOL_H_
+#define ART_RUNTIME_MONITOR_POOL_H_
+
+#include "monitor.h"
+
+#include "safe_map.h"
+
+#include <stdint.h>
+
+namespace art {
+
+// Abstraction to keep monitors small enough to fit in a lock word (32 bits). On 32-bit systems
+// the monitor id is simply the Monitor* with its (always-zero) alignment bits shifted out.
+class MonitorPool {
+ public:
+  static MonitorPool* Create() {
+#ifndef __LP64__
+    return nullptr;
+#else
+    return new MonitorPool();
+#endif
+  }
+
+  static Monitor* MonitorFromMonitorId(MonitorId mon_id) {
+#ifndef __LP64__
+    return reinterpret_cast<Monitor*>(mon_id << 3);
+#else
+    return Runtime::Current()->GetMonitorPool()->LookupMonitorFromTable(mon_id);
+#endif
+  }
+
+  static MonitorId MonitorIdFromMonitor(Monitor* mon) {
+#ifndef __LP64__
+    return reinterpret_cast<MonitorId>(mon) >> 3;
+#else
+    return mon->GetMonitorId();
+#endif
+  }
+
+  static MonitorId CreateMonitorId(Thread* self, Monitor* mon) {
+#ifndef __LP64__
+    UNUSED(self);
+    return MonitorIdFromMonitor(mon);
+#else
+    return Runtime::Current()->GetMonitorPool()->AllocMonitorIdFromTable(self, mon);
+#endif
+  }
+
+  static void ReleaseMonitorId(MonitorId mon_id) {
+#ifndef __LP64__
+    UNUSED(mon_id);
+#else
+    Runtime::Current()->GetMonitorPool()->ReleaseMonitorIdFromTable(mon_id);
+#endif
+  }
+
+ private:
+#ifdef __LP64__
+  MonitorPool();
+
+  Monitor* LookupMonitorFromTable(MonitorId mon_id);
+
+  MonitorId LookupMonitorIdFromTable(Monitor* mon);
+
+  MonitorId AllocMonitorIdFromTable(Thread* self, Monitor* mon);
+
+  void ReleaseMonitorIdFromTable(MonitorId mon_id);
+
+  ReaderWriterMutex allocated_ids_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  static constexpr uint32_t kMaxMonitorId = 0xFFFF;
+  std::bitset<kMaxMonitorId> allocated_ids_ GUARDED_BY(allocated_ids_lock_);
+  SafeMap<MonitorId, Monitor*> table_ GUARDED_BY(allocated_ids_lock_);
+#endif
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_MONITOR_POOL_H_
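Aside: the 32-bit halves of MonitorFromMonitorId/MonitorIdFromMonitor work because
heap-allocated Monitors are at least 8-byte aligned, so the three low bits of the
pointer are always zero and can be shifted out without loss. A quick check of the
encoding (using a simulated 32-bit address so the check also passes when built on a
64-bit host):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t monitor_addr = 0x40001238u;  // 8-byte aligned, fits in 32 bits
      assert((monitor_addr & 0x7) == 0);           // alignment supplies the spare bits
      const uint32_t id = static_cast<uint32_t>(monitor_addr >> 3);
      assert((static_cast<uintptr_t>(id) << 3) == monitor_addr);  // lossless
      return 0;
    }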
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 45a2eed..1a3ceb8 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -84,7 +84,7 @@
   void operator=(const NullableScopedUtfChars&);
 };
 
-static jint DexFile_openDexFileNative(JNIEnv* env, jclass, jstring javaSourceName, jstring javaOutputName, jint) {
+static jlong DexFile_openDexFileNative(JNIEnv* env, jclass, jstring javaSourceName, jstring javaOutputName, jint) {
   ScopedUtfChars sourceName(env, javaSourceName);
   if (sourceName.c_str() == NULL) {
     return 0;
@@ -125,10 +125,10 @@
     ThrowIOException("%s", error_msg.c_str());
     return 0;
   }
-  return static_cast<jint>(reinterpret_cast<uintptr_t>(dex_file));
+  return static_cast<jlong>(reinterpret_cast<uintptr_t>(dex_file));
 }
 
-static const DexFile* toDexFile(int dex_file_address, JNIEnv* env) {
+static const DexFile* toDexFile(jlong dex_file_address, JNIEnv* env) {
   const DexFile* dex_file = reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(dex_file_address));
   if (UNLIKELY(dex_file == nullptr)) {
     ScopedObjectAccess soa(env);
@@ -137,7 +137,7 @@
   return dex_file;
 }
 
-static void DexFile_closeDexFile(JNIEnv* env, jclass, jint cookie) {
+static void DexFile_closeDexFile(JNIEnv* env, jclass, jlong cookie) {
   const DexFile* dex_file;
   dex_file = toDexFile(cookie, env);
   if (dex_file == nullptr) {
@@ -150,7 +150,7 @@
 }
 
 static jclass DexFile_defineClassNative(JNIEnv* env, jclass, jstring javaName, jobject javaLoader,
-                                        jint cookie) {
+                                        jlong cookie) {
   const DexFile* dex_file = toDexFile(cookie, env);
   if (dex_file == NULL) {
     VLOG(class_linker) << "Failed to find dex_file";
@@ -177,7 +177,7 @@
   return soa.AddLocalReference<jclass>(result);
 }
 
-static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jint cookie) {
+static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jlong cookie) {
   const DexFile* dex_file;
   dex_file = toDexFile(cookie, env);
   if (dex_file == nullptr) {
@@ -290,7 +290,7 @@
         return JNI_TRUE;
       }
       if (oat_file->GetOatHeader().GetImageFileLocationOatDataBegin()
-          != reinterpret_cast<uint32_t>(image_header.GetOatDataBegin())) {
+          != reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin())) {
         if (kDebugLogging) {
           ScopedObjectAccess soa(env);
           LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
@@ -330,11 +330,11 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(DexFile, closeDexFile, "(I)V"),
-  NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;I)Ljava/lang/Class;"),
-  NATIVE_METHOD(DexFile, getClassNameList, "(I)[Ljava/lang/String;"),
+  NATIVE_METHOD(DexFile, closeDexFile, "(J)V"),
+  NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;J)Ljava/lang/Class;"),
+  NATIVE_METHOD(DexFile, getClassNameList, "(J)[Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
-  NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)I"),
+  NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)J"),
 };
 
 void register_dalvik_system_DexFile(JNIEnv* env) {
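Aside: widening the DexFile cookie from jint to jlong is the 64-bit-safety half of
this change - a native pointer no longer gets truncated through a 32-bit cookie.
The round trip in isolation (jlong_t is a stand-in for JNI's jlong):

    #include <cassert>
    #include <cstdint>

    typedef int64_t jlong_t;

    // Pointer -> cookie, as openDexFileNative now does: widen through uintptr_t
    // first so no sign-extension surprises creep in.
    static jlong_t PointerToCookie(const void* p) {
      return static_cast<jlong_t>(reinterpret_cast<uintptr_t>(p));
    }

    // Cookie -> pointer, as toDexFile now does.
    static const void* CookieToPointer(jlong_t cookie) {
      return reinterpret_cast<const void*>(static_cast<uintptr_t>(cookie));
    }

    int main() {
      int object = 0;
      const void* p = &object;
      assert(CookieToPointer(PointerToCookie(p)) == p);
      return 0;
    }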
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 4a84cfe..dceea5c 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -24,6 +24,7 @@
 #include "gc/space/dlmalloc_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
+#include "gc/space/zygote_space.h"
 #include "hprof/hprof.h"
 #include "jni_internal.h"
 #include "mirror/class.h"
@@ -265,9 +266,9 @@
     if (space->IsImageSpace()) {
       // Currently don't include the image space.
     } else if (space->IsZygoteSpace()) {
-      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
-      zygoteSize += malloc_space->GetFootprint();
-      zygoteUsed += malloc_space->GetBytesAllocated();
+      gc::space::ZygoteSpace* zygote_space = space->AsZygoteSpace();
+      zygoteSize += zygote_space->Size();
+      zygoteUsed += zygote_space->GetBytesAllocated();
     } else if (space->IsMallocSpace()) {
       // This is a malloc space.
       gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index c9e255c..e1b5f97 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -90,7 +90,7 @@
     ThrowRuntimeException("Trying to get address of movable array object");
     return 0;
   }
-  return reinterpret_cast<uintptr_t>(array->GetRawData(array->GetClass()->GetComponentSize()));
+  return reinterpret_cast<uintptr_t>(array->GetRawData(array->GetClass()->GetComponentSize(), 0));
 }
 
 static void VMRuntime_clearGrowthLimit(JNIEnv*, jobject) {
@@ -181,7 +181,8 @@
 
 typedef std::map<std::string, mirror::String*> StringTable;
 
-static mirror::Object* PreloadDexCachesStringsVisitor(mirror::Object* root, void* arg) {
+static mirror::Object* PreloadDexCachesStringsVisitor(mirror::Object* root, void* arg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   StringTable& table = *reinterpret_cast<StringTable*>(arg);
   mirror::String* string = const_cast<mirror::Object*>(root)->AsString();
   // LOG(INFO) << "VMRuntime.preloadDexCaches interned=" << string->ToModifiedUtf8();
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index f915365..7e02e29 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -79,7 +79,7 @@
     ClosestUserClassLoaderVisitor(Thread* thread, mirror::Object* bootstrap, mirror::Object* system)
       : StackVisitor(thread, NULL), bootstrap(bootstrap), system(system), class_loader(NULL) {}
 
-    bool VisitFrame() {
+    bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       DCHECK(class_loader == NULL);
       mirror::Class* c = GetMethod()->GetDeclaringClass();
       mirror::Object* cl = c->GetClassLoader();
diff --git a/runtime/native/dalvik_system_Zygote.cc b/runtime/native/dalvik_system_Zygote.cc
index 7fa9457..22c5430 100644
--- a/runtime/native/dalvik_system_Zygote.cc
+++ b/runtime/native/dalvik_system_Zygote.cc
@@ -47,8 +47,10 @@
 #if defined(__linux__)
 #include <sys/personality.h>
 #include <sys/utsname.h>
+#if defined(HAVE_ANDROID_OS)
 #include <sys/capability.h>
 #endif
+#endif
 
 namespace art {
 
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index e969fcf..0629f4d 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -64,7 +64,8 @@
   std::string detail;
   {
     ScopedObjectAccess soa(env);
-    mirror::ClassLoader* classLoader = soa.Decode<mirror::ClassLoader*>(javaLoader);
+    SirtRef<mirror::ClassLoader> classLoader(soa.Self(),
+                                             soa.Decode<mirror::ClassLoader*>(javaLoader));
     JavaVMExt* vm = Runtime::Current()->GetJavaVM();
     bool success = vm->LoadNativeLibrary(filename.c_str(), classLoader, &detail);
     if (success) {
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index ea78e04..6bbe642 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -24,151 +24,15 @@
 #include "mirror/object_array-inl.h"
 #include "scoped_fast_native_object_access.h"
 
-/*
- * We make guarantees about the atomicity of accesses to primitive
- * variables.  These guarantees also apply to elements of arrays.
- * In particular, 8-bit, 16-bit, and 32-bit accesses must be atomic and
- * must not cause "word tearing".  Accesses to 64-bit array elements must
- * either be atomic or treated as two 32-bit operations.  References are
- * always read and written atomically, regardless of the number of bits
- * used to represent them.
- *
- * We can't rely on standard libc functions like memcpy(3) and memmove(3)
- * in our implementation of System.arraycopy, because they may copy
- * byte-by-byte (either for the full run or for "unaligned" parts at the
- * start or end).  We need to use functions that guarantee 16-bit or 32-bit
- * atomicity as appropriate.
- *
- * System.arraycopy() is heavily used, so having an efficient implementation
- * is important.  The bionic libc provides a platform-optimized memory move
- * function that should be used when possible.  If it's not available,
- * the trivial "reference implementation" versions below can be used until
- * a proper version can be written.
- *
- * For these functions, The caller must guarantee that dst/src are aligned
- * appropriately for the element type, and that n is a multiple of the
- * element size.
- */
-
-/*
- * Works like memmove(), except:
- * - if all arguments are at least 32-bit aligned, we guarantee that we
- *   will use operations that preserve atomicity of 32-bit values
- * - if not, we guarantee atomicity of 16-bit values
- *
- * If all three arguments are not at least 16-bit aligned, the behavior
- * of this function is undefined.  (We could remove this restriction by
- * testing for unaligned values and punting to memmove(), but that's
- * not currently useful.)
- *
- * TODO: add loop for 64-bit alignment
- * TODO: use __builtin_prefetch
- * TODO: write ARM/MIPS/x86 optimized versions
- */
-void MemmoveWords(void* dst, const void* src, size_t n) {
-  DCHECK_EQ((((uintptr_t) dst | (uintptr_t) src | n) & 0x01), 0U);
-
-  char* d = reinterpret_cast<char*>(dst);
-  const char* s = reinterpret_cast<const char*>(src);
-  size_t copyCount;
-
-  // If the source and destination pointers are the same, this is
-  // an expensive no-op.  Testing for an empty move now allows us
-  // to skip a check later.
-  if (n == 0 || d == s) {
-    return;
-  }
-
-  // Determine if the source and destination buffers will overlap if
-  // we copy data forward (i.e. *dst++ = *src++).
-  //
-  // It's okay if the destination buffer starts before the source and
-  // there is some overlap, because the reader is always ahead of the
-  // writer.
-  if (LIKELY((d < s) || ((size_t)(d - s) >= n))) {
-    // Copy forward.  We prefer 32-bit loads and stores even for 16-bit
-    // data, so sort that out.
-    if (((reinterpret_cast<uintptr_t>(d) | reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) {
-      // Not 32-bit aligned.  Two possibilities:
-      // (1) Congruent, we can align to 32-bit by copying one 16-bit val
-      // (2) Non-congruent, we can do one of:
-      //   a. copy whole buffer as a series of 16-bit values
-      //   b. load/store 32 bits, using shifts to ensure alignment
-      //   c. just copy the as 32-bit values and assume the CPU
-      //      will do a reasonable job
-      //
-      // We're currently using (a), which is suboptimal.
-      if (((reinterpret_cast<uintptr_t>(d) ^ reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) {
-        copyCount = n;
-      } else {
-        copyCount = 2;
-      }
-      n -= copyCount;
-      copyCount /= sizeof(uint16_t);
-
-      while (copyCount--) {
-        *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s);
-        d += sizeof(uint16_t);
-        s += sizeof(uint16_t);
-      }
-    }
-
-    // Copy 32-bit aligned words.
-    copyCount = n / sizeof(uint32_t);
-    while (copyCount--) {
-      *reinterpret_cast<uint32_t*>(d) = *reinterpret_cast<const uint32_t*>(s);
-      d += sizeof(uint32_t);
-      s += sizeof(uint32_t);
-    }
-
-    // Check for leftovers.  Either we finished exactly, or we have one remaining 16-bit chunk.
-    if ((n & 0x02) != 0) {
-      *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s);
-    }
-  } else {
-    // Copy backward, starting at the end.
-    d += n;
-    s += n;
-
-    if (((reinterpret_cast<uintptr_t>(d) | reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) {
-      // try for 32-bit alignment.
-      if (((reinterpret_cast<uintptr_t>(d) ^ reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) {
-        copyCount = n;
-      } else {
-        copyCount = 2;
-      }
-      n -= copyCount;
-      copyCount /= sizeof(uint16_t);
-
-      while (copyCount--) {
-        d -= sizeof(uint16_t);
-        s -= sizeof(uint16_t);
-        *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s);
-      }
-    }
-
-    // Copy 32-bit aligned words.
-    copyCount = n / sizeof(uint32_t);
-    while (copyCount--) {
-      d -= sizeof(uint32_t);
-      s -= sizeof(uint32_t);
-      *reinterpret_cast<uint32_t*>(d) = *reinterpret_cast<const uint32_t*>(s);
-    }
-
-    // Copy leftovers.
-    if ((n & 0x02) != 0) {
-      d -= sizeof(uint16_t);
-      s -= sizeof(uint16_t);
-      *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s);
-    }
-  }
-}
-
-#define move16 MemmoveWords
-#define move32 MemmoveWords
-
 namespace art {
 
+/*
+ * We make guarantees about the atomicity of accesses to primitive variables.  These guarantees
+ * also apply to elements of arrays. In particular, 8-bit, 16-bit, and 32-bit accesses must not
+ * cause "word tearing".  Accesses to 64-bit array elements may be two 32-bit operations.
+ * References are never torn regardless of the number of bits used to represent them.
+ */
+
 static void ThrowArrayStoreException_NotAnArray(const char* identifier, mirror::Object* array)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::string actualType(PrettyTypeOf(array));
@@ -178,168 +42,132 @@
                            "%s of type %s is not an array", identifier, actualType.c_str());
 }
 
-static void System_arraycopy(JNIEnv* env, jclass, jobject javaSrc, jint srcPos, jobject javaDst, jint dstPos, jint length) {
+static void System_arraycopy(JNIEnv* env, jclass, jobject javaSrc, jint srcPos, jobject javaDst,
+                             jint dstPos, jint length) {
+  // The API is defined in terms of "length", but that name is overloaded here, so we use "count".
+  const jint count = length;
   ScopedFastNativeObjectAccess soa(env);
 
   // Null pointer checks.
-  if (UNLIKELY(javaSrc == NULL)) {
-    ThrowNullPointerException(NULL, "src == null");
+  if (UNLIKELY(javaSrc == nullptr)) {
+    ThrowNullPointerException(nullptr, "src == null");
     return;
   }
-  if (UNLIKELY(javaDst == NULL)) {
-    ThrowNullPointerException(NULL, "dst == null");
+  if (UNLIKELY(javaDst == nullptr)) {
+    ThrowNullPointerException(nullptr, "dst == null");
     return;
   }
 
   // Make sure source and destination are both arrays.
   mirror::Object* srcObject = soa.Decode<mirror::Object*>(javaSrc);
-  mirror::Object* dstObject = soa.Decode<mirror::Object*>(javaDst);
   if (UNLIKELY(!srcObject->IsArrayInstance())) {
     ThrowArrayStoreException_NotAnArray("source", srcObject);
     return;
   }
+  mirror::Object* dstObject = soa.Decode<mirror::Object*>(javaDst);
   if (UNLIKELY(!dstObject->IsArrayInstance())) {
     ThrowArrayStoreException_NotAnArray("destination", dstObject);
     return;
   }
   mirror::Array* srcArray = srcObject->AsArray();
   mirror::Array* dstArray = dstObject->AsArray();
-  mirror::Class* srcComponentType = srcArray->GetClass()->GetComponentType();
-  mirror::Class* dstComponentType = dstArray->GetClass()->GetComponentType();
 
   // Bounds checking.
-  if (UNLIKELY(srcPos < 0 || dstPos < 0 || length < 0 || srcPos > srcArray->GetLength() - length || dstPos > dstArray->GetLength() - length)) {
+  if (UNLIKELY(srcPos < 0) || UNLIKELY(dstPos < 0) || UNLIKELY(count < 0) ||
+      UNLIKELY(srcPos > srcArray->GetLength() - count) ||
+      UNLIKELY(dstPos > dstArray->GetLength() - count)) {
     ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
     soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/ArrayIndexOutOfBoundsException;",
                                    "src.length=%d srcPos=%d dst.length=%d dstPos=%d length=%d",
-                                   srcArray->GetLength(), srcPos, dstArray->GetLength(), dstPos, length);
+                                   srcArray->GetLength(), srcPos, dstArray->GetLength(), dstPos,
+                                   count);
     return;
   }
 
-  // Handle primitive arrays.
-  if (srcComponentType->IsPrimitive() || dstComponentType->IsPrimitive()) {
-    // If one of the arrays holds a primitive type the other array must hold the exact same type.
-    if (UNLIKELY(srcComponentType != dstComponentType)) {
-      std::string srcType(PrettyTypeOf(srcArray));
-      std::string dstType(PrettyTypeOf(dstArray));
-      ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
-      soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/ArrayStoreException;",
-                                     "Incompatible types: src=%s, dst=%s",
-                                     srcType.c_str(), dstType.c_str());
-      return;
-    }
+  mirror::Class* dstComponentType = dstArray->GetClass()->GetComponentType();
+  mirror::Class* srcComponentType = srcArray->GetClass()->GetComponentType();
+  Primitive::Type dstComponentPrimitiveType = dstComponentType->GetPrimitiveType();
 
-    size_t width = srcArray->GetClass()->GetComponentSize();
-    uint8_t* dstBytes = reinterpret_cast<uint8_t*>(dstArray->GetRawData(width));
-    const uint8_t* srcBytes = reinterpret_cast<const uint8_t*>(srcArray->GetRawData(width));
-
-    switch (width) {
-    case 1:
-      memmove(dstBytes + dstPos, srcBytes + srcPos, length);
-      break;
-    case 2:
-      move16(dstBytes + dstPos * 2, srcBytes + srcPos * 2, length * 2);
-      break;
-    case 4:
-      move32(dstBytes + dstPos * 4, srcBytes + srcPos * 4, length * 4);
-      break;
-    case 8:
-      // We don't need to guarantee atomicity of the entire 64-bit word.
-      move32(dstBytes + dstPos * 8, srcBytes + srcPos * 8, length * 8);
-      break;
-    default:
-      LOG(FATAL) << "Unknown primitive array type: " << PrettyTypeOf(srcArray);
-    }
-
-    return;
-  }
-
-  // Neither class is primitive. Are the types trivially compatible?
-  const size_t width = sizeof(mirror::Object*);
-  uint8_t* dstBytes = reinterpret_cast<uint8_t*>(dstArray->GetRawData(width));
-  const uint8_t* srcBytes = reinterpret_cast<const uint8_t*>(srcArray->GetRawData(width));
-  if (dstArray == srcArray || dstComponentType->IsAssignableFrom(srcComponentType)) {
-    // Yes. Bulk copy.
-    COMPILE_ASSERT(sizeof(width) == sizeof(uint32_t), move32_assumes_Object_references_are_32_bit);
-    move32(dstBytes + dstPos * width, srcBytes + srcPos * width, length * width);
-    Runtime::Current()->GetHeap()->WriteBarrierArray(dstArray, dstPos, length);
-    return;
-  }
-
-  // The arrays are not trivially compatible. However, we may still be able to copy some or all of
-  // the elements if the source objects are compatible (for example, copying an Object[] to
-  // String[], the Objects being copied might actually be Strings).
-  // We can't do a bulk move because that would introduce a check-use race condition, so we copy
-  // elements one by one.
-
-  // We already dealt with overlapping copies, so we don't need to cope with that case below.
-  CHECK_NE(dstArray, srcArray);
-
-  mirror::Object* const * srcObjects =
-      reinterpret_cast<mirror::Object* const *>(srcBytes + srcPos * width);
-  mirror::Object** dstObjects = reinterpret_cast<mirror::Object**>(dstBytes + dstPos * width);
-  mirror::Class* dstClass = dstArray->GetClass()->GetComponentType();
-
-  // We want to avoid redundant IsAssignableFrom checks where possible, so we cache a class that
-  // we know is assignable to the destination array's component type.
-  mirror::Class* lastAssignableElementClass = dstClass;
-
-  mirror::Object* o = NULL;
-  int i = 0;
-  for (; i < length; ++i) {
-    o = srcObjects[i];
-    if (o != NULL) {
-      mirror::Class* oClass = o->GetClass();
-      if (lastAssignableElementClass == oClass) {
-        dstObjects[i] = o;
-      } else if (dstClass->IsAssignableFrom(oClass)) {
-        lastAssignableElementClass = oClass;
-        dstObjects[i] = o;
-      } else {
-        // Can't put this element into the array.
-        break;
+  if (LIKELY(srcComponentType == dstComponentType)) {
+    // Trivial assignability.
+    switch (dstComponentPrimitiveType) {
+      case Primitive::kPrimVoid:
+        LOG(FATAL) << "Unreachable, cannot have arrays of type void";
+        return;
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+        DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 1U);
+        dstArray->AsByteSizedArray()->Memmove(dstPos, srcArray->AsByteSizedArray(), srcPos, count);
+        return;
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+        DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 2U);
+        dstArray->AsShortSizedArray()->Memmove(dstPos, srcArray->AsShortSizedArray(), srcPos, count);
+        return;
+      case Primitive::kPrimInt:
+      case Primitive::kPrimFloat:
+        DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 4U);
+        dstArray->AsIntArray()->Memmove(dstPos, srcArray->AsIntArray(), srcPos, count);
+        return;
+      case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
+        DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 8U);
+        dstArray->AsLongArray()->Memmove(dstPos, srcArray->AsLongArray(), srcPos, count);
+        return;
+      case Primitive::kPrimNot: {
+        mirror::ObjectArray<mirror::Object>* dstObjArray = dstArray->AsObjectArray<mirror::Object>();
+        mirror::ObjectArray<mirror::Object>* srcObjArray = srcArray->AsObjectArray<mirror::Object>();
+        dstObjArray->AssignableMemmove(dstPos, srcObjArray, srcPos, count);
+        return;
       }
-    } else {
-      dstObjects[i] = NULL;
+      default:
+        LOG(FATAL) << "Unknown array type: " << PrettyTypeOf(srcArray);
+        return;
     }
   }
-
-  Runtime::Current()->GetHeap()->WriteBarrierArray(dstArray, dstPos, length);
-  if (UNLIKELY(i != length)) {
-    std::string actualSrcType(PrettyTypeOf(o));
+  // If one of the arrays holds a primitive type the other array must hold the exact same type.
+  if (UNLIKELY((dstComponentPrimitiveType != Primitive::kPrimNot) ||
+               srcComponentType->IsPrimitive())) {
+    std::string srcType(PrettyTypeOf(srcArray));
     std::string dstType(PrettyTypeOf(dstArray));
     ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
     soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/ArrayStoreException;",
-                                   "source[%d] of type %s cannot be stored in destination array of type %s",
-                                   srcPos + i, actualSrcType.c_str(), dstType.c_str());
+                                   "Incompatible types: src=%s, dst=%s",
+                                   srcType.c_str(), dstType.c_str());
     return;
   }
+  // Arrays hold distinct types and therefore can't alias - use memcpy instead of memmove.
+  mirror::ObjectArray<mirror::Object>* dstObjArray = dstArray->AsObjectArray<mirror::Object>();
+  mirror::ObjectArray<mirror::Object>* srcObjArray = srcArray->AsObjectArray<mirror::Object>();
+  // If we're assigning into, say, an Object[] then we don't need per-element checks.
+  if (dstComponentType->IsAssignableFrom(srcComponentType)) {
+    dstObjArray->AssignableMemcpy(dstPos, srcObjArray, srcPos, count);
+    return;
+  }
+  dstObjArray->AssignableCheckingMemcpy(dstPos, srcObjArray, srcPos, count, true);
 }
 
-static void System_arraycopyCharUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos, jobject javaDst, jint dstPos, jint length) {
+static void System_arraycopyCharUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                          jobject javaDst, jint dstPos, jint count) {
   ScopedFastNativeObjectAccess soa(env);
-  DCHECK(javaSrc != NULL);
-  DCHECK(javaDst != NULL);
   mirror::Object* srcObject = soa.Decode<mirror::Object*>(javaSrc);
   mirror::Object* dstObject = soa.Decode<mirror::Object*>(javaDst);
-  DCHECK(srcObject->IsArrayInstance());
-  DCHECK(dstObject->IsArrayInstance());
+  DCHECK(srcObject != nullptr);
+  DCHECK(dstObject != nullptr);
   mirror::Array* srcArray = srcObject->AsArray();
   mirror::Array* dstArray = dstObject->AsArray();
-  DCHECK(srcPos >= 0 && dstPos >= 0 && length >= 0 &&
-         srcPos + length <= srcArray->GetLength() && dstPos + length <= dstArray->GetLength());
-  DCHECK_EQ(srcArray->GetClass()->GetComponentType(), dstArray->GetClass()->GetComponentType());
-  DCHECK(srcArray->GetClass()->GetComponentType()->IsPrimitive());
-  DCHECK(dstArray->GetClass()->GetComponentType()->IsPrimitive());
-  DCHECK_EQ(srcArray->GetClass()->GetComponentSize(), static_cast<size_t>(2));
-  DCHECK_EQ(dstArray->GetClass()->GetComponentSize(), static_cast<size_t>(2));
-  uint8_t* dstBytes = reinterpret_cast<uint8_t*>(dstArray->GetRawData(2));
-  const uint8_t* srcBytes = reinterpret_cast<const uint8_t*>(srcArray->GetRawData(2));
-  move16(dstBytes + dstPos * 2, srcBytes + srcPos * 2, length * 2);
+  DCHECK_GE(srcPos, 0);
+  DCHECK_GE(dstPos, 0);
+  DCHECK_GE(count, 0);
+  DCHECK_LE(srcPos + count, srcArray->GetLength());
+  DCHECK_LE(dstPos + count, dstArray->GetLength());
+  DCHECK_EQ(srcArray->GetClass(), dstArray->GetClass());
+  DCHECK_EQ(srcArray->GetClass()->GetComponentType()->GetPrimitiveType(), Primitive::kPrimChar);
+  dstArray->AsCharArray()->Memmove(dstPos, srcArray->AsCharArray(), srcPos, count);
 }
 
 static jint System_identityHashCode(JNIEnv* env, jclass, jobject javaObject) {
-  if (javaObject == nullptr) {
+  if (UNLIKELY(javaObject == nullptr)) {
     return 0;
   }
   ScopedFastNativeObjectAccess soa(env);
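
A note on the memmove/memcpy split in the arraycopy hunk above: same-type copies may be within a single array, so they need memmove's overlap handling, while the distinct-type object path can never alias and may use memcpy. A minimal standalone C++ sketch of that distinction (hypothetical arrays, not ART code):

    #include <cstdio>
    #include <cstring>

    int main() {
      int a[5] = {1, 2, 3, 4, 5};
      // Source range a[0..3] overlaps destination range a[1..4], so only
      // memmove is safe here; memcpy on overlapping ranges is undefined.
      std::memmove(&a[1], &a[0], 4 * sizeof(int));  // a becomes {1, 1, 2, 3, 4}
      int b[5];
      // Two distinct arrays can never alias, so the cheaper memcpy is fine.
      std::memcpy(b, a, sizeof(a));
      std::printf("%d %d\n", a[1], b[4]);  // prints "1 4"
      return 0;
    }
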
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index b5fc7e7..6c22003 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "atomic.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/object.h"
@@ -23,40 +22,30 @@
 
 namespace art {
 
-static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint expectedValue, jint newValue) {
+static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                         jint expectedValue, jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
-  volatile int32_t* address = reinterpret_cast<volatile int32_t*>(raw_addr);
-  // Note: android_atomic_release_cas() returns 0 on success, not failure.
-  int result = android_atomic_release_cas(expectedValue, newValue, address);
-  return (result == 0) ? JNI_TRUE : JNI_FALSE;
-}
-
-static jboolean Unsafe_compareAndSwapLong(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong expectedValue, jlong newValue) {
-  ScopedFastNativeObjectAccess soa(env);
-  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
-  volatile int64_t* address = reinterpret_cast<volatile int64_t*>(raw_addr);
-  // Note: android_atomic_cmpxchg() returns 0 on success, not failure.
-  bool success = QuasiAtomic::Cas64(expectedValue, newValue, address);
+  bool success = obj->CasField32(MemberOffset(offset), expectedValue, newValue);
   return success ? JNI_TRUE : JNI_FALSE;
 }
 
-static jboolean Unsafe_compareAndSwapObject(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaExpectedValue, jobject javaNewValue) {
+static jboolean Unsafe_compareAndSwapLong(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                          jlong expectedValue, jlong newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  bool success = obj->CasField64(MemberOffset(offset), expectedValue, newValue);
+  return success ? JNI_TRUE : JNI_FALSE;
+}
+
+static jboolean Unsafe_compareAndSwapObject(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                            jobject javaExpectedValue, jobject javaNewValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* expectedValue = soa.Decode<mirror::Object*>(javaExpectedValue);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
-  byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
-  int32_t* address = reinterpret_cast<int32_t*>(raw_addr);
-  // Note: android_atomic_cmpxchg() returns 0 on success, not failure.
-  int result = android_atomic_release_cas(reinterpret_cast<int32_t>(expectedValue),
-      reinterpret_cast<int32_t>(newValue), address);
-  if (result == 0) {
-    Runtime::Current()->GetHeap()->WriteBarrierField(obj, MemberOffset(offset), newValue);
-  }
-  return (result == 0) ? JNI_TRUE : JNI_FALSE;
+  bool success = obj->CasFieldObject(MemberOffset(offset), expectedValue, newValue);
+  return success ? JNI_TRUE : JNI_FALSE;
 }
 
 static jint Unsafe_getInt(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
@@ -77,13 +66,15 @@
   obj->SetField32(MemberOffset(offset), newValue, false);
 }
 
-static void Unsafe_putIntVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint newValue) {
+static void Unsafe_putIntVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                  jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   obj->SetField32(MemberOffset(offset), newValue, true);
 }
 
-static void Unsafe_putOrderedInt(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint newValue) {
+static void Unsafe_putOrderedInt(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                 jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   QuasiAtomic::MembarStoreStore();
@@ -108,13 +99,15 @@
   obj->SetField64(MemberOffset(offset), newValue, false);
 }
 
-static void Unsafe_putLongVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong newValue) {
+static void Unsafe_putLongVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                   jlong newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   obj->SetField64(MemberOffset(offset), newValue, true);
 }
 
-static void Unsafe_putOrderedLong(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong newValue) {
+static void Unsafe_putOrderedLong(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                  jlong newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   QuasiAtomic::MembarStoreStore();
@@ -124,32 +117,35 @@
 static jobject Unsafe_getObjectVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  mirror::Object* value = obj->GetFieldObject<mirror::Object*>(MemberOffset(offset), true);
+  mirror::Object* value = obj->GetFieldObject<mirror::Object>(MemberOffset(offset), true);
   return soa.AddLocalReference<jobject>(value);
 }
 
 static jobject Unsafe_getObject(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  mirror::Object* value = obj->GetFieldObject<mirror::Object*>(MemberOffset(offset), false);
+  mirror::Object* value = obj->GetFieldObject<mirror::Object>(MemberOffset(offset), false);
   return soa.AddLocalReference<jobject>(value);
 }
 
-static void Unsafe_putObject(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaNewValue) {
+static void Unsafe_putObject(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                             jobject javaNewValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
   obj->SetFieldObject(MemberOffset(offset), newValue, false);
 }
 
-static void Unsafe_putObjectVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaNewValue) {
+static void Unsafe_putObjectVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                     jobject javaNewValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
   obj->SetFieldObject(MemberOffset(offset), newValue, true);
 }
 
-static void Unsafe_putOrderedObject(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaNewValue) {
+static void Unsafe_putOrderedObject(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                    jobject javaNewValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
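
The CasField32/CasField64/CasFieldObject calls above fold the hand-rolled android_atomic_release_cas/QuasiAtomic code into object helpers that also take care of the write barrier. As a rough portable analogue of the compare-and-swap semantics assumed here (C++11 std::atomic, not ART's actual implementation):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // Atomically replace *addr with new_value iff it still holds
    // expected_value; returns whether the store happened.
    bool CasInt32(std::atomic<int32_t>* addr, int32_t expected_value,
                  int32_t new_value) {
      return addr->compare_exchange_strong(expected_value, new_value);
    }

    int main() {
      std::atomic<int32_t> field{41};
      assert(CasInt32(&field, 41, 42));   // succeeds: 41 -> 42
      assert(!CasInt32(&field, 41, 43));  // fails: field now holds 42
      assert(field.load() == 42);
      return 0;
    }
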
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index fa2b485..0f380ad 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -66,16 +66,16 @@
                        std::string* error_msg) {
   CHECK(!filename.empty()) << location;
   CheckLocation(filename);
-#ifdef ART_USE_PORTABLE_COMPILER
-  // If we are using PORTABLE, use dlopen to deal with relocations.
-  //
-  // We use our own ELF loader for Quick to deal with legacy apps that
-  // open a generated dex file by name, remove the file, then open
-  // another generated dex file with the same name. http://b/10614658
-  if (executable) {
-    return OpenDlopen(filename, location, requested_base, error_msg);
+  if (kUsePortableCompiler) {
+    // If we are using PORTABLE, use dlopen to deal with relocations.
+    //
+    // We use our own ELF loader for Quick to deal with legacy apps that
+    // open a generated dex file by name, remove the file, then open
+    // another generated dex file with the same name. http://b/10614658
+    if (executable) {
+      return OpenDlopen(filename, location, requested_base, error_msg);
+    }
   }
-#endif
   // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
   //
   // On target, dlopen may fail when compiling due to selinux restrictions on installd.
@@ -338,12 +338,17 @@
   }
 
   if (warn_if_not_found) {
+    std::string checksum("<unspecified>");
+    if (dex_location_checksum != NULL) {
+      checksum = StringPrintf("0x%08x", *dex_location_checksum);
+    }
     LOG(WARNING) << "Failed to find OatDexFile for DexFile " << dex_location
-                 << " in OatFile " << GetLocation();
+                 << " with checksum " << checksum << " in OatFile " << GetLocation();
     if (kIsDebugBuild) {
       for (Table::const_iterator it = oat_dex_files_.begin(); it != oat_dex_files_.end(); ++it) {
         LOG(WARNING) << "OatFile " << GetLocation()
-                     << " contains OatDexFile " << it->second->GetDexFileLocation();
+                     << " contains OatDexFile " << it->second->GetDexFileLocation()
+                     << " with checksum 0x" << std::hex << it->second->GetDexFileLocationChecksum();
       }
     }
   }
@@ -498,51 +503,40 @@
     mapping_table_offset_(mapping_table_offset),
     vmap_table_offset_(vmap_table_offset),
     native_gc_map_offset_(gc_map_offset) {
-#ifndef NDEBUG
-  if (mapping_table_offset_ != 0) {  // implies non-native, non-stub code
-    if (vmap_table_offset_ == 0) {
-      DCHECK_EQ(0U, static_cast<uint32_t>(__builtin_popcount(core_spill_mask_) +
-                                          __builtin_popcount(fp_spill_mask_)));
-    } else {
-      VmapTable vmap_table(reinterpret_cast<const uint8_t*>(begin_ + vmap_table_offset_));
+  if (kIsDebugBuild) {
+    if (mapping_table_offset_ != 0) {  // implies non-native, non-stub code
+      if (vmap_table_offset_ == 0) {
+        CHECK_EQ(0U, static_cast<uint32_t>(__builtin_popcount(core_spill_mask_) +
+                                           __builtin_popcount(fp_spill_mask_)));
+      } else {
+        VmapTable vmap_table(reinterpret_cast<const uint8_t*>(begin_ + vmap_table_offset_));
 
-      DCHECK_EQ(vmap_table.Size(), static_cast<uint32_t>(__builtin_popcount(core_spill_mask_) +
-                                                         __builtin_popcount(fp_spill_mask_)));
+        CHECK_EQ(vmap_table.Size(), static_cast<uint32_t>(__builtin_popcount(core_spill_mask_) +
+                                                          __builtin_popcount(fp_spill_mask_)));
+      }
+    } else {
+      CHECK_EQ(vmap_table_offset_, 0U);
     }
-  } else {
-    DCHECK_EQ(vmap_table_offset_, 0U);
   }
-#endif
 }
 
 OatFile::OatMethod::~OatMethod() {}
 
-const void* OatFile::OatMethod::GetCode() const {
-  return GetOatPointer<const void*>(code_offset_);
-}
 
-uint32_t OatFile::OatMethod::GetCodeSize() const {
-#if defined(ART_USE_PORTABLE_COMPILER)
-  // TODO: With Quick, we store the size before the code. With
-  // Portable, the code is in a .o file we don't manage ourselves. ELF
-  // symbols do have a concept of size, so we could capture that and
-  // store it somewhere, such as the OatMethod.
-  return 0;
-#else
-  uintptr_t code = reinterpret_cast<uint32_t>(GetCode());
-
+uint32_t OatFile::OatMethod::GetQuickCodeSize() const {
+  uintptr_t code = reinterpret_cast<uintptr_t>(GetQuickCode());
   if (code == 0) {
     return 0;
   }
   // TODO: make this Thumb2 specific
   code &= ~0x1;
   return reinterpret_cast<uint32_t*>(code)[-1];
-#endif
 }
 
 void OatFile::OatMethod::LinkMethod(mirror::ArtMethod* method) const {
   CHECK(method != NULL);
-  method->SetEntryPointFromCompiledCode(GetCode());
+  method->SetEntryPointFromPortableCompiledCode(GetPortableCode());
+  method->SetEntryPointFromQuickCompiledCode(GetQuickCode());
   method->SetFrameSizeInBytes(frame_size_in_bytes_);
   method->SetCoreSpillMask(core_spill_mask_);
   method->SetFpSpillMask(fp_spill_mask_);
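
GetQuickCodeSize above depends on two conventions: the Quick compiler stores a uint32_t size word immediately before a method's code, and ARM/Thumb code pointers may have their low bit set as a mode flag that must be masked off before dereferencing. A hedged standalone sketch of the same layout trick (hypothetical blob format, not ART's emitter):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Lay out a blob as [size][code bytes...]; return a pointer past the size.
    const uint8_t* EmitBlob(uint8_t* storage, const uint8_t* code, uint32_t size) {
      std::memcpy(storage, &size, sizeof(size));
      std::memcpy(storage + sizeof(size), code, size);
      return storage + sizeof(size);
    }

    // Recover the size from the word stored just before the code pointer.
    uint32_t BlobSize(const uint8_t* code) {
      uintptr_t addr = reinterpret_cast<uintptr_t>(code);
      addr &= ~uintptr_t{1};  // strip a Thumb-style mode bit, as above
      return reinterpret_cast<const uint32_t*>(addr)[-1];
    }

    int main() {
      alignas(4) uint8_t storage[16];
      const uint8_t insns[4] = {0xde, 0xad, 0xbe, 0xef};
      const uint8_t* code = EmitBlob(storage, insns, sizeof(insns));
      assert(BlobSize(code) == 4);
      return 0;
    }
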
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 887a9d1..d6e8dc0 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -97,8 +97,30 @@
       return native_gc_map_offset_;
     }
 
-    const void* GetCode() const;
-    uint32_t GetCodeSize() const;
+    const void* GetPortableCode() const {
+      // TODO: encode whether code is portable/quick in flags within OatMethod.
+      if (kUsePortableCompiler) {
+        return GetOatPointer<const void*>(code_offset_);
+      } else {
+        return nullptr;
+      }
+    }
+
+    const void* GetQuickCode() const {
+      if (kUsePortableCompiler) {
+        return nullptr;
+      } else {
+        return GetOatPointer<const void*>(code_offset_);
+      }
+    }
+
+    uint32_t GetPortableCodeSize() const {
+      // TODO: With Quick, we store the size before the code. With Portable, the code is in a .o
+      // file we don't manage ourselves. ELF symbols do have a concept of size, so we could capture
+      // that and store it somewhere, such as the OatMethod.
+      return 0;
+    }
+    uint32_t GetQuickCodeSize() const;
 
     const uint8_t* GetMappingTable() const {
       return GetOatPointer<const uint8_t*>(mapping_table_offset_);
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index 407aa65..0451f5d 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -69,30 +69,29 @@
 
 class ClassHelper {
  public:
-  explicit ClassHelper(const mirror::Class* c )
+  explicit ClassHelper(mirror::Class* c)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : interface_type_list_(NULL),
-        klass_(NULL) {
-    if (c != NULL) {
+      : interface_type_list_(nullptr), klass_(nullptr) {
+    if (c != nullptr) {
       ChangeClass(c);
     }
   }
 
-  void ChangeClass(const mirror::Class* new_c)
+  void ChangeClass(mirror::Class* new_c)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(new_c != NULL) << "klass_=" << klass_;  // Log what we were changing from if any
+    CHECK(new_c != nullptr) << "klass_=" << klass_;  // Log what we were changing from if any
     if (!new_c->IsClass()) {
       LOG(FATAL) << "new_c=" << new_c << " cc " << new_c->GetClass() << " ccc "
-          << ((new_c->GetClass() != nullptr) ? new_c->GetClass()->GetClass() : NULL);
+          << ((new_c->GetClass() != nullptr) ? new_c->GetClass()->GetClass() : nullptr);
     }
     klass_ = new_c;
-    interface_type_list_ = NULL;
+    interface_type_list_ = nullptr;
   }
 
   // The returned const char* is only guaranteed to be valid for the lifetime of the ClassHelper.
   // If you need it longer, copy it into a std::string.
   const char* GetDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(klass_ != NULL);
+    CHECK(klass_ != nullptr);
     if (UNLIKELY(klass_->IsArrayClass())) {
       return GetArrayDescriptor();
     } else if (UNLIKELY(klass_->IsPrimitive())) {
@@ -109,8 +108,8 @@
 
   const char* GetArrayDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string result("[");
-    const mirror::Class* saved_klass = klass_;
-    CHECK(saved_klass != NULL);
+    mirror::Class* saved_klass = klass_;
+    CHECK(saved_klass != nullptr);
     ChangeClass(klass_->GetComponentType());
     result += GetDescriptor();
     ChangeClass(saved_klass);
@@ -128,7 +127,7 @@
   }
 
   uint32_t NumDirectInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(klass_ != NULL);
+    DCHECK(klass_ != nullptr);
     if (klass_->IsPrimitive()) {
       return 0;
     } else if (klass_->IsArrayClass()) {
@@ -137,7 +136,7 @@
       return klass_->GetIfTable()->GetLength();
     } else {
       const DexFile::TypeList* interfaces = GetInterfaceTypeList();
-      if (interfaces == NULL) {
+      if (interfaces == nullptr) {
         return 0;
       } else {
         return interfaces->Size();
@@ -147,7 +146,7 @@
 
   uint16_t GetDirectInterfaceTypeIdx(uint32_t idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(klass_ != NULL);
+    DCHECK(klass_ != nullptr);
     DCHECK(!klass_->IsPrimitive());
     DCHECK(!klass_->IsArrayClass());
     return GetInterfaceTypeList()->GetTypeItem(idx).type_idx_;
@@ -155,7 +154,7 @@
 
   mirror::Class* GetDirectInterface(uint32_t idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(klass_ != NULL);
+    DCHECK(klass_ != nullptr);
     DCHECK(!klass_->IsPrimitive());
     if (klass_->IsArrayClass()) {
       if (idx == 0) {
@@ -169,9 +168,9 @@
     } else {
       uint16_t type_idx = GetDirectInterfaceTypeIdx(idx);
       mirror::Class* interface = GetDexCache()->GetResolvedType(type_idx);
-      if (interface == NULL) {
+      if (interface == nullptr) {
         interface = GetClassLinker()->ResolveType(GetDexFile(), type_idx, klass_);
-        CHECK(interface != NULL || Thread::Current()->IsExceptionPending());
+        CHECK(interface != nullptr || Thread::Current()->IsExceptionPending());
       }
       return interface;
     }
@@ -181,13 +180,13 @@
     std::string descriptor(GetDescriptor());
     const DexFile& dex_file = GetDexFile();
     const DexFile::ClassDef* dex_class_def = GetClassDef();
-    CHECK(dex_class_def != NULL);
+    CHECK(dex_class_def != nullptr);
     return dex_file.GetSourceFile(*dex_class_def);
   }
 
   std::string GetLocation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::DexCache* dex_cache = GetDexCache();
-    if (dex_cache != NULL && !klass_->IsProxyClass()) {
+    if (dex_cache != nullptr && !klass_->IsProxyClass()) {
       return dex_cache->GetLocation()->ToModifiedUtf8();
     } else {
       // Arrays and proxies are generated and have no corresponding dex file location.
@@ -207,9 +206,9 @@
   const DexFile::TypeList* GetInterfaceTypeList()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile::TypeList* result = interface_type_list_;
-    if (result == NULL) {
+    if (result == nullptr) {
       const DexFile::ClassDef* class_def = GetClassDef();
-      if (class_def != NULL) {
+      if (class_def != nullptr) {
         result =  GetDexFile().GetInterfacesList(*class_def);
         interface_type_list_ = result;
       }
@@ -222,7 +221,7 @@
   }
 
   const DexFile::TypeList* interface_type_list_;
-  const mirror::Class* klass_;
+  mirror::Class* klass_;
   std::string descriptor_;
 
   DISALLOW_COPY_AND_ASSIGN(ClassHelper);
@@ -230,11 +229,11 @@
 
 class FieldHelper {
  public:
-  FieldHelper() : field_(NULL) {}
-  explicit FieldHelper(const mirror::ArtField* f) : field_(f) {}
+  FieldHelper() : field_(nullptr) {}
+  explicit FieldHelper(mirror::ArtField* f) : field_(f) {}
 
-  void ChangeField(const mirror::ArtField* new_f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(new_f != NULL);
+  void ChangeField(mirror::ArtField* new_f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(new_f != nullptr);
     field_ = new_f;
   }
 
@@ -257,9 +256,9 @@
     const DexFile& dex_file = GetDexFile();
     const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
     mirror::Class* type = GetDexCache()->GetResolvedType(field_id.type_idx_);
-    if (resolve && (type == NULL)) {
+    if (resolve && (type == nullptr)) {
       type = GetClassLinker()->ResolveType(field_id.type_idx_, field_);
-      CHECK(type != NULL || Thread::Current()->IsExceptionPending());
+      CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
     }
     return type;
   }
@@ -320,7 +319,7 @@
   const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *GetDexCache()->GetDexFile();
   }
-  const mirror::ArtField* field_;
+  mirror::ArtField* field_;
   std::string declaring_class_descriptor_;
 
   DISALLOW_COPY_AND_ASSIGN(FieldHelper);
@@ -328,20 +327,18 @@
 
 class MethodHelper {
  public:
-  MethodHelper()
-     : method_(NULL), shorty_(NULL),
-       shorty_len_(0) {}
+  MethodHelper() : method_(nullptr), shorty_(nullptr), shorty_len_(0) {}
 
-  explicit MethodHelper(const mirror::ArtMethod* m)
+  explicit MethodHelper(mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : method_(NULL), shorty_(NULL), shorty_len_(0) {
+      : method_(nullptr), shorty_(nullptr), shorty_len_(0) {
     SetMethod(m);
   }
 
   void ChangeMethod(mirror::ArtMethod* new_m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(new_m != NULL);
+    DCHECK(new_m != nullptr);
     SetMethod(new_m);
-    shorty_ = NULL;
+    shorty_ = nullptr;
   }
 
   const mirror::ArtMethod* GetMethod() const {
@@ -381,7 +378,7 @@
 
   const char* GetShorty() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const char* result = shorty_;
-    if (result == NULL) {
+    if (result == nullptr) {
       const DexFile& dex_file = GetDexFile();
       result = dex_file.GetMethodShorty(dex_file.GetMethodId(method_->GetDexMethodIndex()),
                                         &shorty_len_);
@@ -391,7 +388,7 @@
   }
 
   uint32_t GetShortyLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (shorty_ == NULL) {
+    if (shorty_ == nullptr) {
       GetShorty();
     }
     return shorty_len_;
@@ -529,15 +526,15 @@
 
   bool IsResolvedTypeIdx(uint16_t type_idx) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->GetDexCacheResolvedTypes()->Get(type_idx) != NULL;
+    return method_->GetDexCacheResolvedTypes()->Get(type_idx) != nullptr;
   }
 
   mirror::Class* GetClassFromTypeIdx(uint16_t type_idx, bool resolve = true)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::Class* type = method_->GetDexCacheResolvedTypes()->Get(type_idx);
-    if (type == NULL && resolve) {
+    if (type == nullptr && resolve) {
       type = GetClassLinker()->ResolveType(type_idx, method_);
-      CHECK(type != NULL || Thread::Current()->IsExceptionPending());
+      CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
     }
     return type;
   }
@@ -563,7 +560,7 @@
 
   mirror::String* ResolveString(uint32_t string_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::String* s = method_->GetDexCacheStrings()->Get(string_idx);
-    if (UNLIKELY(s == NULL)) {
+    if (UNLIKELY(s == nullptr)) {
       SirtRef<mirror::DexCache> dex_cache(Thread::Current(), GetDexCache());
       s = GetClassLinker()->ResolveString(GetDexFile(), string_idx, dex_cache);
     }
@@ -613,13 +610,13 @@
  private:
   // Set the method_ field, for proxy methods looking up the interface method via the resolved
   // methods table.
-  void SetMethod(const mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (method != NULL) {
+  void SetMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (method != nullptr) {
       mirror::Class* klass = method->GetDeclaringClass();
       if (UNLIKELY(klass->IsProxyClass())) {
         mirror::ArtMethod* interface_method =
             method->GetDexCacheResolvedMethods()->Get(method->GetDexMethodIndex());
-        DCHECK(interface_method != NULL);
+        DCHECK(interface_method != nullptr);
         DCHECK(interface_method == GetClassLinker()->FindMethodForProxy(klass, method));
         method = interface_method;
       }
@@ -631,7 +628,7 @@
     return Runtime::Current()->GetClassLinker();
   }
 
-  const mirror::ArtMethod* method_;
+  mirror::ArtMethod* method_;
   const char* shorty_;
   uint32_t shorty_len_;
 
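
The helpers in object_utils.h share a lazy-caching idiom: a derived field such as shorty_ starts out null, is computed on first use, and is invalidated whenever the wrapped object changes. A minimal sketch of the idiom under invented names (not ART's classes):

    #include <cassert>
    #include <string>

    class NameHelper {
     public:
      explicit NameHelper(int id) : id_(id) {}

      void ChangeId(int new_id) {
        id_ = new_id;
        cached_name_.clear();  // invalidate, mirroring shorty_ = nullptr
      }

      // Computed once per id, then served from the cache.
      const std::string& GetName() {
        if (cached_name_.empty()) {
          cached_name_ = "method#" + std::to_string(id_);
        }
        return cached_name_;
      }

     private:
      int id_;
      std::string cached_name_;
    };

    int main() {
      NameHelper helper(7);
      assert(helper.GetName() == "method#7");
      helper.ChangeId(8);
      assert(helper.GetName() == "method#8");
      return 0;
    }
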
diff --git a/runtime/offsets.h b/runtime/offsets.h
index 94ae805..e2dba9d 100644
--- a/runtime/offsets.h
+++ b/runtime/offsets.h
@@ -22,7 +22,7 @@
 
 namespace art {
 
-// Allow the meaning of offsets to be strongly typed
+// Allow the meaning of offsets to be strongly typed.
 class Offset {
  public:
   explicit Offset(size_t val) : val_(val) {}
@@ -37,7 +37,7 @@
 };
 std::ostream& operator<<(std::ostream& os, const Offset& offs);
 
-// Offsets relative to the current frame
+// Offsets relative to the current frame.
 class FrameOffset : public Offset {
  public:
   explicit FrameOffset(size_t val) : Offset(val) {}
@@ -45,13 +45,13 @@
   bool operator<(FrameOffset other) const { return val_ < other.val_; }
 };
 
-// Offsets relative to the current running thread
+// Offsets relative to the current running thread.
 class ThreadOffset : public Offset {
  public:
   explicit ThreadOffset(size_t val) : Offset(val) {}
 };
 
-// Offsets relative to an object
+// Offsets relative to an object.
 class MemberOffset : public Offset {
  public:
   explicit MemberOffset(size_t val) : Offset(val) {}
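
The offsets.h comments above describe the point of these wrappers: FrameOffset, ThreadOffset, and MemberOffset are distinct types with explicit constructors, so the compiler rejects passing one kind of offset (or a bare integer) where another is expected. A compressed sketch of the idea (simplified bodies):

    #include <cstddef>

    class Offset {
     public:
      explicit Offset(size_t val) : val_(val) {}  // explicit: no silent size_t conversion
      size_t SizeValue() const { return val_; }
     protected:
      size_t val_;
    };

    class FrameOffset : public Offset {
     public:
      explicit FrameOffset(size_t val) : Offset(val) {}
    };

    class MemberOffset : public Offset {
     public:
      explicit MemberOffset(size_t val) : Offset(val) {}
    };

    void StoreToFrame(FrameOffset offs) { (void)offs; }

    int main() {
      StoreToFrame(FrameOffset(16));      // OK
      // StoreToFrame(MemberOffset(16));  // error: wrong offset kind
      // StoreToFrame(16);                // error: constructor is explicit
      return 0;
    }
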
diff --git a/runtime/primitive.h b/runtime/primitive.h
index 5e07311..b436bd2 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -21,6 +21,7 @@
 
 #include "base/logging.h"
 #include "base/macros.h"
+#include "mirror/object_reference.h"
 
 namespace art {
 namespace mirror {
@@ -78,7 +79,7 @@
       case kPrimFloat:   return 4;
       case kPrimLong:
       case kPrimDouble:  return 8;
-      case kPrimNot:     return sizeof(mirror::Object*);
+      case kPrimNot:     return sizeof(mirror::HeapReference<mirror::Object>);
       default:
         LOG(FATAL) << "Invalid type " << static_cast<int>(type);
         return 0;
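
The primitive.h change above makes kPrimNot report sizeof(mirror::HeapReference<mirror::Object>) instead of sizeof(mirror::Object*), decoupling reference-array element size from the native pointer width. A hedged sketch of why that matters (assumed 32-bit compressed representation; ART's real HeapReference differs):

    #include <cstdint>
    #include <cstdio>

    // A compressed 32-bit handle to a heap object; on a 64-bit build this is
    // half the size of a raw pointer, which changes array element sizing.
    template <typename T>
    struct HeapReference {
      uint32_t compressed;  // e.g. an offset from a fixed heap base
    };

    struct Object {};

    int main() {
      std::printf("raw pointer:    %zu bytes\n", sizeof(Object*));                 // 8 on 64-bit
      std::printf("heap reference: %zu bytes\n", sizeof(HeapReference<Object>));   // 4
      return 0;
    }
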
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 0e73812..20e08b8 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -36,6 +36,11 @@
 #include "ScopedLocalRef.h"
 #include "thread.h"
 #include "thread_list.h"
+
+#ifdef HAVE_ANDROID_OS
+#include "cutils/properties.h"
+#endif
+
 #if !defined(ART_USE_PORTABLE_COMPILER)
 #include "entrypoints/quick/quick_entrypoints.h"
 #endif
@@ -259,6 +264,17 @@
     }
   }
 
+  // Only on target...
+#ifdef HAVE_ANDROID_OS
+  // Switch off profiler if the dalvik.vm.profiler property has value 0.
+  char buf[PROP_VALUE_MAX];
+  property_get("dalvik.vm.profiler", buf, "0");
+  if (strcmp(buf, "0") == 0) {
+    LOG(INFO) << "Profiler disabled.  To enable: setprop dalvik.vm.profiler 1";
+    return;
+  }
+#endif
+
   LOG(INFO) << "Starting profile with period " << period << "s, duration " << duration <<
       "s, interval " << interval_us << "us.  Profile file " << profile_file_name;
 
@@ -439,9 +455,7 @@
 }
 
 uint32_t ProfileSampleResults::Hash(mirror::ArtMethod* method) {
-  uint32_t value = reinterpret_cast<uint32_t>(method);
-  value >>= 2;
-  return value % kHashSize;
+  return (PointerToLowMemUInt32(method) >> 3) % kHashSize;
 }
 
 }  // namespace art
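
The new ProfileSampleResults::Hash above shifts low pointer bits away before taking the modulus: ArtMethod objects are aligned, so the low bits are always zero and carry no entropy. A standalone sketch of the same bucket computation (kHashSize picked arbitrarily here):

    #include <cstdint>
    #include <cstdio>

    constexpr uint32_t kHashSize = 512;

    // Objects are at least 8-byte aligned, so the low 3 bits are always zero;
    // shifting them out before the modulus spreads entries across buckets.
    uint32_t Hash(const void* method) {
      uint32_t value = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(method));
      return (value >> 3) % kHashSize;
    }

    int main() {
      alignas(8) static char fake_methods[8 * 4];
      for (int i = 0; i < 4; ++i) {
        // Aligned, consecutive addresses land in consecutive buckets.
        std::printf("bucket %u\n", Hash(&fake_methods[8 * i]));
      }
      return 0;
    }
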
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index 6f65bff..b5ef735 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -59,7 +59,7 @@
 
 // If "obj" is an array, return the number of elements in the array.
 // Otherwise, return zero.
-static size_t GetElementCount(const mirror::Object* obj) {
+static size_t GetElementCount(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (obj == NULL || obj == kClearedJniWeakGlobal || !obj->IsArrayInstance()) {
     return 0;
   }
@@ -67,7 +67,7 @@
 }
 
 struct ObjectComparator {
-  bool operator()(const mirror::Object* obj1, const mirror::Object* obj2)
+  bool operator()(mirror::Object* obj1, mirror::Object* obj2)
     // TODO: enable analysis when analysis can work with the STL.
       NO_THREAD_SAFETY_ANALYSIS {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
@@ -105,7 +105,7 @@
 // Pass in the number of elements in the array (or 0 if this is not an
 // array object), and the number of additional objects that are identical
 // or equivalent to the original.
-static void DumpSummaryLine(std::ostream& os, const mirror::Object* obj, size_t element_count,
+static void DumpSummaryLine(std::ostream& os, mirror::Object* obj, size_t element_count,
                             int identical, int equiv)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (obj == NULL) {
@@ -159,7 +159,7 @@
   }
   os << "  Last " << (count - first) << " entries (of " << count << "):\n";
   for (int idx = count - 1; idx >= first; --idx) {
-    const mirror::Object* ref = entries[idx];
+    mirror::Object* ref = entries[idx];
     if (ref == NULL) {
       continue;
     }
@@ -212,8 +212,8 @@
   size_t equiv = 0;
   size_t identical = 0;
   for (size_t idx = 1; idx < count; idx++) {
-    const mirror::Object* prev = sorted_entries[idx-1];
-    const mirror::Object* current = sorted_entries[idx];
+    mirror::Object* prev = sorted_entries[idx-1];
+    mirror::Object* current = sorted_entries[idx];
     size_t element_count = GetElementCount(prev);
     if (current == prev) {
       // Same reference, added more than once.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 2591224..4e90478 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -31,6 +31,7 @@
 #include "arch/arm/registers_arm.h"
 #include "arch/mips/registers_mips.h"
 #include "arch/x86/registers_x86.h"
+#include "arch/x86_64/registers_x86_64.h"
 #include "atomic.h"
 #include "class_linker.h"
 #include "debugger.h"
@@ -75,19 +76,27 @@
       is_zygote_(false),
       is_concurrent_gc_enabled_(true),
       is_explicit_gc_disabled_(false),
+      compiler_filter_(kSpeed),
+      huge_method_threshold_(0),
+      large_method_threshold_(0),
+      small_method_threshold_(0),
+      tiny_method_threshold_(0),
+      num_dex_methods_threshold_(0),
+      sea_ir_mode_(false),
       default_stack_size_(0),
-      heap_(NULL),
+      heap_(nullptr),
       max_spins_before_thin_lock_inflation_(Monitor::kDefaultMaxSpinsBeforeThinLockInflation),
-      monitor_list_(NULL),
-      thread_list_(NULL),
-      intern_table_(NULL),
-      class_linker_(NULL),
-      signal_catcher_(NULL),
-      java_vm_(NULL),
-      pre_allocated_OutOfMemoryError_(NULL),
-      resolution_method_(NULL),
-      imt_conflict_method_(NULL),
-      default_imt_(NULL),
+      monitor_list_(nullptr),
+      monitor_pool_(nullptr),
+      thread_list_(nullptr),
+      intern_table_(nullptr),
+      class_linker_(nullptr),
+      signal_catcher_(nullptr),
+      java_vm_(nullptr),
+      pre_allocated_OutOfMemoryError_(nullptr),
+      resolution_method_(nullptr),
+      imt_conflict_method_(nullptr),
+      default_imt_(nullptr),
       method_verifiers_lock_("Method verifiers lock"),
       threads_being_born_(0),
       shutdown_cond_(new ConditionVariable("Runtime shutdown", *Locks::runtime_shutdown_lock_)),
@@ -95,19 +104,25 @@
       shutting_down_started_(false),
       started_(false),
       finished_starting_(false),
-      vfprintf_(NULL),
-      exit_(NULL),
-      abort_(NULL),
+      vfprintf_(nullptr),
+      exit_(nullptr),
+      abort_(nullptr),
       stats_enabled_(false),
-      method_trace_(0),
+      profile_(false),
+      profile_period_s_(0),
+      profile_duration_s_(0),
+      profile_interval_us_(0),
+      profile_backoff_coefficient_(0),
+      method_trace_(false),
       method_trace_file_size_(0),
       instrumentation_(),
       use_compile_time_class_path_(false),
-      main_thread_group_(NULL),
-      system_thread_group_(NULL),
-      system_class_loader_(NULL) {
+      main_thread_group_(nullptr),
+      system_thread_group_(nullptr),
+      system_class_loader_(nullptr),
+      dump_gc_performance_on_shutdown_(false) {
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    callee_save_methods_[i] = NULL;
+    callee_save_methods_[i] = nullptr;
   }
 }
 
@@ -141,6 +156,7 @@
   // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended.
   delete thread_list_;
   delete monitor_list_;
+  delete monitor_pool_;
   delete class_linker_;
   delete heap_;
   delete intern_table_;
@@ -149,8 +165,8 @@
   QuasiAtomic::Shutdown();
   verifier::MethodVerifier::Shutdown();
   // TODO: acquire a static mutex on Runtime to avoid racing.
-  CHECK(instance_ == NULL || instance_ == this);
-  instance_ = NULL;
+  CHECK(instance_ == nullptr || instance_ == this);
+  instance_ = nullptr;
 }
 
 struct AbortState {
@@ -318,10 +334,19 @@
   return 0;
 }
 
-size_t ParseIntegerOrDie(const std::string& s) {
-  std::string::size_type colon = s.find(':');
+static const std::string StringAfterChar(const std::string& s, char c) {
+  std::string::size_type colon = s.find(c);
   if (colon == std::string::npos) {
-    LOG(FATAL) << "Missing integer: " << s;
+    LOG(FATAL) << "Missing char " << c << " in string " << s;
+  }
+  // Add one to skip past the separator char itself.
+  return s.substr(colon + 1);
+}
+
+static size_t ParseIntegerOrDie(const std::string& s, char after_char) {
+  std::string::size_type colon = s.find(after_char);
+  if (colon == std::string::npos) {
+    LOG(FATAL) << "Missing char " << after_char << " in string " << s;
   }
   const char* begin = &s[colon + 1];
   char* end;
@@ -332,10 +357,10 @@
   return result;
 }
 
-double ParseDoubleOrDie(const std::string& option, const char* prefix,
-                        double min, double max, bool ignore_unrecognized,
-                        double defval) {
-  std::istringstream iss(option.substr(strlen(prefix)));
+
+static double ParseDoubleOrDie(const std::string& option, char after_char, double min, double max,
+                               bool ignore_unrecognized, double defval) {
+  std::istringstream iss(StringAfterChar(option, after_char));
   double value;
   iss >> value;
   // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
@@ -473,7 +498,7 @@
       parsed->boot_class_path_
           = reinterpret_cast<const std::vector<const DexFile*>*>(options[i].second);
     } else if (StartsWith(option, "-Ximage:")) {
-      parsed->image_ = option.substr(strlen("-Ximage:")).data();
+      parsed->image_ = StringAfterChar(option, ':');
     } else if (StartsWith(option, "-Xcheck:jni")) {
       parsed->check_jni_ = true;
     } else if (StartsWith(option, "-Xrunjdwp:") || StartsWith(option, "-agentlib:jdwp=")) {
@@ -539,15 +564,12 @@
       }
       parsed->heap_max_free_ = size;
     } else if (StartsWith(option, "-XX:HeapTargetUtilization=")) {
-      parsed->heap_target_utilization_ = ParseDoubleOrDie(option, "-XX:HeapTargetUtilization=",
-          0.1, 0.9, ignore_unrecognized,
-          parsed->heap_target_utilization_);
+      parsed->heap_target_utilization_ = ParseDoubleOrDie(
+          option, '=', 0.1, 0.9, ignore_unrecognized, parsed->heap_target_utilization_);
     } else if (StartsWith(option, "-XX:ParallelGCThreads=")) {
-      parsed->parallel_gc_threads_ =
-          ParseMemoryOption(option.substr(strlen("-XX:ParallelGCThreads=")).c_str(), 1024);
+      parsed->parallel_gc_threads_ = ParseIntegerOrDie(option, '=');
     } else if (StartsWith(option, "-XX:ConcGCThreads=")) {
-      parsed->conc_gc_threads_ =
-          ParseMemoryOption(option.substr(strlen("-XX:ConcGCThreads=")).c_str(), 1024);
+      parsed->conc_gc_threads_ = ParseIntegerOrDie(option, '=');
     } else if (StartsWith(option, "-Xss")) {
       size_t size = ParseMemoryOption(option.substr(strlen("-Xss")).c_str(), 1);
       if (size == 0) {
@@ -560,15 +582,11 @@
       }
       parsed->stack_size_ = size;
     } else if (StartsWith(option, "-XX:MaxSpinsBeforeThinLockInflation=")) {
-      parsed->max_spins_before_thin_lock_inflation_ =
-          strtoul(option.substr(strlen("-XX:MaxSpinsBeforeThinLockInflation=")).c_str(),
-                  nullptr, 10);
-    } else if (option == "-XX:LongPauseLogThreshold") {
-      parsed->long_pause_log_threshold_ =
-          ParseMemoryOption(option.substr(strlen("-XX:LongPauseLogThreshold=")).c_str(), 1024);
-    } else if (option == "-XX:LongGCLogThreshold") {
-          parsed->long_gc_log_threshold_ =
-              ParseMemoryOption(option.substr(strlen("-XX:LongGCLogThreshold")).c_str(), 1024);
+      parsed->max_spins_before_thin_lock_inflation_ = ParseIntegerOrDie(option, '=');
+    } else if (StartsWith(option, "-XX:LongPauseLogThreshold=")) {
+      parsed->long_pause_log_threshold_ = MsToNs(ParseIntegerOrDie(option, '='));
+    } else if (StartsWith(option, "-XX:LongGCLogThreshold=")) {
+      parsed->long_gc_log_threshold_ = MsToNs(ParseIntegerOrDie(option, '='));
     } else if (option == "-XX:DumpGCPerformanceOnShutdown") {
       parsed->dump_gc_performance_on_shutdown_ = true;
     } else if (option == "-XX:IgnoreMaxFootprint") {
@@ -608,7 +626,7 @@
         }
       }
     } else if (StartsWith(option, "-XX:BackgroundGC=")) {
-      const std::string substring = option.substr(strlen("-XX:BackgroundGC="));
+      const std::string substring = StringAfterChar(option, '=');
       gc::CollectorType collector_type = ParseCollectorType(substring);
       if (collector_type != gc::kCollectorTypeNone) {
         parsed->background_collector_type_ = collector_type;
@@ -650,9 +668,9 @@
     } else if (StartsWith(option, "-Xjnigreflimit:")) {
       // Silently ignored for backwards compatibility.
     } else if (StartsWith(option, "-Xlockprofthreshold:")) {
-      parsed->lock_profiling_threshold_ = ParseIntegerOrDie(option);
+      parsed->lock_profiling_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xstacktracefile:")) {
-      parsed->stack_trace_file_ = option.substr(strlen("-Xstacktracefile:"));
+      parsed->stack_trace_file_ = StringAfterChar(option, ':');
     } else if (option == "sensitiveThread") {
       parsed->hook_is_sensitive_thread_ = reinterpret_cast<bool (*)()>(const_cast<void*>(options[i].second));
     } else if (option == "vfprintf") {
@@ -671,7 +689,7 @@
     } else if (StartsWith(option, "-Xmethod-trace-file:")) {
       parsed->method_trace_file_ = option.substr(strlen("-Xmethod-trace-file:"));
     } else if (StartsWith(option, "-Xmethod-trace-file-size:")) {
-      parsed->method_trace_file_size_ = ParseIntegerOrDie(option);
+      parsed->method_trace_file_size_ = ParseIntegerOrDie(option, ':');
     } else if (option == "-Xprofile:threadcpuclock") {
       Trace::SetDefaultClockSource(kProfilerClockSourceThreadCpu);
     } else if (option == "-Xprofile:wallclock") {
@@ -679,18 +697,17 @@
     } else if (option == "-Xprofile:dualclock") {
       Trace::SetDefaultClockSource(kProfilerClockSourceDual);
     } else if (StartsWith(option, "-Xprofile:")) {
-      parsed->profile_output_filename_ = option.substr(strlen("-Xprofile:"));
+      parsed->profile_output_filename_ = StringAfterChar(option, ':');
       parsed->profile_ = true;
     } else if (StartsWith(option, "-Xprofile-period:")) {
-      parsed->profile_period_s_ = ParseIntegerOrDie(option);
+      parsed->profile_period_s_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xprofile-duration:")) {
-      parsed->profile_duration_s_ = ParseIntegerOrDie(option);
+      parsed->profile_duration_s_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xprofile-interval:")) {
-      parsed->profile_interval_us_ = ParseIntegerOrDie(option);
+      parsed->profile_interval_us_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xprofile-backoff:")) {
-      parsed->profile_backoff_coefficient_ = ParseDoubleOrDie(option, "-Xprofile-backoff:",
-          1.0, 10.0, ignore_unrecognized,
-          parsed->profile_backoff_coefficient_);
+      parsed->profile_backoff_coefficient_ = ParseDoubleOrDie(
+          option, ':', 1.0, 10.0, ignore_unrecognized, parsed->profile_backoff_coefficient_);
     } else if (option == "-compiler-filter:interpret-only") {
       parsed->compiler_filter_ = kInterpretOnly;
     } else if (option == "-compiler-filter:space") {
@@ -704,15 +721,15 @@
     } else if (option == "-sea_ir") {
       parsed->sea_ir_mode_ = true;
     } else if (StartsWith(option, "-huge-method-max:")) {
-      parsed->huge_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->huge_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-large-method-max:")) {
-      parsed->large_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->large_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-small-method-max:")) {
-      parsed->small_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->small_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-tiny-method-max:")) {
-      parsed->tiny_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->tiny_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-num-dex-methods-max:")) {
-      parsed->num_dex_methods_threshold_ = ParseIntegerOrDie(option);
+      parsed->num_dex_methods_threshold_ = ParseIntegerOrDie(option, ':');
     } else {
       if (!ignore_unrecognized) {
         // TODO: print usage via vfprintf
@@ -975,6 +992,7 @@
   max_spins_before_thin_lock_inflation_ = options->max_spins_before_thin_lock_inflation_;
 
   monitor_list_ = new MonitorList;
+  monitor_pool_ = MonitorPool::Create();
   thread_list_ = new ThreadList;
   intern_table_ = new InternTable;
 
@@ -1086,7 +1104,8 @@
     std::string mapped_name(StringPrintf(OS_SHARED_LIB_FORMAT_STR, "javacore"));
     std::string reason;
     self->TransitionFromSuspendedToRunnable();
-    if (!instance_->java_vm_->LoadNativeLibrary(mapped_name, NULL, &reason)) {
+    SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
+    if (!instance_->java_vm_->LoadNativeLibrary(mapped_name, class_loader, &reason)) {
       LOG(FATAL) << "LoadNativeLibrary failed for \"" << mapped_name << "\": " << reason;
     }
     self->TransitionFromRunnableToSuspended(kNative);
@@ -1343,40 +1362,53 @@
 
 mirror::ArtMethod* Runtime::CreateImtConflictMethod() {
   Thread* self = Thread::Current();
-  Runtime* r = Runtime::Current();
-  ClassLinker* cl = r->GetClassLinker();
-  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, class_linker->AllocArtMethod(self));
   method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
-  // TODO: use a special method for imt conflict method saves
+  // TODO: use a special method for imt conflict method saves.
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   // When compiling, the code pointer will get set later when the image is loaded.
-  method->SetEntryPointFromCompiledCode(r->IsCompiler() ? NULL : GetImtConflictTrampoline(cl));
+  if (runtime->IsCompiler()) {
+    method->SetEntryPointFromPortableCompiledCode(nullptr);
+    method->SetEntryPointFromQuickCompiledCode(nullptr);
+  } else {
+    method->SetEntryPointFromPortableCompiledCode(GetPortableImtConflictTrampoline(class_linker));
+    method->SetEntryPointFromQuickCompiledCode(GetQuickImtConflictTrampoline(class_linker));
+  }
   return method.get();
 }
 
 mirror::ArtMethod* Runtime::CreateResolutionMethod() {
   Thread* self = Thread::Current();
-  Runtime* r = Runtime::Current();
-  ClassLinker* cl = r->GetClassLinker();
-  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, class_linker->AllocArtMethod(self));
   method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for resolution method saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   // When compiling, the code pointer will get set later when the image is loaded.
-  method->SetEntryPointFromCompiledCode(r->IsCompiler() ? NULL : GetResolutionTrampoline(cl));
+  if (runtime->IsCompiler()) {
+    method->SetEntryPointFromPortableCompiledCode(nullptr);
+    method->SetEntryPointFromQuickCompiledCode(nullptr);
+  } else {
+    method->SetEntryPointFromPortableCompiledCode(GetPortableResolutionTrampoline(class_linker));
+    method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionTrampoline(class_linker));
+  }
   return method.get();
 }
 
 mirror::ArtMethod* Runtime::CreateCalleeSaveMethod(InstructionSet instruction_set,
                                                    CalleeSaveType type) {
   Thread* self = Thread::Current();
-  Runtime* r = Runtime::Current();
-  ClassLinker* cl = r->GetClassLinker();
-  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, class_linker->AllocArtMethod(self));
   method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for callee saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
-  method->SetEntryPointFromCompiledCode(NULL);
+  method->SetEntryPointFromPortableCompiledCode(nullptr);
+  method->SetEntryPointFromQuickCompiledCode(nullptr);
   if ((instruction_set == kThumb2) || (instruction_set == kArm)) {
     uint32_t ref_spills = (1 << art::arm::R5) | (1 << art::arm::R6)  | (1 << art::arm::R7) |
                           (1 << art::arm::R8) | (1 << art::arm::R10) | (1 << art::arm::R11);
@@ -1426,8 +1458,23 @@
     method->SetFrameSizeInBytes(frame_size);
     method->SetCoreSpillMask(core_spills);
     method->SetFpSpillMask(0);
+  } else if (instruction_set == kX86_64) {
+    uint32_t ref_spills =
+        (1 << art::x86_64::RBP) | (1 << art::x86_64::RSI) | (1 << art::x86_64::RDI) |
+        (1 << art::x86_64::R8)  | (1 << art::x86_64::R9)  | (1 << art::x86_64::R10) |
+        (1 << art::x86_64::R11) | (1 << art::x86_64::R12) | (1 << art::x86_64::R13) |
+        (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
+    uint32_t arg_spills =
+        (1 << art::x86_64::RCX) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RBX);
+    uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
+                         (1 << art::x86_64::kNumberOfCpuRegisters);  // fake return address callee save
+    size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
+                                 1 /* Method* */) * kPointerSize, kStackAlignment);
+    method->SetFrameSizeInBytes(frame_size);
+    method->SetCoreSpillMask(core_spills);
+    method->SetFpSpillMask(0);
   } else {
-    UNIMPLEMENTED(FATAL);
+    UNIMPLEMENTED(FATAL) << instruction_set;
   }
   return method.get();
 }
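
The callee-save setup above derives the frame size from the spill mask: one pointer-sized slot per set bit, plus one for the Method*, rounded up to the stack alignment. A worked standalone version under assumed constants (kPointerSize = 8 and kStackAlignment = 16, as on x86-64):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    constexpr size_t kPointerSize = 8;      // assumed 64-bit target
    constexpr size_t kStackAlignment = 16;

    // n must be a power of two.
    constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1) & ~(n - 1); }

    size_t FrameSize(uint32_t core_spills) {
      size_t slots = __builtin_popcount(core_spills) /* gprs */ + 1 /* Method* */;
      return RoundUp(slots * kPointerSize, kStackAlignment);
    }

    int main() {
      // 12 bits set in the mask plus the Method* gives 13 slots:
      // 13 * 8 = 104 bytes, rounded up to 112.
      uint32_t core_spills = 0xFFF;  // hypothetical mask
      std::printf("frame size: %zu\n", FrameSize(core_spills));  // 112
      return 0;
    }
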
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 9d48631..557ba2c 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -61,6 +61,7 @@
 class InternTable;
 struct JavaVMExt;
 class MonitorList;
+class MonitorPool;
 class SignalCatcher;
 class ThreadList;
 class Trace;
@@ -314,6 +315,10 @@
     return monitor_list_;
   }
 
+  MonitorPool* GetMonitorPool() const {
+    return monitor_pool_;
+  }
+
   mirror::Throwable* GetPreAllocatedOutOfMemoryError() const
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -524,6 +529,7 @@
   // The number of spins that are done before thread suspension is used to forcibly inflate.
   size_t max_spins_before_thin_lock_inflation_;
   MonitorList* monitor_list_;
+  MonitorPool* monitor_pool_;
 
   ThreadList* thread_list_;
 
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index 47b72e9..d8f408a 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -133,7 +133,7 @@
 
   void Dump(std::ostream& os) {
     // TODO: support non-x86 hosts (not urgent because this code doesn't run on targets).
-#if defined(__APPLE__)
+#if defined(__APPLE__) && defined(__i386__)
     DumpRegister32(os, "eax", context->__ss.__eax);
     DumpRegister32(os, "ebx", context->__ss.__ebx);
     DumpRegister32(os, "ecx", context->__ss.__ecx);
@@ -159,7 +159,7 @@
     os << '\n';
     DumpRegister32(os, "gs",  context->__ss.__gs);
     DumpRegister32(os, "ss",  context->__ss.__ss);
-#else
+#elif defined(__linux__) && defined(__i386__)
     DumpRegister32(os, "eax", context.gregs[REG_EAX]);
     DumpRegister32(os, "ebx", context.gregs[REG_EBX]);
     DumpRegister32(os, "ecx", context.gregs[REG_ECX]);
@@ -185,6 +185,8 @@
     os << '\n';
     DumpRegister32(os, "gs",  context.gregs[REG_GS]);
     DumpRegister32(os, "ss",  context.gregs[REG_SS]);
+#else
+    os << "Unknown architecture/word size/OS in ucontext dump";
 #endif
   }
 
diff --git a/runtime/sirt_ref.h b/runtime/sirt_ref.h
index 56d81ec..3c5e4f8 100644
--- a/runtime/sirt_ref.h
+++ b/runtime/sirt_ref.h
@@ -35,13 +35,17 @@
     DCHECK_EQ(top_sirt, &sirt_);
   }
 
-  T& operator*() const { return *get(); }
-  T* operator->() const { return get(); }
-  T* get() const {
+  T& operator*() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return *get();
+  }
+  T* operator->() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return get();
+  }
+  T* get() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return down_cast<T*>(sirt_.GetReference(0));
   }
 
-  void reset(T* object = NULL) {
+  void reset(T* object = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     sirt_.SetReference(0, object);
   }
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index e583ced..fd7d981 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -75,7 +75,7 @@
   return count;
 }
 
-bool ManagedStack::ShadowFramesContain(mirror::Object** shadow_frame_entry) const {
+bool ManagedStack::ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const {
   for (const ManagedStack* current_fragment = this; current_fragment != NULL;
        current_fragment = current_fragment->GetLink()) {
     for (ShadowFrame* current_frame = current_fragment->top_shadow_frame_; current_frame != NULL;
@@ -251,7 +251,7 @@
     return "upcall";
   }
   result += PrettyMethod(m);
-  result += StringPrintf("' at dex PC 0x%04zx", GetDexPc());
+  result += StringPrintf("' at dex PC 0x%04x", GetDexPc());
   if (!IsShadowFrame()) {
     result += StringPrintf(" (native PC %p)", reinterpret_cast<void*>(GetCurrentQuickFramePc()));
   }
@@ -264,23 +264,23 @@
 }
 
 void StackVisitor::SanityCheckFrame() const {
-#ifndef NDEBUG
-  mirror::ArtMethod* method = GetMethod();
-  CHECK(method->GetClass() == mirror::ArtMethod::GetJavaLangReflectArtMethod());
-  if (cur_quick_frame_ != NULL) {
-    method->AssertPcIsWithinCode(cur_quick_frame_pc_);
-    // Frame sanity.
-    size_t frame_size = method->GetFrameSizeInBytes();
-    CHECK_NE(frame_size, 0u);
-    // A rough guess at an upper size we expect to see for a frame. The 256 is
-    // a dex register limit. The 16 incorporates callee save spills and
-    // outgoing argument set up.
-    const size_t kMaxExpectedFrameSize = 256 * sizeof(word) + 16;
-    CHECK_LE(frame_size, kMaxExpectedFrameSize);
-    size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
-    CHECK_LT(return_pc_offset, frame_size);
+  if (kIsDebugBuild) {
+    mirror::ArtMethod* method = GetMethod();
+    CHECK(method->GetClass() == mirror::ArtMethod::GetJavaLangReflectArtMethod());
+    if (cur_quick_frame_ != nullptr) {
+      method->AssertPcIsWithinQuickCode(cur_quick_frame_pc_);
+      // Frame sanity.
+      size_t frame_size = method->GetFrameSizeInBytes();
+      CHECK_NE(frame_size, 0u);
+      // A rough guess at an upper size we expect to see for a frame. The 256 is
+      // a dex register limit. The 16 incorporates callee save spills and
+      // outgoing argument set up.
+      const size_t kMaxExpectedFrameSize = 256 * sizeof(word) + 16;
+      CHECK_LE(frame_size, kMaxExpectedFrameSize);
+      size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
+      CHECK_LT(return_pc_offset, frame_size);
+    }
   }
-#endif
 }
 
 void StackVisitor::WalkStack(bool include_transitions) {
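
SanityCheckFrame above trades #ifndef NDEBUG for an "if (kIsDebugBuild)" guard, so the checks are always parsed and type-checked (catching bit-rot in release configurations) while still compiling away to nothing when the constant is false. A minimal sketch of the pattern:

    #include <cassert>

    #ifdef NDEBUG
    static constexpr bool kIsDebugBuild = false;
    #else
    static constexpr bool kIsDebugBuild = true;
    #endif

    int Checked(int frame_size) {
      if (kIsDebugBuild) {
        // Always compiled, unlike code hidden behind #ifndef NDEBUG, but the
        // branch folds away entirely in release builds.
        assert(frame_size != 0);
      }
      return frame_size;
    }

    int main() { return Checked(16) == 16 ? 0 : 1; }
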
diff --git a/runtime/stack.h b/runtime/stack.h
index 590f406..8466069 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -19,8 +19,10 @@
 
 #include "dex_file.h"
 #include "instrumentation.h"
+#include "base/casts.h"
 #include "base/macros.h"
 #include "arch/context.h"
+#include "mirror/object_reference.h"
 
 #include <stdint.h>
 #include <string>
@@ -52,16 +54,77 @@
   kUndefined,
 };
 
+/**
+ * @brief Represents the virtual register numbers that denote special meaning.
+ * @details This is used to give some virtual register numbers a specific
+ * semantic meaning, so that the compiler can treat all virtual registers
+ * uniformly and only special-case them when needed. For example,
+ * calculating SSA does not care whether a virtual register is a normal one or
+ * a compiler temporary, so it can deal with them in a consistent manner. But
+ * if, for example, the backend cares about temporaries because it has a custom
+ * spill location, it can special-case them only where that matters.
+ */
+enum VRegBaseRegNum : int {
+  /**
+   * @brief Virtual registers originating from dex have number >= 0.
+   */
+  kVRegBaseReg = 0,
+
+  /**
+   * @brief Invalid virtual register number.
+   */
+  kVRegInvalid = -1,
+
+  /**
+   * @brief Used to denote the base register for compiler temporaries.
+   * @details Compiler temporaries are virtual registers that do not originate
+   * from dex but are created by the compiler. All virtual register numbers
+   * that are <= kVRegTempBaseReg are categorized as compiler temporaries.
+   */
+  kVRegTempBaseReg = -2,
+
+  /**
+   * @brief Base register of temporary that holds the method pointer.
+   * @details This is a special compiler temporary because it has a specific
+   * location on stack.
+   */
+  kVRegMethodPtrBaseReg = kVRegTempBaseReg,
+
+  /**
+   * @brief Base register of non-special compiler temporary.
+   * @details A non-special compiler temporary is one whose spill location
+   * is flexible.
+   */
+  kVRegNonSpecialTempBaseReg = -3,
+};
+
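
A hedged illustration of how a consumer of this numbering scheme might branch on a register number (the classifier below is illustrative, not ART code):

    // Illustrative classifier for the numbering scheme above; not part of ART.
    enum class VRegKind { kDexRegister, kMethodPtr, kCompilerTemp, kInvalid };

    VRegKind ClassifyVReg(int reg) {
      if (reg >= 0) return VRegKind::kDexRegister;   // kVRegBaseReg and up.
      if (reg == -1) return VRegKind::kInvalid;      // kVRegInvalid.
      if (reg == -2) return VRegKind::kMethodPtr;    // kVRegMethodPtrBaseReg.
      return VRegKind::kCompilerTemp;                // reg <= kVRegNonSpecialTempBaseReg.
    }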
+// A reference from the shadow stack to a MirrorType object within the Java heap.
+template<class MirrorType>
+class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
+ public:
+  StackReference<MirrorType>() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : mirror::ObjectReference<false, MirrorType>(nullptr) {}
+
+  static StackReference<MirrorType> FromMirrorPtr(MirrorType* p)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return StackReference<MirrorType>(p);
+  }
+
+ private:
+  StackReference<MirrorType>(MirrorType* p) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : mirror::ObjectReference<false, MirrorType>(p) {}
+};
+
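
StackReference stores the reference in a 32-bit slot so shadow-frame vregs and the reference array stay the same width. A simplified stand-in, under the assumption that objects live in low memory (the same assumption PointerToLowMemUInt32 later in this patch makes explicit):

    #include <cstdint>

    // Simplified stand-in for the wrapper above: a pointer packed into a
    // uint32_t-sized slot. Assumes the object address fits in 32 bits.
    template <typename T>
    class StackRefSketch {
     public:
      static StackRefSketch FromPtr(T* p) { StackRefSketch r; r.Assign(p); return r; }
      T* AsPtr() const { return reinterpret_cast<T*>(static_cast<uintptr_t>(ref_)); }
      void Assign(T* p) { ref_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(p)); }
      void Clear() { ref_ = 0u; }
     private:
      uint32_t ref_ = 0u;
    };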
 // ShadowFrame has 3 possible layouts:
 //  - portable - a unified array of VRegs and references. Precise references need GC maps.
 //  - interpreter - separate VRegs and reference arrays. References are in the reference array.
 //  - JNI - just VRegs, but where every VReg holds a reference.
 class ShadowFrame {
  public:
-  // Compute size of ShadowFrame in bytes.
+  // Compute size of ShadowFrame in bytes assuming it has a reference array.
   static size_t ComputeSize(uint32_t num_vregs) {
     return sizeof(ShadowFrame) + (sizeof(uint32_t) * num_vregs) +
-           (sizeof(mirror::Object*) * num_vregs);
+           (sizeof(StackReference<mirror::Object>) * num_vregs);
   }
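
Worked arithmetic for the size computation, with hypothetical sizeof values (a 12-byte header and 4-byte StackReference slots):

    #include <cstddef>
    #include <cstdint>

    // Re-derivation of ComputeSize under assumed sizes: for 4 vregs,
    // 12 (header) + 4*4 (vreg slots) + 4*4 (reference slots) = 44 bytes.
    constexpr size_t kHeaderSize = 12;  // Hypothetical sizeof(ShadowFrame).
    constexpr size_t ComputeSizeSketch(uint32_t num_vregs) {
      return kHeaderSize + sizeof(uint32_t) * num_vregs + 4u * num_vregs;
    }
    static_assert(ComputeSizeSketch(4) == 44, "12 + 16 + 16");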
 
   // Create ShadowFrame in heap for deoptimization.
@@ -151,22 +214,19 @@
   }
 
   template <bool kChecked = false>
-  mirror::Object* GetVRegReference(size_t i) const {
+  mirror::Object* GetVRegReference(size_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, NumberOfVRegs());
     if (HasReferenceArray()) {
-      mirror::Object* ref = References()[i];
+      mirror::Object* ref = References()[i].AsMirrorPtr();
       if (kChecked) {
         CHECK(VerifyReference(ref)) << "VReg " << i << "(" << ref
                                     << ") is in protected space, reference array " << true;
       }
-      // If the vreg reference is not equal to the vreg then the vreg reference is stale.
-      if (UNLIKELY(reinterpret_cast<uint32_t>(ref) != vregs_[i])) {
-        return nullptr;
-      }
       return ref;
     } else {
-      const uint32_t* vreg = &vregs_[i];
-      mirror::Object* ref = *reinterpret_cast<mirror::Object* const*>(vreg);
+      const uint32_t* vreg_ptr = &vregs_[i];
+      mirror::Object* ref =
+          reinterpret_cast<const StackReference<mirror::Object>*>(vreg_ptr)->AsMirrorPtr();
       if (kChecked) {
         CHECK(VerifyReference(ref)) << "VReg " << i
             << "(" << ref << ") is in protected space, reference array " << false;
@@ -187,7 +247,7 @@
     // This is needed for moving collectors since these can update the vreg references if they
     // happen to agree with references in the reference array.
     if (kMovingCollector && HasReferenceArray()) {
-      References()[i] = nullptr;
+      References()[i].Clear();
     }
   }
 
@@ -198,7 +258,7 @@
     // This is needed for moving collectors since these can update the vreg references if they
     // happen to agree with references in the reference array.
     if (kMovingCollector && HasReferenceArray()) {
-      References()[i] = nullptr;
+      References()[i].Clear();
     }
   }
 
@@ -211,8 +271,8 @@
     // This is needed for moving collectors since these can update the vreg references if they
     // happen to agree with references in the reference array.
     if (kMovingCollector && HasReferenceArray()) {
-      References()[i] = nullptr;
-      References()[i + 1] = nullptr;
+      References()[i].Clear();
+      References()[i + 1].Clear();
     }
   }
 
@@ -225,24 +285,24 @@
     // This is needed for moving collectors since these can update the vreg references if they
     // happen to agree with references in the reference array.
     if (kMovingCollector && HasReferenceArray()) {
-      References()[i] = nullptr;
-      References()[i + 1] = nullptr;
+      References()[i].Clear();
+      References()[i + 1].Clear();
     }
   }
 
-  void SetVRegReference(size_t i, mirror::Object* val) {
+  void SetVRegReference(size_t i, mirror::Object* val) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, NumberOfVRegs());
     DCHECK(!kMovingCollector || VerifyReference(val))
         << "VReg " << i << "(" << val << ") is in protected space";
     uint32_t* vreg = &vregs_[i];
-    *reinterpret_cast<mirror::Object**>(vreg) = val;
+    reinterpret_cast<StackReference<mirror::Object>*>(vreg)->Assign(val);
     if (HasReferenceArray()) {
-      References()[i] = val;
+      References()[i].Assign(val);
     }
   }
 
-  mirror::ArtMethod* GetMethod() const {
-    DCHECK_NE(method_, static_cast<void*>(NULL));
+  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(method_ != nullptr);
     return method_;
   }
 
@@ -254,7 +314,7 @@
 
   void SetMethod(mirror::ArtMethod* method) {
 #if defined(ART_USE_PORTABLE_COMPILER)
-    DCHECK_NE(method, static_cast<void*>(NULL));
+    DCHECK(method != nullptr);
     method_ = method;
 #else
     UNUSED(method);
@@ -262,7 +322,7 @@
 #endif
   }
 
-  bool Contains(mirror::Object** shadow_frame_entry_obj) const {
+  bool Contains(StackReference<mirror::Object>* shadow_frame_entry_obj) const {
     if (HasReferenceArray()) {
       return ((&References()[0] <= shadow_frame_entry_obj) &&
               (shadow_frame_entry_obj <= (&References()[NumberOfVRegs() - 1])));
@@ -302,22 +362,22 @@
       CHECK_LT(num_vregs, static_cast<uint32_t>(kHasReferenceArray));
       number_of_vregs_ |= kHasReferenceArray;
 #endif
-      memset(vregs_, 0, num_vregs * (sizeof(uint32_t) + sizeof(mirror::Object*)));
+      memset(vregs_, 0, num_vregs * (sizeof(uint32_t) + sizeof(StackReference<mirror::Object>)));
     } else {
       memset(vregs_, 0, num_vregs * sizeof(uint32_t));
     }
   }
 
-  mirror::Object* const* References() const {
+  const StackReference<mirror::Object>* References() const {
     DCHECK(HasReferenceArray());
     const uint32_t* vreg_end = &vregs_[NumberOfVRegs()];
-    return reinterpret_cast<mirror::Object* const*>(vreg_end);
+    return reinterpret_cast<const StackReference<mirror::Object>*>(vreg_end);
   }
 
   bool VerifyReference(const mirror::Object* val) const;
 
-  mirror::Object** References() {
-    return const_cast<mirror::Object**>(const_cast<const ShadowFrame*>(this)->References());
+  StackReference<mirror::Object>* References() {
+    return const_cast<StackReference<mirror::Object>*>(
+        const_cast<const ShadowFrame*>(this)->References());
   }
 
 #if defined(ART_USE_PORTABLE_COMPILER)
@@ -426,9 +486,9 @@
     return OFFSETOF_MEMBER(ManagedStack, top_shadow_frame_);
   }
 
-  size_t NumJniShadowFrameReferences() const;
+  size_t NumJniShadowFrameReferences() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool ShadowFramesContain(mirror::Object** shadow_frame_entry) const;
+  bool ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const;
 
  private:
   ManagedStack* link_;
@@ -450,18 +510,18 @@
   void WalkStack(bool include_transitions = false)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtMethod* GetMethod() const {
-    if (cur_shadow_frame_ != NULL) {
+  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (cur_shadow_frame_ != nullptr) {
       return cur_shadow_frame_->GetMethod();
-    } else if (cur_quick_frame_ != NULL) {
+    } else if (cur_quick_frame_ != nullptr) {
       return *cur_quick_frame_;
     } else {
-      return NULL;
+      return nullptr;
     }
   }
 
   bool IsShadowFrame() const {
-    return cur_shadow_frame_ != NULL;
+    return cur_shadow_frame_ != nullptr;
   }
 
   uint32_t GetDexPc() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -470,9 +530,10 @@
 
   size_t GetNativePcOffset() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uintptr_t* CalleeSaveAddress(int num, size_t frame_size) const {
+  uintptr_t* CalleeSaveAddress(int num, size_t frame_size) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Callee saves are held at the top of the frame
-    DCHECK(GetMethod() != NULL);
+    DCHECK(GetMethod() != nullptr);
     byte* save_addr =
         reinterpret_cast<byte*>(cur_quick_frame_) + frame_size - ((num + 1) * kPointerSize);
 #if defined(__i386__)
@@ -509,23 +570,30 @@
 
   // This is a fast-path for getting/setting values in a quick frame.
   uint32_t* GetVRegAddr(mirror::ArtMethod** cur_quick_frame, const DexFile::CodeItem* code_item,
-                   uint32_t core_spills, uint32_t fp_spills, size_t frame_size,
-                   uint16_t vreg) const {
+                        uint32_t core_spills, uint32_t fp_spills, size_t frame_size,
+                        uint16_t vreg) const {
     int offset = GetVRegOffset(code_item, core_spills, fp_spills, frame_size, vreg);
     DCHECK_EQ(cur_quick_frame, GetCurrentQuickFrame());
     byte* vreg_addr = reinterpret_cast<byte*>(cur_quick_frame) + offset;
     return reinterpret_cast<uint32_t*>(vreg_addr);
   }
 
-  uintptr_t GetReturnPc() const;
+  uintptr_t GetReturnPc() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetReturnPc(uintptr_t new_ret_pc);
+  void SetReturnPc(uintptr_t new_ret_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
    * Return sp-relative offset for a Dalvik virtual register, compiler
    * spill or Method* in bytes using Method*.
-   * Note that (reg >= 0) refers to a Dalvik register, (reg == -2)
-   * denotes Method* and (reg <= -3) denotes a compiler temp.
+   * Note that (reg >= 0) refers to a Dalvik register, (reg == -1)
+   * denotes an invalid Dalvik register, (reg == -2) denotes Method*
+   * and (reg <= -3) denotes a compiler temporary. A compiler temporary
+   * can be thought of as a virtual register that does not exist in the
+   * dex but holds intermediate values to help optimizations and code
+   * generation. A special compiler temporary is one whose location in the
+   * frame is well known, while non-special ones have no fixed frame location
+   * as long as the code generator itself knows how to access them.
    *
    *     +------------------------+
    *     | IN[ins-1]              |  {Note: resides in caller's frame}
@@ -546,9 +614,9 @@
    *     | V[1]                   |  ... (reg == 1)
    *     | V[0]                   |  ... (reg == 0) <---- "locals_start"
    *     +------------------------+
-   *     | Compiler temps         |  ... (reg == -2)
-   *     |                        |  ... (reg == -3)
-   *     |                        |  ... (reg == -4)
+   *     | Compiler temp region   |  ... (reg <= -3)
+   *     |                        |
+   *     |                        |
    *     +------------------------+
    *     | stack alignment padding|  {0 to (kStackAlignWords-1) of padding}
    *     +------------------------+
@@ -556,23 +624,35 @@
    *     | OUT[outs-2]            |
    *     |       .                |
    *     | OUT[0]                 |
-   *     | curMethod*             |  ... (reg == -1) <<== sp, 16-byte aligned
+   *     | curMethod*             |  ... (reg == -2) <<== sp, 16-byte aligned
    *     +========================+
    */
   static int GetVRegOffset(const DexFile::CodeItem* code_item,
                            uint32_t core_spills, uint32_t fp_spills,
                            size_t frame_size, int reg) {
     DCHECK_EQ(frame_size & (kStackAlignment - 1), 0U);
+    DCHECK_NE(reg, static_cast<int>(kVRegInvalid));
+
     int num_spills = __builtin_popcount(core_spills) + __builtin_popcount(fp_spills) + 1;  // Filler.
     int num_ins = code_item->ins_size_;
     int num_regs = code_item->registers_size_ - num_ins;
     int locals_start = frame_size - ((num_spills + num_regs) * sizeof(uint32_t));
-    if (reg == -2) {
-      return 0;  // Method*
-    } else if (reg <= -3) {
-      return locals_start - ((reg + 1) * sizeof(uint32_t));  // Compiler temp.
-    } else if (reg < num_regs) {
-      return locals_start + (reg * sizeof(uint32_t));        // Dalvik local reg.
+    if (reg == static_cast<int>(kVRegMethodPtrBaseReg)) {
+      // The current method pointer corresponds to special location on stack.
+      return 0;
+    } else if (reg <= static_cast<int>(kVRegNonSpecialTempBaseReg)) {
+      /*
+       * Special temporaries may have custom locations and the logic above deals with that.
+       * However, non-special temporaries are placed relative to the locals. Since the
+       * virtual register numbers for temporaries "grow" in the negative direction, the
+       * reg number will always be <= the temp base reg. Thus, the logic ensures that the
+       * first temp is at offset -4 bytes from the locals, the second at -8 bytes, and so on.
+       */
+      int relative_offset = (reg + std::abs(static_cast<int>(kVRegNonSpecialTempBaseReg)) - 1) *
+          static_cast<int>(sizeof(uint32_t));
+      return locals_start + relative_offset;
+    } else if (reg < num_regs) {
+      return locals_start + (reg * sizeof(uint32_t));  // Dalvik local reg.
     } else {
       return frame_size + ((reg - num_regs) * sizeof(uint32_t)) + sizeof(uint32_t);  // Dalvik in.
     }
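
A worked example of the offset math above, with assumed inputs: frame_size = 48, one core spill and no FP spills (so num_spills = 1 + 0 + 1 = 2), registers_size_ = 5 and ins_size_ = 1 (so num_regs = 4), giving locals_start = 48 - (2 + 4) * 4 = 24:

    // Standalone re-check of the cases, under the assumptions above.
    constexpr int kFrameSize = 48;
    constexpr int kNumSpills = 2;
    constexpr int kNumRegs = 4;
    constexpr int kLocalsStart = kFrameSize - (kNumSpills + kNumRegs) * 4;  // 24.
    static_assert(kLocalsStart + 0 * 4 == 24, "reg 0: first Dalvik local");
    static_assert(kLocalsStart + (-3 + 3 - 1) * 4 == 20, "reg -3: first temp");
    static_assert(kLocalsStart + (-4 + 3 - 1) * 4 == 16, "reg -4: second temp");
    static_assert(kFrameSize + (4 - kNumRegs) * 4 + 4 == 52, "reg 4: first in, caller frame");
    // reg -2 (Method*) is the fixed slot at offset 0 from sp.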
diff --git a/runtime/stack_indirect_reference_table.h b/runtime/stack_indirect_reference_table.h
index 4c9b038..d22650b 100644
--- a/runtime/stack_indirect_reference_table.h
+++ b/runtime/stack_indirect_reference_table.h
@@ -19,6 +19,7 @@
 
 #include "base/logging.h"
 #include "base/macros.h"
+#include "stack.h"
 
 namespace art {
 namespace mirror {
@@ -33,7 +34,7 @@
  public:
   explicit StackIndirectReferenceTable(mirror::Object* object) :
       number_of_references_(1), link_(NULL) {
-    references_[0] = object;
+    references_[0].Assign(object);
   }
 
   ~StackIndirectReferenceTable() {}
@@ -53,17 +54,17 @@
     link_ = sirt;
   }
 
-  mirror::Object* GetReference(size_t i) const {
+  mirror::Object* GetReference(size_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, number_of_references_);
-    return references_[i];
+    return references_[i].AsMirrorPtr();
   }
 
-  void SetReference(size_t i, mirror::Object* object) {
+  void SetReference(size_t i, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, number_of_references_);
-    references_[i] = object;
+    references_[i].Assign(object);
   }
 
-  bool Contains(mirror::Object** sirt_entry) const {
+  bool Contains(StackReference<mirror::Object>* sirt_entry) const {
     // A SIRT should always contain something. One created by the
     // jni_compiler should have a jobject/jclass, as a native method is
     // passed in a this pointer or a class.
@@ -89,7 +90,7 @@
   StackIndirectReferenceTable* link_;
 
   // number_of_references_ are available if this is allocated and filled in by jni_compiler.
-  mirror::Object* references_[1];
+  StackReference<mirror::Object> references_[1];
 
   DISALLOW_COPY_AND_ASSIGN(StackIndirectReferenceTable);
 };
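
With references_ now an array of StackReference, a jobject pointing into a SIRT slot is decoded via AsMirrorPtr() (see the Thread::DecodeJObject hunk below) and Contains() becomes a simple address-range test. A sketch of that test with simplified types (ART's version checks inclusively against the last entry; this one uses a half-open range):

    #include <cstddef>

    // Address-range containment in the spirit of SIRT::Contains.
    template <typename T>
    bool SlotInTable(const T* entry, const T* first, size_t count) {
      return first <= entry && entry < first + count;  // Half-open range.
    }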
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d195ebf..c649765 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -178,7 +178,7 @@
         receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
     JValue result;
     ArgArray arg_array(nullptr, 0);
-    arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+    arg_array.Append(receiver);
     m->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
   }
   // Detach and delete self.
@@ -190,7 +190,7 @@
 Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa,
                                   mirror::Object* thread_peer) {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer);
-  Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetInt(thread_peer)));
+  Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetLong(thread_peer)));
   // Sanity check that if we have a result it is either suspended or we hold the thread_list_lock_
   // to stop it from going away.
   if (kIsDebugBuild) {
@@ -260,8 +260,8 @@
 
   // Thread.start is synchronized, so we know that nativePeer is 0, and know that we're not racing to
   // assign it.
-  env->SetIntField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer,
-                   reinterpret_cast<jint>(child_thread));
+  env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer,
+                    reinterpret_cast<jlong>(child_thread));
 
   pthread_t new_pthread;
   pthread_attr_t attr;
@@ -283,7 +283,7 @@
     delete child_thread;
     child_thread = nullptr;
     // TODO: remove from thread group?
-    env->SetIntField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);
+    env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);
     {
       std::string msg(StringPrintf("pthread_create (%s stack) failed: %s",
                                    PrettySize(stack_size).c_str(), strerror(pthread_create_result)));
@@ -388,8 +388,8 @@
 
   Thread* self = this;
   DCHECK_EQ(self, Thread::Current());
-  jni_env_->SetIntField(peer.get(), WellKnownClasses::java_lang_Thread_nativePeer,
-                        reinterpret_cast<jint>(self));
+  jni_env_->SetLongField(peer.get(), WellKnownClasses::java_lang_Thread_nativePeer,
+                         reinterpret_cast<jlong>(self));
 
   ScopedObjectAccess soa(self);
   SirtRef<mirror::String> peer_thread_name(soa.Self(), GetThreadName(soa));
@@ -767,7 +767,7 @@
     }
     const int kMaxRepetition = 3;
     mirror::Class* c = m->GetDeclaringClass();
-    const mirror::DexCache* dex_cache = c->GetDexCache();
+    mirror::DexCache* dex_cache = c->GetDexCache();
     int line_number = -1;
     if (dex_cache != nullptr) {  // be tolerant of bad input
       const DexFile& dex_file = *dex_cache->GetDexFile();
@@ -1017,7 +1017,7 @@
     RemoveFromThreadGroup(soa);
 
     // this.nativePeer = 0;
-    soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer)->SetInt(opeer_, 0);
+    soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer)->SetLong(opeer_, 0);
     Dbg::PostThreadDeath(self);
 
     // Thread.join() is implemented as an Object.wait() on the Thread.lock object. Signal anyone
@@ -1046,9 +1046,11 @@
   }
   opeer_ = nullptr;
 
-  delete jni_env_;
-  jni_env_ = nullptr;
-
+  bool initialized = (jni_env_ != nullptr);  // Did Thread::Init run?
+  if (initialized) {
+    delete jni_env_;
+    jni_env_ = nullptr;
+  }
   CHECK_NE(GetState(), kRunnable);
   CHECK_NE(ReadFlag(kCheckpointRequest), true);
   CHECK(checkpoint_functions_[0] == nullptr);
@@ -1065,6 +1067,10 @@
     delete long_jump_context_;
   }
 
+  if (initialized) {
+    CleanupCpu();
+  }
+
   delete debug_invoke_req_;
   delete single_step_control_;
   delete instrumentation_stack_;
@@ -1127,7 +1133,8 @@
 }
 
 bool Thread::SirtContains(jobject obj) const {
-  mirror::Object** sirt_entry = reinterpret_cast<mirror::Object**>(obj);
+  StackReference<mirror::Object>* sirt_entry =
+      reinterpret_cast<StackReference<mirror::Object>*>(obj);
   for (StackIndirectReferenceTable* cur = top_sirt_; cur; cur = cur->GetLink()) {
     if (cur->Contains(sirt_entry)) {
       return true;
@@ -1166,10 +1173,11 @@
     IndirectReferenceTable& locals = jni_env_->locals;
     result = const_cast<mirror::Object*>(locals.Get(ref));
   } else if (kind == kSirtOrInvalid) {
-    // TODO: make stack indirect reference table lookup more efficient
-    // Check if this is a local reference in the SIRT
+    // TODO: make stack indirect reference table lookup more efficient.
+    // Check if this is a local reference in the SIRT.
     if (LIKELY(SirtContains(obj))) {
-      result = *reinterpret_cast<mirror::Object**>(obj);  // Read from SIRT
+      // Read from SIRT.
+      result = reinterpret_cast<StackReference<mirror::Object>*>(obj)->AsMirrorPtr();
     } else if (Runtime::Current()->GetJavaVM()->work_around_app_jni_bugs) {
       // Assume an invalid local reference is actually a direct pointer.
       result = reinterpret_cast<mirror::Object*>(obj);
@@ -1410,8 +1418,8 @@
     mirror::ArtMethod* method = down_cast<mirror::ArtMethod*>(method_trace->Get(i));
     MethodHelper mh(method);
     int32_t line_number;
-    SirtRef<mirror::String> class_name_object(soa.Self(), NULL);
-    SirtRef<mirror::String> source_name_object(soa.Self(), NULL);
+    SirtRef<mirror::String> class_name_object(soa.Self(), nullptr);
+    SirtRef<mirror::String> source_name_object(soa.Self(), nullptr);
     if (method->IsProxyMethod()) {
       line_number = -1;
       class_name_object.reset(method->GetDeclaringClass()->GetName());
@@ -1423,16 +1431,18 @@
       // Allocate element, potentially triggering GC
       // TODO: reuse class_name_object via Class::name_?
       const char* descriptor = mh.GetDeclaringClassDescriptor();
-      CHECK(descriptor != NULL);
+      CHECK(descriptor != nullptr);
       std::string class_name(PrettyDescriptor(descriptor));
       class_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), class_name.c_str()));
-      if (class_name_object.get() == NULL) {
-        return NULL;
+      if (class_name_object.get() == nullptr) {
+        return nullptr;
       }
       const char* source_file = mh.GetDeclaringClassSourceFile();
-      source_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), source_file));
-      if (source_name_object.get() == NULL) {
-        return NULL;
+      if (source_file != nullptr) {
+        source_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), source_file));
+        if (source_name_object.get() == nullptr) {
+          return nullptr;
+        }
       }
     }
     const char* method_name = mh.GetName();
@@ -1561,12 +1571,12 @@
     SetException(gc_safe_throw_location, exception.get());
   } else {
     ArgArray args("VLL", 3);
-    args.Append(reinterpret_cast<uint32_t>(exception.get()));
+    args.Append(exception.get());
     if (msg != nullptr) {
-      args.Append(reinterpret_cast<uint32_t>(msg_string.get()));
+      args.Append(msg_string.get());
     }
     if (cause.get() != nullptr) {
-      args.Append(reinterpret_cast<uint32_t>(cause.get()));
+      args.Append(cause.get());
     }
     JValue result;
     exception_init_method->Invoke(this, args.GetArray(), args.GetNumBytes(), &result, 'V');
@@ -1858,7 +1868,7 @@
         // Java method.
         // Portable path use DexGcMap and store in Method.native_gc_map_.
         const uint8_t* gc_map = m->GetNativeGcMap();
-        CHECK(gc_map != NULL) << PrettyMethod(m);
+        CHECK(gc_map != nullptr) << PrettyMethod(m);
         verifier::DexPcToReferenceMap dex_gc_map(gc_map);
         uint32_t dex_pc = GetDexPc();
         const uint8_t* reg_bitmap = dex_gc_map.FindBitMap(dex_pc);
@@ -1912,13 +1922,13 @@
                   }
                 }
               } else {
-                uint32_t* reg_addr =
-                    GetVRegAddr(cur_quick_frame, code_item, core_spills, fp_spills, frame_size, reg);
+                uintptr_t* reg_addr = reinterpret_cast<uintptr_t*>(
+                    GetVRegAddr(cur_quick_frame, code_item, core_spills, fp_spills, frame_size, reg));
                 mirror::Object* ref = reinterpret_cast<mirror::Object*>(*reg_addr);
                 if (ref != nullptr) {
                   mirror::Object* new_ref = visitor_(ref, reg, this);
                   if (ref != new_ref) {
-                    *reg_addr = reinterpret_cast<uint32_t>(new_ref);
+                    *reg_addr = reinterpret_cast<uintptr_t>(new_ref);
                   }
                 }
               }
diff --git a/runtime/thread.h b/runtime/thread.h
index 0810909..b7f8bb0 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -487,23 +487,23 @@
                         ManagedStack::TopShadowFrameOffset());
   }
 
-  // Number of references allocated in JNI ShadowFrames on this thread
-  size_t NumJniShadowFrameReferences() const {
+  // Number of references allocated in JNI ShadowFrames on this thread.
+  size_t NumJniShadowFrameReferences() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return managed_stack_.NumJniShadowFrameReferences();
   }
 
-  // Number of references in SIRTs on this thread
+  // Number of references in SIRTs on this thread.
   size_t NumSirtReferences();
 
-  // Number of references allocated in SIRTs & JNI shadow frames on this thread
-  size_t NumStackReferences() {
+  // Number of references allocated in SIRTs & JNI shadow frames on this thread.
+  size_t NumStackReferences() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return NumSirtReferences() + NumJniShadowFrameReferences();
   };
 
   // Is the given obj in this thread's stack indirect reference table?
   bool SirtContains(jobject obj) const;
 
-  void SirtVisitRoots(RootVisitor* visitor, void* arg);
+  void SirtVisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void PushSirt(StackIndirectReferenceTable* sirt) {
     sirt->SetLink(top_sirt_);
@@ -619,6 +619,7 @@
   void Init(ThreadList*, JavaVMExt*) EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_);
   void InitCardTable();
   void InitCpu();
+  void CleanupCpu();
   void InitTlsEntryPoints();
   void InitTid();
   void InitPthreadKeySelf();
@@ -787,7 +788,7 @@
   // A positive value implies we're in a region where thread suspension isn't expected.
   uint32_t no_thread_suspension_;
 
-  // Cause for last suspension.
+  // If no_thread_suspension_ is > 0, what is causing that assertion.
   const char* last_no_thread_suspension_cause_;
 
   // Maximum number of checkpoint functions.
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index aca0561..e8c9ff8 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -31,7 +31,7 @@
       name_(name) {
   std::string error_msg;
   stack_.reset(MemMap::MapAnonymous(name.c_str(), nullptr, stack_size, PROT_READ | PROT_WRITE,
-                                    &error_msg));
+                                    false, &error_msg));
   CHECK(stack_.get() != nullptr) << error_msg;
   const char* reason = "new thread pool worker thread";
   pthread_attr_t attr;
diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc
index 2029d4b..c6f0e92 100644
--- a/runtime/thread_pool_test.cc
+++ b/runtime/thread_pool_test.cc
@@ -17,7 +17,7 @@
 
 #include <string>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "common_test.h"
 #include "thread_pool.h"
 
diff --git a/runtime/trace.cc b/runtime/trace.cc
index b0f6e37..18185d4 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -89,7 +89,7 @@
   explicit BuildStackTraceVisitor(Thread* thread) : StackVisitor(thread, NULL),
       method_trace_(Trace::AllocStackTrace()) {}
 
-  bool VisitFrame() {
+  bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
     // Ignore runtime frames (in particular callee save).
     if (!m->IsRuntimeMethod()) {
@@ -133,9 +133,9 @@
   return static_cast<TraceAction>(tmid & kTraceMethodActionMask);
 }
 
-static uint32_t EncodeTraceMethodAndAction(const mirror::ArtMethod* method,
+static uint32_t EncodeTraceMethodAndAction(mirror::ArtMethod* method,
                                            TraceAction action) {
-  uint32_t tmid = reinterpret_cast<uint32_t>(method) | action;
+  uint32_t tmid = PointerToLowMemUInt32(method) | action;
   DCHECK_EQ(method, DecodeTraceMethodId(tmid));
   return tmid;
 }
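
The trace method id packs a method pointer and an action into a single uint32_t; PointerToLowMemUInt32 (added to utils.h in this patch) replaces the bare cast and DCHECKs that the pointer actually fits. A self-contained sketch of the pack/unpack, assuming a 2-bit action field in the low bits of an aligned pointer:

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kActionMask = 0x3;  // Assumed width of the action field.

    uint32_t EncodeSketch(const void* method, uint32_t action) {
      uintptr_t p = reinterpret_cast<uintptr_t>(method);
      assert(p <= UINT32_MAX);         // Low-mem requirement.
      assert((p & kActionMask) == 0);  // Alignment frees the low bits.
      return static_cast<uint32_t>(p) | action;
    }

    const void* DecodeMethodSketch(uint32_t tmid) {
      return reinterpret_cast<const void*>(static_cast<uintptr_t>(tmid & ~kActionMask));
    }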
@@ -298,7 +298,7 @@
 
 void* Trace::RunSamplingThread(void* arg) {
   Runtime* runtime = Runtime::Current();
-  int interval_us = reinterpret_cast<int>(arg);
+  intptr_t interval_us = reinterpret_cast<intptr_t>(arg);
   CHECK_GE(interval_us, 0);
   CHECK(runtime->AttachCurrentThread("Sampling Profiler", true, runtime->GetSystemThreadGroup(),
                                      !runtime->IsCompiler()));
@@ -508,7 +508,7 @@
   } else {
     os << StringPrintf("clock=wall\n");
   }
-  os << StringPrintf("elapsed-time-usec=%llu\n", elapsed);
+  os << StringPrintf("elapsed-time-usec=%" PRIu64 "\n", elapsed);
   size_t num_records = (final_offset - kTraceHeaderLength) / GetRecordSize(clock_source_);
   os << StringPrintf("num-method-calls=%zd\n", num_records);
   os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead_ns);
@@ -548,13 +548,13 @@
 }
 
 void Trace::DexPcMoved(Thread* thread, mirror::Object* this_object,
-                       const mirror::ArtMethod* method, uint32_t new_dex_pc) {
+                       mirror::ArtMethod* method, uint32_t new_dex_pc) {
   // We're not registered to listen to this kind of event, so complain.
   LOG(ERROR) << "Unexpected dex PC event in tracing " << PrettyMethod(method) << " " << new_dex_pc;
 };
 
 void Trace::MethodEntered(Thread* thread, mirror::Object* this_object,
-                          const mirror::ArtMethod* method, uint32_t dex_pc) {
+                          mirror::ArtMethod* method, uint32_t dex_pc) {
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
@@ -563,7 +563,7 @@
 }
 
 void Trace::MethodExited(Thread* thread, mirror::Object* this_object,
-                         const mirror::ArtMethod* method, uint32_t dex_pc,
+                         mirror::ArtMethod* method, uint32_t dex_pc,
                          const JValue& return_value) {
   UNUSED(return_value);
   uint32_t thread_clock_diff = 0;
@@ -574,7 +574,7 @@
 }
 
 void Trace::MethodUnwind(Thread* thread, mirror::Object* this_object,
-                         const mirror::ArtMethod* method, uint32_t dex_pc) {
+                         mirror::ArtMethod* method, uint32_t dex_pc) {
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
@@ -605,7 +605,7 @@
   }
 }
 
-void Trace::LogMethodTraceEvent(Thread* thread, const mirror::ArtMethod* method,
+void Trace::LogMethodTraceEvent(Thread* thread, mirror::ArtMethod* method,
                                 instrumentation::Instrumentation::InstrumentationEvent event,
                                 uint32_t thread_clock_diff, uint32_t wall_clock_diff) {
   // Advance cur_offset_ atomically.
diff --git a/runtime/trace.h b/runtime/trace.h
index 9be015a..d810df0 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -73,17 +73,17 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
-                             const mirror::ArtMethod* method, uint32_t dex_pc)
+                             mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc,
+                            mirror::ArtMethod* method, uint32_t dex_pc,
                             const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc)
+                            mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          const mirror::ArtMethod* method, uint32_t new_dex_pc)
+                          mirror::ArtMethod* method, uint32_t new_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
                                mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
@@ -105,7 +105,7 @@
 
   void ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint32_t* wall_clock_diff);
 
-  void LogMethodTraceEvent(Thread* thread, const mirror::ArtMethod* method,
+  void LogMethodTraceEvent(Thread* thread, mirror::ArtMethod* method,
                            instrumentation::Instrumentation::InstrumentationEvent event,
                            uint32_t thread_clock_diff, uint32_t wall_clock_diff);
 
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 5ec2ea1..e48d6d2 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -68,7 +68,7 @@
   }
 }
 
-int32_t ComputeUtf16Hash(const mirror::CharArray* chars, int32_t offset,
+int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset,
                          size_t char_count) {
   int32_t hash = 0;
   for (size_t i = 0; i < char_count; i++) {
diff --git a/runtime/utf.h b/runtime/utf.h
index cc5e6d4..5b2289e 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -73,7 +73,7 @@
 /*
  * The java.lang.String hashCode() algorithm.
  */
-int32_t ComputeUtf16Hash(const mirror::CharArray* chars, int32_t offset, size_t char_count)
+int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset, size_t char_count)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count);
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index a293043..aad21bc 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -215,14 +215,14 @@
   }
 }
 
-std::string PrettyDescriptor(const mirror::String* java_descriptor) {
+std::string PrettyDescriptor(mirror::String* java_descriptor) {
   if (java_descriptor == NULL) {
     return "null";
   }
   return PrettyDescriptor(java_descriptor->ToModifiedUtf8());
 }
 
-std::string PrettyDescriptor(const mirror::Class* klass) {
+std::string PrettyDescriptor(mirror::Class* klass) {
   if (klass == NULL) {
     return "null";
   }
@@ -283,7 +283,7 @@
   return PrettyDescriptor(descriptor_string);
 }
 
-std::string PrettyField(const mirror::ArtField* f, bool with_type) {
+std::string PrettyField(mirror::ArtField* f, bool with_type) {
   if (f == NULL) {
     return "null";
   }
@@ -358,7 +358,7 @@
   return PrettyDescriptor(return_type);
 }
 
-std::string PrettyMethod(const mirror::ArtMethod* m, bool with_signature) {
+std::string PrettyMethod(mirror::ArtMethod* m, bool with_signature) {
   if (m == nullptr) {
     return "null";
   }
@@ -401,7 +401,7 @@
   return result;
 }
 
-std::string PrettyTypeOf(const mirror::Object* obj) {
+std::string PrettyTypeOf(mirror::Object* obj) {
   if (obj == NULL) {
     return "null";
   }
@@ -417,7 +417,7 @@
   return result;
 }
 
-std::string PrettyClass(const mirror::Class* c) {
+std::string PrettyClass(mirror::Class* c) {
   if (c == NULL) {
     return "null";
   }
@@ -428,7 +428,7 @@
   return result;
 }
 
-std::string PrettyClassAndClassLoader(const mirror::Class* c) {
+std::string PrettyClassAndClassLoader(mirror::Class* c) {
   if (c == NULL) {
     return "null";
   }
@@ -445,7 +445,7 @@
 std::string PrettySize(int64_t byte_count) {
   // The byte thresholds at which we display amounts.  A byte count is displayed
   // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
-  static const size_t kUnitThresholds[] = {
+  static const int64_t kUnitThresholds[] = {
     0,              // B up to...
     3*1024,         // KB up to...
     2*1024*1024,    // MB up to...
@@ -464,7 +464,7 @@
       break;
     }
   }
-  return StringPrintf("%s%lld%s", negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]);
+  return StringPrintf("%s%" PRId64 "%s", negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]);
 }
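
Widening kUnitThresholds to int64_t keeps the comparisons against the int64_t byte_count consistent. The unit is chosen so that kUnitThresholds[U] <= bytes < kUnitThresholds[U+1], then divided by kBytesPerUnit[U]; worked examples with the visible thresholds:

    // With thresholds {0, 3*1024, 2*1024*1024, ...}:
    //   2048 bytes        -> "2048B" (2048 < 3*1024, stays in bytes)
    //   4096 bytes        -> "4KB"   (3*1024 <= 4096 < 2*1024*1024; 4096/1024 = 4)
    //   3*1024*1024 bytes -> "3MB"   (above the MB threshold; 3145728/1048576 = 3)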
 
 std::string PrettyDuration(uint64_t nano_duration) {
@@ -534,18 +534,18 @@
   uint64_t whole_part = nano_duration / divisor;
   uint64_t fractional_part = nano_duration % divisor;
   if (fractional_part == 0) {
-    return StringPrintf("%llu%s", whole_part, unit);
+    return StringPrintf("%" PRIu64 "%s", whole_part, unit);
   } else {
     while ((fractional_part % 1000) == 0) {
       zero_fill -= 3;
       fractional_part /= 1000;
     }
     if (zero_fill == 3) {
-      return StringPrintf("%llu.%03llu%s", whole_part, fractional_part, unit);
+      return StringPrintf("%" PRIu64 ".%03" PRIu64 "%s", whole_part, fractional_part, unit);
     } else if (zero_fill == 6) {
-      return StringPrintf("%llu.%06llu%s", whole_part, fractional_part, unit);
+      return StringPrintf("%" PRIu64 ".%06" PRIu64 "%s", whole_part, fractional_part, unit);
     } else {
-      return StringPrintf("%llu.%09llu%s", whole_part, fractional_part, unit);
+      return StringPrintf("%" PRIu64 ".%09" PRIu64 "%s", whole_part, fractional_part, unit);
     }
   }
 }
@@ -627,7 +627,7 @@
   return descriptor;
 }
 
-std::string JniShortName(const mirror::ArtMethod* m) {
+std::string JniShortName(mirror::ArtMethod* m) {
   MethodHelper mh(m);
   std::string class_name(mh.GetDeclaringClassDescriptor());
   // Remove the leading 'L' and trailing ';'...
@@ -646,7 +646,7 @@
   return short_name;
 }
 
-std::string JniLongName(const mirror::ArtMethod* m) {
+std::string JniLongName(mirror::ArtMethod* m) {
   std::string long_name;
   long_name += JniShortName(m);
   long_name += "__";
diff --git a/runtime/utils.h b/runtime/utils.h
index f063c0a..e2d8966 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -166,8 +166,29 @@
   return static_cast<int>(x & 0x0000003F);
 }
 
-#define CLZ(x) __builtin_clz(x)
-#define CTZ(x) __builtin_ctz(x)
+template<typename T>
+static inline int CLZ(T x) {
+  if (sizeof(T) == sizeof(uint32_t)) {
+    return __builtin_clz(x);
+  } else {
+    return __builtin_clzll(x);
+  }
+}
+
+template<typename T>
+static inline int CTZ(T x) {
+  if (sizeof(T) == sizeof(uint32_t)) {
+    return __builtin_ctz(x);
+  } else {
+    return __builtin_ctzll(x);
+  }
+}
+
+static inline uint32_t PointerToLowMemUInt32(const void* p) {
+  uintptr_t intp = reinterpret_cast<uintptr_t>(p);
+  DCHECK_LE(intp, 0xFFFFFFFFU);
+  return intp & 0xFFFFFFFFU;
+}
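
A few usage checks for the width-dispatching helpers above (assuming they are in scope); like the GCC builtins they wrap, the result is undefined for a zero input:

    #include <cassert>
    #include <cstdint>

    void BitHelperExamples() {
      assert(CLZ(uint32_t{1}) == 31);        // 32-bit path via __builtin_clz.
      assert(CLZ(uint64_t{1}) == 63);        // 64-bit path via __builtin_clzll.
      assert(CTZ(uint64_t{1} << 40) == 40);  // Trailing zeros of a 64-bit value.
    }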
 
 static inline bool NeedsEscaping(uint16_t ch) {
   return (ch < ' ' || ch > '~');
@@ -200,21 +221,22 @@
 // Returns a human-readable equivalent of 'descriptor'. So "I" would be "int",
 // "[[I" would be "int[][]", "[Ljava/lang/String;" would be
 // "java.lang.String[]", and so forth.
-std::string PrettyDescriptor(const mirror::String* descriptor);
+std::string PrettyDescriptor(mirror::String* descriptor)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 std::string PrettyDescriptor(const std::string& descriptor);
 std::string PrettyDescriptor(Primitive::Type type);
-std::string PrettyDescriptor(const mirror::Class* klass)
+std::string PrettyDescriptor(mirror::Class* klass)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Returns a human-readable signature for 'f'. Something like "a.b.C.f" or
 // "int a.b.C.f" (depending on the value of 'with_type').
-std::string PrettyField(const mirror::ArtField* f, bool with_type = true)
+std::string PrettyField(mirror::ArtField* f, bool with_type = true)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 std::string PrettyField(uint32_t field_idx, const DexFile& dex_file, bool with_type = true);
 
 // Returns a human-readable signature for 'm'. Something like "a.b.C.m" or
 // "a.b.C.m(II)V" (depending on the value of 'with_signature').
-std::string PrettyMethod(const mirror::ArtMethod* m, bool with_signature = true)
+std::string PrettyMethod(mirror::ArtMethod* m, bool with_signature = true)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 std::string PrettyMethod(uint32_t method_idx, const DexFile& dex_file, bool with_signature = true);
 
@@ -222,7 +244,7 @@
 // So given an instance of java.lang.String, the output would
 // be "java.lang.String". Given an array of int, the output would be "int[]".
 // Given String.class, the output would be "java.lang.Class<java.lang.String>".
-std::string PrettyTypeOf(const mirror::Object* obj)
+std::string PrettyTypeOf(mirror::Object* obj)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Returns a human-readable form of the type at an index in the specified dex file.
@@ -231,11 +253,11 @@
 
 // Returns a human-readable form of the name of the given class.
 // Given String.class, the output would be "java.lang.Class<java.lang.String>".
-std::string PrettyClass(const mirror::Class* c)
+std::string PrettyClass(mirror::Class* c)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Returns a human-readable form of the name of the given class with its class loader.
-std::string PrettyClassAndClassLoader(const mirror::Class* c)
+std::string PrettyClassAndClassLoader(mirror::Class* c)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Returns a human-readable size string such as "1MB".
@@ -278,10 +300,10 @@
 bool IsValidMemberName(const char* s);
 
 // Returns the JNI native function name for the non-overloaded method 'm'.
-std::string JniShortName(const mirror::ArtMethod* m)
+std::string JniShortName(mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 // Returns the JNI native function name for the overloaded method 'm'.
-std::string JniLongName(const mirror::ArtMethod* m)
+std::string JniLongName(mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 bool ReadFileToString(const std::string& file_name, std::string* result);
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index d2681df..30be36c 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -85,7 +85,7 @@
   }
 }
 
-MethodVerifier::FailureKind MethodVerifier::VerifyClass(const mirror::Class* klass,
+MethodVerifier::FailureKind MethodVerifier::VerifyClass(mirror::Class* klass,
                                                         bool allow_soft_failures,
                                                         std::string* error) {
   if (klass->IsVerified()) {
@@ -837,7 +837,7 @@
   /* offset to array data table is a relative branch-style offset */
   array_data = insns + array_data_offset;
   /* make sure the table is 32-bit aligned */
-  if ((((uint32_t) array_data) & 0x03) != 0) {
+  if ((reinterpret_cast<uintptr_t>(array_data) & 0x03) != 0) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unaligned array data table: at " << cur_offset
                                       << ", data offset " << array_data_offset;
     return false;
@@ -941,7 +941,7 @@
   /* offset to switch table is a relative branch-style offset */
   const uint16_t* switch_insns = insns + switch_offset;
   /* make sure the table is 32-bit aligned */
-  if ((((uint32_t) switch_insns) & 0x03) != 0) {
+  if ((reinterpret_cast<uintptr_t>(switch_insns) & 0x03) != 0) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unaligned switch table: at " << cur_offset
                                       << ", switch offset " << switch_offset;
     return false;
@@ -3616,10 +3616,9 @@
 
 // Look for an instance field with this offset.
 // TODO: we may speed up the search if offsets are sorted by doing a quick search.
-static mirror::ArtField* FindInstanceFieldWithOffset(const mirror::Class* klass,
-                                                  uint32_t field_offset)
+static mirror::ArtField* FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  const mirror::ObjectArray<mirror::ArtField>* instance_fields = klass->GetIFields();
+  mirror::ObjectArray<mirror::ArtField>* instance_fields = klass->GetIFields();
   if (instance_fields != NULL) {
     for (int32_t i = 0, e = instance_fields->GetLength(); i < e; ++i) {
       mirror::ArtField* field = instance_fields->Get(i);
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 053cee5..7c75c9c 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -140,8 +140,7 @@
   };
 
   /* Verify a class. Returns "kNoFailure" on success. */
-  static FailureKind VerifyClass(const mirror::Class* klass, bool allow_soft_failures,
-                                 std::string* error)
+  static FailureKind VerifyClass(mirror::Class* klass, bool allow_soft_failures, std::string* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static FailureKind VerifyClass(const DexFile* dex_file, SirtRef<mirror::DexCache>& dex_cache,
                                  SirtRef<mirror::ClassLoader>& class_loader,
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 4be1d02..3818375 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -203,7 +203,7 @@
   bool IsObjectArrayTypes() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   Primitive::Type GetPrimitiveType() const;
   bool IsJavaLangObjectArray() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsInstantiableTypes() const;
+  bool IsInstantiableTypes() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const std::string& GetDescriptor() const {
     DCHECK(HasClass() || (IsUnresolvedTypes() && !IsUnresolvedMergedReference() &&
                           !IsUnresolvedSuperClass()));
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index e3946f7..546eb40 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -179,7 +179,7 @@
   java_lang_Thread_name = CacheField(env, java_lang_Thread, false, "name", "Ljava/lang/String;");
   java_lang_Thread_priority = CacheField(env, java_lang_Thread, false, "priority", "I");
   java_lang_Thread_uncaughtHandler = CacheField(env, java_lang_Thread, false, "uncaughtHandler", "Ljava/lang/Thread$UncaughtExceptionHandler;");
-  java_lang_Thread_nativePeer = CacheField(env, java_lang_Thread, false, "nativePeer", "I");
+  java_lang_Thread_nativePeer = CacheField(env, java_lang_Thread, false, "nativePeer", "J");
   java_lang_ThreadGroup_mainThreadGroup = CacheField(env, java_lang_ThreadGroup, true, "mainThreadGroup", "Ljava/lang/ThreadGroup;");
   java_lang_ThreadGroup_name = CacheField(env, java_lang_ThreadGroup, false, "name", "Ljava/lang/String;");
   java_lang_ThreadGroup_systemThreadGroup = CacheField(env, java_lang_ThreadGroup, true, "systemThreadGroup", "Ljava/lang/ThreadGroup;");
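
Changing nativePeer from "I" to "J" lets a 64-bit Thread* round-trip losslessly through the Java field; the thread.cc hunks above switch the matching SetIntField/GetInt calls to SetLongField/GetLong accordingly. A sketch of the round-trip with a stand-in type:

    #include <cassert>
    #include <cstdint>

    struct NativeThread { int id; };  // Stand-in for art::Thread.

    void PeerRoundTrip(NativeThread* t) {
      int64_t peer = reinterpret_cast<int64_t>(t);  // Store, as SetLongField now does.
      NativeThread* back =
          reinterpret_cast<NativeThread*>(static_cast<uintptr_t>(peer));  // Load.
      assert(back == t);  // Lossless on both 32-bit and 64-bit targets.
    }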
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index 8cb1993..ba0b91e 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -55,7 +55,7 @@
   name += entry_filename;
   UniquePtr<MemMap> map(MemMap::MapAnonymous(name.c_str(),
                                              NULL, GetUncompressedLength(),
-                                             PROT_READ | PROT_WRITE, error_msg));
+                                             PROT_READ | PROT_WRITE, false, error_msg));
   if (map.get() == nullptr) {
     DCHECK(!error_msg->empty());
     return nullptr;
diff --git a/test/003-omnibus-opcodes/src/InternedString.java b/test/003-omnibus-opcodes/src/InternedString.java
index 804564b..35812e6 100644
--- a/test/003-omnibus-opcodes/src/InternedString.java
+++ b/test/003-omnibus-opcodes/src/InternedString.java
@@ -34,14 +34,14 @@
 
     private static void testDeadInternedString() {
         WeakReference<String> strRef = makeWeakString();
-        System.gc();
+        Runtime.getRuntime().gc();
         // "blahblah" should disappear from the intern list
         Main.assertTrue(strRef.get() == null);
     }
 
     private static void testImmortalInternedString() {
         WeakReference strRef = new WeakReference<String>(CONST.intern());
-        System.gc();
+        Runtime.getRuntime().gc();
         // Class constant string should be entered to the interned table when
         // loaded
         Main.assertTrue(CONST == CONST.intern());
@@ -53,7 +53,7 @@
         strRef = new WeakReference<String>(s.intern());
         // Kill s, otherwise the string object is still accessible from root set
         s = "";
-        System.gc();
+        Runtime.getRuntime().gc();
         Main.assertTrue(strRef.get() == CONST);
     }
 }
diff --git a/test/003-omnibus-opcodes/src/Main.java b/test/003-omnibus-opcodes/src/Main.java
index 5dcc79c..25050df 100644
--- a/test/003-omnibus-opcodes/src/Main.java
+++ b/test/003-omnibus-opcodes/src/Main.java
@@ -26,7 +26,7 @@
         main.run();
 
         /* run through the heap to see if we trashed something */
-        System.gc();
+        Runtime.getRuntime().gc();
 
         System.out.println("Done!");
     }
diff --git a/test/023-many-interfaces/src/ManyInterfaces.java b/test/023-many-interfaces/src/ManyInterfaces.java
index c4cb4ab..d69a490 100644
--- a/test/023-many-interfaces/src/ManyInterfaces.java
+++ b/test/023-many-interfaces/src/ManyInterfaces.java
@@ -200,7 +200,7 @@
          * Clear the heap.  The various classes involved should already
          * be loaded and ready as a result of instantiating ManyInterfaces.
          */
-        System.gc();
+        Runtime.getRuntime().gc();
 
         start = System.nanoTime();
         testIface001(obj, iter);
diff --git a/test/030-bad-finalizer/src/Main.java b/test/030-bad-finalizer/src/Main.java
index e19fd3e..330e344 100644
--- a/test/030-bad-finalizer/src/Main.java
+++ b/test/030-bad-finalizer/src/Main.java
@@ -25,12 +25,12 @@
         bf = null;
 
         System.out.println("Nulled. Requestion gc.");
-        System.gc();
+        Runtime.getRuntime().gc();
 
         for (int i = 0; i < 8; i++) {
             BadFinalizer.snooze(4000);
             System.out.println("Requesting another GC.");
-            System.gc();
+            Runtime.getRuntime().gc();
         }
 
         System.out.println("Done waiting.");
diff --git a/test/036-finalizer/src/Main.java b/test/036-finalizer/src/Main.java
index 4bc7d8d..6195aff 100644
--- a/test/036-finalizer/src/Main.java
+++ b/test/036-finalizer/src/Main.java
@@ -93,7 +93,7 @@
 
         /* this will try to collect and finalize ft */
         System.out.println("gc");
-        System.gc();
+        Runtime.getRuntime().gc();
 
         System.out.println("wimp: " + wimpString(wimp));
         System.out.println("finalize");
@@ -106,7 +106,7 @@
         System.out.println("reborn: " + FinalizerTest.mReborn);
         System.out.println("wimp: " + wimpString(wimp));
         System.out.println("reset reborn");
-        System.gc();
+        Runtime.getRuntime().gc();
         FinalizerTest.mReborn = FinalizerTest.mNothing;
         System.out.println("gc + finalize");
         System.gc();
diff --git a/test/067-preemptive-unpark/src/Main.java b/test/067-preemptive-unpark/src/Main.java
index a16219e..2c099b9 100644
--- a/test/067-preemptive-unpark/src/Main.java
+++ b/test/067-preemptive-unpark/src/Main.java
@@ -18,6 +18,7 @@
 
         System.out.println("GC'ing");
         System.gc();
+        System.runFinalization();
         System.gc();
 
         System.out.println("Asking thread to park");
diff --git a/test/072-precise-gc/src/Main.java b/test/072-precise-gc/src/Main.java
index e049221..f6cd8b1 100644
--- a/test/072-precise-gc/src/Main.java
+++ b/test/072-precise-gc/src/Main.java
@@ -88,7 +88,7 @@
          * Getting a zero result here isn't conclusive, but it's a strong
          * indicator that precise GC is having an impact.
          */
-        System.gc();
+        Runtime.getRuntime().gc();
 
         for (int i = 0; i < wrefs.length; i++) {
             if (wrefs[i].get() != null)
diff --git a/test/074-gc-thrash/src/Main.java b/test/074-gc-thrash/src/Main.java
index f85aa4b..78413f3 100644
--- a/test/074-gc-thrash/src/Main.java
+++ b/test/074-gc-thrash/src/Main.java
@@ -232,7 +232,7 @@
         for (int i = 0; i < MAX_DEPTH; i++)
             strong[i] = null;
 
-        System.gc();
+        Runtime.getRuntime().gc();
 
         for (int i = 0; i < MAX_DEPTH; i++) {
             if (weak[i].get() != null) {
diff --git a/test/079-phantom/src/Main.java b/test/079-phantom/src/Main.java
index 9c459c9..c54bc0b 100644
--- a/test/079-phantom/src/Main.java
+++ b/test/079-phantom/src/Main.java
@@ -38,29 +38,29 @@
     public void run() {
         createBitmaps();
 
-        System.gc();
+        Runtime.getRuntime().gc();
         sleep(250);
 
         mBitmap2.drawAt(0, 0);
 
         System.out.println("nulling 1");
         mBitmap1 = null;
-        System.gc();
+        Runtime.getRuntime().gc();
         sleep(500);
 
         System.out.println("nulling 2");
         mBitmap2 = null;
-        System.gc();
+        Runtime.getRuntime().gc();
         sleep(500);
 
         System.out.println("nulling 3");
         mBitmap3 = null;
-        System.gc();
+        Runtime.getRuntime().gc();
         sleep(500);
 
         System.out.println("nulling 4");
         mBitmap4 = null;
-        System.gc();
+        Runtime.getRuntime().gc();
         sleep(500);
 
         Bitmap.shutDown();
diff --git a/test/083-compiler-regressions/src/Main.java b/test/083-compiler-regressions/src/Main.java
index 4d6aca3..a532141 100644
--- a/test/083-compiler-regressions/src/Main.java
+++ b/test/083-compiler-regressions/src/Main.java
@@ -172,7 +172,7 @@
     }
 
     static void b2302318Test() {
-        System.gc();
+        Runtime.getRuntime().gc();
 
         SpinThread slow = new SpinThread(Thread.MIN_PRIORITY);
         SpinThread fast1 = new SpinThread(Thread.NORM_PRIORITY);
@@ -188,7 +188,7 @@
         try {
             Thread.sleep(3000);
         } catch (InterruptedException ie) {/*ignore */}
-        System.gc();
+        Runtime.getRuntime().gc();
 
         System.out.println("b2302318 passes");
     }
diff --git a/test/087-gc-after-link/src/Main.java b/test/087-gc-after-link/src/Main.java
index c99a7d5..e0a187a 100644
--- a/test/087-gc-after-link/src/Main.java
+++ b/test/087-gc-after-link/src/Main.java
@@ -170,7 +170,7 @@
                 ite.printStackTrace();
             }
         }
-        System.gc();
+        Runtime.getRuntime().gc();
         System.out.println("GC complete.");
     }
 }
diff --git a/test/109-suspend-check/src/Main.java b/test/109-suspend-check/src/Main.java
index d92b9e5..ae10576 100644
--- a/test/109-suspend-check/src/Main.java
+++ b/test/109-suspend-check/src/Main.java
@@ -28,7 +28,7 @@
         doWhileLoop.start();
         garbage.start();
         for (int i = 0; i < TEST_TIME; i++) {
-          System.gc();
+          Runtime.getRuntime().gc();
           System.out.println(".");
           sleep(1000);
         }