Merge "Remove redundant compare for x86 OpDecAndBranch"
diff --git a/Android.mk b/Android.mk
index bdd26da..62d40bb 100644
--- a/Android.mk
+++ b/Android.mk
@@ -63,6 +63,7 @@
 	rm -f $(TARGET_OUT_INTERMEDIATES)/JAVA_LIBRARIES/*_intermediates/javalib.odex
 	rm -f $(TARGET_OUT_INTERMEDIATES)/APPS/*_intermediates/*.odex
 	rm -rf /tmp/test-*/dalvik-cache/*@classes.dex
+	rm -rf /tmp/android-data/dalvik-cache/*@classes.dex
 
 .PHONY: clean-oat-target
 clean-oat-target:
@@ -91,7 +92,6 @@
 include $(art_path)/disassembler/Android.mk
 include $(art_path)/oatdump/Android.mk
 include $(art_path)/dalvikvm/Android.mk
-include $(art_path)/jdwpspy/Android.mk
 include $(art_build_path)/Android.oat.mk
 
 # ART_HOST_DEPENDENCIES depends on Android.executable.mk above for ART_HOST_EXECUTABLES
@@ -333,7 +333,7 @@
 .PHONY: dump-oat-boot
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
 dump-oat-boot: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(OATDUMP)
-	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt
+	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt --host-prefix=$(DEXPREOPT_PRODUCT_DIR_FULL_PATH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/boot.oatdump.txt
 endif
 
@@ -355,19 +355,19 @@
 .PHONY: use-art
 use-art:
 	adb root && sleep 3
-	adb shell setprop persist.sys.dalvik.vm.lib libart.so
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
 	adb reboot
 
 .PHONY: use-artd
 use-artd:
 	adb root && sleep 3
-	adb shell setprop persist.sys.dalvik.vm.lib libartd.so
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libartd.so
 	adb reboot
 
 .PHONY: use-dalvik
 use-dalvik:
 	adb root && sleep 3
-	adb shell setprop persist.sys.dalvik.vm.lib libdvm.so
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libdvm.so
 	adb reboot
 
 ########################################################################
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index bed48ba..acaa0f8 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -30,6 +30,7 @@
 	compiler/utils/x86/managed_register_x86_test.cc \
 	runtime/barrier_test.cc \
 	runtime/base/bit_vector_test.cc \
+	runtime/base/hex_dump_test.cc \
 	runtime/base/histogram_test.cc \
 	runtime/base/mutex_test.cc \
 	runtime/base/timing_logger_test.cc \
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index ec6efbc..10dc2d3 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -68,7 +68,7 @@
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.oat.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += $(HOST_CORE_IMG_OUT)
 include $(BUILD_PHONY_PACKAGE)
-endif
+endif # ART_BUILD_HOST
 
 # If we aren't building the host toolchain, skip building the target core.art.
 ifeq ($(WITH_HOST_DALVIK),true)
@@ -80,15 +80,5 @@
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.oat.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += $(TARGET_CORE_IMG_OUT)
 include $(BUILD_PHONY_PACKAGE)
-endif
-
-ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-include $(CLEAR_VARS)
-LOCAL_MODULE := boot.art
-LOCAL_MODULE_TAGS := optional
-LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
-LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.oat.mk
-LOCAL_ADDITIONAL_DEPENDENCIES += $(DEFAULT_DEX_PREOPT_INSTALLED_IMAGE)
-include $(BUILD_PHONY_PACKAGE)
-endif
-endif
+endif # ART_BUILD_TARGET
+endif # WITH_HOST_DALVIK
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4340929..c6662c2 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -59,7 +59,8 @@
 	dex/frontend.cc \
 	dex/mir_graph.cc \
 	dex/mir_analysis.cc \
-	dex/verified_methods_data.cc \
+	dex/verified_method.cc \
+	dex/verification_results.cc \
 	dex/vreg_analysis.cc \
 	dex/ssa_transformation.cc \
 	driver/compiler_driver.cc \
@@ -196,6 +197,34 @@
   LOCAL_SHARED_LIBRARIES += libbcc libbcinfo libLLVM
   ifeq ($(ART_USE_PORTABLE_COMPILER),true)
     LOCAL_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
+    ifeq ($$(art_target_or_host),target)
+      ifeq ($(TARGET_ARCH),arm)
+        LOCAL_STATIC_LIBRARIES += libmcldARMInfo libmcldARMTarget
+      else # TARGET_ARCH != arm
+      ifeq ($(TARGET_ARCH),x86)
+        LOCAL_STATIC_LIBRARIES += libmcldX86Info libmcldX86Target
+      else # TARGET_ARCH != x86
+      ifeq ($(TARGET_ARCH),x86_64)
+        LOCAL_STATIC_LIBRARIES += libmcldX86Info libmcldX86Target
+      else # TARGET_ARCH != x86_64
+      ifeq ($(TARGET_ARCH),mips)
+        LOCAL_STATIC_LIBRARIES += libmcldMipsInfo libmcldMipsTarget
+      else # TARGET_ARCH != mips
+      ifeq ($(TARGET_ARCH),aarch64)
+         $$(info TODOAArch64: $$(LOCAL_PATH)/Android.mk Add AArch64 specific MCLinker libraries)
+      else # TARGET_ARCH != aarch64
+        $$(error unsupported TARGET_ARCH=$(TARGET_ARCH))
+      endif # TARGET_ARCH != aarch64
+      endif # TARGET_ARCH != mips
+      endif # TARGET_ARCH != x86_64
+      endif # TARGET_ARCH != x86
+      endif # TARGET_ARCH != arm
+    else # host
+      LOCAL_STATIC_LIBRARIES += libmcldARMInfo libmcldARMTarget
+      LOCAL_STATIC_LIBRARIES += libmcldX86Info libmcldX86Target
+      LOCAL_STATIC_LIBRARIES += libmcldMipsInfo libmcldMipsTarget
+    endif
+    LOCAL_STATIC_LIBRARIES += libmcldCore libmcldObject libmcldADT libmcldFragment libmcldTarget libmcldCodeGen libmcldLDVariant libmcldMC libmcldSupport libmcldLD
   endif
 
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc
index f013067..2ab6252 100644
--- a/compiler/dex/bb_optimizations.cc
+++ b/compiler/dex/bb_optimizations.cc
@@ -23,7 +23,7 @@
 /*
  * Code Layout pass implementation start.
  */
-bool CodeLayout::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool CodeLayout::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->LayoutBlocks(bb);
   // No need of repeating, so just return false.
   return false;
@@ -32,13 +32,13 @@
 /*
  * SSATransformation pass implementation start.
  */
-bool SSATransformation::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool SSATransformation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->InsertPhiNodeOperands(bb);
   // No need of repeating, so just return false.
   return false;
 }
 
-void SSATransformation::End(CompilationUnit *cUnit) const {
+void SSATransformation::End(CompilationUnit* cUnit) const {
   // Verify the dataflow information after the pass.
   if (cUnit->enable_debug & (1 << kDebugVerifyDataflow)) {
     cUnit->mir_graph->VerifyDataflow();
@@ -48,7 +48,7 @@
 /*
  * ConstantPropagation pass implementation start
  */
-bool ConstantPropagation::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool ConstantPropagation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->DoConstantPropagation(bb);
   // No need of repeating, so just return false.
   return false;
@@ -57,7 +57,7 @@
 /*
  * MethodUseCount pass implementation start.
  */
-bool MethodUseCount::Gate(const CompilationUnit *cUnit) const {
+bool MethodUseCount::Gate(const CompilationUnit* cUnit) const {
   // First initialize the data.
   cUnit->mir_graph->InitializeMethodUses();
 
@@ -67,7 +67,7 @@
   return res;
 }
 
-bool MethodUseCount::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool MethodUseCount::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->CountUses(bb);
   // No need of repeating, so just return false.
   return false;
@@ -77,7 +77,7 @@
  * Null Check Elimination and Type Inference Initialization pass implementation start.
  */
 
-bool NullCheckEliminationAndTypeInferenceInit::Gate(const CompilationUnit *cUnit) const {
+bool NullCheckEliminationAndTypeInferenceInit::Gate(const CompilationUnit* cUnit) const {
   // First check the ssa register vector
   cUnit->mir_graph->CheckSSARegisterVector();
 
@@ -87,7 +87,8 @@
   return performInit;
 }
 
-bool NullCheckEliminationAndTypeInferenceInit::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool NullCheckEliminationAndTypeInferenceInit::WalkBasicBlocks(CompilationUnit* cUnit,
+                                                               BasicBlock* bb) const {
   cUnit->mir_graph->NullCheckEliminationInit(bb);
   // No need of repeating, so just return false.
   return false;
@@ -96,7 +97,7 @@
 /*
  * BasicBlock Combine pass implementation start.
  */
-bool BBCombine::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool BBCombine::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->CombineBlocks(bb);
 
   // No need of repeating, so just return false.
@@ -106,9 +107,7 @@
 /*
  * BasicBlock Optimization pass implementation start.
  */
-void BBOptimizations::Start(CompilationUnit *cUnit) const {
-  DCHECK_EQ(cUnit->num_compiler_temps, 0);
-
+void BBOptimizations::Start(CompilationUnit* cUnit) const {
   /*
   * This pass has a different ordering depending on the suppress exception,
    * so do the pass here for now:
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 768b273..1286a8e 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -28,14 +28,14 @@
  */
 class CodeLayout : public Pass {
  public:
-  CodeLayout():Pass("CodeLayout", "2_post_layout_cfg") {
+  CodeLayout() : Pass("CodeLayout", "2_post_layout_cfg") {
   }
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->VerifyDataflow();
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 };
 
 /**
@@ -44,16 +44,16 @@
  */
 class SSATransformation : public Pass {
  public:
-  SSATransformation():Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
+  SSATransformation() : Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->InitializeSSATransformation();
   }
 
-  void End(CompilationUnit *cUnit) const;
+  void End(CompilationUnit* cUnit) const;
 };
 
 /**
@@ -62,12 +62,12 @@
  */
 class ConstantPropagation : public Pass {
  public:
-  ConstantPropagation():Pass("ConstantPropagation") {
+  ConstantPropagation() : Pass("ConstantPropagation") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->InitializeConstantPropagation();
   }
 };
@@ -78,10 +78,10 @@
  */
 class InitRegLocations : public Pass {
  public:
-  InitRegLocations():Pass("InitRegLocation") {
+  InitRegLocations() : Pass("InitRegLocation", kNoNodes) {
   }
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->InitRegLocations();
   }
 };
@@ -92,12 +92,12 @@
  */
 class MethodUseCount : public Pass {
  public:
-  MethodUseCount():Pass("UseCount") {
+  MethodUseCount() : Pass("UseCount") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  bool Gate(const CompilationUnit *cUnit) const;
+  bool Gate(const CompilationUnit* cUnit) const;
 };
 
 /**
@@ -106,12 +106,12 @@
  */
 class NullCheckEliminationAndTypeInferenceInit : public Pass {
  public:
-  NullCheckEliminationAndTypeInferenceInit():Pass("NCE_TypeInferenceInit") {
+  NullCheckEliminationAndTypeInferenceInit() : Pass("NCE_TypeInferenceInit") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  bool Gate(const CompilationUnit *cUnit) const;
+  bool Gate(const CompilationUnit* cUnit) const;
 };
 
 /**
@@ -120,10 +120,11 @@
  */
 class NullCheckEliminationAndTypeInference : public Pass {
  public:
-  NullCheckEliminationAndTypeInference():Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
+  NullCheckEliminationAndTypeInference()
+    : Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
     return cUnit->mir_graph->EliminateNullChecksAndInferTypes(bb);
   }
 };
@@ -134,14 +135,14 @@
  */
 class BBCombine : public Pass {
  public:
-  BBCombine():Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
+  BBCombine() : Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const CompilationUnit* cUnit) const {
     return ((cUnit->disable_opt & (1 << kSuppressExceptionEdges)) != 0);
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 };
 
 /**
@@ -150,14 +151,14 @@
  */
 class BBOptimizations : public Pass {
  public:
-  BBOptimizations():Pass("BBOptimizations", "5_post_bbo_cfg") {
+  BBOptimizations() : Pass("BBOptimizations", kNoNodes, "5_post_bbo_cfg") {
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const CompilationUnit* cUnit) const {
     return ((cUnit->disable_opt & (1 << kBBOpt)) == 0);
   }
 
-  void Start(CompilationUnit *cUnit) const;
+  void Start(CompilationUnit* cUnit) const;
 };
 
 }  // namespace art
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index 3798b45..32fd79b 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -81,9 +81,6 @@
   uint16_t num_outs;
   uint16_t num_regs;            // Unlike num_dalvik_registers, does not include ins.
 
-  // TODO: may want to move this to MIRGraph.
-  uint16_t num_compiler_temps;
-
   // If non-empty, apply optimizer/debug flags only to matching methods.
   std::string compiler_method_match;
   // Flips sense of compiler_method_match - apply flags if doesn't match.
diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h
index 0ca1a47..f8b9c1a 100644
--- a/compiler/dex/dataflow_iterator-inl.h
+++ b/compiler/dex/dataflow_iterator-inl.h
@@ -107,7 +107,7 @@
   // Find the next BasicBlock.
   while (keep_looking == true) {
     // Get next BasicBlock.
-    res = all_nodes_iterator_->Next();
+    res = all_nodes_iterator_.Next();
 
     // Are we done or is the BasicBlock not hidden?
     if ((res == NULL) || (res->hidden == false)) {
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index 658a9b1..b45d6a4 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -138,21 +138,6 @@
 
         return ForwardSingleNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(PreOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -184,21 +169,6 @@
 
         return ForwardRepeatNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(RepeatingPreOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -230,21 +200,6 @@
 
         return ForwardRepeatNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(RepeatingPostOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -275,21 +230,6 @@
 
         return ReverseSingleNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(ReversePostOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -321,21 +261,6 @@
 
         return ReverseRepeatNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(RepeatingReversePostOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -366,21 +291,6 @@
 
         return ForwardSingleNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(PostOrderDOMIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -394,16 +304,15 @@
        * @param mir_graph The MIRGraph considered.
        */
       explicit AllNodesIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, 0) {
-        all_nodes_iterator_ = new
-            (mir_graph->GetArena()) GrowableArray<BasicBlock*>::Iterator(mir_graph->GetBlockList());
+          : DataflowIterator(mir_graph, 0, 0),
+            all_nodes_iterator_(mir_graph->GetBlockList()) {
       }
 
       /**
        * @brief Resetting the iterator.
        */
       void Reset() {
-        all_nodes_iterator_->Reset();
+        all_nodes_iterator_.Reset();
       }
 
       /**
@@ -413,23 +322,8 @@
        */
       virtual BasicBlock* Next(bool had_change = false) ALWAYS_INLINE;
 
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(AllNodesIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
-
     private:
-      GrowableArray<BasicBlock*>::Iterator* all_nodes_iterator_;    /**< @brief The list of all the nodes */
+      GrowableArray<BasicBlock*>::Iterator all_nodes_iterator_;    /**< @brief The list of all the nodes */
   };
 
 }  // namespace art
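
The deletions above all remove the same per-class idiom: each iterator overloaded operator new to place itself in the compilation arena and made delete a no-op, even though every caller built the iterator for a single walk and discarded it. The AllNodesIterator hunk shows the replacement: hold the GrowableArray iterator by value. A minimal sketch of both idioms, with a toy Arena standing in for ART's ArenaAllocator (all names below are illustrative, not the real classes):

    #include <cstddef>
    #include <cstdio>

    struct Arena {  // toy stand-in for ART's ArenaAllocator
      char buf[1024];
      size_t used = 0;
      void* Alloc(size_t n) { void* p = buf + used; used += n; return p; }
    };

    // Old idiom (removed by this patch): place the object in the arena and
    // make delete a no-op, since the arena frees everything wholesale.
    struct ArenaIterator {
      static void* operator new(size_t size, Arena* arena) {
        return arena->Alloc(size);
      }
      static void operator delete(void*) {}  // nothing to free individually
      int pos = 0;
    };

    // New idiom: a by-value member needs no allocator plumbing at all; its
    // lifetime is simply that of the owning object or stack frame.
    struct Walker {
      ArenaIterator it;
    };

    int main() {
      Arena arena;
      ArenaIterator* old_style = new (&arena) ArenaIterator();
      Walker new_style;
      std::printf("%d %d\n", old_style->pos, new_style.it.pos);
      return 0;
    }
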
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 3368132..ff8fea0 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -176,8 +176,7 @@
   if (!kEnableCheckCastEllision || !PerformOptimizations()) {
     return inst;
   }
-  MethodReference referrer(&GetDexFile(), unit_.GetDexMethodIndex());
-  if (!driver_.IsSafeCast(referrer, dex_pc)) {
+  if (!driver_.IsSafeCast(&unit_, dex_pc)) {
     return inst;
   }
   // Ok, this is a safe cast. Since the "check-cast" instruction size is 2 code
@@ -272,15 +271,16 @@
 }  // namespace optimizer
 }  // namespace art
 
-extern "C" void ArtCompileDEX(art::CompilerDriver& compiler, const art::DexFile::CodeItem* code_item,
+extern "C" void ArtCompileDEX(art::CompilerDriver& driver, const art::DexFile::CodeItem* code_item,
                   uint32_t access_flags, art::InvokeType invoke_type,
                   uint16_t class_def_idx, uint32_t method_idx, jobject class_loader,
                   const art::DexFile& dex_file,
                   art::DexToDexCompilationLevel dex_to_dex_compilation_level) {
   if (dex_to_dex_compilation_level != art::kDontDexToDexCompile) {
     art::DexCompilationUnit unit(NULL, class_loader, art::Runtime::Current()->GetClassLinker(),
-                                 dex_file, code_item, class_def_idx, method_idx, access_flags);
-    art::optimizer::DexCompiler dex_compiler(compiler, unit, dex_to_dex_compilation_level);
+                                 dex_file, code_item, class_def_idx, method_idx, access_flags,
+                                 driver.GetVerifiedMethod(&dex_file, method_idx));
+    art::optimizer::DexCompiler dex_compiler(driver, unit, dex_to_dex_compilation_level);
     dex_compiler.Compile();
   }
 }
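
The extra DexCompilationUnit argument above injects the method's precomputed verification results (driver.GetVerifiedMethod(...)) at construction time, which is what lets the check-cast elision earlier in this file ask driver_.IsSafeCast(&unit_, dex_pc) instead of rebuilding a MethodReference. Reduced to its dependency-injection shape, with invented stand-in types rather than the real ART classes:

    #include <cstdint>
    #include <set>

    // Toy stand-in for the verifier's per-method output.
    struct VerifiedMethod {
      std::set<uint32_t> safe_cast_pcs;  // dex pcs of provably safe check-casts
    };

    struct DexCompilationUnit {
      const VerifiedMethod* verified_method;  // injected once, read many times
      explicit DexCompilationUnit(const VerifiedMethod* vm)
          : verified_method(vm) {}
    };

    // A safe-cast query becomes a lookup in data the unit already carries.
    bool IsSafeCast(const DexCompilationUnit* unit, uint32_t dex_pc) {
      return unit->verified_method->safe_cast_pcs.count(dex_pc) != 0;
    }

    int main() {
      VerifiedMethod vm;
      vm.safe_cast_pcs.insert(0x10);
      DexCompilationUnit unit(&vm);
      return IsSafeCast(&unit, 0x10) ? 0 : 1;
    }
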
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index f5bb85a..c2016d0 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -130,7 +130,6 @@
     num_ins(0),
     num_outs(0),
     num_regs(0),
-    num_compiler_temps(0),
     compiler_flip_match(false),
     arena(pool),
     mir_graph(NULL),
@@ -236,6 +235,43 @@
   cu.StartTimingSplit("BuildMIRGraph");
   cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
 
+  /*
+   * After creation of the MIR graph, also create the code generator.
+   * The reason we do this is that optimizations on the MIR graph may need to get information
+   * that is only available if a CG exists.
+   */
+#if defined(ART_USE_PORTABLE_COMPILER)
+  if (compiler_backend == kPortable) {
+    cu.cg.reset(PortableCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena, llvm_compilation_unit));
+  } else {
+#endif
+    Mir2Lir* mir_to_lir = nullptr;
+    switch (compiler.GetInstructionSet()) {
+      case kThumb2:
+        mir_to_lir = ArmCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena);
+        break;
+      case kMips:
+        mir_to_lir = MipsCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena);
+        break;
+      case kX86:
+        mir_to_lir = X86CodeGenerator(&cu, cu.mir_graph.get(), &cu.arena);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected instruction set: " << compiler.GetInstructionSet();
+    }
+
+    cu.cg.reset(mir_to_lir);
+
+    /* The number of compiler temporaries depends on backend so set it up now if possible */
+    if (mir_to_lir) {
+      size_t max_temps = mir_to_lir->GetMaxPossibleCompilerTemps();
+      bool set_max = cu.mir_graph->SetMaxAvailableNonSpecialCompilerTemps(max_temps);
+      CHECK(set_max);
+    }
+#if defined(ART_USE_PORTABLE_COMPILER)
+  }
+#endif
+
   /* Gathering opcode stats? */
   if (kCompilerDebugFlags & (1 << kDebugCountOpcodes)) {
     cu.mir_graph->EnableOpcodeCounting();
@@ -269,28 +305,6 @@
 
   CompiledMethod* result = NULL;
 
-#if defined(ART_USE_PORTABLE_COMPILER)
-  if (compiler_backend == kPortable) {
-    cu.cg.reset(PortableCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena, llvm_compilation_unit));
-  } else {
-#endif
-    switch (compiler.GetInstructionSet()) {
-      case kThumb2:
-        cu.cg.reset(ArmCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena));
-        break;
-      case kMips:
-        cu.cg.reset(MipsCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena));
-        break;
-      case kX86:
-        cu.cg.reset(X86CodeGenerator(&cu, cu.mir_graph.get(), &cu.arena));
-        break;
-      default:
-        LOG(FATAL) << "Unexpected instruction set: " << compiler.GetInstructionSet();
-    }
-#if defined(ART_USE_PORTABLE_COMPILER)
-  }
-#endif
-
   cu.cg->Materialize();
 
   cu.NewTimingSplit("Dedupe");  /* deduping takes up the vast majority of time in GetCompiledMethod(). */
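
The moved block above is not a cosmetic reshuffle: the code generator now has to exist before the optimization passes run, because the MIR graph's compiler-temp budget comes from the backend via GetMaxPossibleCompilerTemps(). The ordering dependency, boiled down to a sketch with simplified stand-in types:

    #include <cassert>
    #include <cstddef>
    #include <memory>

    struct Backend {  // stand-in for Mir2Lir
      virtual ~Backend() {}
      virtual size_t GetMaxPossibleCompilerTemps() const = 0;
    };

    struct X86Backend : Backend {
      size_t GetMaxPossibleCompilerTemps() const { return 4; }  // assumed value
    };

    struct Graph {  // stand-in for MIRGraph
      size_t max_temps = 0;
      size_t used_temps = 0;
      // Fails if passes already consumed more temps than the new budget allows.
      bool SetMaxAvailableNonSpecialCompilerTemps(size_t new_max) {
        if (new_max < used_temps) return false;
        max_temps = new_max;
        return true;
      }
    };

    int main() {
      Graph graph;
      // Create the backend *before* optimizing, so the temp budget is known.
      std::unique_ptr<Backend> cg(new X86Backend());
      bool ok = graph.SetMaxAvailableNonSpecialCompilerTemps(
          cg->GetMaxPossibleCompilerTemps());
      assert(ok);
      // ... optimization passes may now request temps up to the budget ...
      return 0;
    }
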
diff --git a/compiler/dex/growable_array.h b/compiler/dex/growable_array.h
index 639120a..b5842e1 100644
--- a/compiler/dex/growable_array.h
+++ b/compiler/dex/growable_array.h
@@ -66,11 +66,6 @@
           idx_ = 0;
         }
 
-        static void* operator new(size_t size, ArenaAllocator* arena) {
-          return arena->Alloc(sizeof(GrowableArray::Iterator), ArenaAllocator::kAllocGrowableArray);
-        };
-        static void operator delete(void* p) {}  // Nop.
-
       private:
         size_t idx_;
         GrowableArray* const g_list_;
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index 75883b7..9e83210 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -380,9 +380,6 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Establish value number for loaded register. Note use of memory version.
@@ -421,9 +418,6 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Rev the memory version
@@ -447,9 +441,6 @@
         } else {
           null_checked_.insert(base);
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         uint16_t memory_version = GetMemoryVersion(base, field_ref);
         if (opcode == Instruction::IGET_WIDE) {
@@ -479,9 +470,6 @@
         } else {
           null_checked_.insert(base);
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         AdvanceMemoryVersion(base, field_ref);
       }
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index c235448..9680450 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1158,8 +1158,8 @@
     ssa_last_defs_[i] = 0;
   }
 
-  /* Add ssa reg for Method* */
-  method_sreg_ = AddNewSReg(SSA_METHOD_BASEREG);
+  // Create a compiler temporary for Method*. This is done after SSA initialization.
+  GetNewCompilerTemp(kCompilerTempSpecialMethodPtr, false);
 
   /*
    * Allocate the BasicBlockDataFlow structure for the entry and code blocks
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 8d1653f..10bcdb9 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -46,7 +46,6 @@
 
 MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena)
     : reg_location_(NULL),
-      compiler_temps_(arena, 6, kGrowableArrayMisc),
       cu_(cu),
       ssa_base_vregs_(NULL),
       ssa_subscripts_(NULL),
@@ -82,8 +81,13 @@
       checkstats_(NULL),
       arena_(arena),
       backward_branches_(0),
-      forward_branches_(0) {
+      forward_branches_(0),
+      compiler_temps_(arena, 6, kGrowableArrayMisc),
+      num_non_special_compiler_temps_(0),
+      max_available_non_special_compiler_temps_(0) {
   try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
+  max_available_special_compiler_temps_ = std::abs(static_cast<int>(kVRegNonSpecialTempBaseReg))
+      - std::abs(static_cast<int>(kVRegTempBaseReg));
 }
 
 MIRGraph::~MIRGraph() {
@@ -126,9 +130,6 @@
   bottom_block->terminated_by_return = orig_block->terminated_by_return;
   orig_block->terminated_by_return = false;
 
-  /* Add it to the quick lookup cache */
-  dex_pc_to_block_map_.Put(bottom_block->start_offset, bottom_block->id);
-
   /* Handle the taken path */
   bottom_block->taken = orig_block->taken;
   if (bottom_block->taken != NullBasicBlockId) {
@@ -177,19 +178,29 @@
   }
 
   // Associate dex instructions in the bottom block with the new container.
-  MIR* p = bottom_block->first_mir_insn;
-  while (p != NULL) {
+  DCHECK(insn != nullptr);
+  DCHECK(insn != orig_block->first_mir_insn);
+  DCHECK(insn == bottom_block->first_mir_insn);
+  DCHECK_EQ(insn->offset, bottom_block->start_offset);
+  DCHECK(static_cast<int>(insn->dalvikInsn.opcode) == kMirOpCheck ||
+         !IsPseudoMirOp(insn->dalvikInsn.opcode));
+  DCHECK_EQ(dex_pc_to_block_map_.Get(insn->offset), orig_block->id);
+  MIR* p = insn;
+  dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
+  while (p != bottom_block->last_mir_insn) {
+    p = p->next;
+    DCHECK(p != nullptr);
     int opcode = p->dalvikInsn.opcode;
     /*
      * Some messiness here to ensure that we only enter real opcodes and only the
      * first half of a potentially throwing instruction that has been split into
-     * CHECK and work portions.  The 2nd half of a split operation will have a non-null
-     * throw_insn pointer that refers to the 1st half.
+     * CHECK and work portions. Since the 2nd half of a split operation is always
+     * the first in a BasicBlock, we can't hit it here.
      */
-    if ((opcode == kMirOpCheck) || (!IsPseudoMirOp(opcode) && (p->meta.throw_insn == NULL))) {
+    if ((opcode == kMirOpCheck) || !IsPseudoMirOp(opcode)) {
+      DCHECK_EQ(dex_pc_to_block_map_.Get(p->offset), orig_block->id);
       dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
     }
-    p = (p == bottom_block->last_mir_insn) ? NULL : p->next;
   }
 
   return bottom_block;
@@ -508,7 +519,6 @@
       static_cast<Instruction::Code>(kMirOpCheck);
   // Associate the two halves
   insn->meta.throw_insn = new_insn;
-  new_insn->meta.throw_insn = insn;
   AppendMIR(new_block, new_insn);
   return new_block;
 }
@@ -523,7 +533,8 @@
   current_offset_ = 0;
   // TODO: will need to snapshot stack image and use that as the mir context identification.
   m_units_.push_back(new DexCompilationUnit(cu_, class_loader, Runtime::Current()->GetClassLinker(),
-                     dex_file, current_code_item_, class_def_idx, method_idx, access_flags));
+                     dex_file, current_code_item_, class_def_idx, method_idx, access_flags,
+                     cu_->compiler_driver->GetVerifiedMethod(&dex_file, method_idx)));
   const uint16_t* code_ptr = current_code_item_->insns_;
   const uint16_t* code_end =
       current_code_item_->insns_ + current_code_item_->insns_size_in_code_units_;
@@ -973,7 +984,7 @@
         str.append(StringPrintf(", #%d", insn.vB));
         break;
       case Instruction::k51l:  // Add one wide immediate
-        str.append(StringPrintf(", #%lld", insn.vB_wide));
+        str.append(StringPrintf(", #%" PRId64, insn.vB_wide));
         break;
       case Instruction::k21c:  // One register, one string/type/method index
       case Instruction::k31c:
@@ -1026,7 +1037,7 @@
   }
   if (IsConst(reg_location_[ssa_reg])) {
     if (!singles_only && reg_location_[ssa_reg].wide) {
-      return StringPrintf("v%d_%d#0x%llx", SRegToVReg(ssa_reg), GetSSASubscript(ssa_reg),
+      return StringPrintf("v%d_%d#0x%" PRIx64, SRegToVReg(ssa_reg), GetSSASubscript(ssa_reg),
                           ConstantValueWide(reg_location_[ssa_reg]));
     } else {
       return StringPrintf("v%d_%d#0x%x", SRegToVReg(ssa_reg), GetSSASubscript(ssa_reg),
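
The two format-string fixes above replace %lld and %llx with the <inttypes.h> width macros: int64_t is long on LP64 hosts but long long on 32-bit targets, so no hard-coded length modifier is right everywhere. A self-contained illustration (the value is arbitrary):

    #include <cinttypes>
    #include <cstdio>

    int main() {
      int64_t wide = INT64_C(0x123456789abc);
      // PRId64/PRIx64 expand to the correct length modifier for this ABI,
      // so the same line compiles warning-free on 32- and 64-bit builds.
      std::printf("#%" PRId64 " #0x%" PRIx64 "\n", wide, wide);
      return 0;
    }
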
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index b68e699..debcb5c 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -168,11 +168,6 @@
 #define INVALID_REG (0xFF)
 #define INVALID_OFFSET (0xDEADF00FU)
 
-/* SSA encodings for special registers */
-#define SSA_METHOD_BASEREG (-2)
-/* First compiler temp basereg, grows smaller */
-#define SSA_CTEMP_BASEREG (SSA_METHOD_BASEREG - 1)
-
 #define MIR_IGNORE_NULL_CHECK           (1 << kMIRIgnoreNullCheck)
 #define MIR_NULL_CHECK_ONLY             (1 << kMIRNullCheckOnly)
 #define MIR_IGNORE_RANGE_CHECK          (1 << kMIRIgnoreRangeCheck)
@@ -195,7 +190,13 @@
  * name of compiler-introduced temporaries.
  */
 struct CompilerTemp {
-  int32_t s_reg;
+  int32_t v_reg;      // Virtual register number for temporary.
+  int32_t s_reg_low;  // SSA name for low Dalvik word.
+};
+
+enum CompilerTempType {
+  kCompilerTempVR,                // A virtual register temporary.
+  kCompilerTempSpecialMethodPtr,  // Temporary that keeps track of current method pointer.
 };
 
 // When debug option enabled, records effectiveness of null and range check elimination.
@@ -253,8 +254,10 @@
   union {
     // Incoming edges for phi node.
     BasicBlockId* phi_incoming;
-    // Establish link between two halves of throwing instructions.
+    // Establish link from check instruction (kMirOpCheck) to the actual throwing instruction.
     MIR* throw_insn;
+    // Fused cmp branch condition.
+    ConditionCode ccode;
   } meta;
 };
 
@@ -569,10 +572,76 @@
     return bad_loc;
   }
 
-  int GetMethodSReg() {
+  int GetMethodSReg() const {
     return method_sreg_;
   }
 
+  /**
+   * @brief Used to obtain the number of compiler temporaries being used.
+   * @return Returns the number of compiler temporaries.
+   */
+  size_t GetNumUsedCompilerTemps() const {
+    size_t total_num_temps = compiler_temps_.Size();
+    DCHECK_LE(num_non_special_compiler_temps_, total_num_temps);
+    return total_num_temps;
+  }
+
+  /**
+   * @brief Used to obtain the number of non-special compiler temporaries being used.
+   * @return Returns the number of non-special compiler temporaries.
+   */
+  size_t GetNumNonSpecialCompilerTemps() const {
+    return num_non_special_compiler_temps_;
+  }
+
+  /**
+   * @brief Used to set the total number of available non-special compiler temporaries.
+   * @details Can fail setting the new max if there are more temps being used than the new_max.
+   * @param new_max The new maximum number of non-special compiler temporaries.
+   * @return Returns true if the max was set and false if failed to set.
+   */
+  bool SetMaxAvailableNonSpecialCompilerTemps(size_t new_max) {
+    if (new_max < GetNumNonSpecialCompilerTemps()) {
+      return false;
+    } else {
+      max_available_non_special_compiler_temps_ = new_max;
+      return true;
+    }
+  }
+
+  /**
+   * @brief Provides the number of non-special compiler temps available.
+   * @details Even if this returns zero, special compiler temps are guaranteed to be available.
+   * @return Returns the number of available temps.
+   */
+  size_t GetNumAvailableNonSpecialCompilerTemps();
+
+  /**
+   * @brief Used to obtain an existing compiler temporary.
+   * @param index The index of the temporary which must be strictly less than the
+   * number of temporaries.
+   * @return Returns the temporary that was asked for.
+   */
+  CompilerTemp* GetCompilerTemp(size_t index) const {
+    return compiler_temps_.Get(index);
+  }
+
+  /**
+   * @brief Used to obtain the maximum number of compiler temporaries that can be requested.
+   * @return Returns the maximum number of compiler temporaries, whether used or not.
+   */
+  size_t GetMaxPossibleCompilerTemps() const {
+    return max_available_special_compiler_temps_ + max_available_non_special_compiler_temps_;
+  }
+
+  /**
+   * @brief Used to obtain a new unique compiler temporary.
+   * @param ct_type Type of compiler temporary requested.
+   * @param wide Whether we should allocate a wide temporary.
+   * @return Returns the newly created compiler temporary.
+   */
+  CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide);
+
   bool MethodIsLeaf() {
     return attributes_ & METHOD_IS_LEAF;
   }
@@ -725,7 +794,6 @@
 
   // TODO: make these private.
   RegLocation* reg_location_;                         // Map SSA names to location.
-  GrowableArray<CompilerTemp*> compiler_temps_;
   SafeMap<unsigned int, unsigned int> block_id_map_;  // Block collapse lookup cache.
 
   static const uint64_t oat_data_flow_attributes_[kMirOpLast];
@@ -834,6 +902,10 @@
   ArenaAllocator* arena_;
   int backward_branches_;
   int forward_branches_;
+  GrowableArray<CompilerTemp*> compiler_temps_;
+  size_t num_non_special_compiler_temps_;
+  size_t max_available_non_special_compiler_temps_;
+  size_t max_available_special_compiler_temps_;
 };
 
 }  // namespace art
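
Among the mir_graph.h changes above, the MIR meta union gains a ccode member, so a fused compare-and-branch records its condition in typed storage instead of smuggling it through dalvikInsn.arg[0] (the matching write shows up in mir_optimization.cc below). The union pattern in miniature, with simplified types:

    #include <cstdio>

    enum ConditionCode { kCondEq, kCondLt, kCondNv };
    struct MIR;  // a forward declaration is enough for the pointer member

    struct MIRMeta {
      union {
        MIR* throw_insn;      // live for kMirOpCheck instructions
        ConditionCode ccode;  // live for fused compare-and-branch
      };
    };

    int main() {
      MIRMeta meta;
      meta.ccode = kCondLt;  // the opcode tells readers which member is live
      std::printf("ccode=%d\n", static_cast<int>(meta.ccode));
      return 0;
    }
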
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index ee9f28e..209ed3d 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -199,13 +199,94 @@
   return raw_use_counts_.Get(s_reg);
 }
 
+size_t MIRGraph::GetNumAvailableNonSpecialCompilerTemps() {
+  if (num_non_special_compiler_temps_ >= max_available_non_special_compiler_temps_) {
+    return 0;
+  } else {
+    return max_available_non_special_compiler_temps_ - num_non_special_compiler_temps_;
+  }
+}
+
+static const RegLocation temp_loc = {kLocCompilerTemp,
+                                     0, 1 /*defined*/, 0, 0, 0, 0, 0, 1 /*home*/,
+                                     kVectorNotUsed, INVALID_REG, INVALID_REG, INVALID_SREG,
+                                     INVALID_SREG};
+
+CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) {
+  // There is a limit to the number of non-special temps so check to make sure it wasn't exceeded.
+  if (ct_type == kCompilerTempVR) {
+    size_t available_temps = GetNumAvailableNonSpecialCompilerTemps();
+    if (available_temps <= 0 || (available_temps <= 1 && wide)) {
+      return 0;
+    }
+  }
+
+  CompilerTemp *compiler_temp = static_cast<CompilerTemp *>(arena_->Alloc(sizeof(CompilerTemp),
+                                                            ArenaAllocator::kAllocRegAlloc));
+
+  // Create the type of temp requested. Special temps need special handling because
+  // they have a specific virtual register assignment.
+  if (ct_type == kCompilerTempSpecialMethodPtr) {
+    DCHECK_EQ(wide, false);
+    compiler_temp->v_reg = static_cast<int>(kVRegMethodPtrBaseReg);
+    compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
+
+    // The MIR graph keeps track of the sreg for method pointer specially, so record that now.
+    method_sreg_ = compiler_temp->s_reg_low;
+  } else {
+    DCHECK_EQ(ct_type, kCompilerTempVR);
+
+    // The new non-special compiler temp must receive a unique v_reg with a negative value.
+    compiler_temp->v_reg = static_cast<int>(kVRegNonSpecialTempBaseReg) - num_non_special_compiler_temps_;
+    compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
+    num_non_special_compiler_temps_++;
+
+    if (wide) {
+      // Ensure that the two registers are consecutive. Since the virtual registers used for temps grow in a
+      // negative fashion, we need the smaller to refer to the low part. Thus, we redefine the v_reg and s_reg_low.
+      compiler_temp->v_reg--;
+      int ssa_reg_high = compiler_temp->s_reg_low;
+      compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
+      int ssa_reg_low = compiler_temp->s_reg_low;
+
+      // If needed initialize the register location for the high part.
+      // The low part is handled later in this method on a common path.
+      if (reg_location_ != nullptr) {
+        reg_location_[ssa_reg_high] = temp_loc;
+        reg_location_[ssa_reg_high].high_word = 1;
+        reg_location_[ssa_reg_high].s_reg_low = ssa_reg_low;
+        reg_location_[ssa_reg_high].wide = true;
+
+        // A new SSA needs new use counts.
+        use_counts_.Insert(0);
+        raw_use_counts_.Insert(0);
+      }
+
+      num_non_special_compiler_temps_++;
+    }
+  }
+
+  // Have we already allocated the register locations?
+  if (reg_location_ != nullptr) {
+    int ssa_reg_low = compiler_temp->s_reg_low;
+    reg_location_[ssa_reg_low] = temp_loc;
+    reg_location_[ssa_reg_low].s_reg_low = ssa_reg_low;
+    reg_location_[ssa_reg_low].wide = wide;
+
+    // A new SSA needs new use counts.
+    use_counts_.Insert(0);
+    raw_use_counts_.Insert(0);
+  }
+
+  compiler_temps_.Insert(compiler_temp);
+  return compiler_temp;
+}
 
 /* Do some MIR-level extended basic block optimizations */
 bool MIRGraph::BasicBlockOpt(BasicBlock* bb) {
   if (bb->block_type == kDead) {
     return true;
   }
-  int num_temps = 0;
   bool use_lvn = bb->use_lvn;
   UniquePtr<LocalValueNumbering> local_valnum;
   if (use_lvn) {
@@ -259,7 +340,7 @@
             if ((ccode != kCondNv) &&
                 (mir->ssa_rep->defs[0] == mir_next->ssa_rep->uses[0]) &&
                 (GetSSAUseCount(mir->ssa_rep->defs[0]) == 1)) {
-              mir_next->dalvikInsn.arg[0] = ccode;
+              mir_next->meta.ccode = ccode;
               switch (opcode) {
                 case Instruction::CMPL_FLOAT:
                   mir_next->dalvikInsn.opcode =
@@ -323,9 +404,10 @@
           break;
       }
       // Is this the select pattern?
-      // TODO: flesh out support for Mips and X86.  NOTE: llvm's select op doesn't quite work here.
+      // TODO: flesh out support for Mips.  NOTE: llvm's select op doesn't quite work here.
       // TUNING: expand to support IF_xx compare & branches
-      if (!(cu_->compiler_backend == kPortable) && (cu_->instruction_set == kThumb2) &&
+      if ((cu_->compiler_backend != kPortable) &&
+          (cu_->instruction_set == kThumb2 || cu_->instruction_set == kX86) &&
           ((mir->dalvikInsn.opcode == Instruction::IF_EQZ) ||
           (mir->dalvikInsn.opcode == Instruction::IF_NEZ))) {
         BasicBlock* ft = GetBasicBlock(bb->fall_through);
@@ -391,6 +473,11 @@
                 }
               }
               if (const_form) {
+                /*
+                 * TODO: If both constants are the same value, then instead of generating
+                 * a select, we should simply generate a const bytecode. This should be
+                 * considered after inlining which can lead to CFG of this form.
+                 */
                 // "true" set val in vB
                 mir->dalvikInsn.vB = if_true->dalvikInsn.vB;
                 // "false" set val in vC
@@ -462,9 +549,6 @@
     bb = ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) ? NextDominatedBlock(bb) : NULL;
   }
 
-  if (num_temps > cu_->num_compiler_temps) {
-    cu_->num_compiler_temps = num_temps;
-  }
   return true;
 }
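
A note on the virtual-register arithmetic in GetNewCompilerTemp above: non-special temps grow toward more negative v_reg values, and a wide temp decrements v_reg once more so that the numerically smaller register names the low word, keeping the pair consecutive. A toy model of that allocation order (kTempBase is an assumed value; ART derives the real base from kVRegNonSpecialTempBaseReg):

    #include <cstdio>

    const int kTempBase = -3;  // assumed illustrative base register

    struct Temp { int v_reg; };

    static int num_temps = 0;

    // Mirrors the patch: one slot for a narrow temp, two consecutive slots
    // for a wide temp with the low word at the smaller (more negative) v_reg.
    Temp AllocTemp(bool wide) {
      Temp t;
      t.v_reg = kTempBase - num_temps;
      num_temps++;
      if (wide) {
        t.v_reg--;  // the low word takes the smaller register of the pair
        num_temps++;
      }
      return t;
    }

    int main() {
      Temp a = AllocTemp(false);  // v_reg == -3
      Temp b = AllocTemp(true);   // low word at -5, high word at -4
      std::printf("a=%d b_low=%d b_high=%d\n", a.v_reg, b.v_reg, b.v_reg + 1);
      return 0;
    }
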
 
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
index c52ddf5..255892e 100644
--- a/compiler/dex/pass.h
+++ b/compiler/dex/pass.h
@@ -41,6 +41,7 @@
   kRepeatingPostOrderDFSTraversal,         /**< @brief Depth-First-Search / Repeating Post-Order. */
   kRepeatingReversePostOrderDFSTraversal,  /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */
   kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
+  kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
 };
 
 /**
@@ -50,20 +51,22 @@
  */
 class Pass {
  public:
-  Pass(const char *name, DataFlowAnalysisMode type, bool freed, const unsigned int f, const char *dump): pass_name_(name), traversal_type_(type), flags_(f), dump_cfg_folder_(dump) {
+  explicit Pass(const char* name, DataFlowAnalysisMode type = kAllNodes,
+                unsigned int flags = 0u, const char* dump = "")
+    : pass_name_(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) {
   }
 
-  Pass(const char *name, const char *dump): pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
+  Pass(const char* name, DataFlowAnalysisMode type, const char* dump)
+    : pass_name_(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) {
   }
 
-  explicit Pass(const char *name):pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_("") {
+  Pass(const char* name, const char* dump)
+    : pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
   }
 
-  Pass(const char *name, DataFlowAnalysisMode type, const char *dump):pass_name_(name), traversal_type_(type), flags_(false), dump_cfg_folder_(dump) {
+  virtual ~Pass() {
   }
 
-  virtual ~Pass() {}
-
   virtual const char* GetName() const {
     return pass_name_;
   }
@@ -76,14 +79,16 @@
     return (flags_ & flag);
   }
 
-  const char* GetDumpCFGFolder() const {return dump_cfg_folder_;}
+  const char* GetDumpCFGFolder() const {
+    return dump_cfg_folder_;
+  }
 
   /**
    * @brief Gate for the pass: determines whether to execute the pass or not considering a CompilationUnit
    * @param c_unit the CompilationUnit.
    * @return whether or not to execute the pass
    */
-  virtual bool Gate(const CompilationUnit *c_unit) const {
+  virtual bool Gate(const CompilationUnit* c_unit) const {
     // Unused parameter.
     UNUSED(c_unit);
 
@@ -95,7 +100,7 @@
    * @brief Start of the pass: called before the WalkBasicBlocks function
    * @param c_unit the considered CompilationUnit.
    */
-  virtual void Start(CompilationUnit *c_unit) const {
+  virtual void Start(CompilationUnit* c_unit) const {
     // Unused parameter.
     UNUSED(c_unit);
   }
@@ -104,7 +109,7 @@
    * @brief End of the pass: called after the WalkBasicBlocks function
    * @param c_unit the considered CompilationUnit.
    */
-  virtual void End(CompilationUnit *c_unit) const {
+  virtual void End(CompilationUnit* c_unit) const {
     // Unused parameter.
     UNUSED(c_unit);
   }
@@ -115,7 +120,7 @@
    * @param bb the BasicBlock.
    * @return whether or not there is a change when walking the BasicBlock
    */
-  virtual bool WalkBasicBlocks(CompilationUnit *c_unit, BasicBlock *bb) const {
+  virtual bool WalkBasicBlocks(CompilationUnit* c_unit, BasicBlock* bb) const {
     // Unused parameters.
     UNUSED(c_unit);
     UNUSED(bb);
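
The constructor cleanup above collapses the old overloads into one explicit constructor with defaulted traversal, flags, and dump folder (plus two convenience forms), and the new kNoNodes mode lets a whole-method pass, one that does all of its work in Start() like InitRegLocations or BBOptimizations, opt out of the BasicBlock walk entirely. A reduced sketch of such a subclass; the base class here is trimmed to the relevant pieces and is not the full ART header:

    #include <cstdio>

    struct CompilationUnit;  // opaque in this sketch
    enum DataFlowAnalysisMode { kAllNodes, kNoNodes };

    class Pass {
     public:
      explicit Pass(const char* name, DataFlowAnalysisMode type = kAllNodes,
                    unsigned int flags = 0u, const char* dump = "")
          : pass_name_(name), traversal_type_(type), flags_(flags),
            dump_cfg_folder_(dump) {}
      virtual ~Pass() {}
      virtual bool Gate(const CompilationUnit*) const { return true; }
      virtual void Start(CompilationUnit*) const {}
      DataFlowAnalysisMode GetTraversal() const { return traversal_type_; }
      const char* GetName() const { return pass_name_; }
     protected:
      const char* const pass_name_;
      const DataFlowAnalysisMode traversal_type_;
      const unsigned int flags_;
      const char* const dump_cfg_folder_;
    };

    // All work happens in Start(); kNoNodes tells the driver to skip the walk.
    class InitSomething : public Pass {
     public:
      InitSomething() : Pass("InitSomething", kNoNodes) {}
      void Start(CompilationUnit*) const { /* one-shot setup here */ }
    };

    int main() {
      InitSomething pass;
      std::printf("%s traversal=%d\n", pass.GetName(),
                  static_cast<int>(pass.GetTraversal()));
      return 0;
    }
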
diff --git a/compiler/dex/pass_driver.cc b/compiler/dex/pass_driver.cc
index 820dc5a..4f8739a 100644
--- a/compiler/dex/pass_driver.cc
+++ b/compiler/dex/pass_driver.cc
@@ -16,6 +16,8 @@
 
 #include <dlfcn.h>
 
+#include "base/logging.h"
+#include "base/macros.h"
 #include "bb_optimizations.h"
 #include "compiler_internals.h"
 #include "dataflow_iterator.h"
@@ -28,7 +30,8 @@
 namespace {  // anonymous namespace
 
 /**
- * @brief Helper function to create a single instance of a given Pass and can be shared across the threads
+ * @brief Helper function to create a single instance of a given Pass that can be
+ * shared across the threads.
  */
 template <typename PassType>
 const Pass* GetPassInstance() {
@@ -36,55 +39,58 @@
   return &pass;
 }
 
+void DoWalkBasicBlocks(CompilationUnit* c_unit, const Pass* pass, DataflowIterator* iterator) {
+  // Paranoid: Check the iterator before walking the BasicBlocks.
+  DCHECK(iterator != nullptr);
+
+  bool change = false;
+  for (BasicBlock *bb = iterator->Next(change); bb != 0; bb = iterator->Next(change)) {
+    change = pass->WalkBasicBlocks(c_unit, bb);
+  }
+}
+
+template <typename Iterator>
+inline void DoWalkBasicBlocks(CompilationUnit* c_unit, const Pass* pass) {
+  Iterator iterator(c_unit->mir_graph.get());
+  DoWalkBasicBlocks(c_unit, pass, &iterator);
+}
+
 }  // anonymous namespace
 
-PassDriver::PassDriver(CompilationUnit* cu, bool create_default_passes) : cu_(cu) {
-  dump_cfg_folder_ = "/sdcard/";
+PassDriver::PassDriver(CompilationUnit* cu, bool create_default_passes)
+    : cu_(cu), dump_cfg_folder_("/sdcard/") {
+  DCHECK(cu != nullptr);
 
   // If need be, create the default passes.
-  if (create_default_passes == true) {
+  if (create_default_passes) {
     CreatePasses();
   }
 }
 
 PassDriver::~PassDriver() {
-  // Clear the map: done to remove any chance of having a pointer after freeing below
-  pass_map_.clear();
 }
 
-void PassDriver::InsertPass(const Pass* new_pass, bool warn_override) {
-  assert(new_pass != 0);
+void PassDriver::InsertPass(const Pass* new_pass) {
+  DCHECK(new_pass != nullptr);
+  DCHECK(new_pass->GetName() != nullptr && new_pass->GetName()[0] != 0);
 
-  // Get name here to not do it all over the method.
-  const std::string& name = new_pass->GetName();
+  // It is an error to override an existing pass.
+  DCHECK(GetPass(new_pass->GetName()) == nullptr)
+      << "Pass name " << new_pass->GetName() << " already used.";
 
-  // Do we want to warn the user about squashing a pass?
-  if (warn_override == false) {
-    auto it = pass_map_.find(name);
-
-    if (it != pass_map_.end()) {
-      LOG(INFO) << "Pass name " << name << " already used, overwriting pass";
-    }
-  }
-
-  // Now add to map and list.
-  pass_map_.Put(name, new_pass);
+  // Now add to the list.
   pass_list_.push_back(new_pass);
 }
 
 void PassDriver::CreatePasses() {
   /*
-   * Create the pass list:
-   *   - These passes are immutable and are shared across the threads:
-   *    - This is achieved via:
-   *     - The UniquePtr used here.
-   *     - DISALLOW_COPY_AND_ASSIGN in the base Pass class.
+   * Create the pass list. These passes are immutable and are shared across the threads.
    *
    * Advantage is that there will be no race conditions here.
    * Disadvantage is the passes can't change their internal states depending on CompilationUnit:
    *   - This is not yet an issue: no current pass would require it.
    */
-  static const Pass* passes[] = {
+  static const Pass* const passes[] = {
       GetPassInstance<CodeLayout>(),
       GetPassInstance<SSATransformation>(),
       GetPassInstance<ConstantPropagation>(),
@@ -96,14 +102,10 @@
       GetPassInstance<BBOptimizations>(),
   };
 
-  // Get number of elements in the array.
-  unsigned int nbr = (sizeof(passes) / sizeof(passes[0]));
-
-  // Insert each pass into the map and into the list via the InsertPass method:
-  //   - Map is used for the lookup
-  //   - List is used for the pass walk
-  for (unsigned int i = 0; i < nbr; i++) {
-    InsertPass(passes[i]);
+  // Insert each pass into the list via the InsertPass method.
+  pass_list_.reserve(arraysize(passes));
+  for (const Pass* pass : passes) {
+    InsertPass(pass);
   }
 }
 
@@ -114,49 +116,37 @@
 }
 
 void PassDriver::DispatchPass(CompilationUnit* c_unit, const Pass* curPass) {
-  DataflowIterator* iterator = 0;
-
   LOG(DEBUG) << "Dispatching " << curPass->GetName();
 
-  MIRGraph* mir_graph = c_unit->mir_graph.get();
-  ArenaAllocator *arena = &(c_unit->arena);
-
-  // Let us start by getting the right iterator.
   DataFlowAnalysisMode mode = curPass->GetTraversal();
 
   switch (mode) {
     case kPreOrderDFSTraversal:
-      iterator = new (arena) PreOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<PreOrderDfsIterator>(c_unit, curPass);
       break;
     case kRepeatingPreOrderDFSTraversal:
-      iterator = new (arena) RepeatingPreOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(c_unit, curPass);
       break;
     case kRepeatingPostOrderDFSTraversal:
-      iterator = new (arena) RepeatingPostOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(c_unit, curPass);
       break;
     case kReversePostOrderDFSTraversal:
-      iterator = new (arena) ReversePostOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<ReversePostOrderDfsIterator>(c_unit, curPass);
       break;
     case kRepeatingReversePostOrderDFSTraversal:
-      iterator = new (arena) RepeatingReversePostOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(c_unit, curPass);
       break;
     case kPostOrderDOMTraversal:
-      iterator = new (arena) PostOrderDOMIterator(mir_graph);
+      DoWalkBasicBlocks<PostOrderDOMIterator>(c_unit, curPass);
       break;
     case kAllNodes:
-      iterator = new (arena) AllNodesIterator(mir_graph);
+      DoWalkBasicBlocks<AllNodesIterator>(c_unit, curPass);
+      break;
+    case kNoNodes:
       break;
     default:
       LOG(DEBUG) << "Iterator mode not handled in dispatcher: " << mode;
-      return;
-  }
-
-  // Paranoid: Check the iterator before walking the BasicBlocks.
-  assert(iterator != 0);
-
-  bool change = false;
-  for (BasicBlock *bb = iterator->Next(change); bb != 0; bb = iterator->Next(change)) {
-    change = curPass->WalkBasicBlocks(c_unit, bb);
+      break;
   }
 }
 
@@ -166,33 +156,34 @@
   curPass->End(c_unit);
 }
 
-bool PassDriver::RunPass(CompilationUnit* c_unit, const Pass* curPass, bool time_split) {
-  // Paranoid: c_unit or curPass cannot be 0, and the pass should have a name.
-  if (c_unit == 0 || curPass == 0 || (strcmp(curPass->GetName(), "") == 0)) {
-    return false;
-  }
+bool PassDriver::RunPass(CompilationUnit* c_unit, const Pass* pass, bool time_split) {
+  // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name.
+  DCHECK(c_unit != nullptr);
+  DCHECK(pass != nullptr);
+  DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0);
 
   // Do we perform a time split
-  if (time_split == true) {
-    c_unit->NewTimingSplit(curPass->GetName());
+  if (time_split) {
+    c_unit->NewTimingSplit(pass->GetName());
   }
 
   // Check the pass gate first.
-  bool shouldApplyPass = curPass->Gate(c_unit);
+  bool should_apply_pass = pass->Gate(c_unit);
 
-  if (shouldApplyPass == true) {
+  if (should_apply_pass) {
     // Applying the pass: first start, doWork, and end calls.
-    ApplyPass(c_unit, curPass);
+    ApplyPass(c_unit, pass);
 
     // Clean up if need be.
-    HandlePassFlag(c_unit, curPass);
+    HandlePassFlag(c_unit, pass);
 
     // Do we want to log it?
     if ((c_unit->enable_debug & (1 << kDebugDumpCFG)) != 0) {
       // Do we have a pass folder?
-      const std::string& passFolder = curPass->GetDumpCFGFolder();
+      const char* passFolder = pass->GetDumpCFGFolder();
+      DCHECK(passFolder != nullptr);
 
-      if (passFolder != "") {
+      if (passFolder[0] != 0) {
         // Create directory prefix.
         std::string prefix = GetDumpCFGFolder();
         prefix += passFolder;
@@ -204,19 +195,18 @@
   }
 
   // If the pass gate passed, we can declare success.
-  return shouldApplyPass;
+  return should_apply_pass;
 }
 
-bool PassDriver::RunPass(CompilationUnit* c_unit, const std::string& pass_name) {
-  // Paranoid: c_unit cannot be 0 and we need a pass name.
-  if (c_unit == 0 || pass_name == "") {
-    return false;
-  }
+bool PassDriver::RunPass(CompilationUnit* c_unit, const char* pass_name) {
+  // Paranoid: c_unit cannot be nullptr and we need a pass name.
+  DCHECK(c_unit != nullptr);
+  DCHECK(pass_name != nullptr && pass_name[0] != 0);
 
-  const Pass* curPass = GetPass(pass_name);
+  const Pass* cur_pass = GetPass(pass_name);
 
-  if (curPass != 0) {
-    return RunPass(c_unit, curPass);
+  if (cur_pass != nullptr) {
+    return RunPass(c_unit, cur_pass);
   }
 
   // Return false, we did not find the pass.
@@ -224,27 +214,26 @@
 }
 
 void PassDriver::Launch() {
-  for (const Pass *curPass : pass_list_) {
-    RunPass(cu_, curPass, true);
+  for (const Pass* cur_pass : pass_list_) {
+    RunPass(cu_, cur_pass, true);
   }
 }
 
 void PassDriver::PrintPassNames() const {
   LOG(INFO) << "Loop Passes are:";
 
-  for (const Pass *curPass : pass_list_) {
-    LOG(INFO) << "\t-" << curPass->GetName();
+  for (const Pass* cur_pass : pass_list_) {
+    LOG(INFO) << "\t-" << cur_pass->GetName();
   }
 }
 
-const Pass* PassDriver::GetPass(const std::string& name) const {
-  auto it = pass_map_.find(name);
-
-  if (it != pass_map_.end()) {
-    return it->second;
+const Pass* PassDriver::GetPass(const char* name) const {
+  for (const Pass* cur_pass : pass_list_) {
+    if (strcmp(name, cur_pass->GetName()) == 0) {
+      return cur_pass;
+    }
   }
-
-  return 0;
+  return nullptr;
 }
 
 }  // namespace art
diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h
index d580460..c734d3e 100644
--- a/compiler/dex/pass_driver.h
+++ b/compiler/dex/pass_driver.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_DEX_PASS_DRIVER_H_
 #define ART_COMPILER_DEX_PASS_DRIVER_H_
 
-#include <list>
+#include <vector>
 #include "pass.h"
 #include "safe_map.h"
 
@@ -42,7 +42,7 @@
    * @param new_pass the new Pass to insert in the map and list.
    * @param warn_override warn if the name of the Pass is already used.
    */
-  void InsertPass(const Pass* new_pass, bool warn_override = true);
+  void InsertPass(const Pass* new_pass);
 
   /**
    * @brief Run a pass using the name as key.
@@ -50,7 +50,7 @@
    * @param pass_name the Pass name.
    * @return whether the pass was applied.
    */
-  bool RunPass(CompilationUnit* c_unit, const std::string& pass_name);
+  bool RunPass(CompilationUnit* c_unit, const char* pass_name);
 
   /**
    * @brief Run a pass using the Pass itself.
@@ -75,20 +75,17 @@
 
   void PrintPassNames() const;
 
-  const Pass* GetPass(const std::string& name) const;
+  const Pass* GetPass(const char* name) const;
 
-  const char *GetDumpCFGFolder() const {
+  const char* GetDumpCFGFolder() const {
     return dump_cfg_folder_;
   }
 
  protected:
   void CreatePasses();
 
-  /** @brief The Pass Map: contains name -> pass for quick lookup. */
-  SafeMap<std::string, const Pass*> pass_map_;
-
   /** @brief List of passes: provides the order to execute the passes. */
-  std::list<const Pass*> pass_list_;
+  std::vector<const Pass*> pass_list_;
 
   /** @brief The CompilationUnit on which to execute the passes on. */
   CompilationUnit* const cu_;
diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc
index 70b660b..e6cc2de 100644
--- a/compiler/dex/portable/mir_to_gbc.cc
+++ b/compiler/dex/portable/mir_to_gbc.cc
@@ -1522,7 +1522,7 @@
   reg_info.push_back(irb_->getInt32(cu_->num_ins));
   reg_info.push_back(irb_->getInt32(cu_->num_regs));
   reg_info.push_back(irb_->getInt32(cu_->num_outs));
-  reg_info.push_back(irb_->getInt32(cu_->num_compiler_temps));
+  reg_info.push_back(irb_->getInt32(mir_graph_->GetNumUsedCompilerTemps()));
   reg_info.push_back(irb_->getInt32(mir_graph_->GetNumSSARegs()));
   ::llvm::MDNode* reg_info_node = ::llvm::MDNode::get(*context_, reg_info);
   inst->setMetadata("RegInfo", reg_info_node);
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 2bc579a..32673db 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
 #define ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
 
+#include "arm_lir.h"
 #include "dex/compiler_internals.h"
 
 namespace art {
@@ -94,9 +95,9 @@
                      RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +111,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div);
@@ -191,10 +192,12 @@
     MIR* SpecialIPut(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object);
     MIR* SpecialIdentity(MIR* mir);
     LIR* LoadFPConstantValue(int r_dest, int value);
-    bool BadOverlap(RegLocation rl_src, RegLocation rl_dest);
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void AssignDataOffsets();
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                          RegLocation rl_src2, bool is_div, bool check_zero);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 1a9d9c5..46542e1 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -209,7 +209,7 @@
     NewLIR2(kThumb2Vcmps, rl_src1.low_reg, rl_src2.low_reg);
   }
   NewLIR0(kThumb2Fmstat);
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
   switch (ccode) {
     case kCondEq:
     case kCondNe:
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 86ae75e..150794e 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -228,7 +228,7 @@
   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
   // Normalize such that if either operand is constant, src2 will be constant.
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
   if (rl_src1.is_const) {
     std::swap(rl_src1, rl_src2);
     ccode = FlipComparisonOrder(ccode);
@@ -444,6 +444,17 @@
   return NULL;
 }
 
+RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, bool is_div, bool check_zero) {
+  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
+  return rl_dest;
+}
+
+RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
+  return rl_dest;
+}
+
 RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit,
                                      bool is_div) {
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -783,20 +794,8 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-
- /*
-  * Check to see if a result pair has a misaligned overlap with an operand pair.  This
-  * is not usual for dx to generate, but it is legal (for now).  In a future rev of
-  * dex, we'll want to make this case illegal.
-  */
-bool ArmMir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) {
-  DCHECK(rl_src.wide);
-  DCHECK(rl_dest.wide);
-  return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
-}
-
-void ArmMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
+void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
     /*
      * To pull off inline multiply, we have a worst-case requirement of 8 temporary
      * registers.  Normally for Arm, we get 5.  We can get to 6 by including
@@ -868,27 +867,27 @@
     UnmarkTemp(rARM_LR);
 }
 
-void ArmMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
 }
 
-void ArmMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
 }
 
-void ArmMir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
 }
 
-void ArmMir2Lir::GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
 }
 
-void ArmMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of genXoLong for Arm";
 }
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 7591041..ceec7d5 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -14,10 +14,12 @@
  * limitations under the License.
  */
 
+#include "codegen_arm.h"
+
+#include <inttypes.h>
+
 #include <string>
 
-#include "arm_lir.h"
-#include "codegen_arm.h"
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
 
@@ -407,9 +409,8 @@
              strcpy(tbuf, cc_names[operand]);
              break;
            case 't':
-             snprintf(tbuf, arraysize(tbuf), "0x%08x (L%p)",
-                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 +
-                 (operand << 1),
+             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
                  lir->target);
              break;
            case 'u': {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 29554c0..481a70f 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -21,7 +21,8 @@
 #include "mir_to_lir-inl.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/verified_methods_data.h"
+#include "dex/verification_results.h"
+#include "dex/verified_method.h"
 #include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier.h"
 
@@ -34,7 +35,7 @@
 void DumpMappingTable(const char* table_name, const char* descriptor, const char* name,
                       const Signature& signature, uint32_t size, It first) {
   if (size != 0) {
-    std::string line(StringPrintf("\n  %s %s%s_%s_table[%zu] = {", table_name,
+    std::string line(StringPrintf("\n  %s %s%s_%s_table[%u] = {", table_name,
                      descriptor, name, signature.ToString().c_str(), size));
     std::replace(line.begin(), line.end(), ';', '_');
     LOG(INFO) << line;
@@ -234,8 +235,8 @@
                                                lir, base_addr));
         std::string op_operands(BuildInsnString(GetTargetInstFmt(lir->opcode),
                                                     lir, base_addr));
-        LOG(INFO) << StringPrintf("%05x: %-9s%s%s",
-                                  reinterpret_cast<unsigned int>(base_addr + offset),
+        LOG(INFO) << StringPrintf("%5p: %-9s%s%s",
+                                  base_addr + offset,
                                   op_name.c_str(), op_operands.c_str(),
                                   lir->flags.is_nop ? "(nop)" : "");
       }
@@ -251,7 +252,7 @@
 }
 
 void Mir2Lir::DumpPromotionMap() {
-  int num_regs = cu_->num_dalvik_registers + cu_->num_compiler_temps + 1;
+  int num_regs = cu_->num_dalvik_registers + mir_graph_->GetNumUsedCompilerTemps();
   for (int i = 0; i < num_regs; i++) {
     PromotionMap v_reg_map = promotion_map_[i];
     std::string buf;
@@ -288,7 +289,7 @@
   LOG(INFO) << "Outs         : " << cu_->num_outs;
   LOG(INFO) << "CoreSpills       : " << num_core_spills_;
   LOG(INFO) << "FPSpills       : " << num_fp_spills_;
-  LOG(INFO) << "CompilerTemps    : " << cu_->num_compiler_temps;
+  LOG(INFO) << "CompilerTemps    : " << mir_graph_->GetNumUsedCompilerTemps();
   LOG(INFO) << "Frame size       : " << frame_size_;
   LOG(INFO) << "code size is " << total_size_ <<
     " bytes, Dalvik size is " << insns_size * 2;
@@ -440,6 +441,20 @@
     PushPointer(code_buffer_, &id);
     data_lir = NEXT_LIR(data_lir);
   }
+  // Push class literals.
+  data_lir = class_literal_list_;
+  while (data_lir != NULL) {
+    uint32_t target = data_lir->operands[0];
+    cu_->compiler_driver->AddClassPatch(cu_->dex_file,
+                                        cu_->class_def_idx,
+                                        cu_->method_idx,
+                                        target,
+                                        code_buffer_.size());
+    const DexFile::TypeId& id = cu_->dex_file->GetTypeId(target);
+    // Use a unique value based on the target type so that code deduplication still works.
+    PushPointer(code_buffer_, &id);
+    data_lir = NEXT_LIR(data_lir);
+  }
 }
 
 /* Write the switch tables to the output stream */
@@ -749,10 +764,10 @@
     }
   }
   MethodReference method_ref(cu_->dex_file, cu_->method_idx);
-  const std::vector<uint8_t>* gc_map_raw =
-      cu_->compiler_driver->GetVerifiedMethodsData()->GetDexGcMap(method_ref);
-  verifier::DexPcToReferenceMap dex_gc_map(&(*gc_map_raw)[0]);
-  DCHECK_EQ(gc_map_raw->size(), dex_gc_map.RawSize());
+  const std::vector<uint8_t>& gc_map_raw =
+      mir_graph_->GetCurrentDexCompilationUnit()->GetVerifiedMethod()->GetDexGcMap();
+  verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
+  DCHECK_EQ(gc_map_raw.size(), dex_gc_map.RawSize());
   // Compute native offset to references size.
   NativePcToReferenceMapBuilder native_gc_map_builder(&native_gc_map_,
                                                       mapping_table.PcToDexSize(),
@@ -772,6 +787,7 @@
   offset = AssignLiteralOffsetCommon(literal_list_, offset);
   offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset);
   offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset);
+  offset = AssignLiteralPointerOffsetCommon(class_literal_list_, offset);
   return offset;
 }
 
@@ -960,6 +976,7 @@
     : Backend(arena),
       literal_list_(NULL),
       method_literal_list_(NULL),
+      class_literal_list_(NULL),
       code_literal_list_(NULL),
       first_fixup_(NULL),
       cu_(cu),
@@ -987,7 +1004,7 @@
       first_lir_insn_(NULL),
       last_lir_insn_(NULL) {
   promotion_map_ = static_cast<PromotionMap*>
-      (arena_->Alloc((cu_->num_dalvik_registers  + cu_->num_compiler_temps + 1) *
+      (arena_->Alloc((cu_->num_dalvik_registers + mir_graph_->GetNumUsedCompilerTemps()) *
                       sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc));
   // Reserve pointer id 0 for NULL.
   size_t null_idx = WrapPointer(NULL);
@@ -1064,13 +1081,27 @@
   return result;
 }
 
+size_t Mir2Lir::GetMaxPossibleCompilerTemps() const {
+  // Chose a reasonably small value in order to contain stack growth.
+  // Backends that are smarter about spill region can return larger values.
+  const size_t max_compiler_temps = 10;
+  return max_compiler_temps;
+}
+
+size_t Mir2Lir::GetNumBytesForCompilerTempSpillRegion() {
+  // By default assume that the Mir2Lir will need one slot for each temporary.
+  // If the backend can better determine temps that have non-overlapping ranges and
+  // temps that do not need to be spilled, it can actually provide a smaller region.
+  return (mir_graph_->GetNumUsedCompilerTemps() * sizeof(uint32_t));
+}
+
 int Mir2Lir::ComputeFrameSize() {
   /* Figure out the frame size */
   static const uint32_t kAlignMask = kStackAlignment - 1;
-  uint32_t size = (num_core_spills_ + num_fp_spills_ +
-                   1 /* filler word */ + cu_->num_regs + cu_->num_outs +
-                   cu_->num_compiler_temps + 1 /* cur_method* */)
-                   * sizeof(uint32_t);
+  uint32_t size = ((num_core_spills_ + num_fp_spills_ +
+                   1 /* filler word */ + cu_->num_regs + cu_->num_outs)
+                   * sizeof(uint32_t)) +
+                   GetNumBytesForCompilerTempSpillRegion();
   /* Align and set */
   return (size + kAlignMask) & ~(kAlignMask);
 }
@@ -1121,4 +1152,36 @@
   new_lir->next->prev = new_lir;
 }
 
+bool Mir2Lir::IsPowerOfTwo(uint64_t x) {
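+  // x & (x - 1) clears the lowest set bit, so the result is zero iff at most one bit is set
+  // (note that 0 is also accepted by this check).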
+  return (x & (x - 1)) == 0;
+}
+
+// Returns the index of the lowest set bit in 'x'.
+int32_t Mir2Lir::LowestSetBit(uint64_t x) {
+  int bit_posn = 0;
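+  // Precondition: x != 0; otherwise neither loop below would terminate.
+  // Skip over zero nibbles four bits at a time, then walk the last nibble bit by bit.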
+  while ((x & 0xf) == 0) {
+    bit_posn += 4;
+    x >>= 4;
+  }
+  while ((x & 1) == 0) {
+    bit_posn++;
+    x >>= 1;
+  }
+  return bit_posn;
+}
+
+bool Mir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) {
+  DCHECK(rl_src.wide);
+  DCHECK(rl_dest.wide);
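+  // A misaligned overlap means the two register pairs start exactly one vreg apart.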
+  return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
+}
+
+LIR *Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                int offset, int check_value, LIR* target) {
+  // Handle this for architectures that can't compare to memory.
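+  // Targets that can compare memory directly against an immediate (such as x86) are
+  // expected to override this with a single compare instruction.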
+  LoadWordDisp(base_reg, offset, temp_reg);
+  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
+  return branch;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 3bd0298..760e06e 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -19,6 +19,7 @@
 #include "dex/quick/mir_to_lir-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
+#include "mirror/object-inl.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
@@ -206,13 +207,43 @@
                           RegLocation rl_src) {
   FlushAllRegs();  /* Everything to home location */
   ThreadOffset func_offset(-1);
-  if (cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file,
+  const DexFile* dex_file = cu_->dex_file;
+  CompilerDriver* driver = cu_->compiler_driver;
+  if (cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *dex_file,
                                                        type_idx)) {
-    func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArray);
+    bool is_type_initialized;  // Ignored as an array does not have an initializer.
+    bool use_direct_type_ptr;
+    uintptr_t direct_type_ptr;
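+    // Ask the driver whether the type can be embedded directly in generated code and
+    // whether it is already initialized.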
+    if (kEmbedClassInCode &&
+        driver->CanEmbedTypeInCode(*dex_file, type_idx,
+                                   &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
+      // The fast path.
+      if (!use_direct_type_ptr) {
+        // Use the literal pool and a PC-relative load from a data word.
+        LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
+        if (data_target == nullptr) {
+          data_target = AddWordData(&class_literal_list_, type_idx);
+        }
+        LIR* load_pc_rel = OpPcRelLoad(TargetReg(kArg0), data_target);
+        AppendLIR(load_pc_rel);
+        func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArrayResolved);
+        CallRuntimeHelperRegMethodRegLocation(func_offset, TargetReg(kArg0), rl_src, true);
+      } else {
+        // Use the direct pointer.
+        func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArrayResolved);
+        CallRuntimeHelperImmMethodRegLocation(func_offset, direct_type_ptr, rl_src, true);
+      }
+    } else {
+      // The slow path.
+      DCHECK_EQ(func_offset.Int32Value(), -1);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArray);
+      CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
+    }
+    DCHECK_NE(func_offset.Int32Value(), -1);
   } else {
     func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArrayWithAccessCheck);
+    CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
   }
-  CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
   RegLocation rl_result = GetReturn(false);
   StoreValue(rl_dest, rl_result);
 }
@@ -374,10 +405,9 @@
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
         int r_tmp = TargetReg(kArg2);
         LockTemp(r_tmp);
-        // TODO: Fuse the compare of a constant with memory on X86 and avoid the load.
-        LoadWordDisp(r_base, mirror::Class::StatusOffset().Int32Value(), r_tmp);
-        LIR* initialized_branch = OpCmpImmBranch(kCondGe, r_tmp, mirror::Class::kStatusInitialized,
-                                                 NULL);
+        LIR* initialized_branch = OpCmpMemImmBranch(kCondGe, r_tmp, r_base,
+                                          mirror::Class::StatusOffset().Int32Value(),
+                                          mirror::Class::kStatusInitialized, NULL);
 
         LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
         unresolved_branch->target = unresolved_target;
@@ -468,10 +498,9 @@
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
         int r_tmp = TargetReg(kArg2);
         LockTemp(r_tmp);
-        // TODO: Fuse the compare of a constant with memory on X86 and avoid the load.
-        LoadWordDisp(r_base, mirror::Class::StatusOffset().Int32Value(), r_tmp);
-        LIR* initialized_branch = OpCmpImmBranch(kCondGe, r_tmp, mirror::Class::kStatusInitialized,
-                                                 NULL);
+        LIR* initialized_branch = OpCmpMemImmBranch(kCondGe, r_tmp, r_base,
+                                          mirror::Class::StatusOffset().Int32Value(),
+                                          mirror::Class::kStatusInitialized, NULL);
 
         LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
         unresolved_branch->target = unresolved_target;
@@ -830,27 +859,43 @@
     // slow path, resolve string if not in dex cache
     FlushAllRegs();
     LockCallTemps();  // Using explicit registers
-    LoadCurrMethodDirect(TargetReg(kArg2));
-    LoadWordDisp(TargetReg(kArg2),
-                 mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), TargetReg(kArg0));
+
+    // If the Method* is already in a register, we can save a copy.
+    RegLocation rl_method = mir_graph_->GetMethodLoc();
+    int r_method;
+    if (rl_method.location == kLocPhysReg) {
+      // A temp would conflict with register use below.
+      DCHECK(!IsTemp(rl_method.low_reg));
+      r_method = rl_method.low_reg;
+    } else {
+      r_method = TargetReg(kArg2);
+      LoadCurrMethodDirect(r_method);
+    }
+    LoadWordDisp(r_method, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(),
+                 TargetReg(kArg0));
+
     // Might call out to helper, which will return resolved string in kRet0
     int r_tgt = CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(pResolveString));
     LoadWordDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
-    LoadConstant(TargetReg(kArg1), string_idx);
     if (cu_->instruction_set == kThumb2) {
+      LoadConstant(TargetReg(kArg1), string_idx);
       OpRegImm(kOpCmp, TargetReg(kRet0), 0);  // Is resolved?
       GenBarrier();
       // For testing, always force through helper
       if (!EXERCISE_SLOWEST_STRING_PATH) {
         OpIT(kCondEq, "T");
       }
-      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));   // .eq
+      // The copy MUST generate exactly one instruction (for OpIT).
+      DCHECK_NE(TargetReg(kArg0), r_method);
+      OpRegCopy(TargetReg(kArg0), r_method);   // .eq
+
       LIR* call_inst = OpReg(kOpBlx, r_tgt);    // .eq, helper(Method*, string_idx)
       MarkSafepointPC(call_inst);
       FreeTemp(r_tgt);
     } else if (cu_->instruction_set == kMips) {
       LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL);
-      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));   // .eq
+      LoadConstant(TargetReg(kArg1), string_idx);
+      OpRegCopy(TargetReg(kArg0), r_method);   // .eq
       LIR* call_inst = OpReg(kOpBlx, r_tgt);
       MarkSafepointPC(call_inst);
       FreeTemp(r_tgt);
@@ -858,8 +903,12 @@
       branch->target = target;
     } else {
       DCHECK_EQ(cu_->instruction_set, kX86);
-      CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pResolveString), TargetReg(kArg2),
+      LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL);
+      LoadConstant(TargetReg(kArg1), string_idx);
+      CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pResolveString), r_method,
                               TargetReg(kArg1), true);
+      LIR* target = NewLIR0(kPseudoTargetLabel);
+      branch->target = target;
     }
     GenBarrier();
     StoreValue(rl_dest, GetReturn(false));
@@ -883,13 +932,53 @@
   // alloc will always check for resolution, do we also need to verify
   // access because the verifier was unable to?
   ThreadOffset func_offset(-1);
-  if (cu_->compiler_driver->CanAccessInstantiableTypeWithoutChecks(
-      cu_->method_idx, *cu_->dex_file, type_idx)) {
-    func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObject);
+  const DexFile* dex_file = cu_->dex_file;
+  CompilerDriver* driver = cu_->compiler_driver;
+  if (driver->CanAccessInstantiableTypeWithoutChecks(
+      cu_->method_idx, *dex_file, type_idx)) {
+    bool is_type_initialized;
+    bool use_direct_type_ptr;
+    uintptr_t direct_type_ptr;
+    if (kEmbedClassInCode &&
+        driver->CanEmbedTypeInCode(*dex_file, type_idx,
+                                   &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
+      // The fast path.
+      if (!use_direct_type_ptr) {
+        // Use the literal pool and a PC-relative load from a data word.
+        LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
+        if (data_target == nullptr) {
+          data_target = AddWordData(&class_literal_list_, type_idx);
+        }
+        LIR* load_pc_rel = OpPcRelLoad(TargetReg(kArg0), data_target);
+        AppendLIR(load_pc_rel);
+        if (!is_type_initialized) {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectResolved);
+          CallRuntimeHelperRegMethod(func_offset, TargetReg(kArg0), true);
+        } else {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectInitialized);
+          CallRuntimeHelperRegMethod(func_offset, TargetReg(kArg0), true);
+        }
+      } else {
+        // Use the direct pointer.
+        if (!is_type_initialized) {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectResolved);
+          CallRuntimeHelperImmMethod(func_offset, direct_type_ptr, true);
+        } else {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectInitialized);
+          CallRuntimeHelperImmMethod(func_offset, direct_type_ptr, true);
+        }
+      }
+    } else {
+      // The slow path.
+      DCHECK_EQ(func_offset.Int32Value(), -1);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObject);
+      CallRuntimeHelperImmMethod(func_offset, type_idx, true);
+    }
+    DCHECK_NE(func_offset.Int32Value(), -1);
   } else {
     func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectWithAccessCheck);
+    CallRuntimeHelperImmMethod(func_offset, type_idx, true);
   }
-  CallRuntimeHelperImmMethod(func_offset, type_idx, true);
   RegLocation rl_result = GetReturn(false);
   StoreValue(rl_dest, rl_result);
 }
@@ -903,6 +992,9 @@
 // question with simple comparisons.
 void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
                                  RegLocation rl_src) {
+  // X86 has its own implementation.
+  DCHECK_NE(cu_->instruction_set, kX86);
+
   RegLocation object = LoadValue(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   int result_reg = rl_result.low_reg;
@@ -1092,8 +1184,7 @@
   // Note: currently type_known_final is unused, as optimizing will only improve the performance
   // of the exception throw path.
   DexCompilationUnit* cu = mir_graph_->GetCurrentDexCompilationUnit();
-  const MethodReference mr(cu->GetDexFile(), cu->GetDexMethodIndex());
-  if (!needs_access_check && cu_->compiler_driver->IsSafeCast(mr, insn_idx)) {
+  if (!needs_access_check && cu_->compiler_driver->IsSafeCast(cu, insn_idx)) {
     // Verifier type analysis proved this check cast would never cause an exception.
     return;
   }
@@ -1341,6 +1432,9 @@
       }
       rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv);
       done = true;
+    } else if (cu_->instruction_set == kX86) {
+      rl_result = GenDivRem(rl_dest, rl_src1, rl_src2, op == kOpDiv, check_zero);
+      done = true;
     } else if (cu_->instruction_set == kThumb2) {
       if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
         // Use ARM SDIV instruction for division.  For remainder we also need to
@@ -1382,30 +1476,12 @@
  * or produce corresponding Thumb instructions directly.
  */
 
-static bool IsPowerOfTwo(int x) {
-  return (x & (x - 1)) == 0;
-}
-
 // Returns true if no more than two bits are set in 'x'.
 static bool IsPopCountLE2(unsigned int x) {
   x &= x - 1;
   return (x & (x - 1)) == 0;
 }
 
-// Returns the index of the lowest set bit in 'x'.
-static int32_t LowestSetBit(uint32_t x) {
-  int bit_posn = 0;
-  while ((x & 0xf) == 0) {
-    bit_posn += 4;
-    x >>= 4;
-  }
-  while ((x & 1) == 0) {
-    bit_posn++;
-    x >>= 1;
-  }
-  return bit_posn;
-}
-
 // Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
 // and store the result in 'rl_dest'.
 bool Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -1609,6 +1685,9 @@
         rl_src = LoadValue(rl_src, kCoreReg);
         rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div);
         done = true;
+      } else if (cu_->instruction_set == kX86) {
+        rl_result = GenDivRemLit(rl_dest, rl_src, lit, is_div);
+        done = true;
       } else if (cu_->instruction_set == kThumb2) {
         if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
           // Use ARM SDIV instruction for division.  For remainder we also need to
@@ -1677,7 +1756,7 @@
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
       if (cu_->instruction_set != kThumb2) {
-        GenAddLong(rl_dest, rl_src1, rl_src2);
+        GenAddLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpAdd;
@@ -1686,7 +1765,7 @@
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if (cu_->instruction_set != kThumb2) {
-        GenSubLong(rl_dest, rl_src1, rl_src2);
+        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpSub;
@@ -1694,8 +1773,8 @@
       break;
     case Instruction::MUL_LONG:
     case Instruction::MUL_LONG_2ADDR:
-      if (cu_->instruction_set == kThumb2) {
-        GenMulLong(rl_dest, rl_src1, rl_src2);
+      if (cu_->instruction_set != kMips) {
+        GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       } else {
         call_out = true;
@@ -1721,7 +1800,7 @@
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
       if (cu_->instruction_set == kX86) {
-        return GenAndLong(rl_dest, rl_src1, rl_src2);
+        return GenAndLong(opcode, rl_dest, rl_src1, rl_src2);
       }
       first_op = kOpAnd;
       second_op = kOpAnd;
@@ -1729,7 +1808,7 @@
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
       if (cu_->instruction_set == kX86) {
-        GenOrLong(rl_dest, rl_src1, rl_src2);
+        GenOrLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpOr;
@@ -1738,7 +1817,7 @@
     case Instruction::XOR_LONG:
     case Instruction::XOR_LONG_2ADDR:
       if (cu_->instruction_set == kX86) {
-        GenXorLong(rl_dest, rl_src1, rl_src2);
+        GenXorLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpXor;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index d942a24..3823fb3 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -142,6 +142,30 @@
   CallHelper(r_tgt, helper_offset, safepoint_pc);
 }
 
+void Mir2Lir::CallRuntimeHelperRegMethod(ThreadOffset helper_offset, int arg0, bool safepoint_pc) {
+  int r_tgt = CallHelperSetup(helper_offset);
+  DCHECK_NE(TargetReg(kArg1), arg0);
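+  // arg0 must not be in kArg1, because loading the Method* into kArg1 below would clobber it.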
+  if (TargetReg(kArg0) != arg0) {
+    OpRegCopy(TargetReg(kArg0), arg0);
+  }
+  LoadCurrMethodDirect(TargetReg(kArg1));
+  ClobberCallerSave();
+  CallHelper(r_tgt, helper_offset, safepoint_pc);
+}
+
+void Mir2Lir::CallRuntimeHelperRegMethodRegLocation(ThreadOffset helper_offset, int arg0,
+                                                    RegLocation arg2, bool safepoint_pc) {
+  int r_tgt = CallHelperSetup(helper_offset);
+  DCHECK_NE(TargetReg(kArg1), arg0);
+  if (TargetReg(kArg0) != arg0) {
+    OpRegCopy(TargetReg(kArg0), arg0);
+  }
+  LoadCurrMethodDirect(TargetReg(kArg1));
+  LoadValueDirectFixed(arg2, TargetReg(kArg2));
+  ClobberCallerSave();
+  CallHelper(r_tgt, helper_offset, safepoint_pc);
+}
+
 void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset helper_offset, RegLocation arg0,
                                                       RegLocation arg1, bool safepoint_pc) {
   int r_tgt = CallHelperSetup(helper_offset);
@@ -1205,10 +1229,6 @@
     // TODO - add Mips implementation
     return false;
   }
-  if (cu_->instruction_set == kX86 && is_object) {
-    // TODO: fix X86, it exhausts registers for card marking.
-    return false;
-  }
   // Unused - RegLocation rl_src_unsafe = info->args[0];
   RegLocation rl_src_obj = info->args[1];  // Object
   RegLocation rl_src_offset = info->args[2];  // long low
@@ -1228,6 +1248,9 @@
     rl_value = LoadValue(rl_src_value, kCoreReg);
     StoreBaseIndexed(rl_object.low_reg, rl_offset.low_reg, rl_value.low_reg, 0, kWord);
   }
+
+  // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
+  FreeTemp(rl_offset.low_reg);
   if (is_volatile) {
     GenMemBarrier(kStoreLoad);
   }
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 8f2f6ad..65582dd 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -294,6 +294,53 @@
   }
 }
 
+void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) {
+  DCHECK_EQ(IsFpReg(rl_src.low_reg), IsFpReg(rl_src.high_reg));
+  DCHECK(rl_dest.wide);
+  DCHECK(rl_src.wide);
+  DCHECK_EQ(rl_src.location, kLocPhysReg);
+
+  if (rl_dest.location == kLocPhysReg) {
+    OpRegCopyWide(rl_dest.low_reg, rl_dest.high_reg, rl_src.low_reg, rl_src.high_reg);
+  } else {
+    // Just re-assign the registers.  Dest gets Src's regs.
+    rl_dest.low_reg = rl_src.low_reg;
+    rl_dest.high_reg = rl_src.high_reg;
+    rl_dest.location = kLocPhysReg;
+    Clobber(rl_src.low_reg);
+    Clobber(rl_src.high_reg);
+  }
+
+  // Dest is now live and dirty (until/if we flush it to home location).
+  MarkLive(rl_dest.low_reg, rl_dest.s_reg_low);
+
+  // Does this wide value live in two registers (or one vector register)?
+  if (rl_dest.low_reg != rl_dest.high_reg) {
+    MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low));
+    MarkDirty(rl_dest);
+    MarkPair(rl_dest.low_reg, rl_dest.high_reg);
+  } else {
+    // This must be an x86 vector register value.
+    DCHECK(IsFpReg(rl_dest.low_reg) && (cu_->instruction_set == kX86));
+    MarkDirty(rl_dest);
+  }
+
+  ResetDefLocWide(rl_dest);
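+  // If the value is dirty and live out of this block, flush it to its home stack location.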
+  if ((IsDirty(rl_dest.low_reg) ||
+      IsDirty(rl_dest.high_reg)) &&
+      (oat_live_out(rl_dest.s_reg_low) ||
+      oat_live_out(GetSRegHi(rl_dest.s_reg_low)))) {
+    LIR *def_start = last_lir_insn_;
+    DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
+              mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
+    StoreBaseDispWide(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low),
+                      rl_dest.low_reg, rl_dest.high_reg);
+    MarkClean(rl_dest);
+    LIR *def_end = last_lir_insn_;
+    MarkDefWide(rl_dest, def_start, def_end);
+  }
+}
+
 /* Utilities to load the current Method* */
 void Mir2Lir::LoadCurrMethodDirect(int r_tgt) {
   LoadValueDirectFixed(mir_graph_->GetMethodLoc(), r_tgt);
@@ -303,4 +350,47 @@
   return LoadValue(mir_graph_->GetMethodLoc(), kCoreReg);
 }
 
+RegLocation Mir2Lir::ForceTemp(RegLocation loc) {
+  DCHECK(!loc.wide);
+  DCHECK(loc.location == kLocPhysReg);
+  DCHECK(!IsFpReg(loc.low_reg));
+  DCHECK(!IsFpReg(loc.high_reg));
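+  // If the value already lives in a temp, clobbering it is enough; otherwise copy it into
+  // a freshly allocated temp register.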
+  if (IsTemp(loc.low_reg)) {
+    Clobber(loc.low_reg);
+  } else {
+    int temp_low = AllocTemp();
+    OpRegCopy(temp_low, loc.low_reg);
+    loc.low_reg = temp_low;
+  }
+
+  // Ensure that this doesn't represent the original SR any more.
+  loc.s_reg_low = INVALID_SREG;
+  return loc;
+}
+
+RegLocation Mir2Lir::ForceTempWide(RegLocation loc) {
+  DCHECK(loc.wide);
+  DCHECK(loc.location == kLocPhysReg);
+  DCHECK(!IsFpReg(loc.low_reg));
+  DCHECK(!IsFpReg(loc.high_reg));
+  if (IsTemp(loc.low_reg)) {
+    Clobber(loc.low_reg);
+  } else {
+    int temp_low = AllocTemp();
+    OpRegCopy(temp_low, loc.low_reg);
+    loc.low_reg = temp_low;
+  }
+  if (IsTemp(loc.high_reg)) {
+    Clobber(loc.high_reg);
+  } else {
+    int temp_high = AllocTemp();
+    OpRegCopy(temp_high, loc.high_reg);
+    loc.high_reg = temp_high;
+  }
+
+  // Ensure that this doesn't represent the original SR any more.
+  loc.s_reg_low = INVALID_SREG;
+  return loc;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index a5a14d5..aca93f5 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -94,9 +94,9 @@
                      RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                                   RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +110,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div);
@@ -175,6 +175,9 @@
 
   private:
     void ConvertShortToLongBranch(LIR* lir);
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                          RegLocation rl_src2, bool is_div, bool check_zero);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 180d56c..013041a 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -250,6 +250,17 @@
   return rl_result;
 }
 
+RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, bool is_div, bool check_zero) {
+  LOG(FATAL) << "Unexpected use of GenDivRem for Mips";
+  return rl_dest;
+}
+
+RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLit for Mips";
+  return rl_dest;
+}
+
 void MipsMir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) {
   LOG(FATAL) << "Unexpected use of OpLea for Arm";
 }
@@ -356,13 +367,13 @@
   return NULL;
 }
 
-void MipsMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenMulLong for Mips";
 }
 
-void MipsMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -383,8 +394,8 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -425,18 +436,19 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+void MipsMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1,
                              RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAndLong for Mips";
 }
 
-void MipsMir2Lir::GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
+void MipsMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenOrLong for Mips";
 }
 
-void MipsMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenXorLong for Mips";
 }
 
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 1aee06c..b744adc 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -15,12 +15,15 @@
  */
 
 #include "codegen_mips.h"
+
+#include <inttypes.h>
+
+#include <string>
+
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "mips_lir.h"
 
-#include <string>
-
 namespace art {
 
 static int core_regs[] = {r_ZERO, r_AT, r_V0, r_V1, r_A0, r_A1, r_A2, r_A3,
@@ -203,9 +206,9 @@
              snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
              break;
            case 't':
-             snprintf(tbuf, arraysize(tbuf), "0x%08x (L%p)",
-                      reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 2),
-                      lir->target);
+             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 2),
+                 lir->target);
              break;
            case 'T':
              snprintf(tbuf, arraysize(tbuf), "0x%08x", operand << 2);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 6281eff..1f4122d 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -342,8 +342,8 @@
       bool is_safe = is_null;  // Always safe to store null.
       if (!is_safe) {
         // Check safety from verifier type information.
-        const MethodReference mr(cu_->dex_file, cu_->method_idx);
-        is_safe = cu_->compiler_driver->IsSafeCast(mr, mir->offset);
+        const DexCompilationUnit* unit = mir_graph_->GetCurrentDexCompilationUnit();
+        is_safe = cu_->compiler_driver->IsSafeCast(unit, mir->offset);
       }
       if (is_null || is_safe) {
         // Store of constant null doesn't require an assignability test and can be generated inline
@@ -762,11 +762,13 @@
       // Combine check and work halves of throwing instruction.
       MIR* work_half = mir->meta.throw_insn;
       mir->dalvikInsn.opcode = work_half->dalvikInsn.opcode;
+      mir->meta = work_half->meta;  // Whatever the work_half had, we need to copy it.
       opcode = work_half->dalvikInsn.opcode;
       SSARepresentation* ssa_rep = work_half->ssa_rep;
       work_half->ssa_rep = mir->ssa_rep;
       mir->ssa_rep = ssa_rep;
       work_half->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheckPart2);
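+      // Point the work half (now kMirOpCheckPart2) back at the combined check instruction.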
+      work_half->meta.throw_insn = mir;
     }
 
     if (opcode >= kMirOpFirst) {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index c157327..81053a3 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -310,6 +310,19 @@
     void InsertLIRBefore(LIR* current_lir, LIR* new_lir);
     void InsertLIRAfter(LIR* current_lir, LIR* new_lir);
 
+    /**
+     * @brief Provides the maximum number of compiler temporaries that the backend can/wants
+     * to place in a frame.
+     * @return Returns the maximum number of compiler temporaries.
+     */
+    size_t GetMaxPossibleCompilerTemps() const;
+
+    /**
+     * @brief Provides the number of bytes needed in frame for spilling of compiler temporaries.
+     * @return Returns the size in bytes for space needed for compiler temporary spill region.
+     */
+    size_t GetNumBytesForCompilerTempSpillRegion();
+
     int ComputeFrameSize();
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
@@ -529,6 +542,9 @@
                                  bool safepoint_pc);
     void CallRuntimeHelperImmMethod(ThreadOffset helper_offset, int arg0,
                                     bool safepoint_pc);
+    void CallRuntimeHelperRegMethod(ThreadOffset helper_offset, int arg0, bool safepoint_pc);
+    void CallRuntimeHelperRegMethodRegLocation(ThreadOffset helper_offset, int arg0,
+                                               RegLocation arg2, bool safepoint_pc);
     void CallRuntimeHelperRegLocationRegLocation(ThreadOffset helper_offset,
                                                  RegLocation arg0, RegLocation arg1,
                                                  bool safepoint_pc);
@@ -627,6 +643,18 @@
      */
     void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
 
+    /**
+     * @brief Used to do the final store in a wide destination as per bytecode semantics.
+     * @see StoreValueWide
+     * @param rl_dest The destination dalvik register location.
+     * @param rl_src The source register location. It must be kLocPhysReg.
+     *
+     * This is used for x86 two operand computations, where we have computed the correct
+     * register values that now need to be properly registered.  This is used to avoid an
+     * extra pair of register copies that would result if StoreValueWide was called.
+     */
+    void StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src);
+
     // Shared by all targets - implemented in mir_to_lir.cc.
     void CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list);
     void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir);
@@ -634,7 +662,19 @@
     void SpecialMIR2LIR(const InlineMethod& special);
     void MethodMIR2LIR();
 
-
+    // Routines that work for the generic case, but may be overridden by target.
+    /**
+     * @brief Compare memory to immediate, and branch if condition is true.
+     * @param cond The condition code that, when true, will branch to the target.
+     * @param temp_reg A temporary register that can be used if compare to memory is not
+     * supported by the architecture.
+     * @param base_reg The register holding the base address.
+     * @param offset The offset from the base.
+     * @param check_value The immediate to compare to.
+     * @param target The branch target.
+     * @returns The branch instruction that was generated.
+     */
+    virtual LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                   int offset, int check_value, LIR* target);
 
     // Required for target - codegen helpers.
     virtual bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -695,11 +735,14 @@
     // Required for target - Dalvik-level generators.
     virtual void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_src2) = 0;
-    virtual void GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenMulLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenAddLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenAndLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
     virtual void GenArithOpDouble(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1,
@@ -727,11 +770,14 @@
     virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0;
     virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0;
     virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0;
-    virtual void GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenOrLong(Instruction::Code,
+                           RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) = 0;
-    virtual void GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenSubLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenXorLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
     virtual LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base,
                                 int offset, ThrowKind kind) = 0;
@@ -739,6 +785,25 @@
                                   bool is_div) = 0;
     virtual RegLocation GenDivRemLit(RegLocation rl_dest, int reg_lo, int lit,
                                      bool is_div) = 0;
+    /**
+     * @brief Generate an integer div or rem operation.
+     * @param rl_dest Destination Location.
+     * @param rl_src1 Numerator Location.
+     * @param rl_src2 Divisor Location.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     * @param check_zero 'true' if an exception should be generated if the divisor is 0.
+     */
+    virtual RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                                  RegLocation rl_src2, bool is_div, bool check_zero) = 0;
+    /**
+     * @brief Generate an integer div or rem operation by a literal.
+     * @param rl_dest Destination Location.
+     * @param rl_src1 Numerator Location.
+     * @param lit Divisor.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     */
+    virtual RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1,
+                                     int lit, bool is_div) = 0;
     virtual void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
 
@@ -758,7 +823,14 @@
     virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                      bool is_double) = 0;
     virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
+
+    /**
+     * @brief Lowers the kMirOpSelect MIR into LIR.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirOpSelect.
+     */
     virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
+
     virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
     virtual void GenMoveException(RegLocation rl_dest) = 0;
     virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
@@ -835,10 +907,48 @@
     CompilationUnit* GetCompilationUnit() {
       return cu_;
     }
+    /**
+     * @brief Returns the index of the lowest set bit in 'x'.
+     * @param x Value to be examined.
+     * @returns The bit number of the lowest bit set in the value.
+     */
+    int32_t LowestSetBit(uint64_t x);
+    /**
+     * @brief Is this value a power of two?
+     * @param x Value to be examined.
+     * @returns 'true' if only 1 bit is set in the value.
+     */
+    bool IsPowerOfTwo(uint64_t x);
+    /**
+     * @brief Do these SRs overlap?
+     * @param rl_op1 One RegLocation
+     * @param rl_op2 The other RegLocation
+     * @return 'true' if the VR pairs overlap
+     *
+     * Check to see if a result pair has a misaligned overlap with an operand pair.  This
+     * is not usual for dx to generate, but it is legal (for now).  In a future rev of
+     * dex, we'll want to make this case illegal.
+     */
+    bool BadOverlap(RegLocation rl_op1, RegLocation rl_op2);
+
+    /**
+     * @brief Force a location (in a register) into a temporary register.
+     * @param loc Location of the result.
+     * @returns The updated location.
+     */
+    RegLocation ForceTemp(RegLocation loc);
+
+    /**
+     * @brief Force a wide location (in registers) into temporary registers.
+     * @param loc Location of the result.
+     * @returns The updated location.
+     */
+    RegLocation ForceTempWide(RegLocation loc);
+
+    virtual void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                                    RegLocation rl_dest, RegLocation rl_src);
 
   private:
-    void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
-                            RegLocation rl_src);
     void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
                                     bool type_known_abstract, bool use_declaring_class,
                                     bool can_assume_type_is_in_dex_cache,
@@ -855,6 +965,7 @@
     // TODO: add accessors for these.
     LIR* literal_list_;                        // Constants.
     LIR* method_literal_list_;                 // Method literals requiring patching.
+    LIR* class_literal_list_;                  // Class literals requiring patching.
     LIR* code_literal_list_;                   // Code literals requiring patching.
     LIR* first_fixup_;                         // Doubly-linked list of LIR nodes requiring fixups.
 
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 32c22f2..6d69512 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -132,9 +132,15 @@
     DCHECK_LT(v_reg, cu_->num_dalvik_registers);
     return v_reg;
   } else {
-    int pos = std::abs(v_reg) - std::abs(SSA_METHOD_BASEREG);
-    DCHECK_LE(pos, cu_->num_compiler_temps);
-    return cu_->num_dalvik_registers + pos;
+    /*
+     * It must be the case that the v_reg for a temporary is less than or equal to the
+     * base reg for temps. For that reason, "position" must be zero or positive.
+     */
+    unsigned int position = std::abs(v_reg) - std::abs(static_cast<int>(kVRegTempBaseReg));
+
+    // The temporaries are placed after the dalvik registers in the promotion map.
+    DCHECK_LT(position, mir_graph_->GetNumUsedCompilerTemps());
+    return cu_->num_dalvik_registers + position;
   }
 }
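A sketch of the resulting promotion-map layout (counts invented for illustration): with cu_->num_dalvik_registers = N and M used compiler temps, indices 0..N-1 hold the dalvik virtual registers and indices N..N+M-1 hold the temporaries, which is why "position" above is simply added to cu_->num_dalvik_registers.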
 
@@ -897,9 +903,8 @@
  * optimization is disabled.
  */
 void Mir2Lir::DoPromotion() {
-  int reg_bias = cu_->num_compiler_temps + 1;
   int dalvik_regs = cu_->num_dalvik_registers;
-  int num_regs = dalvik_regs + reg_bias;
+  int num_regs = dalvik_regs + mir_graph_->GetNumUsedCompilerTemps();
   const int promotion_threshold = 1;
 
   // Allow target code to add any special registers
@@ -926,16 +931,13 @@
   for (int i = 0; i < dalvik_regs; i++) {
     core_regs[i].s_reg = FpRegs[i].s_reg = i;
   }
-  // Set ssa name for Method*
-  core_regs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();
-  FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistecy.
-  FpRegs[dalvik_regs + num_regs].s_reg = mir_graph_->GetMethodSReg();  // for consistency.
-  // Set ssa names for compiler_temps
-  for (int i = 1; i <= cu_->num_compiler_temps; i++) {
-    CompilerTemp* ct = mir_graph_->compiler_temps_.Get(i);
-    core_regs[dalvik_regs + i].s_reg = ct->s_reg;
-    FpRegs[dalvik_regs + i].s_reg = ct->s_reg;
-    FpRegs[num_regs + dalvik_regs + i].s_reg = ct->s_reg;
+
+  // Set ssa names for compiler temporaries
+  for (unsigned int ct_idx = 0; ct_idx < mir_graph_->GetNumUsedCompilerTemps(); ct_idx++) {
+    CompilerTemp* ct = mir_graph_->GetCompilerTemp(ct_idx);
+    core_regs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
+    FpRegs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
+    FpRegs[num_regs + dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
   }
 
   // Duplicate in upper half to represent possible fp double starting sregs.
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 1dcff65..c29d6c4 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -211,6 +211,8 @@
 #undef SHIFT_ENCODING_MAP
 
   { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" },
+  { kX86Shld32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32", "!0r,!1r,!2d" },
+  { kX86Shrd32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32", "!0r,!1r,!2d" },
 
   { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" },
   { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" },
@@ -242,12 +244,13 @@
   UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0,           R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
   UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
 
-  UNARY_ENCODING_MAP(Mul,     0x4, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
-  UNARY_ENCODING_MAP(Imul,    0x5, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
-  UNARY_ENCODING_MAP(Divmod,  0x6, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
-  UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
+  UNARY_ENCODING_MAP(Mul,     0x4, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
+  UNARY_ENCODING_MAP(Imul,    0x5, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
+  UNARY_ENCODING_MAP(Divmod,  0x6, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
+  UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
 #undef UNARY_ENCODING_MAP
 
+  { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { 0, 0, 0x99, 0, 0, 0, 0, 0 }, "Cdq", "" },
   { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" },
   { kX86Push32R,  kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0, 0, 0x50, 0,    0, 0, 0, 0 }, "Push32R",  "!0r" },
   { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0, 0, 0x58, 0,    0, 0, 0, 0 }, "Pop32R",   "!0r" },
@@ -287,6 +290,7 @@
   EXT_0F_ENCODING_MAP(Subss,     0xF3, 0x5C, REG_DEF0),
   EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E, REG_DEF0),
   EXT_0F_ENCODING_MAP(Divss,     0xF3, 0x5E, REG_DEF0),
+  EXT_0F_ENCODING_MAP(Punpckldq, 0x66, 0x62, REG_DEF0),
 
   { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
   { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
@@ -421,6 +425,7 @@
     case kThreadImm:  // lir operands - 0: disp, 1: imm
       return ComputeSize(entry, 0, 0x12345678, false);  // displacement size is always 32bit
     case kRegRegImm:  // lir operands - 0: reg, 1: reg, 2: imm
+    case kRegRegImmRev:  // lir operands - same layout as kRegRegImm
       return ComputeSize(entry, 0, 0, false);
     case kRegMemImm:  // lir operands - 0: reg, 1: base, 2: disp, 3: imm
       return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
@@ -641,7 +646,6 @@
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_NE(rX86_SP, base);
   EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -754,6 +758,22 @@
   EmitImm(entry, imm);
 }
 
+void X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry,
+                                  uint8_t reg1, uint8_t reg2, int32_t imm) {
+  EmitRegRegImm(entry, reg2, reg1, imm);
+}
+
+void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry,
+                               uint8_t reg, uint8_t base, int disp, int32_t imm) {
+  EmitPrefixAndOpcode(entry);
+  DCHECK(!X86_FPREG(reg));
+  DCHECK_LT(reg, 8);
+  EmitModrmDisp(reg, base, disp);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  EmitImm(entry, imm);
+}
+
 void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
@@ -1185,9 +1205,16 @@
       case kRegRegStore:  // lir operands - 0: reg2, 1: reg1
         EmitRegReg(entry, lir->operands[1], lir->operands[0]);
         break;
+      case kRegRegImmRev:
+        EmitRegRegImmRev(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
       case kRegRegImm:
         EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
+      case kRegMemImm:
+        EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
+                      lir->operands[3]);
+        break;
       case kRegImm:  // lir operands - 0: reg, 1: immediate
         EmitRegImm(entry, lir->operands[0], lir->operands[1]);
         break;
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 816f2d0..f054d82 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -94,9 +94,9 @@
                      RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                                   RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +110,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     LIR* GenMemImmedCheck(ConditionCode c_code, int base, int offset, int check_value,
@@ -136,7 +136,59 @@
     void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+    /*
+     * @brief Generate a two address long operation with a constant value
+     * @param rl_dest location of result
+     * @param rl_src constant source operand
+     * @param op Opcode to be generated
+     */
+    void GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+    /*
+     * @brief Generate a three address long operation with a constant value
+     * @param rl_dest location of result
+     * @param rl_src1 source operand
+     * @param rl_src2 constant source operand
+     * @param op Opcode to be generated
+     */
+    void GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
+                        RegLocation rl_src2, Instruction::Code op);
 
+    /**
+     * @brief Generate a long arithmetic operation.
+     * @param rl_dest The destination.
+     * @param rl_src1 First operand.
+     * @param rl_src2 Second operand.
+     * @param op The DEX opcode for the operation.
+     * @param is_commutative 'true' if the sources can be swapped when needed.
+     */
+    void GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, Instruction::Code op, bool is_commutative);
+
+    /**
+     * @brief Generate a two operand long arithmetic operation.
+     * @param rl_dest The destination.
+     * @param rl_src Second operand.
+     * @param op The DEX opcode for the operation.
+     */
+    void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+
+    /**
+     * @brief Generate a long operation.
+     * @param rl_dest The destination.  Must be in a register.
+     * @param rl_src The other operand.  May be in a register or in memory.
+     * @param op The DEX opcode for the operation.
+     */
+    void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+
+    /**
+     * @brief Implement instanceof a final class with x86 specific code.
+     * @param use_declaring_class 'true' if we can use the class itself.
+     * @param type_idx Type index to use if use_declaring_class is 'false'.
+     * @param rl_dest Result to be set to 0 or 1.
+     * @param rl_src Object to be tested.
+     */
+    void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                            RegLocation rl_dest, RegLocation rl_src);
     // Single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
     LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target);
@@ -202,6 +254,8 @@
     void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp);
     void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2);
     void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
+    void EmitRegRegImmRev(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
+    void EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int disp, int32_t imm);
     void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
     void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm);
     void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
@@ -230,6 +284,108 @@
                                   int64_t val, ConditionCode ccode);
     void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
     void GenConstWide(RegLocation rl_dest, int64_t value);
+
+    /*
+     * @brief Return the correct x86 opcode for the Dex operation
+     * @param op Dex opcode for the operation
+     * @param loc Register location of the operand
+     * @param is_high_op 'true' if this is an operation on the high word
+     * @param value Immediate value for the operation.  Used for byte variants
+     * @returns the correct x86 opcode to perform the operation
+     */
+    X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value);
+
+    /*
+     * @brief Return the correct x86 opcode for the Dex operation
+     * @param op Dex opcode for the operation
+     * @param dest location of the destination.  May be register or memory.
+     * @param rhs Location for the rhs of the operation.  May be in register or memory.
+     * @param is_high_op 'true' if this is an operation on the high word
+     * @returns the correct x86 opcode to perform the operation
+     * @note at most one location may refer to memory
+     */
+    X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
+                        bool is_high_op);
+
+    /*
+     * @brief Is this operation a no-op for this opcode and value?
+     * @param op Dex opcode for the operation
+     * @param value Immediate value for the operation.
+     * @returns 'true' if the operation will have no effect
+     */
+    bool IsNoOp(Instruction::Code op, int32_t value);
+
+    /*
+     * @brief Dump a RegLocation using printf
+     * @param loc Register location to dump
+     */
+    static void DumpRegLocation(RegLocation loc);
+
+    /**
+     * @brief Calculate magic number and shift for a given divisor
+     * @param divisor The divisor for the calculation.
+     * @param magic Receives the calculated magic number.
+     * @param shift Receives the calculated shift.
+     */
+    void CalculateMagicAndShift(int divisor, int& magic, int& shift);
+
+    /*
+     * @brief Generate an integer div or rem operation.
+     * @param rl_dest Destination Location.
+     * @param rl_src1 Numerator Location.
+     * @param rl_src2 Divisor Location.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     * @param check_zero 'true' if an exception should be generated if the divisor is 0.
+     */
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                                  RegLocation rl_src2, bool is_div, bool check_zero);
+
+    /*
+     * @brief Generate an integer div or rem operation by a literal.
+     * @param rl_dest Destination Location.
+     * @param rl_src Numerator Location.
+     * @param lit Divisor.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     */
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div);
+
+    /*
+     * @brief Generate code to implement long shift operations.
+     * @param opcode The DEX opcode to specify the shift type.
+     * @param rl_dest The destination.
+     * @param rl_src The value to be shifted.
+     * @param shift_amount How much to shift.
+     * @returns the RegLocation of the result.
+     */
+    RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                                  RegLocation rl_src, int shift_amount);
+    /*
+     * @brief Generate an imul of a register by a constant, or a cheaper sequence when possible.
+     * @param dest Destination Register.
+     * @param src Source Register.
+     * @param val Constant multiplier.
+     */
+    void GenImulRegImm(int dest, int src, int val);
+    /*
+     * @brief Generate an imul of a memory location by a constant, or a cheaper sequence when possible.
+     * @param dest Destination Register.
+     * @param sreg Symbolic register.
+     * @param displacement Displacement on stack of Symbolic Register.
+     * @param val Constant multiplier.
+     */
+    void GenImulMemImm(int dest, int sreg, int displacement, int val);
+
+    /*
+     * @brief Compare memory to immediate, and branch if condition true.
+     * @param cond The condition code that when true will branch to the target.
+     * @param temp_reg A temporary register that can be used if compare-to-memory is not
+     * supported by the architecture.
+     * @param base_reg The register holding the base address.
+     * @param offset The offset from the base.
+     * @param check_value The immediate to compare to.
+     * @param target The branch target.
+     */
+    LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                           int offset, int check_value, LIR* target);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 6272498..006fe76 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -303,7 +303,7 @@
     rl_src2 = LoadValue(rl_src2, kFPReg);
     NewLIR2(kX86UcomissRR, rl_src1.low_reg, rl_src2.low_reg);
   }
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
   switch (ccode) {
     case kCondEq:
       if (!gt_bias) {
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 95a44c9..e458e5f 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -157,8 +157,7 @@
       NewLIR2(kX86MovdxrRR, dest_lo, src_lo);
       dest_hi = AllocTempDouble();
       NewLIR2(kX86MovdxrRR, dest_hi, src_hi);
-      NewLIR2(kX86PsllqRI, dest_hi, 32);
-      NewLIR2(kX86OrpsRR, dest_lo, dest_hi);
+      NewLIR2(kX86PunpckldqRR, dest_lo, dest_hi);
       FreeTemp(dest_hi);
     }
   } else {
@@ -180,14 +179,104 @@
 }
 
 void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
-  UNIMPLEMENTED(FATAL) << "Need codegen for GenSelect";
+  RegLocation rl_result;
+  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
+  RegLocation rl_dest = mir_graph_->GetDest(mir);
+  rl_src = LoadValue(rl_src, kCoreReg);
+
+  // kMirOpSelect has two variants, one for constants and one for moves.
+  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);
+
+  if (is_constant_case) {
+    int true_val = mir->dalvikInsn.vB;
+    int false_val = mir->dalvikInsn.vC;
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+    /*
+     * 1) When the true case is zero and result_reg is not the same as src_reg:
+     *     xor result_reg, result_reg
+     *     cmp $0, src_reg
+     *     mov t1, $false_case
+     *     cmovnz result_reg, t1
+     * 2) When the false case is zero and result_reg is not the same as src_reg:
+     *     xor result_reg, result_reg
+     *     cmp $0, src_reg
+     *     mov t1, $true_case
+     *     cmovz result_reg, t1
+     * 3) All other cases (we do compare first to set eflags):
+     *     cmp $0, src_reg
+     *     mov result_reg, $true_case
+     *     mov t1, $false_case
+     *     cmovnz result_reg, t1
+     */
+    const bool result_reg_same_as_src = (rl_src.location == kLocPhysReg && rl_src.low_reg == rl_result.low_reg);
+    const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
+    const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
+    const bool catch_all_case = !(true_zero_case || false_zero_case);
+
+    if (true_zero_case || false_zero_case) {
+      OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg);
+    }
+
+    if (true_zero_case || false_zero_case || catch_all_case) {
+      OpRegImm(kOpCmp, rl_src.low_reg, 0);
+    }
+
+    if (catch_all_case) {
+      OpRegImm(kOpMov, rl_result.low_reg, true_val);
+    }
+
+    if (true_zero_case || false_zero_case || catch_all_case) {
+      int immediate_for_temp = false_zero_case ? true_val : false_val;
+      int temp1_reg = AllocTemp();
+      OpRegImm(kOpMov, temp1_reg, immediate_for_temp);
+
+      ConditionCode cc = false_zero_case ? kCondEq : kCondNe;
+      OpCondRegReg(kOpCmov, cc, rl_result.low_reg, temp1_reg);
+
+      FreeTemp(temp1_reg);
+    }
+  } else {
+    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
+    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
+    rl_true = LoadValue(rl_true, kCoreReg);
+    rl_false = LoadValue(rl_false, kCoreReg);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+    /*
+     * 1) When true case is already in place:
+     *     cmp $0, src_reg
+     *     cmovnz result_reg, false_reg
+     * 2) When false case is already in place:
+     *     cmp $0, src_reg
+     *     cmovz result_reg, true_reg
+     * 3) When neither case is in place:
+     *     cmp $0, src_reg
+     *     mov result_reg, true_reg
+     *     cmovnz result_reg, false_reg
+     */
+
+    // kMirOpSelect is only generated for conditionals that compare against zero.
+    OpRegImm(kOpCmp, rl_src.low_reg, 0);
+
+    if (rl_result.low_reg == rl_true.low_reg) {
+      OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg);
+    } else if (rl_result.low_reg == rl_false.low_reg) {
+      OpCondRegReg(kOpCmov, kCondEq, rl_result.low_reg, rl_true.low_reg);
+    } else {
+      OpRegCopy(rl_result.low_reg, rl_true.low_reg);
+      OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg);
+    }
+  }
+
+  StoreValue(rl_dest, rl_result);
 }
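In C terms, the catch-all lowering (case 3 above) computes the following. This is a minimal sketch to show the cmov semantics, not part of the patch, and the function name is invented:

#include <cstdint>

// result = (src == 0) ? true_case : false_case, matching case 3 above.
int32_t SelectLoweringSketch(int32_t src, int32_t true_case, int32_t false_case) {
  int32_t result = true_case;  // mov result_reg, $true_case
  if (src != 0) {              // cmp $0, src_reg sets eflags ...
    result = false_case;       // ... cmovnz result_reg, t1
  }
  return result;
}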
 
 void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
   LIR* taken = &block_label_list_[bb->taken];
   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
 
   if (rl_src1.is_const) {
     std::swap(rl_src1, rl_src2);
@@ -284,18 +373,261 @@
   OpCmpImmBranch(ccode, low_reg, val_lo, taken);
 }
 
+void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
+  // It does not make sense to calculate magic and shift for a zero divisor.
+  DCHECK_NE(divisor, 0);
+
+  /* According to H.S. Warren's Hacker's Delight Chapter 10 and
+   * T. Granlund and P.L. Montgomery's "Division by invariant integers using
+   * multiplication", the magic number M and shift S can be calculated in the
+   * following way:
+   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
+   * where divisor(d) >= 2.
+   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
+   * where divisor(d) <= -2.
+   * Thus nc can be calculated like:
+   * nc = 2^31 - 2^31 % d - 1, where d >= 2
+   * nc = -2^31 + (2^31 + 1) % |d|, where d <= -2.
+   *
+   * So the shift p is the smallest p satisfying
+   * 2^p > nc * (d - 2^p % d), where d >= 2
+   * 2^p > nc * (d + 2^p % d), where d <= -2.
+   *
+   * The magic number M is calculated by
+   * M = (2^p + d - 2^p % d) / d, where d >= 2
+   * M = (2^p - d - 2^p % d) / d, where d <= -2.
+   *
+   * Notice that p is always greater than or equal to 32, so we just return p - 32
+   * as the shift number S.
+   */
+
+  int32_t p = 31;
+  const uint32_t two31 = 0x80000000U;
+
+  // Initialize the computations.
+  uint32_t abs_d = (divisor >= 0) ? divisor : -divisor;
+  uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31);
+  uint32_t abs_nc = tmp - 1 - tmp % abs_d;
+  uint32_t quotient1 = two31 / abs_nc;
+  uint32_t remainder1 = two31 % abs_nc;
+  uint32_t quotient2 = two31 / abs_d;
+  uint32_t remainder2 = two31 % abs_d;
+
+  /*
+   * To avoid handling both positive and negative divisor, Hacker's Delight
+   * introduces a method to handle these 2 cases together to avoid duplication.
+   */
+  uint32_t delta;
+  do {
+    p++;
+    quotient1 = 2 * quotient1;
+    remainder1 = 2 * remainder1;
+    if (remainder1 >= abs_nc) {
+      quotient1++;
+      remainder1 = remainder1 - abs_nc;
+    }
+    quotient2 = 2 * quotient2;
+    remainder2 = 2 * remainder2;
+    if (remainder2 >= abs_d) {
+      quotient2++;
+      remainder2 = remainder2 - abs_d;
+    }
+    delta = abs_d - remainder2;
+  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
+
+  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
+  shift = p - 32;
+}
+
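As a worked check of the routine above (values match the classic table in Hacker's Delight): for divisor d = 7 the loop exits with p = 34, because 2^34 mod 7 = 2; this gives M = (2^34 + 7 - 2) / 7 = 2454267027 = 0x92492493 (negative when viewed as an int32_t) and S = 34 - 32 = 2. Likewise d = 3 yields magic 0x55555556 with shift 0, and d = 5 yields 0x66666667 with shift 1.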
 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, int reg_lo,
                                      int lit, bool is_div) {
   LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
   return rl_dest;
 }
 
+RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
+                                     int imm, bool is_div) {
+  // Use a multiply (and fixup) to perform an int div/rem by a constant.
+
+  // We have to use fixed registers, so flush all the temps.
+  FlushAllRegs();
+  LockCallTemps();  // Prepare for explicit register usage.
+
+  // Assume that the result will be in EDX.
+  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+                          r2, INVALID_REG, INVALID_SREG, INVALID_SREG};
+
+  // Handle the 0x80000000 / -1 special case.
+  LIR *minint_branch = 0;
+  if (imm == -1) {
+    if (is_div) {
+      LoadValueDirectFixed(rl_src, r0);
+      OpRegImm(kOpCmp, r0, 0x80000000);
+      minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
+
+      // For x != MIN_INT, x / -1 == -x.
+      NewLIR1(kX86Neg32R, r0);
+
+      LIR* branch_around = NewLIR1(kX86Jmp8, 0);
+      // The target for cmp/jmp above.
+      minint_branch->target = NewLIR0(kPseudoTargetLabel);
+      // EAX already contains the right value (0x80000000).
+      branch_around->target = NewLIR0(kPseudoTargetLabel);
+    } else {
+      // x % -1 == 0.
+      LoadConstantNoClobber(r0, 0);
+    }
+    // For this case, return the result in EAX.
+    rl_result.low_reg = r0;
+  } else {
+    DCHECK(imm <= -2 || imm >= 2);
+    // Use H.S. Warren's Hacker's Delight Chapter 10 and
+    // T. Granlund and P.L. Montgomery's "Division by invariant integers using multiplication".
+    int magic, shift;
+    CalculateMagicAndShift(imm, magic, shift);
+
+    /*
+     * For imm >= 2,
+     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
+     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
+     * For imm <= -2,
+     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
+     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
+     * We implement this algorithm in the following way:
+     * 1. multiply magic number M and numerator n, getting the high 32 bits of the result in EDX
+     * 2. if imm > 0 and magic < 0, add numerator to EDX
+     *    if imm < 0 and magic > 0, sub numerator from EDX
+     * 3. if S != 0, SAR EDX by S bits
+     * 4. add 1 to EDX if EDX < 0
+     * 5. EDX now holds the quotient
+     */
+
+    // Numerator into EAX.
+    int numerator_reg = -1;
+    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
+      // We will need the value later.
+      if (rl_src.location == kLocPhysReg) {
+        // We can use it directly.
+        DCHECK(rl_src.low_reg != r0 && rl_src.low_reg != r2);
+        numerator_reg = rl_src.low_reg;
+      } else {
+        LoadValueDirectFixed(rl_src, r1);
+        numerator_reg = r1;
+      }
+      OpRegCopy(r0, numerator_reg);
+    } else {
+      // Only need this once.  Just put it into EAX.
+      LoadValueDirectFixed(rl_src, r0);
+    }
+
+    // EDX = magic.
+    LoadConstantNoClobber(r2, magic);
+
+    // EDX:EAX = magic * numerator.
+    NewLIR1(kX86Imul32DaR, r2);
+
+    if (imm > 0 && magic < 0) {
+      // Add numerator to EDX.
+      DCHECK_NE(numerator_reg, -1);
+      NewLIR2(kX86Add32RR, r2, numerator_reg);
+    } else if (imm < 0 && magic > 0) {
+      DCHECK_NE(numerator_reg, -1);
+      NewLIR2(kX86Sub32RR, r2, numerator_reg);
+    }
+
+    // Do we need the shift?
+    if (shift != 0) {
+      // Shift EDX by 'shift' bits.
+      NewLIR2(kX86Sar32RI, r2, shift);
+    }
+
+    // Add 1 to EDX if EDX < 0.
+
+    // Move EDX to EAX.
+    OpRegCopy(r0, r2);
+
+    // Move sign bit to bit 0, zeroing the rest.
+    NewLIR2(kX86Shr32RI, r2, 31);
+
+    // EDX = EDX + EAX.
+    NewLIR2(kX86Add32RR, r2, r0);
+
+    // Quotient is in EDX.
+    if (!is_div) {
+      // We need to compute the remainder.
+      // Remainder is numerator - (quotient * imm).
+      DCHECK_NE(numerator_reg, -1);
+      OpRegCopy(r0, numerator_reg);
+
+      // EDX = quotient * imm.
+      OpRegRegImm(kOpMul, r2, r2, imm);
+
+      // EAX -= EDX; EAX now holds the remainder.
+      NewLIR2(kX86Sub32RR, r0, r2);
+
+      // For this case, return the result in EAX.
+      rl_result.low_reg = r0;
+    }
+  }
+
+  return rl_result;
+}
+
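A standalone sketch of the five-step fixup sequence emitted above, for the positive-divisor, negative-magic case (e.g. 7 -> magic 0x92492493, shift 2); the function name is invented and this is illustrative only, not part of the patch:

#include <cassert>
#include <cstdint>

int32_t DivByMagicSketch(int32_t n, int32_t magic, int shift) {
  int64_t product = static_cast<int64_t>(magic) * n;  // imul: result in EDX:EAX
  int32_t edx = static_cast<int32_t>(product >> 32);  // high 32 bits land in EDX
  edx += n;                                           // step 2: imm > 0, magic < 0
  edx >>= shift;                                      // step 3: sar EDX, shift
  return edx + static_cast<int32_t>(static_cast<uint32_t>(edx) >> 31);  // step 4
}

int main() {
  const int32_t magic = static_cast<int32_t>(0x92492493);  // magic for divisor 7
  for (int32_t n : {-2147483647, -100, -7, -1, 0, 1, 6, 7, 100, 2147483647}) {
    assert(DivByMagicSketch(n, magic, 2) == n / 7);
  }
  return 0;
}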
 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, int reg_lo,
                                   int reg_hi, bool is_div) {
   LOG(FATAL) << "Unexpected use of GenDivRem for x86";
   return rl_dest;
 }
 
+RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                                  RegLocation rl_src2, bool is_div, bool check_zero) {
+  // We have to use fixed registers, so flush all the temps.
+  FlushAllRegs();
+  LockCallTemps();  // Prepare for explicit register usage.
+
+  // Load LHS into EAX.
+  LoadValueDirectFixed(rl_src1, r0);
+
+  // Load RHS into ECX.
+  LoadValueDirectFixed(rl_src2, r1);
+
+  // Copy LHS sign bit into EDX.
+  NewLIR0(kx86Cdq32Da);
+
+  if (check_zero) {
+    // Handle division by zero case.
+    GenImmedCheck(kCondEq, r1, 0, kThrowDivZero);
+  }
+
+  // Have to catch 0x80000000/-1 case, or we will get an exception!
+  OpRegImm(kOpCmp, r1, -1);
+  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+
+  // RHS is -1.
+  OpRegImm(kOpCmp, r0, 0x80000000);
+  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+
+  // In the 0x80000000/-1 case.
+  if (!is_div) {
+    // For DIV, EAX is already right. For REM, we need EDX to be 0.
+    LoadConstantNoClobber(r2, 0);
+  }
+  LIR* done = NewLIR1(kX86Jmp8, 0);
+
+  // Expected case.
+  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
+  minint_branch->target = minus_one_branch->target;
+  NewLIR1(kX86Idivmod32DaR, r1);
+  done->target = NewLIR0(kPseudoTargetLabel);
+
+  // Result is in EAX for div and EDX for rem.
+  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+                          r0, INVALID_REG, INVALID_SREG, INVALID_SREG};
+  if (!is_div) {
+    rl_result.low_reg = r2;
+  }
+  return rl_result;
+}
+
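The explicit minint/-1 checks above exist because idiv raises a divide-error fault when the quotient is unrepresentable: 0x80000000 / -1 would need +2^31. The dex semantics instead define MIN_INT / -1 == MIN_INT and MIN_INT % -1 == 0, which is what the fixup block produces. A hedged C sketch of the same contract (den already null-checked, as in the generated code; the function name is invented):

#include <cstdint>

int32_t SafeDiv32(int32_t num, int32_t den) {
  if (den == -1 && num == INT32_MIN) {
    return INT32_MIN;  // quotient case; the remainder for this input is 0
  }
  return num / den;  // everything else is safe to hand to idiv
}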
 bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
   DCHECK_EQ(cu_->instruction_set, kX86);
 
@@ -512,100 +844,353 @@
   return NULL;
 }
 
-void X86Mir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
+void X86Mir2Lir::GenImulRegImm(int dest, int src, int val) {
+  switch (val) {
+    case 0:
+      NewLIR2(kX86Xor32RR, dest, dest);
+      break;
+    case 1:
+      OpRegCopy(dest, src);
+      break;
+    default:
+      OpRegRegImm(kOpMul, dest, src, val);
+      break;
+  }
+}
+
+void X86Mir2Lir::GenImulMemImm(int dest, int sreg, int displacement, int val) {
+  LIR *m;
+  switch (val) {
+    case 0:
+      NewLIR2(kX86Xor32RR, dest, dest);
+      break;
+    case 1:
+      LoadBaseDisp(rX86_SP, displacement, dest, kWord, sreg);
+      break;
+    default:
+      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest, rX86_SP,
+                  displacement, val);
+      AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
+      break;
+  }
+}
+
+void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
-  LOG(FATAL) << "Unexpected use of GenX86Long for x86";
-}
-void X86Mir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
+  if (rl_src1.is_const) {
+    std::swap(rl_src1, rl_src2);
+  }
+  // Are we multiplying by a constant?
+  if (rl_src2.is_const) {
+    // Handle multiplication by a constant operand specially.
+    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+    if (val == 0) {
+      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+      OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg);
+      OpRegReg(kOpXor, rl_result.high_reg, rl_result.high_reg);
+      StoreValueWide(rl_dest, rl_result);
+      return;
+    } else if (val == 1) {
+      rl_src1 = EvalLocWide(rl_src1, kCoreReg, true);
+      StoreValueWide(rl_dest, rl_src1);
+      return;
+    } else if (val == 2) {
+      GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
+      return;
+    } else if (IsPowerOfTwo(val)) {
+      int shift_amount = LowestSetBit(val);
+      if (!BadOverlap(rl_src1, rl_dest)) {
+        rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+        RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
+                                                  rl_src1, shift_amount);
+        StoreValueWide(rl_dest, rl_result);
+        return;
+      }
+    }
+
+    // Okay, just bite the bullet and do it.
+    int32_t val_lo = Low32Bits(val);
+    int32_t val_hi = High32Bits(val);
+    FlushAllRegs();
+    LockCallTemps();  // Prepare for explicit register usage.
+    rl_src1 = UpdateLocWide(rl_src1);
+    bool src1_in_reg = rl_src1.location == kLocPhysReg;
+    int displacement = SRegOffset(rl_src1.s_reg_low);
+
+    // ECX <- 1H * 2L
+    // EAX <- 1L * 2H
+    if (src1_in_reg) {
+      GenImulRegImm(r1, rl_src1.high_reg, val_lo);
+      GenImulRegImm(r0, rl_src1.low_reg, val_hi);
+    } else {
+      GenImulMemImm(r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
+      GenImulMemImm(r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
+    }
+
+    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
+    NewLIR2(kX86Add32RR, r1, r0);
+
+    // EAX <- 2L
+    LoadConstantNoClobber(r0, val_lo);
+
+    // EDX:EAX <- 2L * 1L (double precision)
+    if (src1_in_reg) {
+      NewLIR1(kX86Mul32DaR, rl_src1.low_reg);
+    } else {
+      LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
+      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                              true /* is_load */, true /* is_64bit */);
+    }
+
+    // EDX <- EDX + ECX (add high words)
+    NewLIR2(kX86Add32RR, r2, r1);
+
+    // Result is EDX:EAX
+    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2,
+                             INVALID_SREG, INVALID_SREG};
+    StoreValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // Nope.  Do it the hard way.
   FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) + (r2:r3)
-  OpRegReg(kOpAdd, r0, r2);  // r0 = r0 + r2
-  OpRegReg(kOpAdc, r1, r3);  // r1 = r1 + r3 + CF
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
+  LockCallTemps();  // Prepare for explicit register usage.
+  rl_src1 = UpdateLocWide(rl_src1);
+  rl_src2 = UpdateLocWide(rl_src2);
+
+  // At this point, the VRs are in their home locations.
+  bool src1_in_reg = rl_src1.location == kLocPhysReg;
+  bool src2_in_reg = rl_src2.location == kLocPhysReg;
+
+  // ECX <- 1H
+  if (src1_in_reg) {
+    NewLIR2(kX86Mov32RR, r1, rl_src1.high_reg);
+  } else {
+    LoadBaseDisp(rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, r1,
+                 kWord, GetSRegHi(rl_src1.s_reg_low));
+  }
+
+  // EAX <- 2H
+  if (src2_in_reg) {
+    NewLIR2(kX86Mov32RR, r0, rl_src2.high_reg);
+  } else {
+    LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, r0,
+                 kWord, GetSRegHi(rl_src2.s_reg_low));
+  }
+
+  // EAX <- EAX * 1L  (2H * 1L)
+  if (src1_in_reg) {
+    NewLIR2(kX86Imul32RR, r0, rl_src1.low_reg);
+  } else {
+    int displacement = SRegOffset(rl_src1.s_reg_low);
+    LIR *m = NewLIR3(kX86Imul32RM, r0, rX86_SP, displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+
+  // ECX <- ECX * 2L  (1H * 2L)
+  if (src2_in_reg) {
+    NewLIR2(kX86Imul32RR, r1, rl_src2.low_reg);
+  } else {
+    int displacement = SRegOffset(rl_src2.s_reg_low);
+    LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+
+  // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
+  NewLIR2(kX86Add32RR, r1, r0);
+
+  // EAX <- 2L
+  if (src2_in_reg) {
+    NewLIR2(kX86Mov32RR, r0, rl_src2.low_reg);
+  } else {
+    LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, r0,
+                 kWord, rl_src2.s_reg_low);
+  }
+
+  // EDX:EAX <- 2L * 1L (double precision)
+  if (src1_in_reg) {
+    NewLIR1(kX86Mul32DaR, rl_src1.low_reg);
+  } else {
+    int displacement = SRegOffset(rl_src1.s_reg_low);
+    LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+
+  // EDX <- EDX + ECX (add high words)
+  NewLIR2(kX86Add32RR, r2, r1);
+
+  // Result is EDX:EAX
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2,
+                           INVALID_SREG, INVALID_SREG};
   StoreValueWide(rl_dest, rl_result);
 }
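The register comments above implement the standard 32-bit decomposition of a 64-bit product: (hi1:lo1) * (hi2:lo2) mod 2^64 equals the widening product lo1*lo2 plus (hi1*lo2 + lo1*hi2) shifted up 32 bits (the hi1*hi2 term overflows out entirely). A small standalone check, illustrative only:

#include <cassert>
#include <cstdint>

uint64_t MulLongBy32BitParts(uint64_t a, uint64_t b) {
  uint32_t lo1 = static_cast<uint32_t>(a), hi1 = static_cast<uint32_t>(a >> 32);
  uint32_t lo2 = static_cast<uint32_t>(b), hi2 = static_cast<uint32_t>(b >> 32);
  uint32_t cross = hi1 * lo2 + lo1 * hi2;              // the ECX accumulation above
  uint64_t low = static_cast<uint64_t>(lo1) * lo2;     // EDX:EAX from the mul
  return low + (static_cast<uint64_t>(cross) << 32);   // add cross into EDX
}

int main() {
  uint64_t a = 0x123456789ABCDEF0ULL, b = 0x0FEDCBA987654321ULL;
  assert(MulLongBy32BitParts(a, b) == a * b);
  // Two's complement makes the same routine correct for signed operands.
  assert(MulLongBy32BitParts(static_cast<uint64_t>(-5LL), 7) ==
         static_cast<uint64_t>(-35LL));
  return 0;
}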
 
-void X86Mir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) + (r2:r3)
-  OpRegReg(kOpSub, r0, r2);  // r0 = r0 - r2
-  OpRegReg(kOpSbc, r1, r3);  // r1 = r1 - r3 - CF
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
+                                   Instruction::Code op) {
+  DCHECK_EQ(rl_dest.location, kLocPhysReg);
+  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
+  if (rl_src.location == kLocPhysReg) {
+    // Both operands are in registers.
+    if (rl_dest.low_reg == rl_src.high_reg) {
+      // The low result and the high operand share a register, so writing the
+      // low word would clobber the high operand before it is read.
+      int temp_reg = AllocTemp();
+      OpRegCopy(temp_reg, rl_dest.low_reg);
+      rl_src.high_reg = temp_reg;
+    }
+    NewLIR2(x86op, rl_dest.low_reg, rl_src.low_reg);
+
+    x86op = GetOpcode(op, rl_dest, rl_src, true);
+    NewLIR2(x86op, rl_dest.high_reg, rl_src.high_reg);
+    FreeTemp(rl_src.low_reg);
+    FreeTemp(rl_src.high_reg);
+    return;
+  }
+
+  // RHS is in memory.
+  DCHECK((rl_src.location == kLocDalvikFrame) ||
+         (rl_src.location == kLocCompilerTemp));
+  int rBase = TargetReg(kSp);
+  int displacement = SRegOffset(rl_src.s_reg_low);
+
+  LIR *lir = NewLIR3(x86op, rl_dest.low_reg, rBase, displacement + LOWORD_OFFSET);
+  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                          true /* is_load */, true /* is64bit */);
+  x86op = GetOpcode(op, rl_dest, rl_src, true);
+  lir = NewLIR3(x86op, rl_dest.high_reg, rBase, displacement + HIWORD_OFFSET);
+  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                          true /* is_load */, true /* is64bit */);
 }
 
-void X86Mir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) & (r2:r3)
-  OpRegReg(kOpAnd, r0, r2);  // r0 = r0 & r2
-  OpRegReg(kOpAnd, r1, r3);  // r1 = r1 & r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
+  rl_dest = UpdateLocWide(rl_dest);
+  if (rl_dest.location == kLocPhysReg) {
+    // Ensure we are in a register pair
+    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+
+    rl_src = UpdateLocWide(rl_src);
+    GenLongRegOrMemOp(rl_result, rl_src, op);
+    StoreFinalValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // It wasn't in registers, so it better be in memory.
+  DCHECK((rl_dest.location == kLocDalvikFrame) ||
+         (rl_dest.location == kLocCompilerTemp));
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+
+  // Operate directly on memory.
+  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
+  int rBase = TargetReg(kSp);
+  int displacement = SRegOffset(rl_dest.s_reg_low);
+
+  LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, rl_src.low_reg);
+  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                          false /* is_load */, true /* is64bit */);
+  x86op = GetOpcode(op, rl_dest, rl_src, true);
+  lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, rl_src.high_reg);
+  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                          false /* is_load */, true /* is64bit */);
+  FreeTemp(rl_src.low_reg);
+  FreeTemp(rl_src.high_reg);
 }
 
-void X86Mir2Lir::GenOrLong(RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) | (r2:r3)
-  OpRegReg(kOpOr, r0, r2);  // r0 = r0 | r2
-  OpRegReg(kOpOr, r1, r3);  // r1 = r1 | r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
+                              RegLocation rl_src2, Instruction::Code op,
+                              bool is_commutative) {
+  // Is this really a two-operand operation?
+  switch (op) {
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::SUB_LONG_2ADDR:
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG_2ADDR:
+      GenLongArith(rl_dest, rl_src2, op);
+      return;
+    default:
+      break;
+  }
+
+  if (rl_dest.location == kLocPhysReg) {
+    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
+
+    // We are about to clobber the LHS, so it needs to be a temp.
+    rl_result = ForceTempWide(rl_result);
+
+    // Perform the operation using the RHS.
+    rl_src2 = UpdateLocWide(rl_src2);
+    GenLongRegOrMemOp(rl_result, rl_src2, op);
+
+    // And now record that the result is in the temp.
+    StoreFinalValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // It wasn't in registers, so it better be in memory.
+  DCHECK((rl_dest.location == kLocDalvikFrame) ||
+         (rl_dest.location == kLocCompilerTemp));
+  rl_src1 = UpdateLocWide(rl_src1);
+  rl_src2 = UpdateLocWide(rl_src2);
+
+  // Get one of the source operands into a temporary register.
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  if (IsTemp(rl_src1.low_reg) && IsTemp(rl_src1.high_reg)) {
+    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+  } else if (is_commutative) {
+    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+    // We need at least one of them to be a temporary.
+    if (!(IsTemp(rl_src2.low_reg) && IsTemp(rl_src2.high_reg))) {
+      rl_src1 = ForceTempWide(rl_src1);
+    }
+    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+  } else {
+    // Need LHS to be the temp.
+    rl_src1 = ForceTempWide(rl_src1);
+    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+  }
+
+  StoreFinalValueWide(rl_dest, rl_src1);
 }
 
-void X86Mir2Lir::GenXorLong(RegLocation rl_dest,
+void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
                             RegLocation rl_src1, RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) ^ (r2:r3)
-  OpRegReg(kOpXor, r0, r2);  // r0 = r0 ^ r2
-  OpRegReg(kOpXor, r1, r3);  // r1 = r1 ^ r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
+}
+
+void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
+}
+
+void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
+}
+
+void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
+                           RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
+}
+
+void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
 }
 
 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src, r0, r1);
-  // Compute (r1:r0) = -(r1:r0)
-  OpRegReg(kOpNeg, r0, r0);  // r0 = -r0
-  OpRegImm(kOpAdc, r1, 0);   // r1 = r1 + CF
-  OpRegReg(kOpNeg, r1, r1);  // r1 = -r1
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_result = ForceTempWide(rl_src);
+  if (rl_dest.low_reg == rl_src.high_reg) {
+    // The registers are the same, so we would clobber it before the use.
+    int temp_reg = AllocTemp();
+    OpRegCopy(temp_reg, rl_result.low_reg);
+    rl_result.high_reg = temp_reg;
+  }
+  OpRegReg(kOpNeg, rl_result.low_reg, rl_result.low_reg);    // rLow = -rLow
+  OpRegImm(kOpAdc, rl_result.high_reg, 0);                   // rHigh = rHigh + CF
+  OpRegReg(kOpNeg, rl_result.high_reg, rl_result.high_reg);  // rHigh = -rHigh
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -740,16 +1325,396 @@
   }
 }
 
+RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                                          RegLocation rl_src, int shift_amount) {
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  switch (opcode) {
+    case Instruction::SHL_LONG:
+    case Instruction::SHL_LONG_2ADDR:
+      DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
+      if (shift_amount == 32) {
+        OpRegCopy(rl_result.high_reg, rl_src.low_reg);
+        LoadConstant(rl_result.low_reg, 0);
+      } else if (shift_amount > 31) {
+        OpRegCopy(rl_result.high_reg, rl_src.low_reg);
+        FreeTemp(rl_src.high_reg);
+        NewLIR2(kX86Sal32RI, rl_result.high_reg, shift_amount - 32);
+        LoadConstant(rl_result.low_reg, 0);
+      } else {
+        OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+        NewLIR3(kX86Shld32RRI, rl_result.high_reg, rl_result.low_reg, shift_amount);
+        NewLIR2(kX86Sal32RI, rl_result.low_reg, shift_amount);
+      }
+      break;
+    case Instruction::SHR_LONG:
+    case Instruction::SHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+        NewLIR2(kX86Sar32RI, rl_result.high_reg, 31);
+      } else if (shift_amount > 31) {
+        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+        NewLIR2(kX86Sar32RI, rl_result.low_reg, shift_amount - 32);
+        NewLIR2(kX86Sar32RI, rl_result.high_reg, 31);
+      } else {
+        OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+        NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount);
+        NewLIR2(kX86Sar32RI, rl_result.high_reg, shift_amount);
+      }
+      break;
+    case Instruction::USHR_LONG:
+    case Instruction::USHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+        LoadConstant(rl_result.high_reg, 0);
+      } else if (shift_amount > 31) {
+        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+        NewLIR2(kX86Shr32RI, rl_result.low_reg, shift_amount - 32);
+        LoadConstant(rl_result.high_reg, 0);
+      } else {
+        OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+        NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount);
+        NewLIR2(kX86Shr32RI, rl_result.high_reg, shift_amount);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected case";
+  }
+  return rl_result;
+}
+
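For shift amounts below 32, the SHL path above pairs shld with sal: shld shifts the high word left while pulling the vacated bits from the top of the low word. A plain-C equivalence check (illustrative only; the function name is invented):

#include <cassert>
#include <cstdint>

uint64_t ShlLongVia32(uint32_t lo, uint32_t hi, int s) {  // requires 0 < s < 32
  uint32_t new_hi = (hi << s) | (lo >> (32 - s));  // kX86Shld32RRI
  uint32_t new_lo = lo << s;                       // kX86Sal32RI
  return (static_cast<uint64_t>(new_hi) << 32) | new_lo;
}

int main() {
  uint64_t v = 0x0123456789ABCDEFULL;
  for (int s = 1; s < 32; s++) {
    assert(ShlLongVia32(static_cast<uint32_t>(v),
                        static_cast<uint32_t>(v >> 32), s) == (v << s));
  }
  return 0;
}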
 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src1, RegLocation rl_shift) {
-  // Default implementation is just to ignore the constant case.
-  GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
+                                   RegLocation rl_src, RegLocation rl_shift) {
+  // Per spec, we only care about the low 6 bits of the shift amount.
+  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
+  if (shift_amount == 0) {
+    rl_src = LoadValueWide(rl_src, kCoreReg);
+    StoreValueWide(rl_dest, rl_src);
+    return;
+  } else if (shift_amount == 1 &&
+            (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
+    // Need to handle this here to avoid calling StoreValueWide twice.
+    GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
+    return;
+  }
+  if (BadOverlap(rl_src, rl_dest)) {
+    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
+    return;
+  }
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
+  StoreValueWide(rl_dest, rl_result);
 }
 
 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  // Default - bail to non-const handler.
-  GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+  switch (opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::AND_LONG:
+    case Instruction::OR_LONG:
+    case Instruction::XOR_LONG:
+      if (rl_src2.is_const) {
+        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+      } else {
+        DCHECK(rl_src1.is_const);
+        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
+      }
+      break;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (rl_src2.is_const) {
+        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+      } else {
+        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
+      }
+      break;
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG_2ADDR:
+    case Instruction::AND_LONG_2ADDR:
+      if (rl_src2.is_const) {
+        GenLongImm(rl_dest, rl_src2, opcode);
+      } else {
+        DCHECK(rl_src1.is_const);
+        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
+      }
+      break;
+    default:
+      // Default - bail to non-const handler.
+      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+      break;
+  }
+}
+
+bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
+  switch (op) {
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      return value == -1;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      return value == 0;
+    default:
+      return false;
+  }
+}
+
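IsNoOp captures the identity elements of the bitwise long operations: AND with all-ones and OR/XOR with zero leave the operand unchanged, so the immediate generators can skip emitting an instruction for that half of the register pair. ADD and SUB are deliberately absent, since even adding zero to the low word must leave a defined carry flag for the following adc/sbb and so cannot safely be elided. A small self-check of the identities:

#include <cassert>
#include <cstdint>

void CheckBitwiseIdentities(uint32_t x) {
  assert((x & 0xFFFFFFFFu) == x);  // AND with -1 leaves x unchanged.
  assert((x | 0u) == x);           // OR with 0 leaves x unchanged.
  assert((x ^ 0u) == x);           // XOR with 0 leaves x unchanged.
}
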
+X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
+                                bool is_high_op) {
+  bool rhs_in_mem = rhs.location != kLocPhysReg;
+  bool dest_in_mem = dest.location != kLocPhysReg;
+  DCHECK(!rhs_in_mem || !dest_in_mem);
+  switch (op) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      if (dest_in_mem) {
+        return is_high_op ? kX86Adc32MR : kX86Add32MR;
+      } else if (rhs_in_mem) {
+        return is_high_op ? kX86Adc32RM : kX86Add32RM;
+      }
+      return is_high_op ? kX86Adc32RR : kX86Add32RR;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (dest_in_mem) {
+        return is_high_op ? kX86Sbb32MR : kX86Sub32MR;
+      } else if (rhs_in_mem) {
+        return is_high_op ? kX86Sbb32RM : kX86Sub32RM;
+      }
+      return is_high_op ? kX86Sbb32RR : kX86Sub32RR;
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      if (dest_in_mem) {
+        return kX86And32MR;
+      }
+      return rhs_in_mem ? kX86And32RM : kX86And32RR;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if (dest_in_mem) {
+        return kX86Or32MR;
+      }
+      return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      if (dest_in_mem) {
+        return kX86Xor32MR;
+      }
+      return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << op;
+      return kX86Add32RR;
+  }
+}
+
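The is_high_op parameter pairs each low-word opcode with its carry-propagating twin (add/adc, sub/sbb); that pairing is how 64-bit arithmetic is composed from 32-bit instructions on x86-32. A portable sketch of the same composition, with illustrative names rather than the emitter API:

#include <cstdint>

// 64-bit add from 32-bit halves: the low add produces a carry that the
// high add consumes, matching the add/adc opcode pair chosen above.
void Add64(uint32_t* lo, uint32_t* hi, uint32_t rhs_lo, uint32_t rhs_hi) {
  uint32_t old_lo = *lo;
  *lo += rhs_lo;                    // low word: add
  uint32_t carry = (*lo < old_lo);  // carry out of the low word
  *hi += rhs_hi + carry;            // high word: adc
}

The bitwise cases return the same opcode for both halves because no carry crosses the word boundary.
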
+X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
+                                int32_t value) {
+  bool in_mem = loc.location != kLocPhysReg;
+  bool byte_imm = IS_SIMM8(value);
+  DCHECK(in_mem || !IsFpReg(loc.low_reg));
+  switch (op) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      if (byte_imm) {
+        if (in_mem) {
+          return is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
+        }
+        return is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
+      }
+      if (in_mem) {
+        return is_high_op ? kX86Adc32MI : kX86Add32MI;
+      }
+      return is_high_op ? kX86Adc32RI : kX86Add32RI;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (byte_imm) {
+        if (in_mem) {
+          return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
+        }
+        return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
+      }
+      if (in_mem) {
+        return is_high_op ? kX86Sbb32MI : kX86Sub32MI;
+      }
+      return is_high_op ? kX86Sbb32RI : kX86Sub32RI;
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      if (byte_imm) {
+        return in_mem ? kX86And32MI8 : kX86And32RI8;
+      }
+      return in_mem ? kX86And32MI : kX86And32RI;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if (byte_imm) {
+        return in_mem ? kX86Or32MI8 : kX86Or32RI8;
+      }
+      return in_mem ? kX86Or32MI : kX86Or32RI;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      if (byte_imm) {
+        return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
+      }
+      return in_mem ? kX86Xor32MI : kX86Xor32RI;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << op;
+      return kX86Add32MI;
+  }
+}
+
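The byte_imm branches are purely an encoding-size optimization: x86 sign-extends 8-bit immediates, so any value in [-128, 127] can use the shorter *I8 form. IS_SIMM8 is defined elsewhere in the tree; a sketch of an assumed equivalent predicate:

#include <cstdint>

// Assumed equivalent of IS_SIMM8: does the value survive a round trip
// through a sign-extended 8-bit immediate?
bool IsSimm8(int32_t value) {
  return value >= -128 && value <= 127;
}
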
+void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
+  DCHECK(rl_src.is_const);
+  int64_t val = mir_graph_->ConstantValueWide(rl_src);
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  rl_dest = UpdateLocWide(rl_dest);
+
+  // Can we just do this into memory?
+  if ((rl_dest.location == kLocDalvikFrame) ||
+      (rl_dest.location == kLocCompilerTemp)) {
+    int rBase = TargetReg(kSp);
+    int displacement = SRegOffset(rl_dest.s_reg_low);
+
+    if (!IsNoOp(op, val_lo)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
+      LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, val_lo);
+      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                              false /* is_load */, true /* is64bit */);
+    }
+    if (!IsNoOp(op, val_hi)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
+      LIR *lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, val_hi);
+      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                                false /* is_load */, true /* is64bit */);
+    }
+    return;
+  }
+
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  DCHECK_EQ(rl_result.location, kLocPhysReg);
+  DCHECK(!IsFpReg(rl_result.low_reg));
+
+  if (!IsNoOp(op, val_lo)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
+    NewLIR2(x86op, rl_result.low_reg, val_lo);
+  }
+  if (!IsNoOp(op, val_hi)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
+    NewLIR2(x86op, rl_result.high_reg, val_hi);
+  }
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
+                                RegLocation rl_src2, Instruction::Code op) {
+  DCHECK(rl_src2.is_const);
+  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  rl_dest = UpdateLocWide(rl_dest);
+  rl_src1 = UpdateLocWide(rl_src1);
+
+  // Can we do this directly into the destination registers?
+  if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
+      rl_dest.low_reg == rl_src1.low_reg && rl_dest.high_reg == rl_src1.high_reg &&
+      !IsFpReg(rl_dest.low_reg)) {
+    if (!IsNoOp(op, val_lo)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
+      NewLIR2(x86op, rl_dest.low_reg, val_lo);
+    }
+    if (!IsNoOp(op, val_hi)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
+      NewLIR2(x86op, rl_dest.high_reg, val_hi);
+    }
+    return;
+  }
+
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  DCHECK_EQ(rl_src1.location, kLocPhysReg);
+
+  // We need the values to be in a temporary.
+  RegLocation rl_result = ForceTempWide(rl_src1);
+  if (!IsNoOp(op, val_lo)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
+    NewLIR2(x86op, rl_result.low_reg, val_lo);
+  }
+  if (!IsNoOp(op, val_hi)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
+    NewLIR2(x86op, rl_result.high_reg, val_hi);
+  }
+
+  StoreFinalValueWide(rl_dest, rl_result);
+}
+
+// For final classes there are no sub-classes to check and so we can answer the instance-of
+// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
+void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                                    RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation object = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  int result_reg = rl_result.low_reg;
+
+  // SETcc only works on EAX..EBX, the registers whose low bytes are addressable.
+  if (result_reg == object.low_reg || result_reg >= 4) {
+    result_reg = AllocTypedTemp(false, kCoreReg);
+    DCHECK_LT(result_reg, 4);
+  }
+
+  // Assume that there is no match.
+  LoadConstant(result_reg, 0);
+  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.low_reg, 0, NULL);
+
+  int check_class = AllocTypedTemp(false, kCoreReg);
+
+  // If Method* is already in a register, we can save a copy.
+  RegLocation rl_method = mir_graph_->GetMethodLoc();
+  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
+    (sizeof(mirror::Class*) * type_idx);
+
+  if (rl_method.location == kLocPhysReg) {
+    if (use_declaring_class) {
+      LoadWordDisp(rl_method.low_reg,
+                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+                   check_class);
+    } else {
+      LoadWordDisp(rl_method.low_reg,
+                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                   check_class);
+      LoadWordDisp(check_class, offset_of_type, check_class);
+    }
+  } else {
+    LoadCurrMethodDirect(check_class);
+    if (use_declaring_class) {
+      LoadWordDisp(check_class,
+                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+                   check_class);
+    } else {
+      LoadWordDisp(check_class,
+                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                   check_class);
+      LoadWordDisp(check_class, offset_of_type, check_class);
+    }
+  }
+
+  // Compare the computed class to the class in the object.
+  DCHECK_EQ(object.location, kLocPhysReg);
+  OpRegMem(kOpCmp, check_class, object.low_reg,
+           mirror::Object::ClassOffset().Int32Value());
+
+  // Set the low byte of the result to 0 or 1 from the compare condition code.
+  NewLIR2(kX86Set8R, result_reg, kX86CondEq);
+
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  null_branchover->target = target;
+  FreeTemp(check_class);
+  if (IsTemp(result_reg)) {
+    OpRegCopy(rl_result.low_reg, result_reg);
+    FreeTemp(result_reg);
+  }
+  StoreValue(rl_dest, rl_result);
 }
 
 }  // namespace art
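At the object level, GenInstanceofFinal compiles down to a null check plus a single class-pointer comparison, with SET8 turning the compare's flags into a 0/1 result. A sketch of the predicate (struct and field names are illustrative, not ART's actual layout):

struct Class;
struct Object { Class* klass; };

// For a final class there are no subclasses, so instance-of collapses to
// an exact class-pointer comparison (the cmp + set8 sequence above).
bool InstanceOfFinal(const Object* obj, const Class* final_class) {
  return obj != nullptr && obj->klass == final_class;
}
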
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 5c993c5..f223548 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -243,9 +243,9 @@
             }
             break;
           case 't':
-            buf += StringPrintf("0x%08x (L%p)",
-                                reinterpret_cast<uintptr_t>(base_addr)
-                                + lir->offset + operand, lir->target);
+            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
+                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
+                                lir->target);
             break;
           default:
             buf += StringPrintf("DecodeError '%c'", fmt[i]);
@@ -679,31 +679,24 @@
   }
 
   DCHECK_NE(loc.s_reg_low, INVALID_SREG);
-  if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) {
-    // Need a wide vector register.
-    low_reg = AllocTypedTemp(true, reg_class);
-    loc.low_reg = low_reg;
-    loc.high_reg = low_reg;  // Play nice with existing code.
-    loc.vec_len = kVectorLength8;
-    if (update) {
-      loc.location = kLocPhysReg;
-      MarkLive(loc.low_reg, loc.s_reg_low);
-    }
+  DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
+
+  new_regs = AllocTypedTempPair(loc.fp, reg_class);
+  loc.low_reg = new_regs & 0xff;
+  loc.high_reg = (new_regs >> 8) & 0xff;
+
+  if (loc.low_reg == loc.high_reg) {
     DCHECK(IsFpReg(loc.low_reg));
+    loc.vec_len = kVectorLength8;
   } else {
-    DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
-
-    new_regs = AllocTypedTempPair(loc.fp, reg_class);
-    loc.low_reg = new_regs & 0xff;
-    loc.high_reg = (new_regs >> 8) & 0xff;
-
     MarkPair(loc.low_reg, loc.high_reg);
-    if (update) {
-      loc.location = kLocPhysReg;
-      MarkLive(loc.low_reg, loc.s_reg_low);
+  }
+  if (update) {
+    loc.location = kLocPhysReg;
+    MarkLive(loc.low_reg, loc.s_reg_low);
+    if (loc.low_reg != loc.high_reg) {
       MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
     }
-    DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
   }
   return loc;
 }
@@ -796,4 +789,23 @@
   // Just use the standard code to do the generation.
   Mir2Lir::GenConstWide(rl_dest, value);
 }
+
+// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
+void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
+  LOG(INFO) << "location: " << loc.location << ','
+             << (loc.wide ? " w" : "  ")
+             << (loc.defined ? " D" : "  ")
+             << (loc.is_const ? " c" : "  ")
+             << (loc.fp ? " F" : "  ")
+             << (loc.core ? " C" : "  ")
+             << (loc.ref ? " r" : "  ")
+             << (loc.high_word ? " h" : "  ")
+             << (loc.home ? " H" : "  ")
+             << " vec_len: " << loc.vec_len
+             << ", low: " << static_cast<int>(loc.low_reg)
+             << ", high: " << static_cast<int>(loc.high_reg)
+             << ", s_reg: " << loc.s_reg_low
+             << ", orig: " << loc.orig_sreg;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 91c39fa..bd38c03 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -141,7 +141,14 @@
     case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
     case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
     case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
-    case kOpMov: return LoadConstantNoClobber(r_dest_src1, value);
+    case kOpMov:
+      /*
+       * Moving the constant zero into a register could be specialized as an xor of that
+       * register. However, xor sets eflags while the move does not. For that reason, always
+       * emit the move here; callers that can tolerate clobbered flags should use
+       * LoadConstantNoClobber instead.
+       */
+      opcode = kX86Mov32RI;
+      break;
     case kOpMul:
       opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
       return NewLIR3(opcode, r_dest_src1, r_dest_src1, value);
@@ -346,8 +353,7 @@
         if (val_hi != 0) {
           r_dest_hi = AllocTempDouble();
           LoadConstantNoClobber(r_dest_hi, val_hi);
-          NewLIR2(kX86PsllqRI, r_dest_hi, 32);
-          NewLIR2(kX86OrpsRR, r_dest_lo, r_dest_hi);
+          NewLIR2(kX86PunpckldqRR, r_dest_lo, r_dest_hi);
           FreeTemp(r_dest_hi);
         }
       }
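The punpckldq rewrite fuses what previously took two instructions (a psllq by 32 on the high half, then an orps) into one: punpckldq interleaves the low doublewords of its operands, keeping the destination's low dword and placing the source's low dword in bits 32-63. The same effect expressed with SSE2 intrinsics, as a sanity sketch:

#include <cstdint>
#include <emmintrin.h>  // SSE2

// Combine two 32-bit halves into one 64-bit lane, as punpckldq does.
uint64_t Combine(uint32_t lo, uint32_t hi) {
  __m128i vlo = _mm_cvtsi32_si128(static_cast<int32_t>(lo));
  __m128i vhi = _mm_cvtsi32_si128(static_cast<int32_t>(hi));
  __m128i packed = _mm_unpacklo_epi32(vlo, vhi);  // lanes: [lo, hi, 0, 0]
  uint64_t out;
  _mm_storel_epi64(reinterpret_cast<__m128i*>(&out), packed);
  return out;  // hi:lo as one 64-bit value
}
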
@@ -435,15 +441,37 @@
                      displacement + LOWORD_OFFSET);
     } else {
       if (rBase == r_dest) {
-        load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
-                        displacement + HIWORD_OFFSET);
-        load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
-                       displacement + LOWORD_OFFSET);
+        if (r_dest_hi == r_index) {
+          // We can't use either register for the first load.
+          int temp = AllocTemp();
+          load2 = NewLIR5(opcode, temp, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+          load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+          OpRegCopy(r_dest_hi, temp);
+          FreeTemp(temp);
+        } else {
+          load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+          load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+        }
       } else {
-        load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
-                       displacement + LOWORD_OFFSET);
-        load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
-                        displacement + HIWORD_OFFSET);
+        if (r_dest == r_index) {
+          // We can't use either register for the first load.
+          int temp = AllocTemp();
+          load = NewLIR5(opcode, temp, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+          load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+          OpRegCopy(r_dest, temp);
+          FreeTemp(temp);
+        } else {
+          load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+          load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+        }
       }
     }
   }
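The expanded paths above guard a clobbering hazard: when a 64-bit load is split into two 32-bit loads, the register written by the first load must not be the index register still needed by the second. The rewrite orders the loads so the aliasing destination is written last and, when both orderings conflict, stages the first load through a scratch register. A distilled sketch of that decision (illustrative, not the emitter API):

// Returns true when neither load order is safe and a temp is required.
bool NeedsScratch(int r_base, int r_index, int r_dest_lo, int r_dest_hi) {
  if (r_base == r_dest_lo) {
    // Must load the high half first; unsafe only if that half is the index.
    return r_dest_hi == r_index;
  }
  // Otherwise load the low half first; unsafe only if it is the index.
  return r_dest_lo == r_index;
}
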
@@ -572,9 +600,16 @@
   NewLIR2(kX86MovdxrRR, fp_reg, low_reg);
   int tmp_reg = AllocTempDouble();
   NewLIR2(kX86MovdxrRR, tmp_reg, high_reg);
-  NewLIR2(kX86PsllqRI, tmp_reg, 32);
-  NewLIR2(kX86OrpsRR, fp_reg, tmp_reg);
+  NewLIR2(kX86PunpckldqRR, fp_reg, tmp_reg);
   FreeTemp(tmp_reg);
 }
 
+LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                   int offset, int check_value, LIR* target) {
+  NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg, offset,
+          check_value);
+  LIR* branch = OpCondBranch(cond, target);
+  return branch;
+}
+
 }  // namespace art
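OpCmpMemImmBranch leans on x86's ability to compare a memory operand against an immediate directly, so the value never passes through a register (temp_reg is accepted for interface compatibility but unused on x86). Semantically the emitted cmp/jcc pair evaluates something like this sketch of the kCondEq case:

#include <cstdint>
#include <cstring>

// What cmp [base + offset], imm followed by je evaluates (kCondEq case;
// other condition codes are analogous).
bool CmpMemImmEq(const uint8_t* base, int offset, int32_t check_value) {
  int32_t loaded;
  std::memcpy(&loaded, base + offset, sizeof(loaded));
  return loaded == check_value;
}
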
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 1488f5d..7f35d06 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -304,6 +304,8 @@
   BinaryShiftOpCode(kX86Sar),
 #undef BinaryShiftOpcode
   kX86Cmc,
+  kX86Shld32RRI,
+  kX86Shrd32RRI,
 #define UnaryOpcode(opcode, reg, mem, array) \
   opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
   opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
@@ -316,6 +318,7 @@
   UnaryOpcode(kX86Imul, DaR, DaM, DaA),
   UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
   UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
+  kx86Cdq32Da,
   kX86Bswap32R,
   kX86Push32R, kX86Pop32R,
 #undef UnaryOpcode
@@ -349,6 +352,7 @@
   Binary0fOpCode(kX86Subss),    // float subtract
   Binary0fOpCode(kX86Divsd),    // double divide
   Binary0fOpCode(kX86Divss),    // float divide
+  Binary0fOpCode(kX86Punpckldq),  // Interleave low-order double words
   kX86PsrlqRI,                  // right shift of floating point registers
   kX86PsllqRI,                  // left shift of floating point registers
   kX86SqrtsdRR,                 // sqrt of floating point register
@@ -397,6 +401,7 @@
   kRegImm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
   kRegRegImm, kRegMemImm, kRegArrayImm,    // RRI, RMI and RAI instruction kinds.
   kMovRegImm,                              // Shorter form move RI.
+  kRegRegImmRev,                           // RRI with first reg in r/m
   kShiftRegImm, kShiftMemImm, kShiftArrayImm,  // Shift opcode with immediate.
   kShiftRegCl, kShiftMemCl, kShiftArrayCl,     // Shift opcode with register CL.
   kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
new file mode 100644
index 0000000..edccec5
--- /dev/null
+++ b/compiler/dex/verification_results.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "verification_results.h"
+
+#include "base/stl_util.h"
+#include "base/mutex.h"
+#include "base/mutex-inl.h"
+#include "thread.h"
+#include "thread-inl.h"
+#include "verified_method.h"
+#include "verifier/method_verifier.h"
+#include "verifier/method_verifier-inl.h"
+
+namespace art {
+
+VerificationResults::VerificationResults()
+    : verified_methods_lock_("compiler verified methods lock"),
+      verified_methods_(),
+      rejected_classes_lock_("compiler rejected classes lock"),
+      rejected_classes_() {
+}
+
+VerificationResults::~VerificationResults() {
+  Thread* self = Thread::Current();
+  {
+    WriterMutexLock mu(self, verified_methods_lock_);
+    STLDeleteValues(&verified_methods_);
+  }
+}
+
+bool VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
+  MethodReference ref = method_verifier->GetMethodReference();
+  bool compile = IsCandidateForCompilation(ref, method_verifier->GetAccessFlags());
+  // TODO: Check also for virtual/interface invokes when DEX-to-DEX supports devirtualization.
+  if (!compile && !method_verifier->HasCheckCasts()) {
+    return true;
+  }
+
+  const VerifiedMethod* verified_method = VerifiedMethod::Create(method_verifier, compile);
+  if (verified_method == nullptr) {
+    DCHECK(method_verifier->HasFailures());
+    return false;
+  }
+
+  WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
+  auto it = verified_methods_.find(ref);
+  if (it != verified_methods_.end()) {
+    // TODO: Investigate why we are doing the work again for this method and try to avoid it.
+    LOG(WARNING) << "Method processed more than once: "
+        << PrettyMethod(ref.dex_method_index, *ref.dex_file);
+    DCHECK_EQ(it->second->GetDevirtMap().size(), verified_method->GetDevirtMap().size());
+    DCHECK_EQ(it->second->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size());
+    DCHECK_EQ(it->second->GetDexGcMap().size(), verified_method->GetDexGcMap().size());
+    delete it->second;
+    verified_methods_.erase(it);
+  }
+  verified_methods_.Put(ref, verified_method);
+  DCHECK(verified_methods_.find(ref) != verified_methods_.end());
+  return true;
+}
+
+const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref) {
+  ReaderMutexLock mu(Thread::Current(), verified_methods_lock_);
+  auto it = verified_methods_.find(ref);
+  return (it != verified_methods_.end()) ? it->second : nullptr;
+}
+
+void VerificationResults::AddRejectedClass(ClassReference ref) {
+  {
+    WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
+    rejected_classes_.insert(ref);
+  }
+  DCHECK(IsClassRejected(ref));
+}
+
+bool VerificationResults::IsClassRejected(ClassReference ref) {
+  ReaderMutexLock mu(Thread::Current(), rejected_classes_lock_);
+  return (rejected_classes_.find(ref) != rejected_classes_.end());
+}
+
+bool VerificationResults::IsCandidateForCompilation(MethodReference& method_ref,
+                                                    const uint32_t access_flags) {
+#ifdef ART_SEA_IR_MODE
+    bool use_sea = Runtime::Current()->IsSeaIRMode();
+    use_sea = use_sea && (std::string::npos != PrettyMethod(
+                          method_ref.dex_method_index, *(method_ref.dex_file)).find("fibonacci"));
+    if (use_sea) return true;
+#endif
+  // Don't compile class initializers, ever.
+  if (((access_flags & kAccConstructor) != 0) && ((access_flags & kAccStatic) != 0)) {
+    return false;
+  }
+  return (Runtime::Current()->GetCompilerFilter() != Runtime::kInterpretOnly);
+}
+
+}  // namespace art
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
new file mode 100644
index 0000000..2eb0713
--- /dev/null
+++ b/compiler/dex/verification_results.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_VERIFICATION_RESULTS_H_
+#define ART_COMPILER_DEX_VERIFICATION_RESULTS_H_
+
+#include <stdint.h>
+#include <set>
+#include <vector>
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "class_reference.h"
+#include "method_reference.h"
+#include "safe_map.h"
+
+namespace art {
+
+namespace verifier {
+class MethodVerifier;
+}  // namespace verifier
+
+class VerifiedMethod;
+
+class VerificationResults {
+  public:
+    VerificationResults();
+    ~VerificationResults();
+
+    bool ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+        LOCKS_EXCLUDED(verified_methods_lock_);
+
+    const VerifiedMethod* GetVerifiedMethod(MethodReference ref)
+        LOCKS_EXCLUDED(verified_methods_lock_);
+
+    void AddRejectedClass(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
+    bool IsClassRejected(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
+
+    static bool IsCandidateForCompilation(MethodReference& method_ref,
+                                          const uint32_t access_flags);
+
+  private:
+    // Verified methods.
+    typedef SafeMap<MethodReference, const VerifiedMethod*,
+        MethodReferenceComparator> VerifiedMethodMap;
+    ReaderWriterMutex verified_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+    VerifiedMethodMap verified_methods_;
+
+    // Rejected classes.
+    ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+    std::set<ClassReference> rejected_classes_ GUARDED_BY(rejected_classes_lock_);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_VERIFICATION_RESULTS_H_
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
new file mode 100644
index 0000000..0f812a4
--- /dev/null
+++ b/compiler/dex/verified_method.cc
@@ -0,0 +1,312 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "verified_method.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/stl_util.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
+#include "dex_instruction-inl.h"
+#include "base/mutex.h"
+#include "base/mutex-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
+#include "mirror/class.h"
+#include "mirror/class-inl.h"
+#include "mirror/dex_cache.h"
+#include "mirror/dex_cache-inl.h"
+#include "mirror/object.h"
+#include "mirror/object-inl.h"
+#include "UniquePtr.h"
+#include "verifier/dex_gc_map.h"
+#include "verifier/method_verifier.h"
+#include "verifier/method_verifier-inl.h"
+#include "verifier/register_line.h"
+#include "verifier/register_line-inl.h"
+
+namespace art {
+
+const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_verifier,
+                                             bool compile) {
+  UniquePtr<VerifiedMethod> verified_method(new VerifiedMethod);
+  if (compile) {
+    /* Generate a register map. */
+    if (!verified_method->GenerateGcMap(method_verifier)) {
+      CHECK(method_verifier->HasFailures());
+      return nullptr;  // Not a real failure, but a failure to encode.
+    }
+    if (kIsDebugBuild) {
+      VerifyGcMap(method_verifier, verified_method->dex_gc_map_);
+    }
+
+    // TODO: move this out when DEX-to-DEX supports devirtualization.
+    if (method_verifier->HasVirtualOrInterfaceInvokes()) {
+      verified_method->GenerateDevirtMap(method_verifier);
+    }
+  }
+
+  if (method_verifier->HasCheckCasts()) {
+    verified_method->GenerateSafeCastSet(method_verifier);
+  }
+  return verified_method.release();
+}
+
+const MethodReference* VerifiedMethod::GetDevirtTarget(uint32_t dex_pc) const {
+  auto it = devirt_map_.find(dex_pc);
+  return (it != devirt_map_.end()) ? &it->second : nullptr;
+}
+
+bool VerifiedMethod::IsSafeCast(uint32_t pc) const {
+  return std::binary_search(safe_cast_set_.begin(), safe_cast_set_.end(), pc);
+}
+
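IsSafeCast depends on safe_cast_set_ staying sorted: the PCs are appended in increasing order (and DCHECKed below in GenerateSafeCastSet), so membership is an O(log n) std::binary_search over a compact vector rather than a node-based set. The lookup pattern in isolation:

#include <algorithm>
#include <cstdint>
#include <vector>

// Membership test over a sorted vector, as safe_cast_set_ is used.
// Precondition: pcs is in ascending order.
bool ContainsPc(const std::vector<uint32_t>& pcs, uint32_t pc) {
  return std::binary_search(pcs.begin(), pcs.end(), pc);
}
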
+bool VerifiedMethod::GenerateGcMap(verifier::MethodVerifier* method_verifier) {
+  DCHECK(dex_gc_map_.empty());
+  size_t num_entries, ref_bitmap_bits, pc_bits;
+  ComputeGcMapSizes(method_verifier, &num_entries, &ref_bitmap_bits, &pc_bits);
+  // There's a single byte to encode the size of each bitmap.
+  if (ref_bitmap_bits >= (8 /* bits per byte */ * 8192 /* 13-bit size */)) {
+    // TODO: Either a better GC map format or per method failures.
+    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Cannot encode GC map for method with " << ref_bitmap_bits << " registers";
+    return false;
+  }
+  size_t ref_bitmap_bytes = (ref_bitmap_bits + 7) / 8;
+  // There are 2 bytes to encode the number of entries.
+  if (num_entries >= 65536) {
+    // TODO: Either a better GC map format or per method failures.
+    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Cannot encode GC map for method with " << num_entries << " entries";
+    return false;
+  }
+  size_t pc_bytes;
+  verifier::RegisterMapFormat format;
+  if (pc_bits <= 8) {
+    format = verifier::kRegMapFormatCompact8;
+    pc_bytes = 1;
+  } else if (pc_bits <= 16) {
+    format = verifier::kRegMapFormatCompact16;
+    pc_bytes = 2;
+  } else {
+    // TODO: Either a better GC map format or per method failures.
+    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Cannot encode GC map for method with "
+        << (1 << pc_bits) << " instructions (number is rounded up to nearest power of 2)";
+    return false;
+  }
+  size_t table_size = ((pc_bytes + ref_bitmap_bytes) * num_entries) + 4;
+  dex_gc_map_.reserve(table_size);
+  // Write table header.
+  dex_gc_map_.push_back(format | ((ref_bitmap_bytes & ~0xFF) >> 5));
+  dex_gc_map_.push_back(ref_bitmap_bytes & 0xFF);
+  dex_gc_map_.push_back(num_entries & 0xFF);
+  dex_gc_map_.push_back((num_entries >> 8) & 0xFF);
+  // Write table data.
+  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
+  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
+    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
+      dex_gc_map_.push_back(i & 0xFF);
+      if (pc_bytes == 2) {
+        dex_gc_map_.push_back((i >> 8) & 0xFF);
+      }
+      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
+      line->WriteReferenceBitMap(dex_gc_map_, ref_bitmap_bytes);
+    }
+  }
+  DCHECK_EQ(dex_gc_map_.size(), table_size);
+  return true;
+}
+
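The four header bytes written above pack three fields: the format tag shares byte 0 with bits 8-12 of ref_bitmap_bytes (hence the 13-bit, 8192-byte ceiling checked earlier), byte 1 holds the low 8 bits of ref_bitmap_bytes, and bytes 2-3 hold num_entries little-endian. A decoding sketch under the assumption, implied by the masking above, that the format tag occupies the low 3 bits:

#include <cstddef>
#include <cstdint>

struct GcMapHeader {
  uint8_t format;           // low 3 bits of byte 0 (assumed mask)
  size_t ref_bitmap_bytes;  // 13 bits: 8-12 from byte 0, 0-7 from byte 1
  size_t num_entries;       // 16 bits, little-endian in bytes 2-3
};

GcMapHeader DecodeGcMapHeader(const uint8_t* data) {
  GcMapHeader h;
  h.format = data[0] & 0x07;
  h.ref_bitmap_bytes = (static_cast<size_t>(data[0] & 0xF8) << 5) | data[1];
  h.num_entries = data[2] | (static_cast<size_t>(data[3]) << 8);
  return h;
}

Each entry then contributes pc_bytes of dex PC plus ref_bitmap_bytes of bitmap, which is where the table_size formula comes from.
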
+void VerifiedMethod::VerifyGcMap(verifier::MethodVerifier* method_verifier,
+                                 const std::vector<uint8_t>& data) {
+  // Check that for every GC point there is a map entry, that there aren't entries for
+  // non-GC points, that the table data is well formed, and that all references are
+  // marked (or not) in the bitmap.
+  verifier::DexPcToReferenceMap map(&data[0]);
+  DCHECK_EQ(data.size(), map.RawSize());
+  size_t map_index = 0;
+  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
+  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
+    const uint8_t* reg_bitmap = map.FindBitMap(i, false);
+    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
+      DCHECK_LT(map_index, map.NumEntries());
+      DCHECK_EQ(map.GetDexPc(map_index), i);
+      DCHECK_EQ(map.GetBitMap(map_index), reg_bitmap);
+      map_index++;
+      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
+      for (size_t j = 0; j < code_item->registers_size_; j++) {
+        if (line->GetRegisterType(j).IsNonZeroReferenceTypes()) {
+          DCHECK_LT(j / 8, map.RegWidth());
+          DCHECK_EQ((reg_bitmap[j / 8] >> (j % 8)) & 1, 1);
+        } else if ((j / 8) < map.RegWidth()) {
+          DCHECK_EQ((reg_bitmap[j / 8] >> (j % 8)) & 1, 0);
+        } else {
+          // If a register doesn't contain a reference then the bitmap may be shorter than the line.
+        }
+      }
+    } else {
+      DCHECK(reg_bitmap == NULL);
+    }
+  }
+}
+
+void VerifiedMethod::ComputeGcMapSizes(verifier::MethodVerifier* method_verifier,
+                                       size_t* gc_points, size_t* ref_bitmap_bits,
+                                       size_t* log2_max_gc_pc) {
+  size_t local_gc_points = 0;
+  size_t max_insn = 0;
+  size_t max_ref_reg = -1;
+  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
+  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
+    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
+      local_gc_points++;
+      max_insn = i;
+      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
+      max_ref_reg = line->GetMaxNonZeroReferenceReg(max_ref_reg);
+    }
+  }
+  *gc_points = local_gc_points;
+  *ref_bitmap_bits = max_ref_reg + 1;  // If max register is 0 we need 1 bit to encode (i.e. +1).
+  size_t i = 0;
+  while ((1U << i) <= max_insn) {
+    i++;
+  }
+  *log2_max_gc_pc = i;
+}
+
+void VerifiedMethod::GenerateDevirtMap(verifier::MethodVerifier* method_verifier) {
+  // It is risky to rely on reg_types for sharpening when there has been a soft
+  // verification failure; we might end up sharpening to a wrong implementation. Just abort.
+  if (method_verifier->HasFailures()) {
+    return;
+  }
+
+  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
+  const uint16_t* insns = code_item->insns_;
+  const Instruction* inst = Instruction::At(insns);
+  const Instruction* end = Instruction::At(insns + code_item->insns_size_in_code_units_);
+
+  for (; inst < end; inst = inst->Next()) {
+    bool is_virtual   = (inst->Opcode() == Instruction::INVOKE_VIRTUAL) ||
+        (inst->Opcode() == Instruction::INVOKE_VIRTUAL_RANGE);
+    bool is_interface = (inst->Opcode() == Instruction::INVOKE_INTERFACE) ||
+        (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE);
+
+    if (!is_interface && !is_virtual) {
+      continue;
+    }
+    // Get reg type for register holding the reference to the object that will be dispatched upon.
+    uint32_t dex_pc = inst->GetDexPc(insns);
+    verifier::RegisterLine* line = method_verifier->GetRegLine(dex_pc);
+    bool is_range = (inst->Opcode() == Instruction::INVOKE_VIRTUAL_RANGE) ||
+        (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE);
+    const verifier::RegType&
+        reg_type(line->GetRegisterType(is_range ? inst->VRegC_3rc() : inst->VRegC_35c()));
+
+    if (!reg_type.HasClass()) {
+      // We will compute devirtualization information only when we know the Class of the reg type.
+      continue;
+    }
+    mirror::Class* reg_class = reg_type.GetClass();
+    if (reg_class->IsInterface()) {
+      // We can't devirtualize when the known type of the register is an interface.
+      continue;
+    }
+    if (reg_class->IsAbstract() && !reg_class->IsArrayClass()) {
+      // We can't devirtualize abstract classes except on arrays of abstract classes.
+      continue;
+    }
+    mirror::ArtMethod* abstract_method = method_verifier->GetDexCache()->GetResolvedMethod(
+        is_range ? inst->VRegB_3rc() : inst->VRegB_35c());
+    if (abstract_method == NULL) {
+      // If the method is not found in the cache this means that it was never found
+      // by ResolveMethodAndCheckAccess() called when verifying invoke_*.
+      continue;
+    }
+    // Find the concrete method.
+    mirror::ArtMethod* concrete_method = NULL;
+    if (is_interface) {
+      concrete_method = reg_type.GetClass()->FindVirtualMethodForInterface(abstract_method);
+    }
+    if (is_virtual) {
+      concrete_method = reg_type.GetClass()->FindVirtualMethodForVirtual(abstract_method);
+    }
+    if (concrete_method == NULL || concrete_method->IsAbstract()) {
+      // In cases where concrete_method is not found, or is abstract, continue to the next invoke.
+      continue;
+    }
+    if (reg_type.IsPreciseReference() || concrete_method->IsFinal() ||
+        concrete_method->GetDeclaringClass()->IsFinal()) {
+      // If we knew exactly the class being dispatched upon, or if the target method cannot
+      // be overridden, record the target to be used in the compiler driver.
+      MethodReference concrete_ref(
+          concrete_method->GetDeclaringClass()->GetDexCache()->GetDexFile(),
+          concrete_method->GetDexMethodIndex());
+      devirt_map_.Put(dex_pc, concrete_ref);
+    }
+  }
+}
+
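The recording condition at the end is where soundness lives: a virtual or interface call may only be devirtualized when the runtime target is provably unique, i.e. the receiver's type is known precisely, or the resolved method or its declaring class is final so no override can exist. Distilled as a hypothetical predicate over the three facts checked above:

// Hypothetical distillation of the checks above; the real inputs come
// from the verifier's RegType and mirror::ArtMethod.
bool CanRecordDevirtTarget(bool receiver_type_precise,
                           bool method_is_final,
                           bool declaring_class_is_final) {
  return receiver_type_precise || method_is_final || declaring_class_is_final;
}
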
+void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifier) {
+  /*
+   * Walk over the method code and collect, into a set, the dex PCs of cast
+   * instructions whose cast is statically known to succeed; code generation
+   * uses this set to elide those casts.
+   */
+  if (method_verifier->HasFailures()) {
+    return;
+  }
+  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
+  const Instruction* inst = Instruction::At(code_item->insns_);
+  const Instruction* end = Instruction::At(code_item->insns_ +
+                                           code_item->insns_size_in_code_units_);
+
+  for (; inst < end; inst = inst->Next()) {
+    Instruction::Code code = inst->Opcode();
+    if ((code == Instruction::CHECK_CAST) || (code == Instruction::APUT_OBJECT)) {
+      uint32_t dex_pc = inst->GetDexPc(code_item->insns_);
+      const verifier::RegisterLine* line = method_verifier->GetRegLine(dex_pc);
+      bool is_safe_cast = false;
+      if (code == Instruction::CHECK_CAST) {
+        const verifier::RegType& reg_type(line->GetRegisterType(inst->VRegA_21c()));
+        const verifier::RegType& cast_type =
+            method_verifier->ResolveCheckedClass(inst->VRegB_21c());
+        is_safe_cast = cast_type.IsStrictlyAssignableFrom(reg_type);
+      } else {
+        const verifier::RegType& array_type(line->GetRegisterType(inst->VRegB_23x()));
+        // We only know it's safe to assign to an array if the array type is precise. For
+        // example, an Object[] can have any type of object stored in it, but it may also be
+        // assigned a String[], in which case the stores need to be of Strings.
+        if (array_type.IsPreciseReference()) {
+          const verifier::RegType& value_type(line->GetRegisterType(inst->VRegA_23x()));
+          const verifier::RegType& component_type = method_verifier->GetRegTypeCache()
+              ->GetComponentType(array_type, method_verifier->GetClassLoader());
+          is_safe_cast = component_type.IsStrictlyAssignableFrom(value_type);
+        }
+      }
+      if (is_safe_cast) {
+        // Verify ordering for push_back() to the sorted vector.
+        DCHECK(safe_cast_set_.empty() || safe_cast_set_.back() < dex_pc);
+        safe_cast_set_.push_back(dex_pc);
+      }
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
new file mode 100644
index 0000000..aa0e72a
--- /dev/null
+++ b/compiler/dex/verified_method.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_VERIFIED_METHOD_H_
+#define ART_COMPILER_DEX_VERIFIED_METHOD_H_
+
+#include <vector>
+
+#include "method_reference.h"
+#include "safe_map.h"
+
+namespace art {
+
+namespace verifier {
+class MethodVerifier;
+}  // namespace verifier
+
+class VerifiedMethod {
+ public:
+  // Cast elision set type.
+  // Since we're adding the dex PCs to the set in increasing order, a sorted vector
+  // is better for performance (not just memory usage), especially for large sets.
+  typedef std::vector<uint32_t> SafeCastSet;
+
+  // Devirtualization map type maps dex offset to concrete method reference.
+  typedef SafeMap<uint32_t, MethodReference> DevirtualizationMap;
+
+  static const VerifiedMethod* Create(verifier::MethodVerifier* method_verifier, bool compile)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ~VerifiedMethod() = default;
+
+  const std::vector<uint8_t>& GetDexGcMap() const {
+    return dex_gc_map_;
+  }
+
+  const DevirtualizationMap& GetDevirtMap() const {
+    return devirt_map_;
+  }
+
+  const SafeCastSet& GetSafeCastSet() const {
+    return safe_cast_set_;
+  }
+
+  // Returns the devirtualization target method, or nullptr if none.
+  const MethodReference* GetDevirtTarget(uint32_t dex_pc) const;
+
+  // Returns true if the cast can statically be verified to be redundant
+  // by using the check-cast elision peephole optimization in the verifier.
+  bool IsSafeCast(uint32_t pc) const;
+
+ private:
+  VerifiedMethod() = default;
+
+  /*
+   * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
+   * verification). For type-precise determination we have all the data we need, so we just need to
+   * encode it in some clever fashion.
+   * Stores the data in dex_gc_map_, returns true on success and false on failure.
+   */
+  bool GenerateGcMap(verifier::MethodVerifier* method_verifier);
+
+  // Verify that the GC map associated with method_ is well formed.
+  static void VerifyGcMap(verifier::MethodVerifier* method_verifier,
+                          const std::vector<uint8_t>& data);
+
+  // Compute sizes for GC map data.
+  static void ComputeGcMapSizes(verifier::MethodVerifier* method_verifier,
+                                size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
+
+  // Generate devirtualization map into devirt_map_.
+  void GenerateDevirtMap(verifier::MethodVerifier* method_verifier)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Generate safe cast set into safe_cast_set_.
+  void GenerateSafeCastSet(verifier::MethodVerifier* method_verifier)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  std::vector<uint8_t> dex_gc_map_;
+  DevirtualizationMap devirt_map_;
+  SafeCastSet safe_cast_set_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_VERIFIED_METHOD_H_
diff --git a/compiler/dex/verified_methods_data.cc b/compiler/dex/verified_methods_data.cc
deleted file mode 100644
index e6c4dda..0000000
--- a/compiler/dex/verified_methods_data.cc
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/stl_util.h"
-#include "dex_file.h"
-#include "dex_instruction.h"
-#include "dex_instruction-inl.h"
-#include "base/mutex.h"
-#include "base/mutex-inl.h"
-#include "mirror/art_method.h"
-#include "mirror/art_method-inl.h"
-#include "mirror/class.h"
-#include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
-#include "mirror/dex_cache-inl.h"
-#include "mirror/object.h"
-#include "mirror/object-inl.h"
-#include "verified_methods_data.h"
-#include "verifier/dex_gc_map.h"
-#include "verifier/method_verifier.h"
-#include "verifier/method_verifier-inl.h"
-#include "verifier/register_line.h"
-#include "verifier/register_line-inl.h"
-
-namespace art {
-
-VerifiedMethodsData::VerifiedMethodsData()
-    : dex_gc_maps_lock_("compiler GC maps lock"),
-      dex_gc_maps_(),
-      safecast_map_lock_("compiler Cast Elision lock"),
-      safecast_map_(),
-      devirt_maps_lock_("compiler Devirtualization lock"),
-      devirt_maps_(),
-      rejected_classes_lock_("compiler rejected classes lock"),
-      rejected_classes_() {
-}
-
-VerifiedMethodsData::~VerifiedMethodsData() {
-  Thread* self = Thread::Current();
-  {
-    WriterMutexLock mu(self, dex_gc_maps_lock_);
-    STLDeleteValues(&dex_gc_maps_);
-  }
-  {
-    WriterMutexLock mu(self, safecast_map_lock_);
-    STLDeleteValues(&safecast_map_);
-  }
-  {
-    WriterMutexLock mu(self, devirt_maps_lock_);
-    STLDeleteValues(&devirt_maps_);
-  }
-}
-
-bool VerifiedMethodsData::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
-  MethodReference ref = method_verifier->GetMethodReference();
-  bool compile = IsCandidateForCompilation(ref, method_verifier->GetAccessFlags());
-  if (compile) {
-    /* Generate a register map and add it to the method. */
-    const std::vector<uint8_t>* dex_gc_map = GenerateGcMap(method_verifier);
-    if (dex_gc_map == NULL) {
-      DCHECK(method_verifier->HasFailures());
-      return false;  // Not a real failure, but a failure to encode
-    }
-    if (kIsDebugBuild) {
-      VerifyGcMap(method_verifier, *dex_gc_map);
-    }
-    SetDexGcMap(ref, dex_gc_map);
-
-    // TODO: move this out when DEX-to-DEX supports devirtualization.
-    if (method_verifier->HasVirtualOrInterfaceInvokes()) {
-      PcToConcreteMethodMap* pc_to_concrete_method = GenerateDevirtMap(method_verifier);
-      if (pc_to_concrete_method != NULL) {
-        SetDevirtMap(ref, pc_to_concrete_method);
-      }
-    }
-  }
-
-  if (method_verifier->HasCheckCasts()) {
-    MethodSafeCastSet* method_to_safe_casts = GenerateSafeCastSet(method_verifier);
-    if (method_to_safe_casts != NULL) {
-      SetSafeCastMap(ref, method_to_safe_casts);
-    }
-  }
-  return true;
-}
-
-const std::vector<uint8_t>* VerifiedMethodsData::GetDexGcMap(MethodReference ref) {
-  ReaderMutexLock mu(Thread::Current(), dex_gc_maps_lock_);
-  DexGcMapTable::const_iterator it = dex_gc_maps_.find(ref);
-  CHECK(it != dex_gc_maps_.end())
-    << "Didn't find GC map for: " << PrettyMethod(ref.dex_method_index, *ref.dex_file);
-  CHECK(it->second != NULL);
-  return it->second;
-}
-
-const MethodReference* VerifiedMethodsData::GetDevirtMap(const MethodReference& ref,
-                                                                    uint32_t dex_pc) {
-  ReaderMutexLock mu(Thread::Current(), devirt_maps_lock_);
-  DevirtualizationMapTable::const_iterator it = devirt_maps_.find(ref);
-  if (it == devirt_maps_.end()) {
-    return NULL;
-  }
-
-  // Look up the PC in the map, get the concrete method to execute and return its reference.
-  PcToConcreteMethodMap::const_iterator pc_to_concrete_method = it->second->find(dex_pc);
-  if (pc_to_concrete_method != it->second->end()) {
-    return &(pc_to_concrete_method->second);
-  } else {
-    return NULL;
-  }
-}
-
-bool VerifiedMethodsData::IsSafeCast(MethodReference ref, uint32_t pc) {
-  ReaderMutexLock mu(Thread::Current(), safecast_map_lock_);
-  SafeCastMap::const_iterator it = safecast_map_.find(ref);
-  if (it == safecast_map_.end()) {
-    return false;
-  }
-
-  // Look up the cast address in the set of safe casts
-  // Use binary_search for lookup in the sorted vector.
-  return std::binary_search(it->second->begin(), it->second->end(), pc);
-}
-
-void VerifiedMethodsData::AddRejectedClass(ClassReference ref) {
-  {
-    WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
-    rejected_classes_.insert(ref);
-  }
-  DCHECK(IsClassRejected(ref));
-}
-
-bool VerifiedMethodsData::IsClassRejected(ClassReference ref) {
-  ReaderMutexLock mu(Thread::Current(), rejected_classes_lock_);
-  return (rejected_classes_.find(ref) != rejected_classes_.end());
-}
-
-bool VerifiedMethodsData::IsCandidateForCompilation(MethodReference& method_ref,
-                                                    const uint32_t access_flags) {
-#ifdef ART_SEA_IR_MODE
-    bool use_sea = Runtime::Current()->IsSeaIRMode();
-    use_sea = use_sea && (std::string::npos != PrettyMethod(
-                          method_ref.dex_method_index, *(method_ref.dex_file)).find("fibonacci"));
-    if (use_sea) return true;
-#endif
-  // Don't compile class initializers, ever.
-  if (((access_flags & kAccConstructor) != 0) && ((access_flags & kAccStatic) != 0)) {
-    return false;
-  }
-  return (Runtime::Current()->GetCompilerFilter() != Runtime::kInterpretOnly);
-}
-
-const std::vector<uint8_t>* VerifiedMethodsData::GenerateGcMap(
-    verifier::MethodVerifier* method_verifier) {
-  size_t num_entries, ref_bitmap_bits, pc_bits;
-  ComputeGcMapSizes(method_verifier, &num_entries, &ref_bitmap_bits, &pc_bits);
-  // There's a single byte to encode the size of each bitmap
-  if (ref_bitmap_bits >= (8 /* bits per byte */ * 8192 /* 13-bit size */ )) {
-    // TODO: either a better GC map format or per method failures
-    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
-        << "Cannot encode GC map for method with " << ref_bitmap_bits << " registers";
-    return NULL;
-  }
-  size_t ref_bitmap_bytes = (ref_bitmap_bits + 7) / 8;
-  // There are 2 bytes to encode the number of entries
-  if (num_entries >= 65536) {
-    // TODO: either a better GC map format or per method failures
-    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
-        << "Cannot encode GC map for method with " << num_entries << " entries";
-    return NULL;
-  }
-  size_t pc_bytes;
-  verifier::RegisterMapFormat format;
-  if (pc_bits <= 8) {
-    format = verifier::kRegMapFormatCompact8;
-    pc_bytes = 1;
-  } else if (pc_bits <= 16) {
-    format = verifier::kRegMapFormatCompact16;
-    pc_bytes = 2;
-  } else {
-    // TODO: either a better GC map format or per method failures
-    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
-        << "Cannot encode GC map for method with "
-        << (1 << pc_bits) << " instructions (number is rounded up to nearest power of 2)";
-    return NULL;
-  }
-  size_t table_size = ((pc_bytes + ref_bitmap_bytes) * num_entries) + 4;
-  std::vector<uint8_t>* table = new std::vector<uint8_t>;
-  if (table == NULL) {
-    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
-        << "Failed to encode GC map (size=" << table_size << ")";
-    return NULL;
-  }
-  table->reserve(table_size);
-  // Write table header
-  table->push_back(format | ((ref_bitmap_bytes & ~0xFF) >> 5));
-  table->push_back(ref_bitmap_bytes & 0xFF);
-  table->push_back(num_entries & 0xFF);
-  table->push_back((num_entries >> 8) & 0xFF);
-  // Write table data
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      table->push_back(i & 0xFF);
-      if (pc_bytes == 2) {
-        table->push_back((i >> 8) & 0xFF);
-      }
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      line->WriteReferenceBitMap(*table, ref_bitmap_bytes);
-    }
-  }
-  DCHECK_EQ(table->size(), table_size);
-  return table;
-}
-
-void VerifiedMethodsData::VerifyGcMap(verifier::MethodVerifier* method_verifier,
-                                      const std::vector<uint8_t>& data) {
-  // Check that for every GC point there is a map entry, there aren't entries for non-GC points,
-  // that the table data is well formed and all references are marked (or not) in the bitmap
-  verifier::DexPcToReferenceMap map(&data[0]);
-  DCHECK_EQ(data.size(), map.RawSize());
-  size_t map_index = 0;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    const uint8_t* reg_bitmap = map.FindBitMap(i, false);
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      CHECK_LT(map_index, map.NumEntries());
-      CHECK_EQ(map.GetDexPc(map_index), i);
-      CHECK_EQ(map.GetBitMap(map_index), reg_bitmap);
-      map_index++;
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      for (size_t j = 0; j < code_item->registers_size_; j++) {
-        if (line->GetRegisterType(j).IsNonZeroReferenceTypes()) {
-          CHECK_LT(j / 8, map.RegWidth());
-          CHECK_EQ((reg_bitmap[j / 8] >> (j % 8)) & 1, 1);
-        } else if ((j / 8) < map.RegWidth()) {
-          CHECK_EQ((reg_bitmap[j / 8] >> (j % 8)) & 1, 0);
-        } else {
-          // If a register doesn't contain a reference then the bitmap may be shorter than the line
-        }
-      }
-    } else {
-      CHECK(reg_bitmap == NULL);
-    }
-  }
-}
-
-void VerifiedMethodsData::ComputeGcMapSizes(verifier::MethodVerifier* method_verifier,
-                                            size_t* gc_points, size_t* ref_bitmap_bits,
-                                            size_t* log2_max_gc_pc) {
-  size_t local_gc_points = 0;
-  size_t max_insn = 0;
-  size_t max_ref_reg = -1;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      local_gc_points++;
-      max_insn = i;
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      max_ref_reg = line->GetMaxNonZeroReferenceReg(max_ref_reg);
-    }
-  }
-  *gc_points = local_gc_points;
-  *ref_bitmap_bits = max_ref_reg + 1;  // if max register is 0 we need 1 bit to encode (ie +1)
-  size_t i = 0;
-  while ((1U << i) <= max_insn) {
-    i++;
-  }
-  *log2_max_gc_pc = i;
-}
-
-void VerifiedMethodsData::SetDexGcMap(MethodReference ref, const std::vector<uint8_t>* gc_map) {
-  DCHECK(Runtime::Current()->IsCompiler());
-  {
-    WriterMutexLock mu(Thread::Current(), dex_gc_maps_lock_);
-    DexGcMapTable::iterator it = dex_gc_maps_.find(ref);
-    if (it != dex_gc_maps_.end()) {
-      delete it->second;
-      dex_gc_maps_.erase(it);
-    }
-    dex_gc_maps_.Put(ref, gc_map);
-  }
-  DCHECK(GetDexGcMap(ref) != NULL);
-}
-
-VerifiedMethodsData::MethodSafeCastSet* VerifiedMethodsData::GenerateSafeCastSet(
-    verifier::MethodVerifier* method_verifier) {
-  /*
-   * Walks over the method code and adds any cast instructions in which
-   * the type cast is implicit to a set, which is used in the code generation
-   * to elide these casts.
-   */
-  if (method_verifier->HasFailures()) {
-    return NULL;
-  }
-  UniquePtr<MethodSafeCastSet> mscs;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  const Instruction* inst = Instruction::At(code_item->insns_);
-  const Instruction* end = Instruction::At(code_item->insns_ +
-                                           code_item->insns_size_in_code_units_);
-
-  for (; inst < end; inst = inst->Next()) {
-    Instruction::Code code = inst->Opcode();
-    if ((code == Instruction::CHECK_CAST) || (code == Instruction::APUT_OBJECT)) {
-      uint32_t dex_pc = inst->GetDexPc(code_item->insns_);
-      const verifier::RegisterLine* line = method_verifier->GetRegLine(dex_pc);
-      bool is_safe_cast = false;
-      if (code == Instruction::CHECK_CAST) {
-        const verifier::RegType& reg_type(line->GetRegisterType(inst->VRegA_21c()));
-        const verifier::RegType& cast_type =
-            method_verifier->ResolveCheckedClass(inst->VRegB_21c());
-        is_safe_cast = cast_type.IsStrictlyAssignableFrom(reg_type);
-      } else {
-        const verifier::RegType& array_type(line->GetRegisterType(inst->VRegB_23x()));
-        // We only know its safe to assign to an array if the array type is precise. For example,
-        // an Object[] can have any type of object stored in it, but it may also be assigned a
-        // String[] in which case the stores need to be of Strings.
-        if (array_type.IsPreciseReference()) {
-          const verifier::RegType& value_type(line->GetRegisterType(inst->VRegA_23x()));
-          const verifier::RegType& component_type = method_verifier->GetRegTypeCache()
-              ->GetComponentType(array_type, method_verifier->GetClassLoader());
-          is_safe_cast = component_type.IsStrictlyAssignableFrom(value_type);
-        }
-      }
-      if (is_safe_cast) {
-        if (mscs.get() == nullptr) {
-          mscs.reset(new MethodSafeCastSet());
-        } else {
-          DCHECK_LT(mscs->back(), dex_pc);  // Verify ordering for push_back() to the sorted vector.
-        }
-        mscs->push_back(dex_pc);
-      }
-    }
-  }
-  return mscs.release();
-}
-
-void VerifiedMethodsData::SetSafeCastMap(MethodReference ref, const MethodSafeCastSet* cast_set) {
-  WriterMutexLock mu(Thread::Current(), safecast_map_lock_);
-  SafeCastMap::iterator it = safecast_map_.find(ref);
-  if (it != safecast_map_.end()) {
-    delete it->second;
-    safecast_map_.erase(it);
-  }
-  safecast_map_.Put(ref, cast_set);
-  DCHECK(safecast_map_.find(ref) != safecast_map_.end());
-}
-
-VerifiedMethodsData::PcToConcreteMethodMap* VerifiedMethodsData::GenerateDevirtMap(
-    verifier::MethodVerifier* method_verifier) {
-  // It is risky to rely on reg_types for sharpening in cases of soft
-  // verification; we might end up sharpening to a wrong implementation. Just abort.
-  if (method_verifier->HasFailures()) {
-    return NULL;
-  }
-
-  UniquePtr<PcToConcreteMethodMap> pc_to_concrete_method_map;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  const uint16_t* insns = code_item->insns_;
-  const Instruction* inst = Instruction::At(insns);
-  const Instruction* end = Instruction::At(insns + code_item->insns_size_in_code_units_);
-
-  for (; inst < end; inst = inst->Next()) {
-    bool is_virtual   = (inst->Opcode() == Instruction::INVOKE_VIRTUAL) ||
-        (inst->Opcode() ==  Instruction::INVOKE_VIRTUAL_RANGE);
-    bool is_interface = (inst->Opcode() == Instruction::INVOKE_INTERFACE) ||
-        (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE);
-
-    if (!is_interface && !is_virtual) {
-      continue;
-    }
-    // Get reg type for register holding the reference to the object that will be dispatched upon.
-    uint32_t dex_pc = inst->GetDexPc(insns);
-    verifier::RegisterLine* line = method_verifier->GetRegLine(dex_pc);
-    bool is_range = (inst->Opcode() ==  Instruction::INVOKE_VIRTUAL_RANGE) ||
-        (inst->Opcode() ==  Instruction::INVOKE_INTERFACE_RANGE);
-    const verifier::RegType&
-        reg_type(line->GetRegisterType(is_range ? inst->VRegC_3rc() : inst->VRegC_35c()));
-
-    if (!reg_type.HasClass()) {
-      // We will compute devirtualization information only when we know the Class of the reg type.
-      continue;
-    }
-    mirror::Class* reg_class = reg_type.GetClass();
-    if (reg_class->IsInterface()) {
-      // We can't devirtualize when the known type of the register is an interface.
-      continue;
-    }
-    if (reg_class->IsAbstract() && !reg_class->IsArrayClass()) {
-      // We can't devirtualize abstract classes except on arrays of abstract classes.
-      continue;
-    }
-    mirror::ArtMethod* abstract_method = method_verifier->GetDexCache()->GetResolvedMethod(
-        is_range ? inst->VRegB_3rc() : inst->VRegB_35c());
-    if (abstract_method == NULL) {
-      // If the method is not found in the cache, this means that it was never found
-      // by ResolveMethodAndCheckAccess() called when verifying invoke_*.
-      continue;
-    }
-    // Find the concrete method.
-    mirror::ArtMethod* concrete_method = NULL;
-    if (is_interface) {
-      concrete_method = reg_type.GetClass()->FindVirtualMethodForInterface(abstract_method);
-    }
-    if (is_virtual) {
-      concrete_method = reg_type.GetClass()->FindVirtualMethodForVirtual(abstract_method);
-    }
-    if (concrete_method == NULL || concrete_method->IsAbstract()) {
-      // In cases where concrete_method is not found, or is abstract, continue to the next invoke.
-      continue;
-    }
-    if (reg_type.IsPreciseReference() || concrete_method->IsFinal() ||
-        concrete_method->GetDeclaringClass()->IsFinal()) {
-      // If we knew exactly the class being dispatched upon, or if the target method cannot be
-      // overridden, record the target to be used in the compiler driver.
-      if (pc_to_concrete_method_map.get() == NULL) {
-        pc_to_concrete_method_map.reset(new PcToConcreteMethodMap());
-      }
-      MethodReference concrete_ref(
-          concrete_method->GetDeclaringClass()->GetDexCache()->GetDexFile(),
-          concrete_method->GetDexMethodIndex());
-      pc_to_concrete_method_map->Put(dex_pc, concrete_ref);
-    }
-  }
-  return pc_to_concrete_method_map.release();
-}
-
-void VerifiedMethodsData::SetDevirtMap(MethodReference ref,
-                                       const PcToConcreteMethodMap* devirt_map) {
-  WriterMutexLock mu(Thread::Current(), devirt_maps_lock_);
-  DevirtualizationMapTable::iterator it = devirt_maps_.find(ref);
-  if (it != devirt_maps_.end()) {
-    delete it->second;
-    devirt_maps_.erase(it);
-  }
-
-  devirt_maps_.Put(ref, devirt_map);
-  DCHECK(devirt_maps_.find(ref) != devirt_maps_.end());
-}
-
-}  // namespace art
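
The loop above that fills in *log2_max_gc_pc is the standard minimal-bit-width
calculation: the smallest i such that (1U << i) exceeds the largest GC-point
PC. A standalone sketch of just that step (hypothetical helper name, not part
of this change):

    #include <cassert>
    #include <cstddef>

    // Smallest i with (1U << i) > max_insn, i.e. the number of bits needed
    // to encode the largest GC-point program counter.
    static size_t BitsToEncode(size_t max_insn) {
      size_t i = 0;
      while ((1U << i) <= max_insn) {
        i++;
      }
      return i;
    }

    int main() {
      assert(BitsToEncode(0) == 0);
      assert(BitsToEncode(1) == 1);    // PCs {0, 1} fit in one bit
      assert(BitsToEncode(255) == 8);  // 0xff needs 8 bits
      assert(BitsToEncode(256) == 9);  // 0x100 needs 9
      return 0;
    }
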
diff --git a/compiler/dex/verified_methods_data.h b/compiler/dex/verified_methods_data.h
deleted file mode 100644
index d495dff..0000000
--- a/compiler/dex/verified_methods_data.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_VERIFIED_METHODS_DATA_H_
-#define ART_COMPILER_DEX_VERIFIED_METHODS_DATA_H_
-
-#include <stdint.h>
-#include <set>
-#include <vector>
-
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "class_reference.h"
-#include "method_reference.h"
-#include "safe_map.h"
-
-namespace art {
-
-namespace verifier {
-class MethodVerifier;
-}  // namespace verifier
-
-class VerifiedMethodsData {
-  public:
-    VerifiedMethodsData();
-    ~VerifiedMethodsData();
-
-    bool ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        LOCKS_EXCLUDED(dex_gc_maps_lock_, devirt_maps_lock_, safecast_map_lock_);
-
-    const std::vector<uint8_t>* GetDexGcMap(MethodReference ref)
-        LOCKS_EXCLUDED(dex_gc_maps_lock_);
-
-    const MethodReference* GetDevirtMap(const MethodReference& ref, uint32_t dex_pc)
-        LOCKS_EXCLUDED(devirt_maps_lock_);
-
-    // Returns true if the cast can statically be verified to be redundant
-    // by using the check-cast elision peephole optimization in the verifier
-    bool IsSafeCast(MethodReference ref, uint32_t pc) LOCKS_EXCLUDED(safecast_map_lock_);
-
-    void AddRejectedClass(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
-    bool IsClassRejected(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
-
-    static bool IsCandidateForCompilation(MethodReference& method_ref,
-                                          const uint32_t access_flags);
-
-  private:
-    /*
-     * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
-     * verification). For type-precise determination we have all the data we need, so we just need to
-     * encode it in some clever fashion.
-     * Returns a pointer to a newly-allocated GC map, or NULL on failure.
-     */
-    const std::vector<uint8_t>* GenerateGcMap(verifier::MethodVerifier* method_verifier);
-
-    // Verify that the GC map associated with method_ is well formed
-    void VerifyGcMap(verifier::MethodVerifier* method_verifier, const std::vector<uint8_t>& data);
-
-    // Compute sizes for GC map data
-    void ComputeGcMapSizes(verifier::MethodVerifier* method_verifier,
-                           size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
-
-    // All the GC maps that the verifier has created
-    typedef SafeMap<const MethodReference, const std::vector<uint8_t>*,
-        MethodReferenceComparator> DexGcMapTable;
-    ReaderWriterMutex dex_gc_maps_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-    DexGcMapTable dex_gc_maps_ GUARDED_BY(dex_gc_maps_lock_);
-    void SetDexGcMap(MethodReference ref, const std::vector<uint8_t>* dex_gc_map)
-        LOCKS_EXCLUDED(dex_gc_maps_lock_);
-
-    // Cast elision types.
-    // Since we're adding the dex PCs to the set in increasing order, a sorted vector
-    // is better for performance (not just memory usage), especially for large sets.
-    typedef std::vector<uint32_t> MethodSafeCastSet;
-    typedef SafeMap<MethodReference, const MethodSafeCastSet*,
-        MethodReferenceComparator> SafeCastMap;
-    MethodSafeCastSet* GenerateSafeCastSet(verifier::MethodVerifier* method_verifier)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-    void SetSafeCastMap(MethodReference ref, const MethodSafeCastSet* mscs)
-        LOCKS_EXCLUDED(safecast_map_lock_);
-    ReaderWriterMutex safecast_map_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-    SafeCastMap safecast_map_ GUARDED_BY(safecast_map_lock_);
-
-    // Devirtualization map.
-    typedef SafeMap<uint32_t, MethodReference> PcToConcreteMethodMap;
-    typedef SafeMap<MethodReference, const PcToConcreteMethodMap*,
-        MethodReferenceComparator> DevirtualizationMapTable;
-    PcToConcreteMethodMap* GenerateDevirtMap(verifier::MethodVerifier* method_verifier)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-    ReaderWriterMutex devirt_maps_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-    DevirtualizationMapTable devirt_maps_ GUARDED_BY(devirt_maps_lock_);
-    void SetDevirtMap(MethodReference ref, const PcToConcreteMethodMap* pc_method_map)
-          LOCKS_EXCLUDED(devirt_maps_lock_);
-
-    // Rejected classes
-    typedef std::set<ClassReference> RejectedClassesTable;
-    ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-    RejectedClassesTable rejected_classes_ GUARDED_BY(rejected_classes_lock_);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_VERIFIED_METHODS_DATA_H_
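
The MethodSafeCastSet comment above motivates using a sorted std::vector
instead of a std::set: GenerateSafeCastSet visits instructions in address
order, so push_back keeps the vector sorted for free, and lookups can use
binary search. A standalone sketch of the pattern (illustrative dex PCs):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <initializer_list>
    #include <vector>

    int main() {
      std::vector<uint32_t> safe_cast_pcs;
      // Insertion order mirrors the instruction walk: strictly increasing,
      // as the DCHECK_LT in GenerateSafeCastSet verifies.
      for (uint32_t dex_pc : {0x04u, 0x1au, 0x3cu}) {
        assert(safe_cast_pcs.empty() || safe_cast_pcs.back() < dex_pc);
        safe_cast_pcs.push_back(dex_pc);
      }
      // An IsSafeCast-style query is then O(log n).
      assert(std::binary_search(safe_cast_pcs.begin(), safe_cast_pcs.end(), 0x1au));
      assert(!std::binary_search(safe_cast_pcs.begin(), safe_cast_pcs.end(), 0x1bu));
      return 0;
    }
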
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index f211e3f..f8dc223 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -410,7 +410,8 @@
 
 void MIRGraph::InitRegLocations() {
   /* Allocate the location map */
-  RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(GetNumSSARegs() * sizeof(*loc),
+  int max_regs = GetNumSSARegs() + GetMaxPossibleCompilerTemps();
+  RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(max_regs * sizeof(*loc),
                                                              ArenaAllocator::kAllocRegAlloc));
   for (int i = 0; i < GetNumSSARegs(); i++) {
     loc[i] = fresh_loc;
@@ -418,13 +419,11 @@
     loc[i].is_const = is_constant_v_->IsBitSet(i);
   }
 
-  /* Patch up the locations for Method* and the compiler temps */
-  loc[method_sreg_].location = kLocCompilerTemp;
-  loc[method_sreg_].defined = true;
-  for (int i = 0; i < cu_->num_compiler_temps; i++) {
-    CompilerTemp* ct = compiler_temps_.Get(i);
-    loc[ct->s_reg].location = kLocCompilerTemp;
-    loc[ct->s_reg].defined = true;
+  /* Patch up the locations for the compiler temps */
+  GrowableArray<CompilerTemp*>::Iterator iter(&compiler_temps_);
+  for (CompilerTemp* ct = iter.Next(); ct != NULL; ct = iter.Next()) {
+    loc[ct->s_reg_low].location = kLocCompilerTemp;
+    loc[ct->s_reg_low].defined = true;
   }
 
   reg_location_ = loc;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f713151..d504a4e 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -27,7 +27,8 @@
 #include "class_linker.h"
 #include "dex_compilation_unit.h"
 #include "dex_file-inl.h"
-#include "dex/verified_methods_data.h"
+#include "dex/verification_results.h"
+#include "dex/verified_method.h"
 #include "jni_internal.h"
 #include "object_utils.h"
 #include "runtime.h"
@@ -336,13 +337,13 @@
 extern "C" void compilerLLVMSetBitcodeFileName(art::CompilerDriver& driver,
                                                std::string const& filename);
 
-CompilerDriver::CompilerDriver(VerifiedMethodsData* verified_methods_data,
+CompilerDriver::CompilerDriver(VerificationResults* verification_results,
                                DexFileToMethodInlinerMap* method_inliner_map,
                                CompilerBackend compiler_backend, InstructionSet instruction_set,
                                InstructionSetFeatures instruction_set_features,
                                bool image, DescriptorSet* image_classes, size_t thread_count,
                                bool dump_stats)
-    : verified_methods_data_(verified_methods_data),
+    : verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_backend_(compiler_backend),
       instruction_set_(instruction_set),
@@ -439,6 +440,10 @@
     MutexLock mu(self, compiled_methods_lock_);
     STLDeleteElements(&methods_to_patch_);
   }
+  {
+    MutexLock mu(self, compiled_methods_lock_);
+    STLDeleteElements(&classes_to_patch_);
+  }
   CHECK_PTHREAD_CALL(pthread_key_delete, (tls_key_), "delete tls key");
   typedef void (*UninitCompilerContextFn)(CompilerDriver&);
   UninitCompilerContextFn uninit_compiler_context;
@@ -906,6 +911,51 @@
   return result;
 }
 
+bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
+                                        bool* is_type_initialized, bool* use_direct_type_ptr,
+                                        uintptr_t* direct_type_ptr) {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+  mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
+  if (resolved_class == nullptr) {
+    return false;
+  }
+  const bool compiling_boot = Runtime::Current()->GetHeap()->IsCompilingBoot();
+  if (compiling_boot) {
+    // boot -> boot class pointers.
+    // True if the class will be in the boot image we are compiling.
+    const bool is_image_class = IsImage() && IsImageClass(
+        dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_));
+    // True if pc relative load works.
+    const bool support_boot_image_fixup = GetSupportBootImageFixup();
+    if (is_image_class && support_boot_image_fixup) {
+      *is_type_initialized = resolved_class->IsInitialized();
+      *use_direct_type_ptr = false;
+      *direct_type_ptr = 0;
+      return true;
+    } else {
+      return false;
+    }
+  } else {
+    // True if the class is already in an image when the app is compiled.
+    const bool class_in_image =
+        Runtime::Current()->GetHeap()->FindSpaceFromObject(resolved_class, false)->IsImageSpace();
+    if (class_in_image) {
+      // boot -> app class pointers.
+      *is_type_initialized = resolved_class->IsInitialized();
+      *use_direct_type_ptr = true;
+      *direct_type_ptr = reinterpret_cast<uintptr_t>(resolved_class);
+      return true;
+    } else {
+      // app -> app class pointers.
+      // Give up because the app does not have an image and the class
+      // isn't created at compile time.  TODO: implement this
+      // if/when each app gets an image.
+      return false;
+    }
+  }
+}
+
 static mirror::Class* ComputeCompilingMethodsClass(ScopedObjectAccess& soa,
                                                    SirtRef<mirror::DexCache>& dex_cache,
                                                    const DexCompilationUnit* mUnit)
@@ -1223,9 +1273,9 @@
         if (enableVerifierBasedSharpening && (*invoke_type == kVirtual ||
                                               *invoke_type == kInterface)) {
           // Did the verifier record a more precise invoke target based on its type information?
-          const MethodReference caller_method(mUnit->GetDexFile(), mUnit->GetDexMethodIndex());
+          DCHECK(mUnit->GetVerifiedMethod() != nullptr);
           const MethodReference* devirt_map_target =
-              verified_methods_data_->GetDevirtMap(caller_method, dex_pc);
+              mUnit->GetVerifiedMethod()->GetDevirtTarget(dex_pc);
           if (devirt_map_target != NULL) {
             SirtRef<mirror::DexCache> target_dex_cache(soa.Self(), mUnit->GetClassLinker()->FindDexCache(*devirt_map_target->dex_file));
             SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
@@ -1272,8 +1322,15 @@
   return false;  // Incomplete knowledge needs slow path.
 }
 
-bool CompilerDriver::IsSafeCast(const MethodReference& mr, uint32_t dex_pc) {
-  bool result = verified_methods_data_->IsSafeCast(mr, dex_pc);
+const VerifiedMethod* CompilerDriver::GetVerifiedMethod(const DexFile* dex_file,
+                                                        uint32_t method_idx) const {
+  MethodReference ref(dex_file, method_idx);
+  return verification_results_->GetVerifiedMethod(ref);
+}
+
+bool CompilerDriver::IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc) {
+  DCHECK(mUnit->GetVerifiedMethod() != nullptr);
+  bool result = mUnit->GetVerifiedMethod()->IsSafeCast(dex_pc);
   if (result) {
     stats_->SafeCast();
   } else {
@@ -1291,13 +1348,13 @@
                                   InvokeType target_invoke_type,
                                   size_t literal_offset) {
   MutexLock mu(Thread::Current(), compiled_methods_lock_);
-  code_to_patch_.push_back(new PatchInformation(dex_file,
-                                                referrer_class_def_idx,
-                                                referrer_method_idx,
-                                                referrer_invoke_type,
-                                                target_method_idx,
-                                                target_invoke_type,
-                                                literal_offset));
+  code_to_patch_.push_back(new CallPatchInformation(dex_file,
+                                                    referrer_class_def_idx,
+                                                    referrer_method_idx,
+                                                    referrer_invoke_type,
+                                                    target_method_idx,
+                                                    target_invoke_type,
+                                                    literal_offset));
 }
 void CompilerDriver::AddMethodPatch(const DexFile* dex_file,
                                     uint16_t referrer_class_def_idx,
@@ -1307,13 +1364,25 @@
                                     InvokeType target_invoke_type,
                                     size_t literal_offset) {
   MutexLock mu(Thread::Current(), compiled_methods_lock_);
-  methods_to_patch_.push_back(new PatchInformation(dex_file,
-                                                   referrer_class_def_idx,
-                                                   referrer_method_idx,
-                                                   referrer_invoke_type,
-                                                   target_method_idx,
-                                                   target_invoke_type,
-                                                   literal_offset));
+  methods_to_patch_.push_back(new CallPatchInformation(dex_file,
+                                                       referrer_class_def_idx,
+                                                       referrer_method_idx,
+                                                       referrer_invoke_type,
+                                                       target_method_idx,
+                                                       target_invoke_type,
+                                                       literal_offset));
+}
+void CompilerDriver::AddClassPatch(const DexFile* dex_file,
+                                    uint16_t referrer_class_def_idx,
+                                    uint32_t referrer_method_idx,
+                                    uint32_t target_type_idx,
+                                    size_t literal_offset) {
+  MutexLock mu(Thread::Current(), compiled_methods_lock_);
+  classes_to_patch_.push_back(new TypePatchInformation(dex_file,
+                                                       referrer_class_def_idx,
+                                                       referrer_method_idx,
+                                                       target_type_idx,
+                                                       literal_offset));
 }
 
 class ParallelCompilationManager {
@@ -2207,7 +2276,7 @@
   }
   ClassReference ref(&dex_file, class_def_index);
   // Skip compiling classes with generic verifier failures since they will still fail at runtime
-  if (manager->GetCompiler()->verified_methods_data_->IsClassRejected(ref)) {
+  if (manager->GetCompiler()->verification_results_->IsClassRejected(ref)) {
     return;
   }
   const byte* class_data = dex_file.GetClassData(class_def);
@@ -2290,7 +2359,7 @@
   } else if ((access_flags & kAccAbstract) != 0) {
   } else {
     MethodReference method_ref(&dex_file, method_idx);
-    bool compile = VerifiedMethodsData::IsCandidateForCompilation(method_ref, access_flags);
+    bool compile = VerificationResults::IsCandidateForCompilation(method_ref, access_flags);
 
     if (compile) {
       CompilerFn compiler = compiler_;
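
To make the CanEmbedTypeInCode decision above easier to follow, here is a
standalone analog of its decision table (a hypothetical stub taking booleans;
the real method consults the heap, the image configuration, and the dex cache):

    #include <cassert>

    // Can a resolved class pointer be baked into generated code, and if so,
    // directly or via a patchable literal?
    static bool CanEmbedType(bool compiling_boot, bool in_image, bool pc_rel_ok,
                             bool* use_direct_ptr) {
      if (compiling_boot) {
        // boot -> boot: emit a patchable literal; ImageWriter fixes it up.
        *use_direct_ptr = false;
        return in_image && pc_rel_ok;
      }
      // app -> boot: the boot-image pointer is stable, embed it directly.
      // app -> app: no image exists yet, so give up (runtime slow path).
      *use_direct_ptr = true;
      return in_image;
    }

    int main() {
      bool direct = false;
      assert(CanEmbedType(false, true, false, &direct) && direct);   // app -> boot
      assert(!CanEmbedType(false, false, false, &direct));           // app -> app
      assert(CanEmbedType(true, true, true, &direct) && !direct);    // boot -> boot
      return 0;
    }
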
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 9e31624..a8110e7 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -44,7 +44,8 @@
 class DexFileToMethodInlinerMap;
 class OatWriter;
 class TimingLogger;
-class VerifiedMethodsData;
+class VerificationResults;
+class VerifiedMethod;
 
 enum CompilerBackend {
   kQuick,
@@ -92,7 +93,7 @@
   // enabled.  "image_classes" lets the compiler know what classes it
   // can assume will be in the image, with NULL implying all available
   // classes.
-  explicit CompilerDriver(VerifiedMethodsData* verified_methods_data,
+  explicit CompilerDriver(VerificationResults* verification_results,
                           DexFileToMethodInlinerMap* method_inliner_map,
                           CompilerBackend compiler_backend, InstructionSet instruction_set,
                           InstructionSetFeatures instruction_set_features,
@@ -109,8 +110,8 @@
   void CompileOne(const mirror::ArtMethod* method, TimingLogger& timings)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  VerifiedMethodsData* GetVerifiedMethodsData() const {
-    return verified_methods_data_;
+  VerificationResults* GetVerificationResults() const {
+    return verification_results_;
   }
 
   DexFileToMethodInlinerMap* GetMethodInlinerMap() const {
@@ -189,6 +190,10 @@
                                               uint32_t type_idx)
      LOCKS_EXCLUDED(Locks::mutator_lock_);
 
+  bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
+                          bool* is_type_initialized, bool* use_direct_type_ptr,
+                          uintptr_t* direct_type_ptr);
+
   // Can we fast path instance field access? Computes field's offset and volatility.
   bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
                                 int* field_offset, bool* is_volatile)
@@ -209,7 +214,8 @@
                          uintptr_t* direct_code, uintptr_t* direct_method)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  bool IsSafeCast(const MethodReference& mr, uint32_t dex_pc);
+  const VerifiedMethod* GetVerifiedMethod(const DexFile* dex_file, uint32_t method_idx) const;
+  bool IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc);
 
   // Record patch information for later fix up.
   void AddCodePatch(const DexFile* dex_file,
@@ -228,6 +234,12 @@
                       InvokeType target_invoke_type,
                       size_t literal_offset)
       LOCKS_EXCLUDED(compiled_methods_lock_);
+  void AddClassPatch(const DexFile* dex_file,
+                     uint16_t referrer_class_def_idx,
+                     uint32_t referrer_method_idx,
+                     uint32_t target_type_idx,
+                     size_t literal_offset)
+      LOCKS_EXCLUDED(compiled_methods_lock_);
 
   void SetBitcodeFileName(std::string const& filename);
 
@@ -267,6 +279,8 @@
     return thread_count_;
   }
 
+  class CallPatchInformation;
+  class TypePatchInformation;
   class PatchInformation {
    public:
     const DexFile& GetDexFile() const {
@@ -278,6 +292,48 @@
     uint32_t GetReferrerMethodIdx() const {
       return referrer_method_idx_;
     }
+    size_t GetLiteralOffset() const {
+      return literal_offset_;
+    }
+
+    virtual bool IsCall() const {
+      return false;
+    }
+    virtual bool IsType() const {
+      return false;
+    }
+    virtual const CallPatchInformation* AsCall() const {
+      LOG(FATAL) << "Unreachable";
+      return nullptr;
+    }
+    virtual const TypePatchInformation* AsType() const {
+      LOG(FATAL) << "Unreachable";
+      return nullptr;
+    }
+
+   protected:
+    PatchInformation(const DexFile* dex_file,
+                     uint16_t referrer_class_def_idx,
+                     uint32_t referrer_method_idx,
+                     size_t literal_offset)
+      : dex_file_(dex_file),
+        referrer_class_def_idx_(referrer_class_def_idx),
+        referrer_method_idx_(referrer_method_idx),
+        literal_offset_(literal_offset) {
+      CHECK(dex_file_ != NULL);
+    }
+    virtual ~PatchInformation() {}
+
+    const DexFile* const dex_file_;
+    const uint16_t referrer_class_def_idx_;
+    const uint32_t referrer_method_idx_;
+    const size_t literal_offset_;
+
+    friend class CompilerDriver;
+  };
+
+  class CallPatchInformation : public PatchInformation {
+   public:
     InvokeType GetReferrerInvokeType() const {
       return referrer_invoke_type_;
     }
@@ -287,46 +343,76 @@
     InvokeType GetTargetInvokeType() const {
       return target_invoke_type_;
     }
-    size_t GetLiteralOffset() const {;
-      return literal_offset_;
+
+    const CallPatchInformation* AsCall() const {
+      return this;
+    }
+    bool IsCall() const {
+      return true;
     }
 
    private:
-    PatchInformation(const DexFile* dex_file,
-                     uint16_t referrer_class_def_idx,
-                     uint32_t referrer_method_idx,
-                     InvokeType referrer_invoke_type,
-                     uint32_t target_method_idx,
-                     InvokeType target_invoke_type,
-                     size_t literal_offset)
-      : dex_file_(dex_file),
-        referrer_class_def_idx_(referrer_class_def_idx),
-        referrer_method_idx_(referrer_method_idx),
-        referrer_invoke_type_(referrer_invoke_type),
-        target_method_idx_(target_method_idx),
-        target_invoke_type_(target_invoke_type),
-        literal_offset_(literal_offset) {
-      CHECK(dex_file_ != NULL);
+    CallPatchInformation(const DexFile* dex_file,
+                         uint16_t referrer_class_def_idx,
+                         uint32_t referrer_method_idx,
+                         InvokeType referrer_invoke_type,
+                         uint32_t target_method_idx,
+                         InvokeType target_invoke_type,
+                         size_t literal_offset)
+        : PatchInformation(dex_file, referrer_class_def_idx,
+                           referrer_method_idx, literal_offset),
+          referrer_invoke_type_(referrer_invoke_type),
+          target_method_idx_(target_method_idx),
+          target_invoke_type_(target_invoke_type) {
     }
 
-    const DexFile* const dex_file_;
-    const uint16_t referrer_class_def_idx_;
-    const uint32_t referrer_method_idx_;
     const InvokeType referrer_invoke_type_;
     const uint32_t target_method_idx_;
     const InvokeType target_invoke_type_;
-    const size_t literal_offset_;
 
     friend class CompilerDriver;
-    DISALLOW_COPY_AND_ASSIGN(PatchInformation);
+    DISALLOW_COPY_AND_ASSIGN(CallPatchInformation);
   };
 
-  const std::vector<const PatchInformation*>& GetCodeToPatch() const {
+  class TypePatchInformation : public PatchInformation {
+   public:
+    uint32_t GetTargetTypeIdx() const {
+      return target_type_idx_;
+    }
+
+    bool IsType() const {
+      return true;
+    }
+    const TypePatchInformation* AsType() const {
+      return this;
+    }
+
+   private:
+    TypePatchInformation(const DexFile* dex_file,
+                         uint16_t referrer_class_def_idx,
+                         uint32_t referrer_method_idx,
+                         uint32_t target_type_idx,
+                         size_t literal_offset)
+        : PatchInformation(dex_file, referrer_class_def_idx,
+                           referrer_method_idx, literal_offset),
+          target_type_idx_(target_type_idx) {
+    }
+
+    const uint32_t target_type_idx_;
+
+    friend class CompilerDriver;
+    DISALLOW_COPY_AND_ASSIGN(TypePatchInformation);
+  };
+
+  const std::vector<const CallPatchInformation*>& GetCodeToPatch() const {
     return code_to_patch_;
   }
-  const std::vector<const PatchInformation*>& GetMethodsToPatch() const {
+  const std::vector<const CallPatchInformation*>& GetMethodsToPatch() const {
     return methods_to_patch_;
   }
+  const std::vector<const TypePatchInformation*>& GetClassesToPatch() const {
+    return classes_to_patch_;
+  }
 
   // Checks if class specified by type_idx is one of the image_classes_
   bool IsImageClass(const char* descriptor) const;
@@ -398,10 +484,11 @@
   static void CompileClass(const ParallelCompilationManager* context, size_t class_def_index)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  std::vector<const PatchInformation*> code_to_patch_;
-  std::vector<const PatchInformation*> methods_to_patch_;
+  std::vector<const CallPatchInformation*> code_to_patch_;
+  std::vector<const CallPatchInformation*> methods_to_patch_;
+  std::vector<const TypePatchInformation*> classes_to_patch_;
 
-  VerifiedMethodsData* verified_methods_data_;
+  VerificationResults* verification_results_;
   DexFileToMethodInlinerMap* method_inliner_map_;
 
   CompilerBackend compiler_backend_;
diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc
index c441d09..840b0ad 100644
--- a/compiler/driver/dex_compilation_unit.cc
+++ b/compiler/driver/dex_compilation_unit.cc
@@ -31,7 +31,8 @@
       code_item_(cu->code_item),
       class_def_idx_(cu->class_def_idx),
       dex_method_idx_(cu->method_idx),
-      access_flags_(cu->access_flags) {
+      access_flags_(cu->access_flags),
+      verified_method_(cu_->compiler_driver->GetVerifiedMethod(cu->dex_file, cu->method_idx)) {
 }
 
 DexCompilationUnit::DexCompilationUnit(CompilationUnit* cu,
@@ -41,7 +42,8 @@
                                        const DexFile::CodeItem* code_item,
                                        uint16_t class_def_idx,
                                        uint32_t method_idx,
-                                       uint32_t access_flags)
+                                       uint32_t access_flags,
+                                       const VerifiedMethod* verified_method)
     : cu_(cu),
       class_loader_(class_loader),
       class_linker_(class_linker),
@@ -49,7 +51,8 @@
       code_item_(code_item),
       class_def_idx_(class_def_idx),
       dex_method_idx_(method_idx),
-      access_flags_(access_flags) {
+      access_flags_(access_flags),
+      verified_method_(verified_method) {
 }
 
 const std::string& DexCompilationUnit::GetSymbol() {
diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h
index 3df50ff..84f5799 100644
--- a/compiler/driver/dex_compilation_unit.h
+++ b/compiler/driver/dex_compilation_unit.h
@@ -29,6 +29,7 @@
 }  // namespace mirror
 class ClassLinker;
 struct CompilationUnit;
+class VerifiedMethod;
 
 class DexCompilationUnit {
  public:
@@ -36,7 +37,8 @@
 
   DexCompilationUnit(CompilationUnit* cu, jobject class_loader, ClassLinker* class_linker,
                      const DexFile& dex_file, const DexFile::CodeItem* code_item,
-                     uint16_t class_def_idx, uint32_t method_idx, uint32_t access_flags);
+                     uint16_t class_def_idx, uint32_t method_idx, uint32_t access_flags,
+                     const VerifiedMethod* verified_method);
 
   CompilationUnit* GetCompilationUnit() const {
     return cu_;
@@ -96,6 +98,10 @@
     return ((access_flags_ & kAccSynchronized) != 0);
   }
 
+  const VerifiedMethod* GetVerifiedMethod() const {
+    return verified_method_;
+  }
+
   const std::string& GetSymbol();
 
  private:
@@ -111,6 +117,7 @@
   const uint16_t class_def_idx_;
   const uint32_t dex_method_idx_;
   const uint32_t access_flags_;
+  const VerifiedMethod* const verified_method_;
 
   std::string symbol_;
 };
diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc
index f3fef23..cdfb881 100644
--- a/compiler/elf_writer_mclinker.cc
+++ b/compiler/elf_writer_mclinker.cc
@@ -22,6 +22,7 @@
 #include <mcld/IRBuilder.h>
 #include <mcld/Linker.h>
 #include <mcld/LinkerConfig.h>
+#include <mcld/LinkerScript.h>
 #include <mcld/MC/ZOption.h>
 #include <mcld/Module.h>
 #include <mcld/Support/Path.h>
@@ -142,13 +143,14 @@
   }
 
   // Based on alone::Linker::config
-  module_.reset(new mcld::Module(linker_config_->options().soname()));
+  linker_script_.reset(new mcld::LinkerScript());
+  module_.reset(new mcld::Module(linker_config_->options().soname(), *linker_script_.get()));
   CHECK(module_.get() != NULL);
   ir_builder_.reset(new mcld::IRBuilder(*module_.get(), *linker_config_.get()));
   CHECK(ir_builder_.get() != NULL);
   linker_.reset(new mcld::Linker());
   CHECK(linker_.get() != NULL);
-  linker_->config(*linker_config_.get());
+  linker_->emulate(*linker_script_.get(), *linker_config_.get());
 }
 
 void ElfWriterMclinker::AddOatInput(std::vector<uint8_t>& oat_contents) {
diff --git a/compiler/elf_writer_mclinker.h b/compiler/elf_writer_mclinker.h
index a0cd7f0..8ee7231 100644
--- a/compiler/elf_writer_mclinker.h
+++ b/compiler/elf_writer_mclinker.h
@@ -29,6 +29,7 @@
 class LDSymbol;
 class Linker;
 class LinkerConfig;
+class LinkerScript;
 class Module;
 }  // namespace mcld
 
@@ -74,6 +75,7 @@
 
   // Setup by Init()
   UniquePtr<mcld::LinkerConfig> linker_config_;
+  UniquePtr<mcld::LinkerScript> linker_script_;
   UniquePtr<mcld::Module> module_;
   UniquePtr<mcld::IRBuilder> ir_builder_;
   UniquePtr<mcld::Linker> linker_;
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 556dec2..09bb70c 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -734,7 +734,7 @@
   }
 }
 
-static ArtMethod* GetTargetMethod(const CompilerDriver::PatchInformation* patch)
+static ArtMethod* GetTargetMethod(const CompilerDriver::CallPatchInformation* patch)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Thread* self = Thread::Current();
@@ -757,15 +757,34 @@
   return method;
 }
 
+static Class* GetTargetType(const CompilerDriver::TypePatchInformation* patch)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, class_linker->FindDexCache(patch->GetDexFile()));
+  SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
+  Class* klass = class_linker->ResolveType(patch->GetDexFile(),
+                                           patch->GetTargetTypeIdx(),
+                                           dex_cache,
+                                           class_loader);
+  CHECK(klass != NULL)
+    << patch->GetDexFile().GetLocation() << " " << patch->GetTargetTypeIdx();
+  CHECK(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx()) == klass)
+    << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx() << " "
+    << PrettyClass(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx())) << " "
+    << PrettyClass(klass);
+  return klass;
+}
+
 void ImageWriter::PatchOatCodeAndMethods() {
   Thread* self = Thread::Current();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter");
 
-  typedef std::vector<const CompilerDriver::PatchInformation*> Patches;
-  const Patches& code_to_patch = compiler_driver_.GetCodeToPatch();
+  typedef std::vector<const CompilerDriver::CallPatchInformation*> CallPatches;
+  const CallPatches& code_to_patch = compiler_driver_.GetCodeToPatch();
   for (size_t i = 0; i < code_to_patch.size(); i++) {
-    const CompilerDriver::PatchInformation* patch = code_to_patch[i];
+    const CompilerDriver::CallPatchInformation* patch = code_to_patch[i];
     ArtMethod* target = GetTargetMethod(patch);
     uint32_t code = reinterpret_cast<uint32_t>(class_linker->GetOatCodeFor(target));
     uint32_t code_base = reinterpret_cast<uint32_t>(&oat_file_->GetOatHeader());
@@ -773,13 +792,21 @@
     SetPatchLocation(patch, reinterpret_cast<uint32_t>(GetOatAddress(code_offset)));
   }
 
-  const Patches& methods_to_patch = compiler_driver_.GetMethodsToPatch();
+  const CallPatches& methods_to_patch = compiler_driver_.GetMethodsToPatch();
   for (size_t i = 0; i < methods_to_patch.size(); i++) {
-    const CompilerDriver::PatchInformation* patch = methods_to_patch[i];
+    const CompilerDriver::CallPatchInformation* patch = methods_to_patch[i];
     ArtMethod* target = GetTargetMethod(patch);
     SetPatchLocation(patch, reinterpret_cast<uint32_t>(GetImageAddress(target)));
   }
 
+  const std::vector<const CompilerDriver::TypePatchInformation*>& classes_to_patch =
+      compiler_driver_.GetClassesToPatch();
+  for (size_t i = 0; i < classes_to_patch.size(); i++) {
+    const CompilerDriver::TypePatchInformation* patch = classes_to_patch[i];
+    Class* target = GetTargetType(patch);
+    SetPatchLocation(patch, reinterpret_cast<uint32_t>(GetImageAddress(target)));
+  }
+
   // Update the image header with the new checksum after patching
   ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   image_header->SetOatChecksum(oat_file_->GetOatHeader().GetChecksum());
@@ -796,13 +823,26 @@
   uint8_t* base = reinterpret_cast<uint8_t*>(reinterpret_cast<uint32_t>(oat_code) & ~0x1);
   uint32_t* patch_location = reinterpret_cast<uint32_t*>(base + patch->GetLiteralOffset());
   if (kIsDebugBuild) {
-    const DexFile::MethodId& id = patch->GetDexFile().GetMethodId(patch->GetTargetMethodIdx());
-    uint32_t expected = reinterpret_cast<uint32_t>(&id);
-    uint32_t actual = *patch_location;
-    CHECK(actual == expected || actual == value) << std::hex
-      << "actual=" << actual
-      << "expected=" << expected
-      << "value=" << value;
+    if (patch->IsCall()) {
+      const CompilerDriver::CallPatchInformation* cpatch = patch->AsCall();
+      const DexFile::MethodId& id = cpatch->GetDexFile().GetMethodId(cpatch->GetTargetMethodIdx());
+      uint32_t expected = reinterpret_cast<uint32_t>(&id);
+      uint32_t actual = *patch_location;
+      CHECK(actual == expected || actual == value) << std::hex
+          << "actual=" << actual
+          << "expected=" << expected
+          << "value=" << value;
+    }
+    if (patch->IsType()) {
+      const CompilerDriver::TypePatchInformation* tpatch = patch->AsType();
+      const DexFile::TypeId& id = tpatch->GetDexFile().GetTypeId(tpatch->GetTargetTypeIdx());
+      uint32_t expected = reinterpret_cast<uint32_t>(&id);
+      uint32_t actual = *patch_location;
+      CHECK(actual == expected || actual == value) << std::hex
+          << "actual=" << actual
+          << "expected=" << expected
+          << "value=" << value;
+    }
   }
   *patch_location = value;
   oat_header.UpdateChecksum(patch_location, sizeof(value));
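
SetPatchLocation (above) clears bit 0 of the oat code pointer before adding
the literal offset because ARM/Thumb code pointers carry the Thumb mode bit
in their low bit. A standalone sketch of that patch step (hypothetical buffer
and value, not ART's types):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    int main() {
      // Stand-in for a method's oat code; a Thumb code pointer has bit 0 set.
      alignas(4) uint8_t code[16] = {};
      uintptr_t oat_code = reinterpret_cast<uintptr_t>(code) | 0x1;
      size_t literal_offset = 8;

      // Mask off the mode bit, then patch the 32-bit literal in place.
      uint8_t* base = reinterpret_cast<uint8_t*>(oat_code & ~static_cast<uintptr_t>(0x1));
      uint32_t value = 0xCAFEF00D;  // e.g. the image address of the target
      std::memcpy(base + literal_offset, &value, sizeof(value));

      uint32_t readback;
      std::memcpy(&readback, code + literal_offset, sizeof(readback));
      assert(readback == value);
      return 0;
    }
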
diff --git a/compiler/llvm/compiler_llvm.cc b/compiler/llvm/compiler_llvm.cc
index 35d1ecd..94408bb 100644
--- a/compiler/llvm/compiler_llvm.cc
+++ b/compiler/llvm/compiler_llvm.cc
@@ -20,7 +20,8 @@
 #include "base/stl_util.h"
 #include "class_linker.h"
 #include "compiled_method.h"
-#include "dex/verified_methods_data.h"
+#include "dex/verification_results.h"
+#include "dex/verified_method.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "globals.h"
@@ -153,11 +154,9 @@
 
   cunit->Materialize();
 
-  MethodReference mref(dex_compilation_unit->GetDexFile(),
-                       dex_compilation_unit->GetDexMethodIndex());
   return new CompiledMethod(*compiler_driver_, compiler_driver_->GetInstructionSet(),
                             cunit->GetElfObject(),
-                            *compiler_driver_->GetVerifiedMethodsData()->GetDexGcMap(mref),
+                            dex_compilation_unit->GetVerifiedMethod()->GetDexGcMap(),
                             cunit->GetDexCompilationUnit()->GetSymbol());
 }
 
@@ -214,7 +213,7 @@
 
   art::DexCompilationUnit dex_compilation_unit(
     NULL, class_loader, class_linker, dex_file, code_item,
-    class_def_idx, method_idx, access_flags);
+    class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx));
   art::llvm::CompilerLLVM* compiler_llvm = ContextOf(driver);
   art::CompiledMethod* result = compiler_llvm->CompileDexMethod(&dex_compilation_unit, invoke_type);
   return result;
@@ -226,8 +225,8 @@
   art::ClassLinker *class_linker = art::Runtime::Current()->GetClassLinker();
 
   art::DexCompilationUnit dex_compilation_unit(
-    NULL, NULL, class_linker, dex_file, NULL,
-    0, method_idx, access_flags);
+      nullptr, nullptr, class_linker, dex_file, nullptr,
+      0, method_idx, access_flags, nullptr);
 
   art::llvm::CompilerLLVM* compiler_llvm = ContextOf(driver);
   art::CompiledMethod* result = compiler_llvm->CompileNativeMethod(&dex_compilation_unit);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 2434262..fc45412 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -79,10 +79,10 @@
   InstructionSet insn_set = kIsTargetBuild ? kThumb2 : kX86;
 
   InstructionSetFeatures insn_features;
-  verified_methods_data_.reset(new VerifiedMethodsData);
+  verification_results_.reset(new VerificationResults);
   method_inliner_map_.reset(compiler_backend == kQuick ? new DexFileToMethodInlinerMap : nullptr);
-  callbacks_.Reset(verified_methods_data_.get(), method_inliner_map_.get());
-  compiler_driver_.reset(new CompilerDriver(verified_methods_data_.get(),
+  callbacks_.Reset(verification_results_.get(), method_inliner_map_.get());
+  compiler_driver_.reset(new CompilerDriver(verification_results_.get(),
                                             method_inliner_map_.get(),
                                             compiler_backend, insn_set,
                                             insn_features, false, NULL, 2, true));
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 199a2b8..7a902d8 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -23,7 +23,7 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
-#include "dex/verified_methods_data.h"
+#include "dex/verification_results.h"
 #include "gc/space/space.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/array.h"
@@ -218,7 +218,7 @@
       mirror::Class::Status status;
       if (compiled_class != NULL) {
         status = compiled_class->GetStatus();
-      } else if (compiler_driver_->GetVerifiedMethodsData()->IsClassRejected(class_ref)) {
+      } else if (compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) {
         status = mirror::Class::kStatusError;
       } else {
         status = mirror::Class::kStatusNotReady;
@@ -433,7 +433,7 @@
       mirror::Class::Status status;
       if (compiled_class != NULL) {
         status = compiled_class->GetStatus();
-      } else if (compiler_driver_->GetVerifiedMethodsData()->IsClassRejected(class_ref)) {
+      } else if (compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) {
         status = mirror::Class::kStatusError;
       } else {
         status = mirror::Class::kStatusNotReady;
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index 05dcd7b..6cd0538 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -33,7 +33,7 @@
   ifeq ($(ART_BUILD_NDEBUG),true)
     $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart-compiler,art/compiler,host,ndebug))
   endif
-  ifeq ($(ART_BUILD_NDEBUG),true)
+  ifeq ($(ART_BUILD_DEBUG),true)
     $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd-compiler,art/compiler,host,debug))
   endif
 endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 26fac23..97df199 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -32,7 +32,7 @@
 #include "class_linker.h"
 #include "compiler_callbacks.h"
 #include "dex_file-inl.h"
-#include "dex/verified_methods_data.h"
+#include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "elf_fixup.h"
 #include "elf_stripper.h"
@@ -268,7 +268,7 @@
       Runtime::Current()->SetCompileTimeClassPath(class_loader, class_path_files);
     }
 
-    UniquePtr<CompilerDriver> driver(new CompilerDriver(verified_methods_data_.get(),
+    UniquePtr<CompilerDriver> driver(new CompilerDriver(verification_results_.get(),
                                                         method_inliner_map_.get(),
                                                         compiler_backend_,
                                                         instruction_set_,
@@ -348,15 +348,15 @@
  private:
   class Dex2OatCompilerCallbacks : public CompilerCallbacks {
     public:
-      Dex2OatCompilerCallbacks(VerifiedMethodsData* verified_methods_data,
+      Dex2OatCompilerCallbacks(VerificationResults* verification_results,
                                DexFileToMethodInlinerMap* method_inliner_map)
-          : verified_methods_data_(verified_methods_data),
+          : verification_results_(verification_results),
             method_inliner_map_(method_inliner_map) { }
       virtual ~Dex2OatCompilerCallbacks() { }
 
       virtual bool MethodVerified(verifier::MethodVerifier* verifier)
           SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-        bool result = verified_methods_data_->ProcessVerifiedMethod(verifier);
+        bool result = verification_results_->ProcessVerifiedMethod(verifier);
         if (result && method_inliner_map_ != nullptr) {
           MethodReference ref = verifier->GetMethodReference();
           method_inliner_map_->GetMethodInliner(ref.dex_file)
@@ -365,11 +365,11 @@
         return result;
       }
       virtual void ClassRejected(ClassReference ref) {
-        verified_methods_data_->AddRejectedClass(ref);
+        verification_results_->AddRejectedClass(ref);
       }
 
     private:
-      VerifiedMethodsData* verified_methods_data_;
+      VerificationResults* verification_results_;
       DexFileToMethodInlinerMap* method_inliner_map_;
   };
 
@@ -380,9 +380,9 @@
       : compiler_backend_(compiler_backend),
         instruction_set_(instruction_set),
         instruction_set_features_(instruction_set_features),
-        verified_methods_data_(new VerifiedMethodsData),
+        verification_results_(new VerificationResults),
         method_inliner_map_(compiler_backend == kQuick ? new DexFileToMethodInlinerMap : nullptr),
-        callbacks_(verified_methods_data_.get(), method_inliner_map_.get()),
+        callbacks_(verification_results_.get(), method_inliner_map_.get()),
         runtime_(nullptr),
         thread_count_(thread_count),
         start_ns_(NanoTime()) {
@@ -446,7 +446,7 @@
   const InstructionSet instruction_set_;
   const InstructionSetFeatures instruction_set_features_;
 
-  UniquePtr<VerifiedMethodsData> verified_methods_data_;
+  UniquePtr<VerificationResults> verification_results_;
   UniquePtr<DexFileToMethodInlinerMap> method_inliner_map_;
   Dex2OatCompilerCallbacks callbacks_;
   Runtime* runtime_;
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index c51ea7b..6c25e0a 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -392,6 +392,17 @@
         has_modrm = true;
         src_reg_file = dst_reg_file = SSE;
         break;
+      case 0x62:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // Clear prefix now. It has served its purpose as part of the opcode.
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "punpckldq";
+        load = true;
+        has_modrm = true;
+        break;
       case 0x6E:
         if (prefix[2] == 0x66) {
           dst_reg_file = SSE;
@@ -485,6 +496,18 @@
         has_modrm = true;
         store = true;
         break;
+      case 0xA4:
+        opcode << "shld";
+        has_modrm = true;
+        load = true;
+        immediate_bytes = 1;
+        break;
+      case 0xAC:
+        opcode << "shrd";
+        has_modrm = true;
+        load = true;
+        immediate_bytes = 1;
+        break;
       case 0xAE:
         if (prefix[0] == 0xF3) {
           prefix[0] = 0;  // clear prefix now it's served its purpose as part of the opcode
@@ -571,6 +594,9 @@
     reg_is_opcode = true;
     store = true;
     break;
+  case 0x99:
+    opcode << "cdq";
+    break;
   case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7:
     opcode << "mov";
     immediate_bytes = 1;
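
The new 0x62 case follows the decoder's existing convention for SIMD opcodes
in the 0F escape space: a 0x66 operand-size prefix selects the SSE (xmm) form
of punpckldq, otherwise the MMX (mm) form is used. A standalone sketch of
that dispatch (hypothetical enum and helper, not the disassembler's types):

    #include <cassert>
    #include <cstdint>

    enum RegFile { GPR, MMX, SSE };

    //   0F 62 /r     punpckldq mm, mm/m32
    //   66 0F 62 /r  punpckldq xmm, xmm/m128
    static RegFile RegFileFor0F62(uint8_t operand_size_prefix) {
      return operand_size_prefix == 0x66 ? SSE : MMX;
    }

    int main() {
      assert(RegFileFor0F62(0x66) == SSE);
      assert(RegFileFor0F62(0x00) == MMX);
      return 0;
    }
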
diff --git a/jdwpspy/Android.mk b/jdwpspy/Android.mk
deleted file mode 100644
index 97162f0..0000000
--- a/jdwpspy/Android.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-# Copyright (C) 2006 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-LOCAL_PATH:= $(call my-dir)
-
-include $(CLEAR_VARS)
-LOCAL_SRC_FILES:= Main.cpp Net.cpp
-LOCAL_C_INCLUDES += art/runtime
-LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-LOCAL_MODULE := jdwpspy
-include $(BUILD_HOST_EXECUTABLE)
-ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
diff --git a/jdwpspy/Common.h b/jdwpspy/Common.h
deleted file mode 100644
index 30a49fb..0000000
--- a/jdwpspy/Common.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * jdwpspy common stuff.
- */
-#ifndef ART_JDWPSPY_COMMON_H_
-#define ART_JDWPSPY_COMMON_H_
-
-#include <stdint.h>
-#include <stdio.h>
-#include <sys/types.h>
-
-typedef uint8_t u1;
-typedef uint16_t u2;
-typedef uint32_t u4;
-typedef uint64_t u8;
-
-#define NELEM(x) (sizeof(x) / sizeof((x)[0]))
-
-#ifndef _JDWP_MISC_INLINE
-# define INLINE extern inline
-#else
-# define INLINE
-#endif
-
-/*
- * Get 1 byte.  (Included to make the code more legible.)
- */
-INLINE u1 get1(unsigned const char* pSrc) {
-    return *pSrc;
-}
-
-/*
- * Get 2 big-endian bytes.
- */
-INLINE u2 get2BE(unsigned char const* pSrc) {
-    u2 result;
-
-    result = *pSrc++ << 8;
-    result |= *pSrc++;
-
-    return result;
-}
-
-/*
- * Get 4 big-endian bytes.
- */
-INLINE u4 get4BE(unsigned char const* pSrc) {
-    u4 result;
-
-    result = *pSrc++ << 24;
-    result |= *pSrc++ << 16;
-    result |= *pSrc++ << 8;
-    result |= *pSrc++;
-
-    return result;
-}
-
-/*
- * Get 8 big-endian bytes.
- */
-INLINE u8 get8BE(unsigned char const* pSrc) {
-    u8 result;
-
-    result = (u8) *pSrc++ << 56;
-    result |= (u8) *pSrc++ << 48;
-    result |= (u8) *pSrc++ << 40;
-    result |= (u8) *pSrc++ << 32;
-    result |= (u8) *pSrc++ << 24;
-    result |= (u8) *pSrc++ << 16;
-    result |= (u8) *pSrc++ << 8;
-    result |= (u8) *pSrc++;
-
-    return result;
-}
-
-
-/*
- * Start here.
- */
-int run(const char* connectHost, int connectPort, int listenPort);
-
-/*
- * Print a hex dump to the specified file pointer.
- *
- * "local" mode prints a hex dump starting from offset 0 (roughly equivalent
- * to "xxd -g1").
- *
- * "mem" mode shows the actual memory address, and will offset the start
- * so that the low nibble of the address is always zero.
- */
-enum HexDumpMode { kHexDumpLocal, kHexDumpMem };
-void printHexDump(const void* vaddr, size_t length);
-void printHexDump2(const void* vaddr, size_t length, const char* prefix);
-void printHexDumpEx(FILE* fp, const void* vaddr, size_t length,
-    HexDumpMode mode, const char* prefix);
-
-#endif  // ART_JDWPSPY_COMMON_H_
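
For reference, the deleted get2BE/get4BE helpers implement JDWP's big-endian
wire format: multi-byte fields are assembled high byte first. A standalone,
testable equivalent (renamed; the originals are above):

    #include <cassert>
    #include <cstdint>

    static uint16_t Get2BE(const unsigned char* p) {
      return static_cast<uint16_t>((p[0] << 8) | p[1]);
    }

    static uint32_t Get4BE(const unsigned char* p) {
      return (static_cast<uint32_t>(p[0]) << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
    }

    int main() {
      // First six bytes of a hypothetical JDWP packet header.
      const unsigned char header[] = {0x00, 0x00, 0x00, 0x0b, 0x12, 0x34};
      assert(Get4BE(header) == 11u);         // 4-byte length field
      assert(Get2BE(header + 4) == 0x1234);  // 2-byte id fragment
      return 0;
    }
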
diff --git a/jdwpspy/Main.cpp b/jdwpspy/Main.cpp
deleted file mode 100644
index 0f68d52..0000000
--- a/jdwpspy/Main.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * JDWP spy.
- */
-#define _JDWP_MISC_INLINE
-#include "Common.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <ctype.h>
-
-static const char gHexDigit[] = "0123456789abcdef";
-
-/*
- * Print a hex dump.  Just hands control off to the fancy version.
- */
-void printHexDump(const void* vaddr, size_t length)
-{
-    printHexDumpEx(stdout, vaddr, length, kHexDumpLocal, "");
-}
-void printHexDump2(const void* vaddr, size_t length, const char* prefix)
-{
-    printHexDumpEx(stdout, vaddr, length, kHexDumpLocal, prefix);
-}
-
-/*
- * Print a hex dump in this format:
- *
-01234567: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef\n
- */
-void printHexDumpEx(FILE* fp, const void* vaddr, size_t length,
-    HexDumpMode mode, const char* prefix)
-{
-    const unsigned char* addr = reinterpret_cast<const unsigned char*>(vaddr);
-    char out[77];       /* exact fit */
-    unsigned int offset;    /* offset to show while printing */
-    char* hex;
-    char* asc;
-    int gap;
-
-    if (mode == kHexDumpLocal)
-        offset = 0;
-    else
-        offset = (int) addr;
-
-    memset(out, ' ', sizeof(out)-1);
-    out[8] = ':';
-    out[sizeof(out)-2] = '\n';
-    out[sizeof(out)-1] = '\0';
-
-    gap = (int) offset & 0x0f;
-    while (length) {
-        unsigned int lineOffset = offset & ~0x0f;
-        char* hex = out;
-        char* asc = out + 59;
-
-        for (int i = 0; i < 8; i++) {
-            *hex++ = gHexDigit[lineOffset >> 28];
-            lineOffset <<= 4;
-        }
-        hex++;
-        hex++;
-
-        int count = ((int)length > 16-gap) ? 16-gap : (int) length; /* cap length */
-        assert(count != 0);
-        assert(count+gap <= 16);
-
-        if (gap) {
-            /* only on first line */
-            hex += gap * 3;
-            asc += gap;
-        }
-
-        int i;
-        for (i = gap ; i < count+gap; i++) {
-            *hex++ = gHexDigit[*addr >> 4];
-            *hex++ = gHexDigit[*addr & 0x0f];
-            hex++;
-            if (isprint(*addr))
-                *asc++ = *addr;
-            else
-                *asc++ = '.';
-            addr++;
-        }
-        for ( ; i < 16; i++) {
-            /* erase extra stuff; only happens on last line */
-            *hex++ = ' ';
-            *hex++ = ' ';
-            hex++;
-            *asc++ = ' ';
-        }
-
-        fprintf(fp, "%s%s", prefix, out);
-
-        gap = 0;
-        length -= count;
-        offset += count;
-    }
-}
-
-
-/*
- * Explain it.
- */
-static void usage(const char* progName)
-{
-    fprintf(stderr, "Usage: %s VM-port [debugger-listen-port]\n\n", progName);
-    fprintf(stderr,
-"When a debugger connects to the debugger-listen-port, jdwpspy will connect\n");
-    fprintf(stderr, "to the VM on the VM-port.\n");
-}
-
-/*
- * Parse args.
- */
-int main(int argc, char* argv[])
-{
-    if (argc < 2 || argc > 3) {
-        usage("jdwpspy");
-        return 2;
-    }
-
-    setvbuf(stdout, NULL, _IONBF, 0);
-
-    /* may want this to be host:port */
-    int connectPort = atoi(argv[1]);
-
-    int listenPort;
-    if (argc > 2)
-        listenPort = atoi(argv[2]);
-    else
-        listenPort = connectPort + 1;
-
-    int cc = run("localhost", connectPort, listenPort);
-
-    return (cc != 0);
-}
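
The deleted printHexDumpEx builds each 77-character line in place: an 8-digit
offset, a colon, three columns per hex byte, and the ASCII column starting at
offset 59. A simplified sketch that prints one line in roughly the same shape
(illustrative only; it skips the gap handling and exact column math):

    #include <cctype>
    #include <cstdio>

    int main() {
      const unsigned char data[16] = {'J', 'D', 'W', 'P', '-', 'H', 'a', 'n',
                                      'd', 's', 'h', 'a', 'k', 'e', 0x0d, 0x0a};
      std::printf("%08x:", 0u);               // offset column
      for (int i = 0; i < 16; i++) {
        std::printf(" %02x", data[i]);        // hex columns
      }
      std::printf("  ");
      for (int i = 0; i < 16; i++) {          // ASCII column
        std::putchar(std::isprint(data[i]) ? data[i] : '.');
      }
      std::putchar('\n');
      return 0;
    }
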
diff --git a/jdwpspy/Net.cpp b/jdwpspy/Net.cpp
deleted file mode 100644
index 38d4e26..0000000
--- a/jdwpspy/Net.cpp
+++ /dev/null
@@ -1,751 +0,0 @@
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * JDWP spy.  This is a rearranged version of the JDWP code from the VM.
- */
-#include "Common.h"
-#include "jdwp/jdwp_constants.h"
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <time.h>
-#include <errno.h>
-#include <assert.h>
-
-#include <iostream>
-#include <sstream>
-
-#define kInputBufferSize    (256*1024)
-
-#define kMagicHandshakeLen  14      /* "JDWP-Handshake" */
-#define kJDWPHeaderLen      11
-#define kJDWPFlagReply      0x80
-
-
-/*
- * Information about the remote end.
- */
-struct Peer {
-    char    label[2];           /* 'D' or 'V' */
-
-    int     sock;
-    unsigned char   inputBuffer[kInputBufferSize];
-    int     inputCount;
-
-    bool    awaitingHandshake;  /* waiting for "JDWP-Handshake" */
-};
-
-
-/*
- * Network state.
- */
-struct NetState {
-    /* listen here for connection from debugger */
-    int     listenSock;
-
-    /* connect here to contact VM */
-    in_addr vmAddr;
-    uint16_t vmPort;
-
-    Peer    dbg;
-    Peer    vm;
-};
-
-/*
- * Function names.
- */
-struct JdwpHandlerMap {
-    u1  cmdSet;
-    u1  cmd;
-    const char* descr;
-};
-
-/*
- * Map commands to names.
- *
- * Command sets 0-63 are incoming requests, 64-127 are outbound requests,
- * and 128-256 are vendor-defined.
- */
-static const JdwpHandlerMap gHandlerMap[] = {
-    /* VirtualMachine command set (1) */
-    { 1,    1,  "VirtualMachine.Version" },
-    { 1,    2,  "VirtualMachine.ClassesBySignature" },
-    { 1,    3,  "VirtualMachine.AllClasses" },
-    { 1,    4,  "VirtualMachine.AllThreads" },
-    { 1,    5,  "VirtualMachine.TopLevelThreadGroups" },
-    { 1,    6,  "VirtualMachine.Dispose" },
-    { 1,    7,  "VirtualMachine.IDSizes" },
-    { 1,    8,  "VirtualMachine.Suspend" },
-    { 1,    9,  "VirtualMachine.Resume" },
-    { 1,    10, "VirtualMachine.Exit" },
-    { 1,    11, "VirtualMachine.CreateString" },
-    { 1,    12, "VirtualMachine.Capabilities" },
-    { 1,    13, "VirtualMachine.ClassPaths" },
-    { 1,    14, "VirtualMachine.DisposeObjects" },
-    { 1,    15, "VirtualMachine.HoldEvents" },
-    { 1,    16, "VirtualMachine.ReleaseEvents" },
-    { 1,    17, "VirtualMachine.CapabilitiesNew" },
-    { 1,    18, "VirtualMachine.RedefineClasses" },
-    { 1,    19, "VirtualMachine.SetDefaultStratum" },
-    { 1,    20, "VirtualMachine.AllClassesWithGeneric"},
-    { 1,    21, "VirtualMachine.InstanceCounts"},
-
-    /* ReferenceType command set (2) */
-    { 2,    1,  "ReferenceType.Signature" },
-    { 2,    2,  "ReferenceType.ClassLoader" },
-    { 2,    3,  "ReferenceType.Modifiers" },
-    { 2,    4,  "ReferenceType.Fields" },
-    { 2,    5,  "ReferenceType.Methods" },
-    { 2,    6,  "ReferenceType.GetValues" },
-    { 2,    7,  "ReferenceType.SourceFile" },
-    { 2,    8,  "ReferenceType.NestedTypes" },
-    { 2,    9,  "ReferenceType.Status" },
-    { 2,    10, "ReferenceType.Interfaces" },
-    { 2,    11, "ReferenceType.ClassObject" },
-    { 2,    12, "ReferenceType.SourceDebugExtension" },
-    { 2,    13, "ReferenceType.SignatureWithGeneric" },
-    { 2,    14, "ReferenceType.FieldsWithGeneric" },
-    { 2,    15, "ReferenceType.MethodsWithGeneric" },
-    { 2,    16, "ReferenceType.Instances" },
-    { 2,    17, "ReferenceType.ClassFileVersion" },
-    { 2,    18, "ReferenceType.ConstantPool" },
-
-    /* ClassType command set (3) */
-    { 3,    1,  "ClassType.Superclass" },
-    { 3,    2,  "ClassType.SetValues" },
-    { 3,    3,  "ClassType.InvokeMethod" },
-    { 3,    4,  "ClassType.NewInstance" },
-
-    /* ArrayType command set (4) */
-    { 4,    1,  "ArrayType.NewInstance" },
-
-    /* InterfaceType command set (5) */
-
-    /* Method command set (6) */
-    { 6,    1,  "Method.LineTable" },
-    { 6,    2,  "Method.VariableTable" },
-    { 6,    3,  "Method.Bytecodes" },
-    { 6,    4,  "Method.IsObsolete" },
-    { 6,    5,  "Method.VariableTableWithGeneric" },
-
-    /* Field command set (8) */
-
-    /* ObjectReference command set (9) */
-    { 9,    1,  "ObjectReference.ReferenceType" },
-    { 9,    2,  "ObjectReference.GetValues" },
-    { 9,    3,  "ObjectReference.SetValues" },
-    { 9,    4,  "ObjectReference.UNUSED" },
-    { 9,    5,  "ObjectReference.MonitorInfo" },
-    { 9,    6,  "ObjectReference.InvokeMethod" },
-    { 9,    7,  "ObjectReference.DisableCollection" },
-    { 9,    8,  "ObjectReference.EnableCollection" },
-    { 9,    9,  "ObjectReference.IsCollected" },
-    { 9,    10, "ObjectReference.ReferringObjects" },
-
-    /* StringReference command set (10) */
-    { 10,   1,  "StringReference.Value" },
-
-    /* ThreadReference command set (11) */
-    { 11,   1,  "ThreadReference.Name" },
-    { 11,   2,  "ThreadReference.Suspend" },
-    { 11,   3,  "ThreadReference.Resume" },
-    { 11,   4,  "ThreadReference.Status" },
-    { 11,   5,  "ThreadReference.ThreadGroup" },
-    { 11,   6,  "ThreadReference.Frames" },
-    { 11,   7,  "ThreadReference.FrameCount" },
-    { 11,   8,  "ThreadReference.OwnedMonitors" },
-    { 11,   9,  "ThreadReference.CurrentContendedMonitor" },
-    { 11,   10, "ThreadReference.Stop" },
-    { 11,   11, "ThreadReference.Interrupt" },
-    { 11,   12, "ThreadReference.SuspendCount" },
-    { 11,   13, "ThreadReference.OwnedMonitorsStackDepthInfo" },
-    { 11,   14, "ThreadReference.ForceEarlyReturn" },
-
-    /* ThreadGroupReference command set (12) */
-    { 12,   1,  "ThreadGroupReference.Name" },
-    { 12,   2,  "ThreadGroupReference.Parent" },
-    { 12,   3,  "ThreadGroupReference.Children" },
-
-    /* ArrayReference command set (13) */
-    { 13,   1,  "ArrayReference.Length" },
-    { 13,   2,  "ArrayReference.GetValues" },
-    { 13,   3,  "ArrayReference.SetValues" },
-
-    /* ClassLoaderReference command set (14) */
-    { 14,   1,  "ArrayReference.VisibleClasses" },
-
-    /* EventRequest command set (15) */
-    { 15,   1,  "EventRequest.Set" },
-    { 15,   2,  "EventRequest.Clear" },
-    { 15,   3,  "EventRequest.ClearAllBreakpoints" },
-
-    /* StackFrame command set (16) */
-    { 16,   1,  "StackFrame.GetValues" },
-    { 16,   2,  "StackFrame.SetValues" },
-    { 16,   3,  "StackFrame.ThisObject" },
-    { 16,   4,  "StackFrame.PopFrames" },
-
-    /* ClassObjectReference command set (17) */
-    { 17,   1,  "ClassObjectReference.ReflectedType" },
-
-    /* Event command set (64) */
-    { 64,  100, "Event.Composite" },
-
-    /* DDMS */
-    { 199,  1,  "DDMS.Chunk" },
-};
-
-/*
- * Look up a command's name.
- */
-static const char* getCommandName(int cmdSet, int cmd)
-{
-    for (int i = 0; i < (int) NELEM(gHandlerMap); i++) {
-        if (gHandlerMap[i].cmdSet == cmdSet &&
-            gHandlerMap[i].cmd == cmd)
-        {
-            return gHandlerMap[i].descr;
-        }
-    }
-
-    return "?UNKNOWN?";
-}
-
-
-void jdwpNetFree(NetState* netState);       /* fwd */
-
-/*
- * Allocate state structure and bind to the listen port.
- *
- * Returns 0 on success.
- */
-NetState* jdwpNetStartup(uint16_t listenPort, const char* connectHost, uint16_t connectPort) {
-    NetState* netState = new NetState;
-    memset(netState, 0, sizeof(*netState));
-    netState->listenSock = -1;
-    netState->dbg.sock = netState->vm.sock = -1;
-
-    strcpy(netState->dbg.label, "D");
-    strcpy(netState->vm.label, "V");
-
-    /*
-     * Set up a socket to listen for connections from the debugger.
-     */
-
-    netState->listenSock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
-    if (netState->listenSock < 0) {
-        fprintf(stderr, "Socket create failed: %s\n", strerror(errno));
-        goto fail;
-    }
-
-    /* allow immediate re-use if we die */
-    {
-        int one = 1;
-        if (setsockopt(netState->listenSock, SOL_SOCKET, SO_REUSEADDR, &one,
-                sizeof(one)) < 0)
-        {
-            fprintf(stderr, "setsockopt(SO_REUSEADDR) failed: %s\n",
-                strerror(errno));
-            goto fail;
-        }
-    }
-
-    sockaddr_in addr;
-    addr.sin_family = AF_INET;
-    addr.sin_port = htons(listenPort);
-    addr.sin_addr.s_addr = INADDR_ANY;
-
-    if (bind(netState->listenSock, (sockaddr*) &addr, sizeof(addr)) != 0)
-    {
-        fprintf(stderr, "attempt to bind to port %u failed: %s\n",
-            listenPort, strerror(errno));
-        goto fail;
-    }
-
-    fprintf(stderr, "+++ bound to port %u\n", listenPort);
-
-    if (listen(netState->listenSock, 5) != 0) {
-        fprintf(stderr, "Listen failed: %s\n", strerror(errno));
-        goto fail;
-    }
-
-    /*
-     * Do the hostname lookup for the VM.
-     */
-    hostent* pHost;
-
-    pHost = gethostbyname(connectHost);
-    if (pHost == NULL) {
-        fprintf(stderr, "Name lookup of '%s' failed: %s\n",
-            connectHost, strerror(h_errno));
-        goto fail;
-    }
-
-    netState->vmAddr = *((in_addr*) pHost->h_addr_list[0]);
-    netState->vmPort = connectPort;
-
-    fprintf(stderr, "+++ connect host resolved to %s\n",
-        inet_ntoa(netState->vmAddr));
-
-    return netState;
-
-fail:
-    jdwpNetFree(netState);
-    return NULL;
-}
-
-/*
- * Shut down JDWP listener.  Don't free state.
- *
- * Note that "netState" may be partially initialized if "startup" failed.
- */
-void jdwpNetShutdown(NetState* netState)
-{
-    int listenSock = netState->listenSock;
-    int dbgSock = netState->dbg.sock;
-    int vmSock = netState->vm.sock;
-
-    /* clear these out so it doesn't wake up and try to reuse them */
-    /* (important when multi-threaded) */
-    netState->listenSock = netState->dbg.sock = netState->vm.sock = -1;
-
-    if (listenSock >= 0) {
-        shutdown(listenSock, SHUT_RDWR);
-        close(listenSock);
-    }
-    if (dbgSock >= 0) {
-        shutdown(dbgSock, SHUT_RDWR);
-        close(dbgSock);
-    }
-    if (vmSock >= 0) {
-        shutdown(vmSock, SHUT_RDWR);
-        close(vmSock);
-    }
-}
-
-/*
- * Shut down JDWP listener and free its state.
- */
-void jdwpNetFree(NetState* netState)
-{
-    if (netState == NULL)
-        return;
-
-    jdwpNetShutdown(netState);
-    delete netState;
-}
-
-/*
- * Disable the TCP Nagle algorithm, which delays transmission of outbound
- * packets until the previous transmissions have been acked.  JDWP does a
- * lot of back-and-forth with small packets, so this may help.
- */
-static int setNoDelay(int fd)
-{
-    int cc, on = 1;
-
-    cc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
-    assert(cc == 0);
-    return cc;
-}
-
-/*
- * Accept a connection.  This will block waiting for somebody to show up.
- */
-bool jdwpAcceptConnection(NetState* netState)
-{
-    sockaddr_in addr;
-    socklen_t addrlen;
-    int sock;
-
-    if (netState->listenSock < 0)
-        return false;       /* you're not listening! */
-
-    assert(netState->dbg.sock < 0);     /* must not already be talking */
-
-    addrlen = sizeof(addr);
-    do {
-        sock = accept(netState->listenSock, (sockaddr*) &addr, &addrlen);
-        if (sock < 0 && errno != EINTR) {
-            fprintf(stderr, "accept failed: %s\n", strerror(errno));
-            return false;
-        }
-    } while (sock < 0);
-
-    fprintf(stderr, "+++ accepted connection from %s:%u\n",
-        inet_ntoa(addr.sin_addr), ntohs(addr.sin_port));
-
-    netState->dbg.sock = sock;
-    netState->dbg.awaitingHandshake = true;
-    netState->dbg.inputCount = 0;
-
-    setNoDelay(sock);
-
-    return true;
-}
-
-/*
- * Close the connections to the debugger and VM.
- *
- * Reset the state so we're ready to receive a new connection.
- */
-void jdwpCloseConnection(NetState* netState)
-{
-    if (netState->dbg.sock >= 0) {
-        fprintf(stderr, "+++ closing connection to debugger\n");
-        close(netState->dbg.sock);
-        netState->dbg.sock = -1;
-    }
-    if (netState->vm.sock >= 0) {
-        fprintf(stderr, "+++ closing connection to vm\n");
-        close(netState->vm.sock);
-        netState->vm.sock = -1;
-    }
-}
-
-/*
- * Figure out if we have a full packet in the buffer.
- */
-static bool haveFullPacket(Peer* pPeer)
-{
-    long length;
-
-    if (pPeer->awaitingHandshake)
-        return (pPeer->inputCount >= kMagicHandshakeLen);
-
-    if (pPeer->inputCount < 4)
-        return false;
-
-    length = get4BE(pPeer->inputBuffer);
-    return (pPeer->inputCount >= length);
-}
-
-/*
- * Consume bytes from the buffer.
- *
- * This would be more efficient with a circular buffer.  However, we're
- * usually only going to find one packet, which is trivial to handle.
- */
-static void consumeBytes(Peer* pPeer, int count)
-{
-    assert(count > 0);
-    assert(count <= pPeer->inputCount);
-
-    if (count == pPeer->inputCount) {
-        pPeer->inputCount = 0;
-        return;
-    }
-
-    memmove(pPeer->inputBuffer, pPeer->inputBuffer + count,
-        pPeer->inputCount - count);
-    pPeer->inputCount -= count;
-}
-
-/*
- * Get the current time.
- */
-static void getCurrentTime(int* pMin, int* pSec)
-{
-    time_t now;
-    tm* ptm;
-
-    now = time(NULL);
-    ptm = localtime(&now);
-    *pMin = ptm->tm_min;
-    *pSec = ptm->tm_sec;
-}
-
-/*
- * Dump the contents of a packet to stdout.
- */
-static void dumpPacket(const unsigned char* packetBuf, const char* srcName,
-    const char* dstName)
-{
-    const unsigned char* buf = packetBuf;
-    char prefix[3];
-    u4 length, id;
-    u1 flags, cmdSet=0, cmd=0;
-    art::JDWP::JdwpError error = art::JDWP::ERR_NONE;
-    bool reply;
-    int dataLen;
-
-    length = get4BE(buf+0);
-    id = get4BE(buf+4);
-    flags = get1(buf+8);
-    if ((flags & kJDWPFlagReply) != 0) {
-        reply = true;
-        error = static_cast<art::JDWP::JdwpError>(get2BE(buf+9));
-    } else {
-        reply = false;
-        cmdSet = get1(buf+9);
-        cmd = get1(buf+10);
-    }
-
-    buf += kJDWPHeaderLen;
-    dataLen = length - (buf - packetBuf);
-
-    if (!reply) {
-        prefix[0] = srcName[0];
-        prefix[1] = '>';
-    } else {
-        prefix[0] = dstName[0];
-        prefix[1] = '<';
-    }
-    prefix[2] = '\0';
-
-    int min, sec;
-    getCurrentTime(&min, &sec);
-
-    if (!reply) {
-        printf("%s REQUEST dataLen=%-5u id=0x%08x flags=0x%02x cmd=%d/%d [%02d:%02d]\n",
-            prefix, dataLen, id, flags, cmdSet, cmd, min, sec);
-        printf("%s   --> %s\n", prefix, getCommandName(cmdSet, cmd));
-    } else {
-        std::ostringstream ss;
-        ss << "TODO";  // get access to the operator<<, or regenerate it for jdwpspy?
-        printf("%s REPLY   dataLen=%-5u id=0x%08x flags=0x%02x err=%d (%s) [%02d:%02d]\n",
-            prefix, dataLen, id, flags, error, ss.str().c_str(), min,sec);
-    }
-    if (dataLen > 0)
-        printHexDump2(buf, dataLen, prefix);
-    printf("%s ----------\n", prefix);
-}
-
-/*
- * Handle a packet.  Returns "false" if we encounter a connection-fatal error.
- */
-static bool handlePacket(Peer* pDst, Peer* pSrc)
-{
-    const unsigned char* buf = pSrc->inputBuffer;
-    u4 length;
-    u1 flags;
-    int cc;
-
-    length = get4BE(buf+0);
-    flags = get1(buf+9);
-
-    assert((int) length <= pSrc->inputCount);
-
-    dumpPacket(buf, pSrc->label, pDst->label);
-
-    cc = write(pDst->sock, buf, length);
-    if (cc != (int) length) {
-        fprintf(stderr, "Failed sending packet: %s\n", strerror(errno));
-        return false;
-    }
-    /*printf("*** wrote %d bytes from %c to %c\n",
-        cc, pSrc->label[0], pDst->label[0]);*/
-
-    consumeBytes(pSrc, length);
-    return true;
-}
-
-/*
- * Handle incoming data.  If we have a full packet in the buffer, process it.
- */
-static bool handleIncoming(Peer* pWritePeer, Peer* pReadPeer)
-{
-    if (haveFullPacket(pReadPeer)) {
-        if (pReadPeer->awaitingHandshake) {
-            printf("Handshake [%c]: %.14s\n",
-                pReadPeer->label[0], pReadPeer->inputBuffer);
-            if (write(pWritePeer->sock, pReadPeer->inputBuffer,
-                    kMagicHandshakeLen) != kMagicHandshakeLen)
-            {
-                fprintf(stderr,
-                    "+++ [%c] handshake write failed\n", pReadPeer->label[0]);
-                goto fail;
-            }
-            consumeBytes(pReadPeer, kMagicHandshakeLen);
-            pReadPeer->awaitingHandshake = false;
-        } else {
-            if (!handlePacket(pWritePeer, pReadPeer))
-                goto fail;
-        }
-    } else {
-        /*printf("*** %c not full yet\n", pReadPeer->label[0]);*/
-    }
-
-    return true;
-
-fail:
-    return false;
-}
-
-/*
- * Process incoming data.  If no data is available, this will block until
- * some arrives.
- *
- * Returns "false" on error (indicating that the connection has been severed).
- */
-bool jdwpProcessIncoming(NetState* netState)
-{
-    int cc;
-
-    assert(netState->dbg.sock >= 0);
-    assert(netState->vm.sock >= 0);
-
-    while (!haveFullPacket(&netState->dbg) && !haveFullPacket(&netState->vm)) {
-        /* read some more */
-        int highFd;
-        fd_set readfds;
-
-        highFd = (netState->dbg.sock > netState->vm.sock) ?
-            netState->dbg.sock+1 : netState->vm.sock+1;
-        FD_ZERO(&readfds);
-        FD_SET(netState->dbg.sock, &readfds);
-        FD_SET(netState->vm.sock, &readfds);
-
-        errno = 0;
-        cc = select(highFd, &readfds, NULL, NULL, NULL);
-        if (cc < 0) {
-            if (errno == EINTR) {
-                fprintf(stderr, "+++ EINTR on select\n");
-                continue;
-            }
-            fprintf(stderr, "+++ select failed: %s\n", strerror(errno));
-            goto fail;
-        }
-
-        if (FD_ISSET(netState->dbg.sock, &readfds)) {
-            cc = read(netState->dbg.sock,
-                netState->dbg.inputBuffer + netState->dbg.inputCount,
-                sizeof(netState->dbg.inputBuffer) - netState->dbg.inputCount);
-            if (cc < 0) {
-                if (errno == EINTR) {
-                    fprintf(stderr, "+++ EINTR on read\n");
-                    continue;
-                }
-                fprintf(stderr, "+++ dbg read failed: %s\n", strerror(errno));
-                goto fail;
-            }
-            if (cc == 0) {
-                if (sizeof(netState->dbg.inputBuffer) ==
-                        netState->dbg.inputCount)
-                    fprintf(stderr, "+++ debugger sent huge message\n");
-                else
-                    fprintf(stderr, "+++ debugger disconnected\n");
-                goto fail;
-            }
-
-            /*printf("*** %d bytes from dbg\n", cc);*/
-            netState->dbg.inputCount += cc;
-        }
-
-        if (FD_ISSET(netState->vm.sock, &readfds)) {
-            cc = read(netState->vm.sock,
-                netState->vm.inputBuffer + netState->vm.inputCount,
-                sizeof(netState->vm.inputBuffer) - netState->vm.inputCount);
-            if (cc < 0) {
-                if (errno == EINTR) {
-                    fprintf(stderr, "+++ EINTR on read\n");
-                    continue;
-                }
-                fprintf(stderr, "+++ vm read failed: %s\n", strerror(errno));
-                goto fail;
-            }
-            if (cc == 0) {
-                if (sizeof(netState->vm.inputBuffer) ==
-                        netState->vm.inputCount)
-                    fprintf(stderr, "+++ vm sent huge message\n");
-                else
-                    fprintf(stderr, "+++ vm disconnected\n");
-                goto fail;
-            }
-
-            /*printf("*** %d bytes from vm\n", cc);*/
-            netState->vm.inputCount += cc;
-        }
-    }
-
-    if (!handleIncoming(&netState->dbg, &netState->vm))
-        goto fail;
-    if (!handleIncoming(&netState->vm, &netState->dbg))
-        goto fail;
-
-    return true;
-
-fail:
-    jdwpCloseConnection(netState);
-    return false;
-}
-
-/*
- * Connect to the VM.
- */
-bool jdwpConnectToVm(NetState* netState)
-{
-    sockaddr_in addr;
-    int sock = -1;
-
-    sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
-    if (sock < 0) {
-        fprintf(stderr, "Socket create failed: %s\n", strerror(errno));
-        goto fail;
-    }
-
-    addr.sin_family = AF_INET;
-    addr.sin_addr = netState->vmAddr;
-    addr.sin_port = htons(netState->vmPort);
-    if (connect(sock, (struct sockaddr*) &addr, sizeof(addr)) != 0) {
-        fprintf(stderr, "Connection to %s:%u failed: %s\n",
-            inet_ntoa(addr.sin_addr), ntohs(addr.sin_port), strerror(errno));
-        goto fail;
-    }
-    fprintf(stderr, "+++ connected to VM %s:%u\n",
-        inet_ntoa(addr.sin_addr), ntohs(addr.sin_port));
-
-    netState->vm.sock = sock;
-    netState->vm.awaitingHandshake = true;
-    netState->vm.inputCount = 0;
-
-    setNoDelay(netState->vm.sock);
-    return true;
-
-fail:
-    if (sock >= 0)
-        close(sock);
-    return false;
-}
-
-/*
- * Establish network connections and start things running.
- *
- * We wait for a new connection from the debugger.  When one arrives we
- * open a connection to the VM.  If one side or the other goes away, we
- * drop both ends and go back to listening.
- */
-int run(const char* connectHost, int connectPort, int listenPort)
-{
-    NetState* state;
-
-    state = jdwpNetStartup(listenPort, connectHost, connectPort);
-    if (state == NULL)
-        return -1;
-
-    while (true) {
-        if (!jdwpAcceptConnection(state))
-            break;
-
-        if (jdwpConnectToVm(state)) {
-            while (true) {
-                if (!jdwpProcessIncoming(state))
-                    break;
-            }
-        }
-
-        jdwpCloseConnection(state);
-    }
-
-    jdwpNetFree(state);
-
-    return 0;
-}
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 353f160..bab250c 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -23,6 +23,7 @@
 	barrier.cc \
 	base/allocator.cc \
 	base/bit_vector.cc \
+	base/hex_dump.cc \
 	base/logging.cc \
 	base/mutex.cc \
 	base/stringpiece.cc \
@@ -65,6 +66,7 @@
 	gc/space/malloc_space.cc \
 	gc/space/rosalloc_space.cc \
 	gc/space/space.cc \
+	gc/space/zygote_space.cc \
 	hprof/hprof.cc \
 	image.cc \
 	indirect_reference_table.cc \
@@ -230,14 +232,14 @@
 	arch/mips/quick_entrypoints_mips.S \
 	arch/mips/thread_mips.cc
 else # TARGET_ARCH != mips
-ifeq ($(TARGET_ARCH),aarch64)
-$(info TODOAArch64: $(LOCAL_PATH)/Android.mk Add AArch64 specific runtime files)
+ifeq ($(TARGET_ARCH),arm64)
+$(info TODOArm64: $(LOCAL_PATH)/Android.mk Add Arm64 specific runtime files)
 else
 $(error unsupported TARGET_ARCH=$(TARGET_ARCH))
-endif # TARGET_ARCH != aarch64
+endif # TARGET_ARCH != arm64
 endif # TARGET_ARCH != mips
-endif # TARGET_ARCH != x86
 endif # TARGET_ARCH != x86_64
+endif # TARGET_ARCH != x86
 endif # TARGET_ARCH != arm
 
 
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index d32f998..632c5f3 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -17,11 +17,17 @@
 .macro GENERATE_ALLOC_ENTRYPOINTS c_suffix, cxx_suffix
 // Called by managed code to allocate an object.
 TWO_ARG_DOWNCALL art_quick_alloc_object\c_suffix, artAllocObjectFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an object of a resolved class.
+TWO_ARG_DOWNCALL art_quick_alloc_object_resolved\c_suffix, artAllocObjectFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an object of an initialized class.
+TWO_ARG_DOWNCALL art_quick_alloc_object_initialized\c_suffix, artAllocObjectFromCodeInitialized\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 // Called by managed code to allocate an object when the caller doesn't know whether it has access
 // to the created type.
 TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check\c_suffix, artAllocObjectFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 // Called by managed code to allocate an array.
 THREE_ARG_DOWNCALL art_quick_alloc_array\c_suffix, artAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an array of a resolved class.
+THREE_ARG_DOWNCALL art_quick_alloc_array_resolved\c_suffix, artAllocArrayFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 // Called by managed code to allocate an array when the caller doesn't know whether it has access
 // to the created type.
 THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check\c_suffix, artAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
diff --git a/runtime/arch/quick_alloc_entrypoints.cc b/runtime/arch/quick_alloc_entrypoints.cc
index 457c73a..9363f81 100644
--- a/runtime/arch/quick_alloc_entrypoints.cc
+++ b/runtime/arch/quick_alloc_entrypoints.cc
@@ -19,29 +19,41 @@
 
 #define GENERATE_ENTRYPOINTS(suffix) \
 extern "C" void* art_quick_alloc_array##suffix(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_array_resolved##suffix(void* klass, void*, int32_t); \
 extern "C" void* art_quick_alloc_array_with_access_check##suffix(uint32_t, void*, int32_t); \
 extern "C" void* art_quick_alloc_object##suffix(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_alloc_object_resolved##suffix(void* klass, void* method); \
+extern "C" void* art_quick_alloc_object_initialized##suffix(void* klass, void* method); \
 extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, void* method); \
 extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, void*, int32_t); \
 extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, void*, int32_t); \
 extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_array_resolved##suffix##_instrumented(void* klass, void*, int32_t); \
 extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, void*, int32_t); \
 extern "C" void* art_quick_alloc_object##suffix##_instrumented(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(void* klass, void* method); \
+extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(void* klass, void* method); \
 extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, void* method); \
 extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, void*, int32_t); \
 extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, void*, int32_t); \
 void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrumented) { \
   if (instrumented) { \
     qpoints->pAllocArray = art_quick_alloc_array##suffix##_instrumented; \
+    qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix##_instrumented; \
     qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix##_instrumented; \
     qpoints->pAllocObject = art_quick_alloc_object##suffix##_instrumented; \
+    qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix##_instrumented; \
+    qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix##_instrumented; \
     qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix##_instrumented; \
     qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix##_instrumented; \
     qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented; \
   } else { \
     qpoints->pAllocArray = art_quick_alloc_array##suffix; \
+    qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix; \
     qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix; \
     qpoints->pAllocObject = art_quick_alloc_object##suffix; \
+    qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix; \
+    qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix; \
     qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix; \
     qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix; \
     qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix; \
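Every GENERATE_ENTRYPOINTS(suffix) expansion now also wires the resolved and initialized fast paths into QuickEntryPoints. A hedged sketch of a call site, assuming the macro is instantiated with the _rosalloc suffix elsewhere in this file and that the header path is as shown (neither appears in this hunk):

#include "entrypoints/quick/quick_entrypoints.h"  // assumed path for QuickEntryPoints

// Illustrative only: GENERATE_ENTRYPOINTS(_rosalloc) would define this setter
// (namespace and instantiation assumed, as neither is visible in this hunk).
void UseRosAllocEntryPoints(QuickEntryPoints* qpoints, bool instrumented) {
  SetQuickAllocEntryPoints_rosalloc(qpoints, instrumented);
}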
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 0a414b1..74ec761 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -405,10 +405,16 @@
 // multi-line macros that use each other (hence using 1 macro per newline below).
 #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
   TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
   TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
   THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
   THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 #define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
@@ -417,57 +423,81 @@
   THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
diff --git a/runtime/base/hex_dump.cc b/runtime/base/hex_dump.cc
new file mode 100644
index 0000000..936c52b
--- /dev/null
+++ b/runtime/base/hex_dump.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hex_dump.h"
+
+#include "globals.h"
+
+#include <string.h>
+
+namespace art {
+
+void HexDump::Dump(std::ostream& os) const {
+  if (byte_count_ == 0) {
+    return;
+  }
+
+  if (address_ == NULL) {
+    os << "00000000:";
+    return;
+  }
+
+  static const char gHexDigit[] = "0123456789abcdef";
+  const unsigned char* addr = reinterpret_cast<const unsigned char*>(address_);
+  // 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
+  char out[(kBitsPerWord / 4) + /* offset */
+           1 + /* colon */
+           (16 * 3) + /* 16 hex digits and space */
+           2 + /* white space */
+           16 + /* 16 characters */
+           1 /* \0 */ ];
+  size_t offset;    /* offset to show while printing */
+
+  if (show_actual_addresses_) {
+    offset = reinterpret_cast<size_t>(addr);
+  } else {
+    offset = 0;
+  }
+  memset(out, ' ', sizeof(out)-1);
+  out[kBitsPerWord / 4] = ':';
+  out[sizeof(out)-1] = '\0';
+
+  size_t byte_count = byte_count_;
+  size_t gap = offset & 0x0f;
+  while (byte_count > 0) {
+    size_t line_offset = offset & ~0x0f;
+
+    char* hex = out;
+    char* asc = out + (kBitsPerWord / 4) + /* offset */ 1 + /* colon */
+        (16 * 3) + /* 16 hex digits and space */ 2 /* white space */;
+
+    for (int i = 0; i < (kBitsPerWord / 4); i++) {
+      *hex++ = gHexDigit[line_offset >> (kBitsPerWord - 4)];
+      line_offset <<= 4;
+    }
+    hex++;
+    hex++;
+
+    size_t count = std::min(byte_count, 16 - gap);
+    // CHECK_NE(count, 0U);
+    // CHECK_LE(count + gap, 16U);
+
+    if (gap) {
+      /* only on first line */
+      hex += gap * 3;
+      asc += gap;
+    }
+
+    size_t i;
+    for (i = gap ; i < count + gap; i++) {
+      *hex++ = gHexDigit[*addr >> 4];
+      *hex++ = gHexDigit[*addr & 0x0f];
+      hex++;
+      if (*addr >= 0x20 && *addr < 0x7f /*isprint(*addr)*/) {
+        *asc++ = *addr;
+      } else {
+        *asc++ = '.';
+      }
+      addr++;
+    }
+    for (; i < 16; i++) {
+      /* erase extra stuff; only happens on last line */
+      *hex++ = ' ';
+      *hex++ = ' ';
+      hex++;
+      *asc++ = ' ';
+    }
+
+    os << prefix_ << out;
+
+    gap = 0;
+    byte_count -= count;
+    offset += count;
+    if (byte_count > 0) {
+      os << "\n";
+    }
+  }
+}
+
+}  // namespace art
diff --git a/runtime/base/hex_dump.h b/runtime/base/hex_dump.h
new file mode 100644
index 0000000..8769ece
--- /dev/null
+++ b/runtime/base/hex_dump.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_HEX_DUMP_H_
+#define ART_RUNTIME_BASE_HEX_DUMP_H_
+
+#include "macros.h"
+
+#include <ostream>
+
+namespace art {
+
+// Prints a hex dump in this format:
+//
+// 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
+// 01234568: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
+class HexDump {
+ public:
+  HexDump(const void* address, size_t byte_count, bool show_actual_addresses, const char* prefix)
+      : address_(address), byte_count_(byte_count), show_actual_addresses_(show_actual_addresses),
+        prefix_(prefix) {
+  }
+
+  void Dump(std::ostream& os) const;
+
+ private:
+  const void* const address_;
+  const size_t byte_count_;
+  const bool show_actual_addresses_;
+  const char* const prefix_;
+
+  DISALLOW_COPY_AND_ASSIGN(HexDump);
+};
+
+inline std::ostream& operator<<(std::ostream& os, const HexDump& rhs) {
+  rhs.Dump(os);
+  return os;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_HEX_DUMP_H_
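A quick usage sketch for the new header; the helper below is illustrative, while the constructor arguments and the operator<< come straight from the class above:

#include <sstream>
#include <string>

#include "base/hex_dump.h"

std::string DumpBytes(const void* addr, size_t count) {
  std::ostringstream oss;
  // Relative offsets (show_actual_addresses = false), no per-line prefix.
  oss << art::HexDump(addr, count, false, "");
  return oss.str();
}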
diff --git a/runtime/base/hex_dump_test.cc b/runtime/base/hex_dump_test.cc
new file mode 100644
index 0000000..d950961
--- /dev/null
+++ b/runtime/base/hex_dump_test.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hex_dump.h"
+
+#include "globals.h"
+
+#include "gtest/gtest.h"
+
+#include <stdint.h>
+
+namespace art {
+
+TEST(HexDump, OneLine) {
+  const char* test_text = "0123456789abcdef";
+  std::ostringstream oss;
+  oss << HexDump(test_text, strlen(test_text), false, "");
+  EXPECT_STREQ(oss.str().c_str(),
+               "00000000: 30 31 32 33 34 35 36 37 38 39 61 62 63 64 65 66  0123456789abcdef");
+}
+
+TEST(HexDump, MultiLine) {
+  const char* test_text = "0123456789abcdef0123456789ABCDEF";
+  std::ostringstream oss;
+  oss << HexDump(test_text, strlen(test_text), false, "");
+  EXPECT_STREQ(oss.str().c_str(),
+               "00000000: 30 31 32 33 34 35 36 37 38 39 61 62 63 64 65 66  0123456789abcdef\n"
+               "00000010: 30 31 32 33 34 35 36 37 38 39 41 42 43 44 45 46  0123456789ABCDEF");
+}
+
+uint64_t g16byte_aligned_number __attribute__ ((aligned(16)));  // NOLINT(whitespace/parens)
+TEST(HexDump, ShowActualAddresses) {
+  g16byte_aligned_number = 0x6162636465666768;
+  std::ostringstream oss;
+  oss << HexDump(&g16byte_aligned_number, 8, true, "");
+  // Compare ignoring pointer.
+  EXPECT_STREQ(oss.str().c_str() + (kBitsPerWord / 4),
+               ": 68 67 66 65 64 63 62 61                          hgfedcba        ");
+}
+
+TEST(HexDump, Prefix) {
+  const char* test_text = "0123456789abcdef";
+  std::ostringstream oss;
+  oss << HexDump(test_text, strlen(test_text), false, "test prefix: ");
+  EXPECT_STREQ(oss.str().c_str(),
+               "test prefix: 00000000: 30 31 32 33 34 35 36 37 38 39 61 62 63 64 65 66  "
+               "0123456789abcdef");
+}
+
+}  // namespace art
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 15554ac..46b8ff2 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -161,97 +161,4 @@
   }
 }
 
-HexDump::HexDump(const void* address, size_t byte_count, bool show_actual_addresses)
-    : address_(address), byte_count_(byte_count), show_actual_addresses_(show_actual_addresses) {
-}
-
-void HexDump::Dump(std::ostream& os) const {
-  if (byte_count_ == 0) {
-    return;
-  }
-
-  if (address_ == NULL) {
-    os << "00000000:";
-    return;
-  }
-
-  static const char gHexDigit[] = "0123456789abcdef";
-  const unsigned char* addr = reinterpret_cast<const unsigned char*>(address_);
-  // 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
-  char out[(kBitsPerWord / 4) + /* offset */
-           1 + /* colon */
-           (16 * 3) + /* 16 hex digits and space */
-           2 + /* white space */
-           16 + /* 16 characters*/
-           1 /* \0 */ ];
-  size_t offset;    /* offset to show while printing */
-
-  if (show_actual_addresses_) {
-    offset = reinterpret_cast<size_t>(addr);
-  } else {
-    offset = 0;
-  }
-  memset(out, ' ', sizeof(out)-1);
-  out[kBitsPerWord / 4] = ':';
-  out[sizeof(out)-1] = '\0';
-
-  size_t byte_count = byte_count_;
-  size_t gap = offset & 0x0f;
-  while (byte_count) {
-    size_t line_offset = offset & ~0x0f;
-
-    char* hex = out;
-    char* asc = out + (kBitsPerWord / 4) + /* offset */ 1 + /* colon */
-        (16 * 3) + /* 16 hex digits and space */ 2 /* white space */;
-
-    for (int i = 0; i < (kBitsPerWord / 4); i++) {
-      *hex++ = gHexDigit[line_offset >> (kBitsPerWord - 4)];
-      line_offset <<= 4;
-    }
-    hex++;
-    hex++;
-
-    size_t count = std::min(byte_count, 16 - gap);
-    CHECK_NE(count, 0U);
-    CHECK_LE(count + gap, 16U);
-
-    if (gap) {
-      /* only on first line */
-      hex += gap * 3;
-      asc += gap;
-    }
-
-    size_t i;
-    for (i = gap ; i < count + gap; i++) {
-      *hex++ = gHexDigit[*addr >> 4];
-      *hex++ = gHexDigit[*addr & 0x0f];
-      hex++;
-      if (*addr >= 0x20 && *addr < 0x7f /*isprint(*addr)*/) {
-        *asc++ = *addr;
-      } else {
-        *asc++ = '.';
-      }
-      addr++;
-    }
-    for (; i < 16; i++) {
-      /* erase extra stuff; only happens on last line */
-      *hex++ = ' ';
-      *hex++ = ' ';
-      hex++;
-      *asc++ = ' ';
-    }
-
-    os << out;
-
-    gap = 0;
-    byte_count -= count;
-    offset += count;
-  }
-}
-
-std::ostream& operator<<(std::ostream& os, const HexDump& rhs) {
-  rhs.Dump(os);
-  return os;
-}
-
 }  // namespace art
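Relative to the version removed here, the relocated Dump() in runtime/base/hex_dump.cc prepends prefix_ to every row and emits a single '\n' between rows (and none after the last). A small sketch exercising that behavior (the helper name is illustrative):

#include <sstream>
#include <string>

#include "base/hex_dump.h"

// True when rows carry the prefix and are newline-joined without a trailing '\n'.
bool RowsPrefixedAndJoined() {
  char buf[32] = {};  // two 16-byte rows of zeros
  std::ostringstream oss;
  oss << art::HexDump(buf, sizeof(buf), false, "> ");
  const std::string s = oss.str();
  return s.compare(0, 2, "> ") == 0 && s.find('\n') != std::string::npos &&
         s[s.size() - 1] != '\n';
}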
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 8e40da0..075d571 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -208,24 +208,6 @@
   DISALLOW_COPY_AND_ASSIGN(LogMessage);
 };
 
-// Prints a hex dump in this format:
-//
-// 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
-// 01234568: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
-class HexDump {
- public:
-  HexDump(const void* address, size_t byte_count, bool show_actual_addresses = false);
-  void Dump(std::ostream& os) const;
-
- private:
-  const void* address_;
-  size_t byte_count_;
-  bool show_actual_addresses_;
-
-  DISALLOW_COPY_AND_ASSIGN(HexDump);
-};
-std::ostream& operator<<(std::ostream& os, const HexDump& rhs);
-
 // A convenience to allow any class with a "Dump(std::ostream& os)" member function
 // but without an operator<< to be used as if it had an operator<<. Use like this:
 //
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 30bf623..a7e25cb 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -17,7 +17,6 @@
 #ifndef ART_RUNTIME_BASE_MUTEX_INL_H_
 #define ART_RUNTIME_BASE_MUTEX_INL_H_
 
-#define __STDC_FORMAT_MACROS 1
 #include <inttypes.h>
 
 #include "mutex.h"
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index b5d9fdf..344da3f 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -199,14 +199,14 @@
   Thread* self = Thread::Current();
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // The GC can't handle an object with a null class since we can't get the size of this object.
-  heap->IncrementDisableGC(self);
+  heap->IncrementDisableMovingGC(self);
   SirtRef<mirror::Class> java_lang_Class(self, down_cast<mirror::Class*>(
       heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass))));
   CHECK(java_lang_Class.get() != NULL);
   mirror::Class::SetClassClass(java_lang_Class.get());
   java_lang_Class->SetClass(java_lang_Class.get());
   java_lang_Class->SetClassSize(sizeof(mirror::ClassClass));
-  heap->DecrementDisableGC(self);
+  heap->DecrementDisableMovingGC(self);
   // AllocClass(mirror::Class*) can now be used
 
   // Class[] is used for reflection support.
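The rename makes explicit that only moving collectors are held off while the class-less object exists; non-moving GC work may proceed. A hypothetical RAII wrapper over the renamed pair, shown purely to illustrate the required pairing (no such helper is part of this change; header paths assumed):

#include "gc/heap.h"
#include "thread.h"

// Hypothetical: keeps Increment/DecrementDisableMovingGC balanced on all paths.
class ScopedDisableMovingGC {
 public:
  ScopedDisableMovingGC(art::gc::Heap* heap, art::Thread* self)
      : heap_(heap), self_(self) {
    heap_->IncrementDisableMovingGC(self_);
  }
  ~ScopedDisableMovingGC() {
    heap_->DecrementDisableMovingGC(self_);
  }
 private:
  art::gc::Heap* const heap_;
  art::Thread* const self_;
};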
diff --git a/runtime/common_test.h b/runtime/common_test.h
index ee95d5b..fce3f3f 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -26,7 +26,7 @@
 
 #include "../../external/icu4c/common/unicode/uvernum.h"
 #include "../compiler/dex/quick/dex_file_to_method_inliner_map.h"
-#include "../compiler/dex/verified_methods_data.h"
+#include "../compiler/dex/verification_results.h"
 #include "../compiler/driver/compiler_driver.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
@@ -160,11 +160,7 @@
 
 #if defined(__arm__)
 
-
-#include <signal.h>
-#include <asm/sigcontext.h>
-#include <asm-generic/ucontext.h>
-
+#include <sys/ucontext.h>
 
 // A signal handler called when have an illegal instruction.  We record the fact in
 // a global boolean and then increment the PC in the signal context to return to
@@ -425,9 +421,9 @@
     CompilerBackend compiler_backend = kQuick;
 #endif
 
-    verified_methods_data_.reset(new VerifiedMethodsData);
+    verification_results_.reset(new VerificationResults);
     method_inliner_map_.reset(compiler_backend == kQuick ? new DexFileToMethodInlinerMap : nullptr);
-    callbacks_.Reset(verified_methods_data_.get(), method_inliner_map_.get());
+    callbacks_.Reset(verification_results_.get(), method_inliner_map_.get());
     Runtime::Options options;
     options.push_back(std::make_pair("compilercallbacks", static_cast<CompilerCallbacks*>(&callbacks_)));
     options.push_back(std::make_pair("bootclasspath", &boot_class_path_));
@@ -474,7 +470,7 @@
         }
       }
       class_linker_->FixupDexCaches(runtime_->GetResolutionMethod());
-      compiler_driver_.reset(new CompilerDriver(verified_methods_data_.get(),
+      compiler_driver_.reset(new CompilerDriver(verification_results_.get(),
                                                 method_inliner_map_.get(),
                                                 compiler_backend, instruction_set,
                                                 instruction_set_features,
@@ -526,7 +522,7 @@
     compiler_driver_.reset();
     callbacks_.Reset(nullptr, nullptr);
     method_inliner_map_.reset();
-    verified_methods_data_.reset();
+    verification_results_.reset();
     STLDeleteElements(&opened_dex_files_);
 
     Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption after the test
@@ -654,18 +650,18 @@
 
   class TestCompilerCallbacks : public CompilerCallbacks {
    public:
-    TestCompilerCallbacks() : verified_methods_data_(nullptr), method_inliner_map_(nullptr) { }
+    TestCompilerCallbacks() : verification_results_(nullptr), method_inliner_map_(nullptr) { }
 
-    void Reset(VerifiedMethodsData* verified_methods_data,
+    void Reset(VerificationResults* verification_results,
                DexFileToMethodInlinerMap* method_inliner_map) {
-        verified_methods_data_ = verified_methods_data;
+        verification_results_ = verification_results;
         method_inliner_map_ = method_inliner_map;
     }
 
     virtual bool MethodVerified(verifier::MethodVerifier* verifier)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-      CHECK(verified_methods_data_);
-      bool result = verified_methods_data_->ProcessVerifiedMethod(verifier);
+      CHECK(verification_results_);
+      bool result = verification_results_->ProcessVerifiedMethod(verifier);
       if (result && method_inliner_map_ != nullptr) {
         MethodReference ref = verifier->GetMethodReference();
         method_inliner_map_->GetMethodInliner(ref.dex_file)
@@ -674,11 +670,11 @@
       return result;
     }
     virtual void ClassRejected(ClassReference ref) {
-      verified_methods_data_->AddRejectedClass(ref);
+      verification_results_->AddRejectedClass(ref);
     }
 
    private:
-    VerifiedMethodsData* verified_methods_data_;
+    VerificationResults* verification_results_;
     DexFileToMethodInlinerMap* method_inliner_map_;
   };
 
@@ -689,7 +685,7 @@
   UniquePtr<Runtime> runtime_;
   // Owned by the runtime
   ClassLinker* class_linker_;
-  UniquePtr<VerifiedMethodsData> verified_methods_data_;
+  UniquePtr<VerificationResults> verification_results_;
   UniquePtr<DexFileToMethodInlinerMap> method_inliner_map_;
   TestCompilerCallbacks callbacks_;
   UniquePtr<CompilerDriver> compiler_driver_;
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index dc9d337..528e112 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -113,13 +113,13 @@
 }
 
 bool DexFileVerifier::CheckPointerRange(const void* start, const void* end, const char* label) {
-  uint32_t range_start = reinterpret_cast<uint32_t>(start);
-  uint32_t range_end = reinterpret_cast<uint32_t>(end);
-  uint32_t file_start = reinterpret_cast<uint32_t>(begin_);
-  uint32_t file_end = file_start + size_;
+  const byte* range_start = reinterpret_cast<const byte*>(start);
+  const byte* range_end = reinterpret_cast<const byte*>(end);
+  const byte* file_start = reinterpret_cast<const byte*>(begin_);
+  const byte* file_end = file_start + size_;
   if (UNLIKELY((range_start < file_start) || (range_start > file_end) ||
                (range_end < file_start) || (range_end > file_end))) {
-    ErrorStringPrintf("Bad range for %s: %x to %x", label,
+    ErrorStringPrintf("Bad range for %s: %zx to %zx", label,
                       range_start - file_start, range_end - file_start);
     return false;
   }
@@ -284,7 +284,7 @@
 
   for (uint32_t i = 0; i < handlers_size; i++) {
     bool catch_all;
-    uint32_t offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(handlers_base);
+    size_t offset = ptr_ - handlers_base;
     int32_t size = DecodeSignedLeb128(&ptr_);
 
     if (UNLIKELY((size < -65536) || (size > 65536))) {
@@ -299,7 +299,7 @@
       catch_all = false;
     }
 
-    handler_offsets[i] = offset;
+    handler_offsets[i] = static_cast<uint32_t>(offset);
 
     while (size-- > 0) {
       uint32_t type_idx = DecodeUnsignedLeb128(&ptr_);
@@ -386,14 +386,14 @@
   return true;
 }
 
-bool DexFileVerifier::CheckPadding(uint32_t offset, uint32_t aligned_offset) {
+bool DexFileVerifier::CheckPadding(size_t offset, uint32_t aligned_offset) {
   if (offset < aligned_offset) {
     if (!CheckPointerRange(begin_ + offset, begin_ + aligned_offset, "section")) {
       return false;
     }
     while (offset < aligned_offset) {
       if (UNLIKELY(*ptr_ != '\0')) {
-        ErrorStringPrintf("Non-zero padding %x before section start at %x", *ptr_, offset);
+        ErrorStringPrintf("Non-zero padding %x before section start at %zx", *ptr_, offset);
         return false;
       }
       ptr_++;
@@ -634,7 +634,7 @@
   }
 
   // try_items are 4-byte aligned. Verify the spacer is 0.
-  if ((((uint32_t) &insns[insns_size] & 3) != 0) && (insns[insns_size] != 0)) {
+  if (((reinterpret_cast<uintptr_t>(&insns[insns_size]) & 3) != 0) && (insns[insns_size] != 0)) {
     ErrorStringPrintf("Non-zero padding: %x", insns[insns_size]);
     return false;
   }
@@ -975,9 +975,9 @@
   return true;
 }
 
-bool DexFileVerifier::CheckIntraSectionIterate(uint32_t offset, uint32_t count, uint16_t type) {
+bool DexFileVerifier::CheckIntraSectionIterate(size_t offset, uint32_t count, uint16_t type) {
   // Get the right alignment mask for the type of section.
-  uint32_t alignment_mask;
+  size_t alignment_mask;
   switch (type) {
     case DexFile::kDexTypeClassDataItem:
     case DexFile::kDexTypeStringDataItem:
@@ -993,7 +993,7 @@
 
   // Iterate through the items in the section.
   for (uint32_t i = 0; i < count; i++) {
-    uint32_t aligned_offset = (offset + alignment_mask) & ~alignment_mask;
+    size_t aligned_offset = (offset + alignment_mask) & ~alignment_mask;
 
     // Check the padding between items.
     if (!CheckPadding(offset, aligned_offset)) {
@@ -1134,7 +1134,7 @@
       offset_to_type_map_.Put(aligned_offset, type);
     }
 
-    aligned_offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(begin_);
+    aligned_offset = ptr_ - begin_;
     if (UNLIKELY(aligned_offset > size_)) {
       ErrorStringPrintf("Item %d at ends out of bounds", i);
       return false;
@@ -1146,7 +1146,7 @@
   return true;
 }
 
-bool DexFileVerifier::CheckIntraIdSection(uint32_t offset, uint32_t count, uint16_t type) {
+bool DexFileVerifier::CheckIntraIdSection(size_t offset, uint32_t count, uint16_t type) {
   uint32_t expected_offset;
   uint32_t expected_size;
 
@@ -1183,7 +1183,7 @@
 
   // Check that the offset and size are what were expected from the header.
   if (UNLIKELY(offset != expected_offset)) {
-    ErrorStringPrintf("Bad offset for section: got %x, expected %x", offset, expected_offset);
+    ErrorStringPrintf("Bad offset for section: got %zx, expected %x", offset, expected_offset);
     return false;
   }
   if (UNLIKELY(count != expected_size)) {
@@ -1194,13 +1194,13 @@
   return CheckIntraSectionIterate(offset, count, type);
 }
 
-bool DexFileVerifier::CheckIntraDataSection(uint32_t offset, uint32_t count, uint16_t type) {
-  uint32_t data_start = header_->data_off_;
-  uint32_t data_end = data_start + header_->data_size_;
+bool DexFileVerifier::CheckIntraDataSection(size_t offset, uint32_t count, uint16_t type) {
+  size_t data_start = header_->data_off_;
+  size_t data_end = data_start + header_->data_size_;
 
   // Sanity check the offset of the section.
   if (UNLIKELY((offset < data_start) || (offset > data_end))) {
-    ErrorStringPrintf("Bad offset for data subsection: %x", offset);
+    ErrorStringPrintf("Bad offset for data subsection: %zx", offset);
     return false;
   }
 
@@ -1208,9 +1208,9 @@
     return false;
   }
 
-  uint32_t next_offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(begin_);
+  size_t next_offset = ptr_ - begin_;
   if (next_offset > data_end) {
-    ErrorStringPrintf("Out-of-bounds end of data subsection: %x", next_offset);
+    ErrorStringPrintf("Out-of-bounds end of data subsection: %zx", next_offset);
     return false;
   }
 
@@ -1222,7 +1222,7 @@
   const DexFile::MapItem* item = map->list_;
 
   uint32_t count = map->size_;
-  uint32_t offset = 0;
+  size_t offset = 0;
   ptr_ = begin_;
 
   // Check the items listed in the map.
@@ -1235,7 +1235,7 @@
     if (!CheckPadding(offset, section_offset)) {
       return false;
     } else if (UNLIKELY(offset > section_offset)) {
-      ErrorStringPrintf("Section overlap or out-of-order map: %x, %x", offset, section_offset);
+      ErrorStringPrintf("Section overlap or out-of-order map: %zx, %x", offset, section_offset);
       return false;
     }
 
@@ -1262,7 +1262,7 @@
         if (!CheckIntraIdSection(section_offset, section_count, type)) {
           return false;
         }
-        offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(begin_);
+        offset = ptr_ - begin_;
         break;
       case DexFile::kDexTypeMapList:
         if (UNLIKELY(section_count != 1)) {
@@ -1290,7 +1290,7 @@
         if (!CheckIntraDataSection(section_offset, section_count, type)) {
           return false;
         }
-        offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(begin_);
+        offset = ptr_ - begin_;
         break;
       default:
         ErrorStringPrintf("Unknown map item type %x", type);
@@ -1303,14 +1303,14 @@
   return true;
 }
 
-bool DexFileVerifier::CheckOffsetToTypeMap(uint32_t offset, uint16_t type) {
+bool DexFileVerifier::CheckOffsetToTypeMap(size_t offset, uint16_t type) {
   auto it = offset_to_type_map_.find(offset);
   if (UNLIKELY(it == offset_to_type_map_.end())) {
-    ErrorStringPrintf("No data map entry found @ %x; expected %x", offset, type);
+    ErrorStringPrintf("No data map entry found @ %zx; expected %x", offset, type);
     return false;
   }
   if (UNLIKELY(it->second != type)) {
-    ErrorStringPrintf("Unexpected data map entry @ %x; expected %x, found %x",
+    ErrorStringPrintf("Unexpected data map entry @ %zx; expected %x, found %x",
                       offset, type, it->second);
     return false;
   }
@@ -1784,9 +1784,9 @@
   return true;
 }
 
-bool DexFileVerifier::CheckInterSectionIterate(uint32_t offset, uint32_t count, uint16_t type) {
+bool DexFileVerifier::CheckInterSectionIterate(size_t offset, uint32_t count, uint16_t type) {
   // Get the right alignment mask for the type of section.
-  uint32_t alignment_mask;
+  size_t alignment_mask;
   switch (type) {
     case DexFile::kDexTypeClassDataItem:
       alignment_mask = sizeof(uint8_t) - 1;
@@ -1871,7 +1871,7 @@
     }
 
     previous_item_ = prev_ptr;
-    offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(begin_);
+    offset = ptr_ - begin_;
   }
 
   return true;
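
Taken together, the dex_file_verifier.cc hunks are a 64-bit portability fix: reinterpret_cast of a pointer to uint32_t truncates (and fails to compile) on LP64 hosts, so offsets are now computed as pointer differences and carried in size_t, printed with %zx. A minimal sketch of the portable pattern, with illustrative names only:

    #include <cstddef>
    #include <cstdint>

    // Subtracting byte pointers yields a ptrdiff_t, which fits in a size_t
    // for any valid in-file range and prints with %zx on 32- and 64-bit
    // hosts alike, unlike a pointer cast to uint32_t.
    inline size_t OffsetInFile(const uint8_t* begin, const uint8_t* ptr) {
      return static_cast<size_t>(ptr - begin);
    }
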
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 4b8b80a..3337785 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -53,7 +53,7 @@
   bool CheckClassDataItemField(uint32_t idx, uint32_t access_flags, bool expect_static);
   bool CheckClassDataItemMethod(uint32_t idx, uint32_t access_flags, uint32_t code_offset,
                                 bool expect_direct);
-  bool CheckPadding(uint32_t offset, uint32_t aligned_offset);
+  bool CheckPadding(size_t offset, uint32_t aligned_offset);
   bool CheckEncodedValue();
   bool CheckEncodedArray();
   bool CheckEncodedAnnotation();
@@ -65,12 +65,12 @@
   bool CheckIntraAnnotationItem();
   bool CheckIntraAnnotationsDirectoryItem();
 
-  bool CheckIntraSectionIterate(uint32_t offset, uint32_t count, uint16_t type);
-  bool CheckIntraIdSection(uint32_t offset, uint32_t count, uint16_t type);
-  bool CheckIntraDataSection(uint32_t offset, uint32_t count, uint16_t type);
+  bool CheckIntraSectionIterate(size_t offset, uint32_t count, uint16_t type);
+  bool CheckIntraIdSection(size_t offset, uint32_t count, uint16_t type);
+  bool CheckIntraDataSection(size_t offset, uint32_t count, uint16_t type);
   bool CheckIntraSection();
 
-  bool CheckOffsetToTypeMap(uint32_t offset, uint16_t type);
+  bool CheckOffsetToTypeMap(size_t offset, uint16_t type);
   uint16_t FindFirstClassDataDefiner(const byte* ptr) const;
   uint16_t FindFirstAnnotationsDirectoryDefiner(const byte* ptr) const;
 
@@ -85,7 +85,7 @@
   bool CheckInterClassDataItem();
   bool CheckInterAnnotationsDirectoryItem();
 
-  bool CheckInterSectionIterate(uint32_t offset, uint32_t count, uint16_t type);
+  bool CheckInterSectionIterate(size_t offset, uint32_t count, uint16_t type);
   bool CheckInterSection();
 
   void ErrorStringPrintf(const char* fmt, ...)
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 6e8736a..8fccd6d 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -16,9 +16,12 @@
 
 #include "dex_instruction-inl.h"
 
+#include <inttypes.h>
+
+#include <iomanip>
+
 #include "dex_file-inl.h"
 #include "utils.h"
-#include <iomanip>
 
 namespace art {
 
@@ -403,7 +406,8 @@
           os << StringPrintf("%s v%d, #int %+d // 0x%x", opcode, VRegA_21h(), value, value);
         } else {
           uint64_t value = static_cast<uint64_t>(VRegB_21h()) << 48;
-          os << StringPrintf("%s v%d, #long %+lld // 0x%llx", opcode, VRegA_21h(), value, value);
+          os << StringPrintf("%s v%d, #long %+" PRId64 " // 0x%" PRIx64, opcode, VRegA_21h(),
+                             value, value);
         }
       }
       break;
@@ -611,7 +615,7 @@
       }
       break;
     }
-    case k51l: os << StringPrintf("%s v%d, #%+lld", opcode, VRegA_51l(), VRegB_51l()); break;
+    case k51l: os << StringPrintf("%s v%d, #%+" PRId64, opcode, VRegA_51l(), VRegB_51l()); break;
     default: os << " unknown format (" << DumpHex(5) << ")"; break;
   }
   return os.str();
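
The %lld-to-PRId64 change matters because int64_t is 'long' on LP64 and 'long long' on 32-bit targets; the <inttypes.h> macros expand to the right length modifier either way, assembled via string-literal concatenation. A small illustration of the idiom:

    #include <inttypes.h>
    #include <cstdio>

    // Illustrative only: the format string stays correct regardless of
    // whether int64_t is 'long' or 'long long' on the target ABI.
    void PrintWideLiteral(int64_t value) {
      printf("#long %+" PRId64 " // 0x%" PRIx64 "\n",
             value, static_cast<uint64_t>(value));
    }
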
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index b3b24ba..2f7c38a 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -64,15 +64,16 @@
     prot = PROT_READ;
     flags = MAP_PRIVATE;
   }
-  int64_t file_length = file_->GetLength();
-  if (file_length < 0) {
-    errno = -file_length;
+  int64_t temp_file_length = file_->GetLength();
+  if (temp_file_length < 0) {
+    errno = -temp_file_length;
     *error_msg = StringPrintf("Failed to get length of file: '%s' fd=%d: %s",
                               file_->GetPath().c_str(), file_->Fd(), strerror(errno));
     return false;
   }
+  size_t file_length = static_cast<size_t>(temp_file_length);
   if (file_length < sizeof(llvm::ELF::Elf32_Ehdr)) {
-    *error_msg = StringPrintf("File size of %lld bytes not large enough to contain ELF header of "
+    *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF header of "
                               "%zd bytes: '%s'", file_length, sizeof(llvm::ELF::Elf32_Ehdr),
                               file_->GetPath().c_str());
     return false;
@@ -89,7 +90,7 @@
     // then remap to cover program header
     size_t program_header_size = header_->e_phoff + (header_->e_phentsize * header_->e_phnum);
     if (file_length < program_header_size) {
-      *error_msg = StringPrintf("File size of %lld bytes not large enough to contain ELF program "
+      *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF program "
                                 "header of %zd bytes: '%s'", file_length,
                                 sizeof(llvm::ELF::Elf32_Ehdr), file_->GetPath().c_str());
       return false;
@@ -632,7 +633,14 @@
     // non-zero, the segments require the specific address specified,
     // which was either specified in the file or because we already set
     // base_address_ after the first zero segment.
-    int64_t file_length = file_->GetLength();
+    int64_t temp_file_length = file_->GetLength();
+    if (temp_file_length < 0) {
+      errno = -temp_file_length;
+      *error_msg = StringPrintf("Failed to get length of file: '%s' fd=%d: %s",
+                                file_->GetPath().c_str(), file_->Fd(), strerror(errno));
+      return false;
+    }
+    size_t file_length = static_cast<size_t>(temp_file_length);
     if (program_header.p_vaddr == 0) {
       std::string reservation_name("ElfFile reservation for ");
       reservation_name += file_->GetPath();
@@ -666,7 +674,7 @@
       flags |= MAP_PRIVATE;
     }
     if (file_length < (program_header.p_offset + program_header.p_memsz)) {
-      *error_msg = StringPrintf("File size of %lld bytes not large enough to contain ELF segment "
+      *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF segment "
                                 "%d of %d bytes: '%s'", file_length, i,
                                 program_header.p_offset + program_header.p_memsz,
                                 file_->GetPath().c_str());
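
Both elf_file.cc hunks follow the same shape: GetLength() reports errors as a negative errno in an int64_t, and narrowing that straight to size_t would wrap to a huge value and silently pass every "file too small" comparison below it. A sketch of the check-then-narrow idiom under those assumptions (names are illustrative):

    #include <cerrno>
    #include <cstddef>
    #include <cstdint>

    // Reject the negative error encoding before converting, so a failed
    // length query can never look like an enormous file.
    bool NarrowLength(int64_t raw, size_t* out) {
      if (raw < 0) {
        errno = static_cast<int>(-raw);  // the callee encoded -errno
        return false;
      }
      *out = static_cast<size_t>(raw);
      return true;
    }
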
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 09be56e..f6e8ca3 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -74,21 +74,48 @@
   }
   if (UNLIKELY(!klass->IsInitialized())) {
     SirtRef<mirror::Class> sirt_klass(self, klass);
-    // The class initializer might cause a GC.
+    // EnsureInitialized (the class initializer) might cause a GC, which
+    // may cause us to suspend, meaning that another thread may try to
+    // change the allocator while we are stuck in the entrypoints of
+    // an old allocator. Also, the class initialization may fail. To
+    // handle these cases we mark the slow path boolean as true so
+    // that the caller knows to check the allocator type to see if it
+    // has changed and to null-check the return value in case the
+    // initialization fails.
+    *slow_path = true;
     if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_klass, true, true)) {
       DCHECK(self->IsExceptionPending());
       return nullptr;  // Failure
     }
-    // TODO: EnsureInitialized may cause us to suspend meaning that another thread may try to
-    // change the allocator while we are stuck in the entrypoints of an old allocator. To handle
-    // this case we mark the slow path boolean as true so that the caller knows to check the
-    // allocator type to see if it has changed.
-    *slow_path = true;
     return sirt_klass.get();
   }
   return klass;
 }
 
+// TODO: Fix no thread safety analysis when annotalysis is smarter.
+ALWAYS_INLINE static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
+                                                                               Thread* self, bool* slow_path)
+    NO_THREAD_SAFETY_ANALYSIS {
+  if (UNLIKELY(!klass->IsInitialized())) {
+    SirtRef<mirror::Class> sirt_class(self, klass);
+    // EnsureInitialized (the class initializer) might cause a GC, which
+    // may cause us to suspend, meaning that another thread may try to
+    // change the allocator while we are stuck in the entrypoints of
+    // an old allocator. Also, the class initialization may fail. To
+    // handle these cases we mark the slow path boolean as true so
+    // that the caller knows to check the allocator type to see if it
+    // has changed and to null-check the return value in case the
+    // initialization fails.
+    *slow_path = true;
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_class, true, true)) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;  // Failure
+    }
+    return sirt_class.get();
+  }
+  return klass;
+}
+
 // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
 // cannot be resolved, throw an error. If it can, use it to create an instance.
 // When verification/compiler hasn't been able to verify access, optionally perform an access
@@ -112,6 +139,40 @@
   return klass->Alloc<kInstrumented>(self, allocator_type);
 }
 
+// Given the context of a calling Method and a resolved class, create an instance.
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
+template <bool kInstrumented>
+ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
+                                                                        mirror::ArtMethod* method,
+                                                                        Thread* self,
+                                                                        gc::AllocatorType allocator_type)
+    NO_THREAD_SAFETY_ANALYSIS {
+  DCHECK(klass != nullptr);
+  bool slow_path = false;
+  klass = CheckClassInitializedForObjectAlloc(klass, self, &slow_path);
+  if (UNLIKELY(slow_path)) {
+    if (klass == nullptr) {
+      return nullptr;
+    }
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    return klass->Alloc<kInstrumented>(self, heap->GetCurrentAllocator());
+  }
+  return klass->Alloc<kInstrumented>(self, allocator_type);
+}
+
+// Given the context of a calling Method and an initialized class, create an instance.
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
+template <bool kInstrumented>
+ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
+                                                                           mirror::ArtMethod* method,
+                                                                           Thread* self,
+                                                                           gc::AllocatorType allocator_type)
+    NO_THREAD_SAFETY_ANALYSIS {
+  DCHECK(klass != nullptr);
+  return klass->Alloc<kInstrumented>(self, allocator_type);
+}
+
+
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <bool kAccessCheck>
 ALWAYS_INLINE static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
@@ -171,6 +232,30 @@
   return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, allocator_type);
 }
 
+template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
+                                                                      mirror::ArtMethod* method,
+                                                                      int32_t component_count,
+                                                                      Thread* self,
+                                                                      gc::AllocatorType allocator_type)
+    NO_THREAD_SAFETY_ANALYSIS {
+  DCHECK(klass != nullptr);
+  if (UNLIKELY(component_count < 0)) {
+    ThrowNegativeArraySizeException(component_count);
+    return nullptr;  // Failure
+  }
+  if (kAccessCheck) {
+    mirror::Class* referrer = method->GetDeclaringClass();
+    if (UNLIKELY(!referrer->CanAccess(klass))) {
+      ThrowIllegalAccessErrorClass(referrer, klass);
+      return nullptr;  // Failure
+    }
+  }
+  // No need to retry a slow-path allocation as the above code won't
+  // cause a GC or thread suspension.
+  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, allocator_type);
+}
+
 extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
                                                  int32_t component_count, Thread* self,
                                                  bool access_check,
@@ -316,8 +401,10 @@
     }
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
     mirror::Class* referring_class = referrer->GetDeclaringClass();
-    if (!referring_class->CanAccessResolvedMethod<true, type>(methods_class, resolved_method,
-                                                              method_idx)) {
+    bool can_access_resolved_method =
+        referring_class->CanAccessResolvedMethod<true, type>(methods_class, resolved_method,
+                                                             method_idx);
+    if (UNLIKELY(!can_access_resolved_method)) {
       DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
       return nullptr;  // Failure.
     }
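
The new Resolved/Initialized helpers are fast paths that shed work the caller has already proven unnecessary: AllocObjectFromCodeResolved skips type resolution, and AllocObjectFromCodeInitialized also skips the initialization check. What they share is the slow-path contract paraphrased below; all names in the sketch are stand-ins, not ART APIs. If the initialization check may have suspended the thread, the allocator chosen at compile time can be stale, so the current one is re-read.

    enum class Allocator { kBumpPointer, kRosAlloc };

    struct Heap {
      Allocator current;
    };

    void* AllocWith(Heap* heap, Allocator a) {   // stub allocator
      (void)heap; (void)a;
      return nullptr;
    }

    bool EnsureInitialized(bool* slow_path) {    // stub; may "suspend"
      *slow_path = false;
      return true;
    }

    void* AllocFast(Heap* heap, Allocator cached_allocator) {
      bool slow_path = false;
      if (!EnsureInitialized(&slow_path)) {
        return nullptr;  // initialization failed
      }
      // After a possible suspension, the cached allocator may be stale.
      return AllocWith(heap, slow_path ? heap->current : cached_allocator);
    }
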
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index b1dca77..2e1b69d 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -30,6 +30,18 @@
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
 } \
+extern "C" mirror::Object* artAllocObjectFromCodeResolved##suffix##suffix2( \
+    mirror::Class* klass, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocObjectFromCodeResolved<instrumented_bool>(klass, method, self, allocator_type); \
+} \
+extern "C" mirror::Object* artAllocObjectFromCodeInitialized##suffix##suffix2( \
+    mirror::Class* klass, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocObjectFromCodeInitialized<instrumented_bool>(klass, method, self, allocator_type); \
+} \
 extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
@@ -44,6 +56,14 @@
   return AllocArrayFromCode<false, instrumented_bool>(type_idx, method, component_count, self, \
                                                       allocator_type); \
 } \
+extern "C" mirror::Array* artAllocArrayFromCodeResolved##suffix##suffix2( \
+    mirror::Class* klass, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
+    mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocArrayFromCodeResolved<false, instrumented_bool>(klass, method, component_count, self, \
+                                                              allocator_type); \
+} \
 extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
     mirror::ArtMethod** sp) \
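
These definitions sit inside a preprocessor macro (hence the trailing backslashes), stamped out once per allocator/instrumentation combination by token pasting on suffix and suffix2. A hedged miniature of the pattern, with hypothetical stand-in names:

    // One macro body produces one extern "C" entrypoint per combination.
    template <bool kInstrumented>
    int AllocExample(int n) { return kInstrumented ? n + 1 : n; }

    #define STAMP_ENTRYPOINT(suffix, instrumented)     \
      extern "C" int artAllocExample##suffix(int n) {  \
        return AllocExample<instrumented>(n);          \
      }

    STAMP_ENTRYPOINT(DlMalloc, false)             // -> artAllocExampleDlMalloc
    STAMP_ENTRYPOINT(DlMallocInstrumented, true)  // -> ...Instrumented
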
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 1ba2066..011e926 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -40,8 +40,11 @@
 struct PACKED(4) QuickEntryPoints {
   // Alloc
   void* (*pAllocArray)(uint32_t, void*, int32_t);
+  void* (*pAllocArrayResolved)(void*, void*, int32_t);
   void* (*pAllocArrayWithAccessCheck)(uint32_t, void*, int32_t);
   void* (*pAllocObject)(uint32_t, void*);
+  void* (*pAllocObjectResolved)(void*, void*);
+  void* (*pAllocObjectInitialized)(void*, void*);
   void* (*pAllocObjectWithAccessCheck)(uint32_t, void*);
   void* (*pCheckAndAllocArray)(uint32_t, void*, int32_t);
   void* (*pCheckAndAllocArrayWithAccessCheck)(uint32_t, void*, int32_t);
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 1dde18d..01c70fa 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -18,7 +18,6 @@
 #define ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_INL_H_
 
 #include "base/logging.h"
-#include "cutils/atomic-inline.h"
 #include "utils.h"
 
 namespace art {
@@ -40,7 +39,7 @@
     if ((old_word & mask) != 0) {
       return true;
     }
-  } while (UNLIKELY(android_atomic_cas(old_word, old_word | mask, address) != 0));
+  } while (!__sync_bool_compare_and_swap(address, old_word, old_word | mask));
   return false;
 }
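
The cutils call android_atomic_cas(old, new, addr), which returns 0 on success, is replaced here by the GCC builtin __sync_bool_compare_and_swap(addr, old, new), which returns true on success; note the loop condition flips accordingly. The resulting shape, sketched standalone:

    #include <cstdint>

    // Retry only while the CAS loses a race; report whether the bit was
    // already set by another thread.
    inline bool AtomicTestAndSetBit(volatile uintptr_t* word, uintptr_t mask) {
      uintptr_t old_word;
      do {
        old_word = *word;
        if ((old_word & mask) != 0) {
          return true;  // already set, nothing to do
        }
      } while (!__sync_bool_compare_and_swap(word, old_word, old_word | mask));
      return false;  // this thread set it
    }
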
 
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 99800fc..b831843 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -128,9 +128,9 @@
 //
 // The callback is not permitted to increase the max of either bitmap.
 void SpaceBitmap::SweepWalk(const SpaceBitmap& live_bitmap,
-                           const SpaceBitmap& mark_bitmap,
-                           uintptr_t sweep_begin, uintptr_t sweep_end,
-                           SpaceBitmap::SweepCallback* callback, void* arg) {
+                            const SpaceBitmap& mark_bitmap,
+                            uintptr_t sweep_begin, uintptr_t sweep_end,
+                            SpaceBitmap::SweepCallback* callback, void* arg) {
   CHECK(live_bitmap.bitmap_begin_ != NULL);
   CHECK(mark_bitmap.bitmap_begin_ != NULL);
   CHECK_EQ(live_bitmap.heap_begin_, mark_bitmap.heap_begin_);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 5d450a7..862d06f 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -89,7 +89,8 @@
 void MarkSweep::ImmuneSpace(space::ContinuousSpace* space) {
   // Bind live to mark bitmap if necessary.
   if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
-    BindLiveToMarkBitmap(space);
+    CHECK(space->IsContinuousMemMapAllocSpace());
+    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
   }
 
   // Add the space to the immune region.
@@ -143,11 +144,6 @@
       mark_stack_(NULL),
       immune_begin_(NULL),
       immune_end_(NULL),
-      soft_reference_list_(NULL),
-      weak_reference_list_(NULL),
-      finalizer_reference_list_(NULL),
-      phantom_reference_list_(NULL),
-      cleared_reference_list_(NULL),
       live_stack_freeze_size_(0),
       gc_barrier_(new Barrier(0)),
       large_object_lock_("mark sweep large object lock", kMarkSweepLargeObjectLock),
@@ -161,11 +157,6 @@
   mark_stack_ = heap_->mark_stack_.get();
   DCHECK(mark_stack_ != nullptr);
   SetImmuneRange(nullptr, nullptr);
-  soft_reference_list_ = nullptr;
-  weak_reference_list_ = nullptr;
-  finalizer_reference_list_ = nullptr;
-  phantom_reference_list_ = nullptr;
-  cleared_reference_list_ = nullptr;
   class_count_ = 0;
   array_count_ = 0;
   other_count_ = 0;
@@ -347,7 +338,8 @@
     timings_.EndSplit();
 
     // Unbind the live and mark bitmaps.
-    UnBindBitmaps();
+    TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+    GetHeap()->UnBindBitmaps();
   }
 }
 
@@ -589,14 +581,6 @@
   timings_.EndSplit();
 }
 
-void MarkSweep::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsMallocSpace());
-  space::MallocSpace* alloc_space = space->AsMallocSpace();
-  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
-  GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-}
-
 class ScanObjectVisitor {
  public:
   explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
@@ -893,14 +877,8 @@
 // recursively marks until the mark stack is emptied.
 void MarkSweep::RecursiveMark() {
   TimingLogger::ScopedSplit split("RecursiveMark", &timings_);
-  // RecursiveMark will build the lists of known instances of the Reference classes.
-  // See DelayReferenceReferent for details.
-  CHECK(soft_reference_list_ == NULL);
-  CHECK(weak_reference_list_ == NULL);
-  CHECK(finalizer_reference_list_ == NULL);
-  CHECK(phantom_reference_list_ == NULL);
-  CHECK(cleared_reference_list_ == NULL);
-
+  // RecursiveMark will build the lists of known instances of the Reference classes. See
+  // DelayReferenceReferent for details.
   if (kUseRecursiveMark) {
     const bool partial = GetGcType() == kGcTypePartial;
     ScanObjectVisitor scan_visitor(this);
@@ -1146,13 +1124,13 @@
   DCHECK(mark_stack_->IsEmpty());
   TimingLogger::ScopedSplit("Sweep", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* malloc_space = space->AsMallocSpace();
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
       TimingLogger::ScopedSplit split(
-          malloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", &timings_);
       size_t freed_objects = 0;
       size_t freed_bytes = 0;
-      malloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+      alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
       heap_->RecordFree(freed_objects, freed_bytes);
       freed_objects_.FetchAndAdd(freed_objects);
       freed_bytes_.FetchAndAdd(freed_bytes);
@@ -1278,23 +1256,6 @@
   return heap_->GetMarkBitmap()->Test(object);
 }
 
-void MarkSweep::UnBindBitmaps() {
-  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
-  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* alloc_space = space->AsMallocSpace();
-      if (alloc_space->temp_bitmap_.get() != NULL) {
-        // At this point, the temp_bitmap holds our old mark bitmap.
-        accounting::SpaceBitmap* new_bitmap = alloc_space->temp_bitmap_.release();
-        GetHeap()->GetMarkBitmap()->ReplaceBitmap(alloc_space->mark_bitmap_.get(), new_bitmap);
-        CHECK_EQ(alloc_space->mark_bitmap_.release(), alloc_space->live_bitmap_.get());
-        alloc_space->mark_bitmap_.reset(new_bitmap);
-        DCHECK(alloc_space->temp_bitmap_.get() == NULL);
-      }
-    }
-  }
-}
-
 void MarkSweep::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
   // Can't enqueue references if we hold the mutator lock.
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index e2eafb5..0c27a3b 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -118,12 +118,6 @@
   // the image. Mark that portion of the heap as immune.
   virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void BindLiveToMarkBitmap(space::ContinuousSpace* space)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  void UnBindBitmaps()
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   // Builds a mark stack with objects on dirty cards and recursively mark until it empties.
   void RecursiveMarkDirtyObjects(bool paused, byte minimum_age)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -151,10 +145,6 @@
   void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  mirror::Object* GetClearedReferences() {
-    return cleared_reference_list_;
-  }
-
   // Blackens an object.
   void ScanObject(mirror::Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -346,12 +336,6 @@
   mirror::Object* immune_begin_;
   mirror::Object* immune_end_;
 
-  mirror::Object* soft_reference_list_;
-  mirror::Object* weak_reference_list_;
-  mirror::Object* finalizer_reference_list_;
-  mirror::Object* phantom_reference_list_;
-  mirror::Object* cleared_reference_list_;
-
   // Parallel finger.
   AtomicInteger atomic_finger_;
   // Number of classes scanned, if kCountScannedTypes.
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 99c726d..625f869 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -67,7 +67,8 @@
 void SemiSpace::ImmuneSpace(space::ContinuousSpace* space) {
   // Bind live to mark bitmap if necessary.
   if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
-    BindLiveToMarkBitmap(space);
+    CHECK(space->IsContinuousMemMapAllocSpace());
+    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
   }
   // Add the space to the immune region.
   if (immune_begin_ == nullptr) {
@@ -98,12 +99,13 @@
 
 void SemiSpace::BindBitmaps() {
   timings_.StartSplit("BindBitmaps");
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetLiveBitmap() != nullptr) {
       if (space == to_space_) {
-        BindLiveToMarkBitmap(to_space_);
+        CHECK(to_space_->IsContinuousMemMapAllocSpace());
+        to_space_->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
       } else if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
                  || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect
                  // Add the main free list space and the non-moving
@@ -119,7 +121,6 @@
   if (generational_ && !whole_heap_collection_) {
     // We won't collect the large object space if a bump pointer space only collection.
     is_large_object_space_immune_ = true;
-    GetHeap()->GetLargeObjectsSpace()->CopyLiveToMarked();
   }
   timings_.EndSplit();
 }
@@ -180,8 +181,7 @@
       VLOG(heap) << "Bump pointer space only collection";
     }
   }
-  Thread* self = Thread::Current();
-  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  Locks::mutator_lock_->AssertExclusiveHeld(self_);
   TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
   // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
   // wrong space.
@@ -209,7 +209,7 @@
   // the live stack during the recursive mark.
   timings_.NewSplit("SwapStacks");
   heap_->SwapStacks();
-  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   MarkRoots();
   // Mark roots of immune spaces.
   UpdateAndMarkModUnion();
@@ -287,6 +287,11 @@
 
   if (is_large_object_space_immune_) {
     DCHECK(generational_ && !whole_heap_collection_);
+    // Delay copying the live set to the marked set until here from
+    // BindBitmaps() as the large objects on the allocation stack may
+    // be newly added to the live set above in MarkAllocStackAsLive().
+    GetHeap()->GetLargeObjectsSpace()->CopyLiveToMarked();
+
     // When the large object space is immune, we need to scan the
     // large object space as roots as they contain references to their
     // classes (primitive array classes) that could move though they
@@ -305,10 +310,9 @@
 
 void SemiSpace::ReclaimPhase() {
   TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
-  Thread* self = Thread::Current();
-  ProcessReferences(self);
+  ProcessReferences(self_);
   {
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     SweepSystemWeaks();
   }
   // Record freed memory.
@@ -329,7 +333,7 @@
   timings_.EndSplit();
 
   {
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     // Reclaim unmarked objects.
     Sweep(false);
     // Swap the live and mark bitmaps for each space which we modified space. This is an
@@ -339,7 +343,8 @@
     SwapBitmaps();
     timings_.EndSplit();
     // Unbind the live and mark bitmaps.
-    UnBindBitmaps();
+    TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+    GetHeap()->UnBindBitmaps();
   }
   // Release the memory used by the from space.
   if (kResetFromSpace) {
@@ -530,14 +535,6 @@
   timings_.EndSplit();
 }
 
-void SemiSpace::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsMallocSpace());
-  space::MallocSpace* alloc_space = space->AsMallocSpace();
-  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
-  GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-}
-
 mirror::Object* SemiSpace::MarkedForwardingAddressCallback(Object* object, void* arg) {
   return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
 }
@@ -548,7 +545,7 @@
   timings_.EndSplit();
 }
 
-bool SemiSpace::ShouldSweepSpace(space::MallocSpace* space) const {
+bool SemiSpace::ShouldSweepSpace(space::ContinuousSpace* space) const {
   return space != from_space_ && space != to_space_ && !IsImmuneSpace(space);
 }
 
@@ -556,16 +553,16 @@
   DCHECK(mark_stack_->IsEmpty());
   TimingLogger::ScopedSplit("Sweep", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* malloc_space = space->AsMallocSpace();
-      if (!ShouldSweepSpace(malloc_space)) {
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
+      if (!ShouldSweepSpace(alloc_space)) {
         continue;
       }
       TimingLogger::ScopedSplit split(
-          malloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
       size_t freed_objects = 0;
       size_t freed_bytes = 0;
-      malloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+      alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
       heap_->RecordFree(freed_objects, freed_bytes);
       freed_objects_.FetchAndAdd(freed_objects);
       freed_bytes_.FetchAndAdd(freed_bytes);
@@ -660,20 +657,6 @@
   return heap_->GetMarkBitmap()->Test(obj) ? obj : nullptr;
 }
 
-void SemiSpace::UnBindBitmaps() {
-  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
-  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* alloc_space = space->AsMallocSpace();
-      if (alloc_space->HasBoundBitmaps()) {
-        alloc_space->UnBindBitmaps();
-        heap_->GetMarkBitmap()->ReplaceBitmap(alloc_space->GetLiveBitmap(),
-                                              alloc_space->GetMarkBitmap());
-      }
-    }
-  }
-}
-
 void SemiSpace::SetToSpace(space::ContinuousMemMapAllocSpace* to_space) {
   DCHECK(to_space != nullptr);
   to_space_ = to_space;
@@ -686,7 +669,6 @@
 
 void SemiSpace::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
-  // Can't enqueue references if we hold the mutator lock.
   Heap* heap = GetHeap();
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index bf129a3..f81a7c2 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -114,9 +114,6 @@
   // the image. Mark that portion of the heap as immune.
   virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void BindLiveToMarkBitmap(space::ContinuousSpace* space)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   void UnBindBitmaps()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -171,7 +168,7 @@
   void ResizeMarkStack(size_t new_size);
 
   // Returns true if we should sweep the space.
-  virtual bool ShouldSweepSpace(space::MallocSpace* space) const;
+  virtual bool ShouldSweepSpace(space::ContinuousSpace* space) const;
 
   // Returns how many threads we should use for the current GC phase based on if we are paused,
   // whether or not we care about pauses.
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index c562e8c..30f3753 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -40,10 +40,10 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsMallocSpace() &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
-      BindLiveToMarkBitmap(space);
+      DCHECK(space->IsContinuousMemMapAllocSpace());
+      space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
     }
   }
-
   GetHeap()->GetLargeObjectsSpace()->CopyLiveToMarked();
 }
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index fc772b1..fd98e29 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -44,6 +44,7 @@
 #include "gc/space/large_object_space.h"
 #include "gc/space/rosalloc_space-inl.h"
 #include "gc/space/space-inl.h"
+#include "gc/space/zygote_space.h"
 #include "heap-inl.h"
 #include "image.h"
 #include "invoke_arg_array_builder.h"
@@ -72,6 +73,7 @@
 static constexpr size_t kGcAlotInterval = KB;
 // Minimum amount of remaining bytes before a concurrent GC is triggered.
 static constexpr size_t kMinConcurrentRemainingBytes = 128 * KB;
+static constexpr size_t kMaxConcurrentRemainingBytes = 512 * KB;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, size_t capacity, const std::string& image_file_name,
@@ -146,7 +148,7 @@
       total_wait_time_(0),
       total_allocation_time_(0),
       verify_object_mode_(kHeapVerificationNotPermitted),
-      gc_disable_count_(0),
+      disable_moving_gc_count_(0),
       running_on_valgrind_(RUNNING_ON_VALGRIND),
       use_tlab_(use_tlab) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
@@ -222,16 +224,18 @@
   // Compute heap capacity. Continuous spaces are sorted in order of Begin().
   CHECK(!continuous_spaces_.empty());
 
-  std::string error_str;
-  post_zygote_non_moving_space_mem_map_.reset(
-      MemMap::MapAnonymous("post zygote non-moving space", nullptr, 64 * MB,
-                           PROT_READ | PROT_WRITE, &error_str));
-  CHECK(post_zygote_non_moving_space_mem_map_.get() != nullptr) << error_str;
   // Relies on the spaces being sorted.
-  byte* heap_begin = std::min(post_zygote_non_moving_space_mem_map_->Begin(),
-                              continuous_spaces_.front()->Begin());
-  byte* heap_end = std::max(post_zygote_non_moving_space_mem_map_->End(),
-                            continuous_spaces_.back()->Limit());
+  byte* heap_begin = continuous_spaces_.front()->Begin();
+  byte* heap_end = continuous_spaces_.back()->Limit();
+  if (Runtime::Current()->IsZygote()) {
+    std::string error_str;
+    post_zygote_non_moving_space_mem_map_.reset(
+        MemMap::MapAnonymous("post zygote non-moving space", nullptr, 64 * MB,
+                             PROT_READ | PROT_WRITE, &error_str));
+    CHECK(post_zygote_non_moving_space_mem_map_.get() != nullptr) << error_str;
+    heap_begin = std::min(post_zygote_non_moving_space_mem_map_->Begin(), heap_begin);
+    heap_end = std::max(post_zygote_non_moving_space_mem_map_->End(), heap_end);
+  }
   size_t heap_capacity = heap_end - heap_begin;
 
   // Allocate the card table.
@@ -325,19 +329,19 @@
   return false;
 }
 
-void Heap::IncrementDisableGC(Thread* self) {
+void Heap::IncrementDisableMovingGC(Thread* self) {
   // Need to do this holding the lock to prevent races where the GC is about to run / running when
   // we attempt to disable it.
   ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
-  WaitForGcToCompleteLocked(self);
-  ++gc_disable_count_;
+  ++disable_moving_gc_count_;
+  // TODO: Wait for compacting GC to complete if we ever have a concurrent compacting GC.
 }
 
-void Heap::DecrementDisableGC(Thread* self) {
+void Heap::DecrementDisableMovingGC(Thread* self) {
   MutexLock mu(self, *gc_complete_lock_);
-  CHECK_GE(gc_disable_count_, 0U);
-  --gc_disable_count_;
+  CHECK_GE(disable_moving_gc_count_, 0U);
+  --disable_moving_gc_count_;
 }
 
 void Heap::UpdateProcessState(ProcessState process_state) {
@@ -482,7 +486,6 @@
     DCHECK(it != alloc_spaces_.end());
     alloc_spaces_.erase(it);
   }
-  delete space;
 }
 
 void Heap::RegisterGCAllocation(size_t bytes) {
@@ -602,7 +605,7 @@
 };
 
 mirror::Object* Heap::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) {
-  SoftReferenceArgs* args  = reinterpret_cast<SoftReferenceArgs*>(arg);
+  SoftReferenceArgs* args = reinterpret_cast<SoftReferenceArgs*>(arg);
   // TODO: Not preserve all soft references.
   return args->recursive_mark_callback_(obj, args->arg_);
 }
@@ -746,7 +749,7 @@
   uint64_t total_alloc_space_size = 0;
   uint64_t managed_reclaimed = 0;
   for (const auto& space : continuous_spaces_) {
-    if (space->IsMallocSpace() && !space->IsZygoteSpace()) {
+    if (space->IsMallocSpace()) {
       gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
       total_alloc_space_size += alloc_space->Size();
       managed_reclaimed += alloc_space->Trim();
@@ -794,6 +797,7 @@
   if (bump_pointer_space_ != nullptr && bump_pointer_space_->HasAddress(obj)) {
     mirror::Class* klass = obj->GetClass();
     if (obj == klass) {
+      // This case happens for java.lang.Class.
       return true;
     }
     return VerifyClassClass(klass) && IsLiveObjectLocked(klass);
@@ -1176,14 +1180,17 @@
   Thread* self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
-  // Busy wait until we can GC (StartGC can fail if we have a non-zero gc_disable_count_, this
-  // rarely occurs however).
-  while (!StartGC(self)) {
+  // Busy wait until we can GC (StartGC can fail if we have a non-zero
+  // disable_moving_gc_count_, though this should rarely occur).
+  bool copying_transition =
+      IsCompactingGC(background_collector_type_) || IsCompactingGC(post_zygote_collector_type_);
+  while (!StartGC(self, copying_transition)) {
     usleep(100);
   }
   tl->SuspendAll();
   switch (collector_type) {
     case kCollectorTypeSS:
+      // Fall-through.
     case kCollectorTypeGSS: {
       mprotect(temp_space_->Begin(), temp_space_->Capacity(), PROT_READ | PROT_WRITE);
       CHECK(main_space_ != nullptr);
@@ -1191,14 +1198,16 @@
       DCHECK(allocator_mem_map_.get() == nullptr);
       allocator_mem_map_.reset(main_space_->ReleaseMemMap());
       madvise(main_space_->Begin(), main_space_->Size(), MADV_DONTNEED);
-      // RemoveSpace deletes the removed space.
-      RemoveSpace(main_space_);
+      // RemoveSpace does not delete the removed space.
+      space::Space* old_space = main_space_;
+      RemoveSpace(old_space);
+      delete old_space;
       break;
     }
     case kCollectorTypeMS:
       // Fall through.
     case kCollectorTypeCMS: {
-      if (collector_type_ == kCollectorTypeSS || collector_type_ == kCollectorTypeGSS) {
+      if (IsCompactingGC(collector_type_)) {
         // TODO: Use mem-map from temp space?
         MemMap* mem_map = allocator_mem_map_.release();
         CHECK(mem_map != nullptr);
@@ -1253,6 +1262,7 @@
     gc_plan_.clear();
     switch (collector_type_) {
       case kCollectorTypeSS:
+        // Fall-through.
       case kCollectorTypeGSS: {
         concurrent_gc_ = false;
         gc_plan_.push_back(collector::kGcTypeFull);
@@ -1341,7 +1351,7 @@
     }
   }
 
-  virtual bool ShouldSweepSpace(space::MallocSpace* space) const {
+  virtual bool ShouldSweepSpace(space::ContinuousSpace* space) const {
     // Don't sweep any spaces since we probably blasted the internal accounting of the free list
     // allocator.
     return false;
@@ -1381,6 +1391,17 @@
   }
 };
 
+void Heap::UnBindBitmaps() {
+  for (const auto& space : GetContinuousSpaces()) {
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
+      if (alloc_space->HasBoundBitmaps()) {
+        alloc_space->UnBindBitmaps();
+      }
+    }
+  }
+}
+
 void Heap::PreZygoteFork() {
   static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
   Thread* self = Thread::Current();
@@ -1416,30 +1437,28 @@
     non_moving_space_->SetLimit(target_space.Limit());
     VLOG(heap) << "Zygote size " << non_moving_space_->Size() << " bytes";
   }
+  // Save the old space so that we can remove it after we complete creating the zygote space.
+  space::MallocSpace* old_alloc_space = non_moving_space_;
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
-  // the remaining available heap memory.
-  space::MallocSpace* zygote_space = non_moving_space_;
-  main_space_ = non_moving_space_->CreateZygoteSpace("alloc space", low_memory_mode_);
+  // the remaining available space.
+  // Remove the old space before creating the zygote space since creating the zygote space sets
+  // the old alloc space's bitmaps to nullptr.
+  RemoveSpace(old_alloc_space);
+  space::ZygoteSpace* zygote_space = old_alloc_space->CreateZygoteSpace("alloc space",
+                                                                        low_memory_mode_,
+                                                                        &main_space_);
+  delete old_alloc_space;
+  CHECK(zygote_space != nullptr) << "Failed creating zygote space";
+  AddSpace(zygote_space, false);
+  CHECK(main_space_ != nullptr);
   if (main_space_->IsRosAllocSpace()) {
     rosalloc_space_ = main_space_->AsRosAllocSpace();
   } else if (main_space_->IsDlMallocSpace()) {
     dlmalloc_space_ = main_space_->AsDlMallocSpace();
   }
   main_space_->SetFootprintLimit(main_space_->Capacity());
-  // Change the GC retention policy of the zygote space to only collect when full.
-  zygote_space->SetGcRetentionPolicy(space::kGcRetentionPolicyFullCollect);
   AddSpace(main_space_);
   have_zygote_space_ = true;
-  // Remove the zygote space from alloc_spaces_ array since not doing so causes crashes in
-  // GetObjectsAllocated. This happens because the bin packing blows away the internal accounting
-  // stored in between objects.
-  if (zygote_space->IsAllocSpace()) {
-    // TODO: Refactor zygote spaces to be a new space type to avoid more of these types of issues.
-    auto it = std::find(alloc_spaces_.begin(), alloc_spaces_.end(), zygote_space->AsAllocSpace());
-    CHECK(it != alloc_spaces_.end());
-    alloc_spaces_.erase(it);
-    zygote_space->InvalidateAllocator();
-  }
   // Create the zygote space mod union table.
   accounting::ModUnionTable* mod_union_table =
       new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space);
@@ -1525,7 +1544,8 @@
     LOG(WARNING) << "Performing GC on a thread that is handling a stack overflow.";
   }
   gc_complete_lock_->AssertNotHeld(self);
-  if (!StartGC(self)) {
+  const bool compacting_gc = IsCompactingGC(collector_type_);
+  if (!StartGC(self, compacting_gc)) {
     return collector::kGcTypeNone;
   }
   if (gc_cause == kGcCauseForAlloc && runtime->HasStatsEnabled()) {
@@ -1547,7 +1567,7 @@
 
   collector::GarbageCollector* collector = nullptr;
   // TODO: Clean this up.
-  if (collector_type_ == kCollectorTypeSS || collector_type_ == kCollectorTypeGSS) {
+  if (compacting_gc) {
     DCHECK(current_allocator_ == kAllocatorTypeBumpPointer ||
            current_allocator_ == kAllocatorTypeTLAB);
     gc_type = semi_space_collector_->GetGcType();
@@ -1627,15 +1647,15 @@
   return gc_type;
 }
 
-bool Heap::StartGC(Thread* self) {
+bool Heap::StartGC(Thread* self, bool is_compacting) {
   MutexLock mu(self, *gc_complete_lock_);
   // Ensure there is only one GC at a time.
   WaitForGcToCompleteLocked(self);
   // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
   //       Not doing at the moment to ensure soft references are cleared.
   // GC can be disabled if someone has used GetPrimitiveArrayCritical.
-  if (gc_disable_count_ != 0) {
-    LOG(WARNING) << "Skipping GC due to disable count " << gc_disable_count_;
+  if (is_compacting && disable_moving_gc_count_ != 0) {
+    LOG(WARNING) << "Skipping GC due to disable moving GC count " << disable_moving_gc_count_;
     return false;
   }
   is_gc_running_ = true;
@@ -2117,7 +2137,12 @@
     if (bump_pointer_space_->HasAddress(obj)) {
       return true;
     }
-    if (main_space_ != nullptr && main_space_->HasAddress(obj)) {
+    // TODO: Refactor this logic into the space itself?
+    // Objects in the main space are only copied during background -> foreground transitions or
+    // vice versa.
+    if (main_space_ != nullptr && main_space_->HasAddress(obj) &&
+        (IsCompactingGC(background_collector_type_) ||
+            IsCompactingGC(post_zygote_collector_type_))) {
       return true;
     }
   }
@@ -2167,7 +2192,7 @@
     if (bytes_allocated + min_free_ <= max_allowed_footprint_) {
       next_gc_type_ = collector::kGcTypeSticky;
     } else {
-      next_gc_type_ = collector::kGcTypePartial;
+      next_gc_type_ = have_zygote_space_ ? collector::kGcTypePartial : collector::kGcTypeFull;
     }
     // If we have freed enough memory, shrink the heap back down.
     if (bytes_allocated + max_free_ < max_allowed_footprint_) {
@@ -2181,24 +2206,23 @@
     if (concurrent_gc_) {
       // Calculate when to perform the next ConcurrentGC.
       // Calculate the estimated GC duration.
-      double gc_duration_seconds = NsToMs(gc_duration) / 1000.0;
+      const double gc_duration_seconds = NsToMs(gc_duration) / 1000.0;
       // Estimate how many remaining bytes we will have when we need to start the next GC.
       size_t remaining_bytes = allocation_rate_ * gc_duration_seconds;
+      remaining_bytes = std::min(remaining_bytes, kMaxConcurrentRemainingBytes);
       remaining_bytes = std::max(remaining_bytes, kMinConcurrentRemainingBytes);
       if (UNLIKELY(remaining_bytes > max_allowed_footprint_)) {
         // A situation that should never happen: the estimated allocation rate implies we would
         // exceed the application's entire footprint before the next GC. Schedule
-        // another GC straight away.
-        concurrent_start_bytes_ = bytes_allocated;
-      } else {
-        // Start a concurrent GC when we get close to the estimated remaining bytes. When the
-        // allocation rate is very high, remaining_bytes could tell us that we should start a GC
-        // right away.
-        concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes,
-                                           bytes_allocated);
+        // another GC nearly straight away.
+        remaining_bytes = kMinConcurrentRemainingBytes;
       }
-      DCHECK_LE(concurrent_start_bytes_, max_allowed_footprint_);
+      DCHECK_LE(remaining_bytes, max_allowed_footprint_);
       DCHECK_LE(max_allowed_footprint_, growth_limit_);
+      // Start a concurrent GC when we get close to the estimated remaining bytes. When the
+      // allocation rate is very high, remaining_bytes could tell us that we should start a GC
+      // right away.
+      concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes, bytes_allocated);
     }
   }
 }
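
The retuned heuristic clamps the estimated headroom into [kMinConcurrentRemainingBytes, kMaxConcurrentRemainingBytes] before deriving concurrent_start_bytes_. A worked sketch of that arithmetic using the constants from this file (128 KB and 512 KB); the function name is illustrative:

    #include <algorithm>
    #include <cstddef>

    // Bound the headroom estimate so a noisy allocation-rate sample can
    // neither defer the concurrent GC until the footprint is nearly
    // exhausted nor keep it permanently triggered.
    size_t ConcurrentStartBytes(size_t estimated_remaining, size_t footprint,
                                size_t bytes_allocated) {
      const size_t kMin = 128 * 1024;  // kMinConcurrentRemainingBytes
      const size_t kMax = 512 * 1024;  // kMaxConcurrentRemainingBytes
      size_t remaining = std::min(std::max(estimated_remaining, kMin), kMax);
      if (remaining > footprint) {
        remaining = kMin;  // degenerate tiny-heap case: GC nearly straight away
      }
      return std::max(footprint - remaining, bytes_allocated);
    }
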
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 26bbacd..fd7a614 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -243,9 +243,9 @@
   // compaction related errors.
   bool IsInTempSpace(const mirror::Object* obj) const;
 
-  // Enables us to prevent GC until objects are released.
-  void IncrementDisableGC(Thread* self);
-  void DecrementDisableGC(Thread* self);
+  // Enables us to prevent compacting GC until objects are released.
+  void IncrementDisableMovingGC(Thread* self);
+  void DecrementDisableMovingGC(Thread* self);
 
   // Initiates an explicit garbage collection.
   void CollectGarbage(bool clear_soft_references) LOCKS_EXCLUDED(Locks::mutator_lock_);
@@ -467,6 +467,9 @@
   void MarkAllocStackAsLive(accounting::ObjectStack* stack)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Unbind any bound bitmaps.
+  void UnBindBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   // DEPRECATED: Should remove in "near" future when support for multiple image spaces is added.
   // Assumes there is only one image space.
   space::ImageSpace* GetImageSpace() const;
@@ -534,7 +537,7 @@
   void Compact(space::ContinuousMemMapAllocSpace* target_space,
                space::ContinuousMemMapAllocSpace* source_space);
 
-  bool StartGC(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_);
+  bool StartGC(Thread* self, bool is_compacting) LOCKS_EXCLUDED(gc_complete_lock_);
   void FinishGC(Thread* self, collector::GcType gc_type) LOCKS_EXCLUDED(gc_complete_lock_);
 
   static ALWAYS_INLINE bool AllocatorHasAllocationStack(AllocatorType allocator_type) {
@@ -545,6 +548,9 @@
   static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
     return AllocatorHasAllocationStack(allocator_type);
   }
+  static bool IsCompactingGC(CollectorType collector_type) {
+    return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS;
+  }
   bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const;
   ALWAYS_INLINE void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
                                        mirror::Object* obj);
@@ -877,8 +883,8 @@
   // The current state of heap verification, may be enabled or disabled.
   HeapVerificationMode verify_object_mode_;
 
-  // GC disable count, error on GC if > 0.
-  size_t gc_disable_count_ GUARDED_BY(gc_complete_lock_);
+  // Compacting GC disable count, prevents compacting GC from running iff > 0.
+  size_t disable_moving_gc_count_ GUARDED_BY(gc_complete_lock_);
 
   std::vector<collector::GarbageCollector*> garbage_collectors_;
   collector::SemiSpace* semi_space_collector_;
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index ac20972..74a0274 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -34,10 +34,9 @@
     if (UNLIKELY(new_end > growth_end_)) {
       return nullptr;
     }
-    // TODO: Use a cas which always equals the size of pointers.
-  } while (android_atomic_cas(reinterpret_cast<int32_t>(old_end),
-                              reinterpret_cast<int32_t>(new_end),
-                              reinterpret_cast<volatile int32_t*>(&end_)) != 0);
+  } while (!__sync_bool_compare_and_swap(reinterpret_cast<volatile intptr_t*>(&end_),
+                                         reinterpret_cast<intptr_t>(old_end),
+                                         reinterpret_cast<intptr_t>(new_end)));
   return reinterpret_cast<mirror::Object*>(old_end);
 }
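
As in space_bitmap-inl.h, the point of this hunk is that the CAS must be pointer-width: the old android_atomic_cas operated on int32_t, which cannot atomically update a 64-bit end_ pointer. For illustration only, the same loop expressed with std::atomic:

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    // Pointer-width bump allocation: compare_exchange_weak reloads
    // 'old_end' on failure, so the limit check is re-run on every retry.
    inline uint8_t* BumpAlloc(std::atomic<uint8_t*>* end, uint8_t* growth_end,
                              size_t num_bytes) {
      uint8_t* old_end = end->load(std::memory_order_relaxed);
      uint8_t* new_end;
      do {
        new_end = old_end + num_bytes;
        if (new_end > growth_end) {
          return nullptr;  // space exhausted
        }
      } while (!end->compare_exchange_weak(old_end, new_end));
      return old_end;
    }
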
 
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 981af53..9ae6a33 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -287,6 +287,7 @@
 }
 
 void DlMallocSpace::Clear() {
+  // TODO: Delete and create new mspace here.
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   GetLiveBitmap()->Clear();
   GetMarkBitmap()->Clear();
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 671d2b2..24308f7 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -97,10 +97,6 @@
 
   virtual void Clear();
 
-  virtual void InvalidateAllocator() {
-    mspace_for_alloc_ = nullptr;
-  }
-
   virtual bool IsDlMallocSpace() const {
     return true;
   }
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 78a83c9..c3f0ae6 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -29,10 +29,6 @@
 // An image space is a space backed with a memory mapped image.
 class ImageSpace : public MemMapSpace {
  public:
-  bool CanAllocateInto() const {
-    return false;
-  }
-
   SpaceType GetType() const {
     return kSpaceTypeImageSpace;
   }
@@ -75,6 +71,10 @@
 
   void Dump(std::ostream& os) const;
 
+  // Sweeping image spaces is a NOP.
+  void Sweep(bool /* swap_bitmaps */, size_t* /* freed_objects */, size_t* /* freed_bytes */) {
+  }
+
  private:
   // Tries to initialize an ImageSpace from the given image path,
   // returning NULL on error.
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 2b2b26e..9ca4eac 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -19,6 +19,8 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
+#include "gc/space/space-inl.h"
+#include "gc/space/zygote_space.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
@@ -33,22 +35,27 @@
 size_t MallocSpace::bitmap_index_ = 0;
 
 MallocSpace::MallocSpace(const std::string& name, MemMap* mem_map,
-                         byte* begin, byte* end, byte* limit, size_t growth_limit)
+                         byte* begin, byte* end, byte* limit, size_t growth_limit,
+                         bool create_bitmaps)
     : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
       recent_free_pos_(0), lock_("allocation space lock", kAllocSpaceLock),
       growth_limit_(growth_limit) {
-  size_t bitmap_index = bitmap_index_++;
-  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
-  live_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
-  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
+  if (create_bitmaps) {
+    size_t bitmap_index = bitmap_index_++;
+    static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
+    CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
+    CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
+    live_bitmap_.reset(accounting::SpaceBitmap::Create(
+        StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+        Begin(), Capacity()));
+    DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #"
+        << bitmap_index;
+    mark_bitmap_.reset(accounting::SpaceBitmap::Create(
+        StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+        Begin(), Capacity()));
+    DCHECK(mark_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #"
+        << bitmap_index;
+  }
   for (auto& freed : recent_freed_objects_) {
     freed.first = nullptr;
     freed.second = nullptr;
@@ -154,29 +161,8 @@
   return original_end;
 }
 
-// Returns the old mark bitmap.
-accounting::SpaceBitmap* MallocSpace::BindLiveToMarkBitmap() {
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
-  temp_bitmap_.reset(mark_bitmap);
-  mark_bitmap_.reset(live_bitmap);
-  return mark_bitmap;
-}
-
-bool MallocSpace::HasBoundBitmaps() const {
-  return temp_bitmap_.get() != nullptr;
-}
-
-void MallocSpace::UnBindBitmaps() {
-  CHECK(HasBoundBitmaps());
-  // At this point, the temp_bitmap holds our old mark bitmap.
-  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
-  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
-  mark_bitmap_.reset(new_bitmap);
-  DCHECK(temp_bitmap_.get() == NULL);
-}
-
-MallocSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode) {
+ZygoteSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode,
+                                            MallocSpace** out_malloc_space) {
   // For RosAlloc, revoke thread local runs before creating a new
   // alloc space so that we won't mix thread local runs from different
   // alloc spaces.
@@ -220,15 +206,23 @@
   if (capacity - initial_size > 0) {
     CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
   }
-  MallocSpace* alloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator,
-                                            end_, end, limit_, growth_limit);
+  *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, end_, end,
+                                     limit_, growth_limit);
   SetLimit(End());
   live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
   CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
   mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
   CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  VLOG(heap) << "zygote space creation done";
-  return alloc_space;
+
+  // Create the actual zygote space.
+  ZygoteSpace* zygote_space = ZygoteSpace::Create("Zygote space", ReleaseMemMap(),
+                                                  live_bitmap_.release(), mark_bitmap_.release());
+  if (UNLIKELY(zygote_space == nullptr)) {
+    VLOG(heap) << "Failed creating zygote space from space " << GetName();
+  } else {
+    VLOG(heap) << "zygote space creation done";
+  }
+  return zygote_space;
 }
 
 void MallocSpace::Dump(std::ostream& os) const {
@@ -239,24 +233,16 @@
       << ",name=\"" << GetName() << "\"]";
 }
 
-struct SweepCallbackContext {
-  bool swap_bitmaps;
-  Heap* heap;
-  space::MallocSpace* space;
-  Thread* self;
-  size_t freed_objects;
-  size_t freed_bytes;
-};
-
-static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
+void MallocSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
   SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
-  space::AllocSpace* space = context->space;
+  DCHECK(context->space->IsMallocSpace());
+  space::MallocSpace* space = context->space->AsMallocSpace();
   Thread* self = context->self;
   Locks::heap_bitmap_lock_->AssertExclusiveHeld(self);
   // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
   // the bitmaps as an optimization.
   if (!context->swap_bitmaps) {
-    accounting::SpaceBitmap* bitmap = context->space->GetLiveBitmap();
+    accounting::SpaceBitmap* bitmap = space->GetLiveBitmap();
     for (size_t i = 0; i < num_ptrs; ++i) {
       bitmap->Clear(ptrs[i]);
     }
@@ -268,54 +254,6 @@
   context->freed_bytes += space->FreeList(self, num_ptrs, ptrs);
 }
 
-static void ZygoteSweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
-  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
-  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
-  accounting::CardTable* card_table = context->heap->GetCardTable();
-  // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
-  // the bitmaps as an optimization.
-  if (!context->swap_bitmaps) {
-    accounting::SpaceBitmap* bitmap = context->space->GetLiveBitmap();
-    for (size_t i = 0; i < num_ptrs; ++i) {
-      bitmap->Clear(ptrs[i]);
-    }
-  }
-  // We don't free any actual memory to avoid dirtying the shared zygote pages.
-  for (size_t i = 0; i < num_ptrs; ++i) {
-    // Need to mark the card since this will update the mod-union table next GC cycle.
-    card_table->MarkCard(ptrs[i]);
-  }
-}
-
-void MallocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes) {
-  DCHECK(freed_objects != nullptr);
-  DCHECK(freed_bytes != nullptr);
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = GetMarkBitmap();
-  // If the bitmaps are bound then sweeping this space clearly won't do anything.
-  if (live_bitmap == mark_bitmap) {
-    return;
-  }
-  SweepCallbackContext scc;
-  scc.swap_bitmaps = swap_bitmaps;
-  scc.heap = Runtime::Current()->GetHeap();
-  scc.self = Thread::Current();
-  scc.space = this;
-  scc.freed_objects = 0;
-  scc.freed_bytes = 0;
-  if (swap_bitmaps) {
-    std::swap(live_bitmap, mark_bitmap);
-  }
-  // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
-  accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
-                                     reinterpret_cast<uintptr_t>(Begin()),
-                                     reinterpret_cast<uintptr_t>(End()),
-                                     IsZygoteSpace() ? &ZygoteSweepCallback : &SweepCallback,
-                                     reinterpret_cast<void*>(&scc));
-  *freed_objects += scc.freed_objects;
-  *freed_bytes += scc.freed_bytes;
-}
-
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 7681b6d..58cfe8b 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -31,6 +31,8 @@
 
 namespace space {
 
+class ZygoteSpace;
+
 // TODO: Remove define macro
 #define CHECK_MEMORY_CALL(call, args, what) \
   do { \
@@ -41,19 +43,13 @@
     } \
   } while (false)
 
-// const bool kUseRosAlloc = true;
-
 // A common parent of DlMallocSpace and RosAllocSpace.
 class MallocSpace : public ContinuousMemMapAllocSpace {
  public:
   typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
 
   SpaceType GetType() const {
-    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
-      return kSpaceTypeZygoteSpace;
-    } else {
-      return kSpaceTypeAllocSpace;
-    }
+    return kSpaceTypeMallocSpace;
   }
 
   // Allocate num_bytes without allowing the underlying space to grow.
@@ -109,14 +105,6 @@
     return GetMemMap()->Size();
   }
 
-  accounting::SpaceBitmap* GetLiveBitmap() const {
-    return live_bitmap_.get();
-  }
-
-  accounting::SpaceBitmap* GetMarkBitmap() const {
-    return mark_bitmap_.get();
-  }
-
   void Dump(std::ostream& os) const;
 
   void SetGrowthLimit(size_t growth_limit);
@@ -127,33 +115,20 @@
   virtual MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
                                       byte* begin, byte* end, byte* limit, size_t growth_limit) = 0;
 
-  // Turn ourself into a zygote space and return a new alloc space
-  // which has our unused memory.  When true, the low memory mode
-  // argument specifies that the heap wishes the created space to be
-  // more aggressive in releasing unused pages.
-  MallocSpace* CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode);
-
+  // Splits this space into a zygote space and a new malloc space which receives our unused
+  // memory. When true, the low memory mode argument specifies that the heap wishes the created
+  // space to be more aggressive in releasing unused pages. Invalidates the space it's called on.
+  ZygoteSpace* CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode,
+                                 MallocSpace** out_malloc_space) NO_THREAD_SAFETY_ANALYSIS;
   virtual uint64_t GetBytesAllocated() = 0;
   virtual uint64_t GetObjectsAllocated() = 0;
 
-  // Returns the old mark bitmap.
-  accounting::SpaceBitmap* BindLiveToMarkBitmap();
-  bool HasBoundBitmaps() const;
-  void UnBindBitmaps();
-
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
-  // Used to ensure that failure happens when you free / allocate into an invalidated space. If we
-  // don't do this we may get heap corruption instead of a segfault at null.
-  virtual void InvalidateAllocator() = 0;
-
-  // Sweep the references in the malloc space.
-  void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
-
  protected:
   MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
-              byte* limit, size_t growth_limit);
+              byte* limit, size_t growth_limit, bool create_bitmaps = true);
 
   static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
                               size_t* growth_limit, size_t* capacity, byte* requested_begin);
@@ -166,9 +141,9 @@
 
   void RegisterRecentFree(mirror::Object* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+    return &SweepCallback;
+  }
 
   // Recent allocation buffer.
   static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
@@ -190,9 +165,9 @@
   // one time by a call to ClearGrowthLimit.
   size_t growth_limit_;
 
-  friend class collector::MarkSweep;
-
  private:
+  static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg);
+
   DISALLOW_COPY_AND_ASSIGN(MallocSpace);
 };
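
The GetSweepCallback hook turns the old free-function dispatch into a template method: the shared sweep walk lives in the base class while each space supplies its own callback. A condensed, runnable sketch of that shape (names abbreviated, not the real classes):

    #include <cstdio>

    typedef void SweepCallbackFn(void* obj, void* arg);

    // Base class owns the sweep walk and dispatches through a virtual hook.
    class SpaceBase {
     public:
      virtual ~SpaceBase() {}
      void Sweep(void* dead_obj, void* arg) { GetSweepCallback()(dead_obj, arg); }
     protected:
      virtual SweepCallbackFn* GetSweepCallback() = 0;
    };

    class MallocLikeSpace : public SpaceBase {
     protected:
      SweepCallbackFn* GetSweepCallback() override { return &FreeCallback; }
     private:
      static void FreeCallback(void* obj, void* arg) {
        std::printf("free %p\n", obj);  // the real callback frees via FreeList
      }
    };

    class ZygoteLikeSpace : public SpaceBase {
     protected:
      SweepCallbackFn* GetSweepCallback() override { return &MarkCardCallback; }
     private:
      static void MarkCardCallback(void* obj, void* arg) {
        std::printf("mark card %p\n", obj);  // zygote pages are never freed
      }
    };

    int main() {
      int dummy;
      MallocLikeSpace m;
      ZygoteLikeSpace z;
      m.Sweep(&dummy, nullptr);
      z.Sweep(&dummy, nullptr);
      return 0;
    }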
 
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index e5993f6..177e38e 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -312,6 +312,7 @@
 }
 
 void RosAllocSpace::Clear() {
+  // TODO: Delete and create new mspace here.
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   GetLiveBitmap()->Clear();
   GetMarkBitmap()->Clear();
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 6720976..555eb3c 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -95,10 +95,6 @@
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
-  virtual void InvalidateAllocator() {
-    rosalloc_for_alloc_ = NULL;
-  }
-
   virtual bool IsRosAllocSpace() const {
     return true;
   }
diff --git a/runtime/gc/space/space-inl.h b/runtime/gc/space/space-inl.h
index 0c1d7a2..e94c44e 100644
--- a/runtime/gc/space/space-inl.h
+++ b/runtime/gc/space/space-inl.h
@@ -32,7 +32,7 @@
 }
 
 inline MallocSpace* Space::AsMallocSpace() {
-  DCHECK(GetType() == kSpaceTypeAllocSpace || GetType() == kSpaceTypeZygoteSpace);
+  DCHECK(GetType() == kSpaceTypeMallocSpace);
   DCHECK(IsDlMallocSpace() || IsRosAllocSpace());
   return down_cast<MallocSpace*>(down_cast<MemMapSpace*>(this));
 }
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index f8ba6b3..5478d5b 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -17,6 +17,9 @@
 #include "space.h"
 
 #include "base/logging.h"
+#include "gc/accounting/heap_bitmap.h"
+#include "runtime.h"
+#include "thread-inl.h"
 
 namespace art {
 namespace gc {
@@ -41,6 +44,59 @@
     mark_objects_(new accounting::ObjectSet("large marked objects")) {
 }
 
+void ContinuousMemMapAllocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes) {
+  DCHECK(freed_objects != nullptr);
+  DCHECK(freed_bytes != nullptr);
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = GetMarkBitmap();
+  // If the bitmaps are bound then sweeping this space clearly won't do anything.
+  if (live_bitmap == mark_bitmap) {
+    return;
+  }
+  SweepCallbackContext scc;
+  scc.swap_bitmaps = swap_bitmaps;
+  scc.heap = Runtime::Current()->GetHeap();
+  scc.self = Thread::Current();
+  scc.space = this;
+  scc.freed_objects = 0;
+  scc.freed_bytes = 0;
+  if (swap_bitmaps) {
+    std::swap(live_bitmap, mark_bitmap);
+  }
+  // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
+  accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
+                                     reinterpret_cast<uintptr_t>(Begin()),
+                                     reinterpret_cast<uintptr_t>(End()),
+                                     GetSweepCallback(),
+                                     reinterpret_cast<void*>(&scc));
+  *freed_objects += scc.freed_objects;
+  *freed_bytes += scc.freed_bytes;
+}
+
+// Binds the live bitmap to the mark bitmap, saving the old mark bitmap in temp_bitmap_.
+void ContinuousMemMapAllocSpace::BindLiveToMarkBitmap() {
+  CHECK(!HasBoundBitmaps());
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
+  Runtime::Current()->GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
+  temp_bitmap_.reset(mark_bitmap);
+  mark_bitmap_.reset(live_bitmap);
+}
+
+bool ContinuousMemMapAllocSpace::HasBoundBitmaps() const {
+  return temp_bitmap_.get() != nullptr;
+}
+
+void ContinuousMemMapAllocSpace::UnBindBitmaps() {
+  CHECK(HasBoundBitmaps());
+  // At this point, the temp_bitmap holds our old mark bitmap.
+  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
+  Runtime::Current()->GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap_.get(), new_bitmap);
+  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
+  mark_bitmap_.reset(new_bitmap);
+  DCHECK(temp_bitmap_.get() == nullptr);
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
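
The hoisted Sweep relies on the collector having pre-swapped the live and mark bitmaps, so anything live before the GC but absent from the mark bitmap is exactly the dead set handed to the callback. A schematic of that set difference, with std::set standing in for accounting::SpaceBitmap (an assumption for illustration only):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <iterator>
    #include <set>
    #include <vector>

    int main() {
      std::set<uintptr_t> live = {0x1000, 0x2000, 0x3000};  // alive before the GC
      std::set<uintptr_t> mark = {0x1000, 0x3000};          // still reachable after
      std::vector<uintptr_t> dead;
      // SweepWalk's contract in miniature: live minus mark is garbage.
      std::set_difference(live.begin(), live.end(), mark.begin(), mark.end(),
                          std::back_inserter(dead));
      for (uintptr_t obj : dead) {
        std::printf("sweep %#jx\n", static_cast<uintmax_t>(obj));  // 0x2000
      }
      return 0;
    }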
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 5292344..32230b3 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -44,6 +44,7 @@
 
 class AllocSpace;
 class BumpPointerSpace;
+class ContinuousMemMapAllocSpace;
 class ContinuousSpace;
 class DiscontinuousSpace;
 class MallocSpace;
@@ -51,6 +52,7 @@
 class RosAllocSpace;
 class ImageSpace;
 class LargeObjectSpace;
+class ZygoteSpace;
 
 static constexpr bool kDebugSpaces = kIsDebugBuild;
 
@@ -68,7 +70,7 @@
 
 enum SpaceType {
   kSpaceTypeImageSpace,
-  kSpaceTypeAllocSpace,
+  kSpaceTypeMallocSpace,
   kSpaceTypeZygoteSpace,
   kSpaceTypeBumpPointerSpace,
   kSpaceTypeLargeObjectSpace,
@@ -91,11 +93,6 @@
     return gc_retention_policy_;
   }
 
-  // Does the space support allocation?
-  virtual bool CanAllocateInto() const {
-    return true;
-  }
-
   // Is the given object contained within this space?
   virtual bool Contains(const mirror::Object* obj) const = 0;
 
@@ -111,7 +108,7 @@
   // Is this a dlmalloc backed allocation space?
   bool IsMallocSpace() const {
     SpaceType type = GetType();
-    return type == kSpaceTypeAllocSpace || type == kSpaceTypeZygoteSpace;
+    return type == kSpaceTypeMallocSpace;
   }
   MallocSpace* AsMallocSpace();
 
@@ -120,20 +117,24 @@
   }
   virtual DlMallocSpace* AsDlMallocSpace() {
     LOG(FATAL) << "Unreachable";
-    return NULL;
+    return nullptr;
   }
   virtual bool IsRosAllocSpace() const {
     return false;
   }
   virtual RosAllocSpace* AsRosAllocSpace() {
     LOG(FATAL) << "Unreachable";
-    return NULL;
+    return nullptr;
   }
 
   // Is this the space allocated into by the Zygote and no longer in use?
   bool IsZygoteSpace() const {
     return GetType() == kSpaceTypeZygoteSpace;
   }
+  virtual ZygoteSpace* AsZygoteSpace() {
+    LOG(FATAL) << "Unreachable";
+    return nullptr;
+  }
 
   // Is this space a bump pointer space?
   bool IsBumpPointerSpace() const {
@@ -141,7 +142,7 @@
   }
   virtual BumpPointerSpace* AsBumpPointerSpace() {
     LOG(FATAL) << "Unreachable";
-    return NULL;
+    return nullptr;
   }
 
   // Does this space hold large objects and implement the large object space abstraction?
@@ -168,6 +169,14 @@
     return nullptr;
   }
 
+  virtual bool IsContinuousMemMapAllocSpace() const {
+    return false;
+  }
+  virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace() {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+
   virtual ~Space() {}
 
  protected:
@@ -181,6 +190,15 @@
   std::string name_;
 
  protected:
+  struct SweepCallbackContext {
+    bool swap_bitmaps;
+    Heap* heap;
+    space::Space* space;
+    Thread* self;
+    size_t freed_objects;
+    size_t freed_bytes;
+  };
+
   // When should objects within this space be reclaimed? Not constant as we vary it in the case
   // of Zygote forking.
   GcRetentionPolicy gc_retention_policy_;
@@ -378,22 +396,51 @@
   virtual bool IsAllocSpace() const {
     return true;
   }
-
   virtual AllocSpace* AsAllocSpace() {
     return this;
   }
 
+  virtual bool IsContinuousMemMapAllocSpace() const {
+    return true;
+  }
+  virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace() {
+    return this;
+  }
+
+  bool HasBoundBitmaps() const EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void BindLiveToMarkBitmap()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void UnBindBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   virtual void Clear() {
     LOG(FATAL) << "Unimplemented";
   }
 
+  virtual accounting::SpaceBitmap* GetLiveBitmap() const {
+    return live_bitmap_.get();
+  }
+  virtual accounting::SpaceBitmap* GetMarkBitmap() const {
+    return mark_bitmap_.get();
+  }
+
+  virtual void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
+  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+
  protected:
+  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+
   ContinuousMemMapAllocSpace(const std::string& name, MemMap* mem_map, byte* begin,
                              byte* end, byte* limit, GcRetentionPolicy gc_retention_policy)
       : MemMapSpace(name, mem_map, begin, end, limit, gc_retention_policy) {
   }
 
  private:
+  friend class gc::Heap;
   DISALLOW_COPY_AND_ASSIGN(ContinuousMemMapAllocSpace);
 };
 
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index b1be9d8..427d547 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -16,6 +16,7 @@
 
 #include "dlmalloc_space.h"
 #include "large_object_space.h"
+#include "zygote_space.h"
 
 #include "common_test.h"
 #include "globals.h"
@@ -179,7 +180,16 @@
 
   // Make sure that the zygote space isn't directly at the start of the space.
   space->Alloc(self, 1U * MB, &dummy);
-  space = space->CreateZygoteSpace("alloc space", Runtime::Current()->GetHeap()->IsLowMemoryMode());
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  space::Space* old_space = space;
+  heap->RemoveSpace(old_space);
+  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
+                                                              heap->IsLowMemoryMode(),
+                                                              &space);
+  delete old_space;
+  // Add the zygote space.
+  AddSpace(zygote_space);
 
   // Make space findable to the heap, will also delete space when runtime is cleaned up
   AddSpace(space);
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
new file mode 100644
index 0000000..a303765
--- /dev/null
+++ b/runtime/gc/space/zygote_space.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "zygote_space.h"
+
+#include "gc/accounting/card_table-inl.h"
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+class CountObjectsAllocated {
+ public:
+  explicit CountObjectsAllocated(size_t* objects_allocated)
+      : objects_allocated_(objects_allocated) {}
+
+  void operator()(mirror::Object* obj) const {
+    ++*objects_allocated_;
+  }
+
+ private:
+  size_t* const objects_allocated_;
+};
+
+ZygoteSpace* ZygoteSpace::Create(const std::string& name, MemMap* mem_map,
+                                 accounting::SpaceBitmap* live_bitmap,
+                                 accounting::SpaceBitmap* mark_bitmap) {
+  DCHECK(live_bitmap != nullptr);
+  DCHECK(mark_bitmap != nullptr);
+  size_t objects_allocated = 0;
+  CountObjectsAllocated visitor(&objects_allocated);
+  ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(mem_map->Begin()),
+                                reinterpret_cast<uintptr_t>(mem_map->End()), visitor);
+  ZygoteSpace* zygote_space = new ZygoteSpace(name, mem_map, objects_allocated);
+  CHECK(zygote_space->live_bitmap_.get() == nullptr);
+  CHECK(zygote_space->mark_bitmap_.get() == nullptr);
+  zygote_space->live_bitmap_.reset(live_bitmap);
+  zygote_space->mark_bitmap_.reset(mark_bitmap);
+  return zygote_space;
+}
+
+ZygoteSpace::ZygoteSpace(const std::string& name, MemMap* mem_map, size_t objects_allocated)
+    : ContinuousMemMapAllocSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
+                                 kGcRetentionPolicyFullCollect),
+      objects_allocated_(objects_allocated) {
+}
+
+void ZygoteSpace::Dump(std::ostream& os) const {
+  os << GetType()
+      << " begin=" << reinterpret_cast<void*>(Begin())
+      << ",end=" << reinterpret_cast<void*>(End())
+      << ",size=" << PrettySize(Size())
+      << ",name=\"" << GetName() << "\"]";
+}
+
+void ZygoteSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
+  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
+  DCHECK(context->space->IsZygoteSpace());
+  ZygoteSpace* zygote_space = context->space->AsZygoteSpace();
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
+  accounting::CardTable* card_table = context->heap->GetCardTable();
+  // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
+  // the bitmaps as an optimization.
+  if (!context->swap_bitmaps) {
+    accounting::SpaceBitmap* bitmap = zygote_space->GetLiveBitmap();
+    for (size_t i = 0; i < num_ptrs; ++i) {
+      bitmap->Clear(ptrs[i]);
+    }
+  }
+  // We don't free any actual memory to avoid dirtying the shared zygote pages.
+  for (size_t i = 0; i < num_ptrs; ++i) {
+    // Need to mark the card since this will update the mod-union table next GC cycle.
+    card_table->MarkCard(ptrs[i]);
+  }
+  zygote_space->objects_allocated_.FetchAndSub(num_ptrs);
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h
new file mode 100644
index 0000000..10a5492
--- /dev/null
+++ b/runtime/gc/space/zygote_space.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
+
+#include "malloc_space.h"
+#include "mem_map.h"
+
+namespace art {
+namespace gc {
+
+namespace accounting {
+class SpaceBitmap;
+}
+
+namespace space {
+
+// A zygote space is a space which you cannot allocate into or free from.
+class ZygoteSpace : public ContinuousMemMapAllocSpace {
+ public:
+  // Creates a zygote space from the given mem map and bitmaps, taking ownership of them.
+  static ZygoteSpace* Create(const std::string& name, MemMap* mem_map,
+                             accounting::SpaceBitmap* live_bitmap,
+                             accounting::SpaceBitmap* mark_bitmap)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void Dump(std::ostream& os) const;
+  virtual SpaceType GetType() const {
+    return kSpaceTypeZygoteSpace;
+  }
+  virtual ZygoteSpace* AsZygoteSpace() {
+    return this;
+  }
+  virtual mirror::Object* AllocWithGrowth(Thread* /*self*/, size_t /*num_bytes*/,
+                                          size_t* /*bytes_allocated*/) {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+  virtual size_t AllocationSize(const mirror::Object* obj) {
+    LOG(FATAL) << "Unimplemented";
+    return 0;
+  }
+  virtual size_t Free(Thread* self, mirror::Object* ptr) {
+    LOG(FATAL) << "Unimplemented";
+    return 0;
+  }
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+    LOG(FATAL) << "Unimplemented";
+    return 0;
+  }
+  virtual uint64_t GetBytesAllocated() {
+    return Size();
+  }
+  virtual uint64_t GetObjectsAllocated() {
+    return objects_allocated_;
+  }
+
+ protected:
+  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+    return &SweepCallback;
+  }
+
+ private:
+  ZygoteSpace(const std::string& name, MemMap* mem_map, size_t objects_allocated);
+  static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg);
+
+  AtomicInteger objects_allocated_;
+
+  friend class Space;
+  DISALLOW_COPY_AND_ASSIGN(ZygoteSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
diff --git a/runtime/globals.h b/runtime/globals.h
index a0d7e48..b1ccbdc 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -88,6 +88,10 @@
 // True if we allow moving methods.
 static constexpr bool kMovingMethods = false;
 
+// If true, the quick compiler embeds class pointers in the compiled
+// code, if possible.
+static constexpr bool kEmbedClassInCode = true;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_GLOBALS_H_
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 51b238c..21e942e 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -326,7 +326,7 @@
    * Extract the table index from an indirect reference.
    */
   static uint32_t ExtractIndex(IndirectRef iref) {
-    uint32_t uref = (uint32_t) iref;
+    uintptr_t uref = reinterpret_cast<uintptr_t>(iref);
     return (uref >> 2) & 0xffff;
   }
 
@@ -337,8 +337,8 @@
   IndirectRef ToIndirectRef(const mirror::Object* /*o*/, uint32_t tableIndex) const {
     DCHECK_LT(tableIndex, 65536U);
     uint32_t serialChunk = slot_data_[tableIndex].serial;
-    uint32_t uref = serialChunk << 20 | (tableIndex << 2) | kind_;
-    return (IndirectRef) uref;
+    uintptr_t uref = serialChunk << 20 | (tableIndex << 2) | kind_;
+    return reinterpret_cast<IndirectRef>(uref);
   }
 
   /*
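
The casts above make the packing explicit: a serial number above bit 20, a 16-bit table index in bits 2..17, and the 2-bit reference kind at the bottom. A standalone round-trip sketch of that encoding (plain functions with assumed names, not the IRT itself):

    #include <cassert>
    #include <cstdint>

    // Pack serial/index/kind the way ToIndirectRef does above.
    static uintptr_t Encode(uint32_t serial, uint32_t index, uint32_t kind) {
      assert(index < 65536u && kind < 4u);
      return (static_cast<uintptr_t>(serial) << 20) | (index << 2) | kind;
    }

    // Recover the table index the way the fixed ExtractIndex does.
    static uint32_t ExtractIndex(uintptr_t uref) {
      return (uref >> 2) & 0xffff;
    }

    int main() {
      uintptr_t ref = Encode(/*serial=*/3, /*index=*/1234, /*kind=*/1);
      assert(ExtractIndex(ref) == 1234);
      return 0;
    }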
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index be358e3..0f94ccd 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -200,26 +200,20 @@
     DCHECK(self->IsExceptionPending());
     return false;
   }
+  uint32_t arg[5];  // only used in filled-new-array.
+  uint32_t vregC;   // only used in filled-new-array-range.
   if (is_range) {
-    uint32_t vregC = inst->VRegC_3rc();
-    const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
-    for (int32_t i = 0; i < length; ++i) {
-      if (is_primitive_int_component) {
-        newArray->AsIntArray()->Set(i, shadow_frame.GetVReg(vregC + i));
-      } else {
-        newArray->AsObjectArray<Object>()->Set(i, shadow_frame.GetVRegReference(vregC + i));
-      }
-    }
+    vregC = inst->VRegC_3rc();
   } else {
-    uint32_t arg[5];
     inst->GetArgs(arg);
-    const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
-    for (int32_t i = 0; i < length; ++i) {
-      if (is_primitive_int_component) {
-        newArray->AsIntArray()->Set(i, shadow_frame.GetVReg(arg[i]));
-      } else {
-        newArray->AsObjectArray<Object>()->Set(i, shadow_frame.GetVRegReference(arg[i]));
-      }
+  }
+  const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
+  for (int32_t i = 0; i < length; ++i) {
+    size_t src_reg = is_range ? vregC + i : arg[i];
+    if (is_primitive_int_component) {
+      newArray->AsIntArray()->SetWithoutChecks(i, shadow_frame.GetVReg(src_reg));
+    } else {
+      newArray->AsObjectArray<Object>()->SetWithoutChecks(i, shadow_frame.GetVRegReference(src_reg));
     }
   }
 
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 942c275..ca03885 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -932,8 +932,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       BooleanArray* array = a->AsBooleanArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -950,8 +950,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       ByteArray* array = a->AsByteArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -968,8 +968,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       CharArray* array = a->AsCharArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -986,8 +986,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       ShortArray* array = a->AsShortArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1004,8 +1004,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       IntArray* array = a->AsIntArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1022,8 +1022,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       LongArray* array = a->AsLongArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1040,7 +1040,7 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       ObjectArray<Object>* array = a->AsObjectArray<Object>();
-      if (LIKELY(array->IsValidIndex(index))) {
+      if (LIKELY(array->CheckIsValidIndex(index))) {
         shadow_frame.SetVRegReference(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
@@ -1059,8 +1059,8 @@
       uint8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       BooleanArray* array = a->AsBooleanArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1078,8 +1078,8 @@
       int8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       ByteArray* array = a->AsByteArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1097,8 +1097,8 @@
       uint16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       CharArray* array = a->AsCharArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1116,8 +1116,8 @@
       int16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       ShortArray* array = a->AsShortArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1135,8 +1135,8 @@
       int32_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       IntArray* array = a->AsIntArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1154,8 +1154,8 @@
       int64_t val = shadow_frame.GetVRegLong(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       LongArray* array = a->AsLongArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1173,7 +1173,7 @@
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       Object* val = shadow_frame.GetVRegReference(inst->VRegA_23x(inst_data));
       ObjectArray<Object>* array = a->AsObjectArray<Object>();
-      if (LIKELY(array->IsValidIndex(index) && array->CheckAssignable(val))) {
+      if (LIKELY(array->CheckIsValidIndex(index) && array->CheckAssignable(val))) {
         array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 75041ea..7631736 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -830,8 +830,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         BooleanArray* array = a->AsBooleanArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -848,8 +848,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         ByteArray* array = a->AsByteArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -866,8 +866,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         CharArray* array = a->AsCharArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -884,8 +884,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         ShortArray* array = a->AsShortArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -902,8 +902,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         IntArray* array = a->AsIntArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -920,8 +920,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         LongArray* array = a->AsLongArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -938,7 +938,7 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         ObjectArray<Object>* array = a->AsObjectArray<Object>();
-        if (LIKELY(array->IsValidIndex(index))) {
+        if (LIKELY(array->CheckIsValidIndex(index))) {
           shadow_frame.SetVRegReference(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
@@ -957,8 +957,8 @@
         uint8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         BooleanArray* array = a->AsBooleanArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -976,8 +976,8 @@
         int8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         ByteArray* array = a->AsByteArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -995,8 +995,8 @@
         uint16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         CharArray* array = a->AsCharArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -1014,8 +1014,8 @@
         int16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         ShortArray* array = a->AsShortArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -1033,8 +1033,8 @@
         int32_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         IntArray* array = a->AsIntArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -1052,8 +1052,8 @@
         int64_t val = shadow_frame.GetVRegLong(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         LongArray* array = a->AsLongArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -1071,7 +1071,7 @@
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         Object* val = shadow_frame.GetVRegReference(inst->VRegA_23x(inst_data));
         ObjectArray<Object>* array = a->AsObjectArray<Object>();
-        if (LIKELY(array->IsValidIndex(index) && array->CheckAssignable(val))) {
+        if (LIKELY(array->CheckIsValidIndex(index) && array->CheckAssignable(val))) {
           array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
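
Both interpreter flavors now separate the bounds check from the element access: CheckIsValidIndex records the pending exception on failure, after which Get/SetWithoutChecks can legitimately skip re-checking. A generic sketch of the pattern (CheckedArray is a stand-in, not ART's Array types):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    template <typename T>
    class CheckedArray {
     public:
      explicit CheckedArray(size_t n) : data_(n) {}
      // The real CheckIsValidIndex also throws ArrayIndexOutOfBoundsException.
      bool CheckIsValidIndex(int32_t index) const {
        return index >= 0 && static_cast<size_t>(index) < data_.size();
      }
      T GetWithoutChecks(int32_t index) const { return data_[index]; }
      void SetWithoutChecks(int32_t index, T value) { data_[index] = value; }
     private:
      std::vector<T> data_;
    };

    int main() {
      CheckedArray<int> a(8);
      int32_t i = 5;
      if (a.CheckIsValidIndex(i)) {   // one bounds check...
        a.SetWithoutChecks(i, 7);     // ...then unchecked fast-path accesses
        std::printf("%d\n", a.GetWithoutChecks(i));
      }
      return 0;
    }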
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 4aa7f13..e372c26 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -638,13 +638,14 @@
    * go to sleep indefinitely.
    */
   while (event_thread_id_ != 0) {
-    VLOG(jdwp) << StringPrintf("event in progress (%#llx), %#llx sleeping", event_thread_id_, threadId);
+    VLOG(jdwp) << StringPrintf("event in progress (%#" PRIx64 "), %#" PRIx64 " sleeping",
+                               event_thread_id_, threadId);
     waited = true;
     event_thread_cond_.Wait(self);
   }
 
   if (waited || threadId != 0) {
-    VLOG(jdwp) << StringPrintf("event token grabbed (%#llx)", threadId);
+    VLOG(jdwp) << StringPrintf("event token grabbed (%#" PRIx64 ")", threadId);
   }
   if (threadId != 0) {
     event_thread_id_ = threadId;
@@ -664,7 +665,7 @@
   MutexLock mu(self, event_thread_lock_);
 
   CHECK_NE(event_thread_id_, 0U);
-  VLOG(jdwp) << StringPrintf("cleared event token (%#llx)", event_thread_id_);
+  VLOG(jdwp) << StringPrintf("cleared event token (%#" PRIx64 ")", event_thread_id_);
 
   event_thread_id_ = 0;
 
@@ -820,7 +821,8 @@
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
                  << basket.className << "." << Dbg::GetMethodName(pLoc->method_id)
-                 << StringPrintf(" thread=%#llx dex_pc=%#llx)", basket.threadId, pLoc->dex_pc);
+                 << StringPrintf(" thread=%#" PRIx64 "  dex_pc=%#" PRIx64 ")",
+                                 basket.threadId, pLoc->dex_pc);
 
       suspend_policy = scanSuspendPolicy(match_list, match_count);
       VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
@@ -885,7 +887,7 @@
 
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
-                 << StringPrintf("thread=%#llx", basket.threadId) << ")";
+                 << StringPrintf("thread=%#" PRIx64, basket.threadId) << ")";
 
       suspend_policy = scanSuspendPolicy(match_list, match_count);
       VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
@@ -968,8 +970,8 @@
     FindMatchingEvents(EK_EXCEPTION, &basket, match_list, &match_count);
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total)"
-                 << StringPrintf(" thread=%#llx", basket.threadId)
-                 << StringPrintf(" exceptId=%#llx", exceptionId)
+                 << StringPrintf(" thread=%#" PRIx64, basket.threadId)
+                 << StringPrintf(" exceptId=%#" PRIx64, exceptionId)
                  << " caught=" << basket.caught << ")"
                  << "  throw: " << *pThrowLoc;
       if (pCatchLoc->class_id == 0) {
@@ -1036,7 +1038,7 @@
     FindMatchingEvents(EK_CLASS_PREPARE, &basket, match_list, &match_count);
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
-                 << StringPrintf("thread=%#llx", basket.threadId) << ") " << signature;
+                 << StringPrintf("thread=%#" PRIx64, basket.threadId) << ") " << signature;
 
       suspend_policy = scanSuspendPolicy(match_list, match_count);
       VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 523d892..a514e69 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -21,6 +21,7 @@
 #include <string>
 
 #include "atomic.h"
+#include "base/hex_dump.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/stringprintf.h"
@@ -48,7 +49,7 @@
 std::string DescribeRefTypeId(const RefTypeId& ref_type_id) {
   std::string signature("unknown");
   Dbg::GetSignature(ref_type_id, &signature);
-  return StringPrintf("%#llx (%s)", ref_type_id, signature.c_str());
+  return StringPrintf("%#" PRIx64 " (%s)", ref_type_id, signature.c_str());
 }
 
 // Helper function: write a variable-width value into the output buffer.
@@ -99,8 +100,9 @@
 
   int32_t arg_count = request.ReadSigned32("argument count");
 
-  VLOG(jdwp) << StringPrintf("    --> thread_id=%#llx object_id=%#llx", thread_id, object_id);
-  VLOG(jdwp) << StringPrintf("        class_id=%#llx method_id=%x %s.%s", class_id,
+  VLOG(jdwp) << StringPrintf("    --> thread_id=%#" PRIx64 " object_id=%#" PRIx64,
+                             thread_id, object_id);
+  VLOG(jdwp) << StringPrintf("        class_id=%#" PRIx64 " method_id=%x %s.%s", class_id,
                              method_id, Dbg::GetClassName(class_id).c_str(),
                              Dbg::GetMethodName(method_id).c_str());
   VLOG(jdwp) << StringPrintf("        %d args:", arg_count);
@@ -111,7 +113,8 @@
     argTypes[i] = request.ReadTag();
     size_t width = Dbg::GetTagWidth(argTypes[i]);
     argValues[i] = request.ReadValue(width);
-    VLOG(jdwp) << "          " << argTypes[i] << StringPrintf("(%zd): %#llx", width, argValues[i]);
+    VLOG(jdwp) << "          " << argTypes[i] << StringPrintf("(%zd): %#" PRIx64, width,
+                                                              argValues[i]);
   }
 
   uint32_t options = request.ReadUnsigned32("InvokeOptions bit flags");
@@ -143,7 +146,8 @@
     expandBufAdd1(pReply, JT_OBJECT);
     expandBufAddObjectId(pReply, exceptObjId);
 
-    VLOG(jdwp) << "  --> returned " << resultTag << StringPrintf(" %#llx (except=%#llx)", resultValue, exceptObjId);
+    VLOG(jdwp) << "  --> returned " << resultTag
+        << StringPrintf(" %#" PRIx64 " (except=%#" PRIx64 ")", resultValue, exceptObjId);
 
     /* show detailed debug output */
     if (resultTag == JT_STRING && exceptObjId == 0) {
@@ -526,7 +530,7 @@
   if (status != ERR_NONE) {
     return status;
   }
-  VLOG(jdwp) << StringPrintf("    --> ObjectId %#llx", class_object_id);
+  VLOG(jdwp) << StringPrintf("    --> ObjectId %#" PRIx64, class_object_id);
   expandBufAddObjectId(pReply, class_object_id);
   return ERR_NONE;
 }
@@ -936,7 +940,7 @@
   if (error != ERR_NONE) {
     return error;
   }
-  VLOG(jdwp) << StringPrintf("  Name of thread %#llx is \"%s\"", thread_id, name.c_str());
+  VLOG(jdwp) << StringPrintf("  Name of thread %#" PRIx64 " is \"%s\"", thread_id, name.c_str());
   expandBufAddUtf8String(pReply, name);
 
   return ERR_NONE;
@@ -1335,7 +1339,7 @@
         ObjectId thread_id = request.ReadThreadId();
         uint32_t size = request.ReadUnsigned32("step size");
         uint32_t depth = request.ReadUnsigned32("step depth");
-        VLOG(jdwp) << StringPrintf("    Step: thread=%#llx", thread_id)
+        VLOG(jdwp) << StringPrintf("    Step: thread=%#" PRIx64, thread_id)
                      << " size=" << JdwpStepSize(size) << " depth=" << JdwpStepDepth(depth);
 
         mod.step.threadId = thread_id;
@@ -1640,7 +1644,7 @@
   std::string result;
   result += "REQUEST: ";
   result += GetCommandName(request);
-  result += StringPrintf(" (length=%d id=0x%06x)", request.GetLength(), request.GetId());
+  result += StringPrintf(" (length=%zu id=0x%06x)", request.GetLength(), request.GetId());
   return result;
 }
 
@@ -1702,7 +1706,7 @@
   }
   if (i == arraysize(gHandlers)) {
     LOG(ERROR) << "Command not implemented: " << DescribeCommand(request);
-    LOG(ERROR) << HexDump(request.data(), request.size());
+    LOG(ERROR) << HexDump(request.data(), request.size(), false, "");
     result = ERR_NOT_IMPLEMENTED;
   }
 
@@ -1726,7 +1730,7 @@
   size_t respLen = expandBufGetLength(pReply) - kJDWPHeaderLen;
   VLOG(jdwp) << "REPLY: " << GetCommandName(request) << " " << result << " (length=" << respLen << ")";
   if (false) {
-    VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen);
+    VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen, false, "");
   }
 
   VLOG(jdwp) << "----------";
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 127ebfa..928f53d 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -156,11 +156,11 @@
   errno = 0;
   ssize_t actual = netState->WriteBufferedPacket(iov);
   if (static_cast<size_t>(actual) != expected) {
-    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet %c%c%c%c to debugger (%d of %d)",
-                                static_cast<uint8_t>(type >> 24),
-                                static_cast<uint8_t>(type >> 16),
-                                static_cast<uint8_t>(type >> 8),
-                                static_cast<uint8_t>(type),
+    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet %c%c%c%c to debugger (%zd of %zu)",
+                                static_cast<char>(type >> 24),
+                                static_cast<char>(type >> 16),
+                                static_cast<char>(type >> 8),
+                                static_cast<char>(type),
                                 actual, expected);
   }
 }
@@ -175,7 +175,7 @@
   errno = 0;
   ssize_t actual = netState->WritePacket(pReq);
   if (static_cast<size_t>(actual) != expandBufGetLength(pReq)) {
-    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet to debugger (%d of %d)",
+    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet to debugger (%zd of %zu)",
                                 actual, expandBufGetLength(pReq));
   }
 }
@@ -607,7 +607,7 @@
 std::ostream& operator<<(std::ostream& os, const JdwpLocation& rhs) {
   os << "JdwpLocation["
      << Dbg::GetClassName(rhs.class_id) << "." << Dbg::GetMethodName(rhs.method_id)
-     << "@" << StringPrintf("%#llx", rhs.dex_pc) << " " << rhs.type_tag << "]";
+     << "@" << StringPrintf("%#" PRIx64, rhs.dex_pc) << " " << rhs.type_tag << "]";
   return os;
 }
 
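The "%d of %d" fixes above follow the same principle: ssize_t and size_t take
the 'z' length modifier, which stays correct when those types are wider than
int. A small sketch, assuming POSIX for ssize_t:

#include <cstdio>
#include <sys/types.h>

void ReportShortWrite(ssize_t actual, size_t expected) {
  // 'z' tells printf the argument has the platform's (s)size_t width.
  fprintf(stderr, "short write (%zd of %zu)\n", actual, expected);
}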
diff --git a/runtime/jdwp/jdwp_request.cc b/runtime/jdwp/jdwp_request.cc
index a9dd1e1..7b15d6d 100644
--- a/runtime/jdwp/jdwp_request.cc
+++ b/runtime/jdwp/jdwp_request.cc
@@ -16,6 +16,8 @@
 
 #include "jdwp/jdwp.h"
 
+#include <inttypes.h>
+
 #include "base/stringprintf.h"
 #include "jdwp/jdwp_priv.h"
 
@@ -98,7 +100,7 @@
 
 ObjectId Request::ReadObjectId(const char* specific_kind) {
   ObjectId id = Read8BE();
-  VLOG(jdwp) << StringPrintf("    %s id %#llx", specific_kind, id);
+  VLOG(jdwp) << StringPrintf("    %s id %#" PRIx64, specific_kind, id);
   return id;
 }
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 81cc94b..30b4ee8 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -523,6 +523,12 @@
     return dlsym(handle_, symbol_name.c_str());
   }
 
+  void VisitRoots(RootVisitor* visitor, void* arg) {
+    if (class_loader_ != nullptr) {
+      class_loader_ = visitor(class_loader_, arg);
+    }
+  }
+
  private:
   enum JNI_OnLoadState {
     kPending,
@@ -613,6 +619,12 @@
     return NULL;
   }
 
+  void VisitRoots(RootVisitor* visitor, void* arg) {
+    for (auto& lib_pair : libraries_) {
+      lib_pair.second->VisitRoots(visitor, arg);
+    }
+  }
+
  private:
   SafeMap<std::string, SharedLibrary*> libraries_;
 };
@@ -2195,7 +2207,7 @@
     Array* array = soa.Decode<Array*>(java_array);
     gc::Heap* heap = Runtime::Current()->GetHeap();
     if (heap->IsMovableObject(array)) {
-      heap->IncrementDisableGC(soa.Self());
-      // Re-decode in case the object moved since IncrementDisableGC waits for GC to complete.
+      heap->IncrementDisableMovingGC(soa.Self());
+      // Re-decode in case the object moved since IncrementDisableMovingGC waits for GC to complete.
       array = soa.Decode<Array*>(java_array);
     }
@@ -2203,8 +2215,7 @@
     if (is_copy != nullptr) {
       *is_copy = JNI_FALSE;
     }
-    void* address = array->GetRawData(array->GetClass()->GetComponentSize());;
-    return address;
+    return array->GetRawData(array->GetClass()->GetComponentSize());
   }
 
   static void ReleasePrimitiveArrayCritical(JNIEnv* env, jarray array, void* elements, jint mode) {
@@ -2639,9 +2650,6 @@
     size_t bytes = array->GetLength() * component_size;
     VLOG(heap) << "Release primitive array " << env << " array_data " << array_data
                << " elements " << reinterpret_cast<void*>(elements);
-    if (!is_copy && heap->IsMovableObject(array)) {
-      heap->DecrementDisableGC(soa.Self());
-    }
     // Don't need to copy if we had a direct pointer.
     if (mode != JNI_ABORT && is_copy) {
       memcpy(array_data, elements, bytes);
@@ -2649,10 +2657,12 @@
     if (mode != JNI_COMMIT) {
       if (is_copy) {
         delete[] reinterpret_cast<uint64_t*>(elements);
+      } else if (heap->IsMovableObject(array)) {
+        // A non-copy of a movable array means we must have disabled the moving GC.
+        heap->DecrementDisableMovingGC(soa.Self());
       }
+      UnpinPrimitiveArray(soa, array);
     }
-    // TODO: Do we always unpin primitive array?
-    UnpinPrimitiveArray(soa, array);
   }
 
   template <typename JavaArrayT, typename JavaT, typename ArrayT>
@@ -3387,6 +3397,11 @@
     MutexLock mu(self, pins_lock);
     pin_table.VisitRoots(visitor, arg);
   }
+  {
+    MutexLock mu(self, libraries_lock);
+    // Libraries contains shared libraries, each of which may hold a pointer to a class loader.
+    libraries->VisitRoots(visitor, arg);
+  }
   // The weak_globals table is visited by the GC itself (because it mutates the table).
 }
 
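For context on the PrimitiveArrayCritical changes above: JNI requires every
Get/ReleasePrimitiveArrayCritical pair to balance, and a JNI_COMMIT release
keeps the buffer live, so the moving-GC disable count may only be dropped on
the final, non-JNI_COMMIT release. A hedged usage sketch of that contract
(the helper name and signature are illustrative):

#include <jni.h>

jlong SumIntArray(JNIEnv* env, jintArray array) {
  jint len = env->GetArrayLength(array);
  void* raw = env->GetPrimitiveArrayCritical(array, /*isCopy=*/nullptr);
  if (raw == nullptr) {
    return 0;  // an OutOfMemoryError is pending
  }
  jint* data = static_cast<jint*>(raw);
  jlong sum = 0;
  for (jint i = 0; i < len; ++i) {
    sum += data[i];
  }
  // JNI_ABORT: we made no writes that need copying back.
  env->ReleasePrimitiveArrayCritical(array, raw, JNI_ABORT);
  return sum;
}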
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index aea10c2..8b9a3cd 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -46,7 +46,8 @@
 }
 
 inline LockWord::LockWord(Monitor* mon)
-    : value_((reinterpret_cast<uint32_t>(mon) >> kStateSize) | (kStateFat << kStateShift)) {
+    : value_(((reinterpret_cast<uintptr_t>(mon) >> kStateSize) | (kStateFat << kStateShift)) &
+             0xFFFFFFFFU) {
   DCHECK_EQ(FatLockMonitor(), mon);
 }
 
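The LockWord change matters on 64-bit hosts: reinterpret_cast from a pointer
straight to uint32_t is ill-formed there, while uintptr_t always holds the
full pointer and the explicit mask documents the intended truncation to the
32-bit word. A reduced sketch, with made-up stand-ins for kStateSize and the
state bits:

#include <cstdint>

uint32_t PackMonitorWord(void* mon) {
  const uint32_t kStateSize = 2;                     // illustrative value
  const uint32_t kStateBits = 3u << 30;              // illustrative "fat" state
  uintptr_t raw = reinterpret_cast<uintptr_t>(mon);  // legal on 32- and 64-bit
  return static_cast<uint32_t>(((raw >> kStateSize) | kStateBits) & 0xFFFFFFFFU);
}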
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 97b34ef..0a3e1a1 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -16,8 +16,10 @@
 
 #include "mem_map.h"
 
+#include <inttypes.h>
 #include <backtrace/BacktraceMap.h>
 
+#include "UniquePtr.h"
 #include "base/stringprintf.h"
 #include "ScopedFd.h"
 #include "utils.h"
@@ -54,20 +56,21 @@
   uintptr_t base = reinterpret_cast<uintptr_t>(addr);
   uintptr_t limit = base + byte_count;
 
-  BacktraceMap map(getpid());
-  if (!map.Build()) {
+  UniquePtr<BacktraceMap> map(BacktraceMap::Create(getpid()));
+  if (!map->Build()) {
     PLOG(WARNING) << "Failed to build process map";
     return;
   }
-  for (BacktraceMap::const_iterator it = map.begin(); it != map.end(); ++it) {
+  for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
     CHECK(!(base >= it->start && base < it->end)     // start of new within old
         && !(limit > it->start && limit < it->end)   // end of new within old
         && !(base <= it->start && limit > it->end))  // start/end of new includes all of old
-        << StringPrintf("Requested region 0x%08x-0x%08x overlaps with existing map 0x%08x-0x%08x (%s)\n",
+        << StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " overlaps with "
+                        "existing map 0x%08" PRIxPTR "-0x%08" PRIxPTR " (%s)\n",
                         base, limit,
-                        static_cast<uint32_t>(it->start), static_cast<uint32_t>(it->end),
+                        static_cast<uintptr_t>(it->start), static_cast<uintptr_t>(it->end),
                         it->name.c_str())
-        << std::make_pair(it, map.end());
+        << std::make_pair(it, map->end());
   }
 }
 
@@ -144,7 +147,7 @@
     std::string strerr(strerror(errno));
     std::string maps;
     ReadFileToString("/proc/self/maps", &maps);
-    *error_msg = StringPrintf("mmap(%p, %zd, %x, %x, %d, %lld) of file '%s' failed: %s\n%s",
+    *error_msg = StringPrintf("mmap(%p, %zd, %x, %x, %d, %" PRId64 ") of file '%s' failed: %s\n%s",
                               page_aligned_addr, page_aligned_byte_count, prot, flags, fd,
                               static_cast<int64_t>(page_aligned_offset), filename, strerr.c_str(),
                               maps.c_str());
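
The overlap CHECK in the hunk above reduces to a standard interval test over
half-open ranges. Pulled out as a testable helper (hypothetical name, same
three cases as the CHECK):

#include <cstdint>

bool RangesOverlap(uintptr_t base, uintptr_t limit,   // new region [base, limit)
                   uintptr_t start, uintptr_t end) {  // existing map [start, end)
  return (base >= start && base < end)    // start of new within old
      || (limit > start && limit < end)   // end of new within old
      || (base <= start && limit > end);  // new region covers all of old
}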
diff --git a/runtime/method_reference.h b/runtime/method_reference.h
index 1ff4ea0..8e46d7e 100644
--- a/runtime/method_reference.h
+++ b/runtime/method_reference.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_METHOD_REFERENCE_H_
 #define ART_RUNTIME_METHOD_REFERENCE_H_
 
+#include <stdint.h>
+
 namespace art {
 
 class DexFile;
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 5265946..207573f 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -19,6 +19,7 @@
 
 #include "object.h"
 #include "gc/heap.h"
+#include "thread.h"
 
 namespace art {
 namespace mirror {
@@ -83,7 +84,9 @@
     return reinterpret_cast<const void*>(data);
   }
 
-  bool IsValidIndex(int32_t index) const
+  // Returns true if the index is valid. If not, throws an ArrayIndexOutOfBoundsException and
+  // returns false.
+  bool CheckIsValidIndex(int32_t index) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(static_cast<uint32_t>(index) >= static_cast<uint32_t>(GetLength()))) {
       ThrowArrayIndexOutOfBoundsException(index);
@@ -93,12 +96,13 @@
   }
 
  protected:
-  void ThrowArrayIndexOutOfBoundsException(int32_t index) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ThrowArrayStoreException(Object* object) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
+  void ThrowArrayIndexOutOfBoundsException(int32_t index) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // The number of array elements.
   int32_t length_;
   // Marker for the data (used by generated code)
@@ -126,18 +130,31 @@
   }
 
   T Get(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (!IsValidIndex(i)) {
+    if (UNLIKELY(!CheckIsValidIndex(i))) {
+      DCHECK(Thread::Current()->IsExceptionPending());
       return T(0);
     }
+    return GetWithoutChecks(i);
+  }
+
+  T GetWithoutChecks(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(CheckIsValidIndex(i));
     return GetData()[i];
   }
 
   void Set(int32_t i, T value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (IsValidIndex(i)) {
-      GetData()[i] = value;
+    if (LIKELY(CheckIsValidIndex(i))) {
+      SetWithoutChecks(i, value);
+    } else {
+      DCHECK(Thread::Current()->IsExceptionPending());
     }
   }
 
+  void SetWithoutChecks(int32_t i, T value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(CheckIsValidIndex(i));
+    GetData()[i] = value;
+  }
+
   static void SetArrayClass(Class* array_class) {
     CHECK(array_class_ == NULL);
     CHECK(array_class != NULL);
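
The array.h change splits each accessor into a public, bounds-checked entry
point and a *WithoutChecks fast path that only DCHECKs, so callers that have
already proven the index valid skip the branch. A self-contained sketch of
the shape (assert stands in for DCHECK; the class is a toy, not mirror::Array):

#include <cassert>
#include <cstdint>

template <typename T>
class ToyArray {
 public:
  bool CheckIsValidIndex(int32_t i) const {
    // One unsigned compare covers both i < 0 and i >= length_.
    return static_cast<uint32_t>(i) < static_cast<uint32_t>(length_);
  }
  T Get(int32_t i) const {
    if (!CheckIsValidIndex(i)) {
      return T(0);  // the real code throws ArrayIndexOutOfBoundsException first
    }
    return GetWithoutChecks(i);
  }
  T GetWithoutChecks(int32_t i) const {
    assert(CheckIsValidIndex(i));
    return data_[i];
  }
 private:
  int32_t length_ = 0;
  T* data_ = nullptr;
};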
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index be49b42..6a50dfe 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -50,11 +50,11 @@
 
 template<class T>
 inline T* ObjectArray<T>::Get(int32_t i) const {
-  if (UNLIKELY(!IsValidIndex(i))) {
+  if (UNLIKELY(!CheckIsValidIndex(i))) {
+    DCHECK(Thread::Current()->IsExceptionPending());
     return NULL;
   }
-  MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
-  return GetFieldObject<T*>(data_offset, false);
+  return GetWithoutChecks(i);
 }
 
 template<class T>
@@ -71,9 +71,8 @@
 
 template<class T>
 inline void ObjectArray<T>::Set(int32_t i, T* object) {
-  if (LIKELY(IsValidIndex(i) && CheckAssignable(object))) {
-    MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
-    SetFieldObject(data_offset, object, false);
+  if (LIKELY(CheckIsValidIndex(i) && CheckAssignable(object))) {
+    SetWithoutChecks(i, object);
   } else {
     DCHECK(Thread::Current()->IsExceptionPending());
   }
@@ -81,21 +80,24 @@
 
 template<class T>
 inline void ObjectArray<T>::SetWithoutChecks(int32_t i, T* object) {
-  DCHECK(IsValidIndex(i));
+  DCHECK(CheckIsValidIndex(i));
+  DCHECK(CheckAssignable(object));
   MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
   SetFieldObject(data_offset, object, false);
 }
 
 template<class T>
 inline void ObjectArray<T>::SetPtrWithoutChecks(int32_t i, T* object) {
-  DCHECK(IsValidIndex(i));
+  DCHECK(CheckIsValidIndex(i));
+  // TODO: enable this check. It fails when writing the image in ImageWriter::FixupObjectArray.
+  // DCHECK(CheckAssignable(object));
   MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
   SetFieldPtr(data_offset, object, false);
 }
 
 template<class T>
 inline T* ObjectArray<T>::GetWithoutChecks(int32_t i) const {
-  DCHECK(IsValidIndex(i));
+  DCHECK(CheckIsValidIndex(i));
   MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
   return GetFieldObject<T*>(data_offset, false);
 }
@@ -104,10 +106,10 @@
 inline void ObjectArray<T>::Copy(const ObjectArray<T>* src, int src_pos,
                                  ObjectArray<T>* dst, int dst_pos,
                                  size_t length) {
-  if (src->IsValidIndex(src_pos) &&
-      src->IsValidIndex(src_pos+length-1) &&
-      dst->IsValidIndex(dst_pos) &&
-      dst->IsValidIndex(dst_pos+length-1)) {
+  if (src->CheckIsValidIndex(src_pos) &&
+      src->CheckIsValidIndex(src_pos + length - 1) &&
+      dst->CheckIsValidIndex(dst_pos) &&
+      dst->CheckIsValidIndex(dst_pos + length - 1)) {
     MemberOffset src_offset(DataOffset(sizeof(Object*)).Int32Value() + src_pos * sizeof(Object*));
     MemberOffset dst_offset(DataOffset(sizeof(Object*)).Int32Value() + dst_pos * sizeof(Object*));
     Class* array_class = dst->GetClass();
@@ -139,6 +141,8 @@
       }
     }
     heap->WriteBarrierArray(dst, dst_pos, length);
+  } else {
+    DCHECK(Thread::Current()->IsExceptionPending());
   }
 }
 
diff --git a/runtime/oat.cc b/runtime/oat.cc
index caf18f1..81d4540 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '1', '3', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '1', '4', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 0e73812..365c9c3 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -36,6 +36,11 @@
 #include "ScopedLocalRef.h"
 #include "thread.h"
 #include "thread_list.h"
+
+#ifdef HAVE_ANDROID_OS
+#include "cutils/properties.h"
+#endif
+
 #if !defined(ART_USE_PORTABLE_COMPILER)
 #include "entrypoints/quick/quick_entrypoints.h"
 #endif
@@ -259,6 +264,17 @@
     }
   }
 
+  // Only on target...
+#ifdef HAVE_ANDROID_OS
+  // Switch off profiler if the dalvik.vm.profiler property has value 0.
+  char buf[PROP_VALUE_MAX];
+  property_get("dalvik.vm.profiler", buf, "0");
+  if (strcmp(buf, "0") == 0) {
+    LOG(INFO) << "Profiler disabled.  To enable setprop dalvik.vm.profiler 1";
+    return;
+  }
+#endif
+
   LOG(INFO) << "Starting profile with period " << period << "s, duration " << duration <<
       "s, interval " << interval_us << "us.  Profile file " << profile_file_name;
 
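The property gate added above only compiles on device (HAVE_ANDROID_OS), where
libcutils' property_get(key, value, default) fills the buffer and falls back
to the default when the key is unset. A hedged sketch of the same gate as a
standalone predicate (the function name is illustrative):

#ifdef HAVE_ANDROID_OS
#include <cutils/properties.h>
#endif
#include <cstring>

bool ProfilerEnabledByProperty() {
#ifdef HAVE_ANDROID_OS
  char buf[PROP_VALUE_MAX];
  property_get("dalvik.vm.profiler", buf, "0");  // "0" is the default value
  return strcmp(buf, "0") != 0;
#else
  return true;  // host build: no property system, leave the profiler enabled
#endif
}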
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 90bfc9b..d070207 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -318,10 +318,19 @@
   return 0;
 }
 
-size_t ParseIntegerOrDie(const std::string& s) {
-  std::string::size_type colon = s.find(':');
+static const std::string StringAfterChar(const std::string& s, char c) {
+  std::string::size_type colon = s.find(c);
   if (colon == std::string::npos) {
-    LOG(FATAL) << "Missing integer: " << s;
+    LOG(FATAL) << "Missing char " << c << " in string " << s;
+  }
+  // Add one to skip over the char we searched for.
+  return s.substr(colon + 1);
+}
+
+static size_t ParseIntegerOrDie(const std::string& s, char after_char) {
+  std::string::size_type colon = s.find(after_char);
+  if (colon == std::string::npos) {
+    LOG(FATAL) << "Missing char " << after_char << " in string " << s;
   }
   const char* begin = &s[colon + 1];
   char* end;
@@ -332,10 +341,10 @@
   return result;
 }
 
-double ParseDoubleOrDie(const std::string& option, const char* prefix,
-                        double min, double max, bool ignore_unrecognized,
-                        double defval) {
-  std::istringstream iss(option.substr(strlen(prefix)));
+static double ParseDoubleOrDie(const std::string& option, char after_char, double min, double max,
+                               bool ignore_unrecognized, double defval) {
+  std::istringstream iss(StringAfterChar(option, after_char));
   double value;
   iss >> value;
   // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
@@ -473,7 +482,7 @@
       parsed->boot_class_path_
           = reinterpret_cast<const std::vector<const DexFile*>*>(options[i].second);
     } else if (StartsWith(option, "-Ximage:")) {
-      parsed->image_ = option.substr(strlen("-Ximage:")).data();
+      parsed->image_ = StringAfterChar(option, ':');
     } else if (StartsWith(option, "-Xcheck:jni")) {
       parsed->check_jni_ = true;
     } else if (StartsWith(option, "-Xrunjdwp:") || StartsWith(option, "-agentlib:jdwp=")) {
@@ -539,15 +548,12 @@
       }
       parsed->heap_max_free_ = size;
     } else if (StartsWith(option, "-XX:HeapTargetUtilization=")) {
-      parsed->heap_target_utilization_ = ParseDoubleOrDie(option, "-XX:HeapTargetUtilization=",
-          0.1, 0.9, ignore_unrecognized,
-          parsed->heap_target_utilization_);
+      parsed->heap_target_utilization_ = ParseDoubleOrDie(
+          option, '=', 0.1, 0.9, ignore_unrecognized, parsed->heap_target_utilization_);
     } else if (StartsWith(option, "-XX:ParallelGCThreads=")) {
-      parsed->parallel_gc_threads_ =
-          ParseMemoryOption(option.substr(strlen("-XX:ParallelGCThreads=")).c_str(), 1024);
+      parsed->parallel_gc_threads_ = ParseIntegerOrDie(option, '=');
     } else if (StartsWith(option, "-XX:ConcGCThreads=")) {
-      parsed->conc_gc_threads_ =
-          ParseMemoryOption(option.substr(strlen("-XX:ConcGCThreads=")).c_str(), 1024);
+      parsed->conc_gc_threads_ = ParseIntegerOrDie(option, '=');
     } else if (StartsWith(option, "-Xss")) {
       size_t size = ParseMemoryOption(option.substr(strlen("-Xss")).c_str(), 1);
       if (size == 0) {
@@ -560,15 +566,11 @@
       }
       parsed->stack_size_ = size;
     } else if (StartsWith(option, "-XX:MaxSpinsBeforeThinLockInflation=")) {
-      parsed->max_spins_before_thin_lock_inflation_ =
-          strtoul(option.substr(strlen("-XX:MaxSpinsBeforeThinLockInflation=")).c_str(),
-                  nullptr, 10);
-    } else if (option == "-XX:LongPauseLogThreshold") {
-      parsed->long_pause_log_threshold_ =
-          ParseMemoryOption(option.substr(strlen("-XX:LongPauseLogThreshold=")).c_str(), 1024);
-    } else if (option == "-XX:LongGCLogThreshold") {
-          parsed->long_gc_log_threshold_ =
-              ParseMemoryOption(option.substr(strlen("-XX:LongGCLogThreshold")).c_str(), 1024);
+      parsed->max_spins_before_thin_lock_inflation_ = ParseIntegerOrDie(option, '=');
+    } else if (StartsWith(option, "-XX:LongPauseLogThreshold=")) {
+      parsed->long_pause_log_threshold_ = MsToNs(ParseIntegerOrDie(option, '='));
+    } else if (StartsWith(option, "-XX:LongGCLogThreshold=")) {
+      parsed->long_gc_log_threshold_ = MsToNs(ParseIntegerOrDie(option, '='));
     } else if (option == "-XX:DumpGCPerformanceOnShutdown") {
       parsed->dump_gc_performance_on_shutdown_ = true;
     } else if (option == "-XX:IgnoreMaxFootprint") {
@@ -594,7 +596,7 @@
       for (const std::string& gc_option : gc_options) {
         gc::CollectorType collector_type = ParseCollectorType(gc_option);
         if (collector_type != gc::kCollectorTypeNone) {
-          parsed->collector_type_ = gc::kCollectorTypeGSS;
+          parsed->collector_type_ = collector_type;
         } else if (gc_option == "preverify") {
           parsed->verify_pre_gc_heap_ = true;
         }  else if (gc_option == "nopreverify") {
@@ -608,7 +610,7 @@
         }
       }
     } else if (StartsWith(option, "-XX:BackgroundGC=")) {
-      const std::string substring = option.substr(strlen("-XX:BackgroundGC="));
+      const std::string substring = StringAfterChar(option, '=');
       gc::CollectorType collector_type = ParseCollectorType(substring);
       if (collector_type != gc::kCollectorTypeNone) {
         parsed->background_collector_type_ = collector_type;
@@ -650,9 +652,9 @@
     } else if (StartsWith(option, "-Xjnigreflimit:")) {
       // Silently ignored for backwards compatibility.
     } else if (StartsWith(option, "-Xlockprofthreshold:")) {
-      parsed->lock_profiling_threshold_ = ParseIntegerOrDie(option);
+      parsed->lock_profiling_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xstacktracefile:")) {
-      parsed->stack_trace_file_ = option.substr(strlen("-Xstacktracefile:"));
+      parsed->stack_trace_file_ = StringAfterChar(option, ':');
     } else if (option == "sensitiveThread") {
       parsed->hook_is_sensitive_thread_ = reinterpret_cast<bool (*)()>(const_cast<void*>(options[i].second));
     } else if (option == "vfprintf") {
@@ -671,7 +673,7 @@
     } else if (StartsWith(option, "-Xmethod-trace-file:")) {
       parsed->method_trace_file_ = option.substr(strlen("-Xmethod-trace-file:"));
     } else if (StartsWith(option, "-Xmethod-trace-file-size:")) {
-      parsed->method_trace_file_size_ = ParseIntegerOrDie(option);
+      parsed->method_trace_file_size_ = ParseIntegerOrDie(option, ':');
     } else if (option == "-Xprofile:threadcpuclock") {
       Trace::SetDefaultClockSource(kProfilerClockSourceThreadCpu);
     } else if (option == "-Xprofile:wallclock") {
@@ -679,18 +681,17 @@
     } else if (option == "-Xprofile:dualclock") {
       Trace::SetDefaultClockSource(kProfilerClockSourceDual);
     } else if (StartsWith(option, "-Xprofile:")) {
-      parsed->profile_output_filename_ = option.substr(strlen("-Xprofile:"));
+      parsed->profile_output_filename_ = StringAfterChar(option, ':');
       parsed->profile_ = true;
     } else if (StartsWith(option, "-Xprofile-period:")) {
-      parsed->profile_period_s_ = ParseIntegerOrDie(option);
+      parsed->profile_period_s_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xprofile-duration:")) {
-      parsed->profile_duration_s_ = ParseIntegerOrDie(option);
+      parsed->profile_duration_s_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xprofile-interval:")) {
-      parsed->profile_interval_us_ = ParseIntegerOrDie(option);
+      parsed->profile_interval_us_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xprofile-backoff:")) {
-      parsed->profile_backoff_coefficient_ = ParseDoubleOrDie(option, "-Xprofile-backoff:",
-          1.0, 10.0, ignore_unrecognized,
-          parsed->profile_backoff_coefficient_);
+      parsed->profile_backoff_coefficient_ = ParseDoubleOrDie(
+          option, ':', 1.0, 10.0, ignore_unrecognized, parsed->profile_backoff_coefficient_);
     } else if (option == "-compiler-filter:interpret-only") {
       parsed->compiler_filter_ = kInterpretOnly;
     } else if (option == "-compiler-filter:space") {
@@ -704,15 +705,15 @@
     } else if (option == "-sea_ir") {
       parsed->sea_ir_mode_ = true;
     } else if (StartsWith(option, "-huge-method-max:")) {
-      parsed->huge_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->huge_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-large-method-max:")) {
-      parsed->large_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->large_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-small-method-max:")) {
-      parsed->small_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->small_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-tiny-method-max:")) {
-      parsed->tiny_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->tiny_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-num-dex-methods-max:")) {
-      parsed->num_dex_methods_threshold_ = ParseIntegerOrDie(option);
+      parsed->num_dex_methods_threshold_ = ParseIntegerOrDie(option, ':');
     } else {
       if (!ignore_unrecognized) {
         // TODO: print usage via vfprintf
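
The runtime.cc refactor replaces per-flag strlen("-Xfoo:") arithmetic with
helpers keyed on a separator char. A standalone sketch of the same idea, with
LOG(FATAL) replaced by a bool result (helper names mirror the patch but are
otherwise illustrative):

#include <cstdlib>
#include <string>

static std::string StringAfterChar(const std::string& s, char c) {
  std::string::size_type pos = s.find(c);
  return (pos == std::string::npos) ? std::string() : s.substr(pos + 1);
}

static bool ParseIntegerAfterChar(const std::string& s, char c, size_t* out) {
  std::string rest = StringAfterChar(s, c);
  char* end = nullptr;
  long value = strtol(rest.c_str(), &end, 10);
  if (rest.empty() || *end != '\0' || value < 0) {
    return false;  // the real helper LOG(FATAL)s here
  }
  *out = static_cast<size_t>(value);
  return true;
}

// e.g. ParseIntegerAfterChar("-Xprofile-period:10", ':', &period) sets period to 10.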
diff --git a/runtime/stack.h b/runtime/stack.h
index 590f406..0692390 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -52,6 +52,50 @@
   kUndefined,
 };
 
+/**
+ * @brief Represents the virtual register numbers that denote special meaning.
+ * @details This is used to give certain virtual register numbers a specific
+ * semantic meaning, so that the compiler can treat all virtual registers
+ * uniformly and only special-case them when needed. For example,
+ * calculating SSA does not care whether a virtual register is a normal one or
+ * a compiler temporary, so it can deal with them in a consistent manner. But
+ * if, say, the backend cares about temporaries because it has a custom spill
+ * location for them, it can special-case them at that point.
+ */
+enum VRegBaseRegNum : int {
+  /**
+   * @brief Virtual registers originating from dex have number >= 0.
+   */
+  kVRegBaseReg = 0,
+
+  /**
+   * @brief Invalid virtual register number.
+   */
+  kVRegInvalid = -1,
+
+  /**
+   * @brief Used to denote the base register for compiler temporaries.
+   * @details Compiler temporaries are virtual registers that do not originate
+   * from dex but are created by the compiler. All virtual register numbers
+   * that are <= kVRegTempBaseReg are categorized as compiler temporaries.
+   */
+  kVRegTempBaseReg = -2,
+
+  /**
+   * @brief Base register of temporary that holds the method pointer.
+   * @details This is a special compiler temporary because it has a specific
+   * location on stack.
+   */
+  kVRegMethodPtrBaseReg = kVRegTempBaseReg,
+
+  /**
+   * @brief Base register of non-special compiler temporary.
+   * @details A non-special compiler temporary is one whose spill location
+   * is flexible.
+   */
+  kVRegNonSpecialTempBaseReg = -3,
+};
+
 // ShadowFrame has 3 possible layouts:
 //  - portable - a unified array of VRegs and references. Precise references need GC maps.
 //  - interpreter - separate VRegs and reference arrays. References are in the reference array.
@@ -524,8 +568,15 @@
   /*
    * Return sp-relative offset for a Dalvik virtual register, compiler
    * spill or Method* in bytes using Method*.
-   * Note that (reg >= 0) refers to a Dalvik register, (reg == -2)
-   * denotes Method* and (reg <= -3) denotes a compiler temp.
+   * Note that (reg >= 0) refers to a Dalvik register, (reg == -1)
+   * denotes an invalid Dalvik register, (reg == -2) denotes Method*
+   * and (reg <= -3) denotes a compiler temporary. A compiler temporary
+   * can be thought of as a virtual register that does not exist in the
+   * dex but holds intermediate values to help optimizations and code
+   * generation. A special compiler temporary is one whose frame location
+   * is well known, while non-special ones have no fixed location in the
+   * frame as long as the code generator itself knows how to access
+   * them.
    *
    *     +------------------------+
    *     | IN[ins-1]              |  {Note: resides in caller's frame}
@@ -546,9 +597,9 @@
    *     | V[1]                   |  ... (reg == 1)
    *     | V[0]                   |  ... (reg == 0) <---- "locals_start"
    *     +------------------------+
-   *     | Compiler temps         |  ... (reg == -2)
-   *     |                        |  ... (reg == -3)
-   *     |                        |  ... (reg == -4)
+   *     | Compiler temp region   |  ... (reg <= -3)
+   *     |                        |
+   *     |                        |
    *     +------------------------+
    *     | stack alignment padding|  {0 to (kStackAlignWords-1) of padding}
    *     +------------------------+
@@ -556,23 +607,35 @@
    *     | OUT[outs-2]            |
    *     |       .                |
    *     | OUT[0]                 |
-   *     | curMethod*             |  ... (reg == -1) <<== sp, 16-byte aligned
+   *     | curMethod*             |  ... (reg == -2) <<== sp, 16-byte aligned
    *     +========================+
    */
   static int GetVRegOffset(const DexFile::CodeItem* code_item,
                            uint32_t core_spills, uint32_t fp_spills,
                            size_t frame_size, int reg) {
     DCHECK_EQ(frame_size & (kStackAlignment - 1), 0U);
+    DCHECK_NE(reg, static_cast<int>(kVRegInvalid));
+
     int num_spills = __builtin_popcount(core_spills) + __builtin_popcount(fp_spills) + 1;  // Filler.
     int num_ins = code_item->ins_size_;
     int num_regs = code_item->registers_size_ - num_ins;
     int locals_start = frame_size - ((num_spills + num_regs) * sizeof(uint32_t));
-    if (reg == -2) {
-      return 0;  // Method*
-    } else if (reg <= -3) {
-      return locals_start - ((reg + 1) * sizeof(uint32_t));  // Compiler temp.
-    } else if (reg < num_regs) {
-      return locals_start + (reg * sizeof(uint32_t));        // Dalvik local reg.
+    if (reg == static_cast<int>(kVRegMethodPtrBaseReg)) {
+      // The current method pointer corresponds to a special location on the stack.
+      return 0;
+    } else if (reg <= static_cast<int>(kVRegNonSpecialTempBaseReg)) {
+      /*
+       * Special temporaries may have custom locations and the logic above deals with that.
+       * However, non-special temporaries are placed relative to the locals. Since the
+       * virtual register numbers for temporaries grow in the negative direction, the reg
+       * number will always be <= the temp base reg. Thus, the logic ensures that the first
+       * temp is at offset -4 bytes from locals, the second is at -8 bytes from locals,
+       * and so on.
+       */
+      int relative_offset =
+          (reg + std::abs(static_cast<int>(kVRegNonSpecialTempBaseReg)) - 1) * sizeof(uint32_t);
+      return locals_start + relative_offset;
+    } else if (reg < num_regs) {
+      return locals_start + (reg * sizeof(uint32_t));
     } else {
       return frame_size + ((reg - num_regs) * sizeof(uint32_t)) + sizeof(uint32_t);  // Dalvik in.
     }
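
A worked example of the compiler-temp arithmetic above, under the enum values
defined earlier (kVRegNonSpecialTempBaseReg == -3): the first temp (reg == -3)
lands 4 bytes below locals_start, the second (reg == -4) 8 bytes below, and so
on. Reduced to a standalone function:

#include <cstdint>
#include <cstdlib>

int TempVRegOffset(int locals_start, int reg) {
  const int kVRegNonSpecialTempBaseReg = -3;  // as in the enum above
  // reg == -3 -> one slot below locals, reg == -4 -> two slots below, ...
  int slots = reg + std::abs(kVRegNonSpecialTempBaseReg) - 1;
  return locals_start + slots * static_cast<int>(sizeof(uint32_t));
}

// TempVRegOffset(64, -3) == 60; TempVRegOffset(64, -4) == 56.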
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 621e350..68f3c04 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1410,8 +1410,8 @@
     mirror::ArtMethod* method = down_cast<mirror::ArtMethod*>(method_trace->Get(i));
     MethodHelper mh(method);
     int32_t line_number;
-    SirtRef<mirror::String> class_name_object(soa.Self(), NULL);
-    SirtRef<mirror::String> source_name_object(soa.Self(), NULL);
+    SirtRef<mirror::String> class_name_object(soa.Self(), nullptr);
+    SirtRef<mirror::String> source_name_object(soa.Self(), nullptr);
     if (method->IsProxyMethod()) {
       line_number = -1;
       class_name_object.reset(method->GetDeclaringClass()->GetName());
@@ -1423,16 +1423,18 @@
       // Allocate element, potentially triggering GC
       // TODO: reuse class_name_object via Class::name_?
       const char* descriptor = mh.GetDeclaringClassDescriptor();
-      CHECK(descriptor != NULL);
+      CHECK(descriptor != nullptr);
       std::string class_name(PrettyDescriptor(descriptor));
       class_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), class_name.c_str()));
-      if (class_name_object.get() == NULL) {
-        return NULL;
+      if (class_name_object.get() == nullptr) {
+        return nullptr;
       }
       const char* source_file = mh.GetDeclaringClassSourceFile();
-      source_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), source_file));
-      if (source_name_object.get() == NULL) {
-        return NULL;
+      if (source_file != nullptr) {
+        source_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), source_file));
+        if (source_name_object.get() == nullptr) {
+          return nullptr;
+        }
       }
     }
     const char* method_name = mh.GetName();
@@ -1624,8 +1626,11 @@
   PORTABLE_ENTRY_POINT_INFO(pPortableResolutionTrampoline),
   PORTABLE_ENTRY_POINT_INFO(pPortableToInterpreterBridge),
   QUICK_ENTRY_POINT_INFO(pAllocArray),
+  QUICK_ENTRY_POINT_INFO(pAllocArrayResolved),
   QUICK_ENTRY_POINT_INFO(pAllocArrayWithAccessCheck),
   QUICK_ENTRY_POINT_INFO(pAllocObject),
+  QUICK_ENTRY_POINT_INFO(pAllocObjectResolved),
+  QUICK_ENTRY_POINT_INFO(pAllocObjectInitialized),
   QUICK_ENTRY_POINT_INFO(pAllocObjectWithAccessCheck),
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArray),
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArrayWithAccessCheck),
@@ -1855,7 +1860,7 @@
         // Java method.
         // Portable path use DexGcMap and store in Method.native_gc_map_.
         const uint8_t* gc_map = m->GetNativeGcMap();
-        CHECK(gc_map != NULL) << PrettyMethod(m);
+        CHECK(gc_map != nullptr) << PrettyMethod(m);
         verifier::DexPcToReferenceMap dex_gc_map(gc_map);
         uint32_t dex_pc = GetDexPc();
         const uint8_t* reg_bitmap = dex_gc_map.FindBitMap(dex_pc);
diff --git a/test/103-string-append/run b/test/103-string-append/run
new file mode 100755
index 0000000..e27a622
--- /dev/null
+++ b/test/103-string-append/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2012 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# As this is a performance test, we always use the non-debug build.
+exec ${RUN} "${@/#libartd.so/libart.so}"
diff --git a/test/Android.mk b/test/Android.mk
index 5187724..d716f9b 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -71,10 +71,11 @@
     LOCAL_NO_STANDARD_LIBRARIES := true
     LOCAL_MODULE_PATH := $(3)
     LOCAL_DEX_PREOPT_IMAGE := $(TARGET_CORE_IMG_OUT)
+    LOCAL_DEX_PREOPT := false
     LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
     LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
     include $(BUILD_JAVA_LIBRARY)
-    ART_TEST_TARGET_DEX_FILES += $(3)/$$(LOCAL_MODULE).jar
+    ART_TEST_TARGET_DEX_FILES += $$(LOCAL_INSTALLED_MODULE)
   endif
 
   ifeq ($(ART_BUILD_HOST),true)
@@ -84,10 +85,11 @@
     LOCAL_JAVA_LIBRARIES := $(HOST_CORE_JARS)
     LOCAL_NO_STANDARD_LIBRARIES := true
     LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_OUT)
+    LOCAL_DEX_PREOPT := false
     LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
     LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
     include $(BUILD_HOST_DALVIK_JAVA_LIBRARY)
-    ART_TEST_HOST_DEX_FILES += $$(LOCAL_MODULE_PATH)/$$(LOCAL_MODULE).jar
+    ART_TEST_HOST_DEX_FILES += $$(LOCAL_INSTALLED_MODULE)
   endif
 endef
 $(foreach dir,$(TEST_DEX_DIRECTORIES), $(eval $(call build-art-test-dex,art-test-dex,$(dir),$(ART_NATIVETEST_OUT))))