Merge "ART: Update for split native bridge initialization"
diff --git a/Android.mk b/Android.mk
index 3cd6b5f..f8d46a4 100644
--- a/Android.mk
+++ b/Android.mk
@@ -77,7 +77,8 @@
 
 .PHONY: clean-oat-target
 clean-oat-target:
-	adb remount
+	adb root
+	adb wait-for-device remount
 	adb shell rm -rf $(ART_TARGET_NATIVETEST_DIR)
 	adb shell rm -rf $(ART_TARGET_TEST_DIR)
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*/*
@@ -140,7 +141,8 @@
 # Sync test files to the target, depends upon all things that must be pushed to the target.
 .PHONY: test-art-target-sync
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
-	adb remount
+	adb root
+	adb wait-for-device remount
 	adb sync
 
 # Undefine variable now it's served its purpose.
@@ -348,7 +350,8 @@
 
 .PHONY: oat-target-sync
 oat-target-sync: oat-target
-	adb remount
+	adb root
+	adb wait-for-device remount
 	adb sync
 
 ########################################################################
@@ -367,29 +370,29 @@
 
 .PHONY: use-art
 use-art:
-	adb root && sleep 3
-	adb shell stop
+	adb root
+	adb wait-for-device shell stop
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
 	adb shell start
 
 .PHONY: use-artd
 use-artd:
-	adb root && sleep 3
-	adb shell stop
+	adb root
+	adb wait-for-device shell stop
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libartd.so
 	adb shell start
 
 .PHONY: use-dalvik
 use-dalvik:
-	adb root && sleep 3
-	adb shell stop
+	adb root
+	adb wait-for-device shell stop
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libdvm.so
 	adb shell start
 
 .PHONY: use-art-full
 use-art-full:
-	adb root && sleep 3
-	adb shell stop
+	adb root
+	adb wait-for-device shell stop
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-filter ""
 	adb shell setprop dalvik.vm.image-dex2oat-filter ""
@@ -398,8 +401,8 @@
 
 .PHONY: use-artd-full
 use-artd-full:
-	adb root && sleep 3
-	adb shell stop
+	adb root
+	adb wait-for-device shell stop
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-filter ""
 	adb shell setprop dalvik.vm.image-dex2oat-filter ""
@@ -408,8 +411,8 @@
 
 .PHONY: use-art-smart
 use-art-smart:
-	adb root && sleep 3
-	adb shell stop
+	adb root
+	adb wait-for-device shell stop
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-filter ""
@@ -418,8 +421,8 @@
 
 .PHONY: use-art-interpret-only
 use-art-interpret-only:
-	adb root && sleep 3
-	adb shell stop
+	adb root
+	adb wait-for-device shell stop
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "interpret-only"
@@ -428,8 +431,8 @@
 
 .PHONY: use-artd-interpret-only
 use-artd-interpret-only:
-	adb root && sleep 3
-	adb shell stop
+	adb root
+	adb wait-for-device shell stop
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "interpret-only"
@@ -438,8 +441,8 @@
 
 .PHONY: use-art-verify-none
 use-art-verify-none:
-	adb root && sleep 3
-	adb shell stop
+	adb root
+	adb wait-for-device shell stop
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-filter "verify-none"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "verify-none"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 4ecda91..af43a3c 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -140,13 +140,18 @@
   compiler/jni/jni_compiler_test.cc \
   compiler/oat_test.cc \
   compiler/optimizing/codegen_test.cc \
+  compiler/optimizing/dead_code_elimination_test.cc \
+  compiler/optimizing/constant_propagation_test.cc \
   compiler/optimizing/dominator_test.cc \
   compiler/optimizing/find_loops_test.cc \
+  compiler/optimizing/graph_checker_test.cc \
   compiler/optimizing/graph_test.cc \
+  compiler/optimizing/gvn_test.cc \
   compiler/optimizing/linearize_test.cc \
   compiler/optimizing/liveness_test.cc \
   compiler/optimizing/live_interval_test.cc \
   compiler/optimizing/live_ranges_test.cc \
+  compiler/optimizing/nodes_test.cc \
   compiler/optimizing/parallel_move_test.cc \
   compiler/optimizing/pretty_printer_test.cc \
   compiler/optimizing/register_allocator_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 6e48bdf..7ac1c6b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -90,7 +90,11 @@
 	optimizing/code_generator_arm.cc \
 	optimizing/code_generator_x86.cc \
 	optimizing/code_generator_x86_64.cc \
+	optimizing/constant_propagation.cc \
+	optimizing/dead_code_elimination.cc \
+	optimizing/graph_checker.cc \
 	optimizing/graph_visualizer.cc \
+	optimizing/gvn.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
 	optimizing/optimizing_compiler.cc \
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index b267841..07f3033 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -82,11 +82,22 @@
                                      jobject class_loader, const DexFile& dex_file,
                                      void* llvm_compilation_unit) {
   VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "...";
-  if (code_item->insns_size_in_code_units_ >= 0x10000) {
-    LOG(INFO) << "Method size exceeds compiler limits: " << code_item->insns_size_in_code_units_
+  /*
+   * Skip compilation for pathologically large methods - either by instruction count or num vregs.
+   * Dalvik uses 16-bit uints for instruction and register counts.  We'll limit to a quarter
+   * of that, which also guarantees we cannot overflow our 16-bit internal SSA name space.
+   */
+  if (code_item->insns_size_in_code_units_ >= UINT16_MAX / 4) {
+    LOG(INFO) << "Method exceeds compiler instruction limit: "
+              << code_item->insns_size_in_code_units_
               << " in " << PrettyMethod(method_idx, dex_file);
     return NULL;
   }
+  if (code_item->registers_size_ >= UINT16_MAX / 4) {
+    LOG(INFO) << "Method exceeds compiler virtual register limit: "
+              << code_item->registers_size_ << " in " << PrettyMethod(method_idx, dex_file);
+    return NULL;
+  }
 
   if (!driver.GetCompilerOptions().IsCompilationEnabled()) {
     return nullptr;
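A standalone restatement of the new guards, as a sketch (CodeItem here is a hypothetical stand-in for the dex code-item struct; the quarter-of-16-bit limit matches the comment's SSA-name-space rationale):

    #include <cstdint>

    struct CodeItem {  // hypothetical stand-in for the dex code item
      uint32_t insns_size_in_code_units_;
      uint32_t registers_size_;
    };

    // True when a method is small enough to compile under the limits above.
    bool WithinCompilerLimits(const CodeItem& item) {
      constexpr uint32_t kLimit = UINT16_MAX / 4;  // 16383
      return item.insns_size_in_code_units_ < kLimit &&
             item.registers_size_ < kLimit;
    }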
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index b265ee7..9fa5fac 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -1094,6 +1094,7 @@
       default_cutoff = compiler_options.GetSmallMethodThreshold();
       break;
     case CompilerOptions::kSpeed:
+    case CompilerOptions::kTime:
       small_cutoff = compiler_options.GetHugeMethodThreshold();
       default_cutoff = compiler_options.GetHugeMethodThreshold();
       break;
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 62a8f26..ce56255 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -2420,9 +2420,9 @@
     case kMirOpDivZeroCheck:
       return Instruction::kContinue | Instruction::kThrow;
     case kMirOpCheck:
-      return 0;
+      return Instruction::kContinue | Instruction::kThrow;
     case kMirOpCheckPart2:
-      return 0;
+      return Instruction::kContinue;
     case kMirOpSelect:
       return Instruction::kContinue;
     case kMirOpConstVector:
@@ -2457,6 +2457,12 @@
       return Instruction::kContinue;
     case kMirOpReturnVectorRegisters:
       return Instruction::kContinue;
+    case kMirOpMemBarrier:
+      return Instruction::kContinue;
+    case kMirOpPackedArrayGet:
+      return Instruction::kContinue | Instruction::kThrow;
+    case kMirOpPackedArrayPut:
+      return Instruction::kContinue | Instruction::kThrow;
     default:
       LOG(WARNING) << "ExtendedFlagsOf: Unhandled case: " << static_cast<int> (opcode);
       return 0;
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index b9a17cc..0de2a44 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -1039,6 +1039,7 @@
   jmp_to_ret->target = return_point;
 
   AddIntrinsicSlowPath(info, launchpad_branch, return_point);
+  ClobberCallerSave();  // We must clobber everything because the slow path will return here.
 
   return true;
 }
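The new ClobberCallerSave() calls added in this change share one rationale: once an intrinsic gains a slow path that can call into the runtime and then branch back to the fast-path label, any value the backend cached in a caller-save register before the branch may be stale at the merge point. A minimal sketch of that invariant, as a toy register tracker (illustrative only, not the Mir2Lir API):

    #include <array>
    #include <cstdint>

    struct RegTracker {  // toy model of promoted-value tracking
      std::array<int, 16> vreg_in_reg{};  // 0 means nothing cached in that reg

      // Invoked at any label a runtime-calling slow path can return to: the
      // callee may overwrite every caller-save register, so forget them all.
      void ClobberCallerSave(uint32_t caller_save_mask) {
        for (int r = 0; r < 16; ++r) {
          if ((caller_save_mask >> r) & 1u) {
            vreg_in_reg[r] = 0;
          }
        }
      }
    };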
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 1777e98..094db4c 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -918,6 +918,7 @@
   loop_finished->target = return_point;
 
   AddIntrinsicSlowPath(info, launchpad_branch, return_point);
+  ClobberCallerSave();  // We must clobber everything because the slow path will return here.
 
   return true;
 }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 6d8f288..d90bce1 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1224,6 +1224,7 @@
 
 void Mir2Lir::AddSlowPath(LIRSlowPath* slowpath) {
   slow_paths_.Insert(slowpath);
+  ResetDefTracking();
 }
 
 void Mir2Lir::LoadCodeAddress(const MethodReference& target_method, InvokeType type,
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 8ce696c..960f217 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1193,7 +1193,7 @@
 
   LIR* intrinsic_finish = NewLIR0(kPseudoTargetLabel);
   AddIntrinsicSlowPath(info, slow_path_branch, intrinsic_finish);
-
+  ClobberCallerSave();  // We must clobber everything because the slow path will return here.
   return true;
 }
 
@@ -1492,6 +1492,7 @@
     LIR* resume_tgt = NewLIR0(kPseudoTargetLabel);
     info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
     AddIntrinsicSlowPath(info, high_code_point_branch, resume_tgt);
+    ClobberCallerSave();  // We must clobber everything because the slow path will return here.
   } else {
     DCHECK_EQ(mir_graph_->ConstantValue(rl_char) & ~0xFFFF, 0);
     DCHECK(high_code_point_branch == nullptr);
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 2e4e292..22588f3 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -97,7 +97,7 @@
 }
 
 inline LIR* Mir2Lir::NewLIR2NoDest(int opcode, int src, int info) {
-  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_UNARY_OP))
+  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_BINARY_OP))
       << GetTargetInstName(opcode) << " " << opcode << " "
       << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
       << current_dalvik_offset_;
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 9935a22..a9a0252 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -192,7 +192,7 @@
   { kX86Movnti32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,   { 0x0F,          0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti32MR", "[!0r+!1d],!2r" },
   { kX86Movnti32AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,  { 0x0F,          0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti32AR", "[!0r+!1r<<!2d+!3d],!4r" },
   { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32TR", "fs:[!0d],!1r" },
-  { kX86Mov32RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RR", "!0r,!1r" },
+  { kX86Mov32RR, kRegReg,    IS_MOVE  | IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RR", "!0r,!1r" },
   { kX86Mov32RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RM", "!0r,[!1r+!2d]" },
   { kX86Mov32RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
   { kX86Mov32RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RT", "!0r,fs:[!1d]" },
@@ -201,15 +201,15 @@
   { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32TI", "fs:[!0d],!1d" },
 
-  { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { 0,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RM", "!0r,[!1r+!2d]" },
-  { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12,                 { 0,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Lea32RM, kRegMem,               IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RM", "!0r,[!1r+!2d]" },
+  { kX86Lea32RA, kRegArray,             IS_QUIN_OP | REG_DEF0_USE12,     { 0,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
 
   { kX86Mov64MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { REX_W,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64MR", "[!0r+!1d],!2r" },
   { kX86Mov64AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { REX_W,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" },
   { kX86Movnti64MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,   { 0x0F,              0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti64MR", "[!0r+!1d],!2r" },
   { kX86Movnti64AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,  { 0x0F,              0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti64AR", "[!0r+!1r<<!2d+!3d],!4r" },
   { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0, false }, "Mov64TR", "fs:[!0d],!1r" },
-  { kX86Mov64RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RR", "!0r,!1r" },
+  { kX86Mov64RR, kRegReg,    IS_MOVE  | IS_BINARY_OP   | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RR", "!0r,!1r" },
   { kX86Mov64RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RM", "!0r,[!1r+!2d]" },
   { kX86Mov64RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
   { kX86Mov64RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RT", "!0r,fs:[!1d]" },
@@ -219,8 +219,8 @@
   { kX86Mov64AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64TI", "fs:[!0d],!1d" },
 
-  { kX86Lea64RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { REX_W,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RM", "!0r,[!1r+!2d]" },
-  { kX86Lea64RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12,                 { REX_W,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Lea64RM, kRegMem,               IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RM", "!0r,[!1r+!2d]" },
+  { kX86Lea64RA, kRegArray,             IS_QUIN_OP | REG_DEF0_USE12,     { REX_W,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
 
   { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, { 0,     0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc32RR", "!2c !0r,!1r" },
   { kX86Cmov64RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, { REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc64RR", "!2c !0r,!1r" },
@@ -444,14 +444,14 @@
   { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1, false }, "PslldRI", "!0r,!1d" },
   { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1, false }, "PsllqRI", "!0r,!1d" },
 
-  { kX86Fild32M,  kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M",  "[!0r,!1d]" },
-  { kX86Fild64M,  kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M",  "[!0r,!1d]" },
-  { kX86Fld32M,   kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 0, 0, 0, false }, "Fld32M",   "[!0r,!1d]" },
-  { kX86Fld64M,   kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 0, 0, 0, false }, "Fld64M",   "[!0r,!1d]" },
-  { kX86Fstp32M,  kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 3, 0, 0, false }, "Fstps32M", "[!0r,!1d]" },
-  { kX86Fstp64M,  kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 3, 0, 0, false }, "Fstpd64M", "[!0r,!1d]" },
-  { kX86Fst32M,   kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 2, 0, 0, false }, "Fsts32M",  "[!0r,!1d]" },
-  { kX86Fst64M,   kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 2, 0, 0, false }, "Fstd64M",  "[!0r,!1d]" },
+  { kX86Fild32M,  kMem,     IS_LOAD    | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M",  "[!0r,!1d]" },
+  { kX86Fild64M,  kMem,     IS_LOAD    | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M",  "[!0r,!1d]" },
+  { kX86Fld32M,   kMem,     IS_LOAD    | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 0, 0, 0, false }, "Fld32M",   "[!0r,!1d]" },
+  { kX86Fld64M,   kMem,     IS_LOAD    | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 0, 0, 0, false }, "Fld64M",   "[!0r,!1d]" },
+  { kX86Fstp32M,  kMem,     IS_STORE   | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 3, 0, 0, false }, "Fstps32M", "[!0r,!1d]" },
+  { kX86Fstp64M,  kMem,     IS_STORE   | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 3, 0, 0, false }, "Fstpd64M", "[!0r,!1d]" },
+  { kX86Fst32M,   kMem,     IS_STORE   | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 2, 0, 0, false }, "Fsts32M",  "[!0r,!1d]" },
+  { kX86Fst64M,   kMem,     IS_STORE   | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 2, 0, 0, false }, "Fstd64M",  "[!0r,!1d]" },
   { kX86Fprem,    kNullary, NO_OPERAND | USE_FP_STACK,                          { 0xD9, 0,    0xF8, 0,    0, 0, 0, 0, false }, "Fprem64",  "" },
   { kX86Fucompp,  kNullary, NO_OPERAND | USE_FP_STACK,                          { 0xDA, 0,    0xE9, 0,    0, 0, 0, 0, false }, "Fucompp",  "" },
   { kX86Fstsw16R, kNullary, NO_OPERAND | REG_DEFA | USE_FP_STACK,               { 0x9B, 0xDF, 0xE0, 0,    0, 0, 0, 0, false }, "Fstsw16R", "ax" },
@@ -680,7 +680,8 @@
     }
     if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) {
       // BP requires an explicit displacement, even when it's 0.
-      if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) {
+      if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA &&
+          entry->opcode != kX86Lea32RM && entry->opcode != kX86Lea64RM) {
         DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name;
       }
       size += IS_SIMM8(displacement) ? 1 : 4;
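For context on the flag changes above: LEA only performs address arithmetic and never dereferences memory, so IS_LOAD was wrong on the Lea entries, and the displacement-size DCHECK now exempts the RM forms alongside the RA forms. A one-line C sketch of what LEA computes:

    #include <cstdint>

    // base + (index << scale) + displacement, with no memory access
    // (which is why the Lea entries are not IS_LOAD).
    uint8_t* Lea(uint8_t* base, intptr_t index, int scale, intptr_t disp) {
      return base + (index << scale) + disp;
    }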
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index dd4d661..6020e70 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -497,6 +497,8 @@
   void MaskVectorRegister(X86OpCode opcode, RegStorage rs_src1, uint32_t m1, uint32_t m2,
                           uint32_t m3, uint32_t m4);
   void AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir);
+  virtual void LoadVectorRegister(RegStorage rs_dest, RegStorage rs_src, OpSize opsize,
+                                  int op_mov);
 
   static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
 
@@ -914,7 +916,7 @@
    * @param bb Basic block containing instruction.
    * @param mir Instruction to analyze.
    */
-  void AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir);
+  virtual void AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir);
 
   /*
    * @brief Analyze one use of a double operand.
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index aadb41a..de11996 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -1124,37 +1124,51 @@
   LoadValueDirectFixed(rl_src, rs_rAX);
   LoadWordDisp(rs_rAX, mirror::Array::LengthOffset().Int32Value(), rs_rAX);
   LIR* src_bad_len  = nullptr;
+  LIR* src_bad_off = nullptr;
   LIR* srcPos_negative  = nullptr;
   if (!rl_srcPos.is_const) {
     LoadValueDirectFixed(rl_srcPos, tmp_reg);
     srcPos_negative  = OpCmpImmBranch(kCondLt, tmp_reg, 0, nullptr);
-    OpRegReg(kOpAdd, tmp_reg, rs_rDX);
-    src_bad_len  = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr);
+    // Bad offset: src_len < src_pos.
+    src_bad_off = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr);
+    // Bad length: src_len - src_pos < copy_len.
+    OpRegRegReg(kOpSub, tmp_reg, rs_rAX, tmp_reg);
+    src_bad_len = OpCmpBranch(kCondLt, tmp_reg, rs_rDX, nullptr);
   } else {
     int32_t pos_val = mir_graph_->ConstantValue(rl_srcPos.orig_sreg);
     if (pos_val == 0) {
       src_bad_len  = OpCmpBranch(kCondLt, rs_rAX, rs_rDX, nullptr);
     } else {
-      OpRegRegImm(kOpAdd, tmp_reg,  rs_rDX, pos_val);
-      src_bad_len  = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr);
+      // Bad offset: src_len < src_pos.
+      src_bad_off = OpCmpImmBranch(kCondLt, rs_rAX, pos_val, nullptr);
+      // Bad length: src_len - src_pos < copy_len.
+      OpRegRegImm(kOpSub, tmp_reg, rs_rAX, pos_val);
+      src_bad_len = OpCmpBranch(kCondLt, tmp_reg, rs_rDX, nullptr);
     }
   }
   LIR* dstPos_negative = nullptr;
   LIR* dst_bad_len = nullptr;
+  LIR* dst_bad_off = nullptr;
   LoadValueDirectFixed(rl_dst, rs_rAX);
   LoadWordDisp(rs_rAX, mirror::Array::LengthOffset().Int32Value(), rs_rAX);
   if (!rl_dstPos.is_const) {
     LoadValueDirectFixed(rl_dstPos, tmp_reg);
     dstPos_negative = OpCmpImmBranch(kCondLt, tmp_reg, 0, nullptr);
-    OpRegRegReg(kOpAdd, tmp_reg, tmp_reg, rs_rDX);
-    dst_bad_len = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr);
+    // Bad offset: dst_len < dst_pos.
+    dst_bad_off = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr);
+    // Bad length: dst_len - dst_pos < copy_len.
+    OpRegRegReg(kOpSub, tmp_reg, rs_rAX, tmp_reg);
+    dst_bad_len = OpCmpBranch(kCondLt, tmp_reg, rs_rDX, nullptr);
   } else {
     int32_t pos_val = mir_graph_->ConstantValue(rl_dstPos.orig_sreg);
     if (pos_val == 0) {
       dst_bad_len = OpCmpBranch(kCondLt, rs_rAX, rs_rDX, nullptr);
     } else {
-      OpRegRegImm(kOpAdd, tmp_reg,  rs_rDX, pos_val);
-      dst_bad_len = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr);
+      // Bad offset: dst_len < dst_pos.
+      dst_bad_off = OpCmpImmBranch(kCondLt, rs_rAX, pos_val, nullptr);
+      // Bad length: dst_len - dst_pos < copy_len.
+      OpRegRegImm(kOpSub, tmp_reg, rs_rAX, pos_val);
+      dst_bad_len = OpCmpBranch(kCondLt, tmp_reg, rs_rDX, nullptr);
     }
   }
   // Everything is checked now.
@@ -1198,14 +1212,19 @@
   src_null_branch->target = check_failed;
   if (srcPos_negative != nullptr)
     srcPos_negative ->target = check_failed;
+  if (src_bad_off != nullptr)
+    src_bad_off->target = check_failed;
   if (src_bad_len != nullptr)
     src_bad_len->target = check_failed;
   dst_null_branch->target = check_failed;
   if (dstPos_negative != nullptr)
     dstPos_negative->target = check_failed;
+  if (dst_bad_off != nullptr)
+    dst_bad_off->target = check_failed;
   if (dst_bad_len != nullptr)
     dst_bad_len->target = check_failed;
   AddIntrinsicSlowPath(info, launchpad_branch, return_point);
+  ClobberCallerSave();  // We must clobber everything because the slow path will return here.
   return true;
 }
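Collecting the rewritten checks into one scalar predicate: the fast path proceeds only when pos >= 0, pos <= len, and len - pos >= copy_len hold for both arrays. A sketch (the real code emits LIR branches to a shared slow path rather than returning a bool):

    #include <cstdint>

    bool ArrayCopyArgsOk(int32_t src_len, int32_t src_pos,
                         int32_t dst_len, int32_t dst_pos, int32_t copy_len) {
      if (src_pos < 0 || dst_pos < 0) return false;              // *Pos_negative
      if (src_len < src_pos || dst_len < dst_pos) return false;  // *_bad_off
      // Once pos <= len holds, len - pos is safe and, unlike the old
      // pos + copy_len form, cannot overflow.
      return (src_len - src_pos) >= copy_len &&
             (dst_len - dst_pos) >= copy_len;                    // !*_bad_len
    }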
 
@@ -1384,6 +1403,7 @@
   if (slowpath_branch != nullptr) {
     LIR *return_point = NewLIR0(kPseudoTargetLabel);
     AddIntrinsicSlowPath(info, slowpath_branch, return_point);
+    ClobberCallerSave();  // We must clobber everything because the slow path will return here.
   }
 
   StoreValue(rl_dest, rl_return);
@@ -2268,6 +2288,20 @@
   }
 }
 
+void X86Mir2Lir::LoadVectorRegister(RegStorage rs_dest, RegStorage rs_src,
+                                    OpSize opsize, int op_mov) {
+  if (!cu_->target64 && opsize == k64) {
+    // Logic assumes that longs are loaded in GP register pairs.
+    NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), rs_src.GetLowReg());
+    RegStorage r_tmp = AllocTempDouble();
+    NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), rs_src.GetHighReg());
+    NewLIR2(kX86PunpckldqRR, rs_dest.GetReg(), r_tmp.GetReg());
+    FreeTemp(r_tmp);
+  } else {
+    NewLIR2(op_mov, rs_dest.GetReg(), rs_src.GetReg());
+  }
+}
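What the movd/movd/punpckldq sequence in the hoisted helper computes, expressed with SSE2 intrinsics as a sketch:

    #include <cstdint>
    #include <emmintrin.h>

    // Pack the two GP-register halves of a 64-bit value into the low qword
    // of an XMM register, as on 32-bit targets where longs live in reg pairs.
    __m128i LoadLongPair(int32_t lo, int32_t hi) {
      __m128i vlo = _mm_cvtsi32_si128(lo);  // kX86MovdxrRR, low half
      __m128i vhi = _mm_cvtsi32_si128(hi);  // kX86MovdxrRR, high half
      return _mm_unpacklo_epi32(vlo, vhi);  // kX86PunpckldqRR
    }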
+
 void X86Mir2Lir::GenSetVector(BasicBlock *bb, MIR *mir) {
   DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
   OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
@@ -2321,16 +2355,7 @@
   RegStorage reg_to_shuffle = rl_src.reg;
 
   // Load the value into the XMM register.
-  if (!cu_->target64 && opsize == k64) {
-    // Logic assumes that longs are loaded in GP register pairs.
-    NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), reg_to_shuffle.GetLowReg());
-    RegStorage r_tmp = AllocTempDouble();
-    NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), reg_to_shuffle.GetHighReg());
-    NewLIR2(kX86PunpckldqRR, rs_dest.GetReg(), r_tmp.GetReg());
-    FreeTemp(r_tmp);
-  } else {
-    NewLIR2(op_mov, rs_dest.GetReg(), reg_to_shuffle.GetReg());
-  }
+  LoadVectorRegister(rs_dest, reg_to_shuffle, opsize, op_mov);
 
   if (opsize == kSignedByte || opsize == kUnsignedByte) {
     // In the byte case, first duplicate it to be a word
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d743f90..990c1c8 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -795,14 +795,11 @@
   if (IsImage()) {
     TimingLogger::ScopedTiming t("UpdateImageClasses", timings);
     // Update image_classes_ with classes for objects created by <clinit> methods.
-    Thread* self = Thread::Current();
-    const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter");
     gc::Heap* heap = Runtime::Current()->GetHeap();
     // TODO: Image spaces only?
     ScopedObjectAccess soa(Thread::Current());
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    WriterMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
     heap->VisitObjects(FindClinitImageClassesCallback, this);
-    self->EndAssertNoThreadSuspension(old_cause);
   }
 }
 
@@ -1872,7 +1869,8 @@
             // TODO we detach transaction from runtime to indicate we quit the transactional
             // mode which prevents the GC from visiting objects modified during the transaction.
             // Ensure the GC does not run, so we don't touch freed objects when aborting the transaction.
-            const char* old_casue = soa.Self()->StartAssertNoThreadSuspension("Transaction end");
+
+            ScopedAssertNoThreadSuspension ants(soa.Self(), "Transaction end");
             runtime->ExitTransactionMode();
 
             if (!success) {
@@ -1885,7 +1883,6 @@
               transaction.Abort();
               CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
             }
-            soa.Self()->EndAssertNoThreadSuspension(old_casue);
           }
         }
         soa.Self()->AssertNoPendingException();
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 3c76098..e7bd357 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -260,7 +260,7 @@
       uint16_t* declaring_class_idx, uint16_t* declaring_method_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Get declaration location of a resolved field.
+  // Get the vtable index of a resolved method.
   uint16_t GetResolvedMethodVTableIndex(
       mirror::ArtMethod* resolved_method, InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index c0f91d1..eb3de97 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -27,7 +27,8 @@
     kSpace,               // Maximize space savings.
     kBalanced,            // Try to get the best performance return on compilation investment.
     kSpeed,               // Maximize runtime performance.
-    kEverything           // Force compilation (Note: excludes compilaton of class initializers).
+    kEverything,          // Force compilation (Note: excludes compilation of class initializers).
+    kTime                 // Compile methods, but minimize compilation time.
   };
 
   // Guide heuristics to determine whether to compile method if profile data not available.
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 35320f5..e535b6d 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_ELF_BUILDER_H_
 #define ART_COMPILER_ELF_BUILDER_H_
 
+#include "base/stl_util.h"
 #include "buffered_output_stream.h"
 #include "elf_utils.h"
 #include "file_output_stream.h"
@@ -354,12 +355,124 @@
   ElfSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> strtab_;
 };
 
+template <typename Elf_Word>
+class ElfFilePiece {
+ public:
+  virtual ~ElfFilePiece() {}
+
+  virtual bool Write(File* elf_file) {
+    if (static_cast<off_t>(offset_) != lseek(elf_file->Fd(), offset_, SEEK_SET)) {
+      PLOG(ERROR) << "Failed to seek to " << GetDescription() << " offset " << offset_ << " for "
+          << elf_file->GetPath();
+      return false;
+    }
+
+    return DoActualWrite(elf_file);
+  }
+
+  static bool Compare(ElfFilePiece* a, ElfFilePiece* b) {
+    return a->offset_ < b->offset_;
+  }
+
+ protected:
+  explicit ElfFilePiece(Elf_Word offset) : offset_(offset) {}
+
+  virtual std::string GetDescription() = 0;
+  virtual bool DoActualWrite(File* elf_file) = 0;
+
+  Elf_Word offset_;
+};
+
+template <typename Elf_Word>
+class ElfFileMemoryPiece : public ElfFilePiece<Elf_Word> {
+ public:
+  ElfFileMemoryPiece(const std::string& name, Elf_Word offset, const void* data, Elf_Word size)
+      : ElfFilePiece<Elf_Word>(offset), dbg_name_(name), data_(data), size_(size) {}
+
+  bool DoActualWrite(File* elf_file) OVERRIDE {
+    DCHECK(data_ != nullptr || size_ == 0U) << dbg_name_ << " " << size_;
+
+    if (!elf_file->WriteFully(data_, size_)) {
+      PLOG(ERROR) << "Failed to write " << dbg_name_ << " for " << elf_file->GetPath();
+      return false;
+    }
+
+    return true;
+  }
+
+  std::string GetDescription() OVERRIDE {
+    return dbg_name_;
+  }
+
+ private:
+  const std::string dbg_name_;  // By value: callers pass temporaries, so a reference would dangle.
+  const void *data_;
+  Elf_Word size_;
+};
+
 class CodeOutput {
  public:
   virtual bool Write(OutputStream* out) = 0;
   virtual ~CodeOutput() {}
 };
 
+template <typename Elf_Word>
+class ElfFileRodataPiece : public ElfFilePiece<Elf_Word> {
+ public:
+  ElfFileRodataPiece(Elf_Word offset, CodeOutput* output) : ElfFilePiece<Elf_Word>(offset),
+      output_(output) {}
+
+  bool DoActualWrite(File* elf_file) OVERRIDE {
+    std::unique_ptr<BufferedOutputStream> output_stream(
+        new BufferedOutputStream(new FileOutputStream(elf_file)));
+    if (!output_->Write(output_stream.get())) {
+      PLOG(ERROR) << "Failed to write .rodata and .text for " << elf_file->GetPath();
+      return false;
+    }
+
+    return true;
+  }
+
+  std::string GetDescription() OVERRIDE {
+    return ".rodata";
+  }
+
+ private:
+  CodeOutput* output_;
+};
+
+template <typename Elf_Word>
+class ElfFileOatTextPiece : public ElfFilePiece<Elf_Word> {
+ public:
+  ElfFileOatTextPiece(Elf_Word offset, CodeOutput* output) : ElfFilePiece<Elf_Word>(offset),
+      output_(output) {}
+
+  bool DoActualWrite(File* elf_file) OVERRIDE {
+    // All data is written by the ElfFileRodataPiece right now, as the oat writer writes in one
+    // piece. This is for future flexibility.
+    UNUSED(output_);
+    return true;
+  }
+
+  std::string GetDescription() OVERRIDE {
+    return ".text";
+  }
+
+ private:
+  CodeOutput* output_;
+};
+
+template <typename Elf_Word>
+static bool WriteOutFile(const std::vector<ElfFilePiece<Elf_Word>*>& pieces, File* elf_file) {
+  // TODO It would be nice if this checked for overlap.
+  for (auto it = pieces.begin(); it != pieces.end(); ++it) {
+    if (!(*it)->Write(elf_file)) {
+      return false;
+    }
+  }
+  return true;
+}
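A minimal usage sketch of the new piece classes (elf_header and elf_file are hypothetical locals; Write() further down follows this same pattern):

    std::vector<ElfFilePiece<uint32_t>*> pieces;
    pieces.push_back(new ElfFileMemoryPiece<uint32_t>(
        "Elf Header", 0, &elf_header, sizeof(elf_header)));  // knows its offset
    // ... push further pieces, each with its own file offset ...
    bool ok = WriteOutFile(pieces, elf_file);  // seeks to and writes each piece
    STLDeleteElements(&pieces);                // pieces are heap-allocated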
+
 template <typename Elf_Word, typename Elf_Shdr>
 static inline constexpr Elf_Word NextOffset(const Elf_Shdr& cur, const Elf_Shdr& prev) {
   return RoundUp(prev.sh_size + prev.sh_offset, cur.sh_addralign);
@@ -667,7 +780,7 @@
   }
 
   bool Write() {
-    std::vector<ElfFilePiece> pieces;
+    std::vector<ElfFilePiece<Elf_Word>*> pieces;
     Elf_Shdr prev = dynamic_builder_.section_;
     std::string strtab;
 
@@ -746,8 +859,9 @@
       it->section_.sh_addr = 0;
       it->section_.sh_size = it->GetBuffer()->size();
       it->section_.sh_link = it->GetLink();
-      pieces.push_back(ElfFilePiece(it->name_, it->section_.sh_offset,
-                                    it->GetBuffer()->data(), it->GetBuffer()->size()));
+
+      // Postpone adding this ElfFilePiece so "pieces" keeps its order (added under "Postponed debug info" below).
+
       prev = it->section_;
       if (debug_logging_) {
         LOG(INFO) << it->name_ << " off=" << it->section_.sh_offset
@@ -824,55 +938,62 @@
     elf_header_.e_shstrndx = shstrtab_builder_.section_index_;
 
     // Add the rest of the pieces to the list.
-    pieces.push_back(ElfFilePiece("Elf Header", 0, &elf_header_, sizeof(elf_header_)));
-    pieces.push_back(ElfFilePiece("Program headers", PHDR_OFFSET,
-                                  &program_headers_, sizeof(program_headers_)));
-    pieces.push_back(ElfFilePiece(".dynamic", dynamic_builder_.section_.sh_offset,
-                                  dynamic.data(), dynamic_builder_.section_.sh_size));
-    pieces.push_back(ElfFilePiece(".dynsym", dynsym_builder_.section_.sh_offset,
-                                  dynsym.data(), dynsym.size() * sizeof(Elf_Sym)));
-    pieces.push_back(ElfFilePiece(".dynstr", dynsym_builder_.GetStrTab()->section_.sh_offset,
-                                  dynstr_.c_str(), dynstr_.size()));
-    pieces.push_back(ElfFilePiece(".hash", hash_builder_.section_.sh_offset,
-                                  hash_.data(), hash_.size() * sizeof(Elf_Word)));
-    pieces.push_back(ElfFilePiece(".rodata", rodata_builder_.section_.sh_offset,
-                                  nullptr, rodata_builder_.section_.sh_size));
-    pieces.push_back(ElfFilePiece(".text", text_builder_.section_.sh_offset,
-                                  nullptr, text_builder_.section_.sh_size));
+    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Elf Header", 0, &elf_header_,
+                                                      sizeof(elf_header_)));
+    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
+                                                      &program_headers_, sizeof(program_headers_)));
+    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynamic",
+                                                      dynamic_builder_.section_.sh_offset,
+                                                      dynamic.data(),
+                                                      dynamic_builder_.section_.sh_size));
+    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynsym", dynsym_builder_.section_.sh_offset,
+                                                      dynsym.data(),
+                                                      dynsym.size() * sizeof(Elf_Sym)));
+    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynstr",
+                                                    dynsym_builder_.GetStrTab()->section_.sh_offset,
+                                                    dynstr_.c_str(), dynstr_.size()));
+    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".hash", hash_builder_.section_.sh_offset,
+                                                      hash_.data(),
+                                                      hash_.size() * sizeof(Elf_Word)));
+    pieces.push_back(new ElfFileRodataPiece<Elf_Word>(rodata_builder_.section_.sh_offset,
+                                                      oat_writer_));
+    pieces.push_back(new ElfFileOatTextPiece<Elf_Word>(text_builder_.section_.sh_offset,
+                                                       oat_writer_));
     if (IncludingDebugSymbols()) {
-      pieces.push_back(ElfFilePiece(".symtab", symtab_builder_.section_.sh_offset,
-                                    symtab.data(), symtab.size() * sizeof(Elf_Sym)));
-      pieces.push_back(ElfFilePiece(".strtab", symtab_builder_.GetStrTab()->section_.sh_offset,
-                                    strtab.c_str(), strtab.size()));
+      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".symtab",
+                                                        symtab_builder_.section_.sh_offset,
+                                                        symtab.data(),
+                                                        symtab.size() * sizeof(Elf_Sym)));
+      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".strtab",
+                                                    symtab_builder_.GetStrTab()->section_.sh_offset,
+                                                    strtab.c_str(), strtab.size()));
     }
-    pieces.push_back(ElfFilePiece(".shstrtab", shstrtab_builder_.section_.sh_offset,
-                                  &shstrtab_[0], shstrtab_.size()));
+    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".shstrtab",
+                                                      shstrtab_builder_.section_.sh_offset,
+                                                      &shstrtab_[0], shstrtab_.size()));
     for (uint32_t i = 0; i < section_ptrs_.size(); ++i) {
       // Just add all the sections in individually since they are all over the
       // place on the heap/stack.
       Elf_Word cur_off = sections_offset + i * sizeof(Elf_Shdr);
-      pieces.push_back(ElfFilePiece("section table piece", cur_off,
-                                    section_ptrs_[i], sizeof(Elf_Shdr)));
+      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("section table piece", cur_off,
+                                                        section_ptrs_[i], sizeof(Elf_Shdr)));
+    }
+
+    // Postponed debug info.
+    for (auto it = other_builders_.begin(); it != other_builders_.end(); ++it) {
+      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(it->name_, it->section_.sh_offset,
+                                                        it->GetBuffer()->data(),
+                                                        it->GetBuffer()->size()));
     }
 
     if (!WriteOutFile(pieces)) {
       LOG(ERROR) << "Unable to write to file " << elf_file_->GetPath();
-      return false;
-    }
-    // write out the actual oat file data.
-    Elf_Word oat_data_offset = rodata_builder_.section_.sh_offset;
-    if (static_cast<off_t>(oat_data_offset) != lseek(elf_file_->Fd(), oat_data_offset, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek to .rodata offset " << oat_data_offset
-                  << " for " << elf_file_->GetPath();
-      return false;
-    }
-    std::unique_ptr<BufferedOutputStream> output_stream(
-        new BufferedOutputStream(new FileOutputStream(elf_file_)));
-    if (!oat_writer_->Write(output_stream.get())) {
-      PLOG(ERROR) << "Failed to write .rodata and .text for " << elf_file_->GetPath();
+
+      STLDeleteElements(&pieces);  // Have to manually clean pieces.
       return false;
     }
 
+    STLDeleteElements(&pieces);  // Have to manually clean pieces.
     return true;
   }
 
@@ -1028,34 +1149,12 @@
     }
   }
 
-  struct ElfFilePiece {
-    ElfFilePiece(const std::string& name, Elf_Word offset, const void* data, Elf_Word size)
-    : dbg_name_(name), offset_(offset), data_(data), size_(size) {}
-    ~ElfFilePiece() {}
-
-    const std::string& dbg_name_;
-    Elf_Word offset_;
-    const void *data_;
-    Elf_Word size_;
-    static bool Compare(ElfFilePiece a, ElfFilePiece b) {
-      return a.offset_ < b.offset_;
-    }
-  };
 
   // Write each of the pieces out to the file.
-  bool WriteOutFile(const std::vector<ElfFilePiece>& pieces) {
-    // TODO It would be nice if this checked for overlap.
+  bool WriteOutFile(const std::vector<ElfFilePiece<Elf_Word>*>& pieces) {
     for (auto it = pieces.begin(); it != pieces.end(); ++it) {
-      if (it->data_) {
-        if (static_cast<off_t>(it->offset_) != lseek(elf_file_->Fd(), it->offset_, SEEK_SET)) {
-          PLOG(ERROR) << "Failed to seek to " << it->dbg_name_ << " offset location "
-                      << it->offset_ << " for " << elf_file_->GetPath();
-          return false;
-        }
-        if (!elf_file_->WriteFully(it->data_, it->size_)) {
-          PLOG(ERROR) << "Failed to write " << it->dbg_name_ << " for " << elf_file_->GetPath();
-          return false;
-        }
+      if (!(*it)->Write(elf_file_)) {
+        return false;
       }
     }
     return true;
diff --git a/compiler/elf_patcher.cc b/compiler/elf_patcher.cc
index 92eb4d8..0646b75 100644
--- a/compiler/elf_patcher.cc
+++ b/compiler/elf_patcher.cc
@@ -188,9 +188,8 @@
                      compiler_driver_->GetMethodsToPatch().size() +
                      compiler_driver_->GetClassesToPatch().size());
   }
-  Thread* self = Thread::Current();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  const char* old_cause = self->StartAssertNoThreadSuspension("ElfPatcher");
+  ScopedAssertNoThreadSuspension ants(Thread::Current(), "ElfPatcher");
 
   typedef std::vector<const CompilerDriver::CallPatchInformation*> CallPatches;
   const CallPatches& code_to_patch = compiler_driver_->GetCodeToPatch();
@@ -259,8 +258,6 @@
     SetPatchLocation(patch, PointerToLowMemUInt32(get_image_address_(cb_data_, target)));
   }
 
-  self->EndAssertNoThreadSuspension(old_cause);
-
   if (write_patches_) {
     return WriteOutPatchData();
   }
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 9c9cdf2..c08d3bd 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -543,11 +543,9 @@
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     // TODO: Image spaces only?
-    const char* old = self->StartAssertNoThreadSuspension("ImageWriter");
     DCHECK_LT(image_end_, image_->Size());
     // Clear any pre-existing monitors which may have been in the monitor words.
     heap->VisitObjects(WalkFieldsCallback, this);
-    self->EndAssertNoThreadSuspension(old);
   }
 
   const byte* oat_file_begin = image_begin_ + RoundUp(image_end_, kPageSize);
@@ -577,20 +575,18 @@
 
 void ImageWriter::CopyAndFixupObjects()
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Thread* self = Thread::Current();
-  const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter");
+  ScopedAssertNoThreadSuspension ants(Thread::Current(), "ImageWriter");
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // TODO: heap validation can't handle this fix up pass
   heap->DisableObjectValidation();
   // TODO: Image spaces only?
-  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  WriterMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   heap->VisitObjects(CopyAndFixupObjectsCallback, this);
   // Fix up the objects that previously had hash codes.
   for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) {
     hash_pair.first->SetLockWord(LockWord::FromHashCode(hash_pair.second), false);
   }
   saved_hashes_.clear();
-  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) {
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index a03588f..33b00d2 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -331,18 +331,61 @@
                                 bool is_range,
                                 uint32_t* args,
                                 uint32_t register_index) {
+  Instruction::Code opcode = instruction.Opcode();
+  InvokeType invoke_type;
+  switch (opcode) {
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_STATIC_RANGE:
+      invoke_type = kStatic;
+      break;
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_DIRECT_RANGE:
+      invoke_type = kDirect;
+      break;
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+      invoke_type = kVirtual;
+      break;
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+      invoke_type = kInterface;
+      break;
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_SUPER:
+      invoke_type = kSuper;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected invoke op: " << opcode;
+      return false;
+  }
+
   const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
   const DexFile::ProtoId& proto_id = dex_file_->GetProtoId(method_id.proto_idx_);
   const char* descriptor = dex_file_->StringDataByIdx(proto_id.shorty_idx_);
   Primitive::Type return_type = Primitive::GetType(descriptor[0]);
-  bool is_instance_call =
-      instruction.Opcode() != Instruction::INVOKE_STATIC
-      && instruction.Opcode() != Instruction::INVOKE_STATIC_RANGE;
+  bool is_instance_call = invoke_type != kStatic;
   const size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1);
 
-  // Treat invoke-direct like static calls for now.
-  HInvoke* invoke = new (arena_) HInvokeStatic(
-      arena_, number_of_arguments, return_type, dex_offset, method_idx);
+  HInvoke* invoke = nullptr;
+  if (invoke_type == kVirtual) {
+    MethodReference target_method(dex_file_, method_idx);
+    uintptr_t direct_code;
+    uintptr_t direct_method;
+    int vtable_index;
+    // TODO: Add devirtualization support.
+    compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_offset, true, true,
+                                        &invoke_type, &target_method, &vtable_index,
+                                        &direct_code, &direct_method);
+    if (vtable_index == -1) {
+      return false;
+    }
+    invoke = new (arena_) HInvokeVirtual(
+        arena_, number_of_arguments, return_type, dex_offset, vtable_index);
+  } else {
+    // Treat invoke-direct like static calls for now.
+    invoke = new (arena_) HInvokeStatic(
+        arena_, number_of_arguments, return_type, dex_offset, method_idx);
+  }
 
   size_t start_index = 0;
   Temporaries temps(graph_, is_instance_call ? 1 : 0);
@@ -620,7 +663,8 @@
     }
 
     case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_DIRECT: {
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_VIRTUAL: {
       uint32_t method_idx = instruction.VRegB_35c();
       uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
       uint32_t args[5];
@@ -632,7 +676,8 @@
     }
 
     case Instruction::INVOKE_STATIC_RANGE:
-    case Instruction::INVOKE_DIRECT_RANGE: {
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE: {
       uint32_t method_idx = instruction.VRegB_3rc();
       uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
       uint32_t register_index = instruction.VRegC();
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2547a29..3231c99 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -44,6 +44,7 @@
   ComputeFrameSize(GetGraph()->GetNumberOfLocalVRegs()
                      + GetGraph()->GetNumberOfTemporaries()
                      + 1 /* filler */,
+                   0, /* the baseline compiler does not have live registers at slow path */
                    GetGraph()->GetMaximumNumberOfOutVRegs()
                      + 1 /* current method */);
   GenerateFrameEntry();
@@ -111,10 +112,15 @@
   return -1;
 }
 
-void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots, size_t number_of_out_slots) {
+void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots,
+                                     size_t maximum_number_of_live_registers,
+                                     size_t number_of_out_slots) {
+  first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
+
   SetFrameSize(RoundUp(
       number_of_spill_slots * kVRegSize
       + number_of_out_slots * kVRegSize
+      + maximum_number_of_live_registers * GetWordSize()
       + FrameEntrySpillSize(),
       kStackAlignment));
 }
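A worked instance of the new layout, with hypothetical numbers (kVRegSize = 4, kStackAlignment = 16, GetWordSize() = 4, FrameEntrySpillSize() = 8, 5 spill slots, 2 out slots, at most 3 registers live across a slow path):

    // first_register_slot_in_slow_path_ = (2 + 5) * 4 = 28
    // frame size = RoundUp(5*4 + 2*4 + 3*4 + 8, 16) = RoundUp(48, 16) = 48
    // so the three saved registers land at SP + 28, SP + 32, and SP + 36.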
@@ -468,4 +474,48 @@
   }
 }
 
+size_t CodeGenerator::GetStackOffsetOfSavedRegister(size_t index) {
+  return first_register_slot_in_slow_path_ + index * GetWordSize();
+}
+
+void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) {
+  RegisterSet* register_set = locations->GetLiveRegisters();
+  uint32_t count = 0;
+  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
+    if (register_set->ContainsCoreRegister(i)) {
+      size_t stack_offset = GetStackOffsetOfSavedRegister(count);
+      ++count;
+      SaveCoreRegister(Location::StackSlot(stack_offset), i);
+      // If the register holds an object, update the stack mask.
+      if (locations->RegisterContainsObject(i)) {
+        locations->SetStackBit(stack_offset / kVRegSize);
+      }
+    }
+  }
+
+  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+    if (register_set->ContainsFloatingPointRegister(i)) {
+      LOG(FATAL) << "Unimplemented";
+    }
+  }
+}
+
+void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) {
+  RegisterSet* register_set = locations->GetLiveRegisters();
+  uint32_t count = 0;
+  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
+    if (register_set->ContainsCoreRegister(i)) {
+      size_t stack_offset = GetStackOffsetOfSavedRegister(count);
+      ++count;
+      RestoreCoreRegister(Location::StackSlot(stack_offset), i);
+    }
+  }
+
+  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+    if (register_set->ContainsFloatingPointRegister(i)) {
+      LOG(FATAL) << "Unimplemented";
+    }
+  }
+}
+
 }  // namespace art
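The intended call pattern for the new save/restore pair, as the ARM suspend-check slow path in the next file follows it (sketch in comment form):

    //   __ Bind(GetEntryLabel());
    //   codegen->SaveLiveRegisters(locations);     // spill + update stack mask
    //   ... call the runtime entrypoint ...
    //   codegen->RecordPcInfo(...);                // stack map sees saved objects
    //   codegen->RestoreLiveRegisters(locations);  // reload the same registers
    //   __ b(GetReturnLabel());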
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a83d703..55f5d8d 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -98,7 +98,9 @@
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
   virtual Assembler* GetAssembler() = 0;
   virtual size_t GetWordSize() const = 0;
-  void ComputeFrameSize(size_t number_of_spill_slots, size_t number_of_out_slots);
+  void ComputeFrameSize(size_t number_of_spill_slots,
+                        size_t maximum_number_of_live_registers,
+                        size_t number_of_out_slots);
   virtual size_t FrameEntrySpillSize() const = 0;
   int32_t GetStackSlot(HLocal* local) const;
   Location GetTemporaryLocation(HTemporary* temp) const;
@@ -114,6 +116,8 @@
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
   virtual InstructionSet GetInstructionSet() const = 0;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) = 0;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) = 0;
 
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
 
@@ -128,6 +132,8 @@
   void BuildNativeGCMap(
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
   void BuildStackMaps(std::vector<uint8_t>* vector);
+  void SaveLiveRegisters(LocationSummary* locations);
+  void RestoreLiveRegisters(LocationSummary* locations);
 
   bool IsLeafMethod() const {
     return is_leaf_;
@@ -141,6 +147,7 @@
   CodeGenerator(HGraph* graph, size_t number_of_registers)
       : frame_size_(kUninitializedFrameSize),
         core_spill_mask_(0),
+        first_register_slot_in_slow_path_(0),
         graph_(graph),
         block_labels_(graph->GetArena(), 0),
         pc_infos_(graph->GetArena(), 32),
@@ -166,9 +173,11 @@
   // Frame size required for this method.
   uint32_t frame_size_;
   uint32_t core_spill_mask_;
+  uint32_t first_register_slot_in_slow_path_;
 
  private:
   void InitLocations(HInstruction* instruction);
+  size_t GetStackOffsetOfSavedRegister(size_t index);
 
   HGraph* const graph_;
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 9903092..206ed13 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -20,6 +20,7 @@
 #include "gc/accounting/card_table.h"
 #include "mirror/array.h"
 #include "mirror/art_method.h"
+#include "mirror/class.h"
 #include "thread.h"
 #include "utils/assembler.h"
 #include "utils/arm/assembler_arm.h"
@@ -97,10 +98,12 @@
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(instruction_->GetLocations());
     int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pTestSuspend).Int32Value();
     __ ldr(LR, Address(TR, offset));
     __ blx(LR);
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    codegen->RestoreLiveRegisters(instruction_->GetLocations());
     __ b(GetReturnLabel());
   }
 
@@ -181,6 +184,14 @@
   stream << ArmManagedRegister::FromDRegister(DRegister(reg));
 }
 
+void CodeGeneratorARM::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ str(static_cast<Register>(reg_id), Address(SP, stack_location.GetStackIndex()));
+}
+
+void CodeGeneratorARM::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ ldr(static_cast<Register>(reg_id), Address(SP, stack_location.GetStackIndex()));
+}
+
 CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
@@ -576,7 +587,7 @@
   DCHECK(cond->IsCondition());
   HCondition* condition = cond->AsCondition();
   if (condition->NeedsMaterialization()) {
-    locations->SetInAt(0, Location::Any());
+    locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
@@ -589,7 +600,7 @@
     DCHECK(if_instr->GetLocations()->InAt(0).IsRegister());
     __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(),
            ShifterOperand(0));
-    __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), EQ);
+    __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE);
   } else {
     // Condition has not been materialized, use its inputs as the comparison and its
     // condition as the branch condition.
@@ -818,6 +829,47 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorARM::LoadCurrentMethod(Register reg) {
+  __ ldr(reg, Address(SP, kCurrentMethodStackOffset));
+}
+
+void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) {
+  Register temp = invoke->GetLocations()->GetTemp(0).AsArm().AsCoreRegister();
+  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
+  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).Int32Value() +
+      invoke->GetIndexInDexCache() * kArmWordSize;
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // temp = method;
+  LoadCurrentMethod(temp);
+  // temp = temp->dex_cache_resolved_methods_;
+  __ ldr(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+  // temp = temp[index_in_cache]
+  __ ldr(temp, Address(temp, index_in_cache));
+  // LR = temp[offset_of_quick_compiled_code]
+  __ ldr(LR, Address(temp,
+                     mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
+  // LR()
+  __ blx(LR);
+
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
+}
+
+void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  HandleInvoke(invoke);
+}
+
+void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(ArmCoreLocation(R0));
@@ -852,37 +904,30 @@
   }
 }
 
-void InstructionCodeGeneratorARM::LoadCurrentMethod(Register reg) {
-  __ ldr(reg, Address(SP, kCurrentMethodStackOffset));
-}
 
-void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) {
+void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   Register temp = invoke->GetLocations()->GetTemp(0).AsArm().AsCoreRegister();
-  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
-  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).Int32Value() +
-      invoke->GetIndexInDexCache() * kArmWordSize;
-
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
-
-  // temp = method;
-  LoadCurrentMethod(temp);
-  // temp = temp->dex_cache_resolved_methods_;
-  __ ldr(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
-  // temp = temp[index_in_cache]
-  __ ldr(temp, Address(temp, index_in_cache));
-  // LR = temp[offset_of_quick_compiled_code]
-  __ ldr(LR, Address(temp,
-                     mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
-  // LR()
+  uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() +
+          invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
+  LocationSummary* locations = invoke->GetLocations();
+  Location receiver = locations->InAt(0);
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  // temp = object->GetClass();
+  if (receiver.IsStackSlot()) {
+    __ ldr(temp, Address(SP, receiver.GetStackIndex()));
+    __ ldr(temp, Address(temp, class_offset));
+  } else {
+    __ ldr(temp, Address(receiver.AsArm().AsCoreRegister(), class_offset));
+  }
+  // temp = temp->GetMethodAt(method_offset);
+  uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value();
+  __ ldr(temp, Address(temp, method_offset));
+  // LR = temp->GetEntryPoint();
+  __ ldr(LR, Address(temp, entry_point));
+  // LR();
   __ blx(LR);
-
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   DCHECK(!codegen_->IsLeafMethod());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderARM::VisitAdd(HAdd* add) {
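Note on VisitInvokeVirtual above: where the static call walks the dex cache, the virtual call is a classic vtable dispatch: load the receiver's class, index the class's embedded vtable, then jump through the method's quick entry point. A minimal C++ sketch of the pointer chasing, with illustrative field names standing in for the mirror::Object/mirror::Class/mirror::ArtMethod offsets used by the generated code:

    #include <cstddef>

    // Hedged sketch only: these types are stand-ins, not the real mirror:: classes.
    struct ArtMethod { void (*quick_entry_point)(); };
    struct Class     { ArtMethod* embedded_vtable[64]; };  // length is illustrative
    struct Object    { Class* klass; };

    void InvokeVirtual(Object* receiver, std::size_t vtable_index) {
      Class* klass = receiver->klass;                            // ldr temp, [receiver, #class_offset]
      ArtMethod* method = klass->embedded_vtable[vtable_index];  // ldr temp, [temp, #method_offset]
      method->quick_entry_point();                               // ldr LR, [temp, #entry_point]; blx LR
    }
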
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 660294b..0902fb8 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -93,6 +93,8 @@
 
 #undef DECLARE_VISIT_INSTRUCTION
 
+  void HandleInvoke(HInvoke* invoke);
+
  private:
   CodeGeneratorARM* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -130,6 +132,8 @@
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kArmWordSize;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3dd9b37..0db4311 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -20,6 +20,7 @@
 #include "gc/accounting/card_table.h"
 #include "mirror/array.h"
 #include "mirror/art_method.h"
+#include "mirror/class.h"
 #include "thread.h"
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
@@ -121,8 +122,10 @@
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(instruction_->GetLocations());
     __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend)));
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    codegen->RestoreLiveRegisters(instruction_->GetLocations());
     __ jmp(GetReturnLabel());
   }
 
@@ -160,6 +163,14 @@
   stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg));
 }
 
+void CodeGeneratorX86::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movl(Address(ESP, stack_location.GetStackIndex()), static_cast<Register>(reg_id));
+}
+
+void CodeGeneratorX86::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movl(static_cast<Register>(reg_id), Address(ESP, stack_location.GetStackIndex()));
+}
+
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
@@ -540,14 +551,18 @@
   DCHECK(cond->IsCondition());
   HCondition* condition = cond->AsCondition();
   if (condition->NeedsMaterialization()) {
-    // Materialized condition, compare against 0
-    Location lhs = if_instr->GetLocations()->InAt(0);
-    if (lhs.IsRegister()) {
-      __ cmpl(lhs.AsX86().AsCpuRegister(), Immediate(0));
-    } else {
-      __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
+    // Moves do not affect the eflags register, so if the condition is evaluated
+    // just before the if, we don't need to evaluate it again.
+    if (!condition->IsBeforeWhenDisregardMoves(if_instr)) {
+      // Materialized condition, compare against 0
+      Location lhs = if_instr->GetLocations()->InAt(0);
+      if (lhs.IsRegister()) {
+        __ cmpl(lhs.AsX86().AsCpuRegister(), Immediate(0));
+      } else {
+        __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
+      }
     }
-    __ j(kEqual,  codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+    __ j(kNotEqual,  codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
   } else {
     Location lhs = condition->GetLocations()->InAt(0);
     Location rhs = condition->GetLocations()->InAt(1);
@@ -624,6 +639,9 @@
 void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) {
   if (comp->NeedsMaterialization()) {
     LocationSummary* locations = comp->GetLocations();
+    Register reg = locations->Out().AsX86().AsCpuRegister();
+    // Clear register: setcc only sets the low byte.
+    __ xorl(reg, reg);
     if (locations->InAt(1).IsRegister()) {
       __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
               locations->InAt(1).AsX86().AsCpuRegister());
@@ -635,7 +653,7 @@
       __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
               Address(ESP, locations->InAt(1).GetStackIndex()));
     }
-    __ setb(X86Condition(comp->GetCondition()), locations->Out().AsX86().AsCpuRegister());
+    __ setb(X86Condition(comp->GetCondition()), reg);
   }
 }
 
@@ -763,6 +781,40 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) {
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) {
+  Register temp = invoke->GetLocations()->GetTemp(0).AsX86().AsCpuRegister();
+  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
+  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).Int32Value() +
+      invoke->GetIndexInDexCache() * kX86WordSize;
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // temp = method;
+  LoadCurrentMethod(temp);
+  // temp = temp->dex_cache_resolved_methods_;
+  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+  // temp = temp[index_in_cache]
+  __ movl(temp, Address(temp, index_in_cache));
+  // (temp + offset_of_quick_compiled_code)()
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
+
+  DCHECK(!codegen_->IsLeafMethod());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  HandleInvoke(invoke);
+}
+
+void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(X86CpuLocation(EAX));
@@ -799,26 +851,23 @@
   invoke->SetLocations(locations);
 }
 
-void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) {
+void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   Register temp = invoke->GetLocations()->GetTemp(0).AsX86().AsCpuRegister();
-  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
-  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).Int32Value() +
-      invoke->GetIndexInDexCache() * kX86WordSize;
-
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
-
-  // temp = method;
-  LoadCurrentMethod(temp);
-  // temp = temp->dex_cache_resolved_methods_;
-  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
-  // temp = temp[index_in_cache]
-  __ movl(temp, Address(temp, index_in_cache));
-  // (temp + offset_of_quick_compiled_code)()
+  uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() +
+          invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
+  LocationSummary* locations = invoke->GetLocations();
+  Location receiver = locations->InAt(0);
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  // temp = object->GetClass();
+  if (receiver.IsStackSlot()) {
+    __ movl(temp, Address(ESP, receiver.GetStackIndex()));
+    __ movl(temp, Address(temp, class_offset));
+  } else {
+    __ movl(temp, Address(receiver.AsX86().AsCpuRegister(), class_offset));
+  }
+  // temp = temp->GetMethodAt(method_offset);
+  __ movl(temp, Address(temp, method_offset));
+  // call temp->GetEntryPoint();
   __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
 
   DCHECK(!codegen_->IsLeafMethod());
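The VisitCondition change in this file is subtle: setcc writes only the low byte of its destination, so the output register must be zeroed first, and the xorl has to be emitted before the cmpl because xorl itself clobbers EFLAGS. A small self-contained C++ illustration of the stale-upper-bits bug the xorl prevents, modeling the register as a plain integer:

    #include <cassert>
    #include <cstdint>

    // Models "cmp a, b; setb reg" without the preceding xor: only the low
    // byte of `reg` is written, so stale upper bits survive.
    static uint32_t MaterializeLess(uint32_t stale_reg, int32_t a, int32_t b) {
      uint32_t reg = stale_reg;
      reg &= ~0xFFu;              // setcc leaves bits 8..31 untouched
      reg |= (a < b) ? 1u : 0u;   // setcc writes the low byte only
      return reg;
    }

    int main() {
      assert(MaterializeLess(0xDEADBE00u, 1, 2) != 1u);  // garbage without the xor
      assert(MaterializeLess(0u, 1, 2) == 1u);           // clean 0/1 once zeroed
      return 0;
    }
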
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 7c50204..ffcaf60 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -94,6 +94,8 @@
 
 #undef DECLARE_VISIT_INSTRUCTION
 
+  void HandleInvoke(HInvoke* invoke);
+
  private:
   CodeGeneratorX86* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -132,6 +134,8 @@
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kX86WordSize;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2f352e0..56198af 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -20,6 +20,7 @@
 #include "gc/accounting/card_table.h"
 #include "mirror/array.h"
 #include "mirror/art_method.h"
+#include "mirror/class.h"
 #include "mirror/object_reference.h"
 #include "thread.h"
 #include "utils/assembler.h"
@@ -102,8 +103,10 @@
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(instruction_->GetLocations());
     __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pTestSuspend), true));
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    codegen->RestoreLiveRegisters(instruction_->GetLocations());
     __ jmp(GetReturnLabel());
   }
 
@@ -169,6 +172,14 @@
   stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg));
 }
 
+void CodeGeneratorX86_64::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movq(Address(CpuRegister(RSP), stack_location.GetStackIndex()), CpuRegister(reg_id));
+}
+
+void CodeGeneratorX86_64::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_location.GetStackIndex()));
+}
+
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
       : CodeGenerator(graph, kNumberOfRegIds),
         location_builder_(graph, this),
@@ -423,14 +434,18 @@
   DCHECK(cond->IsCondition());
   HCondition* condition = cond->AsCondition();
   if (condition->NeedsMaterialization()) {
-    // Materialized condition, compare against 0.
-    Location lhs = if_instr->GetLocations()->InAt(0);
-    if (lhs.IsRegister()) {
-      __ cmpl(lhs.AsX86_64().AsCpuRegister(), Immediate(0));
-    } else {
-      __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
+    // Moves do not affect the eflags register, so if the condition is evaluated
+    // just before the if, we don't need to evaluate it again.
+    if (!condition->IsBeforeWhenDisregardMoves(if_instr)) {
+      // Materialized condition, compare against 0.
+      Location lhs = if_instr->GetLocations()->InAt(0);
+      if (lhs.IsRegister()) {
+        __ cmpl(lhs.AsX86_64().AsCpuRegister(), Immediate(0));
+      } else {
+        __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
+      }
     }
-    __ j(kEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+    __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
   } else {
     Location lhs = condition->GetLocations()->InAt(0);
     Location rhs = condition->GetLocations()->InAt(1);
@@ -504,6 +519,9 @@
 void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
   if (comp->NeedsMaterialization()) {
     LocationSummary* locations = comp->GetLocations();
+    CpuRegister reg = locations->Out().AsX86_64().AsCpuRegister();
+    // Clear register: setcc only sets the low byte.
+    __ xorq(reg, reg);
     if (locations->InAt(1).IsRegister()) {
       __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
               locations->InAt(1).AsX86_64().AsCpuRegister());
@@ -514,8 +532,7 @@
       __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
               Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
     }
-    __ setcc(X86_64Condition(comp->GetCondition()),
-             comp->GetLocations()->Out().AsX86_64().AsCpuRegister());
+    __ setcc(X86_64Condition(comp->GetCondition()), reg);
   }
 }
 
@@ -709,12 +726,46 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsX86_64().AsCpuRegister();
+  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
+  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).SizeValue() +
+      invoke->GetIndexInDexCache() * heap_reference_size;
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // temp = method;
+  LoadCurrentMethod(temp);
+  // temp = temp->dex_cache_resolved_methods_;
+  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+  // temp = temp[index_in_cache]
+  __ movl(temp, Address(temp, index_in_cache));
+  // (temp + offset_of_quick_compiled_code)()
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
+
+  DCHECK(!codegen_->IsLeafMethod());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  HandleInvoke(invoke);
+}
+
+void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(X86_64CpuLocation(RDI));
 
   InvokeDexCallingConventionVisitor calling_convention_visitor;
-  for (size_t i = 0; i < invoke->InputCount(); ++i) {
+  for (size_t i = 0; i < invoke->InputCount(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
   }
@@ -740,26 +791,23 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
+void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsX86_64().AsCpuRegister();
-  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
-  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).SizeValue() +
-      invoke->GetIndexInDexCache() * heap_reference_size;
-
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
-
-  // temp = method;
-  LoadCurrentMethod(temp);
-  // temp = temp->dex_cache_resolved_methods_;
-  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
-  // temp = temp[index_in_cache]
-  __ movl(temp, Address(temp, index_in_cache));
-  // (temp + offset_of_quick_compiled_code)()
+  size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
+          invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
+  LocationSummary* locations = invoke->GetLocations();
+  Location receiver = locations->InAt(0);
+  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
+  // temp = object->GetClass();
+  if (receiver.IsStackSlot()) {
+    __ movq(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
+    __ movq(temp, Address(temp, class_offset));
+  } else {
+    __ movq(temp, Address(receiver.AsX86_64().AsCpuRegister(), class_offset));
+  }
+  // temp = temp->GetMethodAt(method_offset);
+  __ movl(temp, Address(temp, method_offset));
+  // call temp->GetEntryPoint();
   __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
 
   DCHECK(!codegen_->IsLeafMethod());
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 44552ea..ea21872 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -91,6 +91,8 @@
 
 #undef DECLARE_VISIT_INSTRUCTION
 
+  void HandleInvoke(HInvoke* invoke);
+
  private:
   CodeGeneratorX86_64* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -129,6 +131,8 @@
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kX86_64WordSize;
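All three backends now give the suspend-check slow path the same shape: spill the live registers, call the pTestSuspend entry point, record the safepoint, reload. SaveLiveRegisters/RestoreLiveRegisters drive the new per-backend SaveCoreRegister/RestoreCoreRegister hooks declared in the headers above. A hedged sketch of the shared pattern, using only the calls visible in this change (the actual call emission differs per backend):

    void EmitSuspendCheckSlowPath(CodeGenerator* codegen, HInstruction* instruction) {
      codegen->SaveLiveRegisters(instruction->GetLocations());      // spills via SaveCoreRegister
      // ... backend-specific call to the pTestSuspend entry point ...
      codegen->RecordPcInfo(instruction, instruction->GetDexPc());  // safepoint after the call
      codegen->RestoreLiveRegisters(instruction->GetLocations());   // reloads via RestoreCoreRegister
    }
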
diff --git a/compiler/optimizing/constant_propagation.cc b/compiler/optimizing/constant_propagation.cc
new file mode 100644
index 0000000..d675164
--- /dev/null
+++ b/compiler/optimizing/constant_propagation.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "constant_propagation.h"
+
+namespace art {
+
+void ConstantPropagation::Run() {
+  // Process basic blocks in reverse post-order in the dominator tree,
+  // so that an instruction turned into a constant, used as input of
+  // another instruction, may possibly be used to turn that second
+  // instruction into a constant as well.
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    // Traverse this block's instructions in (forward) order and
+    // replace the ones that can be statically evaluated by a
+    // compile-time counterpart.
+    for (HInstructionIterator it(block->GetInstructions());
+         !it.Done(); it.Advance()) {
+      HInstruction* inst = it.Current();
+      // Constant folding: replace `c <- a op b' with a compile-time
+      // evaluation of `a op b' if `a' and `b' are constant.
+      if (inst->IsBinaryOperation()) {
+        HConstant* constant =
+          inst->AsBinaryOperation()->TryStaticEvaluation(graph_->GetArena());
+        if (constant != nullptr) {
+          inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace art
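ConstantPropagation::Run() leans entirely on HBinaryOperation::TryStaticEvaluation, which is not part of this file. A hedged reconstruction of its likely shape for an integer addition, assuming the usual HIntConstant accessors; the real implementation lives with the node classes and may differ:

    // Assumption: illustrative only, not code from this change.
    HConstant* TryStaticEvaluationOfAdd(HBinaryOperation* add, ArenaAllocator* arena) {
      HInstruction* lhs = add->InputAt(0);
      HInstruction* rhs = add->InputAt(1);
      if (lhs->IsIntConstant() && rhs->IsIntConstant()) {
        int32_t folded = lhs->AsIntConstant()->GetValue()
                       + rhs->AsIntConstant()->GetValue();
        return new (arena) HIntConstant(folded);  // arena-allocated like other nodes
      }
      return nullptr;  // inputs not both constant: nothing to fold
    }
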
diff --git a/compiler/optimizing/constant_propagation.h b/compiler/optimizing/constant_propagation.h
new file mode 100644
index 0000000..0729881
--- /dev/null
+++ b/compiler/optimizing/constant_propagation.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_PROPAGATION_H_
+#define ART_COMPILER_OPTIMIZING_CONSTANT_PROPAGATION_H_
+
+#include "nodes.h"
+
+namespace art {
+
+/**
+ * Optimization pass performing a simple constant propagation on the
+ * SSA form.
+ */
+class ConstantPropagation : public ValueObject {
+ public:
+  explicit ConstantPropagation(HGraph* graph)
+    : graph_(graph) {}
+
+  void Run();
+
+ private:
+  HGraph* const graph_;
+
+  DISALLOW_COPY_AND_ASSIGN(ConstantPropagation);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CONSTANT_PROPAGATION_H_
diff --git a/compiler/optimizing/constant_propagation_test.cc b/compiler/optimizing/constant_propagation_test.cc
new file mode 100644
index 0000000..5c8c709
--- /dev/null
+++ b/compiler/optimizing/constant_propagation_test.cc
@@ -0,0 +1,487 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "constant_propagation.h"
+#include "dead_code_elimination.h"
+#include "pretty_printer.h"
+#include "graph_checker.h"
+#include "optimizing_unit_test.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+static void TestCode(const uint16_t* data,
+                     const std::string& expected_before,
+                     const std::string& expected_after_cp,
+                     const std::string& expected_after_dce) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = CreateCFG(&allocator, data);
+  ASSERT_NE(graph, nullptr);
+
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+
+  StringPrettyPrinter printer_before(graph);
+  printer_before.VisitInsertionOrder();
+  std::string actual_before = printer_before.str();
+  ASSERT_EQ(expected_before, actual_before);
+
+  ConstantPropagation(graph).Run();
+
+  StringPrettyPrinter printer_after_cp(graph);
+  printer_after_cp.VisitInsertionOrder();
+  std::string actual_after_cp = printer_after_cp.str();
+  ASSERT_EQ(expected_after_cp, actual_after_cp);
+
+  DeadCodeElimination(graph).Run();
+
+  StringPrettyPrinter printer_after_dce(graph);
+  printer_after_dce.VisitInsertionOrder();
+  std::string actual_after_dce = printer_after_dce.str();
+  ASSERT_EQ(expected_after_dce, actual_after_dce);
+
+  SSAChecker ssa_checker(&allocator, graph);
+  ssa_checker.VisitInsertionOrder();
+  ASSERT_TRUE(ssa_checker.IsValid());
+}
+
+
+/**
+ * Tiny three-register program exercising int constant folding on addition.
+ *
+ *                              16-bit
+ *                              offset
+ *                              ------
+ *     v0 <- 1                  0.      const/4 v0, #+1
+ *     v1 <- 2                  1.      const/4 v1, #+2
+ *     v2 <- v0 + v1            2.      add-int v2, v0, v1
+ *     return v2                4.      return v2
+ */
+TEST(ConstantPropagation, IntConstantFoldingOnAddition1) {
+  const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 << 8 | 1 << 12,
+    Instruction::CONST_4 | 1 << 8 | 2 << 12,
+    Instruction::ADD_INT | 2 << 8, 0 | 1 << 8,
+    Instruction::RETURN | 2 << 8);
+
+  std::string expected_before =
+    "BasicBlock 0, succ: 1\n"
+    "  3: IntConstant [9]\n"
+    "  5: IntConstant [9]\n"
+    "  14: SuspendCheck\n"
+    "  15: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 2\n"
+    "  9: Add(3, 5) [12]\n"
+    "  12: Return(9)\n"
+    "BasicBlock 2, pred: 1\n"
+    "  13: Exit\n";
+
+  // Expected difference after constant propagation.
+  diff_t expected_cp_diff = {
+    { "  3: IntConstant [9]\n", "  3: IntConstant\n" },
+    { "  5: IntConstant [9]\n", "  5: IntConstant\n" },
+    { "  9: Add(3, 5) [12]\n",  "  16: IntConstant [12]\n" },
+    { "  12: Return(9)\n",      "  12: Return(16)\n" }
+  };
+  std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
+
+  // Expected difference after dead code elimination.
+  diff_t expected_dce_diff = {
+    { "  3: IntConstant\n", removed },
+    { "  5: IntConstant\n", removed }
+  };
+  std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
+
+  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+}
+
+/**
+ * Small three-register program exercising int constant folding on addition.
+ *
+ *                              16-bit
+ *                              offset
+ *                              ------
+ *     v0 <- 1                  0.      const/4 v0, #+1
+ *     v1 <- 2                  1.      const/4 v1, #+2
+ *     v0 <- v0 + v1            2.      add-int/2addr v0, v1
+ *     v1 <- 3                  3.      const/4 v1, #+3
+ *     v2 <- 4                  4.      const/4 v2, #+4
+ *     v1 <- v1 + v2            5.      add-int/2addr v1, v2
+ *     v2 <- v0 + v1            6.      add-int v2, v0, v1
+ *     return v2                8.      return v2
+ */
+TEST(ConstantPropagation, IntConstantFoldingOnAddition2) {
+  const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 << 8 | 1 << 12,
+    Instruction::CONST_4 | 1 << 8 | 2 << 12,
+    Instruction::ADD_INT_2ADDR | 0 << 8 | 1 << 12,
+    Instruction::CONST_4 | 1 << 8 | 3 << 12,
+    Instruction::CONST_4 | 2 << 8 | 4 << 12,
+    Instruction::ADD_INT_2ADDR | 1 << 8 | 2 << 12,
+    Instruction::ADD_INT | 2 << 8, 0 | 1 << 8,
+    Instruction::RETURN | 2 << 8);
+
+  std::string expected_before =
+    "BasicBlock 0, succ: 1\n"
+    "  3: IntConstant [9]\n"
+    "  5: IntConstant [9]\n"
+    "  11: IntConstant [17]\n"
+    "  13: IntConstant [17]\n"
+    "  26: SuspendCheck\n"
+    "  27: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 2\n"
+    "  9: Add(3, 5) [21]\n"
+    "  17: Add(11, 13) [21]\n"
+    "  21: Add(9, 17) [24]\n"
+    "  24: Return(21)\n"
+    "BasicBlock 2, pred: 1\n"
+    "  25: Exit\n";
+
+  // Expected difference after constant propagation.
+  diff_t expected_cp_diff = {
+    { "  3: IntConstant [9]\n",   "  3: IntConstant\n" },
+    { "  5: IntConstant [9]\n",   "  5: IntConstant\n" },
+    { "  11: IntConstant [17]\n", "  11: IntConstant\n" },
+    { "  13: IntConstant [17]\n", "  13: IntConstant\n" },
+    { "  9: Add(3, 5) [21]\n",    "  28: IntConstant\n" },
+    { "  17: Add(11, 13) [21]\n", "  29: IntConstant\n" },
+    { "  21: Add(9, 17) [24]\n",  "  30: IntConstant [24]\n" },
+    { "  24: Return(21)\n",       "  24: Return(30)\n" }
+  };
+  std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
+
+  // Expected difference after dead code elimination.
+  diff_t expected_dce_diff = {
+    { "  3: IntConstant\n",  removed },
+    { "  5: IntConstant\n",  removed },
+    { "  11: IntConstant\n", removed },
+    { "  13: IntConstant\n", removed },
+    { "  28: IntConstant\n", removed },
+    { "  29: IntConstant\n", removed }
+  };
+  std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
+
+  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+}
+
+/**
+ * Tiny three-register program exercising int constant folding on subtraction.
+ *
+ *                              16-bit
+ *                              offset
+ *                              ------
+ *     v0 <- 3                  0.      const/4 v0, #+3
+ *     v1 <- 2                  1.      const/4 v1, #+2
+ *     v2 <- v0 - v1            2.      sub-int v2, v0, v1
+ *     return v2                4.      return v2
+ */
+TEST(ConstantPropagation, IntConstantFoldingOnSubtraction) {
+  const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 << 8 | 3 << 12,
+    Instruction::CONST_4 | 1 << 8 | 2 << 12,
+    Instruction::SUB_INT | 2 << 8, 0 | 1 << 8,
+    Instruction::RETURN | 2 << 8);
+
+  std::string expected_before =
+    "BasicBlock 0, succ: 1\n"
+    "  3: IntConstant [9]\n"
+    "  5: IntConstant [9]\n"
+    "  14: SuspendCheck\n"
+    "  15: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 2\n"
+    "  9: Sub(3, 5) [12]\n"
+    "  12: Return(9)\n"
+    "BasicBlock 2, pred: 1\n"
+    "  13: Exit\n";
+
+  // Expected difference after constant propagation.
+  diff_t expected_cp_diff = {
+    { "  3: IntConstant [9]\n", "  3: IntConstant\n" },
+    { "  5: IntConstant [9]\n", "  5: IntConstant\n" },
+    { "  9: Sub(3, 5) [12]\n",  "  16: IntConstant [12]\n" },
+    { "  12: Return(9)\n",      "  12: Return(16)\n" }
+  };
+  std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
+
+  // Expected difference after dead code elimination.
+  diff_t expected_dce_diff = {
+    { "  3: IntConstant\n", removed },
+    { "  5: IntConstant\n", removed }
+  };
+  std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
+
+  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+}
+
+#define SIX_REGISTERS_CODE_ITEM(...)                                     \
+    { 6, 0, 0, 0, 0, 0, NUM_INSTRUCTIONS(__VA_ARGS__), 0, __VA_ARGS__ }
+
+/**
+ * Tiny three-register-pair program exercising long constant folding
+ * on addition.
+ *
+ *                              16-bit
+ *                              offset
+ *                              ------
+ *     (v0, v1) <- 1            0.      const-wide/16 v0, #+1
+ *     (v2, v3) <- 2            2.      const-wide/16 v2, #+2
+ *     (v4, v5) <-
+ *       (v0, v1) + (v2, v3)    4.      add-long v4, v0, v2
+ *     return (v4, v5)          6.      return-wide v4
+ */
+TEST(ConstantPropagation, LongConstantFoldingOnAddition) {
+  const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
+    Instruction::CONST_WIDE_16 | 0 << 8, 1,
+    Instruction::CONST_WIDE_16 | 2 << 8, 2,
+    Instruction::ADD_LONG | 4 << 8, 0 | 2 << 8,
+    Instruction::RETURN_WIDE | 4 << 8);
+
+  std::string expected_before =
+    "BasicBlock 0, succ: 1\n"
+    "  6: LongConstant [12]\n"
+    "  8: LongConstant [12]\n"
+    "  17: SuspendCheck\n"
+    "  18: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 2\n"
+    "  12: Add(6, 8) [15]\n"
+    "  15: Return(12)\n"
+    "BasicBlock 2, pred: 1\n"
+    "  16: Exit\n";
+
+  // Expected difference after constant propagation.
+  diff_t expected_cp_diff = {
+    { "  6: LongConstant [12]\n", "  6: LongConstant\n" },
+    { "  8: LongConstant [12]\n", "  8: LongConstant\n" },
+    { "  12: Add(6, 8) [15]\n",   "  19: LongConstant [15]\n" },
+    { "  15: Return(12)\n",       "  15: Return(19)\n" }
+  };
+  std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
+
+  // Expected difference after dead code elimination.
+  diff_t expected_dce_diff = {
+    { "  6: LongConstant\n", removed },
+    { "  8: LongConstant\n", removed }
+  };
+  std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
+
+  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+}
+
+/**
+ * Tiny three-register-pair program exercising long constant folding
+ * on subtraction.
+ *
+ *                              16-bit
+ *                              offset
+ *                              ------
+ *     (v0, v1) <- 3            0.      const-wide/16 v0, #+3
+ *     (v2, v3) <- 2            2.      const-wide/16 v2, #+2
+ *     (v4, v5) <-
+ *       (v0, v1) - (v2, v3)    4.      sub-long v4, v0, v2
+ *     return (v4, v5)          6.      return-wide v4
+ */
+TEST(ConstantPropagation, LongConstantFoldingOnSubtraction) {
+  const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
+    Instruction::CONST_WIDE_16 | 0 << 8, 3,
+    Instruction::CONST_WIDE_16 | 2 << 8, 2,
+    Instruction::SUB_LONG | 4 << 8, 0 | 2 << 8,
+    Instruction::RETURN_WIDE | 4 << 8);
+
+  std::string expected_before =
+    "BasicBlock 0, succ: 1\n"
+    "  6: LongConstant [12]\n"
+    "  8: LongConstant [12]\n"
+    "  17: SuspendCheck\n"
+    "  18: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 2\n"
+    "  12: Sub(6, 8) [15]\n"
+    "  15: Return(12)\n"
+    "BasicBlock 2, pred: 1\n"
+    "  16: Exit\n";
+
+  // Expected difference after constant propagation.
+  diff_t expected_cp_diff = {
+    { "  6: LongConstant [12]\n", "  6: LongConstant\n" },
+    { "  8: LongConstant [12]\n", "  8: LongConstant\n" },
+    { "  12: Sub(6, 8) [15]\n",   "  19: LongConstant [15]\n" },
+    { "  15: Return(12)\n",       "  15: Return(19)\n" }
+  };
+  std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
+
+  // Expected difference after dead code elimination.
+  diff_t expected_dce_diff = {
+    { "  6: LongConstant\n", removed },
+    { "  8: LongConstant\n", removed }
+  };
+  std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
+
+  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+}
+
+/**
+ * Three-register program with jumps leading to the creation of many
+ * blocks.
+ *
+ * The intent of this test is to ensure that all constant expressions
+ * are actually evaluated at compile-time, thanks to the reverse
+ * (forward) post-order traversal of the dominator tree.
+ *
+ *                              16-bit
+ *                              offset
+ *                              ------
+ *     v0 <- 0                   0.     const/4 v0, #+0
+ *     v1 <- 1                   1.     const/4 v1, #+1
+ *     v2 <- v0 + v1             2.     add-int v2, v0, v1
+ *     goto L2                   4.     goto +4
+ * L1: v1 <- v0 + 3              5.     add-int/lit16 v1, v0, #+3
+ *     goto L3                   7.     goto +4
+ * L2: v0 <- v2 + 2              8.     add-int/lit16 v0, v2, #+2
+ *     goto L1                  10.     goto +(-5)
+ * L3: v2 <- v1 + 4             11.     add-int/lit16 v2, v1, #+4
+ *     return v2                13.     return v2
+ */
+TEST(ConstantPropagation, IntConstantFoldingAndJumps) {
+  const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 << 8 | 0 << 12,
+    Instruction::CONST_4 | 1 << 8 | 1 << 12,
+    Instruction::ADD_INT | 2 << 8, 0 | 1 << 8,
+    Instruction::GOTO | 4 << 8,
+    Instruction::ADD_INT_LIT16 | 1 << 8 | 0 << 12, 3,
+    Instruction::GOTO | 4 << 8,
+    Instruction::ADD_INT_LIT16 | 0 << 8 | 2 << 12, 2,
+    static_cast<uint16_t>(Instruction::GOTO | -5 << 8),
+    Instruction::ADD_INT_LIT16 | 2 << 8 | 1 << 12, 4,
+    Instruction::RETURN | 2 << 8);
+
+  std::string expected_before =
+    "BasicBlock 0, succ: 1\n"
+    "  3: IntConstant [9]\n"
+    "  5: IntConstant [9]\n"
+    "  13: IntConstant [14]\n"
+    "  18: IntConstant [19]\n"
+    "  24: IntConstant [25]\n"
+    "  30: SuspendCheck\n"
+    "  31: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 3\n"
+    "  9: Add(3, 5) [19]\n"
+    "  11: Goto 3\n"
+    "BasicBlock 2, pred: 3, succ: 4\n"
+    "  14: Add(19, 13) [25]\n"
+    "  16: Goto 4\n"
+    "BasicBlock 3, pred: 1, succ: 2\n"
+    "  19: Add(9, 18) [14]\n"
+    "  21: SuspendCheck\n"
+    "  22: Goto 2\n"
+    "BasicBlock 4, pred: 2, succ: 5\n"
+    "  25: Add(14, 24) [28]\n"
+    "  28: Return(25)\n"
+    "BasicBlock 5, pred: 4\n"
+    "  29: Exit\n";
+
+  // Expected difference after constant propagation.
+  diff_t expected_cp_diff = {
+    { "  3: IntConstant [9]\n",   "  3: IntConstant\n" },
+    { "  5: IntConstant [9]\n",   "  5: IntConstant []\n" },
+    { "  13: IntConstant [14]\n", "  13: IntConstant\n" },
+    { "  18: IntConstant [19]\n", "  18: IntConstant\n" },
+    { "  24: IntConstant [25]\n", "  24: IntConstant\n" },
+    { "  9: Add(3, 5) [19]\n",    "  32: IntConstant []\n" },
+    { "  14: Add(19, 13) [25]\n", "  34: IntConstant\n" },
+    { "  19: Add(9, 18) [14]\n",  "  33: IntConstant []\n" },
+    { "  25: Add(14, 24) [28]\n", "  35: IntConstant [28]\n" },
+    { "  28: Return(25)\n",       "  28: Return(35)\n"}
+  };
+  std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
+
+  // Expected difference after dead code elimination.
+  diff_t expected_dce_diff = {
+    { "  3: IntConstant\n",     removed },
+    { "  13: IntConstant\n",    removed },
+    { "  18: IntConstant\n",    removed },
+    { "  24: IntConstant\n",    removed },
+    { "  34: IntConstant\n",    removed },
+  };
+  std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
+
+  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+}
+
+
+/**
+ * Three-register program with a constant (static) condition.
+ *
+ *                              16-bit
+ *                              offset
+ *                              ------
+ *     v1 <- 1                  0.      const/4 v1, #+1
+ *     v0 <- 0                  1.      const/4 v0, #+0
+ *     if v1 >= 0 goto L1       2.      if-gez v1, +3
+ *     v0 <- v1                 4.      move v0, v1
+ * L1: v2 <- v0 + v1            5.      add-int v2, v0, v1
+ *     return-void              7.      return-void
+ */
+TEST(ConstantPropagation, ConstantCondition) {
+  const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 1 << 8 | 1 << 12,
+    Instruction::CONST_4 | 0 << 8 | 0 << 12,
+    Instruction::IF_GEZ | 1 << 8, 3,
+    Instruction::MOVE | 0 << 8 | 1 << 12,
+    Instruction::ADD_INT | 2 << 8, 0 | 1 << 8,
+    Instruction::RETURN_VOID);
+
+  std::string expected_before =
+    "BasicBlock 0, succ: 1\n"
+    "  3: IntConstant [15, 22, 8]\n"
+    "  5: IntConstant [22, 8]\n"
+    "  19: SuspendCheck\n"
+    "  20: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 5, 2\n"
+    "  8: GreaterThanOrEqual(3, 5) [9]\n"
+    "  9: If(8)\n"
+    "BasicBlock 2, pred: 1, succ: 3\n"
+    "  12: Goto 3\n"
+    "BasicBlock 3, pred: 2, 5, succ: 4\n"
+    "  22: Phi(3, 5) [15]\n"
+    "  15: Add(22, 3)\n"
+    "  17: ReturnVoid\n"
+    "BasicBlock 4, pred: 3\n"
+    "  18: Exit\n"
+    "BasicBlock 5, pred: 1, succ: 3\n"
+    "  21: Goto 3\n";
+
+  // Expected difference after constant propagation.
+  diff_t expected_cp_diff = {
+    { "  3: IntConstant [15, 22, 8]\n",      "  3: IntConstant [15, 22]\n" },
+    { "  5: IntConstant [22, 8]\n",          "  5: IntConstant [22]\n" },
+    { "  8: GreaterThanOrEqual(3, 5) [9]\n", "  23: IntConstant [9]\n" },
+    { "  9: If(8)\n",                        "  9: If(23)\n" }
+  };
+  std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
+
+  // Expected difference after dead code elimination.
+  diff_t expected_dce_diff = {
+    { "  3: IntConstant [15, 22]\n", "  3: IntConstant [22]\n" },
+    { "  22: Phi(3, 5) [15]\n",      "  22: Phi(3, 5)\n" },
+    { "  15: Add(22, 3)\n",          removed }
+  };
+  std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
+
+  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+}
+
+}  // namespace art
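The tests above express expectations as a base string plus a diff_t of (before, after) line pairs applied by Patch, with `removed` deleting a line; all three come from optimizing_unit_test.h, which is not part of this change. A plausible minimal implementation under that assumption:

    #include <string>
    #include <utility>
    #include <vector>

    // Assumed shapes; the real helpers in optimizing_unit_test.h may differ.
    using diff_t = std::vector<std::pair<std::string, std::string>>;
    static const std::string removed = "";

    static std::string Patch(const std::string& original, const diff_t& diff) {
      std::string result = original;
      for (const auto& p : diff) {
        // Each "before" string is expected to occur exactly once.
        std::string::size_type pos = result.find(p.first);
        result.replace(pos, p.first.size(), p.second);
      }
      return result;
    }
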
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
new file mode 100644
index 0000000..2f881d1
--- /dev/null
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dead_code_elimination.h"
+
+#include "base/bit_vector-inl.h"
+
+namespace art {
+
+void DeadCodeElimination::Run() {
+  // Process basic blocks in post-order in the dominator tree, so that
+  // a dead instruction depending on another dead instruction is
+  // removed.
+  for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) {
+    HBasicBlock* block = b.Current();
+    // Traverse this block's instructions in backward order and remove
+    // the unused ones.
+    HBackwardInstructionIterator i(block->GetInstructions());
+    // Skip the first iteration, as the last instruction of a block is
+    // a branching instruction.
+    DCHECK(i.Current()->IsControlFlow());
+    for (i.Advance(); !i.Done(); i.Advance()) {
+      HInstruction* inst = i.Current();
+      DCHECK(!inst->IsControlFlow());
+      if (!inst->HasSideEffects() && !inst->HasUses()) {
+        block->RemoveInstruction(inst);
+      }
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
new file mode 100644
index 0000000..48739be
--- /dev/null
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_
+#define ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_
+
+#include "nodes.h"
+
+namespace art {
+
+/**
+ * Optimization pass performing dead code elimination (removal of
+ * unused variables/instructions) on the SSA form.
+ */
+class DeadCodeElimination : public ValueObject {
+ public:
+  explicit DeadCodeElimination(HGraph* graph)
+      : graph_(graph) {}
+
+  void Run();
+
+ private:
+  HGraph* const graph_;
+
+  DISALLOW_COPY_AND_ASSIGN(DeadCodeElimination);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_
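As the constant propagation tests above already do by hand, the two passes are meant to run back to back: folding first turns expressions into constants and strands their operands, and the sweep then deletes those now-unused instructions. The minimal driver, using only this change's own headers:

    #include "constant_propagation.h"
    #include "dead_code_elimination.h"

    void RunSimpleOptimizations(HGraph* graph) {
      ConstantPropagation(graph).Run();  // fold `a op b` where both inputs are constant
      DeadCodeElimination(graph).Run();  // remove side-effect-free instructions with no uses
    }
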
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
new file mode 100644
index 0000000..245bcb2
--- /dev/null
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dead_code_elimination.h"
+#include "pretty_printer.h"
+#include "graph_checker.h"
+#include "optimizing_unit_test.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+static void TestCode(const uint16_t* data,
+                     const std::string& expected_before,
+                     const std::string& expected_after) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = CreateCFG(&allocator, data);
+  ASSERT_NE(graph, nullptr);
+
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+
+  StringPrettyPrinter printer_before(graph);
+  printer_before.VisitInsertionOrder();
+  std::string actual_before = printer_before.str();
+  ASSERT_EQ(actual_before, expected_before);
+
+  DeadCodeElimination(graph).Run();
+
+  StringPrettyPrinter printer_after(graph);
+  printer_after.VisitInsertionOrder();
+  std::string actual_after = printer_after.str();
+  ASSERT_EQ(actual_after, expected_after);
+
+  SSAChecker ssa_checker(&allocator, graph);
+  ssa_checker.VisitInsertionOrder();
+  ASSERT_TRUE(ssa_checker.IsValid());
+}
+
+
+/**
+ * Small three-register program.
+ *
+ *                              16-bit
+ *                              offset
+ *                              ------
+ *     v1 <- 1                  0.      const/4 v1, #+1
+ *     v0 <- 0                  1.      const/4 v0, #+0
+ *     if v1 >= 0 goto L1       2.      if-gez v1, +3
+ *     v0 <- v1                 4.      move v0, v1
+ * L1: v2 <- v0 + v1            5.      add-int v2, v0, v1
+ *     return-void              7.      return-void
+ */
+TEST(DeadCodeElimination, AdditionAndConditionalJump) {
+  const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 1 << 8 | 1 << 12,
+    Instruction::CONST_4 | 0 << 8 | 0 << 12,
+    Instruction::IF_GEZ | 1 << 8, 3,
+    Instruction::MOVE | 0 << 8 | 1 << 12,
+    Instruction::ADD_INT | 2 << 8, 0 | 1 << 8,
+    Instruction::RETURN_VOID);
+
+  std::string expected_before =
+    "BasicBlock 0, succ: 1\n"
+    "  3: IntConstant [15, 22, 8]\n"
+    "  5: IntConstant [22, 8]\n"
+    "  19: SuspendCheck\n"
+    "  20: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 5, 2\n"
+    "  8: GreaterThanOrEqual(3, 5) [9]\n"
+    "  9: If(8)\n"
+    "BasicBlock 2, pred: 1, succ: 3\n"
+    "  12: Goto 3\n"
+    "BasicBlock 3, pred: 2, 5, succ: 4\n"
+    "  22: Phi(3, 5) [15]\n"
+    "  15: Add(22, 3)\n"
+    "  17: ReturnVoid\n"
+    "BasicBlock 4, pred: 3\n"
+    "  18: Exit\n"
+    "BasicBlock 5, pred: 1, succ: 3\n"
+    "  21: Goto 3\n";
+
+  diff_t expected_diff = {
+    { "  3: IntConstant [15, 22, 8]\n", "  3: IntConstant [22, 8]\n" },
+    { "  22: Phi(3, 5) [15]\n",         "  22: Phi(3, 5)\n" },
+    { "  15: Add(22, 3)\n",             removed }
+  };
+  std::string expected_after = Patch(expected_before, expected_diff);
+
+  TestCode(data, expected_before, expected_after);
+}
+
+/**
+ * Three-register program with jumps leading to the creation of many
+ * blocks.
+ *
+ * The intent of this test is to ensure that all dead instructions are
+ * actually pruned at compile-time, thanks to the (backward)
+ * post-order traversal of the dominator tree.
+ *
+ *                              16-bit
+ *                              offset
+ *                              ------
+ *     v0 <- 0                   0.     const/4 v0, #+0
+ *     v1 <- 1                   1.     const/4 v1, #+1
+ *     v2 <- v0 + v1             2.     add-int v2, v0, v1
+ *     goto L2                   4.     goto +4
+ * L1: v1 <- v0 + 3              5.     add-int/lit16 v1, v0, #+3
+ *     goto L3                   7.     goto +4
+ * L2: v0 <- v2 + 2              8.     add-int/lit16 v0, v2, #+2
+ *     goto L1                  10.     goto +(-5)
+ * L3: v2 <- v1 + 4             11.     add-int/lit16 v2, v1, #+4
+ *     return                   13.     return-void
+ */
+TEST(DeadCodeElimination, AdditionsAndUnconditionalJumps) {
+  const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 << 8 | 0 << 12,
+    Instruction::CONST_4 | 1 << 8 | 1 << 12,
+    Instruction::ADD_INT | 2 << 8, 0 | 1 << 8,
+    Instruction::GOTO | 4 << 8,
+    Instruction::ADD_INT_LIT16 | 1 << 8 | 0 << 12, 3,
+    Instruction::GOTO | 4 << 8,
+    Instruction::ADD_INT_LIT16 | 0 << 8 | 2 << 12, 2,
+    static_cast<uint16_t>(Instruction::GOTO | -5 << 8),
+    Instruction::ADD_INT_LIT16 | 2 << 8 | 1 << 12, 4,
+    Instruction::RETURN_VOID);
+
+  std::string expected_before =
+    "BasicBlock 0, succ: 1\n"
+    "  3: IntConstant [9]\n"
+    "  5: IntConstant [9]\n"
+    "  13: IntConstant [14]\n"
+    "  18: IntConstant [19]\n"
+    "  24: IntConstant [25]\n"
+    "  29: SuspendCheck\n"
+    "  30: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 3\n"
+    "  9: Add(3, 5) [19]\n"
+    "  11: Goto 3\n"
+    "BasicBlock 2, pred: 3, succ: 4\n"
+    "  14: Add(19, 13) [25]\n"
+    "  16: Goto 4\n"
+    "BasicBlock 3, pred: 1, succ: 2\n"
+    "  19: Add(9, 18) [14]\n"
+    "  21: SuspendCheck\n"
+    "  22: Goto 2\n"
+    "BasicBlock 4, pred: 2, succ: 5\n"
+    "  25: Add(14, 24)\n"
+    "  27: ReturnVoid\n"
+    "BasicBlock 5, pred: 4\n"
+    "  28: Exit\n";
+
+  // Expected difference after dead code elimination.
+  diff_t expected_diff = {
+    { "  13: IntConstant [14]\n", removed },
+    { "  24: IntConstant [25]\n", removed },
+    { "  14: Add(19, 13) [25]\n", removed },
+    // The SuspendCheck instruction following this Add instruction
+    // inserts the latter into an environment, thus making it "used" and
+    // therefore non-removable.  It follows that some other Add and
+    // IntConstant instructions cannot be removed either, as they are
+    // direct or indirect inputs of the initial Add instruction.
+    { "  19: Add(9, 18) [14]\n",  "  19: Add(9, 18) []\n" },
+    { "  25: Add(14, 24)\n",      removed },
+  };
+  std::string expected_after = Patch(expected_before, expected_diff);
+
+  TestCode(data, expected_before, expected_after);
+}
+
+}  // namespace art
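The comment in the last test captures the key subtlety: an instruction referenced only from a SuspendCheck's environment still counts as used. The per-instruction removability test in DeadCodeElimination::Run() therefore amounts to the predicate below (assuming HasUses() covers environment uses in this version of the node classes):

    // Hedged restatement of the check in DeadCodeElimination::Run().
    bool IsRemovable(HInstruction* inst) {
      return !inst->IsControlFlow()    // the block terminator always stays
          && !inst->HasSideEffects()   // e.g. stores and invokes
          && !inst->HasUses();         // assumed to include environment uses
    }
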
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
new file mode 100644
index 0000000..e36b1cd
--- /dev/null
+++ b/compiler/optimizing/graph_checker.cc
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "graph_checker.h"
+
+#include <string>
+#include <map>
+#include <sstream>
+
+#include "base/bit_vector-inl.h"
+
+namespace art {
+
+void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
+  current_block_ = block;
+
+  // Check consistency with respect to predecessors of `block`.
+  const GrowableArray<HBasicBlock*>& predecessors = block->GetPredecessors();
+  std::map<HBasicBlock*, size_t> predecessors_count;
+  for (size_t i = 0, e = predecessors.Size(); i < e; ++i) {
+    HBasicBlock* p = predecessors.Get(i);
+    ++predecessors_count[p];
+  }
+  for (auto& pc : predecessors_count) {
+    HBasicBlock* p = pc.first;
+    size_t p_count_in_block_predecessors = pc.second;
+    const GrowableArray<HBasicBlock*>& p_successors = p->GetSuccessors();
+    size_t block_count_in_p_successors = 0;
+    for (size_t j = 0, f = p_successors.Size(); j < f; ++j) {
+      if (p_successors.Get(j) == block) {
+        ++block_count_in_p_successors;
+      }
+    }
+    if (p_count_in_block_predecessors != block_count_in_p_successors) {
+      std::stringstream error;
+      error << "Block " << block->GetBlockId()
+            << " lists " << p_count_in_block_predecessors
+            << " occurrences of block " << p->GetBlockId()
+            << " in its predecessors, whereas block " << p->GetBlockId()
+            << " lists " << block_count_in_p_successors
+            << " occurrences of block " << block->GetBlockId()
+            << " in its successors.";
+      errors_.Insert(error.str());
+    }
+  }
+
+  // Check consistency with respect to successors of `block`.
+  const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors();
+  std::map<HBasicBlock*, size_t> successors_count;
+  for (size_t i = 0, e = successors.Size(); i < e; ++i) {
+    HBasicBlock* s = successors.Get(i);
+    ++successors_count[s];
+  }
+  for (auto& sc : successors_count) {
+    HBasicBlock* s = sc.first;
+    size_t s_count_in_block_successors = sc.second;
+    const GrowableArray<HBasicBlock*>& s_predecessors = s->GetPredecessors();
+    size_t block_count_in_s_predecessors = 0;
+    for (size_t j = 0, f = s_predecessors.Size(); j < f; ++j) {
+      if (s_predecessors.Get(j) == block) {
+        ++block_count_in_s_predecessors;
+      }
+    }
+    if (s_count_in_block_successors != block_count_in_s_predecessors) {
+      std::stringstream error;
+      error << "Block " << block->GetBlockId()
+            << " lists " << s_count_in_block_successors
+            << " occurrences of block " << s->GetBlockId()
+            << " in its successors, whereas block " << s->GetBlockId()
+            << " lists " << block_count_in_s_predecessors
+            << " occurrences of block " << block->GetBlockId()
+            << " in its predecessors.";
+      errors_.Insert(error.str());
+    }
+  }
+
+  // Ensure `block` ends with a branch instruction.
+  HInstruction* last_inst = block->GetLastInstruction();
+  if (last_inst == nullptr || !last_inst->IsControlFlow()) {
+    std::stringstream error;
+    error  << "Block " << block->GetBlockId()
+           << " does not end with a branch instruction.";
+    errors_.Insert(error.str());
+  }
+
+  // Visit this block's list of phis.
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    // Ensure this block's list of phis contains only phis.
+    if (!it.Current()->IsPhi()) {
+      std::stringstream error;
+      error << "Block " << current_block_->GetBlockId()
+            << " has a non-phi in its phi list.";
+      errors_.Insert(error.str());
+    }
+    it.Current()->Accept(this);
+  }
+
+  // Visit this block's list of instructions.
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done();
+       it.Advance()) {
+    // Ensure this block's list of instructions does not contain phis.
+    if (it.Current()->IsPhi()) {
+      std::stringstream error;
+      error << "Block " << current_block_->GetBlockId()
+            << " has a phi in its non-phi list.";
+      errors_.Insert(error.str());
+    }
+    it.Current()->Accept(this);
+  }
+}
+
+void GraphChecker::VisitInstruction(HInstruction* instruction) {
+  // Ensure `instruction` is associated with `current_block_`.
+  if (instruction->GetBlock() != current_block_) {
+    std::stringstream error;
+    if (instruction->IsPhi()) {
+      error << "Phi ";
+    } else {
+      error << "Instruction ";
+    }
+    error << instruction->GetId() << " in block "
+          << current_block_->GetBlockId();
+    if (instruction->GetBlock() != nullptr) {
+      error << " associated with block "
+            << instruction->GetBlock()->GetBlockId() << ".";
+    } else {
+      error << " not associated with any block.";
+    }
+    errors_.Insert(error.str());
+  }
+}
+
+void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
+  super_type::VisitBasicBlock(block);
+
+  // Ensure there is no critical edge (i.e., an edge connecting a
+  // block with multiple successors to a block with multiple
+  // predecessors).
+  if (block->GetSuccessors().Size() > 1) {
+    for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
+      HBasicBlock* successor = block->GetSuccessors().Get(j);
+      if (successor->GetPredecessors().Size() > 1) {
+        std::stringstream error;
+        error << "Critical edge between blocks " << block->GetBlockId()
+              << " and "  << successor->GetBlockId() << ".";
+        errors_.Insert(error.str());
+      }
+    }
+  }
+
+  if (block->IsLoopHeader()) {
+    CheckLoop(block);
+  }
+}
+
+void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
+  int id = loop_header->GetBlockId();
+
+  // Ensure the pre-header block is first in the list of
+  // predecessors of a loop header.
+  if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
+    std::stringstream error;
+    error << "Loop pre-header is not the first predecessor of the loop header "
+          << id << ".";
+    errors_.Insert(error.str());
+  }
+
+  // Ensure the loop header has only two predecessors and that only the
+  // second one is a back edge.
+  if (loop_header->GetPredecessors().Size() < 2) {
+    std::stringstream error;
+    error << "Loop header " << id << " has less than two predecessors.";
+    errors_.Insert(error.str());
+  } else if (loop_header->GetPredecessors().Size() > 2) {
+    std::stringstream error;
+    error << "Loop header " << id << " has more than two predecessors.";
+    errors_.Insert(error.str());
+  } else {
+    HLoopInformation* loop_information = loop_header->GetLoopInformation();
+    HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0);
+    if (loop_information->IsBackEdge(first_predecessor)) {
+      std::stringstream error;
+      error << "First predecessor of loop header " << id << " is a back edge.";
+      errors_.Insert(error.str());
+    }
+    HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1);
+    if (!loop_information->IsBackEdge(second_predecessor)) {
+      std::stringstream error;
+      error << "Second predecessor of loop header " << id
+            << " is not a back edge.";
+      errors_.Insert(error.str());
+    }
+  }
+
+  // Ensure there is only one back edge per loop.
+  size_t num_back_edges =
+    loop_header->GetLoopInformation()->GetBackEdges().Size();
+  if (num_back_edges != 1) {
+      std::stringstream error;
+      error << "Loop defined by header " << id << " has "
+            << num_back_edges << " back edge(s).";
+      errors_.Insert(error.str());
+  }
+
+  // Ensure all blocks in the loop are dominated by the loop header.
+  const ArenaBitVector& loop_blocks =
+    loop_header->GetLoopInformation()->GetBlocks();
+  for (uint32_t i : loop_blocks.Indexes()) {
+    HBasicBlock* loop_block = GetGraph()->GetBlocks().Get(i);
+    if (!loop_header->Dominates(loop_block)) {
+      std::stringstream error;
+      error << "Loop block " << loop_block->GetBlockId()
+            << " not dominated by loop header " << id;
+      errors_.Insert(error.str());
+    }
+  }
+}
+
+void SSAChecker::VisitInstruction(HInstruction* instruction) {
+  super_type::VisitInstruction(instruction);
+
+  // Ensure an instruction dominates all its uses (or, as checked here,
+  // that every input of an instruction dominates that instruction).
+  for (HInputIterator input_it(instruction); !input_it.Done();
+       input_it.Advance()) {
+    HInstruction* input = input_it.Current();
+    if (!input->Dominates(instruction)) {
+      std::stringstream error;
+      error << "Instruction " << input->GetId()
+            << " in block " << input->GetBlock()->GetBlockId()
+            << " does not dominate use " << instruction->GetId()
+            << " in block " << current_block_->GetBlockId() << ".";
+      errors_.Insert(error.str());
+    }
+  }
+}
+
+void SSAChecker::VisitPhi(HPhi* phi) {
+  VisitInstruction(phi);
+
+  // Ensure the first input of a phi is not itself.
+  if (phi->InputAt(0) == phi) {
+      std::stringstream error;
+      error << "Loop phi " << phi->GetId()
+            << " in block " << phi->GetBlock()->GetBlockId()
+            << " is its own first input.";
+      errors_.Insert(error.str());
+  }
+
+  // Ensure the number of phi inputs is the same as the number of
+  // its predecessors.
+  const GrowableArray<HBasicBlock*>& predecessors =
+    phi->GetBlock()->GetPredecessors();
+  if (phi->InputCount() != predecessors.Size()) {
+    std::stringstream error;
+    error << "Phi " << phi->GetId()
+          << " in block " << phi->GetBlock()->GetBlockId()
+          << " has " << phi->InputCount() << " inputs, but block "
+          << phi->GetBlock()->GetBlockId() << " has "
+          << predecessors.Size() << " predecessors.";
+    errors_.Insert(error.str());
+  } else {
+    // Ensure phi input at index I either comes from the Ith
+    // predecessor or from a block that dominates this predecessor.
+    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+      HInstruction* input = phi->InputAt(i);
+      HBasicBlock* predecessor = predecessors.Get(i);
+      if (!(input->GetBlock() == predecessor
+            || input->GetBlock()->Dominates(predecessor))) {
+        std::stringstream error;
+        error << "Input " << input->GetId() << " at index " << i
+              << " of phi " << phi->GetId()
+              << " from block " << phi->GetBlock()->GetBlockId()
+              << " is not defined in predecessor number " << i
+              << " nor in a block dominating it.";
+        errors_.Insert(error.str());
+      }
+    }
+  }
+}
+
+}  // namespace art
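
The predecessor/successor verification above is a multiset comparison: an edge A->B must occur exactly as many times in A's successor list as in B's predecessor list, or the CFG is corrupt. A minimal, self-contained sketch of the same idea, using plain STL containers instead of ART's GrowableArray (block ids and the container choice are illustrative):

    #include <cstdio>
    #include <map>
    #include <vector>

    // Returns true when every predecessor edge of block `b` is mirrored by the
    // same number of successor edges, as GraphChecker::VisitBasicBlock checks.
    static bool EdgeCountsConsistent(const std::vector<int>& b_preds,
                                     const std::vector<std::vector<int>>& succs,
                                     int b) {
      std::map<int, int> pred_count;
      for (int p : b_preds) ++pred_count[p];
      for (const auto& pc : pred_count) {
        int in_succ = 0;
        for (int s : succs[pc.first]) {
          if (s == b) ++in_succ;  // occurrences of b in p's successor list
        }
        if (in_succ != pc.second) return false;  // the multisets disagree
      }
      return true;
    }

    int main() {
      // Block 1 lists block 0 twice as a predecessor, but block 0 lists
      // block 1 only once as a successor: an inconsistent CFG.
      std::vector<std::vector<int>> succs = {{1}, {}};
      std::printf("%s\n", EdgeCountsConsistent({0, 0}, succs, 1)
                              ? "consistent" : "inconsistent");
    }
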
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
new file mode 100644
index 0000000..34a770b
--- /dev/null
+++ b/compiler/optimizing/graph_checker.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_GRAPH_CHECKER_H_
+#define ART_COMPILER_OPTIMIZING_GRAPH_CHECKER_H_
+
+#include "nodes.h"
+
+namespace art {
+
+// A control-flow graph visitor performing various checks.
+class GraphChecker : public HGraphVisitor {
+ public:
+  GraphChecker(ArenaAllocator* allocator, HGraph* graph)
+    : HGraphVisitor(graph),
+      allocator_(allocator),
+      errors_(allocator, 0) {}
+
+  // Check `block`.
+  virtual void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+
+  // Check `instruction`.
+  virtual void VisitInstruction(HInstruction* instruction) OVERRIDE;
+
+  // Was the last visit of the graph valid?
+  bool IsValid() const {
+    return errors_.IsEmpty();
+  }
+
+  // Get the list of detected errors.
+  const GrowableArray<std::string>& GetErrors() const {
+    return errors_;
+  }
+
+ protected:
+  ArenaAllocator* const allocator_;
+  // The block currently visited.
+  HBasicBlock* current_block_ = nullptr;
+  // Errors encountered while checking the graph.
+  GrowableArray<std::string> errors_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(GraphChecker);
+};
+
+
+// An SSA graph visitor performing various checks.
+class SSAChecker : public GraphChecker {
+ public:
+  typedef GraphChecker super_type;
+
+  SSAChecker(ArenaAllocator* allocator, HGraph* graph)
+    : GraphChecker(allocator, graph) {}
+
+  // Perform SSA form checks on `block`.
+  virtual void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+  // Loop-related checks from block `loop_header`.
+  void CheckLoop(HBasicBlock* loop_header);
+
+  // Perform SSA form checks on instructions.
+  virtual void VisitInstruction(HInstruction* instruction) OVERRIDE;
+  virtual void VisitPhi(HPhi* phi) OVERRIDE;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SSAChecker);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_GRAPH_CHECKER_H_
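
A hedged usage sketch of the interface above, mirroring what the tests below do: run the checker over every block in insertion order, then query validity and the collected messages (`graph` and `allocator` are assumed to exist; the logging is illustrative):

    GraphChecker checker(allocator, graph);
    checker.VisitInsertionOrder();  // inherited from HGraphVisitor
    if (!checker.IsValid()) {
      const GrowableArray<std::string>& errors = checker.GetErrors();
      for (size_t i = 0; i < errors.Size(); ++i) {
        LOG(ERROR) << errors.Get(i);  // one message per detected inconsistency
      }
    }
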
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
new file mode 100644
index 0000000..ea06920
--- /dev/null
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "graph_checker.h"
+#include "optimizing_unit_test.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+/**
+ * Create a simple control-flow graph composed of two blocks:
+ *
+ *   BasicBlock 0, succ: 1
+ *     0: Goto 1
+ *   BasicBlock 1, pred: 0
+ *     1: Exit
+ */
+HGraph* CreateSimpleCFG(ArenaAllocator* allocator) {
+  HGraph* graph = new (allocator) HGraph(allocator);
+  HBasicBlock* entry_block = new (allocator) HBasicBlock(graph);
+  entry_block->AddInstruction(new (allocator) HGoto());
+  graph->AddBlock(entry_block);
+  graph->SetEntryBlock(entry_block);
+  HBasicBlock* exit_block = new (allocator) HBasicBlock(graph);
+  exit_block->AddInstruction(new (allocator) HExit());
+  graph->AddBlock(exit_block);
+  graph->SetExitBlock(exit_block);
+  entry_block->AddSuccessor(exit_block);
+  return graph;
+}
+
+
+static void TestCode(const uint16_t* data) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = CreateCFG(&allocator, data);
+  ASSERT_NE(graph, nullptr);
+
+  GraphChecker graph_checker(&allocator, graph);
+  graph_checker.VisitInsertionOrder();
+  ASSERT_TRUE(graph_checker.IsValid());
+}
+
+static void TestCodeSSA(const uint16_t* data) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = CreateCFG(&allocator, data);
+  ASSERT_NE(graph, nullptr);
+
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+
+  SSAChecker ssa_checker(&allocator, graph);
+  ssa_checker.VisitInsertionOrder();
+  ASSERT_TRUE(ssa_checker.IsValid());
+}
+
+
+TEST(GraphChecker, ReturnVoid) {
+  const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+      Instruction::RETURN_VOID);
+
+  TestCode(data);
+}
+
+TEST(GraphChecker, CFG1) {
+  const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+      Instruction::GOTO | 0x100,
+      Instruction::RETURN_VOID);
+
+  TestCode(data);
+}
+
+TEST(GraphChecker, CFG2) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0x100,
+    Instruction::RETURN_VOID);
+
+  TestCode(data);
+}
+
+TEST(GraphChecker, CFG3) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0x100,
+    Instruction::GOTO | 0xFF00);
+
+  TestCode(data);
+}
+
+// Test case with an invalid graph containing inconsistent
+// predecessor/successor arcs in CFG.
+TEST(GraphChecker, InconsistentPredecessorsAndSuccessors) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = CreateSimpleCFG(&allocator);
+  GraphChecker graph_checker(&allocator, graph);
+  graph_checker.VisitInsertionOrder();
+  ASSERT_TRUE(graph_checker.IsValid());
+
+  // Remove the entry block from the exit block's predecessors, to create an
+  // inconsistent successor/predecessor relation.
+  graph->GetExitBlock()->RemovePredecessor(graph->GetEntryBlock());
+  graph_checker.VisitInsertionOrder();
+  ASSERT_FALSE(graph_checker.IsValid());
+}
+
+// Test case with an invalid graph containing a non-branch last
+// instruction in a block.
+TEST(GraphChecker, BlockEndingWithNonBranchInstruction) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = CreateSimpleCFG(&allocator);
+  GraphChecker graph_checker(&allocator, graph);
+  graph_checker.VisitInsertionOrder();
+  ASSERT_TRUE(graph_checker.IsValid());
+
+  // Remove the sole instruction of the exit block (composed of a
+  // single Exit instruction) to make it invalid (i.e. not ending with
+  // a branch instruction).
+  HBasicBlock* exit_block = graph->GetExitBlock();
+  HInstruction* last_inst = exit_block->GetLastInstruction();
+  exit_block->RemoveInstruction(last_inst);
+
+  graph_checker.VisitInsertionOrder();
+  ASSERT_FALSE(graph_checker.IsValid());
+}
+
+TEST(SSAChecker, SSAPhi) {
+  // This code creates one Phi function during the conversion to SSA form.
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::RETURN | 0 << 8);
+
+  TestCodeSSA(data);
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index f011e85..0fb4737 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -81,6 +81,25 @@
     }
   }
 
+  char GetTypeId(Primitive::Type type) {
+    // Note that Primitive::Descriptor would not work for us
+    // because it does not handle reference types (that is kPrimNot).
+    switch (type) {
+      case Primitive::kPrimBoolean: return 'z';
+      case Primitive::kPrimByte: return 'b';
+      case Primitive::kPrimChar: return 'c';
+      case Primitive::kPrimShort: return 's';
+      case Primitive::kPrimInt: return 'i';
+      case Primitive::kPrimLong: return 'j';
+      case Primitive::kPrimFloat: return 'f';
+      case Primitive::kPrimDouble: return 'd';
+      case Primitive::kPrimNot: return 'l';
+      case Primitive::kPrimVoid: return 'v';
+    }
+    LOG(FATAL) << "Unreachable";
+    return 'v';
+  }
+
   void PrintPredecessors(HBasicBlock* block) {
     AddIndent();
     output_ << "predecessors";
@@ -110,6 +129,12 @@
       }
     } else if (location.IsConstant()) {
       output_ << "constant";
+      HConstant* constant = location.GetConstant();
+      if (constant->IsIntConstant()) {
+        output_ << " " << constant->AsIntConstant()->GetValue();
+      } else if (constant->IsLongConstant()) {
+        output_ << " " << constant->AsLongConstant()->GetValue();
+      }
     } else if (location.IsInvalid()) {
       output_ << "invalid";
     } else if (location.IsStackSlot()) {
@@ -140,7 +165,7 @@
     if (instruction->InputCount() > 0) {
       output_ << " [ ";
       for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) {
-        output_ << "v" << inputs.Current()->GetId() << " ";
+        output_ << GetTypeId(inputs.Current()->GetType()) << inputs.Current()->GetId() << " ";
       }
       output_ << "]";
     }
@@ -175,7 +200,8 @@
       HInstruction* instruction = it.Current();
       AddIndent();
       int bci = 0;
-      output_ << bci << " " << instruction->NumberOfUses() << " v" << instruction->GetId() << " ";
+      output_ << bci << " " << instruction->NumberOfUses()
+              << " " << GetTypeId(instruction->GetType()) << instruction->GetId() << " ";
       instruction->Accept(this);
       output_ << kEndInstructionMarker << std::endl;
     }
@@ -214,7 +240,8 @@
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       AddIndent();
       HInstruction* instruction = it.Current();
-      output_ << instruction->GetId() << " v" << instruction->GetId() << "[ ";
+      output_ << instruction->GetId() << " " << GetTypeId(instruction->GetType())
+              << instruction->GetId() << "[ ";
       for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) {
         output_ << inputs.Current()->GetId() << " ";
       }
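
The visible effect of the GetTypeId change above: values in the c1visualizer dump are prefixed with a type letter instead of a uniform "v", so an int-typed instruction 3 prints as "i3" and a reference prints as "l2". A standalone sketch of the mapping, with an invented enum standing in for Primitive::Type:

    #include <cstdio>

    enum Type { kBoolean, kByte, kChar, kShort, kInt,
                kLong, kFloat, kDouble, kNot, kVoid };

    // Same letter scheme as GetTypeId; 'l' marks references (kPrimNot).
    static char TypeId(Type t) {
      switch (t) {
        case kBoolean: return 'z';  case kByte:   return 'b';
        case kChar:    return 'c';  case kShort:  return 's';
        case kInt:     return 'i';  case kLong:   return 'j';
        case kFloat:   return 'f';  case kDouble: return 'd';
        case kNot:     return 'l';  case kVoid:   return 'v';
      }
      return 'v';
    }

    int main() {
      std::printf("%c3 %c2\n", TypeId(kInt), TypeId(kNot));  // prints "i3 l2"
    }
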
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index 7cd74e9..6e2c6fd 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -25,8 +25,10 @@
 class DexCompilationUnit;
 class HGraph;
 
+// TODO: Create an analysis/optimization abstraction.
 static const char* kLivenessPassName = "liveness";
 static const char* kRegisterAllocatorPassName = "register";
+static const char* kGVNPassName = "gvn";
 
 /**
  * If enabled, emits compilation information suitable for the c1visualizer tool
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
new file mode 100644
index 0000000..027b3d4
--- /dev/null
+++ b/compiler/optimizing/gvn.cc
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gvn.h"
+
+namespace art {
+
+void GlobalValueNumberer::Run() {
+  ComputeSideEffects();
+
+  sets_.Put(graph_->GetEntryBlock()->GetBlockId(), new (allocator_) ValueSet(allocator_));
+
+  // Do a reverse post order visit to ensure the non back-edge predecessors
+  // of a block are visited before the block itself.
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    VisitBasicBlock(it.Current());
+  }
+}
+
+void GlobalValueNumberer::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) {
+  int id = info->GetHeader()->GetBlockId();
+  loop_effects_.Put(id, loop_effects_.Get(id).Union(effects));
+}
+
+void GlobalValueNumberer::ComputeSideEffects() {
+  if (kIsDebugBuild) {
+    for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      SideEffects effects = GetBlockEffects(block);
+      DCHECK(!effects.HasSideEffects() && !effects.HasDependencies());
+      if (block->IsLoopHeader()) {
+        effects = GetLoopEffects(block);
+        DCHECK(!effects.HasSideEffects() && !effects.HasDependencies());
+      }
+    }
+  }
+
+  // Do a post order visit to ensure we visit a loop header after its loop body.
+  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+
+    SideEffects effects = SideEffects::None();
+    // Update `effects` with the side effects of all instructions in this block.
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      effects = effects.Union(instruction->GetSideEffects());
+      if (effects.HasAllSideEffects()) {
+        break;
+      }
+    }
+
+    block_effects_.Put(block->GetBlockId(), effects);
+
+    if (block->IsLoopHeader()) {
+      // The side effects of the loop header are part of the loop.
+      UpdateLoopEffects(block->GetLoopInformation(), effects);
+      HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+      if (pre_header->IsInLoop()) {
+        // Update the side effects of the outer loop with the side effects of the inner loop.
+        // Note that this works because we know all the blocks of the inner loop are visited
+        // before the loop header of the outer loop.
+        UpdateLoopEffects(pre_header->GetLoopInformation(), GetLoopEffects(block));
+      }
+    } else if (block->IsInLoop()) {
+      // Update the side effects of the loop with the side effects of this block.
+      UpdateLoopEffects(block->GetLoopInformation(), effects);
+    }
+  }
+}
+
+SideEffects GlobalValueNumberer::GetLoopEffects(HBasicBlock* block) const {
+  DCHECK(block->IsLoopHeader());
+  return loop_effects_.Get(block->GetBlockId());
+}
+
+SideEffects GlobalValueNumberer::GetBlockEffects(HBasicBlock* block) const {
+  return block_effects_.Get(block->GetBlockId());
+}
+
+static bool IsLoopExit(HBasicBlock* block, HBasicBlock* successor) {
+  HLoopInformation* block_info = block->GetLoopInformation();
+  HLoopInformation* other_info = successor->GetLoopInformation();
+  return block_info != other_info && (other_info == nullptr || block_info->IsIn(*other_info));
+}
+
+void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
+  if (kIsDebugBuild) {
+    // Check that all non back-edge predecessors have been visited.
+    for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
+      HBasicBlock* predecessor = block->GetPredecessors().Get(i);
+      DCHECK(visited_.Get(predecessor->GetBlockId())
+             || (block->GetLoopInformation() != nullptr
+                 && (block->GetLoopInformation()->GetBackEdges().Get(0) == predecessor)));
+    }
+    visited_.Put(block->GetBlockId(), true);
+  }
+
+  ValueSet* set = sets_.Get(block->GetBlockId());
+
+  if (block->IsLoopHeader()) {
+    set->Kill(GetLoopEffects(block));
+  }
+
+  HInstruction* current = block->GetFirstInstruction();
+  while (current != nullptr) {
+    set->Kill(current->GetSideEffects());
+    // Save the next instruction in case `current` is removed from the graph.
+    HInstruction* next = current->GetNext();
+    if (current->CanBeMoved()) {
+      HInstruction* existing = set->Lookup(current);
+      if (existing != nullptr) {
+        current->ReplaceWith(existing);
+        current->GetBlock()->RemoveInstruction(current);
+      } else {
+        set->Add(current);
+      }
+    }
+    current = next;
+  }
+
+  if (block == graph_->GetEntryBlock()) {
+    // The entry block should only accumulate constant instructions, and
+    // the builder puts constants only in the entry block.
+    // Therefore, there is no need to propagate the value set to the next block.
+    DCHECK_EQ(block->GetDominatedBlocks().Size(), 1u);
+    HBasicBlock* dominated = block->GetDominatedBlocks().Get(0);
+    sets_.Put(dominated->GetBlockId(), new (allocator_) ValueSet(allocator_));
+    return;
+  }
+
+  // Copy the value set to dominated blocks. We can re-use
+  // the current set for the last dominated block because we are done visiting
+  // this block.
+  for (size_t i = 0, e = block->GetDominatedBlocks().Size(); i < e; ++i) {
+    HBasicBlock* dominated = block->GetDominatedBlocks().Get(i);
+    sets_.Put(dominated->GetBlockId(), i == e - 1 ? set : set->Copy());
+  }
+
+  // Kill instructions in the value set of each successor. If the successor
+  // is a loop exit, then we use the side effects of the loop. If not, we use
+  // the side effects of this block.
+  for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
+    HBasicBlock* successor = block->GetSuccessors().Get(i);
+    if (successor->IsLoopHeader()
+        && successor->GetLoopInformation()->GetBackEdges().Get(0) == block) {
+      // In case of a back edge, we already have visited the loop header.
+      // We should not update its value set, because the last dominated block
+      // of the loop header uses the same value set.
+      DCHECK(visited_.Get(successor->GetBlockId()));
+      continue;
+    }
+    DCHECK(!visited_.Get(successor->GetBlockId()));
+    ValueSet* successor_set = sets_.Get(successor->GetBlockId());
+    // The set was created by the dominator, which we are guaranteed to have visited already.
+    DCHECK(successor_set != nullptr);
+
+    // If this block dominates this successor there is nothing to do.
+    // Also if the set is empty, there is nothing to kill.
+    if (successor->GetDominator() != block && !successor_set->IsEmpty()) {
+      if (block->IsInLoop() && IsLoopExit(block, successor)) {
+        // All instructions killed in the loop must be killed for a loop exit.
+        SideEffects effects = GetLoopEffects(block->GetLoopInformation()->GetHeader());
+        sets_.Get(successor->GetBlockId())->Kill(effects);
+      } else {
+        // Following block (that might be in the same loop).
+        // Just kill instructions based on this block's side effects.
+        sets_.Get(successor->GetBlockId())->Kill(GetBlockEffects(block));
+      }
+    }
+  }
+}
+
+}  // namespace art
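
The heart of VisitBasicBlock above is a lookup-or-record loop: each movable instruction is looked up in the inherited value set and either replaced by an existing equivalent or recorded as a new value. A toy, self-contained rendering of that loop over string-keyed "expressions" (the encoding is invented for the sketch):

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Identical pure expressions get the same value number, mirroring the
      // Lookup/Add pair in GlobalValueNumberer::VisitBasicBlock.
      std::vector<std::string> code = {"a+b", "c*d", "a+b"};
      std::map<std::string, int> value_set;
      int next_value = 0;
      for (const std::string& expr : code) {
        auto it = value_set.find(expr);
        if (it != value_set.end()) {
          std::printf("%s -> reuse v%d (redundant, would be removed)\n",
                      expr.c_str(), it->second);
        } else {
          value_set[expr] = next_value;
          std::printf("%s -> new v%d\n", expr.c_str(), next_value++);
        }
      }
    }
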
diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h
new file mode 100644
index 0000000..41b3ceb
--- /dev/null
+++ b/compiler/optimizing/gvn.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_GVN_H_
+#define ART_COMPILER_OPTIMIZING_GVN_H_
+
+#include <gtest/gtest.h>
+#include "nodes.h"
+
+namespace art {
+
+/**
+ * A node in the collision list of a ValueSet. Encodes the instruction,
+ * the hash code, and the next node in the collision list.
+ */
+class ValueSetNode : public ArenaObject {
+ public:
+  ValueSetNode(HInstruction* instruction, size_t hash_code, ValueSetNode* next)
+      : instruction_(instruction), hash_code_(hash_code), next_(next) {}
+
+  size_t GetHashCode() const { return hash_code_; }
+  HInstruction* GetInstruction() const { return instruction_; }
+  ValueSetNode* GetNext() const { return next_; }
+  void SetNext(ValueSetNode* node) { next_ = node; }
+
+ private:
+  HInstruction* const instruction_;
+  const size_t hash_code_;
+  ValueSetNode* next_;
+
+  DISALLOW_COPY_AND_ASSIGN(ValueSetNode);
+};
+
+/**
+ * A ValueSet holds instructions that can replace other instructions. It is updated
+ * through the `Add` method, and the `Kill` method. The `Kill` method removes
+ * instructions that are affected by the given side effect.
+ *
+ * The `Lookup` method returns an equivalent instruction to the given instruction
+ * if there is one in the set. In GVN, we would say those instructions have the
+ * same "number".
+ */
+class ValueSet : public ArenaObject {
+ public:
+  explicit ValueSet(ArenaAllocator* allocator)
+      : allocator_(allocator), number_of_entries_(0), collisions_(nullptr) {
+    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
+      table_[i] = nullptr;
+    }
+  }
+
+  // Adds an instruction in the set.
+  void Add(HInstruction* instruction) {
+    DCHECK(Lookup(instruction) == nullptr);
+    size_t hash_code = instruction->ComputeHashCode();
+    size_t index = hash_code % kDefaultNumberOfEntries;
+    if (table_[index] == nullptr) {
+      table_[index] = instruction;
+    } else {
+      collisions_ = new (allocator_) ValueSetNode(instruction, hash_code, collisions_);
+    }
+    ++number_of_entries_;
+  }
+
+  // If in the set, returns an equivalent instruction to the given instruction. Returns
+  // null otherwise.
+  HInstruction* Lookup(HInstruction* instruction) const {
+    size_t hash_code = instruction->ComputeHashCode();
+    size_t index = hash_code % kDefaultNumberOfEntries;
+    HInstruction* existing = table_[index];
+    if (existing != nullptr && existing->Equals(instruction)) {
+      return existing;
+    }
+
+    for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
+      if (node->GetHashCode() == hash_code) {
+        existing = node->GetInstruction();
+        if (existing->Equals(instruction)) {
+          return existing;
+        }
+      }
+    }
+    return nullptr;
+  }
+
+  // Removes all instructions in the set that are affected by the given side effects.
+  void Kill(SideEffects side_effects) {
+    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
+      HInstruction* instruction = table_[i];
+      if (instruction != nullptr && instruction->GetSideEffects().DependsOn(side_effects)) {
+        table_[i] = nullptr;
+        --number_of_entries_;
+      }
+    }
+
+    ValueSetNode* current = collisions_;
+    ValueSetNode* previous = nullptr;
+    while (current != nullptr) {
+      HInstruction* instruction = current->GetInstruction();
+      if (instruction->GetSideEffects().DependsOn(side_effects)) {
+        if (previous == nullptr) {
+          collisions_ = current->GetNext();
+        } else {
+          previous->SetNext(current->GetNext());
+        }
+        --number_of_entries_;
+      } else {
+        previous = current;
+      }
+      current = current->GetNext();
+    }
+  }
+
+  // Returns a copy of this set.
+  ValueSet* Copy() const {
+    ValueSet* copy = new (allocator_) ValueSet(allocator_);
+
+    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
+      copy->table_[i] = table_[i];
+    }
+
+    // Note that the order will be inverted in the copy. This is fine, as the order is not
+    // relevant for a ValueSet.
+    for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
+      copy->collisions_ = new (allocator_) ValueSetNode(
+          node->GetInstruction(), node->GetHashCode(), copy->collisions_);
+    }
+
+    copy->number_of_entries_ = number_of_entries_;
+    return copy;
+  }
+
+  bool IsEmpty() const { return number_of_entries_ == 0; }
+  size_t GetNumberOfEntries() const { return number_of_entries_; }
+
+ private:
+  static constexpr size_t kDefaultNumberOfEntries = 8;
+
+  ArenaAllocator* const allocator_;
+
+  // The number of entries in the set.
+  size_t number_of_entries_;
+
+  // The internal implementation of the set. It uses a fixed-size array
+  // indexed by hash code, plus a linked list to handle hash code collisions.
+  // TODO: Tune the original size of the fixed-size array, and support growing it.
+  ValueSetNode* collisions_;
+  HInstruction* table_[kDefaultNumberOfEntries];
+
+  DISALLOW_COPY_AND_ASSIGN(ValueSet);
+};
+
+/**
+ * Optimization phase that removes redundant instructions.
+ */
+class GlobalValueNumberer : public ValueObject {
+ public:
+  GlobalValueNumberer(ArenaAllocator* allocator, HGraph* graph)
+      : allocator_(allocator),
+        graph_(graph),
+        block_effects_(allocator, graph->GetBlocks().Size()),
+        loop_effects_(allocator, graph->GetBlocks().Size()),
+        sets_(allocator, graph->GetBlocks().Size()),
+        visited_(allocator, graph->GetBlocks().Size()) {
+    size_t number_of_blocks = graph->GetBlocks().Size();
+    block_effects_.SetSize(number_of_blocks);
+    loop_effects_.SetSize(number_of_blocks);
+    sets_.SetSize(number_of_blocks);
+    visited_.SetSize(number_of_blocks);
+
+    for (size_t i = 0; i < number_of_blocks; ++i) {
+      block_effects_.Put(i, SideEffects::None());
+      loop_effects_.Put(i, SideEffects::None());
+    }
+  }
+
+  void Run();
+
+ private:
+  // Per-block GVN. Will also update the ValueSet of the dominated and
+  // successor blocks.
+  void VisitBasicBlock(HBasicBlock* block);
+
+  // Compute side effects of individual blocks and loops. The GVN algorithm
+  // will use these side effects to update the ValueSet of individual blocks.
+  void ComputeSideEffects();
+
+  void UpdateLoopEffects(HLoopInformation* info, SideEffects effects);
+  SideEffects GetLoopEffects(HBasicBlock* block) const;
+  SideEffects GetBlockEffects(HBasicBlock* block) const;
+
+  ArenaAllocator* const allocator_;
+  HGraph* const graph_;
+
+  // Side effects of individual blocks, that is the union of the side effects
+  // of the instructions in the block.
+  GrowableArray<SideEffects> block_effects_;
+
+  // Side effects of loops, that is the union of the side effects of the
+  // blocks contained in that loop.
+  GrowableArray<SideEffects> loop_effects_;
+
+  // ValueSets for blocks. Initially null; the set for an individual block
+  // is allocated and populated by its dominator, and updated by all blocks
+  // on the path from the dominator to the block.
+  GrowableArray<ValueSet*> sets_;
+
+  // Marks visited blocks. Only used for debugging.
+  GrowableArray<bool> visited_;
+
+  FRIEND_TEST(GVNTest, LoopSideEffects);
+  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_GVN_H_
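
A simplified stand-in for the Kill semantics above, using std::map instead of the arena-allocated fixed-size table plus collision list; the per-entry boolean models what GetSideEffects().DependsOn(...) decides in the real code:

    #include <cstdio>
    #include <iterator>
    #include <map>
    #include <string>

    struct Entry {
      int id;
      bool depends_on_fields;  // stand-in for a SideEffects dependency
    };

    int main() {
      std::map<std::string, Entry> set;
      set["obj.f"] = {1, true};   // a field get: killed by a field write
      set["a + b"] = {2, false};  // pure arithmetic: no memory dependency
      // Kill(side_effects): drop every entry that depends on the effect.
      for (auto it = set.begin(); it != set.end();) {
        it = it->second.depends_on_fields ? set.erase(it) : std::next(it);
      }
      std::printf("entries left: %zu\n", set.size());  // 1: "a + b" survives
    }
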
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
new file mode 100644
index 0000000..ad6e338
--- /dev/null
+++ b/compiler/optimizing/gvn_test.cc
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "builder.h"
+#include "gvn.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+TEST(GVNTest, LocalFieldElimination) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  entry->AddInstruction(parameter);
+
+  HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+  entry->AddSuccessor(block);
+
+  block->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42)));
+  block->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42)));
+  HInstruction* to_remove = block->GetLastInstruction();
+  block->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(43)));
+  HInstruction* different_offset = block->GetLastInstruction();
+  // Kill the value.
+  block->AddInstruction(new (&allocator) HInstanceFieldSet(
+      parameter, parameter, Primitive::kPrimNot, MemberOffset(42)));
+  block->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42)));
+  HInstruction* use_after_kill = block->GetLastInstruction();
+  block->AddInstruction(new (&allocator) HExit());
+
+  ASSERT_EQ(to_remove->GetBlock(), block);
+  ASSERT_EQ(different_offset->GetBlock(), block);
+  ASSERT_EQ(use_after_kill->GetBlock(), block);
+
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+  GlobalValueNumberer(&allocator, graph).Run();
+
+  ASSERT_TRUE(to_remove->GetBlock() == nullptr);
+  ASSERT_EQ(different_offset->GetBlock(), block);
+  ASSERT_EQ(use_after_kill->GetBlock(), block);
+}
+
+TEST(GVNTest, GlobalFieldElimination) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  entry->AddInstruction(parameter);
+
+  HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+  entry->AddSuccessor(block);
+  block->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+
+  block->AddInstruction(new (&allocator) HIf(block->GetLastInstruction()));
+  HBasicBlock* then = new (&allocator) HBasicBlock(graph);
+  HBasicBlock* else_ = new (&allocator) HBasicBlock(graph);
+  HBasicBlock* join = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(then);
+  graph->AddBlock(else_);
+  graph->AddBlock(join);
+
+  block->AddSuccessor(then);
+  block->AddSuccessor(else_);
+  then->AddSuccessor(join);
+  else_->AddSuccessor(join);
+
+  then->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+  then->AddInstruction(new (&allocator) HGoto());
+  else_->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+  else_->AddInstruction(new (&allocator) HGoto());
+  join->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+  join->AddInstruction(new (&allocator) HExit());
+
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+  GlobalValueNumberer(&allocator, graph).Run();
+
+  // Check that all field get instructions have been GVN'ed.
+  ASSERT_TRUE(then->GetFirstInstruction()->IsGoto());
+  ASSERT_TRUE(else_->GetFirstInstruction()->IsGoto());
+  ASSERT_TRUE(join->GetFirstInstruction()->IsExit());
+}
+
+TEST(GVNTest, LoopFieldElimination) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+
+  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  entry->AddInstruction(parameter);
+
+  HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+  entry->AddSuccessor(block);
+  block->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+  block->AddInstruction(new (&allocator) HGoto());
+
+  HBasicBlock* loop_header = new (&allocator) HBasicBlock(graph);
+  HBasicBlock* loop_body = new (&allocator) HBasicBlock(graph);
+  HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
+
+  graph->AddBlock(loop_header);
+  graph->AddBlock(loop_body);
+  graph->AddBlock(exit);
+  block->AddSuccessor(loop_header);
+  loop_header->AddSuccessor(loop_body);
+  loop_header->AddSuccessor(exit);
+  loop_body->AddSuccessor(loop_header);
+
+  loop_header->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+  HInstruction* field_get_in_loop_header = loop_header->GetLastInstruction();
+  loop_header->AddInstruction(new (&allocator) HIf(block->GetLastInstruction()));
+
+  // Kill the value inside the loop body to prevent the field gets inside
+  // the loop header and the body from being GVN'ed.
+  loop_body->AddInstruction(new (&allocator) HInstanceFieldSet(
+      parameter, parameter, Primitive::kPrimNot, MemberOffset(42)));
+  HInstruction* field_set = loop_body->GetLastInstruction();
+  loop_body->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+  HInstruction* field_get_in_loop_body = loop_body->GetLastInstruction();
+  loop_body->AddInstruction(new (&allocator) HGoto());
+
+  exit->AddInstruction(
+      new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+  HInstruction* field_get_in_exit = exit->GetLastInstruction();
+  exit->AddInstruction(new (&allocator) HExit());
+
+  ASSERT_EQ(field_get_in_loop_header->GetBlock(), loop_header);
+  ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body);
+  ASSERT_EQ(field_get_in_exit->GetBlock(), exit);
+
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+  graph->FindNaturalLoops();
+  GlobalValueNumberer(&allocator, graph).Run();
+
+  // Check that the field get instructions inside the loop are still there.
+  ASSERT_EQ(field_get_in_loop_header->GetBlock(), loop_header);
+  ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body);
+  // The exit block is dominated by the loop header, whose field get is not
+  // killed by the loop side effects, so the field get in the exit block has
+  // been GVN'ed.
+  ASSERT_TRUE(field_get_in_exit->GetBlock() == nullptr);
+
+  // Now remove the field set, and check that all field get instructions have been GVN'ed.
+  loop_body->RemoveInstruction(field_set);
+  GlobalValueNumberer(&allocator, graph).Run();
+
+  ASSERT_TRUE(field_get_in_loop_header->GetBlock() == nullptr);
+  ASSERT_TRUE(field_get_in_loop_body->GetBlock() == nullptr);
+  ASSERT_TRUE(field_get_in_exit->GetBlock() == nullptr);
+}
+
+// Test that inner loops affect the side effects of the outer loop.
+TEST(GVNTest, LoopSideEffects) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+
+  HBasicBlock* outer_loop_header = new (&allocator) HBasicBlock(graph);
+  HBasicBlock* outer_loop_body = new (&allocator) HBasicBlock(graph);
+  HBasicBlock* outer_loop_exit = new (&allocator) HBasicBlock(graph);
+  HBasicBlock* inner_loop_header = new (&allocator) HBasicBlock(graph);
+  HBasicBlock* inner_loop_body = new (&allocator) HBasicBlock(graph);
+  HBasicBlock* inner_loop_exit = new (&allocator) HBasicBlock(graph);
+
+  graph->AddBlock(outer_loop_header);
+  graph->AddBlock(outer_loop_body);
+  graph->AddBlock(outer_loop_exit);
+  graph->AddBlock(inner_loop_header);
+  graph->AddBlock(inner_loop_body);
+  graph->AddBlock(inner_loop_exit);
+
+  entry->AddSuccessor(outer_loop_header);
+  outer_loop_header->AddSuccessor(outer_loop_body);
+  outer_loop_header->AddSuccessor(outer_loop_exit);
+  outer_loop_body->AddSuccessor(inner_loop_header);
+  inner_loop_header->AddSuccessor(inner_loop_body);
+  inner_loop_header->AddSuccessor(inner_loop_exit);
+  inner_loop_body->AddSuccessor(inner_loop_header);
+  inner_loop_exit->AddSuccessor(outer_loop_header);
+
+  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimBoolean);
+  entry->AddInstruction(parameter);
+  entry->AddInstruction(new (&allocator) HGoto());
+  outer_loop_header->AddInstruction(new (&allocator) HIf(parameter));
+  outer_loop_body->AddInstruction(new (&allocator) HGoto());
+  inner_loop_header->AddInstruction(new (&allocator) HIf(parameter));
+  inner_loop_body->AddInstruction(new (&allocator) HGoto());
+  inner_loop_exit->AddInstruction(new (&allocator) HGoto());
+  outer_loop_exit->AddInstruction(new (&allocator) HExit());
+
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+  graph->FindNaturalLoops();
+
+  ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn(
+      *outer_loop_header->GetLoopInformation()));
+
+  // Check that the loops don't have side effects.
+  {
+    // Make one block with a side effect.
+    entry->AddInstruction(new (&allocator) HInstanceFieldSet(
+        parameter, parameter, Primitive::kPrimNot, MemberOffset(42)));
+
+    GlobalValueNumberer gvn(&allocator, graph);
+    gvn.Run();
+
+    ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects());
+    ASSERT_FALSE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects());
+    ASSERT_FALSE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects());
+  }
+
+  // Check that the side effects of the outer loop do not affect the inner loop.
+  {
+    outer_loop_body->InsertInstructionBefore(
+        new (&allocator) HInstanceFieldSet(
+            parameter, parameter, Primitive::kPrimNot, MemberOffset(42)),
+        outer_loop_body->GetLastInstruction());
+
+    GlobalValueNumberer gvn(&allocator, graph);
+    gvn.Run();
+
+    ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects());
+    ASSERT_TRUE(gvn.GetBlockEffects(outer_loop_body).HasSideEffects());
+    ASSERT_TRUE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects());
+    ASSERT_FALSE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects());
+  }
+
+  // Check that the side effects of the inner loop affect the outer loop.
+  {
+    outer_loop_body->RemoveInstruction(outer_loop_body->GetFirstInstruction());
+    inner_loop_body->InsertInstructionBefore(
+        new (&allocator) HInstanceFieldSet(
+            parameter, parameter, Primitive::kPrimNot, MemberOffset(42)),
+        inner_loop_body->GetLastInstruction());
+
+    GlobalValueNumberer gvn(&allocator, graph);
+    gvn.Run();
+
+    ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects());
+    ASSERT_FALSE(gvn.GetBlockEffects(outer_loop_body).HasSideEffects());
+    ASSERT_TRUE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects());
+    ASSERT_TRUE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects());
+  }
+}
+}  // namespace art
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index fce97bd..1c36cdf 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -28,7 +28,7 @@
       call_kind_(call_kind),
       stack_mask_(nullptr),
       register_mask_(0),
-      live_registers_(0) {
+      live_registers_() {
   inputs_.SetSize(instruction->InputCount());
   for (size_t i = 0; i < instruction->InputCount(); ++i) {
     inputs_.Put(i, Location());
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 041e85b..06623b6 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -266,6 +266,34 @@
   uword value_;
 };
 
+class RegisterSet : public ValueObject {
+ public:
+  RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+
+  void Add(Location loc) {
+    // TODO: floating point registers.
+    core_registers_ |= (1 << loc.reg().RegId());
+  }
+
+  bool ContainsCoreRegister(uint32_t id) {
+    return Contains(core_registers_, id);
+  }
+
+  bool ContainsFloatingPointRegister(uint32_t id) {
+    return Contains(floating_point_registers_, id);
+  }
+
+  static bool Contains(uint32_t register_set, uint32_t reg) {
+    return (register_set & (1 << reg)) != 0;
+  }
+
+ private:
+  uint32_t core_registers_;
+  uint32_t floating_point_registers_;
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterSet);
+};
+
 /**
  * The code generator computes LocationSummary for each instruction so that
  * the instruction itself knows what code to generate: where to find the inputs
@@ -327,6 +355,8 @@
   Location Out() const { return output_; }
 
   bool CanCall() const { return call_kind_ != kNoCall; }
+  bool WillCall() const { return call_kind_ == kCall; }
+  bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
   bool NeedsSafepoint() const { return CanCall(); }
 
   void SetStackBit(uint32_t index) {
@@ -337,14 +367,22 @@
     register_mask_ |= (1 << reg_id);
   }
 
-  void SetLiveRegister(uint32_t reg_id) {
-    live_registers_ |= (1 << reg_id);
+  bool RegisterContainsObject(uint32_t reg_id) {
+    return RegisterSet::Contains(register_mask_, reg_id);
+  }
+
+  void AddLiveRegister(Location location) {
+    live_registers_.Add(location);
   }
 
   BitVector* GetStackMask() const {
     return stack_mask_;
   }
 
+  RegisterSet* GetLiveRegisters() {
+    return &live_registers_;
+  }
+
  private:
   GrowableArray<Location> inputs_;
   GrowableArray<Location> temps_;
@@ -359,7 +397,7 @@
   uint32_t register_mask_;
 
   // Registers that are in use at this position.
-  uint32_t live_registers_;
+  RegisterSet live_registers_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
 };
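
RegisterSet and the register_mask_ query both reduce to single-bit tests on a 32-bit mask. A self-contained sketch of that arithmetic (register ids are illustrative):

    #include <cstdint>
    #include <cstdio>

    // Same test as RegisterSet::Contains: bit `reg` of the mask.
    static bool Contains(uint32_t set, uint32_t reg) {
      return (set & (1u << reg)) != 0;
    }

    int main() {
      uint32_t core_registers = 0;
      core_registers |= (1u << 3);  // Add register r3, as RegisterSet::Add does
      std::printf("r3: %d, r4: %d\n",
                  Contains(core_registers, 3), Contains(core_registers, 4));
    }
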
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 207c605..09412a9 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -124,6 +124,7 @@
   // dominator of the block. We can then start visiting its successors.
   if (visits->Get(block->GetBlockId()) ==
       block->GetPredecessors().Size() - block->NumberOfBackEdges()) {
+    block->GetDominator()->AddDominatedBlock(block);
     reverse_post_order_.Add(block);
     for (size_t i = 0; i < block->GetSuccessors().Size(); i++) {
       VisitBlockForDominatorTree(block->GetSuccessors().Get(i), block, visits);
@@ -194,6 +195,11 @@
     }
     pre_header->AddSuccessor(header);
   }
+
+  // Make sure the second predecessor of a loop header is the back edge.
+  if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) {
+    header->SwapPredecessors();
+  }
 }
 
 void HGraph::SimplifyCFG() {
@@ -307,6 +313,14 @@
   instruction->SetId(GetGraph()->GetNextInstructionId());
 }
 
+void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial,
+                                                  HInstruction* replacement) {
+  DCHECK(initial->GetBlock() == this);
+  InsertInstructionBefore(replacement, initial);
+  initial->ReplaceWith(replacement);
+  RemoveInstruction(initial);
+}
+
 static void Add(HInstructionList* instruction_list,
                 HBasicBlock* block,
                 HInstruction* instruction) {
@@ -337,6 +351,16 @@
   for (size_t i = 0; i < instruction->InputCount(); i++) {
     instruction->InputAt(i)->RemoveUser(instruction, i);
   }
+
+  HEnvironment* environment = instruction->GetEnvironment();
+  if (environment != nullptr) {
+    for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+      HInstruction* vreg = environment->GetInstructionAt(i);
+      if (vreg != nullptr) {
+        vreg->RemoveEnvironmentUser(environment, i);
+      }
+    }
+  }
 }
 
 void HBasicBlock::RemoveInstruction(HInstruction* instruction) {
@@ -347,13 +371,16 @@
   Remove(&phis_, this, phi);
 }
 
-void HInstruction::RemoveUser(HInstruction* user, size_t input_index) {
-  HUseListNode<HInstruction>* previous = nullptr;
-  HUseListNode<HInstruction>* current = uses_;
+template <typename T>
+static void RemoveFromUseList(T* user,
+                              size_t input_index,
+                              HUseListNode<T>** list) {
+  HUseListNode<T>* previous = nullptr;
+  HUseListNode<T>* current = *list;
   while (current != nullptr) {
     if (current->GetUser() == user && current->GetIndex() == input_index) {
       if (previous == NULL) {
-        uses_ = current->GetTail();
+        *list = current->GetTail();
       } else {
         previous->SetTail(current->GetTail());
       }
@@ -363,6 +390,14 @@
   }
 }
 
+void HInstruction::RemoveUser(HInstruction* user, size_t input_index) {
+  RemoveFromUseList(user, input_index, &uses_);
+}
+
+void HInstruction::RemoveEnvironmentUser(HEnvironment* user, size_t input_index) {
+  RemoveFromUseList(user, input_index, &env_uses_);
+}
+
 void HInstructionList::AddInstruction(HInstruction* instruction) {
   if (first_instruction_ == nullptr) {
     DCHECK(last_instruction_ == nullptr);
@@ -392,6 +427,54 @@
   }
 }
 
+bool HInstructionList::FoundBefore(const HInstruction* instruction1,
+                                   const HInstruction* instruction2) const {
+  DCHECK_EQ(instruction1->GetBlock(), instruction2->GetBlock());
+  for (HInstructionIterator it(*this); !it.Done(); it.Advance()) {
+    if (it.Current() == instruction1) {
+      return true;
+    }
+    if (it.Current() == instruction2) {
+      return false;
+    }
+  }
+  LOG(FATAL) << "Did not find an order between two instructions of the same block.";
+  return true;
+}
+
+bool HInstruction::Dominates(HInstruction* other_instruction) const {
+  HBasicBlock* block = GetBlock();
+  HBasicBlock* other_block = other_instruction->GetBlock();
+  if (block != other_block) {
+    return GetBlock()->Dominates(other_instruction->GetBlock());
+  } else {
+    // If both instructions are in the same block, ensure this
+    // instruction comes before `other_instruction`.
+    if (IsPhi()) {
+      if (!other_instruction->IsPhi()) {
+        // Phis appear before non phi-instructions so this instruction
+        // dominates `other_instruction`.
+        return true;
+      } else {
+        // There is no order among phis.
+        LOG(FATAL) << "There is no dominance between phis of a same block.";
+        return false;
+      }
+    } else {
+      // `this` is not a phi.
+      if (other_instruction->IsPhi()) {
+        // Phis appear before non phi-instructions so this instruction
+        // does not dominate `other_instruction`.
+        return false;
+      } else {
+        // Check whether this instruction comes before
+        // `other_instruction` in the instruction list.
+        return block->GetInstructions().FoundBefore(this, other_instruction);
+      }
+    }
+  }
+}
+
 void HInstruction::ReplaceWith(HInstruction* other) {
   DCHECK(other != nullptr);
   for (HUseIterator<HInstruction> it(GetUses()); !it.Done(); it.Advance()) {
@@ -449,6 +532,18 @@
   }
 }
 
+HConstant* HBinaryOperation::TryStaticEvaluation(ArenaAllocator* allocator) const {
+  if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) {
+    int32_t value = Evaluate(GetLeft()->AsIntConstant()->GetValue(),
+                             GetRight()->AsIntConstant()->GetValue());
+    return new(allocator) HIntConstant(value);
+  } else if (GetLeft()->IsLongConstant() && GetRight()->IsLongConstant()) {
+    int64_t value = Evaluate(GetLeft()->AsLongConstant()->GetValue(),
+                             GetRight()->AsLongConstant()->GetValue());
+    return new(allocator) HLongConstant(value);
+  }
+  return nullptr;
+}
 
 bool HCondition::NeedsMaterialization() const {
   if (!HasOnlyOneUse()) {
@@ -460,16 +555,25 @@
     return true;
   }
 
-  // TODO: should we allow intervening instructions with no side-effect between this condition
-  // and the If instruction?
+  // TODO: if there are no intervening instructions with side effects between
+  // this condition and the If instruction, we should move the condition just
+  // before the If.
   if (GetNext() != user) {
     return true;
   }
   return false;
 }
 
+bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const {
+  HInstruction* previous = if_->GetPrevious();
+  while (previous != nullptr && previous->IsParallelMove()) {
+    previous = previous->GetPrevious();
+  }
+  return previous == this;
+}
+
 bool HInstruction::Equals(HInstruction* other) const {
   if (!InstructionTypeEquals(other)) return false;
+  DCHECK_EQ(GetKind(), other->GetKind());
   if (!InstructionDataEquals(other)) return false;
   if (GetType() != other->GetType()) return false;
   if (InputCount() != other->InputCount()) return false;
@@ -477,6 +581,7 @@
   for (size_t i = 0, e = InputCount(); i < e; ++i) {
     if (InputAt(i) != other->InputAt(i)) return false;
   }
+  DCHECK_EQ(ComputeHashCode(), other->ComputeHashCode());
   return true;
 }
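
TryStaticEvaluation above folds a binary operation only when both operands are constants of the same kind, and returns null otherwise so the caller keeps the original instruction. A toy version of that decision, using C++17 std::optional for brevity where the real code returns an HConstant* or nullptr:

    #include <cstdint>
    #include <cstdio>
    #include <optional>

    // Folds an addition only if both inputs are compile-time constants.
    static std::optional<int64_t> TryFoldAdd(std::optional<int64_t> lhs,
                                             std::optional<int64_t> rhs) {
      if (lhs.has_value() && rhs.has_value()) {
        return *lhs + *rhs;  // the Evaluate(...) step of the real code
      }
      return std::nullopt;   // at least one operand is unknown statically
    }

    int main() {
      std::printf("2+3 folds: %s, 2+x folds: %s\n",
                  TryFoldAdd(2, 3) ? "yes" : "no",
                  TryFoldAdd(2, std::nullopt) ? "yes" : "no");
    }
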
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index ed6dd93..be6b355 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -38,6 +38,7 @@
 static const int kDefaultNumberOfBlocks = 8;
 static const int kDefaultNumberOfSuccessors = 2;
 static const int kDefaultNumberOfPredecessors = 2;
+static const int kDefaultNumberOfDominatedBlocks = 1;
 static const int kDefaultNumberOfBackEdges = 1;
 
 enum IfCondition {
@@ -56,6 +57,12 @@
   void AddInstruction(HInstruction* instruction);
   void RemoveInstruction(HInstruction* instruction);
 
+  // Returns true if `instruction1` is found before `instruction2` in
+  // this instruction list and false otherwise. Aborts if neither of
+  // these instructions is found.
+  bool FoundBefore(const HInstruction* instruction1,
+                   const HInstruction* instruction2) const;
+
  private:
   HInstruction* first_instruction_;
   HInstruction* last_instruction_;
@@ -192,7 +199,8 @@
   HLoopInformation(HBasicBlock* header, HGraph* graph)
       : header_(header),
         back_edges_(graph->GetArena(), kDefaultNumberOfBackEdges),
-        blocks_(graph->GetArena(), graph->GetBlocks().Size(), false) {}
+        // Make bit vector growable, as the number of blocks may change.
+        blocks_(graph->GetArena(), graph->GetBlocks().Size(), true) {}
 
   HBasicBlock* GetHeader() const {
     return header_;
@@ -265,6 +273,7 @@
         successors_(graph->GetArena(), kDefaultNumberOfSuccessors),
         loop_information_(nullptr),
         dominator_(nullptr),
+        dominated_blocks_(graph->GetArena(), kDefaultNumberOfDominatedBlocks),
         block_id_(-1),
         lifetime_start_(kNoLifetime),
         lifetime_end_(kNoLifetime) {}
@@ -277,6 +286,10 @@
     return successors_;
   }
 
+  const GrowableArray<HBasicBlock*>& GetDominatedBlocks() const {
+    return dominated_blocks_;
+  }
+
   void AddBackEdge(HBasicBlock* back_edge) {
     if (loop_information_ == nullptr) {
       loop_information_ = new (graph_->GetArena()) HLoopInformation(this, graph_);
@@ -292,6 +305,7 @@
 
   HBasicBlock* GetDominator() const { return dominator_; }
   void SetDominator(HBasicBlock* dominator) { dominator_ = dominator; }
+  void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.Add(block); }
 
   int NumberOfBackEdges() const {
     return loop_information_ == nullptr
@@ -331,6 +345,13 @@
     block->successors_.Add(this);
   }
 
+  void SwapPredecessors() {
+    DCHECK_EQ(predecessors_.Size(), 2u);
+    HBasicBlock* temp = predecessors_.Get(0);
+    predecessors_.Put(0, predecessors_.Get(1));
+    predecessors_.Put(1, temp);
+  }
+
   size_t GetPredecessorIndexOf(HBasicBlock* predecessor) {
     for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
       if (predecessors_.Get(i) == predecessor) {
@@ -352,6 +373,9 @@
   void AddInstruction(HInstruction* instruction);
   void RemoveInstruction(HInstruction* instruction);
   void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
+  // Replace instruction `initial` with `replacement` within this block.
+  void ReplaceAndRemoveInstructionWith(HInstruction* initial,
+                                       HInstruction* replacement);
   void AddPhi(HPhi* phi);
   void RemovePhi(HPhi* phi);
 
@@ -359,6 +383,12 @@
     return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this);
   }
 
+  bool IsLoopPreHeaderFirstPredecessor() const {
+    DCHECK(IsLoopHeader());
+    DCHECK(!GetPredecessors().IsEmpty());
+    return GetPredecessors().Get(0) == GetLoopInformation()->GetPreHeader();
+  }
+
   HLoopInformation* GetLoopInformation() const {
     return loop_information_;
   }
@@ -401,6 +431,7 @@
   HInstructionList phis_;
   HLoopInformation* loop_information_;
   HBasicBlock* dominator_;
+  GrowableArray<HBasicBlock*> dominated_blocks_;
   int block_id_;
   size_t lifetime_start_;
   size_t lifetime_end_;
@@ -422,6 +453,7 @@
   M(If)                                                    \
   M(IntConstant)                                           \
   M(InvokeStatic)                                          \
+  M(InvokeVirtual)                                         \
   M(LoadLocal)                                             \
   M(Local)                                                 \
   M(LongConstant)                                          \
@@ -447,19 +479,22 @@
 
 #define FOR_EACH_INSTRUCTION(M)                            \
   FOR_EACH_CONCRETE_INSTRUCTION(M)                         \
-  M(Constant)
+  M(Constant)                                              \
+  M(BinaryOperation)
 
 #define FORWARD_DECLARATION(type) class H##type;
 FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
 #undef FORWARD_DECLARATION
 
-#define DECLARE_INSTRUCTION(type)                          \
-  virtual const char* DebugName() const { return #type; }  \
-  virtual H##type* As##type() { return this; }             \
-  virtual bool InstructionTypeEquals(HInstruction* other) const {     \
-    return other->Is##type();                              \
-  }                                                        \
-  virtual void Accept(HGraphVisitor* visitor)              \
+#define DECLARE_INSTRUCTION(type)                                       \
+  virtual InstructionKind GetKind() const { return k##type; }           \
+  virtual const char* DebugName() const { return #type; }               \
+  virtual const H##type* As##type() const OVERRIDE { return this; }     \
+  virtual H##type* As##type() OVERRIDE { return this; }                 \
+  virtual bool InstructionTypeEquals(HInstruction* other) const {       \
+    return other->Is##type();                                           \
+  }                                                                     \
+  virtual void Accept(HGraphVisitor* visitor)
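[Editor's note: for illustration, the mechanical expansion of the revised DECLARE_INSTRUCTION(Add). The macro now also emits GetKind() and a const overload of AsAdd(), which is what allows Is##type() to become a const member further down:]

    virtual InstructionKind GetKind() const { return kAdd; }
    virtual const char* DebugName() const { return "Add"; }
    virtual const HAdd* AsAdd() const OVERRIDE { return this; }
    virtual HAdd* AsAdd() OVERRIDE { return this; }
    virtual bool InstructionTypeEquals(HInstruction* other) const {
      return other->IsAdd();
    }
    virtual void Accept(HGraphVisitor* visitor)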
 
 template <typename T>
 class HUseListNode : public ArenaObject {
@@ -484,6 +519,8 @@
 // Represents the side effects an instruction may have.
 class SideEffects : public ValueObject {
  public:
+  SideEffects() : flags_(0) {}
+
   static SideEffects None() {
     return SideEffects(0);
   }
@@ -501,6 +538,31 @@
     return SideEffects(((1 << count) - 1) << kFlagChangesCount);
   }
 
+  SideEffects Union(SideEffects other) const {
+    return SideEffects(flags_ | other.flags_);
+  }
+
+  bool HasSideEffects() const {
+    size_t all_bits_set = (1 << kFlagChangesCount) - 1;
+    return (flags_ & all_bits_set) != 0;
+  }
+
+  bool HasAllSideEffects() const {
+    size_t all_bits_set = (1 << kFlagChangesCount) - 1;
+    return all_bits_set == (flags_ & all_bits_set);
+  }
+
+  bool DependsOn(SideEffects other) const {
+    size_t depends_flags = other.ComputeDependsFlags();
+    return (flags_ & depends_flags) != 0;
+  }
+
+  bool HasDependencies() const {
+    int count = kFlagDependsOnCount - kFlagChangesCount;
+    size_t all_bits_set = (1 << count) - 1;
+    return ((flags_ >> kFlagChangesCount) & all_bits_set) != 0;
+  }
+
  private:
   static constexpr int kFlagChangesSomething = 0;
   static constexpr int kFlagChangesCount = kFlagChangesSomething + 1;
@@ -508,10 +570,13 @@
   static constexpr int kFlagDependsOnSomething = kFlagChangesCount;
   static constexpr int kFlagDependsOnCount = kFlagDependsOnSomething + 1;
 
- private:
   explicit SideEffects(size_t flags) : flags_(flags) {}
 
-  const size_t flags_;
+  size_t ComputeDependsFlags() const {
+    return flags_ << kFlagChangesCount;
+  }
+
+  size_t flags_;
 };
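[Editor's note: a worked example of the new dependency test. With the constants above, kFlagChangesCount == 1, so bit 0 carries the "changes" flags and bit 1 the "depends" flags; ComputeDependsFlags() shifts an instruction's "changes" bits into the "depends" region:]

    // writer.flags_ == 0b01  (changes something)
    // reader.flags_ == 0b10  (depends on something)
    //
    // reader.DependsOn(writer):
    //   writer.ComputeDependsFlags() == 0b01 << 1 == 0b10
    //   reader.flags_ & 0b10 != 0   -> true, the ordering must be kept
    //
    // writer.DependsOn(reader):
    //   reader.ComputeDependsFlags() == 0b10 << 1 == 0b100
    //   writer.flags_ & 0b100 == 0  -> false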
 
 class HInstruction : public ArenaObject {
@@ -532,6 +597,12 @@
 
   virtual ~HInstruction() {}
 
+#define DECLARE_KIND(type) k##type,
+  enum InstructionKind {
+    FOR_EACH_INSTRUCTION(DECLARE_KIND)
+  };
+#undef DECLARE_KIND
+
   HInstruction* GetNext() const { return next_; }
   HInstruction* GetPrevious() const { return previous_; }
 
@@ -541,7 +612,7 @@
   bool IsInLoop() const { return block_->IsInLoop(); }
   bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); }
 
-  virtual size_t InputCount() const  = 0;
+  virtual size_t InputCount() const = 0;
   virtual HInstruction* InputAt(size_t i) const = 0;
 
   virtual void Accept(HGraphVisitor* visitor) = 0;
@@ -552,17 +623,20 @@
 
   virtual bool NeedsEnvironment() const { return false; }
   virtual bool IsControlFlow() const { return false; }
+  bool HasSideEffects() const { return side_effects_.HasSideEffects(); }
 
   void AddUseAt(HInstruction* user, size_t index) {
     uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HInstruction>(user, index, uses_);
   }
 
   void AddEnvUseAt(HEnvironment* user, size_t index) {
+    DCHECK(user != nullptr);
     env_uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HEnvironment>(
         user, index, env_uses_);
   }
 
   void RemoveUser(HInstruction* user, size_t index);
+  void RemoveEnvironmentUser(HEnvironment* user, size_t index);
 
   HUseListNode<HInstruction>* GetUses() const { return uses_; }
   HUseListNode<HEnvironment>* GetEnvUses() const { return env_uses_; }
@@ -581,6 +655,10 @@
     return result;
   }
 
+  // Does this instruction dominate `other_instruction`?  Aborts if
+  // this instruction and `other_instruction` are both phis.
+  bool Dominates(HInstruction* other_instruction) const;
+
   int GetId() const { return id_; }
   void SetId(int id) { id_ = id; }
 
@@ -606,7 +684,8 @@
   }
 
 #define INSTRUCTION_TYPE_CHECK(type)                                           \
-  bool Is##type() { return (As##type() != nullptr); }                          \
+  bool Is##type() const { return (As##type() != nullptr); }                    \
+  virtual const H##type* As##type() const { return nullptr; }                  \
   virtual H##type* As##type() { return nullptr; }
 
   FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
@@ -628,6 +707,18 @@
   // 2) Their inputs are identical.
   bool Equals(HInstruction* other) const;
 
+  virtual InstructionKind GetKind() const = 0;
+
+  virtual size_t ComputeHashCode() const {
+    size_t result = GetKind();
+    for (size_t i = 0, e = InputCount(); i < e; ++i) {
+      result = (result * 31) + InputAt(i)->GetId();
+    }
+    return result;
+  }
+
+  SideEffects GetSideEffects() const { return side_effects_; }
+
   size_t GetLifetimePosition() const { return lifetime_position_; }
   void SetLifetimePosition(size_t position) { lifetime_position_ = position; }
   LiveInterval* GetLiveInterval() const { return live_interval_; }
@@ -983,6 +1074,17 @@
   virtual bool CanBeMoved() const { return true; }
   virtual bool InstructionDataEquals(HInstruction* other) const { return true; }
 
+  // Try to statically evaluate this operation and return an HConstant
+  // containing the result of the evaluation.  If the operation cannot
+  // be evaluated as a constant, return nullptr.
+  HConstant* TryStaticEvaluation(ArenaAllocator* allocator) const;
+
+  // Apply this operation to `x` and `y`.
+  virtual int32_t Evaluate(int32_t x, int32_t y) const = 0;
+  virtual int64_t Evaluate(int64_t x, int64_t y) const = 0;
+
+  DECLARE_INSTRUCTION(BinaryOperation);
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HBinaryOperation);
 };
@@ -993,8 +1095,15 @@
       : HBinaryOperation(Primitive::kPrimBoolean, first, second) {}
 
   virtual bool IsCommutative() { return true; }
+
+  // For register allocation purposes, returns whether this instruction needs to be
+  // materialized (that is, not just live in the processor flags).
   bool NeedsMaterialization() const;
 
+  // For code generation purposes, returns whether this instruction is just before
+  // `if_`, disregarding moves in between.
+  bool IsBeforeWhenDisregardMoves(HIf* if_) const;
+
   DECLARE_INSTRUCTION(Condition);
 
   virtual IfCondition GetCondition() const = 0;
@@ -1009,6 +1118,9 @@
   HEqual(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
+  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x == y; }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x == y; }
+
   DECLARE_INSTRUCTION(Equal);
 
   virtual IfCondition GetCondition() const {
@@ -1024,6 +1136,9 @@
   HNotEqual(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
+  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x != y; }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x != y; }
+
   DECLARE_INSTRUCTION(NotEqual);
 
   virtual IfCondition GetCondition() const {
@@ -1039,6 +1154,9 @@
   HLessThan(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
+  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x < y; }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x < y; }
+
   DECLARE_INSTRUCTION(LessThan);
 
   virtual IfCondition GetCondition() const {
@@ -1054,6 +1172,9 @@
   HLessThanOrEqual(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
+  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x <= y; }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x <= y; }
+
   DECLARE_INSTRUCTION(LessThanOrEqual);
 
   virtual IfCondition GetCondition() const {
@@ -1069,6 +1190,9 @@
   HGreaterThan(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
+  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x > y; }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x > y; }
+
   DECLARE_INSTRUCTION(GreaterThan);
 
   virtual IfCondition GetCondition() const {
@@ -1084,6 +1208,9 @@
   HGreaterThanOrEqual(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
+  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x >= y; }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x >= y; }
+
   DECLARE_INSTRUCTION(GreaterThanOrEqual);
 
   virtual IfCondition GetCondition() const {
@@ -1105,6 +1232,19 @@
     DCHECK_EQ(type, second->GetType());
   }
 
+  virtual int32_t Evaluate(int32_t x, int32_t y) const {
+    return
+      x == y ? 0 :
+      x > y ? 1 :
+      -1;
+  }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const {
+    return
+      x == y ? 0 :
+      x > y ? 1 :
+      -1;
+  }
+
   DECLARE_INSTRUCTION(Compare);
 
  private:
@@ -1185,6 +1325,8 @@
     return other->AsIntConstant()->value_ == value_;
   }
 
+  virtual size_t ComputeHashCode() const { return GetValue(); }
+
   DECLARE_INSTRUCTION(IntConstant);
 
  private:
@@ -1203,6 +1345,8 @@
     return other->AsLongConstant()->value_ == value_;
   }
 
+  virtual size_t ComputeHashCode() const { return static_cast<size_t>(GetValue()); }
+
   DECLARE_INSTRUCTION(LongConstant);
 
  private:
@@ -1272,6 +1416,26 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeStatic);
 };
 
+class HInvokeVirtual : public HInvoke {
+ public:
+  HInvokeVirtual(ArenaAllocator* arena,
+                 uint32_t number_of_arguments,
+                 Primitive::Type return_type,
+                 uint32_t dex_pc,
+                 uint32_t vtable_index)
+      : HInvoke(arena, number_of_arguments, return_type, dex_pc),
+        vtable_index_(vtable_index) {}
+
+  uint32_t GetVTableIndex() const { return vtable_index_; }
+
+  DECLARE_INSTRUCTION(InvokeVirtual);
+
+ private:
+  const uint32_t vtable_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HInvokeVirtual);
+};
+
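[Editor's note: HInvokeVirtual records the vtable slot resolved from the dex method, so the backend can emit a plain indirect call through the receiver's class. The illustration below is hypothetical and self-contained; ART's real object and method layout differs:]

    #include <cstdint>

    struct Method { void (*entry_point)(void* receiver); };
    struct Class  { Method* vtable; };
    struct Object { Class* klass; };

    // What GetVTableIndex() lets the code generator emit: two loads and a call.
    inline void InvokeVirtual(Object* receiver, uint32_t vtable_index) {
      receiver->klass->vtable[vtable_index].entry_point(receiver);
    }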
 class HNewInstance : public HExpression<0> {
  public:
   HNewInstance(uint32_t dex_pc, uint16_t type_index)
@@ -1301,6 +1465,9 @@
 
   virtual bool IsCommutative() { return true; }
 
+  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x + y; }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x + y; }
+
   DECLARE_INSTRUCTION(Add);
 
  private:
@@ -1314,6 +1481,9 @@
 
   virtual bool IsCommutative() { return false; }
 
+  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x - y; }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x - y; }
+
   DECLARE_INSTRUCTION(Sub);
 
  private:
@@ -1446,6 +1616,10 @@
     return other_offset == GetFieldOffset().SizeValue();
   }
 
+  virtual size_t ComputeHashCode() const {
+    return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue();
+  }
+
   MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
   Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); }
 
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
new file mode 100644
index 0000000..b75bacb
--- /dev/null
+++ b/compiler/optimizing/nodes_test.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nodes.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+/**
+ * Test that removing instruction from the graph removes itself from user lists
+ * and environment lists.
+ */
+TEST(Node, RemoveInstruction) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  entry->AddInstruction(parameter);
+  entry->AddInstruction(new (&allocator) HGoto());
+
+  HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(first_block);
+  entry->AddSuccessor(first_block);
+  HInstruction* null_check = new (&allocator) HNullCheck(parameter, 0);
+  first_block->AddInstruction(null_check);
+  first_block->AddInstruction(new (&allocator) HReturnVoid());
+
+  HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(exit_block);
+  first_block->AddSuccessor(exit_block);
+  exit_block->AddInstruction(new (&allocator) HExit());
+
+  HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1);
+  null_check->SetEnvironment(environment);
+  environment->SetRawEnvAt(0, parameter);
+  parameter->AddEnvUseAt(null_check->GetEnvironment(), 0);
+
+  ASSERT_TRUE(parameter->HasEnvironmentUses());
+  ASSERT_TRUE(parameter->HasUses());
+
+  first_block->RemoveInstruction(null_check);
+
+  ASSERT_FALSE(parameter->HasEnvironmentUses());
+  ASSERT_FALSE(parameter->HasUses());
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 75f4155..702eba1 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -25,6 +25,7 @@
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "graph_visualizer.h"
+#include "gvn.h"
 #include "nodes.h"
 #include "register_allocator.h"
 #include "ssa_phi_elimination.h"
@@ -38,7 +39,7 @@
  */
 class CodeVectorAllocator FINAL : public CodeAllocator {
  public:
-  CodeVectorAllocator() { }
+  CodeVectorAllocator() {}
 
   virtual uint8_t* Allocate(size_t size) {
     size_ = size;
@@ -70,6 +71,7 @@
 class OptimizingCompiler FINAL : public Compiler {
  public:
   explicit OptimizingCompiler(CompilerDriver* driver);
+  ~OptimizingCompiler();
 
   bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit* cu) const
       OVERRIDE;
@@ -113,6 +115,13 @@
   void UnInit() const OVERRIDE;
 
  private:
+  // Whether we should run any optimization or register allocation. If false, we
+  // just run code generation after the graph is built.
+  const bool run_optimizations_;
+  mutable AtomicInteger total_compiled_methods_;
+  mutable AtomicInteger unoptimized_compiled_methods_;
+  mutable AtomicInteger optimized_compiled_methods_;
+
   std::unique_ptr<std::ostream> visualizer_output_;
 
   // Delegate to another compiler in case the optimizing compiler cannot compile a method.
@@ -122,8 +131,16 @@
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler);
 };
 
-OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) : Compiler(driver, 100),
-    delegate_(Create(driver, Compiler::Kind::kQuick)) {
+static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
+
+OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
+    : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
+      run_optimizations_(
+          driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime),
+      total_compiled_methods_(0),
+      unoptimized_compiled_methods_(0),
+      optimized_compiled_methods_(0),
+      delegate_(Create(driver, Compiler::Kind::kQuick)) {
   if (kIsVisualizerEnabled) {
     visualizer_output_.reset(new std::ofstream("art.cfg"));
   }
@@ -137,6 +154,18 @@
   delegate_->UnInit();
 }
 
+OptimizingCompiler::~OptimizingCompiler() {
+  if (total_compiled_methods_ == 0) {
+    LOG(INFO) << "Did not compile any method.";
+  } else {
+    size_t unoptimized_percent = (unoptimized_compiled_methods_ * 100 / total_compiled_methods_);
+    size_t optimized_percent = (optimized_compiled_methods_ * 100 / total_compiled_methods_);
+    LOG(INFO) << "Compiled " << total_compiled_methods_ << " methods: "
+              << unoptimized_percent << "% (" << unoptimized_compiled_methods_ << ") unoptimized, "
+              << optimized_percent << "% (" << optimized_compiled_methods_ << ") optimized.";
+  }
+}
+
 bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
                                           CompilationUnit* cu) const {
   return delegate_->CanCompileMethod(method_idx, dex_file, cu);
@@ -173,6 +202,7 @@
                                                uint32_t method_idx,
                                                jobject class_loader,
                                                const DexFile& dex_file) const {
+  total_compiled_methods_++;
   InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
   // Always use the thumb2 assembler: some runtime functionality (like implicit stack
   // overflow checks) assume thumb2.
@@ -222,7 +252,8 @@
 
   CodeVectorAllocator allocator;
 
-  if (RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set)) {
+  if (run_optimizations_ && RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set)) {
+    optimized_compiled_methods_++;
     graph->BuildDominatorTree();
     graph->TransformToSSA();
     visualizer.DumpGraph("ssa");
@@ -230,6 +261,8 @@
 
     SsaRedundantPhiElimination(graph).Run();
     SsaDeadPhiElimination(graph).Run();
+    GlobalValueNumberer(graph->GetArena(), graph).Run();
+    visualizer.DumpGraph(kGVNPassName);
 
     SsaLivenessAnalysis liveness(*graph, codegen);
     liveness.Analyze();
@@ -262,6 +295,7 @@
     LOG(FATAL) << "Could not allocate registers in optimizing compiler";
     return nullptr;
   } else {
+    unoptimized_compiled_methods_++;
     codegen->CompileBaseline(&allocator);
 
     // Run these phases to get some test coverage.
@@ -269,6 +303,9 @@
     graph->TransformToSSA();
     visualizer.DumpGraph("ssa");
     graph->FindNaturalLoops();
+    SsaRedundantPhiElimination(graph).Run();
+    SsaDeadPhiElimination(graph).Run();
+    GlobalValueNumberer(graph->GetArena(), graph).Run();
     SsaLivenessAnalysis liveness(*graph, codegen);
     liveness.Analyze();
     visualizer.DumpGraph(kLivenessPassName);
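[Editor's note: GlobalValueNumberer (declared in gvn.h, which is not among the hunks shown here) is the consumer of the Equals()/ComputeHashCode() pair and the SideEffects queries added to nodes.h. A minimal sketch of the core lookup, using a standard container instead of ART's arena-backed structures, so every name below is illustrative only:]

    #include <unordered_map>
    #include "nodes.h"

    // Sketch: map equivalent computations to a single representative.
    class ValueSet {
     public:
      // Returns an equivalent instruction already seen, or nullptr.
      HInstruction* Lookup(HInstruction* instruction) const {
        auto range = map_.equal_range(instruction->ComputeHashCode());
        for (auto it = range.first; it != range.second; ++it) {
          if (it->second->Equals(instruction)) return it->second;
        }
        return nullptr;
      }
      void Add(HInstruction* instruction) {
        map_.emplace(instruction->ComputeHashCode(), instruction);
      }
     private:
      std::unordered_multimap<size_t, HInstruction*> map_;
    };

[On a Lookup() hit, the pass rewrites uses with ReplaceWith(); sets must be pruned via GetSideEffects()/DependsOn() when crossing instructions that write state.]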
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index c409529..6dd53e5 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -17,8 +17,14 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_
 
+#include "nodes.h"
+#include "builder.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
 #include "ssa_liveness_analysis.h"
 
+#include "gtest/gtest.h"
+
 namespace art {
 
 #define NUM_INSTRUCTIONS(...)  \
@@ -61,6 +67,33 @@
   }
 }
 
+// Create a control-flow graph from Dex instructions.
+inline HGraph* CreateCFG(ArenaAllocator* allocator, const uint16_t* data) {
+  HGraphBuilder builder(allocator);
+  const DexFile::CodeItem* item =
+    reinterpret_cast<const DexFile::CodeItem*>(data);
+  HGraph* graph = builder.BuildGraph(*item);
+  return graph;
+}
+
+// Naive string diff data type.
+typedef std::list<std::pair<std::string, std::string>> diff_t;
+
+// An alias for the empty string used to make it clear that a line is
+// removed in a diff.
+static const std::string removed = "";
+
+// Naive patch command: apply a diff to a string.
+inline std::string Patch(const std::string& original, const diff_t& diff) {
+  std::string result = original;
+  for (const auto& p : diff) {
+    std::string::size_type pos = result.find(p.first);
+    EXPECT_NE(pos, std::string::npos);
+    result.replace(pos, p.first.size(), p.second);
+  }
+  return result;
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_
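[Editor's note: a usage sketch for the Patch() helper above. A baseline dump string (`expected_before`, assumed to be defined by the test) can be adjusted per test with a small diff_t instead of duplicating the whole expectation:]

    diff_t diff = {
      { "BasicBlock 3, pred: 2, 5, succ: 2\n", "BasicBlock 3, pred: 5, 2, succ: 2\n" },
      { "  7: Goto\n", removed },  // `removed` deletes the matched line.
    };
    std::string expected_after = Patch(expected_before, diff);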
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 7862611..1ac9b78 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -45,7 +45,8 @@
         number_of_registers_(-1),
         registers_array_(nullptr),
         blocked_registers_(allocator->AllocArray<bool>(codegen->GetNumberOfRegisters())),
-        reserved_out_slots_(0) {
+        reserved_out_slots_(0),
+        maximum_number_of_live_registers_(0) {
   codegen->SetupBlockedRegisters(blocked_registers_);
   physical_register_intervals_.SetSize(codegen->GetNumberOfRegisters());
   // Always reserve for the current method and the graph's max out registers.
@@ -157,9 +158,34 @@
     }
   }
 
+  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
+      && (instruction->GetType() != Primitive::kPrimFloat);
+
+  GrowableArray<LiveInterval*>& unhandled = core_register
+      ? unhandled_core_intervals_
+      : unhandled_fp_intervals_;
+
   if (locations->CanCall()) {
-    codegen_->MarkNotLeaf();
+    if (!instruction->IsSuspendCheck()) {
+      codegen_->MarkNotLeaf();
+    }
     safepoints_.Add(instruction);
+    if (locations->OnlyCallsOnSlowPath()) {
+      // We add a synthesized range at this position to record the registers
+      // live at it. Ideally, we could just update the safepoints when locations
+      // are updated, but we currently need to know the full stack size before
+      // updating locations (because of parameters and the fact that we don't
+      // have a frame pointer). Knowing the full stack size requires knowing
+      // the maximum number of live registers at calls in slow paths.
+      // By adding the following interval to the algorithm, we can compute this
+      // maximum before updating locations.
+      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+      interval->AddRange(position, position + 1);
+      unhandled.Add(interval);
+    }
+  }
+
+  if (locations->WillCall()) {
     // Block all registers.
     for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
       BlockRegister(Location::RegisterLocation(ManagedRegister(i)),
@@ -176,12 +202,6 @@
     }
   }
 
-  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
-      && (instruction->GetType() != Primitive::kPrimFloat);
-  GrowableArray<LiveInterval*>& unhandled = core_register
-      ? unhandled_core_intervals_
-      : unhandled_fp_intervals_;
-
   LiveInterval* current = instruction->GetLiveInterval();
   if (current == nullptr) return;
 
@@ -405,6 +425,14 @@
       }
     }
 
+    if (current->IsSlowPathSafepoint()) {
+      // Synthesized interval to record the maximum number of live registers
+      // at safepoints. No need to allocate a register for it.
+      maximum_number_of_live_registers_ =
+          std::max(maximum_number_of_live_registers_, active_.Size());
+      continue;
+    }
+
     // (4) Try to find an available register.
     bool success = TryAllocateFreeReg(current);
 
@@ -796,6 +824,10 @@
       // This is a parallel move for connecting siblings in a same block. We need to
       // differentiate it with moves for connecting blocks, and input moves.
       if (previous->GetLifetimePosition() != position) {
+        // If the previous instruction is not a parallel move for this position,
+        // we have to insert the new parallel move before the input or connecting
+        // block moves.
+        at = previous;
         previous = previous->GetPrevious();
       }
     }
@@ -926,14 +958,13 @@
       LocationSummary* locations = safepoint->GetLocations();
       if (!current->Covers(position)) continue;
 
-      if (current->GetType() == Primitive::kPrimNot) {
-        DCHECK(current->GetParent()->HasSpillSlot());
+      if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
         locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
       }
 
       switch (source.GetKind()) {
         case Location::kRegister: {
-          locations->SetLiveRegister(source.reg().RegId());
+          locations->AddLiveRegister(source);
           if (current->GetType() == Primitive::kPrimNot) {
             locations->SetRegisterBit(source.reg().RegId());
           }
@@ -1016,7 +1047,8 @@
 }
 
 void RegisterAllocator::Resolve() {
-  codegen_->ComputeFrameSize(spill_slots_.Size(), reserved_out_slots_);
+  codegen_->ComputeFrameSize(
+      spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_);
 
   // Adjust the Out Location of instructions.
   // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 7d397e3..3c305c8 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -179,6 +179,9 @@
   // Slots reserved for out arguments.
   size_t reserved_out_slots_;
 
+  // The maximum number of registers live at safepoints.
+  size_t maximum_number_of_live_registers_;
+
   FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
 
   DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 33b1f1f..dea6181 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -138,7 +138,8 @@
                HInstruction* defined_by = nullptr,
                bool is_fixed = false,
                int reg = kNoRegister,
-               bool is_temp = false)
+               bool is_temp = false,
+               bool is_slow_path_safepoint = false)
       : allocator_(allocator),
         first_range_(nullptr),
         last_range_(nullptr),
@@ -150,8 +151,14 @@
         spill_slot_(kNoSpillSlot),
         is_fixed_(is_fixed),
         is_temp_(is_temp),
+        is_slow_path_safepoint_(is_slow_path_safepoint),
         defined_by_(defined_by) {}
 
+  static LiveInterval* MakeSlowPathInterval(ArenaAllocator* allocator, HInstruction* instruction) {
+    return new (allocator) LiveInterval(
+        allocator, Primitive::kPrimVoid, instruction, false, kNoRegister, false, true);
+  }
+
   static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) {
     return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false);
   }
@@ -163,6 +170,7 @@
   }
 
   bool IsFixed() const { return is_fixed_; }
+  bool IsSlowPathSafepoint() const { return is_slow_path_safepoint_; }
 
   void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) {
     // Set the use within the instruction.
@@ -480,6 +488,9 @@
   // Whether the interval is for a temporary.
   const bool is_temp_;
 
+  // Whether the interval is for a safepoint that calls on the slow path.
+  const bool is_slow_path_safepoint_;
+
   // The instruction represented by this interval.
   HInstruction* const defined_by_;
 
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 65675dc..e02a182 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -102,7 +102,10 @@
 
     // Find if the inputs of the phi are the same instruction.
     HInstruction* candidate = phi->InputAt(0);
-    // A loop phi cannot have itself as the first phi.
+    // A loop phi cannot have itself as its first input. Note that this
+    // check relies on our simplification pass ensuring that the pre-header
+    // block is the first predecessor of the loop header.
+    DCHECK(!phi->IsLoopHeaderPhi() || phi->GetBlock()->IsLoopPreHeaderFirstPredecessor());
     DCHECK_NE(phi, candidate);
 
     for (size_t i = 1; i < phi->InputCount(); ++i) {
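[Editor's note: the redundancy this loop detects, in the dump notation of the tests below: a phi whose inputs are all the same instruction, e.g.

    4: Phi(2, 2) [5]    // both inputs are instruction 2

Every use of 4 can then be rewritten to use 2 and the phi removed. The DCHECK above is what makes reading InputAt(0) safe for loop-header phis: simplification guarantees the pre-header is predecessor 0, so input 0 can never be the phi itself.]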
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 99fd9eb..ad3b205 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -207,8 +207,8 @@
     "BasicBlock 2, pred: 3, 6, succ: 3\n"
     "  4: Phi(6, 0) [6]\n"
     "  5: Goto\n"
-    "BasicBlock 3, pred: 2, 5, succ: 2\n"
-    "  6: Phi(4, 0) [4]\n"
+    "BasicBlock 3, pred: 5, 2, succ: 2\n"
+    "  6: Phi(0, 4) [4]\n"
     "  7: Goto\n"
     "BasicBlock 4\n"
     // Synthesized blocks to avoid critical edge.
@@ -298,8 +298,8 @@
     "  2: Goto\n"
     "BasicBlock 1, pred: 0, succ: 4\n"
     "  3: Goto\n"
-    "BasicBlock 2, pred: 3, 4, succ: 5, 3\n"
-    "  4: Phi(1, 0) [9, 5, 5]\n"
+    "BasicBlock 2, pred: 4, 3, succ: 5, 3\n"
+    "  4: Phi(0, 1) [9, 5, 5]\n"
     "  5: Equal(4, 4) [6]\n"
     "  6: If(5)\n"
     "BasicBlock 3, pred: 2, succ: 2\n"
@@ -339,8 +339,8 @@
     "  6: Goto\n"
     "BasicBlock 3, pred: 1, succ: 8\n"
     "  7: Goto\n"
-    "BasicBlock 4, pred: 5, 8, succ: 6, 5\n"
-    "  8: Phi(8, 14) [8, 12, 9, 9]\n"
+    "BasicBlock 4, pred: 8, 5, succ: 6, 5\n"
+    "  8: Phi(14, 8) [8, 12, 9, 9]\n"
     "  9: Equal(8, 8) [10]\n"
     "  10: If(9)\n"
     "BasicBlock 5, pred: 4, succ: 4\n"
diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc
index 53fa74e..a860cb7 100644
--- a/compiler/optimizing/ssa_type_propagation.cc
+++ b/compiler/optimizing/ssa_type_propagation.cc
@@ -28,7 +28,11 @@
     case Primitive::kPrimNot:
       return existing;
     default:
-      return new_type;
+      // Phis are initialized with a void type, so if we are asked
+      // to merge with a void type, we should use the existing one.
+      return new_type == Primitive::kPrimVoid
+          ? existing
+          : new_type;
   }
 }
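[Editor's note: phis start out with a void type, so the default branch now treats kPrimVoid as "no information yet". The resulting merges, for cases that do not involve kPrimNot:]

    //   existing = kPrimInt,  new_type = kPrimVoid  -> kPrimInt   (keep existing)
    //   existing = kPrimVoid, new_type = kPrimInt   -> kPrimInt   (adopt new_type)
    //   existing = kPrimInt,  new_type = kPrimLong  -> kPrimLong  (new_type wins)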
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 891a287..e3a9580 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -116,6 +116,19 @@
     std::string subdir = toolsdir + std::string("/") + std::string(entry->d_name);
     size_t eabi = subdir.find(TOOL_PREFIX);
     if (eabi != std::string::npos) {
+      // Check if "bin/{as,objcopy,objdump}" exist under this folder.
+      struct stat exec_st;
+      std::string exec_path;
+      exec_path = subdir + "/bin/" + TOOL_PREFIX + "as";
+      if (stat(exec_path.c_str(), &exec_st) != 0)
+        continue;
+      exec_path = subdir + "/bin/" + TOOL_PREFIX + "objcopy";
+      if (stat(exec_path.c_str(), &exec_st) != 0)
+        continue;
+      exec_path = subdir + "/bin/" + TOOL_PREFIX + "objdump";
+      if (stat(exec_path.c_str(), &exec_st) != 0)
+        continue;
+
       std::string suffix = subdir.substr(eabi + strlen(TOOL_PREFIX));
       double version = strtod(suffix.c_str(), nullptr);
       if (version > maxversion) {
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index be6f097..9e6e958 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -160,7 +160,14 @@
   UsageError("      Example: --compiler-backend=Portable");
   UsageError("      Default: Quick");
   UsageError("");
-  UsageError("  --compiler-filter=(verify-none|interpret-only|space|balanced|speed|everything):");
+  UsageError("  --compiler-filter="
+                "(verify-none"
+                "|interpret-only"
+                "|space"
+                "|balanced"
+                "|speed"
+                "|everything"
+                "|time):");
   UsageError("      select compiler filter.");
   UsageError("      Example: --compiler-filter=everything");
 #if ART_SMALL_MODE
@@ -435,6 +442,11 @@
       return nullptr;
     }
 
+    // Flush result to disk. Patching code will re-open the file (mmap), so ensure that our view
+    // of the file already made it there and won't be re-ordered with writes from PatchOat or
+    // image patching.
+    oat_file->Flush();
+
     if (!driver->IsImage() && driver->GetCompilerOptions().GetIncludePatchInformation()) {
       t2.NewTiming("Patching ELF");
       std::string error_msg;
@@ -1181,6 +1193,8 @@
     compiler_filter = CompilerOptions::kSpeed;
   } else if (strcmp(compiler_filter_string, "everything") == 0) {
     compiler_filter = CompilerOptions::kEverything;
+  } else if (strcmp(compiler_filter_string, "time") == 0) {
+    compiler_filter = CompilerOptions::kTime;
   } else {
     Usage("Unknown --compiler-filter value %s", compiler_filter_string);
   }
@@ -1238,6 +1252,7 @@
   } else {
     oat_file.reset(new File(oat_fd, oat_location));
     oat_file->DisableAutoClose();
+    oat_file->SetLength(0);
   }
   if (oat_file.get() == nullptr) {
     PLOG(ERROR) << "Failed to create oat file: " << oat_location;
@@ -1376,7 +1391,7 @@
    * If we're not in interpret-only or verify-none mode, go ahead and compile small applications.
    * Don't bother to check if we're doing the image.
    */
-  if (!image && compiler_options->IsCompilationEnabled()) {
+  if (!image && compiler_options->IsCompilationEnabled() && compiler_kind == Compiler::kQuick) {
     size_t num_methods = 0;
     for (size_t i = 0; i != dex_files.size(); ++i) {
       const DexFile* dex_file = dex_files[i];
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index bac3c33..888f2d2 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -53,10 +53,12 @@
 #include "runtime.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
+#include "ScopedLocalRef.h"
 #include "thread_list.h"
 #include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier.h"
 #include "vmap_table.h"
+#include "well_known_classes.h"
 
 namespace art {
 
@@ -105,7 +107,6 @@
           "  --no-disassemble may be used to disable disassembly.\n"
           "      Example: --no-disassemble\n"
           "\n");
-  exit(EXIT_FAILURE);
 }
 
 const char* image_roots_descriptions_[] = {
@@ -343,18 +344,21 @@
                    bool dump_raw_gc_map,
                    bool dump_vmap,
                    bool disassemble_code,
-                   bool absolute_addresses)
+                   bool absolute_addresses,
+                   Handle<mirror::ClassLoader>* class_loader)
     : dump_raw_mapping_table_(dump_raw_mapping_table),
       dump_raw_gc_map_(dump_raw_gc_map),
       dump_vmap_(dump_vmap),
       disassemble_code_(disassemble_code),
-      absolute_addresses_(absolute_addresses) {}
+      absolute_addresses_(absolute_addresses),
+      class_loader_(class_loader) {}
 
   const bool dump_raw_mapping_table_;
   const bool dump_raw_gc_map_;
   const bool dump_vmap_;
   const bool disassemble_code_;
   const bool absolute_addresses_;
+  Handle<mirror::ClassLoader>* class_loader_;
 };
 
 class OatDumper {
@@ -366,6 +370,7 @@
       disassembler_(Disassembler::Create(oat_file_.GetOatHeader().GetInstructionSet(),
                                          new DisassemblerOptions(options_->absolute_addresses_,
                                                                  oat_file.Begin()))) {
+    CHECK(options_->class_loader_ != nullptr);
     AddAllOffsets();
   }
 
@@ -780,10 +785,7 @@
                                     oat_method.GetVmapTableOffsetOffset());
         success = false;
       } else if (options_->dump_vmap_) {
-        if (oat_method.GetNativeGcMap() != nullptr) {
-          // The native GC map is null for methods compiled with the optimizing compiler.
-          DumpVmap(*indent2_os, oat_method);
-        }
+        DumpVmap(*indent2_os, oat_method);
       }
     }
     {
@@ -894,6 +896,12 @@
   }
 
   void DumpVmap(std::ostream& os, const OatFile::OatMethod& oat_method) {
+    // If the native GC map is null, then this method has been compiled with the
+    // optimizing compiler. The optimizing compiler currently outputs its stack map
+    // in the vmap table, and the code below does not work with such a stack map.
+    if (oat_method.GetNativeGcMap() == nullptr) {
+      return;
+    }
     const uint8_t* raw_table = oat_method.GetVmapTable();
     if (raw_table != nullptr) {
       const VmapTable vmap_table(raw_table);
@@ -1170,9 +1178,10 @@
       StackHandleScope<1> hs(soa.Self());
       Handle<mirror::DexCache> dex_cache(
           hs.NewHandle(Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file)));
+      DCHECK(options_->class_loader_ != nullptr);
       return verifier::MethodVerifier::VerifyMethodAndDump(soa.Self(), os, dex_method_idx, dex_file,
                                                            dex_cache,
-                                                           NullHandle<mirror::ClassLoader>(),
+                                                           *options_->class_loader_,
                                                            &class_def, code_item,
                                                            NullHandle<mirror::ArtMethod>(),
                                                            method_access_flags);
@@ -1955,122 +1964,10 @@
   DISALLOW_COPY_AND_ASSIGN(ImageDumper);
 };
 
-static int oatdump(int argc, char** argv) {
-  InitLogging(argv);
+static NoopCompilerCallbacks callbacks;
 
-  // Skip over argv[0].
-  argv++;
-  argc--;
-
-  if (argc == 0) {
-    fprintf(stderr, "No arguments specified\n");
-    usage();
-  }
-
-  const char* oat_filename = nullptr;
-  const char* image_location = nullptr;
-  const char* boot_image_location = nullptr;
-  InstructionSet instruction_set = kRuntimeISA;
-  std::string elf_filename_prefix;
-  std::ostream* os = &std::cout;
-  std::unique_ptr<std::ofstream> out;
-  std::string output_name;
-  bool dump_raw_mapping_table = false;
-  bool dump_raw_gc_map = false;
-  bool dump_vmap = true;
-  bool disassemble_code = true;
-  bool symbolize = false;
-
-  for (int i = 0; i < argc; i++) {
-    const StringPiece option(argv[i]);
-    if (option.starts_with("--oat-file=")) {
-      oat_filename = option.substr(strlen("--oat-file=")).data();
-    } else if (option.starts_with("--image=")) {
-      image_location = option.substr(strlen("--image=")).data();
-    } else if (option.starts_with("--boot-image=")) {
-      boot_image_location = option.substr(strlen("--boot-image=")).data();
-    } else if (option.starts_with("--instruction-set=")) {
-      StringPiece instruction_set_str = option.substr(strlen("--instruction-set=")).data();
-      if (instruction_set_str == "arm") {
-        instruction_set = kThumb2;
-      } else if (instruction_set_str == "arm64") {
-        instruction_set = kArm64;
-      } else if (instruction_set_str == "mips") {
-        instruction_set = kMips;
-      } else if (instruction_set_str == "x86") {
-        instruction_set = kX86;
-      } else if (instruction_set_str == "x86_64") {
-        instruction_set = kX86_64;
-      }
-    } else if (option =="--dump:raw_mapping_table") {
-      dump_raw_mapping_table = true;
-    } else if (option == "--dump:raw_gc_map") {
-      dump_raw_gc_map = true;
-    } else if (option == "--no-dump:vmap") {
-      dump_vmap = false;
-    } else if (option == "--no-disassemble") {
-      disassemble_code = false;
-    } else if (option.starts_with("--output=")) {
-      output_name = option.substr(strlen("--output=")).ToString();
-      const char* filename = output_name.c_str();
-      out.reset(new std::ofstream(filename));
-      if (!out->good()) {
-        fprintf(stderr, "Failed to open output filename %s\n", filename);
-        usage();
-      }
-      os = out.get();
-    } else if (option.starts_with("--symbolize=")) {
-      oat_filename = option.substr(strlen("--symbolize=")).data();
-      symbolize = true;
-    } else {
-      fprintf(stderr, "Unknown argument %s\n", option.data());
-      usage();
-    }
-  }
-
-  if (image_location == nullptr && oat_filename == nullptr) {
-    fprintf(stderr, "Either --image or --oat must be specified\n");
-    return EXIT_FAILURE;
-  }
-
-  if (image_location != nullptr && oat_filename != nullptr) {
-    fprintf(stderr, "Either --image or --oat must be specified but not both\n");
-    return EXIT_FAILURE;
-  }
-
-  // If we are only doing the oat file, disable absolute_addresses. Keep them for image dumping.
-  bool absolute_addresses = (oat_filename == nullptr);
-  std::unique_ptr<OatDumperOptions> oat_dumper_options(new OatDumperOptions(dump_raw_mapping_table,
-                                                                            dump_raw_gc_map,
-                                                                            dump_vmap,
-                                                                            disassemble_code,
-                                                                            absolute_addresses));
-  if (oat_filename != nullptr) {
-    std::string error_msg;
-    OatFile* oat_file =
-        OatFile::Open(oat_filename, oat_filename, nullptr, false, &error_msg);
-    if (oat_file == nullptr) {
-      fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
-      return EXIT_FAILURE;
-    }
-    if (symbolize) {
-      OatSymbolizer oat_symbolizer(oat_file, output_name);
-      if (!oat_symbolizer.Init()) {
-        fprintf(stderr, "Failed to initialize symbolizer\n");
-        return EXIT_FAILURE;
-      }
-      if (!oat_symbolizer.Symbolize()) {
-        fprintf(stderr, "Failed to symbolize\n");
-        return EXIT_FAILURE;
-      }
-    } else {
-      OatDumper oat_dumper(*oat_file, oat_dumper_options.release());
-      bool success = oat_dumper.Dump(*os);
-      return (success) ? EXIT_SUCCESS : EXIT_FAILURE;
-    }
-    return EXIT_SUCCESS;
-  }
-
+static Runtime* StartRuntime(const char* boot_image_location, const char* image_location,
+                             InstructionSet instruction_set) {
   RuntimeOptions options;
   std::string image_option;
   std::string oat_option;
@@ -2078,7 +1975,6 @@
   std::string boot_oat_option;
 
   // We are more like a compiler than a run-time. We don't want to execute code.
-  NoopCompilerCallbacks callbacks;
   options.push_back(std::make_pair("compilercallbacks", &callbacks));
 
   if (boot_image_location != nullptr) {
@@ -2097,14 +1993,24 @@
 
   if (!Runtime::Create(options, false)) {
     fprintf(stderr, "Failed to create runtime\n");
-    return EXIT_FAILURE;
+    return nullptr;
   }
-  std::unique_ptr<Runtime> runtime(Runtime::Current());
+
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
   // give it away now and then switch to a more manageable ScopedObjectAccess.
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+
+  return Runtime::Current();
+}
+
+static int DumpImage(Runtime* runtime, const char* image_location, OatDumperOptions* options,
+                     std::ostream* os) {
+  // Dumping the image, no explicit class loader.
+  NullHandle<mirror::ClassLoader> null_class_loader;
+  options->class_loader_ = &null_class_loader;
+
   ScopedObjectAccess soa(Thread::Current());
-  gc::Heap* heap = Runtime::Current()->GetHeap();
+  gc::Heap* heap = runtime->GetHeap();
   gc::space::ImageSpace* image_space = heap->GetImageSpace();
   CHECK(image_space != nullptr);
   const ImageHeader& image_header = image_space->GetImageHeader();
@@ -2112,11 +2018,227 @@
     fprintf(stderr, "Invalid image header %s\n", image_location);
     return EXIT_FAILURE;
   }
-  ImageDumper image_dumper(os, *image_space, image_header, oat_dumper_options.release());
+  ImageDumper image_dumper(os, *image_space, image_header, options);
   bool success = image_dumper.Dump();
   return (success) ? EXIT_SUCCESS : EXIT_FAILURE;
 }
 
+static int DumpOatWithRuntime(Runtime* runtime, OatFile* oat_file, OatDumperOptions* options,
+                              std::ostream* os) {
+  CHECK(runtime != nullptr && oat_file != nullptr && options != nullptr);
+
+  Thread* self = Thread::Current();
+  CHECK(self != nullptr);
+  // Need well-known-classes.
+  WellKnownClasses::Init(self->GetJniEnv());
+
+  // Need to register dex files to get a working dex cache.
+  ScopedObjectAccess soa(self);
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  class_linker->RegisterOatFile(oat_file);
+  std::vector<const DexFile*> dex_files;
+  for (const OatFile::OatDexFile* odf : oat_file->GetOatDexFiles()) {
+    std::string error_msg;
+    const DexFile* dex_file = odf->OpenDexFile(&error_msg);
+    CHECK(dex_file != nullptr) << error_msg;
+    class_linker->RegisterDexFile(*dex_file);
+    dex_files.push_back(dex_file);
+  }
+
+  // Need a class loader.
+  ScopedLocalRef<jobject> class_loader_local(soa.Env(),
+      soa.Env()->AllocObject(WellKnownClasses::dalvik_system_PathClassLoader));
+  jobject class_loader = soa.Env()->NewGlobalRef(class_loader_local.get());
+  // Fake that we're a compiler.
+  runtime->SetCompileTimeClassPath(class_loader, dex_files);
+
+  // Use the class loader while dumping.
+  StackHandleScope<1> scope(self);
+  Handle<mirror::ClassLoader> loader_handle = scope.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(class_loader));
+  options->class_loader_ = &loader_handle;
+
+  OatDumper oat_dumper(*oat_file, options);
+  bool success = oat_dumper.Dump(*os);
+  return (success) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+static int DumpOatWithoutRuntime(OatFile* oat_file, OatDumperOptions* options, std::ostream* os) {
+  // No image = no class loader.
+  NullHandle<mirror::ClassLoader> null_class_loader;
+  options->class_loader_ = &null_class_loader;
+
+  OatDumper oat_dumper(*oat_file, options);
+  bool success = oat_dumper.Dump(*os);
+  return (success) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+static int DumpOat(Runtime* runtime, const char* oat_filename, OatDumperOptions* options,
+                   std::ostream* os) {
+  std::string error_msg;
+  OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, nullptr, false, &error_msg);
+  if (oat_file == nullptr) {
+    fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
+    return EXIT_FAILURE;
+  }
+
+  if (runtime != nullptr) {
+    return DumpOatWithRuntime(runtime, oat_file, options, os);
+  } else {
+    return DumpOatWithoutRuntime(oat_file, options, os);
+  }
+}
+
+static int SymbolizeOat(const char* oat_filename, std::string& output_name) {
+  std::string error_msg;
+  OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, nullptr, false, &error_msg);
+  if (oat_file == nullptr) {
+    fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
+    return EXIT_FAILURE;
+  }
+
+  OatSymbolizer oat_symbolizer(oat_file, output_name);
+  if (!oat_symbolizer.Init()) {
+    fprintf(stderr, "Failed to initialize symbolizer\n");
+    return EXIT_FAILURE;
+  }
+  if (!oat_symbolizer.Symbolize()) {
+    fprintf(stderr, "Failed to symbolize\n");
+    return EXIT_FAILURE;
+  }
+
+  return EXIT_SUCCESS;
+}
+
+struct OatdumpArgs {
+  bool Parse(int argc, char** argv) {
+    // Skip over argv[0].
+    argv++;
+    argc--;
+
+    if (argc == 0) {
+      fprintf(stderr, "No arguments specified\n");
+      usage();
+      return false;
+    }
+
+    for (int i = 0; i < argc; i++) {
+      const StringPiece option(argv[i]);
+      if (option.starts_with("--oat-file=")) {
+        oat_filename_ = option.substr(strlen("--oat-file=")).data();
+      } else if (option.starts_with("--image=")) {
+        image_location_ = option.substr(strlen("--image=")).data();
+      } else if (option.starts_with("--boot-image=")) {
+        boot_image_location_ = option.substr(strlen("--boot-image=")).data();
+      } else if (option.starts_with("--instruction-set=")) {
+        StringPiece instruction_set_str = option.substr(strlen("--instruction-set=")).data();
+        instruction_set_ = GetInstructionSetFromString(instruction_set_str.data());
+        if (instruction_set_ == kNone) {
+          fprintf(stderr, "Unsupported instruction set %s\n", instruction_set_str.data());
+          usage();
+          return false;
+        }
+      } else if (option == "--dump:raw_mapping_table") {
+        dump_raw_mapping_table_ = true;
+      } else if (option == "--dump:raw_gc_map") {
+        dump_raw_gc_map_ = true;
+      } else if (option == "--no-dump:vmap") {
+        dump_vmap_ = false;
+      } else if (option == "--no-disassemble") {
+        disassemble_code_ = false;
+      } else if (option.starts_with("--output=")) {
+        output_name_ = option.substr(strlen("--output=")).ToString();
+        const char* filename = output_name_.c_str();
+        out_.reset(new std::ofstream(filename));
+        if (!out_->good()) {
+          fprintf(stderr, "Failed to open output filename %s\n", filename);
+          usage();
+          return false;
+        }
+        os_ = out_.get();
+      } else if (option.starts_with("--symbolize=")) {
+        oat_filename_ = option.substr(strlen("--symbolize=")).data();
+        symbolize_ = true;
+      } else {
+        fprintf(stderr, "Unknown argument %s\n", option.data());
+        usage();
+        return false;
+      }
+    }
+
+    if (image_location_ == nullptr && oat_filename_ == nullptr) {
+      fprintf(stderr, "Either --image or --oat must be specified\n");
+      return false;
+    }
+
+    if (image_location_ != nullptr && oat_filename_ != nullptr) {
+      fprintf(stderr, "Either --image or --oat must be specified but not both\n");
+      return false;
+    }
+
+    return true;
+  }
+
+  const char* oat_filename_ = nullptr;
+  const char* image_location_ = nullptr;
+  const char* boot_image_location_ = nullptr;
+  InstructionSet instruction_set_ = kRuntimeISA;
+  std::string elf_filename_prefix_;
+  std::ostream* os_ = &std::cout;
+  std::unique_ptr<std::ofstream> out_;
+  std::string output_name_;
+  bool dump_raw_mapping_table_ = false;
+  bool dump_raw_gc_map_ = false;
+  bool dump_vmap_ = true;
+  bool disassemble_code_ = true;
+  bool symbolize_ = false;
+};
+
+static int oatdump(int argc, char** argv) {
+  InitLogging(argv);
+
+  OatdumpArgs args;
+  if (!args.Parse(argc, argv)) {
+    return EXIT_FAILURE;
+  }
+
+  // If we are only doing the oat file, disable absolute_addresses. Keep them for image dumping.
+  bool absolute_addresses = (args.oat_filename_ == nullptr);
+
+  std::unique_ptr<OatDumperOptions> oat_dumper_options(new OatDumperOptions(
+      args.dump_raw_mapping_table_,
+      args.dump_raw_gc_map_,
+      args.dump_vmap_,
+      args.disassemble_code_,
+      absolute_addresses,
+      nullptr));
+
+  std::unique_ptr<Runtime> runtime;
+  if ((args.boot_image_location_ != nullptr || args.image_location_ != nullptr) &&
+      !args.symbolize_) {
+    // If we have a boot image option, try to start the runtime; except when just symbolizing.
+    runtime.reset(StartRuntime(args.boot_image_location_,
+                               args.image_location_,
+                               args.instruction_set_));
+  }
+
+  if (args.oat_filename_ != nullptr) {
+    if (args.symbolize_) {
+      return SymbolizeOat(args.oat_filename_, args.output_name_);
+    } else {
+      return DumpOat(runtime.get(), args.oat_filename_, oat_dumper_options.release(), args.os_);
+    }
+  }
+
+  if (runtime.get() == nullptr) {
+    // We need the runtime when printing an image.
+    return EXIT_FAILURE;
+  }
+
+  return DumpImage(runtime.get(), args.image_location_, oat_dumper_options.release(), args.os_);
+}
+
 }  // namespace art
 
 int main(int argc, char** argv) {
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index f89a4f7..50b4ece 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -79,9 +79,10 @@
 
   bool have_android_data = false;
   bool dalvik_cache_exists = false;
+  bool is_global_cache = false;
   std::string dalvik_cache;
   GetDalvikCache(GetInstructionSetString(isa), false, &dalvik_cache,
-                 &have_android_data, &dalvik_cache_exists);
+                 &have_android_data, &dalvik_cache_exists, &is_global_cache);
 
   std::string cache_filename;
   if (have_android_data && dalvik_cache_exists) {
@@ -986,9 +987,11 @@
     std::string cache_filename;
     bool has_cache = false;
     bool has_android_data_unused = false;
+    bool is_global_cache = false;
     if (!gc::space::ImageSpace::FindImageFilename(patched_image_location.c_str(), isa,
                                                   &system_filename, &has_system, &cache_filename,
-                                                  &has_android_data_unused, &has_cache)) {
+                                                  &has_android_data_unused, &has_cache,
+                                                  &is_global_cache)) {
       Usage("Unable to determine image file for location %s", patched_image_location.c_str());
     }
     if (has_cache) {
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 61bc9ff..46b2e10 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -79,6 +79,7 @@
   intern_table.cc \
   interpreter/interpreter.cc \
   interpreter/interpreter_common.cc \
+  interpreter/interpreter_goto_table_impl.cc \
   interpreter/interpreter_switch_impl.cc \
   java_vm_ext.cc \
   jdwp/jdwp_event.cc \
@@ -200,10 +201,6 @@
   entrypoints/quick/quick_throw_entrypoints.cc \
   entrypoints/quick/quick_trampoline_entrypoints.cc
 
-# Source files that only compile with GCC.
-LIBART_GCC_ONLY_SRC_FILES := \
-  interpreter/interpreter_goto_table_impl.cc
-
 LIBART_TARGET_LDFLAGS :=
 LIBART_HOST_LDFLAGS :=
 
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index a7adb02..2c3e966 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -66,6 +66,7 @@
   kAllocatorTagCompileTimeClassPath,
   kAllocatorTagOatFile,
   kAllocatorTagDexFileVerifier,
+  kAllocatorTagRosAlloc,
   kAllocatorTagCount,  // Must always be last element.
 };
 std::ostream& operator<<(std::ostream& os, const AllocatorTag& tag);
@@ -149,6 +150,10 @@
     Key, T, Compare, TrackingAllocator<std::pair<Key, T>, kTag>> {
 };
 
+template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
+class AllocationTrackingSet : public std::set<Key, Compare, TrackingAllocator<Key, kTag>> {
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_ALLOCATOR_H_
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 455680b..2c95ede 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -884,6 +884,10 @@
     DCHECK(heap_bitmap_lock_ == nullptr);
     heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kTraceLock);
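+    // kTraceLock now ranks between kHeapBitmapLock and kRuntimeShutdownLock,
+    // matching the enum reordering in mutex.h below.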
+    DCHECK(trace_lock_ == nullptr);
+    trace_lock_ = new Mutex("trace lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kRuntimeShutdownLock);
     DCHECK(runtime_shutdown_lock_ == nullptr);
     runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", current_lock_level);
@@ -892,10 +896,6 @@
     DCHECK(profiler_lock_ == nullptr);
     profiler_lock_ = new Mutex("profiler lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kTraceLock);
-    DCHECK(trace_lock_ == nullptr);
-    trace_lock_ = new Mutex("trace lock", current_lock_level);
-
     UPDATE_CURRENT_LOCK_LEVEL(kDeoptimizationLock);
     DCHECK(deoptimization_lock_ == nullptr);
     deoptimization_lock_ = new Mutex("Deoptimization lock", current_lock_level);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 20f58de..8d2cdce 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -92,12 +92,12 @@
   kBreakpointInvokeLock,
   kAllocTrackerLock,
   kDeoptimizationLock,
-  kTraceLock,
   kProfilerLock,
   kJdwpEventListLock,
   kJdwpAttachLock,
   kJdwpStartLock,
   kRuntimeShutdownLock,
+  kTraceLock,
   kHeapBitmapLock,
   kMutatorLock,
   kThreadListSuspendThreadLock,
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
new file mode 100644
index 0000000..1a78d72
--- /dev/null
+++ b/runtime/check_reference_map_visitor.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_CHECK_REFERENCE_MAP_VISITOR_H_
+#define ART_RUNTIME_CHECK_REFERENCE_MAP_VISITOR_H_
+
+#include "gc_map.h"
+#include "mirror/art_method-inl.h"
+#include "scoped_thread_state_change.h"
+#include "stack_map.h"
+
+namespace art {
+
+// Helper class for tests checking that the compiler keeps track of dex registers
+// holding references.
+class CheckReferenceMapVisitor : public StackVisitor {
+ public:
+  explicit CheckReferenceMapVisitor(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr) {}
+
+  bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* m = GetMethod();
+    if (m != nullptr && (m->IsCalleeSaveMethod() || m->IsNative())) {
+      CHECK_EQ(GetDexPc(), DexFile::kDexNoIndex);
+    }
+
+    if (m == nullptr || m->IsNative() || m->IsRuntimeMethod() || IsShadowFrame()) {
+      return true;
+    }
+
+    LOG(INFO) << "At " << PrettyMethod(m, false);
+
+    if (m->IsCalleeSaveMethod()) {
+      LOG(WARNING) << "no PC for " << PrettyMethod(m);
+      return true;
+    }
+
+    return false;
+  }
+
+  void CheckReferences(int* registers, int number_of_references, uint32_t native_pc_offset)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (GetMethod()->IsOptimized()) {
+      CheckOptimizedMethod(registers, number_of_references, native_pc_offset);
+    } else {
+      CheckQuickMethod(registers, number_of_references, native_pc_offset);
+    }
+  }
+
+ private:
+  void CheckOptimizedMethod(int* registers, int number_of_references, uint32_t native_pc_offset)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* m = GetMethod();
+    CodeInfo code_info = m->GetOptimizedCodeInfo();
+    StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
+    DexRegisterMap dex_register_map =
+        code_info.GetDexRegisterMapOf(stack_map, m->GetCodeItem()->registers_size_);
+    MemoryRegion stack_mask = stack_map.GetStackMask();
+    uint32_t register_mask = stack_map.GetRegisterMask();
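+    // For each register the caller claims holds a reference, check that the stack
+    // map tracks it: on the stack, in a physical register, or as a null constant.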
+    for (int i = 0; i < number_of_references; ++i) {
+      int reg = registers[i];
+      CHECK(reg < m->GetCodeItem()->registers_size_);
+      DexRegisterMap::LocationKind location = dex_register_map.GetLocationKind(reg);
+      switch (location) {
+        case DexRegisterMap::kNone:
+          // Not set, should not be a reference.
+          CHECK(false);
+          break;
+        case DexRegisterMap::kInStack:
+          CHECK(stack_mask.LoadBit(dex_register_map.GetValue(reg) >> 2));
+          break;
+        case DexRegisterMap::kInRegister:
+          CHECK_NE(register_mask & dex_register_map.GetValue(reg), 0u);
+          break;
+        case DexRegisterMap::kConstant:
+          CHECK_EQ(dex_register_map.GetValue(reg), 0);
+          break;
+      }
+    }
+  }
+
+  void CheckQuickMethod(int* registers, int number_of_references, uint32_t native_pc_offset)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* m = GetMethod();
+    NativePcOffsetToReferenceMap map(m->GetNativeGcMap());
+    const uint8_t* ref_bitmap = map.FindBitMap(native_pc_offset);
+    CHECK(ref_bitmap);
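+    // The reference bitmap holds one bit per dex register:
+    // bit (reg % 8) of byte (reg / 8).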
+    for (int i = 0; i < number_of_references; ++i) {
+      int reg = registers[i];
+      CHECK(reg < m->GetCodeItem()->registers_size_);
+      CHECK((*(ref_bitmap + reg / 8) >> (reg % 8)) & 0x01)
+          << "Error: Reg @" << i << " is not in GC map";
+    }
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_CHECK_REFERENCE_MAP_VISITOR_H_
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 1686c27..f927720 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1323,8 +1323,9 @@
   std::string dalvik_cache;
   bool have_android_data = false;
   bool have_dalvik_cache = false;
+  bool is_global_cache = false;
   GetDalvikCache(GetInstructionSetString(kRuntimeISA), false, &dalvik_cache,
-                 &have_android_data, &have_dalvik_cache);
+                 &have_android_data, &have_dalvik_cache, &is_global_cache);
   std::string cache_filename;
   if (have_dalvik_cache) {
     cache_filename = GetDalvikCacheFilenameOrDie(dex_location.c_str(), dalvik_cache.c_str());
@@ -2949,7 +2950,7 @@
   }
   DCHECK(dst->IsArtMethod()) << PrettyDescriptor(dst->GetClass());
 
-  const char* old_cause = self->StartAssertNoThreadSuspension("LoadMethod");
+  ScopedAssertNoThreadSuspension ants(self, "LoadMethod");
   dst->SetDexMethodIndex(dex_method_idx);
   dst->SetDeclaringClass(klass.Get());
   dst->SetCodeItemOffset(it.GetMethodCodeItemOffset());
@@ -2996,7 +2997,6 @@
   }
   dst->SetAccessFlags(access_flags);
 
-  self->EndAssertNoThreadSuspension(old_cause);
   return dst;
 }
 
@@ -3473,8 +3473,7 @@
   if (!dex_cache_image_class_lookup_required_) {
     return;  // All dex cache classes are already in the class table.
   }
-  const char* old_no_suspend_cause =
-      self->StartAssertNoThreadSuspension("Moving image classes to class table");
+  ScopedAssertNoThreadSuspension ants(self, "Moving image classes to class table");
   mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches();
   std::string temp;
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
@@ -3500,13 +3499,10 @@
     }
   }
   dex_cache_image_class_lookup_required_ = false;
-  self->EndAssertNoThreadSuspension(old_no_suspend_cause);
 }
 
 mirror::Class* ClassLinker::LookupClassFromImage(const char* descriptor) {
-  Thread* self = Thread::Current();
-  const char* old_no_suspend_cause =
-      self->StartAssertNoThreadSuspension("Image class lookup");
+  ScopedAssertNoThreadSuspension ants(Thread::Current(), "Image class lookup");
   mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches();
   for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
@@ -3520,13 +3516,11 @@
         uint16_t type_idx = dex_file->GetIndexForTypeId(*type_id);
         mirror::Class* klass = dex_cache->GetResolvedType(type_idx);
         if (klass != nullptr) {
-          self->EndAssertNoThreadSuspension(old_no_suspend_cause);
           return klass;
         }
       }
     }
   }
-  self->EndAssertNoThreadSuspension(old_no_suspend_cause);
   return nullptr;
 }
 
@@ -5076,7 +5070,7 @@
   // we want a relatively stable order so that adding new fields
   // minimizes disruption of C++ version such as Class and Method.
   std::deque<mirror::ArtField*> grouped_and_sorted_fields;
-  const char* old_no_suspend_cause  = self->StartAssertNoThreadSuspension(
+  const char* old_no_suspend_cause = self->StartAssertNoThreadSuspension(
       "Naked ArtField references in deque");
   for (size_t i = 0; i < num_fields; i++) {
     mirror::ArtField* f = fields->Get(i);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index cc1e0b9..a9c4b4a 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -311,7 +311,7 @@
 static Dbg::HpsgWhen gDdmNhsgWhen = Dbg::HPSG_WHEN_NEVER;
 static Dbg::HpsgWhat gDdmNhsgWhat;
 
-static ObjectRegistry* gRegistry = nullptr;
+ObjectRegistry* Dbg::gRegistry = nullptr;
 
 // Recent allocation tracking.
 AllocRecord* Dbg::recent_allocation_records_ = nullptr;  // TODO: CircularBuffer<AllocRecord>
@@ -401,7 +401,7 @@
 
 static mirror::Array* DecodeNonNullArray(JDWP::RefTypeId id, JDWP::JdwpError* error)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Object* o = gRegistry->Get<mirror::Object*>(id, error);
+  mirror::Object* o = Dbg::GetObjectRegistry()->Get<mirror::Object*>(id, error);
   if (o == nullptr) {
     *error = JDWP::ERR_INVALID_OBJECT;
     return nullptr;
@@ -416,7 +416,7 @@
 
 static mirror::Class* DecodeClass(JDWP::RefTypeId id, JDWP::JdwpError* error)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Object* o = gRegistry->Get<mirror::Object*>(id, error);
+  mirror::Object* o = Dbg::GetObjectRegistry()->Get<mirror::Object*>(id, error);
   if (o == nullptr) {
     *error = JDWP::ERR_INVALID_OBJECT;
     return nullptr;
@@ -434,7 +434,7 @@
     EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_)
     LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Object* thread_peer = gRegistry->Get<mirror::Object*>(thread_id, error);
+  mirror::Object* thread_peer = Dbg::GetObjectRegistry()->Get<mirror::Object*>(thread_id, error);
   if (thread_peer == nullptr) {
     // This isn't even an object.
     *error = JDWP::ERR_INVALID_OBJECT;
@@ -511,8 +511,7 @@
  *
  * Null objects are tagged JT_OBJECT.
  */
-static JDWP::JdwpTag TagFromObject(const ScopedObjectAccessUnchecked& soa, mirror::Object* o)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+JDWP::JdwpTag Dbg::TagFromObject(const ScopedObjectAccessUnchecked& soa, mirror::Object* o) {
   return (o == nullptr) ? JDWP::JT_OBJECT : TagFromClass(soa, o->GetClass());
 }
 
@@ -842,8 +841,15 @@
   if (!o->IsClass()) {
     return StringPrintf("non-class %p", o);  // This is only used for debugging output anyway.
   }
+  return GetClassName(o->AsClass());
+}
+
+std::string Dbg::GetClassName(mirror::Class* klass) {
+  if (klass == nullptr) {
+    return "NULL";
+  }
   std::string temp;
-  return DescriptorToName(o->AsClass()->GetDescriptor(&temp));
+  return DescriptorToName(klass->GetDescriptor(&temp));
 }
 
 JDWP::JdwpError Dbg::GetClassObject(JDWP::RefTypeId id, JDWP::ObjectId* class_object_id) {
@@ -1108,8 +1114,7 @@
   gRegistry->DisposeObject(object_id, reference_count);
 }
 
-static JDWP::JdwpTypeTag GetTypeTag(mirror::Class* klass)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+JDWP::JdwpTypeTag Dbg::GetTypeTag(mirror::Class* klass) {
   DCHECK(klass != nullptr);
   if (klass->IsArrayClass()) {
     return JDWP::TT_ARRAY;
@@ -1422,17 +1427,7 @@
   return JDWP::ERR_NONE;
 }
 
-bool Dbg::MatchType(JDWP::RefTypeId instance_class_id, JDWP::RefTypeId class_id) {
-  JDWP::JdwpError error;
-  mirror::Class* c1 = DecodeClass(instance_class_id, &error);
-  CHECK(c1 != nullptr);
-  mirror::Class* c2 = DecodeClass(class_id, &error);
-  CHECK(c2 != nullptr);
-  return c2->IsAssignableFrom(c1);
-}
-
-static JDWP::FieldId ToFieldId(const mirror::ArtField* f)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+JDWP::FieldId Dbg::ToFieldId(const mirror::ArtField* f) {
   CHECK(!kMovingFields);
   return static_cast<JDWP::FieldId>(reinterpret_cast<uintptr_t>(f));
 }
@@ -1455,10 +1450,52 @@
   return reinterpret_cast<mirror::ArtMethod*>(static_cast<uintptr_t>(mid));
 }
 
-static void SetLocation(JDWP::JdwpLocation* location, mirror::ArtMethod* m, uint32_t dex_pc)
+bool Dbg::MatchThread(JDWP::ObjectId expected_thread_id, Thread* event_thread) {
+  CHECK(event_thread != nullptr);
+  JDWP::JdwpError error;
+  mirror::Object* expected_thread_peer = gRegistry->Get<mirror::Object*>(expected_thread_id,
+                                                                         &error);
+  return expected_thread_peer == event_thread->GetPeer();
+}
+
+bool Dbg::MatchLocation(const JDWP::JdwpLocation& expected_location,
+                        const JDWP::EventLocation& event_location) {
+  if (expected_location.dex_pc != event_location.dex_pc) {
+    return false;
+  }
+  mirror::ArtMethod* m = FromMethodId(expected_location.method_id);
+  return m == event_location.method;
+}
+
+bool Dbg::MatchType(mirror::Class* event_class, JDWP::RefTypeId class_id) {
+  if (event_class == nullptr) {
+    return false;
+  }
+  JDWP::JdwpError error;
+  mirror::Class* expected_class = DecodeClass(class_id, &error);
+  CHECK(expected_class != nullptr);
+  return expected_class->IsAssignableFrom(event_class);
+}
+
+bool Dbg::MatchField(JDWP::RefTypeId expected_type_id, JDWP::FieldId expected_field_id,
+                     mirror::ArtField* event_field) {
+  mirror::ArtField* expected_field = FromFieldId(expected_field_id);
+  if (expected_field != event_field) {
+    return false;
+  }
+  return Dbg::MatchType(event_field->GetDeclaringClass(), expected_type_id);
+}
+
+bool Dbg::MatchInstance(JDWP::ObjectId expected_instance_id, mirror::Object* event_instance) {
+  JDWP::JdwpError error;
+  mirror::Object* modifier_instance = gRegistry->Get<mirror::Object*>(expected_instance_id, &error);
+  return modifier_instance == event_instance;
+}
+
+void Dbg::SetJdwpLocation(JDWP::JdwpLocation* location, mirror::ArtMethod* m, uint32_t dex_pc)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (m == nullptr) {
-    memset(&location, 0, sizeof(location));
+    memset(location, 0, sizeof(*location));
   } else {
     mirror::Class* c = m->GetDeclaringClass();
     location->type_tag = GetTypeTag(c);
@@ -1470,11 +1507,18 @@
 
 std::string Dbg::GetMethodName(JDWP::MethodId method_id) {
   mirror::ArtMethod* m = FromMethodId(method_id);
+  if (m == nullptr) {
+    return "NULL";
+  }
   return m->GetName();
 }
 
 std::string Dbg::GetFieldName(JDWP::FieldId field_id) {
-  return FromFieldId(field_id)->GetName();
+  mirror::ArtField* f = FromFieldId(field_id);
+  if (f == nullptr) {
+    return "NULL";
+  }
+  return f->GetName();
 }
 
 /*
@@ -1757,7 +1801,7 @@
     return error;
   }
 
-  mirror::Object* o = gRegistry->Get<mirror::Object*>(object_id, &error);
+  mirror::Object* o = Dbg::GetObjectRegistry()->Get<mirror::Object*>(object_id, &error);
   if ((!is_static && o == nullptr) || error != JDWP::ERR_NONE) {
     return JDWP::ERR_INVALID_OBJECT;
   }
@@ -1819,7 +1863,7 @@
                                          uint64_t value, int width, bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JDWP::JdwpError error;
-  mirror::Object* o = gRegistry->Get<mirror::Object*>(object_id, &error);
+  mirror::Object* o = Dbg::GetObjectRegistry()->Get<mirror::Object*>(object_id, &error);
   if ((!is_static && o == nullptr) || error != JDWP::ERR_NONE) {
     return JDWP::ERR_INVALID_OBJECT;
   }
@@ -1853,7 +1897,7 @@
       f->Set32<false>(o, value);
     }
   } else {
-    mirror::Object* v = gRegistry->Get<mirror::Object*>(value, &error);
+    mirror::Object* v = Dbg::GetObjectRegistry()->Get<mirror::Object*>(value, &error);
     if (error != JDWP::ERR_NONE) {
       return JDWP::ERR_INVALID_OBJECT;
     }
@@ -1886,11 +1930,25 @@
   return SetFieldValueImpl(0, field_id, value, width, true);
 }
 
-std::string Dbg::StringToUtf8(JDWP::ObjectId string_id) {
+JDWP::JdwpError Dbg::StringToUtf8(JDWP::ObjectId string_id, std::string* str) {
   JDWP::JdwpError error;
-  mirror::String* s = gRegistry->Get<mirror::String*>(string_id, &error);
-  CHECK(s != nullptr) << error;
-  return s->ToModifiedUtf8();
+  mirror::Object* obj = gRegistry->Get<mirror::Object*>(string_id, &error);
+  if (error != JDWP::ERR_NONE) {
+    return error;
+  }
+  if (obj == nullptr) {
+    return JDWP::ERR_INVALID_OBJECT;
+  }
+  {
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    mirror::Class* java_lang_String = soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_String);
+    if (!java_lang_String->IsAssignableFrom(obj->GetClass())) {
+      // This isn't a string.
+      return JDWP::ERR_INVALID_STRING;
+    }
+  }
+  *str = obj->AsString()->ToModifiedUtf8();
+  return JDWP::ERR_NONE;
 }
 
 void Dbg::OutputJValue(JDWP::JdwpTag tag, const JValue* return_value, JDWP::ExpandBuf* pReply) {
@@ -1939,13 +1997,13 @@
 }
 
 JDWP::JdwpError Dbg::GetThreadGroup(JDWP::ObjectId thread_id, JDWP::ExpandBuf* pReply) {
-  ScopedObjectAccess soa(Thread::Current());
+  ScopedObjectAccessUnchecked soa(Thread::Current());
   JDWP::JdwpError error;
   mirror::Object* thread_object = gRegistry->Get<mirror::Object*>(thread_id, &error);
   if (error != JDWP::ERR_NONE) {
     return JDWP::ERR_INVALID_OBJECT;
   }
-  const char* old_cause = soa.Self()->StartAssertNoThreadSuspension("Debugger: GetThreadGroup");
+  ScopedAssertNoThreadSuspension ants(soa.Self(), "Debugger: GetThreadGroup");
   // Okay, so it's an object, but is it actually a thread?
   {
     MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
@@ -1966,38 +2024,124 @@
     JDWP::ObjectId thread_group_id = gRegistry->Add(group);
     expandBufAddObjectId(pReply, thread_group_id);
   }
-  soa.Self()->EndAssertNoThreadSuspension(old_cause);
   return error;
 }
 
-std::string Dbg::GetThreadGroupName(JDWP::ObjectId thread_group_id) {
-  ScopedObjectAccess soa(Thread::Current());
-  JDWP::JdwpError error;
-  mirror::Object* thread_group = gRegistry->Get<mirror::Object*>(thread_group_id, &error);
-  CHECK(thread_group != nullptr) << error;
-  const char* old_cause = soa.Self()->StartAssertNoThreadSuspension("Debugger: GetThreadGroupName");
+static mirror::Object* DecodeThreadGroup(ScopedObjectAccessUnchecked& soa,
+                                         JDWP::ObjectId thread_group_id, JDWP::JdwpError* error)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::Object* thread_group = Dbg::GetObjectRegistry()->Get<mirror::Object*>(thread_group_id,
+                                                                                error);
+  if (*error != JDWP::ERR_NONE) {
+    return nullptr;
+  }
+  if (thread_group == nullptr) {
+    *error = JDWP::ERR_INVALID_OBJECT;
+    return nullptr;
+  }
   mirror::Class* c = soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_ThreadGroup);
   CHECK(c != nullptr);
+  if (!c->IsAssignableFrom(thread_group->GetClass())) {
+    // This is not a java.lang.ThreadGroup.
+    *error = JDWP::ERR_INVALID_THREAD_GROUP;
+    return nullptr;
+  }
+  *error = JDWP::ERR_NONE;
+  return thread_group;
+}
+
+JDWP::JdwpError Dbg::GetThreadGroupName(JDWP::ObjectId thread_group_id, JDWP::ExpandBuf* pReply) {
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  JDWP::JdwpError error;
+  mirror::Object* thread_group = DecodeThreadGroup(soa, thread_group_id, &error);
+  if (error != JDWP::ERR_NONE) {
+    return error;
+  }
+  ScopedAssertNoThreadSuspension ants(soa.Self(), "Debugger: GetThreadGroupName");
+  mirror::Class* c = soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_ThreadGroup);
   mirror::ArtField* f = c->FindInstanceField("name", "Ljava/lang/String;");
   CHECK(f != nullptr);
   mirror::String* s = reinterpret_cast<mirror::String*>(f->GetObject(thread_group));
-  soa.Self()->EndAssertNoThreadSuspension(old_cause);
-  return s->ToModifiedUtf8();
+
+  std::string thread_group_name(s->ToModifiedUtf8());
+  expandBufAddUtf8String(pReply, thread_group_name);
+  return JDWP::ERR_NONE;
 }
 
-JDWP::ObjectId Dbg::GetThreadGroupParent(JDWP::ObjectId thread_group_id) {
+JDWP::JdwpError Dbg::GetThreadGroupParent(JDWP::ObjectId thread_group_id, JDWP::ExpandBuf* pReply) {
   ScopedObjectAccessUnchecked soa(Thread::Current());
   JDWP::JdwpError error;
-  mirror::Object* thread_group = gRegistry->Get<mirror::Object*>(thread_group_id, &error);
-  CHECK(thread_group != nullptr) << error;
-  const char* old_cause = soa.Self()->StartAssertNoThreadSuspension("Debugger: GetThreadGroupParent");
-  mirror::Class* c = soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_ThreadGroup);
-  CHECK(c != nullptr);
-  mirror::ArtField* f = c->FindInstanceField("parent", "Ljava/lang/ThreadGroup;");
-  CHECK(f != nullptr);
-  mirror::Object* parent = f->GetObject(thread_group);
-  soa.Self()->EndAssertNoThreadSuspension(old_cause);
-  return gRegistry->Add(parent);
+  mirror::Object* thread_group = DecodeThreadGroup(soa, thread_group_id, &error);
+  if (error != JDWP::ERR_NONE) {
+    return error;
+  }
+  mirror::Object* parent;
+  {
+    ScopedAssertNoThreadSuspension ants(soa.Self(), "Debugger: GetThreadGroupParent");
+    mirror::Class* c = soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_ThreadGroup);
+    CHECK(c != nullptr);
+    mirror::ArtField* f = c->FindInstanceField("parent", "Ljava/lang/ThreadGroup;");
+    CHECK(f != nullptr);
+    parent = f->GetObject(thread_group);
+  }
+  JDWP::ObjectId parent_group_id = gRegistry->Add(parent);
+  expandBufAddObjectId(pReply, parent_group_id);
+  return JDWP::ERR_NONE;
+}
+
+static void GetChildThreadGroups(ScopedObjectAccessUnchecked& soa, mirror::Object* thread_group,
+                                 std::vector<JDWP::ObjectId>* child_thread_group_ids)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  CHECK(thread_group != nullptr);
+
+  // Get the ArrayList<ThreadGroup> "groups" out of this thread group...
+  mirror::ArtField* groups_field = thread_group->GetClass()->FindInstanceField("groups", "Ljava/util/List;");
+  mirror::Object* groups_array_list = groups_field->GetObject(thread_group);
+
+  // Get the array and size out of the ArrayList<ThreadGroup>...
+  mirror::ArtField* array_field = groups_array_list->GetClass()->FindInstanceField("array", "[Ljava/lang/Object;");
+  mirror::ArtField* size_field = groups_array_list->GetClass()->FindInstanceField("size", "I");
+  mirror::ObjectArray<mirror::Object>* groups_array =
+      array_field->GetObject(groups_array_list)->AsObjectArray<mirror::Object>();
+  const int32_t size = size_field->GetInt(groups_array_list);
+
+  // Copy the first 'size' elements out of the array into the result.
+  ObjectRegistry* registry = Dbg::GetObjectRegistry();
+  for (int32_t i = 0; i < size; ++i) {
+    child_thread_group_ids->push_back(registry->Add(groups_array->Get(i)));
+  }
+}
+
+JDWP::JdwpError Dbg::GetThreadGroupChildren(JDWP::ObjectId thread_group_id,
+                                            JDWP::ExpandBuf* pReply) {
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  JDWP::JdwpError error;
+  mirror::Object* thread_group = DecodeThreadGroup(soa, thread_group_id, &error);
+  if (error != JDWP::ERR_NONE) {
+    return error;
+  }
+
+  // Add child threads.
+  {
+    std::vector<JDWP::ObjectId> child_thread_ids;
+    GetThreads(thread_group, &child_thread_ids);
+    expandBufAdd4BE(pReply, child_thread_ids.size());
+    for (JDWP::ObjectId child_thread_id : child_thread_ids) {
+      expandBufAddObjectId(pReply, child_thread_id);
+    }
+  }
+
+  // Add child thread groups.
+  {
+    std::vector<JDWP::ObjectId> child_thread_groups_ids;
+    GetChildThreadGroups(soa, thread_group, &child_thread_groups_ids);
+    expandBufAdd4BE(pReply, child_thread_groups_ids.size());
+    for (JDWP::ObjectId child_thread_group_id : child_thread_groups_ids) {
+      expandBufAddObjectId(pReply, child_thread_group_id);
+    }
+  }
+
+  return JDWP::ERR_NONE;
 }
 
 JDWP::ObjectId Dbg::GetSystemThreadGroupId() {
@@ -2007,13 +2151,6 @@
   return gRegistry->Add(group);
 }
 
-JDWP::ObjectId Dbg::GetMainThreadGroupId() {
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_mainThreadGroup);
-  mirror::Object* group = f->GetObject(f->GetDeclaringClass());
-  return gRegistry->Add(group);
-}
-
 JDWP::JdwpThreadStatus Dbg::ToJdwpThreadStatus(ThreadState state) {
   switch (state) {
     case kBlocked:
@@ -2111,11 +2248,8 @@
   return (group == desired_thread_group);
 }
 
-void Dbg::GetThreads(JDWP::ObjectId thread_group_id, std::vector<JDWP::ObjectId>* thread_ids) {
+void Dbg::GetThreads(mirror::Object* thread_group, std::vector<JDWP::ObjectId>* thread_ids) {
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  JDWP::JdwpError error;
-  mirror::Object* thread_group = gRegistry->Get<mirror::Object*>(thread_group_id, &error);
-  CHECK_EQ(error, JDWP::ERR_NONE);
   std::list<Thread*> all_threads_list;
   {
     MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
@@ -2147,30 +2281,6 @@
   }
 }
 
-void Dbg::GetChildThreadGroups(JDWP::ObjectId thread_group_id,
-                               std::vector<JDWP::ObjectId>* child_thread_group_ids) {
-  ScopedObjectAccess soa(Thread::Current());
-  JDWP::JdwpError error;
-  mirror::Object* thread_group = gRegistry->Get<mirror::Object*>(thread_group_id, &error);
-  CHECK(thread_group != nullptr) << error;
-
-  // Get the ArrayList<ThreadGroup> "groups" out of this thread group...
-  mirror::ArtField* groups_field = thread_group->GetClass()->FindInstanceField("groups", "Ljava/util/List;");
-  mirror::Object* groups_array_list = groups_field->GetObject(thread_group);
-
-  // Get the array and size out of the ArrayList<ThreadGroup>...
-  mirror::ArtField* array_field = groups_array_list->GetClass()->FindInstanceField("array", "[Ljava/lang/Object;");
-  mirror::ArtField* size_field = groups_array_list->GetClass()->FindInstanceField("size", "I");
-  mirror::ObjectArray<mirror::Object>* groups_array =
-      array_field->GetObject(groups_array_list)->AsObjectArray<mirror::Object>();
-  const int32_t size = size_field->GetInt(groups_array_list);
-
-  // Copy the first 'size' elements out of the array into the result.
-  for (int32_t i = 0; i < size; ++i) {
-    child_thread_group_ids->push_back(gRegistry->Add(groups_array->Get(i)));
-  }
-}
-
 static int GetStackDepth(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   struct CountStackDepthVisitor : public StackVisitor {
     explicit CountStackDepthVisitor(Thread* thread)
@@ -2231,7 +2341,7 @@
       if (depth_ >= start_frame_) {
         JDWP::FrameId frame_id(GetFrameId());
         JDWP::JdwpLocation location;
-        SetLocation(&location, GetMethod(), GetDexPc());
+        SetJdwpLocation(&location, GetMethod(), GetDexPc());
         VLOG(jdwp) << StringPrintf("    Frame %3zd: id=%3" PRIu64 " ", depth_, frame_id) << location;
         expandBufAdd8BE(buf_, frame_id);
         expandBufAddLocation(buf_, location);
@@ -2263,8 +2373,12 @@
 }
 
 JDWP::ObjectId Dbg::GetThreadSelfId() {
+  return GetThreadId(Thread::Current());
+}
+
+JDWP::ObjectId Dbg::GetThreadId(Thread* thread) {
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  return gRegistry->Add(soa.Self()->GetPeer());
+  return gRegistry->Add(thread->GetPeer());
 }
 
 void Dbg::SuspendVM() {
@@ -2372,312 +2486,340 @@
   return JDWP::ERR_NONE;
 }
 
-JDWP::JdwpError Dbg::GetLocalValue(JDWP::ObjectId thread_id, JDWP::FrameId frame_id, int slot,
-                                   JDWP::JdwpTag tag, uint8_t* buf, size_t width) {
-  struct GetLocalVisitor : public StackVisitor {
-    GetLocalVisitor(const ScopedObjectAccessUnchecked& soa, Thread* thread, Context* context,
-                    JDWP::FrameId frame_id, int slot, JDWP::JdwpTag tag, uint8_t* buf, size_t width)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        : StackVisitor(thread, context), soa_(soa), frame_id_(frame_id), slot_(slot), tag_(tag),
-          buf_(buf), width_(width), error_(JDWP::ERR_NONE) {}
+// Walks the stack until we find the frame with the given FrameId.
+class FindFrameVisitor FINAL : public StackVisitor {
+ public:
+  FindFrameVisitor(Thread* thread, Context* context, JDWP::FrameId frame_id)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : StackVisitor(thread, context), frame_id_(frame_id), error_(JDWP::ERR_INVALID_FRAMEID) {}
 
-    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
-    // annotalysis.
-    bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
-      if (GetFrameId() != frame_id_) {
-        return true;  // Not our frame, carry on.
-      }
-      // TODO: check that the tag is compatible with the actual type of the slot!
-      // TODO: check slot is valid for this method or return INVALID_SLOT error.
-      mirror::ArtMethod* m = GetMethod();
-      if (m->IsNative()) {
-        // We can't read local value from native method.
-        error_ = JDWP::ERR_OPAQUE_FRAME;
-        return false;
-      }
-      uint16_t reg = DemangleSlot(slot_, m);
-      constexpr JDWP::JdwpError kFailureErrorCode = JDWP::ERR_ABSENT_INFORMATION;
-      switch (tag_) {
-        case JDWP::JT_BOOLEAN: {
-          CHECK_EQ(width_, 1U);
-          uint32_t intVal;
-          if (GetVReg(m, reg, kIntVReg, &intVal)) {
-            VLOG(jdwp) << "get boolean local " << reg << " = " << intVal;
-            JDWP::Set1(buf_+1, intVal != 0);
-          } else {
-            VLOG(jdwp) << "failed to get boolean local " << reg;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        case JDWP::JT_BYTE: {
-          CHECK_EQ(width_, 1U);
-          uint32_t intVal;
-          if (GetVReg(m, reg, kIntVReg, &intVal)) {
-            VLOG(jdwp) << "get byte local " << reg << " = " << intVal;
-            JDWP::Set1(buf_+1, intVal);
-          } else {
-            VLOG(jdwp) << "failed to get byte local " << reg;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        case JDWP::JT_SHORT:
-        case JDWP::JT_CHAR: {
-          CHECK_EQ(width_, 2U);
-          uint32_t intVal;
-          if (GetVReg(m, reg, kIntVReg, &intVal)) {
-            VLOG(jdwp) << "get short/char local " << reg << " = " << intVal;
-            JDWP::Set2BE(buf_+1, intVal);
-          } else {
-            VLOG(jdwp) << "failed to get short/char local " << reg;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        case JDWP::JT_INT: {
-          CHECK_EQ(width_, 4U);
-          uint32_t intVal;
-          if (GetVReg(m, reg, kIntVReg, &intVal)) {
-            VLOG(jdwp) << "get int local " << reg << " = " << intVal;
-            JDWP::Set4BE(buf_+1, intVal);
-          } else {
-            VLOG(jdwp) << "failed to get int local " << reg;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        case JDWP::JT_FLOAT: {
-          CHECK_EQ(width_, 4U);
-          uint32_t intVal;
-          if (GetVReg(m, reg, kFloatVReg, &intVal)) {
-            VLOG(jdwp) << "get float local " << reg << " = " << intVal;
-            JDWP::Set4BE(buf_+1, intVal);
-          } else {
-            VLOG(jdwp) << "failed to get float local " << reg;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        case JDWP::JT_ARRAY:
-        case JDWP::JT_CLASS_LOADER:
-        case JDWP::JT_CLASS_OBJECT:
-        case JDWP::JT_OBJECT:
-        case JDWP::JT_STRING:
-        case JDWP::JT_THREAD:
-        case JDWP::JT_THREAD_GROUP: {
-          CHECK_EQ(width_, sizeof(JDWP::ObjectId));
-          uint32_t intVal;
-          if (GetVReg(m, reg, kReferenceVReg, &intVal)) {
-            mirror::Object* o = reinterpret_cast<mirror::Object*>(intVal);
-            VLOG(jdwp) << "get " << tag_ << " object local " << reg << " = " << o;
-            if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
-              LOG(FATAL) << "Register " << reg << " expected to hold " << tag_ << " object: " << o;
-            }
-            tag_ = TagFromObject(soa_, o);
-            JDWP::SetObjectId(buf_+1, gRegistry->Add(o));
-          } else {
-            VLOG(jdwp) << "failed to get " << tag_ << " object local " << reg;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        case JDWP::JT_DOUBLE: {
-          CHECK_EQ(width_, 8U);
-          uint64_t longVal;
-          if (GetVRegPair(m, reg, kDoubleLoVReg, kDoubleHiVReg, &longVal)) {
-            VLOG(jdwp) << "get double local " << reg << " = " << longVal;
-            JDWP::Set8BE(buf_+1, longVal);
-          } else {
-            VLOG(jdwp) << "failed to get double local " << reg;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        case JDWP::JT_LONG: {
-          CHECK_EQ(width_, 8U);
-          uint64_t longVal;
-          if (GetVRegPair(m, reg, kLongLoVReg, kLongHiVReg, &longVal)) {
-            VLOG(jdwp) << "get long local " << reg << " = " << longVal;
-            JDWP::Set8BE(buf_+1, longVal);
-          } else {
-            VLOG(jdwp) << "failed to get long local " << reg;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        default:
-          LOG(FATAL) << "Unknown tag " << tag_;
-          break;
-      }
-
-      // Prepend tag, which may have been updated.
-      JDWP::Set1(buf_, tag_);
-      return false;
+  // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
+  // annotalysis.
+  bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
+    if (GetFrameId() != frame_id_) {
+      return true;  // Not our frame, carry on.
     }
-    const ScopedObjectAccessUnchecked& soa_;
-    const JDWP::FrameId frame_id_;
-    const int slot_;
-    JDWP::JdwpTag tag_;
-    uint8_t* const buf_;
-    const size_t width_;
-    JDWP::JdwpError error_;
-  };
+    mirror::ArtMethod* m = GetMethod();
+    if (m->IsNative()) {
+      // We can't read or write local values in a native method.
+      error_ = JDWP::ERR_OPAQUE_FRAME;
+    } else {
+      // We found our frame.
+      error_ = JDWP::ERR_NONE;
+    }
+    return false;
+  }
+
+  JDWP::JdwpError GetError() const {
+    return error_;
+  }
+
+ private:
+  const JDWP::FrameId frame_id_;
+  JDWP::JdwpError error_;
+};
+
+JDWP::JdwpError Dbg::GetLocalValues(JDWP::Request* request, JDWP::ExpandBuf* pReply) {
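+  // StackFrame.GetValues: the request carries a thread id, a frame id and a list
+  // of (slot, tag) pairs; the reply is the count followed by the tagged values.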
+  JDWP::ObjectId thread_id = request->ReadThreadId();
+  JDWP::FrameId frame_id = request->ReadFrameId();
 
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
-  JDWP::JdwpError error;
-  Thread* thread = DecodeThread(soa, thread_id, &error);
-  if (error != JDWP::ERR_NONE) {
-    return error;
+  Thread* thread;
+  {
+    MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
+    JDWP::JdwpError error;
+    thread = DecodeThread(soa, thread_id, &error);
+    if (error != JDWP::ERR_NONE) {
+      return error;
+    }
   }
-  // TODO check thread is suspended by the debugger ?
+  // Find the frame with the given frame_id.
   std::unique_ptr<Context> context(Context::Create());
-  GetLocalVisitor visitor(soa, thread, context.get(), frame_id, slot, tag, buf, width);
+  FindFrameVisitor visitor(thread, context.get(), frame_id);
   visitor.WalkStack();
-  return visitor.error_;
+  if (visitor.GetError() != JDWP::ERR_NONE) {
+    return visitor.GetError();
+  }
+
+  // Read the values from the visitor's context.
+  int32_t slot_count = request->ReadSigned32("slot count");
+  expandBufAdd4BE(pReply, slot_count);     /* "int values" */
+  for (int32_t i = 0; i < slot_count; ++i) {
+    uint32_t slot = request->ReadUnsigned32("slot");
+    JDWP::JdwpTag reqSigByte = request->ReadTag();
+
+    VLOG(jdwp) << "    --> slot " << slot << " " << reqSigByte;
+
+    size_t width = Dbg::GetTagWidth(reqSigByte);
+    uint8_t* ptr = expandBufAddSpace(pReply, width + 1);
+    JDWP::JdwpError error = Dbg::GetLocalValue(visitor, soa, slot, reqSigByte, ptr, width);
+    if (error != JDWP::ERR_NONE) {
+      return error;
+    }
+  }
+  return JDWP::ERR_NONE;
 }
 
-JDWP::JdwpError Dbg::SetLocalValue(JDWP::ObjectId thread_id, JDWP::FrameId frame_id, int slot,
-                                   JDWP::JdwpTag tag, uint64_t value, size_t width) {
-  struct SetLocalVisitor : public StackVisitor {
-    SetLocalVisitor(Thread* thread, Context* context,
-                    JDWP::FrameId frame_id, int slot, JDWP::JdwpTag tag, uint64_t value,
-                    size_t width)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        : StackVisitor(thread, context),
-          frame_id_(frame_id), slot_(slot), tag_(tag), value_(value), width_(width),
-          error_(JDWP::ERR_NONE) {}
-
-    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
-    // annotalysis.
-    bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
-      if (GetFrameId() != frame_id_) {
-        return true;  // Not our frame, carry on.
+JDWP::JdwpError Dbg::GetLocalValue(const StackVisitor& visitor, ScopedObjectAccessUnchecked& soa,
+                                   int slot, JDWP::JdwpTag tag, uint8_t* buf, size_t width) {
+  mirror::ArtMethod* m = visitor.GetMethod();
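+  // JDWP slot numbers do not map directly onto dex virtual registers; demangle first.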
+  uint16_t reg = DemangleSlot(slot, m);
+  // TODO: check that the tag is compatible with the actual type of the slot!
+  // TODO: check slot is valid for this method or return INVALID_SLOT error.
+  constexpr JDWP::JdwpError kFailureErrorCode = JDWP::ERR_ABSENT_INFORMATION;
+  switch (tag) {
+    case JDWP::JT_BOOLEAN: {
+      CHECK_EQ(width, 1U);
+      uint32_t intVal;
+      if (visitor.GetVReg(m, reg, kIntVReg, &intVal)) {
+        VLOG(jdwp) << "get boolean local " << reg << " = " << intVal;
+        JDWP::Set1(buf + 1, intVal != 0);
+      } else {
+        VLOG(jdwp) << "failed to get boolean local " << reg;
+        return kFailureErrorCode;
       }
-      // TODO: check that the tag is compatible with the actual type of the slot!
-      // TODO: check slot is valid for this method or return INVALID_SLOT error.
-      mirror::ArtMethod* m = GetMethod();
-      if (m->IsNative()) {
-        // We can't read local value from native method.
-        error_ = JDWP::ERR_OPAQUE_FRAME;
-        return false;
-      }
-      uint16_t reg = DemangleSlot(slot_, m);
-      constexpr JDWP::JdwpError kFailureErrorCode = JDWP::ERR_ABSENT_INFORMATION;
-      switch (tag_) {
-        case JDWP::JT_BOOLEAN:
-        case JDWP::JT_BYTE:
-          CHECK_EQ(width_, 1U);
-          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) {
-            VLOG(jdwp) << "failed to set boolean/byte local " << reg << " = "
-                       << static_cast<uint32_t>(value_);
-            error_ = kFailureErrorCode;
-          }
-          break;
-        case JDWP::JT_SHORT:
-        case JDWP::JT_CHAR:
-          CHECK_EQ(width_, 2U);
-          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) {
-            VLOG(jdwp) << "failed to set short/char local " << reg << " = "
-                       << static_cast<uint32_t>(value_);
-            error_ = kFailureErrorCode;
-          }
-          break;
-        case JDWP::JT_INT:
-          CHECK_EQ(width_, 4U);
-          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) {
-            VLOG(jdwp) << "failed to set int local " << reg << " = "
-                       << static_cast<uint32_t>(value_);
-            error_ = kFailureErrorCode;
-          }
-          break;
-        case JDWP::JT_FLOAT:
-          CHECK_EQ(width_, 4U);
-          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kFloatVReg)) {
-            VLOG(jdwp) << "failed to set float local " << reg << " = "
-                       << static_cast<uint32_t>(value_);
-            error_ = kFailureErrorCode;
-          }
-          break;
-        case JDWP::JT_ARRAY:
-        case JDWP::JT_CLASS_LOADER:
-        case JDWP::JT_CLASS_OBJECT:
-        case JDWP::JT_OBJECT:
-        case JDWP::JT_STRING:
-        case JDWP::JT_THREAD:
-        case JDWP::JT_THREAD_GROUP: {
-          CHECK_EQ(width_, sizeof(JDWP::ObjectId));
-          JDWP::JdwpError error;
-          mirror::Object* o =
-              gRegistry->Get<mirror::Object*>(static_cast<JDWP::ObjectId>(value_), &error);
-          if (error != JDWP::ERR_NONE) {
-            VLOG(jdwp) << tag_ << " object " << value_ << " is an invalid object";
-            error_ = JDWP::ERR_INVALID_OBJECT;
-          } else if (!SetVReg(m, reg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)),
-                              kReferenceVReg)) {
-            VLOG(jdwp) << "failed to set " << tag_ << " object local " << reg << " = " << o;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        case JDWP::JT_DOUBLE: {
-          CHECK_EQ(width_, 8U);
-          bool success = SetVRegPair(m, reg, value_, kDoubleLoVReg, kDoubleHiVReg);
-          if (!success) {
-            VLOG(jdwp) << "failed to set double local " << reg << " = " << value_;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        case JDWP::JT_LONG: {
-          CHECK_EQ(width_, 8U);
-          bool success = SetVRegPair(m, reg, value_, kLongLoVReg, kLongHiVReg);
-          if (!success) {
-            VLOG(jdwp) << "failed to set double local " << reg << " = " << value_;
-            error_ = kFailureErrorCode;
-          }
-          break;
-        }
-        default:
-          LOG(FATAL) << "Unknown tag " << tag_;
-          break;
-      }
-      return false;
+      break;
     }
+    case JDWP::JT_BYTE: {
+      CHECK_EQ(width, 1U);
+      uint32_t intVal;
+      if (visitor.GetVReg(m, reg, kIntVReg, &intVal)) {
+        VLOG(jdwp) << "get byte local " << reg << " = " << intVal;
+        JDWP::Set1(buf + 1, intVal);
+      } else {
+        VLOG(jdwp) << "failed to get byte local " << reg;
+        return kFailureErrorCode;
+      }
+      break;
+    }
+    case JDWP::JT_SHORT:
+    case JDWP::JT_CHAR: {
+      CHECK_EQ(width, 2U);
+      uint32_t intVal;
+      if (visitor.GetVReg(m, reg, kIntVReg, &intVal)) {
+        VLOG(jdwp) << "get short/char local " << reg << " = " << intVal;
+        JDWP::Set2BE(buf + 1, intVal);
+      } else {
+        VLOG(jdwp) << "failed to get short/char local " << reg;
+        return kFailureErrorCode;
+      }
+      break;
+    }
+    case JDWP::JT_INT: {
+      CHECK_EQ(width, 4U);
+      uint32_t intVal;
+      if (visitor.GetVReg(m, reg, kIntVReg, &intVal)) {
+        VLOG(jdwp) << "get int local " << reg << " = " << intVal;
+        JDWP::Set4BE(buf + 1, intVal);
+      } else {
+        VLOG(jdwp) << "failed to get int local " << reg;
+        return kFailureErrorCode;
+      }
+      break;
+    }
+    case JDWP::JT_FLOAT: {
+      CHECK_EQ(width, 4U);
+      uint32_t intVal;
+      if (visitor.GetVReg(m, reg, kFloatVReg, &intVal)) {
+        VLOG(jdwp) << "get float local " << reg << " = " << intVal;
+        JDWP::Set4BE(buf + 1, intVal);
+      } else {
+        VLOG(jdwp) << "failed to get float local " << reg;
+        return kFailureErrorCode;
+      }
+      break;
+    }
+    case JDWP::JT_ARRAY:
+    case JDWP::JT_CLASS_LOADER:
+    case JDWP::JT_CLASS_OBJECT:
+    case JDWP::JT_OBJECT:
+    case JDWP::JT_STRING:
+    case JDWP::JT_THREAD:
+    case JDWP::JT_THREAD_GROUP: {
+      CHECK_EQ(width, sizeof(JDWP::ObjectId));
+      uint32_t intVal;
+      if (visitor.GetVReg(m, reg, kReferenceVReg, &intVal)) {
+        mirror::Object* o = reinterpret_cast<mirror::Object*>(intVal);
+        VLOG(jdwp) << "get " << tag << " object local " << reg << " = " << o;
+        if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
+          LOG(FATAL) << "Register " << reg << " expected to hold " << tag << " object: " << o;
+        }
+        tag = TagFromObject(soa, o);
+        JDWP::SetObjectId(buf + 1, gRegistry->Add(o));
+      } else {
+        VLOG(jdwp) << "failed to get " << tag << " object local " << reg;
+        return kFailureErrorCode;
+      }
+      break;
+    }
+    case JDWP::JT_DOUBLE: {
+      CHECK_EQ(width, 8U);
+      uint64_t longVal;
+      if (visitor.GetVRegPair(m, reg, kDoubleLoVReg, kDoubleHiVReg, &longVal)) {
+        VLOG(jdwp) << "get double local " << reg << " = " << longVal;
+        JDWP::Set8BE(buf + 1, longVal);
+      } else {
+        VLOG(jdwp) << "failed to get double local " << reg;
+        return kFailureErrorCode;
+      }
+      break;
+    }
+    case JDWP::JT_LONG: {
+      CHECK_EQ(width, 8U);
+      uint64_t longVal;
+      if (visitor.GetVRegPair(m, reg, kLongLoVReg, kLongHiVReg, &longVal)) {
+        VLOG(jdwp) << "get long local " << reg << " = " << longVal;
+        JDWP::Set8BE(buf + 1, longVal);
+      } else {
+        VLOG(jdwp) << "failed to get long local " << reg;
+        return kFailureErrorCode;
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unknown tag " << tag;
+      break;
+  }
 
-    const JDWP::FrameId frame_id_;
-    const int slot_;
-    const JDWP::JdwpTag tag_;
-    const uint64_t value_;
-    const size_t width_;
-    JDWP::JdwpError error_;
-  };
+  // Prepend tag, which may have been updated.
+  JDWP::Set1(buf, tag);
+  return JDWP::ERR_NONE;
+}
+
+JDWP::JdwpError Dbg::SetLocalValues(JDWP::Request* request) {
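+  // StackFrame.SetValues: same framing as GetValues, except each (slot, tag) entry
+  // also carries the value to write and the reply has no payload.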
+  JDWP::ObjectId thread_id = request->ReadThreadId();
+  JDWP::FrameId frame_id = request->ReadFrameId();
 
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
-  JDWP::JdwpError error;
-  Thread* thread = DecodeThread(soa, thread_id, &error);
-  if (error != JDWP::ERR_NONE) {
-    return error;
+  Thread* thread;
+  {
+    MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
+    JDWP::JdwpError error;
+    thread = DecodeThread(soa, thread_id, &error);
+    if (error != JDWP::ERR_NONE) {
+      return error;
+    }
   }
-  // TODO check thread is suspended by the debugger ?
+  // Find the frame with the given frame_id.
   std::unique_ptr<Context> context(Context::Create());
-  SetLocalVisitor visitor(thread, context.get(), frame_id, slot, tag, value, width);
+  FindFrameVisitor visitor(thread, context.get(), frame_id);
   visitor.WalkStack();
-  return visitor.error_;
+  if (visitor.GetError() != JDWP::ERR_NONE) {
+    return visitor.GetError();
+  }
+
+  // Write the new values into the visitor's context.
+  int32_t slot_count = request->ReadSigned32("slot count");
+  for (int32_t i = 0; i < slot_count; ++i) {
+    uint32_t slot = request->ReadUnsigned32("slot");
+    JDWP::JdwpTag sigByte = request->ReadTag();
+    size_t width = Dbg::GetTagWidth(sigByte);
+    uint64_t value = request->ReadValue(width);
+
+    VLOG(jdwp) << "    --> slot " << slot << " " << sigByte << " " << value;
+    JDWP::JdwpError error = Dbg::SetLocalValue(visitor, slot, sigByte, value, width);
+    if (error != JDWP::ERR_NONE) {
+      return error;
+    }
+  }
+  return JDWP::ERR_NONE;
 }
 
-JDWP::ObjectId Dbg::GetThisObjectIdForEvent(mirror::Object* this_object) {
-  // If 'this_object' isn't already in the registry, we know that we're not looking for it, so
-  // there's no point adding it to the registry and burning through ids.
-  // When registering an event request with an instance filter, we've been given an existing object
-  // id so it must already be present in the registry when the event fires.
-  JDWP::ObjectId this_id = 0;
-  if (this_object != nullptr && gRegistry->Contains(this_object)) {
-    this_id = gRegistry->Add(this_object);
+JDWP::JdwpError Dbg::SetLocalValue(StackVisitor& visitor, int slot, JDWP::JdwpTag tag,
+                                   uint64_t value, size_t width) {
+  mirror::ArtMethod* m = visitor.GetMethod();
+  uint16_t reg = DemangleSlot(slot, m);
+  // TODO: check that the tag is compatible with the actual type of the slot!
+  // TODO: check slot is valid for this method or return INVALID_SLOT error.
+  constexpr JDWP::JdwpError kFailureErrorCode = JDWP::ERR_ABSENT_INFORMATION;
+  switch (tag) {
+    case JDWP::JT_BOOLEAN:
+    case JDWP::JT_BYTE:
+      CHECK_EQ(width, 1U);
+      if (!visitor.SetVReg(m, reg, static_cast<uint32_t>(value), kIntVReg)) {
+        VLOG(jdwp) << "failed to set boolean/byte local " << reg << " = "
+                   << static_cast<uint32_t>(value);
+        return kFailureErrorCode;
+      }
+      break;
+    case JDWP::JT_SHORT:
+    case JDWP::JT_CHAR:
+      CHECK_EQ(width, 2U);
+      if (!visitor.SetVReg(m, reg, static_cast<uint32_t>(value), kIntVReg)) {
+        VLOG(jdwp) << "failed to set short/char local " << reg << " = "
+                   << static_cast<uint32_t>(value);
+        return kFailureErrorCode;
+      }
+      break;
+    case JDWP::JT_INT:
+      CHECK_EQ(width, 4U);
+      if (!visitor.SetVReg(m, reg, static_cast<uint32_t>(value), kIntVReg)) {
+        VLOG(jdwp) << "failed to set int local " << reg << " = "
+                   << static_cast<uint32_t>(value);
+        return kFailureErrorCode;
+      }
+      break;
+    case JDWP::JT_FLOAT:
+      CHECK_EQ(width, 4U);
+      if (!visitor.SetVReg(m, reg, static_cast<uint32_t>(value), kFloatVReg)) {
+        VLOG(jdwp) << "failed to set float local " << reg << " = "
+                   << static_cast<uint32_t>(value);
+        return kFailureErrorCode;
+      }
+      break;
+    case JDWP::JT_ARRAY:
+    case JDWP::JT_CLASS_LOADER:
+    case JDWP::JT_CLASS_OBJECT:
+    case JDWP::JT_OBJECT:
+    case JDWP::JT_STRING:
+    case JDWP::JT_THREAD:
+    case JDWP::JT_THREAD_GROUP: {
+      CHECK_EQ(width, sizeof(JDWP::ObjectId));
+      JDWP::JdwpError error;
+      mirror::Object* o = gRegistry->Get<mirror::Object*>(static_cast<JDWP::ObjectId>(value),
+                                                          &error);
+      if (error != JDWP::ERR_NONE) {
+        VLOG(jdwp) << tag << " object " << value << " is an invalid object";
+        return JDWP::ERR_INVALID_OBJECT;
+      } else if (!visitor.SetVReg(m, reg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)),
+                                  kReferenceVReg)) {
+        VLOG(jdwp) << "failed to set " << tag << " object local " << reg << " = " << o;
+        return kFailureErrorCode;
+      }
+      break;
+    }
+    case JDWP::JT_DOUBLE: {
+      CHECK_EQ(width, 8U);
+      if (!visitor.SetVRegPair(m, reg, value, kDoubleLoVReg, kDoubleHiVReg)) {
+        VLOG(jdwp) << "failed to set double local " << reg << " = " << value;
+        return kFailureErrorCode;
+      }
+      break;
+    }
+    case JDWP::JT_LONG: {
+      CHECK_EQ(width, 8U);
+      if (!visitor.SetVRegPair(m, reg, value, kLongLoVReg, kLongHiVReg)) {
+        VLOG(jdwp) << "failed to set long local " << reg << " = " << value;
+        return kFailureErrorCode;
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unknown tag " << tag;
+      break;
   }
-  return this_id;
+  return JDWP::ERR_NONE;
+}
+
+static void SetEventLocation(JDWP::EventLocation* location, mirror::ArtMethod* m, uint32_t dex_pc)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  DCHECK(location != nullptr);
+  if (m == nullptr) {
+    memset(location, 0, sizeof(*location));
+  } else {
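+    // Native and proxy methods have no meaningful dex pc; report -1.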
+    location->method = m;
+    location->dex_pc = (m->IsNative() || m->IsProxyMethod()) ? static_cast<uint32_t>(-1) : dex_pc;
+  }
 }
 
 void Dbg::PostLocationEvent(mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
@@ -2687,12 +2829,10 @@
   }
   DCHECK(m != nullptr);
   DCHECK_EQ(m->IsStatic(), this_object == nullptr);
-  JDWP::JdwpLocation location;
-  SetLocation(&location, m, dex_pc);
+  JDWP::EventLocation location;
+  SetEventLocation(&location, m, dex_pc);
 
-  // We need 'this' for InstanceOnly filters only.
-  JDWP::ObjectId this_id = GetThisObjectIdForEvent(this_object);
-  gJdwpState->PostLocationEvent(&location, this_id, event_flags, return_value);
+  gJdwpState->PostLocationEvent(&location, this_object, event_flags, return_value);
 }
 
 void Dbg::PostFieldAccessEvent(mirror::ArtMethod* m, int dex_pc,
@@ -2702,14 +2842,10 @@
   }
   DCHECK(m != nullptr);
   DCHECK(f != nullptr);
-  JDWP::JdwpLocation location;
-  SetLocation(&location, m, dex_pc);
+  JDWP::EventLocation location;
+  SetEventLocation(&location, m, dex_pc);
 
-  JDWP::RefTypeId type_id = gRegistry->AddRefType(f->GetDeclaringClass());
-  JDWP::FieldId field_id = ToFieldId(f);
-  JDWP::ObjectId this_id = gRegistry->Add(this_object);
-
-  gJdwpState->PostFieldEvent(&location, type_id, field_id, this_id, nullptr, false);
+  gJdwpState->PostFieldEvent(&location, f, this_object, nullptr, false);
 }
 
 void Dbg::PostFieldModificationEvent(mirror::ArtMethod* m, int dex_pc,
@@ -2721,14 +2857,10 @@
   DCHECK(m != nullptr);
   DCHECK(f != nullptr);
   DCHECK(field_value != nullptr);
-  JDWP::JdwpLocation location;
-  SetLocation(&location, m, dex_pc);
+  JDWP::EventLocation location;
+  SetEventLocation(&location, m, dex_pc);
 
-  JDWP::RefTypeId type_id = gRegistry->AddRefType(f->GetDeclaringClass());
-  JDWP::FieldId field_id = ToFieldId(f);
-  JDWP::ObjectId this_id = gRegistry->Add(this_object);
-
-  gJdwpState->PostFieldEvent(&location, type_id, field_id, this_id, field_value, true);
+  gJdwpState->PostFieldEvent(&location, f, this_object, field_value, true);
 }
 
 void Dbg::PostException(const ThrowLocation& throw_location,
@@ -2737,33 +2869,20 @@
   if (!IsDebuggerActive()) {
     return;
   }
+  JDWP::EventLocation exception_throw_location;
+  SetEventLocation(&exception_throw_location, throw_location.GetMethod(), throw_location.GetDexPc());
+  JDWP::EventLocation exception_catch_location;
+  SetEventLocation(&exception_catch_location, catch_method, catch_dex_pc);
 
-  JDWP::JdwpLocation jdwp_throw_location;
-  SetLocation(&jdwp_throw_location, throw_location.GetMethod(), throw_location.GetDexPc());
-  JDWP::JdwpLocation catch_location;
-  SetLocation(&catch_location, catch_method, catch_dex_pc);
-
-  // We need 'this' for InstanceOnly filters only.
-  JDWP::ObjectId this_id = GetThisObjectIdForEvent(throw_location.GetThis());
-  JDWP::ObjectId exception_id = gRegistry->Add(exception_object);
-  JDWP::RefTypeId exception_class_id = gRegistry->AddRefType(exception_object->GetClass());
-
-  gJdwpState->PostException(&jdwp_throw_location, exception_id, exception_class_id, &catch_location,
-                            this_id);
+  gJdwpState->PostException(&exception_throw_location, exception_object, &exception_catch_location,
+                            throw_location.GetThis());
 }
 
 void Dbg::PostClassPrepare(mirror::Class* c) {
   if (!IsDebuggerActive()) {
     return;
   }
-
-  // OLD-TODO - we currently always send both "verified" and "prepared" since
-  // debuggers seem to like that.  There might be some advantage to honesty,
-  // since the class may not yet be verified.
-  int state = JDWP::CS_VERIFIED | JDWP::CS_PREPARED;
-  JDWP::JdwpTypeTag tag = GetTypeTag(c);
-  std::string temp;
-  gJdwpState->PostClassPrepare(tag, gRegistry->Add(c), c->GetDescriptor(&temp), state);
+  gJdwpState->PostClassPrepare(c);
 }
 
 void Dbg::UpdateDebugger(Thread* thread, mirror::Object* this_object,
@@ -3046,6 +3165,8 @@
     // should never be null. We could just check we never encounter this case.
     return false;
   }
+  // Note: method verifier may cause thread suspension.
+  self->AssertThreadSuspensionIsAllowable();
   StackHandleScope<3> hs(self);
   mirror::Class* declaring_class = m->GetDeclaringClass();
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
@@ -3071,20 +3192,18 @@
 // Sanity checks all existing breakpoints on the same method.
 static void SanityCheckExistingBreakpoints(mirror::ArtMethod* m, bool need_full_deoptimization)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::breakpoint_lock_) {
-  if (kIsDebugBuild) {
-    for (const Breakpoint& breakpoint : gBreakpoints) {
-      CHECK_EQ(need_full_deoptimization, breakpoint.NeedFullDeoptimization());
-    }
-    if (need_full_deoptimization) {
-      // We should have deoptimized everything but not "selectively" deoptimized this method.
-      CHECK(Runtime::Current()->GetInstrumentation()->AreAllMethodsDeoptimized());
-      CHECK(!Runtime::Current()->GetInstrumentation()->IsDeoptimized(m));
-    } else {
-      // We should have "selectively" deoptimized this method.
-      // Note: while we have not deoptimized everything for this method, we may have done it for
-      // another event.
-      CHECK(Runtime::Current()->GetInstrumentation()->IsDeoptimized(m));
-    }
+  for (const Breakpoint& breakpoint : gBreakpoints) {
+    CHECK_EQ(need_full_deoptimization, breakpoint.NeedFullDeoptimization());
+  }
+  if (need_full_deoptimization) {
+    // We should have deoptimized everything but not "selectively" deoptimized this method.
+    CHECK(Runtime::Current()->GetInstrumentation()->AreAllMethodsDeoptimized());
+    CHECK(!Runtime::Current()->GetInstrumentation()->IsDeoptimized(m));
+  } else {
+    // We should have "selectively" deoptimized this method.
+    // Note: while we have not deoptimized everything for this method, we may have done it for
+    // another event.
+    CHECK(Runtime::Current()->GetInstrumentation()->IsDeoptimized(m));
   }
 }
 
@@ -3095,12 +3214,17 @@
   mirror::ArtMethod* m = FromMethodId(location->method_id);
   DCHECK(m != nullptr) << "No method for method id " << location->method_id;
 
-  WriterMutexLock mu(self, *Locks::breakpoint_lock_);
-  const Breakpoint* const existing_breakpoint = FindFirstBreakpointForMethod(m);
+  const Breakpoint* existing_breakpoint;
+  {
+    ReaderMutexLock mu(self, *Locks::breakpoint_lock_);
+    existing_breakpoint = FindFirstBreakpointForMethod(m);
+  }
   bool need_full_deoptimization;
   if (existing_breakpoint == nullptr) {
     // There is no breakpoint on this method yet: we need to deoptimize. If this method may be
     // inlined, we deoptimize everything; otherwise we deoptimize only this method.
+    // Note: IsMethodPossiblyInlined goes into the method verifier and may cause thread suspension.
+    // Therefore we must not hold any lock when we call it.
     need_full_deoptimization = IsMethodPossiblyInlined(self, m);
     if (need_full_deoptimization) {
       req->SetKind(DeoptimizationRequest::kFullDeoptimization);
@@ -3115,12 +3239,18 @@
     req->SetMethod(nullptr);
 
     need_full_deoptimization = existing_breakpoint->NeedFullDeoptimization();
-    SanityCheckExistingBreakpoints(m, need_full_deoptimization);
+    if (kIsDebugBuild) {
+      ReaderMutexLock mu(self, *Locks::breakpoint_lock_);
+      SanityCheckExistingBreakpoints(m, need_full_deoptimization);
+    }
   }
 
-  gBreakpoints.push_back(Breakpoint(m, location->dex_pc, need_full_deoptimization));
-  VLOG(jdwp) << "Set breakpoint #" << (gBreakpoints.size() - 1) << ": "
-             << gBreakpoints[gBreakpoints.size() - 1];
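+  // Install the new breakpoint last, under the exclusive lock, now that the
+  // deoptimization kind has been decided without holding any lock.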
+  {
+    WriterMutexLock mu(self, *Locks::breakpoint_lock_);
+    gBreakpoints.push_back(Breakpoint(m, location->dex_pc, need_full_deoptimization));
+    VLOG(jdwp) << "Set breakpoint #" << (gBreakpoints.size() - 1) << ": "
+               << gBreakpoints[gBreakpoints.size() - 1];
+  }
 }
 
 // Uninstalls a breakpoint at the specified location. Also indicates through the deoptimization
@@ -3155,7 +3285,9 @@
     // There is at least one breakpoint for this method: we don't need to undeoptimize.
     req->SetKind(DeoptimizationRequest::kNothing);
     req->SetMethod(nullptr);
-    SanityCheckExistingBreakpoints(m, need_full_deoptimization);
+    if (kIsDebugBuild) {
+      SanityCheckExistingBreakpoints(m, need_full_deoptimization);
+    }
   }
 }
 
@@ -3180,7 +3312,7 @@
         self_suspend_ = true;
       } else {
         soa.Self()->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension);
-        jobject thread_peer = gRegistry->GetJObject(thread_id);
+        jobject thread_peer = Dbg::GetObjectRegistry()->GetJObject(thread_id);
         bool timed_out;
         Thread* suspended_thread;
         {
@@ -3843,9 +3975,7 @@
 
 void Dbg::PostThreadStartOrStop(Thread* t, uint32_t type) {
   if (IsDebuggerActive()) {
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    JDWP::ObjectId id = gRegistry->Add(t->GetPeer());
-    gJdwpState->PostThreadChange(id, type == CHUNK_TYPE("THCR"));
+    gJdwpState->PostThreadChange(t, type == CHUNK_TYPE("THCR"));
   }
   Dbg::DdmSendThreadNotification(t, type);
 }
@@ -4323,7 +4453,7 @@
       recent_allocation_records_ = new AllocRecord[alloc_record_max_];
       CHECK(recent_allocation_records_ != nullptr);
     }
-    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
+    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(false);
   } else {
     {
       ScopedObjectAccess soa(self);  // For type_cache_.Clear();
@@ -4339,7 +4469,7 @@
       type_cache_.Clear();
     }
     // If an allocation comes in before we uninstrument, we will safely drop it on the floor.
-    Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints();
+    Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints(false);
   }
 }
 
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 219210e..cb7adae 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -43,6 +43,9 @@
 class Throwable;
 }  // namespace mirror
 class AllocRecord;
+class ObjectRegistry;
+class ScopedObjectAccessUnchecked;
+class StackVisitor;
 class Thread;
 class ThrowLocation;
 
@@ -250,6 +253,8 @@
    */
   static std::string GetClassName(JDWP::RefTypeId id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static std::string GetClassName(mirror::Class* klass)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError GetClassObject(JDWP::RefTypeId id, JDWP::ObjectId* class_object_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError GetSuperclass(JDWP::RefTypeId id, JDWP::RefTypeId* superclass_id)
@@ -294,7 +299,24 @@
                                            JDWP::ObjectId* new_array)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static bool MatchType(JDWP::RefTypeId instance_class_id, JDWP::RefTypeId class_id)
+  //
+  // Event filtering.
+  //
+  static bool MatchThread(JDWP::ObjectId expected_thread_id, Thread* event_thread)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool MatchLocation(const JDWP::JdwpLocation& expected_location,
+                            const JDWP::EventLocation& event_location)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool MatchType(mirror::Class* event_class, JDWP::RefTypeId class_id)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool MatchField(JDWP::RefTypeId expected_type_id, JDWP::FieldId expected_field_id,
+                         mirror::ArtField* event_field)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool MatchInstance(JDWP::ObjectId expected_instance_id, mirror::Object* event_instance)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   //
@@ -381,7 +403,7 @@
   static JDWP::JdwpError SetStaticFieldValue(JDWP::FieldId field_id, uint64_t value, int width)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static std::string StringToUtf8(JDWP::ObjectId string_id)
+  static JDWP::JdwpError StringToUtf8(JDWP::ObjectId string_id, std::string* str)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void OutputJValue(JDWP::JdwpTag tag, const JValue* return_value, JDWP::ExpandBuf* pReply)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -393,13 +415,19 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_);
   static JDWP::JdwpError GetThreadGroup(JDWP::ObjectId thread_id, JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_);
-  static std::string GetThreadGroupName(JDWP::ObjectId thread_group_id);
-  static JDWP::ObjectId GetThreadGroupParent(JDWP::ObjectId thread_group_id)
+  static JDWP::JdwpError GetThreadGroupName(JDWP::ObjectId thread_group_id,
+                                            JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static JDWP::JdwpError GetThreadGroupParent(JDWP::ObjectId thread_group_id,
+                                              JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static JDWP::JdwpError GetThreadGroupChildren(JDWP::ObjectId thread_group_id,
+                                                JDWP::ExpandBuf* pReply)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::ObjectId GetSystemThreadGroupId()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static JDWP::ObjectId GetMainThreadGroupId();
 
   static JDWP::JdwpThreadStatus ToJdwpThreadStatus(ThreadState state);
   static JDWP::JdwpError GetThreadStatus(JDWP::ObjectId thread_id,
@@ -414,11 +442,9 @@
 
   // Fills 'thread_ids' with the threads in the given thread group. If thread_group_id == 0,
   // returns all threads.
-  static void GetThreads(JDWP::ObjectId thread_group_id, std::vector<JDWP::ObjectId>* thread_ids)
+  static void GetThreads(mirror::Object* thread_group, std::vector<JDWP::ObjectId>* thread_ids)
       LOCKS_EXCLUDED(Locks::thread_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void GetChildThreadGroups(JDWP::ObjectId thread_group_id,
-                                   std::vector<JDWP::ObjectId>* child_thread_group_ids);
 
   static JDWP::JdwpError GetThreadFrameCount(JDWP::ObjectId thread_id, size_t* result)
       LOCKS_EXCLUDED(Locks::thread_list_lock_);
@@ -427,8 +453,9 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static JDWP::ObjectId GetThreadSelfId()
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static JDWP::ObjectId GetThreadSelfId() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static JDWP::ObjectId GetThreadId(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static void SuspendVM()
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
@@ -449,12 +476,10 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static JDWP::JdwpError GetLocalValue(JDWP::ObjectId thread_id, JDWP::FrameId frame_id, int slot,
-                                       JDWP::JdwpTag tag, uint8_t* buf, size_t expectedLen)
+  static JDWP::JdwpError GetLocalValues(JDWP::Request* request, JDWP::ExpandBuf* pReply)
       LOCKS_EXCLUDED(Locks::thread_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static JDWP::JdwpError SetLocalValue(JDWP::ObjectId thread_id, JDWP::FrameId frame_id, int slot,
-                                       JDWP::JdwpTag tag, uint64_t value, size_t width)
+  static JDWP::JdwpError SetLocalValues(JDWP::Request* request)
       LOCKS_EXCLUDED(Locks::thread_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -598,7 +623,33 @@
   static void DdmSendHeapSegments(bool native)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static ObjectRegistry* GetObjectRegistry() {
+    return gRegistry;
+  }
+
+  static JDWP::JdwpTag TagFromObject(const ScopedObjectAccessUnchecked& soa, mirror::Object* o)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static JDWP::JdwpTypeTag GetTypeTag(mirror::Class* klass)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static JDWP::FieldId ToFieldId(const mirror::ArtField* f)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static void SetJdwpLocation(JDWP::JdwpLocation* location, mirror::ArtMethod* m, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
+  static JDWP::JdwpError GetLocalValue(const StackVisitor& visitor,
+                                       ScopedObjectAccessUnchecked& soa, int slot,
+                                       JDWP::JdwpTag tag, uint8_t* buf, size_t width)
+      LOCKS_EXCLUDED(Locks::thread_list_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static JDWP::JdwpError SetLocalValue(StackVisitor& visitor, int slot, JDWP::JdwpTag tag,
+                                       uint64_t value, size_t width)
+      LOCKS_EXCLUDED(Locks::thread_list_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static void DdmBroadcast(bool connect) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostThreadStartOrStop(Thread*, uint32_t)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -608,9 +659,6 @@
                                 const JValue* return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static JDWP::ObjectId GetThisObjectIdForEvent(mirror::Object* this_object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   static void ProcessDeoptimizationRequest(const DeoptimizationRequest& request)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -623,6 +671,8 @@
   static size_t alloc_record_head_ GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_count_ GUARDED_BY(Locks::alloc_tracker_lock_);
 
+  static ObjectRegistry* gRegistry;
+
   // Deoptimization requests to be processed each time the event list is updated. This is used when
   // registering and unregistering events so we do not deoptimize while holding the event list
   // lock.
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index 6a9976a..05214a4 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -122,7 +122,7 @@
   V(0x65, SGET_CHAR, "sget-char", k21c, true, kFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
   V(0x66, SGET_SHORT, "sget-short", k21c, true, kFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
   V(0x67, SPUT, "sput", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x68, SPUT_WIDE, "sput-wide", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x68, SPUT_WIDE, "sput-wide", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
   V(0x69, SPUT_OBJECT, "sput-object", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
   V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
   V(0x6B, SPUT_BYTE, "sput-byte", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index ad22a2e..a7e5e74 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -569,7 +569,7 @@
 
 RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
   // Get the lowest address non-full run from the binary tree.
-  std::set<Run*>* const bt = &non_full_runs_[idx];
+  auto* const bt = &non_full_runs_[idx];
   if (!bt->empty()) {
     // If there's one, use it as the current run.
     auto it = bt->begin();
@@ -767,7 +767,7 @@
   }
   // Free the slot in the run.
   run->FreeSlot(ptr);
-  std::set<Run*>* non_full_runs = &non_full_runs_[idx];
+  auto* non_full_runs = &non_full_runs_[idx];
   if (run->IsAllFree()) {
     // It has just become completely free. Free the pages of this run.
     std::set<Run*>::iterator pos = non_full_runs->find(run);
@@ -793,9 +793,8 @@
     // already in the non-full run set (i.e., it was full) insert it
     // into the non-full run set.
     if (run != current_runs_[idx]) {
-      std::unordered_set<Run*, hash_run, eq_run>* full_runs =
-          kIsDebugBuild ? &full_runs_[idx] : NULL;
-      std::set<Run*>::iterator pos = non_full_runs->find(run);
+      auto* full_runs = kIsDebugBuild ? &full_runs_[idx] : NULL;
+      auto pos = non_full_runs->find(run);
       if (pos == non_full_runs->end()) {
         DCHECK(run_was_full);
         DCHECK(full_runs->find(run) != full_runs->end());
@@ -1266,9 +1265,8 @@
       }
       // Check if the run should be moved to non_full_runs_ or
       // free_page_runs_.
-      std::set<Run*>* non_full_runs = &non_full_runs_[idx];
-      std::unordered_set<Run*, hash_run, eq_run>* full_runs =
-          kIsDebugBuild ? &full_runs_[idx] : NULL;
+      auto* non_full_runs = &non_full_runs_[idx];
+      auto* full_runs = kIsDebugBuild ? &full_runs_[idx] : NULL;
       if (run->IsAllFree()) {
         // It has just become completely free. Free the pages of the
         // run.
@@ -2056,7 +2054,7 @@
     // in a run set.
     if (!is_current_run) {
       MutexLock mu(self, rosalloc->lock_);
-      std::set<Run*>& non_full_runs = rosalloc->non_full_runs_[idx];
+      auto& non_full_runs = rosalloc->non_full_runs_[idx];
       // If it's all free, it must be a free page run rather than a run.
       CHECK(!IsAllFree()) << "A free run must be in a free page run set " << Dump();
       if (!IsFull()) {
@@ -2066,7 +2064,7 @@
       } else {
         // If it's full, it must in the full run set (debug build only.)
         if (kIsDebugBuild) {
-          std::unordered_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
+          auto& full_runs = rosalloc->full_runs_[idx];
           CHECK(full_runs.find(this) != full_runs.end())
               << " A full run isn't in the full run set " << Dump();
         }
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index b2a5a3c..2fbd97a 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -26,6 +26,7 @@
 #include <unordered_set>
 #include <vector>
 
+#include "base/allocator.h"
 #include "base/mutex.h"
 #include "base/logging.h"
 #include "globals.h"
@@ -53,7 +54,7 @@
       size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
       size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
       DCHECK_GE(byte_size, static_cast<size_t>(0));
-      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      DCHECK_ALIGNED(byte_size, kPageSize);
       return byte_size;
     }
     void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
@@ -403,6 +404,7 @@
 
   // We use thread-local runs for the size Brackets whose indexes
   // are less than this index. We use shared (current) runs for the rest.
+
   static const size_t kNumThreadLocalSizeBrackets = 11;
 
  private:
@@ -423,12 +425,13 @@
 
   // The run sets that hold the runs whose slots are not all
   // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
-  std::set<Run*> non_full_runs_[kNumOfSizeBrackets];
+  AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
   // The run sets that hold the runs whose slots are all full. This is
   // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
-  std::unordered_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
+  std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>>
+      full_runs_[kNumOfSizeBrackets];
   // The set of free pages.
-  std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
+  AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_);
   // The dedicated full run, it is always full and shared by all threads when revoking happens.
   // This is an optimization since enables us to avoid a null check for revoked runs.
   static Run* dedicated_full_run_;
@@ -460,7 +463,8 @@
   // The table that indicates the size of free page runs. These sizes
   // are stored here to avoid storing in the free page header and
   // release backing pages.
-  std::vector<size_t> free_page_run_size_map_ GUARDED_BY(lock_);
+  std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_
+      GUARDED_BY(lock_);
   // The global lock. Used to guard the page map, the free page set,
   // and the footprint.
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 3e3b964..864bb72 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -426,7 +426,7 @@
     }
   }
   if (running_on_valgrind_) {
-    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
+    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(false);
   }
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() exiting";
@@ -685,9 +685,8 @@
 }
 
 void Heap::VisitObjects(ObjectCallback callback, void* arg) {
-  Thread* self = Thread::Current();
   // GCs can move objects, so don't allow this.
-  const char* old_cause = self->StartAssertNoThreadSuspension("Visiting objects");
+  ScopedAssertNoThreadSuspension ants(Thread::Current(), "Visiting objects");
   if (bump_pointer_space_ != nullptr) {
     // Visit objects in bump pointer space.
     bump_pointer_space_->Walk(callback, arg);
@@ -704,7 +703,6 @@
     }
   }
   GetLiveBitmap()->Walk(callback, arg);
-  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 void Heap::MarkAllocStackAsLive(accounting::ObjectStack* stack) {
@@ -1429,12 +1427,10 @@
 void Heap::CountInstances(const std::vector<mirror::Class*>& classes, bool use_is_assignable_from,
                           uint64_t* counts) {
   // Can't do any GC in this function since this may move classes.
-  Thread* self = Thread::Current();
-  auto* old_cause = self->StartAssertNoThreadSuspension("CountInstances");
+  ScopedAssertNoThreadSuspension ants(Thread::Current(), "CountInstances");
   InstanceCounter counter(classes, use_is_assignable_from, counts);
-  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  ReaderMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   VisitObjects(InstanceCounter::Callback, &counter);
-  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 class InstanceCollector {
@@ -1447,8 +1443,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     DCHECK(arg != nullptr);
     InstanceCollector* instance_collector = reinterpret_cast<InstanceCollector*>(arg);
-    mirror::Class* instance_class = obj->GetClass();
-    if (instance_class == instance_collector->class_) {
+    if (obj->GetClass() == instance_collector->class_) {
       if (instance_collector->max_count_ == 0 ||
           instance_collector->instances_.size() < instance_collector->max_count_) {
         instance_collector->instances_.push_back(obj);
@@ -1457,8 +1452,8 @@
   }
 
  private:
-  mirror::Class* class_;
-  uint32_t max_count_;
+  const mirror::Class* const class_;
+  const uint32_t max_count_;
   std::vector<mirror::Object*>& instances_;
   DISALLOW_COPY_AND_ASSIGN(InstanceCollector);
 };
@@ -1466,12 +1461,10 @@
 void Heap::GetInstances(mirror::Class* c, int32_t max_count,
                         std::vector<mirror::Object*>& instances) {
   // Can't do any GC in this function since this may move classes.
-  Thread* self = Thread::Current();
-  auto* old_cause = self->StartAssertNoThreadSuspension("GetInstances");
+  ScopedAssertNoThreadSuspension ants(Thread::Current(), "GetInstances");
   InstanceCollector collector(c, max_count, instances);
-  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  ReaderMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   VisitObjects(&InstanceCollector::Callback, &collector);
-  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 class ReferringObjectsFinder {
@@ -1504,8 +1497,8 @@
   }
 
  private:
-  mirror::Object* object_;
-  uint32_t max_count_;
+  const mirror::Object* const object_;
+  const uint32_t max_count_;
   std::vector<mirror::Object*>& referring_objects_;
   DISALLOW_COPY_AND_ASSIGN(ReferringObjectsFinder);
 };
@@ -1513,12 +1506,10 @@
 void Heap::GetReferringObjects(mirror::Object* o, int32_t max_count,
                                std::vector<mirror::Object*>& referring_objects) {
   // Can't do any GC in this function since this may move the object o.
-  Thread* self = Thread::Current();
-  auto* old_cause = self->StartAssertNoThreadSuspension("GetReferringObjects");
+  ScopedAssertNoThreadSuspension ants(Thread::Current(), "GetReferringObjects");
   ReferringObjectsFinder finder(o, max_count, referring_objects);
-  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  ReaderMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   VisitObjects(&ReferringObjectsFinder::Callback, &finder);
-  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 void Heap::CollectGarbage(bool clear_soft_references) {
@@ -2104,6 +2095,7 @@
   // Back to back GCs can cause 0 ms of wait time in between GC invocations.
   if (LIKELY(ms_delta != 0)) {
     allocation_rate_ = ((gc_start_size - last_gc_size_) * 1000) / ms_delta;
+    ATRACE_INT("Allocation rate KB/s", allocation_rate_ / KB);
     VLOG(heap) << "Allocation rate: " << PrettySize(allocation_rate_) << "/s";
   }
 
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 75de623..bfaa2bb 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -132,7 +132,6 @@
     if (concurrent) {
       StartPreservingReferences(self);
     }
-
     soft_reference_queue_.ForwardSoftReferences(&PreserveSoftReferenceCallback,
                                                 &process_references_args_);
     process_mark_stack_callback(arg);
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 41c34c9..353d00c 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -125,7 +125,7 @@
   }
   // We should clean up so we are more likely to have room for the image.
   if (Runtime::Current()->IsZygote()) {
-    LOG(INFO) << "Pruning dalvik-cache since we are relocating an image and will need to recompile";
+    LOG(INFO) << "Pruning dalvik-cache since we are generating an image and will need to recompile";
     PruneDexCache(image_isa);
   }
 
@@ -177,7 +177,8 @@
                                    bool* has_system,
                                    std::string* cache_filename,
                                    bool* dalvik_cache_exists,
-                                   bool* has_cache) {
+                                   bool* has_cache,
+                                   bool* is_global_cache) {
   *has_system = false;
   *has_cache = false;
   // image_location = /system/framework/boot.art
@@ -192,7 +193,7 @@
   *dalvik_cache_exists = false;
   std::string dalvik_cache;
   GetDalvikCache(GetInstructionSetString(image_isa), true, &dalvik_cache,
-                 &have_android_data, dalvik_cache_exists);
+                 &have_android_data, dalvik_cache_exists, is_global_cache);
 
   if (have_android_data && *dalvik_cache_exists) {
     // Always set output location even if it does not exist,
@@ -285,8 +286,9 @@
   std::string cache_filename;
   bool has_cache = false;
   bool dalvik_cache_exists = false;
+  bool is_global_cache = false;
   if (FindImageFilename(image_location, image_isa, &system_filename, &has_system,
-                        &cache_filename, &dalvik_cache_exists, &has_cache)) {
+                        &cache_filename, &dalvik_cache_exists, &has_cache, &is_global_cache)) {
     if (Runtime::Current()->ShouldRelocate()) {
       if (has_system && has_cache) {
         std::unique_ptr<ImageHeader> sys_hdr(new ImageHeader);
@@ -344,6 +346,21 @@
       && hdr_a.GetOatChecksum() == hdr_b.GetOatChecksum();
 }
 
+static bool ImageCreationAllowed(bool is_global_cache, std::string* error_msg) {
+  // Anyone can write into a "local" cache.
+  if (!is_global_cache) {
+    return true;
+  }
+
+  // Only the zygote is allowed to create the global boot image.
+  if (Runtime::Current()->IsZygote()) {
+    return true;
+  }
+
+  *error_msg = "Only the zygote can create the global boot image.";
+  return false;
+}
+
 ImageSpace* ImageSpace::Create(const char* image_location,
                                const InstructionSet image_isa,
                                std::string* error_msg) {
@@ -352,9 +369,10 @@
   std::string cache_filename;
   bool has_cache = false;
   bool dalvik_cache_exists = false;
+  bool is_global_cache = true;
   const bool found_image = FindImageFilename(image_location, image_isa, &system_filename,
                                              &has_system, &cache_filename, &dalvik_cache_exists,
-                                             &has_cache);
+                                             &has_cache, &is_global_cache);
 
   ImageSpace* space;
   bool relocate = Runtime::Current()->ShouldRelocate();
@@ -377,18 +395,27 @@
           relocated_version_used = true;
         } else {
           // We cannot have a relocated version, Relocate the system one and use it.
-          if (can_compile && RelocateImage(image_location, cache_filename.c_str(), image_isa,
-                                           error_msg)) {
+
+          std::string reason;
+          bool success;
+
+          // Check whether we are allowed to relocate.
+          if (!can_compile) {
+            reason = "Image dex2oat disabled by -Xnoimage-dex2oat.";
+            success = false;
+          } else if (!ImageCreationAllowed(is_global_cache, &reason)) {
+            // Not allowed to write into this cache; "reason" is set by ImageCreationAllowed.
+            success = false;
+          } else {
+            // Try to relocate.
+            success = RelocateImage(image_location, cache_filename.c_str(), image_isa, &reason);
+          }
+
+          if (success) {
             relocated_version_used = true;
             image_filename = &cache_filename;
           } else {
-            std::string reason;
-            if (can_compile) {
-              reason = StringPrintf(": %s", error_msg->c_str());
-            } else {
-              reason = " because image dex2oat is disabled.";
-            }
-            *error_msg = StringPrintf("Unable to relocate image '%s' from '%s' to '%s'%s",
+            *error_msg = StringPrintf("Unable to relocate image '%s' from '%s' to '%s': %s",
                                       image_location, system_filename.c_str(),
                                       cache_filename.c_str(), reason.c_str());
             return nullptr;
@@ -460,6 +487,8 @@
   } else if (!dalvik_cache_exists) {
     *error_msg = StringPrintf("No place to put generated image.");
     return nullptr;
+  } else if (!ImageCreationAllowed(is_global_cache, error_msg)) {
+    return nullptr;
   } else if (!GenerateImage(cache_filename, image_isa, error_msg)) {
     *error_msg = StringPrintf("Failed to generate image '%s': %s",
                               cache_filename.c_str(), error_msg->c_str());
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 28ebca6..2586ece 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -110,7 +110,8 @@
                                 bool* has_system,
                                 std::string* data_location,
                                 bool* dalvik_cache_exists,
-                                bool* has_data);
+                                bool* has_data,
+                                bool* is_global_cache);
 
  private:
   // Tries to initialize an ImageSpace from the given image path,
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 0f45b9e..a2e88a6 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -597,10 +597,13 @@
   thread->ResetQuickAllocEntryPointsForThread();
 }
 
-void Instrumentation::SetEntrypointsInstrumented(bool instrumented) {
+void Instrumentation::SetEntrypointsInstrumented(bool instrumented, bool suspended) {
   Runtime* runtime = Runtime::Current();
   ThreadList* tl = runtime->GetThreadList();
-  if (runtime->IsStarted()) {
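+  // A suspended caller must already hold the mutator lock exclusively; otherwise
+  // we suspend and resume all threads ourselves around the entry point update.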
+  if (suspended) {
+    Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+  }
+  if (runtime->IsStarted() && !suspended) {
     tl->SuspendAll();
   }
   {
@@ -608,30 +611,30 @@
     SetQuickAllocEntryPointsInstrumented(instrumented);
     ResetQuickAllocEntryPoints();
   }
-  if (runtime->IsStarted()) {
+  if (runtime->IsStarted() && !suspended) {
     tl->ResumeAll();
   }
 }
 
-void Instrumentation::InstrumentQuickAllocEntryPoints() {
+void Instrumentation::InstrumentQuickAllocEntryPoints(bool suspended) {
   // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racey and this code
   //       should be guarded by a lock.
   DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.LoadSequentiallyConsistent(), 0);
   const bool enable_instrumentation =
       quick_alloc_entry_points_instrumentation_counter_.FetchAndAddSequentiallyConsistent(1) == 0;
   if (enable_instrumentation) {
-    SetEntrypointsInstrumented(true);
+    SetEntrypointsInstrumented(true, suspended);
   }
 }
 
-void Instrumentation::UninstrumentQuickAllocEntryPoints() {
+void Instrumentation::UninstrumentQuickAllocEntryPoints(bool suspended) {
   // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racey and this code
   //       should be guarded by a lock.
   DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.LoadSequentiallyConsistent(), 0);
   const bool disable_instrumentation =
       quick_alloc_entry_points_instrumentation_counter_.FetchAndSubSequentiallyConsistent(1) == 1;
   if (disable_instrumentation) {
-    SetEntrypointsInstrumented(false);
+    SetEntrypointsInstrumented(false, suspended);
   }
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index d05cee5..3c1c756 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -182,10 +182,10 @@
     return interpreter_handler_table_;
   }
 
-  void InstrumentQuickAllocEntryPoints() LOCKS_EXCLUDED(Locks::thread_list_lock_,
-                                                        Locks::runtime_shutdown_lock_);
-  void UninstrumentQuickAllocEntryPoints() LOCKS_EXCLUDED(Locks::thread_list_lock_,
-                                                          Locks::runtime_shutdown_lock_);
+  void InstrumentQuickAllocEntryPoints(bool suspended)
+      LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::runtime_shutdown_lock_);
+  void UninstrumentQuickAllocEntryPoints(bool suspended)
+      LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::runtime_shutdown_lock_);
   void ResetQuickAllocEntryPoints() EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_);
 
   // Update the code of a method respecting any installed stubs.
@@ -350,7 +350,7 @@
 
   // No thread safety analysis to get around SetQuickAllocEntryPointsInstrumented requiring
   // exclusive access to mutator lock which you can't get if the runtime isn't started.
-  void SetEntrypointsInstrumented(bool instrumented) NO_THREAD_SAFETY_ANALYSIS;
+  void SetEntrypointsInstrumented(bool instrumented, bool suspended) NO_THREAD_SAFETY_ANALYSIS;
 
   void MethodEnterEventImpl(Thread* thread, mirror::Object* this_object,
                             mirror::ArtMethod* method, uint32_t dex_pc) const
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 7e685e8..cd8cce0 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -316,35 +316,7 @@
   kComputedGotoImplKind   // Computed-goto-based interpreter implementation.
 };
 
-#if !defined(__clang__)
 static constexpr InterpreterImplKind kInterpreterImplKind = kComputedGotoImplKind;
-#else
-// Clang 3.4 fails to build the goto interpreter implementation.
-static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImpl;
-template<bool do_access_check, bool transaction_active>
-JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
-                       ShadowFrame& shadow_frame, JValue result_register) {
-  LOG(FATAL) << "UNREACHABLE";
-  exit(0);
-}
-// Explicit definitions of ExecuteGotoImpl.
-template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, false>(Thread* self, MethodHelper& mh,
-                                    const DexFile::CodeItem* code_item,
-                                    ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, false>(Thread* self, MethodHelper& mh,
-                                     const DexFile::CodeItem* code_item,
-                                     ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, true>(Thread* self, MethodHelper& mh,
-                                    const DexFile::CodeItem* code_item,
-                                    ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, true>(Thread* self, MethodHelper& mh,
-                                     const DexFile::CodeItem* code_item,
-                                     ShadowFrame& shadow_frame, JValue result_register);
-#endif
 
 static JValue Execute(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
                       ShadowFrame& shadow_frame, JValue result_register)
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index b5b6298..0c9451c 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -36,8 +36,13 @@
 class Thread;
 
 namespace mirror {
+  class ArtField;
   class ArtMethod;
+  class Class;
+  class Object;
+  class Throwable;
 }  // namespace mirror
+class Thread;
 
 namespace JDWP {
 
@@ -65,6 +70,11 @@
 static inline void expandBufAddRefTypeId(ExpandBuf* pReply, RefTypeId id) { expandBufAdd8BE(pReply, id); }
 static inline void expandBufAddFrameId(ExpandBuf* pReply, FrameId id) { expandBufAdd8BE(pReply, id); }
 
+struct EventLocation {
+  mirror::ArtMethod* method;
+  uint32_t dex_pc;
+};
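+// Note: unlike JdwpLocation, EventLocation carries raw runtime pointers, so no
+// ObjectIds need to be registered before an event actually matches.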
+
 /*
  * Holds a JDWP "location".
  */
@@ -178,7 +188,7 @@
    * The VM has finished initializing.  Only called when the debugger is
    * connected at the time initialization completes.
    */
-  bool PostVMStart() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool PostVMStart() LOCKS_EXCLUDED(event_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
    * A location of interest has been reached.  This is used for breakpoints,
@@ -192,8 +202,9 @@
    *
    * "returnValue" is non-null for MethodExit events only.
    */
-  bool PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags,
+  bool PostLocationEvent(const EventLocation* pLoc, mirror::Object* thisPtr, int eventFlags,
                          const JValue* returnValue)
+     LOCKS_EXCLUDED(event_list_lock_)
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
@@ -203,8 +214,9 @@
    * "fieldValue" is non-null for field modification events only.
    * "is_modification" is true for field modification, false for field access.
    */
-  bool PostFieldEvent(const JdwpLocation* pLoc, RefTypeId typeId, FieldId fieldId,
-                      ObjectId thisPtr, const JValue* fieldValue, bool is_modification)
+  bool PostFieldEvent(const EventLocation* pLoc, mirror::ArtField* field, mirror::Object* thisPtr,
+                      const JValue* fieldValue, bool is_modification)
+      LOCKS_EXCLUDED(event_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
@@ -212,21 +224,23 @@
    *
    * Pass in a zeroed-out "*pCatchLoc" if the exception wasn't caught.
    */
-  bool PostException(const JdwpLocation* pThrowLoc, ObjectId excepId, RefTypeId excepClassId,
-                     const JdwpLocation* pCatchLoc, ObjectId thisPtr)
+  bool PostException(const EventLocation* pThrowLoc, mirror::Throwable* exception_object,
+                     const EventLocation* pCatchLoc, mirror::Object* thisPtr)
+      LOCKS_EXCLUDED(event_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
    * A thread has started or stopped.
    */
-  bool PostThreadChange(ObjectId threadId, bool start)
+  bool PostThreadChange(Thread* thread, bool start)
+      LOCKS_EXCLUDED(event_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
    * Class has been prepared.
    */
-  bool PostClassPrepare(JdwpTypeTag tag, RefTypeId refTypeId, const std::string& signature,
-                        int status)
+  bool PostClassPrepare(mirror::Class* klass)
+      LOCKS_EXCLUDED(event_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index fc39cc4..7c8c63c 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -27,6 +27,9 @@
 #include "jdwp/jdwp_constants.h"
 #include "jdwp/jdwp_expand_buf.h"
 #include "jdwp/jdwp_priv.h"
+#include "jdwp/object_registry.h"
+#include "mirror/art_field-inl.h"
+#include "scoped_thread_state_change.h"
 #include "thread-inl.h"
 
 /*
@@ -107,18 +110,17 @@
  * The rest will be zeroed.
  */
 struct ModBasket {
-  ModBasket() : pLoc(NULL), threadId(0), classId(0), excepClassId(0),
-                caught(false), fieldTypeID(0), fieldId(0), thisPtr(0) { }
+  ModBasket() : pLoc(nullptr), thread(nullptr), locationClass(nullptr), exceptionClass(nullptr),
+                caught(false), field(nullptr), thisPtr(nullptr) { }
 
-  const JdwpLocation* pLoc;           /* LocationOnly */
-  std::string         className;      /* ClassMatch/ClassExclude */
-  ObjectId            threadId;       /* ThreadOnly */
-  RefTypeId           classId;        /* ClassOnly */
-  RefTypeId           excepClassId;   /* ExceptionOnly */
-  bool                caught;         /* ExceptionOnly */
-  RefTypeId           fieldTypeID;    /* FieldOnly */
-  FieldId             fieldId;        /* FieldOnly */
-  ObjectId            thisPtr;        /* InstanceOnly */
+  const EventLocation*  pLoc;             /* LocationOnly */
+  std::string           className;        /* ClassMatch/ClassExclude */
+  Thread*               thread;           /* ThreadOnly */
+  mirror::Class*        locationClass;    /* ClassOnly */
+  mirror::Class*        exceptionClass;   /* ExceptionOnly */
+  bool                  caught;           /* ExceptionOnly */
+  mirror::ArtField*     field;            /* FieldOnly */
+  mirror::Object*       thisPtr;          /* InstanceOnly */
   /* nothing for StepOnly -- handled differently */
 };
 
@@ -295,9 +297,6 @@
 /*
  * Remove the event with the given ID from the list.
  *
- * Failure to find the event isn't really an error, but it is a little
- * weird.  (It looks like Eclipse will try to be extra careful and will
- * explicitly remove one-off single-step events.)
  */
 void JdwpState::UnregisterEventById(uint32_t requestId) {
   bool found = false;
@@ -317,7 +316,11 @@
   if (found) {
     Dbg::ManageDeoptimization();
   } else {
-    LOG(WARNING) << StringPrintf("Odd: no match when removing event reqId=0x%04x", requestId);
+    // Failure to find the event isn't really an error. For instance, it looks like Eclipse will
+    // try to be extra careful and will explicitly remove one-off single-step events (using a
+    // 'count' event modifier of 1). So the event may have already been removed as part of the
+    // event notification (see JdwpState::CleanupMatchList).
+    VLOG(jdwp) << StringPrintf("No match when removing event reqId=0x%04x", requestId);
   }
 }
 
@@ -463,12 +466,12 @@
       CHECK(false);  // should not be getting these
       break;
     case MK_THREAD_ONLY:
-      if (pMod->threadOnly.threadId != basket.threadId) {
+      if (!Dbg::MatchThread(pMod->threadOnly.threadId, basket.thread)) {
         return false;
       }
       break;
     case MK_CLASS_ONLY:
-      if (!Dbg::MatchType(basket.classId, pMod->classOnly.refTypeId)) {
+      if (!Dbg::MatchType(basket.locationClass, pMod->classOnly.refTypeId)) {
         return false;
       }
       break;
@@ -483,33 +486,32 @@
       }
       break;
     case MK_LOCATION_ONLY:
-      if (pMod->locationOnly.loc != *basket.pLoc) {
+      if (!Dbg::MatchLocation(pMod->locationOnly.loc, *basket.pLoc)) {
         return false;
       }
       break;
     case MK_EXCEPTION_ONLY:
-      if (pMod->exceptionOnly.refTypeId != 0 && !Dbg::MatchType(basket.excepClassId, pMod->exceptionOnly.refTypeId)) {
+      if (pMod->exceptionOnly.refTypeId != 0 &&
+          !Dbg::MatchType(basket.exceptionClass, pMod->exceptionOnly.refTypeId)) {
         return false;
       }
-      if ((basket.caught && !pMod->exceptionOnly.caught) || (!basket.caught && !pMod->exceptionOnly.uncaught)) {
+      if ((basket.caught && !pMod->exceptionOnly.caught) ||
+          (!basket.caught && !pMod->exceptionOnly.uncaught)) {
         return false;
       }
       break;
     case MK_FIELD_ONLY:
-      if (pMod->fieldOnly.fieldId != basket.fieldId) {
-        return false;
-      }
-      if (!Dbg::MatchType(basket.fieldTypeID, pMod->fieldOnly.refTypeId)) {
+      if (!Dbg::MatchField(pMod->fieldOnly.refTypeId, pMod->fieldOnly.fieldId, basket.field)) {
         return false;
       }
       break;
     case MK_STEP:
-      if (pMod->step.threadId != basket.threadId) {
+      if (!Dbg::MatchThread(pMod->step.threadId, basket.thread)) {
         return false;
       }
       break;
     case MK_INSTANCE_ONLY:
-      if (pMod->instanceOnly.objectId != basket.thisPtr) {
+      if (!Dbg::MatchInstance(pMod->instanceOnly.objectId, basket.thisPtr)) {
         return false;
       }
       break;
@@ -773,7 +775,7 @@
 }
 
 static void LogMatchingEventsAndThread(JdwpEvent** match_list, size_t match_count,
-                                       const ModBasket& basket)
+                                       ObjectId thread_id)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   for (size_t i = 0; i < match_count; ++i) {
     JdwpEvent* pEvent = match_list[i];
@@ -781,11 +783,19 @@
                << StringPrintf(" (requestId=%#" PRIx32 ")", pEvent->requestId);
   }
   std::string thread_name;
-  JdwpError error = Dbg::GetThreadName(basket.threadId, &thread_name);
+  JdwpError error = Dbg::GetThreadName(thread_id, &thread_name);
   if (error != JDWP::ERR_NONE) {
     thread_name = "<unknown>";
   }
-  VLOG(jdwp) << StringPrintf("  thread=%#" PRIx64, basket.threadId) << " " << thread_name;
+  VLOG(jdwp) << StringPrintf("  thread=%#" PRIx64, thread_id) << " " << thread_name;
+}
+
+static void SetJdwpLocationFromEventLocation(const JDWP::EventLocation* event_location,
+                                             JDWP::JdwpLocation* jdwp_location)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  DCHECK(event_location != nullptr);
+  DCHECK(jdwp_location != nullptr);
+  Dbg::SetJdwpLocation(jdwp_location, event_location->method, event_location->dex_pc);
 }
 
 /*
@@ -809,14 +819,18 @@
  *  - Single-step to a line with a breakpoint.  Should get a single
  *    event message with both events in it.
  */
-bool JdwpState::PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags,
-                                  const JValue* returnValue) {
+bool JdwpState::PostLocationEvent(const EventLocation* pLoc, mirror::Object* thisPtr,
+                                  int eventFlags, const JValue* returnValue) {
+  DCHECK(pLoc != nullptr);
+  DCHECK(pLoc->method != nullptr);
+  DCHECK_EQ(pLoc->method->IsStatic(), thisPtr == nullptr);
+
   ModBasket basket;
   basket.pLoc = pLoc;
-  basket.classId = pLoc->class_id;
+  basket.locationClass = pLoc->method->GetDeclaringClass();
   basket.thisPtr = thisPtr;
-  basket.threadId = Dbg::GetThreadSelfId();
-  basket.className = Dbg::GetClassName(pLoc->class_id);
+  basket.thread = Thread::Current();
+  basket.className = Dbg::GetClassName(basket.locationClass);
 
   /*
    * On rare occasions we may need to execute interpreted code in the VM
@@ -824,7 +838,7 @@
    * while doing so.  (I don't think we currently do this at all, so
    * this is mostly paranoia.)
    */
-  if (basket.threadId == debug_thread_id_) {
+  if (basket.thread == GetDebugThread()) {
     VLOG(jdwp) << "Ignoring location event in JDWP thread";
     return false;
   }
@@ -846,29 +860,36 @@
   size_t match_count = 0;
   ExpandBuf* pReq = NULL;
   JdwpSuspendPolicy suspend_policy = SP_NONE;
+  JdwpEvent** match_list = nullptr;
+  ObjectId thread_id = 0;
   {
-    MutexLock mu(Thread::Current(), event_list_lock_);
-    JdwpEvent** match_list = AllocMatchList(event_list_size_);
-    if ((eventFlags & Dbg::kBreakpoint) != 0) {
-      FindMatchingEvents(EK_BREAKPOINT, basket, match_list, &match_count);
-    }
-    if ((eventFlags & Dbg::kSingleStep) != 0) {
-      FindMatchingEvents(EK_SINGLE_STEP, basket, match_list, &match_count);
-    }
-    if ((eventFlags & Dbg::kMethodEntry) != 0) {
-      FindMatchingEvents(EK_METHOD_ENTRY, basket, match_list, &match_count);
-    }
-    if ((eventFlags & Dbg::kMethodExit) != 0) {
-      FindMatchingEvents(EK_METHOD_EXIT, basket, match_list, &match_count);
-      FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, basket, match_list, &match_count);
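+    // Scan the event list with event_list_lock_ held, but release it before the
+    // Dbg:: calls below: registering ObjectIds may allocate and suspend the thread.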
+    {
+      MutexLock mu(Thread::Current(), event_list_lock_);
+      match_list = AllocMatchList(event_list_size_);
+      if ((eventFlags & Dbg::kBreakpoint) != 0) {
+        FindMatchingEvents(EK_BREAKPOINT, basket, match_list, &match_count);
+      }
+      if ((eventFlags & Dbg::kSingleStep) != 0) {
+        FindMatchingEvents(EK_SINGLE_STEP, basket, match_list, &match_count);
+      }
+      if ((eventFlags & Dbg::kMethodEntry) != 0) {
+        FindMatchingEvents(EK_METHOD_ENTRY, basket, match_list, &match_count);
+      }
+      if ((eventFlags & Dbg::kMethodExit) != 0) {
+        FindMatchingEvents(EK_METHOD_EXIT, basket, match_list, &match_count);
+        FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, basket, match_list, &match_count);
+      }
     }
     if (match_count != 0) {
       suspend_policy = scanSuspendPolicy(match_list, match_count);
 
+      thread_id = Dbg::GetThreadId(basket.thread);
+      JDWP::JdwpLocation jdwp_location;
+      SetJdwpLocationFromEventLocation(pLoc, &jdwp_location);
+
       if (VLOG_IS_ON(jdwp)) {
-        LogMatchingEventsAndThread(match_list, match_count, basket);
-        VLOG(jdwp) << "  location=" << *pLoc;
-        VLOG(jdwp) << StringPrintf("  this=%#" PRIx64, basket.thisPtr);
+        LogMatchingEventsAndThread(match_list, match_count, thread_id);
+        VLOG(jdwp) << "  location=" << jdwp_location;
         VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
       }
 
@@ -879,79 +900,81 @@
       for (size_t i = 0; i < match_count; i++) {
         expandBufAdd1(pReq, match_list[i]->eventKind);
         expandBufAdd4BE(pReq, match_list[i]->requestId);
-        expandBufAdd8BE(pReq, basket.threadId);
-        expandBufAddLocation(pReq, *pLoc);
+        expandBufAdd8BE(pReq, thread_id);
+        expandBufAddLocation(pReq, jdwp_location);
         if (match_list[i]->eventKind == EK_METHOD_EXIT_WITH_RETURN_VALUE) {
-          Dbg::OutputMethodReturnValue(pLoc->method_id, returnValue, pReq);
+          Dbg::OutputMethodReturnValue(jdwp_location.method_id, returnValue, pReq);
         }
       }
     }
 
-    CleanupMatchList(match_list, match_count);
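+    // Re-take the lock only to unmark the matched events and free the match list.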
+    {
+      MutexLock mu(Thread::Current(), event_list_lock_);
+      CleanupMatchList(match_list, match_count);
+    }
   }
 
   Dbg::ManageDeoptimization();
 
-  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, thread_id);
   return match_count != 0;
 }
 
-bool JdwpState::PostFieldEvent(const JdwpLocation* pLoc, RefTypeId typeId, FieldId fieldId,
-                               ObjectId thisPtr, const JValue* fieldValue, bool is_modification) {
+bool JdwpState::PostFieldEvent(const EventLocation* pLoc, mirror::ArtField* field,
+                               mirror::Object* this_object, const JValue* fieldValue,
+                               bool is_modification) {
+  DCHECK(pLoc != nullptr);
+  DCHECK(field != nullptr);
+  DCHECK_EQ(fieldValue != nullptr, is_modification);
+  DCHECK_EQ(field->IsStatic(), this_object == nullptr);
+
   ModBasket basket;
   basket.pLoc = pLoc;
-  basket.classId = pLoc->class_id;
-  basket.thisPtr = thisPtr;
-  basket.threadId = Dbg::GetThreadSelfId();
-  basket.className = Dbg::GetClassName(pLoc->class_id);
-  basket.fieldTypeID = typeId;
-  basket.fieldId = fieldId;
-
-  DCHECK_EQ(fieldValue != nullptr, is_modification);
+  basket.locationClass = pLoc->method->GetDeclaringClass();
+  basket.thisPtr = this_object;
+  basket.thread = Thread::Current();
+  basket.className = Dbg::GetClassName(basket.locationClass);
+  basket.field = field;
 
   if (InvokeInProgress()) {
     VLOG(jdwp) << "Not posting field event during invoke";
     return false;
   }
 
-  // Get field's reference type tag.
-  JDWP::JdwpTypeTag type_tag;
-  uint32_t class_status;  // unused here.
-  JdwpError error = Dbg::GetClassInfo(typeId, &type_tag, &class_status, NULL);
-  if (error != ERR_NONE) {
-    return false;
-  }
-
-  // Get instance type tag.
-  uint8_t tag;
-  error = Dbg::GetObjectTag(thisPtr, &tag);
-  if (error != ERR_NONE) {
-    return false;
-  }
-
   size_t match_count = 0;
   ExpandBuf* pReq = NULL;
   JdwpSuspendPolicy suspend_policy = SP_NONE;
+  JdwpEvent** match_list = nullptr;
+  ObjectId thread_id = 0;
   {
-    MutexLock mu(Thread::Current(), event_list_lock_);
-    JdwpEvent** match_list = AllocMatchList(event_list_size_);
-
-    if (is_modification) {
-      FindMatchingEvents(EK_FIELD_MODIFICATION, basket, match_list, &match_count);
-    } else {
-      FindMatchingEvents(EK_FIELD_ACCESS, basket, match_list, &match_count);
+    {
+      MutexLock mu(Thread::Current(), event_list_lock_);
+      match_list = AllocMatchList(event_list_size_);
+      if (is_modification) {
+        FindMatchingEvents(EK_FIELD_MODIFICATION, basket, match_list, &match_count);
+      } else {
+        FindMatchingEvents(EK_FIELD_ACCESS, basket, match_list, &match_count);
+      }
     }
     if (match_count != 0) {
       suspend_policy = scanSuspendPolicy(match_list, match_count);
 
+      thread_id = Dbg::GetThreadId(basket.thread);
+      ObjectRegistry* registry = Dbg::GetObjectRegistry();
+      ObjectId instance_id = registry->Add(basket.thisPtr);
+      RefTypeId field_type_id = registry->AddRefType(field->GetDeclaringClass());
+      FieldId field_id = Dbg::ToFieldId(field);
+      JDWP::JdwpLocation jdwp_location;
+      SetJdwpLocationFromEventLocation(pLoc, &jdwp_location);
+
       if (VLOG_IS_ON(jdwp)) {
-        LogMatchingEventsAndThread(match_list, match_count, basket);
-        VLOG(jdwp) << "  location=" << *pLoc;
-        VLOG(jdwp) << StringPrintf("  this=%#" PRIx64, basket.thisPtr);
-        VLOG(jdwp) << StringPrintf("  type=%#" PRIx64, basket.fieldTypeID) << " "
-                   << Dbg::GetClassName(basket.fieldTypeID);
-        VLOG(jdwp) << StringPrintf("  field=%#" PRIx32, basket.fieldId) << " "
-                   << Dbg::GetFieldName(basket.fieldId);
+        LogMatchingEventsAndThread(match_list, match_count, thread_id);
+        VLOG(jdwp) << "  location=" << jdwp_location;
+        VLOG(jdwp) << StringPrintf("  this=%#" PRIx64, instance_id);
+        VLOG(jdwp) << StringPrintf("  type=%#" PRIx64, field_type_id) << " "
+                   << Dbg::GetClassName(field_type_id);
+        VLOG(jdwp) << StringPrintf("  field=%#" PRIx32, field_id) << " "
+                   << Dbg::GetFieldName(field_id);
         VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
       }
 
@@ -959,28 +982,41 @@
       expandBufAdd1(pReq, suspend_policy);
       expandBufAdd4BE(pReq, match_count);
 
+      // Get field's reference type tag.
+      JDWP::JdwpTypeTag type_tag = Dbg::GetTypeTag(field->GetDeclaringClass());
+
+      // Get instance type tag.
+      uint8_t tag;
+      {
+        ScopedObjectAccessUnchecked soa(Thread::Current());
+        tag = Dbg::TagFromObject(soa, basket.thisPtr);
+      }
+
       for (size_t i = 0; i < match_count; i++) {
         expandBufAdd1(pReq, match_list[i]->eventKind);
         expandBufAdd4BE(pReq, match_list[i]->requestId);
-        expandBufAdd8BE(pReq, basket.threadId);
-        expandBufAddLocation(pReq, *pLoc);
+        expandBufAdd8BE(pReq, thread_id);
+        expandBufAddLocation(pReq, jdwp_location);
         expandBufAdd1(pReq, type_tag);
-        expandBufAddRefTypeId(pReq, typeId);
-        expandBufAddFieldId(pReq, fieldId);
+        expandBufAddRefTypeId(pReq, field_type_id);
+        expandBufAddFieldId(pReq, field_id);
         expandBufAdd1(pReq, tag);
-        expandBufAddObjectId(pReq, thisPtr);
+        expandBufAddObjectId(pReq, instance_id);
         if (is_modification) {
-          Dbg::OutputFieldValue(fieldId, fieldValue, pReq);
+          Dbg::OutputFieldValue(field_id, fieldValue, pReq);
         }
       }
     }
 
-    CleanupMatchList(match_list, match_count);
+    {
+      MutexLock mu(Thread::Current(), event_list_lock_);
+      CleanupMatchList(match_list, match_count);
+    }
   }
 
   Dbg::ManageDeoptimization();
 
-  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, thread_id);
   return match_count != 0;
 }
 
@@ -990,8 +1026,8 @@
  * Valid mods:
  *  Count, ThreadOnly
  */
-bool JdwpState::PostThreadChange(ObjectId threadId, bool start) {
-  CHECK_EQ(threadId, Dbg::GetThreadSelfId());
+bool JdwpState::PostThreadChange(Thread* thread, bool start) {
+  CHECK_EQ(thread, Thread::Current());
 
   /*
    * I don't think this can happen.
@@ -1002,27 +1038,32 @@
   }
 
   ModBasket basket;
-  basket.threadId = threadId;
+  basket.thread = thread;
 
   ExpandBuf* pReq = NULL;
   JdwpSuspendPolicy suspend_policy = SP_NONE;
+  JdwpEvent** match_list = nullptr;
   size_t match_count = 0;
+  ObjectId thread_id = 0;
   {
-    // Don't allow the list to be updated while we scan it.
-    MutexLock mu(Thread::Current(), event_list_lock_);
-    JdwpEvent** match_list = AllocMatchList(event_list_size_);
-
-    if (start) {
-      FindMatchingEvents(EK_THREAD_START, basket, match_list, &match_count);
-    } else {
-      FindMatchingEvents(EK_THREAD_DEATH, basket, match_list, &match_count);
+    {
+      // Don't allow the list to be updated while we scan it.
+      MutexLock mu(Thread::Current(), event_list_lock_);
+      match_list = AllocMatchList(event_list_size_);
+      if (start) {
+        FindMatchingEvents(EK_THREAD_START, basket, match_list, &match_count);
+      } else {
+        FindMatchingEvents(EK_THREAD_DEATH, basket, match_list, &match_count);
+      }
     }
 
     if (match_count != 0) {
       suspend_policy = scanSuspendPolicy(match_list, match_count);
 
+      thread_id = Dbg::GetThreadId(basket.thread);
+
       if (VLOG_IS_ON(jdwp)) {
-        LogMatchingEventsAndThread(match_list, match_count, basket);
+        LogMatchingEventsAndThread(match_list, match_count, thread_id);
         VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
       }
 
@@ -1033,16 +1074,19 @@
       for (size_t i = 0; i < match_count; i++) {
         expandBufAdd1(pReq, match_list[i]->eventKind);
         expandBufAdd4BE(pReq, match_list[i]->requestId);
-        expandBufAdd8BE(pReq, basket.threadId);
+        expandBufAdd8BE(pReq, thread_id);
       }
     }
 
-    CleanupMatchList(match_list, match_count);
+    {
+      MutexLock mu(Thread::Current(), event_list_lock_);
+      CleanupMatchList(match_list, match_count);
+    }
   }
 
   Dbg::ManageDeoptimization();
 
-  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, thread_id);
 
   return match_count != 0;
 }
@@ -1076,17 +1120,28 @@
  * because there's a pretty good chance that we're not going to send it
  * up the debugger.
  */
-bool JdwpState::PostException(const JdwpLocation* pThrowLoc,
-                              ObjectId exceptionId, RefTypeId exceptionClassId,
-                              const JdwpLocation* pCatchLoc, ObjectId thisPtr) {
-  ModBasket basket;
+bool JdwpState::PostException(const EventLocation* pThrowLoc, mirror::Throwable* exception_object,
+                              const EventLocation* pCatchLoc, mirror::Object* thisPtr) {
+  DCHECK(exception_object != nullptr);
+  DCHECK(pThrowLoc != nullptr);
+  DCHECK(pCatchLoc != nullptr);
+  if (pThrowLoc->method != nullptr) {
+    DCHECK_EQ(pThrowLoc->method->IsStatic(), thisPtr == nullptr);
+  } else {
+    VLOG(jdwp) << "Unexpected: exception event with empty throw location";
+  }
 
+  ModBasket basket;
   basket.pLoc = pThrowLoc;
-  basket.classId = pThrowLoc->class_id;
-  basket.threadId = Dbg::GetThreadSelfId();
-  basket.className = Dbg::GetClassName(basket.classId);
-  basket.excepClassId = exceptionClassId;
-  basket.caught = (pCatchLoc->class_id != 0);
+  if (pThrowLoc->method != nullptr) {
+    basket.locationClass = pThrowLoc->method->GetDeclaringClass();
+  } else {
+    basket.locationClass = nullptr;
+  }
+  basket.thread = Thread::Current();
+  basket.className = Dbg::GetClassName(basket.locationClass);
+  basket.exceptionClass = exception_object->GetClass();
+  basket.caught = (pCatchLoc->method != nullptr);
   basket.thisPtr = thisPtr;
 
   /* don't try to post an exception caused by the debugger */
@@ -1098,24 +1153,37 @@
   size_t match_count = 0;
   ExpandBuf* pReq = NULL;
   JdwpSuspendPolicy suspend_policy = SP_NONE;
+  JdwpEvent** match_list = nullptr;
+  ObjectId thread_id = 0;
   {
-    MutexLock mu(Thread::Current(), event_list_lock_);
-    JdwpEvent** match_list = AllocMatchList(event_list_size_);
-    FindMatchingEvents(EK_EXCEPTION, basket, match_list, &match_count);
+    {
+      MutexLock mu(Thread::Current(), event_list_lock_);
+      match_list = AllocMatchList(event_list_size_);
+      FindMatchingEvents(EK_EXCEPTION, basket, match_list, &match_count);
+    }
     if (match_count != 0) {
       suspend_policy = scanSuspendPolicy(match_list, match_count);
 
+      thread_id = Dbg::GetThreadId(basket.thread);
+      ObjectRegistry* registry = Dbg::GetObjectRegistry();
+      ObjectId exceptionId = registry->Add(exception_object);
+      JDWP::JdwpLocation jdwp_throw_location;
+      JDWP::JdwpLocation jdwp_catch_location;
+      SetJdwpLocationFromEventLocation(pThrowLoc, &jdwp_throw_location);
+      SetJdwpLocationFromEventLocation(pCatchLoc, &jdwp_catch_location);
+
       if (VLOG_IS_ON(jdwp)) {
-        LogMatchingEventsAndThread(match_list, match_count, basket);
-        VLOG(jdwp) << "  throwLocation=" << *pThrowLoc;
-        if (pCatchLoc->class_id == 0) {
+        std::string exceptionClassName(PrettyDescriptor(exception_object->GetClass()));
+
+        LogMatchingEventsAndThread(match_list, match_count, thread_id);
+        VLOG(jdwp) << "  throwLocation=" << jdwp_throw_location;
+        if (jdwp_catch_location.class_id == 0) {
           VLOG(jdwp) << "  catchLocation=uncaught";
         } else {
-          VLOG(jdwp) << "  catchLocation=" << *pCatchLoc;
+          VLOG(jdwp) << "  catchLocation=" << jdwp_catch_location;
         }
-        VLOG(jdwp) << StringPrintf("  this=%#" PRIx64, basket.thisPtr);
-        VLOG(jdwp) << StringPrintf("  exceptionClass=%#" PRIx64, basket.excepClassId) << " "
-                   << Dbg::GetClassName(basket.excepClassId);
+        VLOG(jdwp) << StringPrintf("  exception=%#" PRIx64, exceptionId) << " "
+                   << exceptionClassName;
         VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
       }
 
@@ -1126,21 +1194,23 @@
       for (size_t i = 0; i < match_count; i++) {
         expandBufAdd1(pReq, match_list[i]->eventKind);
         expandBufAdd4BE(pReq, match_list[i]->requestId);
-        expandBufAdd8BE(pReq, basket.threadId);
-
-        expandBufAddLocation(pReq, *pThrowLoc);
+        expandBufAdd8BE(pReq, thread_id);
+        expandBufAddLocation(pReq, jdwp_throw_location);
         expandBufAdd1(pReq, JT_OBJECT);
         expandBufAdd8BE(pReq, exceptionId);
-        expandBufAddLocation(pReq, *pCatchLoc);
+        expandBufAddLocation(pReq, jdwp_catch_location);
       }
     }
 
-    CleanupMatchList(match_list, match_count);
+    {
+      MutexLock mu(Thread::Current(), event_list_lock_);
+      CleanupMatchList(match_list, match_count);
+    }
   }
 
   Dbg::ManageDeoptimization();
 
-  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, thread_id);
 
   return match_count != 0;
 }
@@ -1151,13 +1221,13 @@
  * Valid mods:
  *  Count, ThreadOnly, ClassOnly, ClassMatch, ClassExclude
  */
-bool JdwpState::PostClassPrepare(JdwpTypeTag tag, RefTypeId refTypeId, const std::string& signature,
-                                 int status) {
-  ModBasket basket;
+bool JdwpState::PostClassPrepare(mirror::Class* klass) {
+  DCHECK(klass != nullptr);
 
-  basket.classId = refTypeId;
-  basket.threadId = Dbg::GetThreadSelfId();
-  basket.className = Dbg::GetClassName(basket.classId);
+  ModBasket basket;
+  basket.locationClass = klass;
+  basket.thread = Thread::Current();
+  basket.className = Dbg::GetClassName(basket.locationClass);
 
   /* suppress class prep caused by debugger */
   if (InvokeInProgress()) {
@@ -1167,28 +1237,44 @@
 
   ExpandBuf* pReq = NULL;
   JdwpSuspendPolicy suspend_policy = SP_NONE;
+  JdwpEvent** match_list = nullptr;
   size_t match_count = 0;
+  ObjectId thread_id = 0;
   {
-    MutexLock mu(Thread::Current(), event_list_lock_);
-    JdwpEvent** match_list = AllocMatchList(event_list_size_);
-    FindMatchingEvents(EK_CLASS_PREPARE, basket, match_list, &match_count);
+    {
+      MutexLock mu(Thread::Current(), event_list_lock_);
+      match_list = AllocMatchList(event_list_size_);
+      FindMatchingEvents(EK_CLASS_PREPARE, basket, match_list, &match_count);
+    }
     if (match_count != 0) {
       suspend_policy = scanSuspendPolicy(match_list, match_count);
 
+      thread_id = Dbg::GetThreadId(basket.thread);
+      ObjectRegistry* registry = Dbg::GetObjectRegistry();
+      RefTypeId class_id = registry->AddRefType(basket.locationClass);
+
+      // OLD-TODO - we currently always send both "verified" and "prepared" since
+      // debuggers seem to like that.  There might be some advantage to honesty,
+      // since the class may not yet be verified.
+      int status = JDWP::CS_VERIFIED | JDWP::CS_PREPARED;
+      JDWP::JdwpTypeTag tag = Dbg::GetTypeTag(basket.locationClass);
+      std::string temp;
+      std::string signature(basket.locationClass->GetDescriptor(&temp));
+
       if (VLOG_IS_ON(jdwp)) {
-        LogMatchingEventsAndThread(match_list, match_count, basket);
-        VLOG(jdwp) << StringPrintf("  type=%#" PRIx64, basket.classId)<< " " << signature;
+        LogMatchingEventsAndThread(match_list, match_count, thread_id);
+        VLOG(jdwp) << StringPrintf("  type=%#" PRIx64, class_id) << " " << signature;
         VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
       }
 
-      if (basket.threadId == debug_thread_id_) {
+      if (thread_id == debug_thread_id_) {
         /*
          * JDWP says that, for a class prep in the debugger thread, we
-         * should set threadId to null and if any threads were supposed
+         * should set thread to null and if any threads were supposed
          * to be suspended then we suspend all other threads.
          */
         VLOG(jdwp) << "  NOTE: class prepare in debugger thread!";
-        basket.threadId = 0;
+        thread_id = 0;
         if (suspend_policy == SP_EVENT_THREAD) {
           suspend_policy = SP_ALL;
         }
@@ -1201,20 +1287,23 @@
       for (size_t i = 0; i < match_count; i++) {
         expandBufAdd1(pReq, match_list[i]->eventKind);
         expandBufAdd4BE(pReq, match_list[i]->requestId);
-        expandBufAdd8BE(pReq, basket.threadId);
-
+        expandBufAdd8BE(pReq, thread_id);
         expandBufAdd1(pReq, tag);
-        expandBufAdd8BE(pReq, refTypeId);
+        expandBufAdd8BE(pReq, class_id);
         expandBufAddUtf8String(pReq, signature);
         expandBufAdd4BE(pReq, status);
       }
     }
-    CleanupMatchList(match_list, match_count);
+
+    {
+      MutexLock mu(Thread::Current(), event_list_lock_);
+      CleanupMatchList(match_list, match_count);
+    }
   }
 
   Dbg::ManageDeoptimization();
 
-  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, thread_id);
 
   return match_count != 0;
 }
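
Note on the pattern above: every Post* handler now takes event_list_lock_ twice, once to allocate and scan the match list and once to clean it up, while the reply packet and all object-registry work happen between the two critical sections. A stand-alone sketch of that locking discipline, using hypothetical types rather than ART's, might look like:

    #include <mutex>
    #include <vector>

    struct Event { int kind; };

    class EventPoster {
     public:
      // Returns true if any registered event matched the posted kind.
      bool Post(int kind) {
        std::vector<Event*> matches;
        {
          // First critical section: scan the shared list under the lock.
          std::lock_guard<std::mutex> mu(event_list_lock_);
          for (Event* e : event_list_) {
            if (e->kind == kind) {
              matches.push_back(e);
            }
          }
        }
        // Build the reply outside the lock; this work may block or reenter.
        for (Event* e : matches) {
          AppendToReply(e);
        }
        {
          // Second critical section: release match bookkeeping under the lock.
          std::lock_guard<std::mutex> mu(event_list_lock_);
          Cleanup(&matches);
        }
        return !matches.empty();
      }

     private:
      void AppendToReply(const Event*) { /* serialize into a buffer */ }
      void Cleanup(std::vector<Event*>*) { /* drop match references */ }

      std::mutex event_list_lock_;
      std::vector<Event*> event_list_;
    };
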
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 35095f9..16a774f 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -151,7 +151,12 @@
     /* show detailed debug output */
     if (resultTag == JT_STRING && exceptObjId == 0) {
       if (resultValue != 0) {
-        VLOG(jdwp) << "      string '" << Dbg::StringToUtf8(resultValue) << "'";
+        if (VLOG_IS_ON(jdwp)) {
+          std::string result_string;
+          JDWP::JdwpError error = Dbg::StringToUtf8(resultValue, &result_string);
+          CHECK_EQ(error, JDWP::ERR_NONE);
+          VLOG(jdwp) << "      string '" << result_string << "'";
+        }
       } else {
         VLOG(jdwp) << "      string (null)";
       }
@@ -220,7 +225,7 @@
 static JdwpError VM_AllThreads(JdwpState*, Request*, ExpandBuf* pReply)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::vector<ObjectId> thread_ids;
-  Dbg::GetThreads(0, &thread_ids);
+  Dbg::GetThreads(nullptr /* all thread groups */, &thread_ids);
 
   expandBufAdd4BE(pReply, thread_ids.size());
   for (uint32_t i = 0; i < thread_ids.size(); ++i) {
@@ -919,7 +924,11 @@
 static JdwpError SR_Value(JdwpState*, Request* request, ExpandBuf* pReply)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectId stringObject = request->ReadObjectId();
-  std::string str(Dbg::StringToUtf8(stringObject));
+  std::string str;
+  JDWP::JdwpError error = Dbg::StringToUtf8(stringObject, &str);
+  if (error != JDWP::ERR_NONE) {
+    return error;
+  }
 
   VLOG(jdwp) << StringPrintf("    --> %s", PrintableString(str.c_str()).c_str());
 
@@ -1141,10 +1150,7 @@
 static JdwpError TGR_Name(JdwpState*, Request* request, ExpandBuf* pReply)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectId thread_group_id = request->ReadThreadGroupId();
-
-  expandBufAddUtf8String(pReply, Dbg::GetThreadGroupName(thread_group_id));
-
-  return ERR_NONE;
+  return Dbg::GetThreadGroupName(thread_group_id, pReply);
 }
 
 /*
@@ -1154,11 +1160,7 @@
 static JdwpError TGR_Parent(JdwpState*, Request* request, ExpandBuf* pReply)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectId thread_group_id = request->ReadThreadGroupId();
-
-  ObjectId parentGroup = Dbg::GetThreadGroupParent(thread_group_id);
-  expandBufAddObjectId(pReply, parentGroup);
-
-  return ERR_NONE;
+  return Dbg::GetThreadGroupParent(thread_group_id, pReply);
 }
 
 /*
@@ -1168,22 +1170,7 @@
 static JdwpError TGR_Children(JdwpState*, Request* request, ExpandBuf* pReply)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectId thread_group_id = request->ReadThreadGroupId();
-
-  std::vector<ObjectId> thread_ids;
-  Dbg::GetThreads(thread_group_id, &thread_ids);
-  expandBufAdd4BE(pReply, thread_ids.size());
-  for (uint32_t i = 0; i < thread_ids.size(); ++i) {
-    expandBufAddObjectId(pReply, thread_ids[i]);
-  }
-
-  std::vector<ObjectId> child_thread_groups_ids;
-  Dbg::GetChildThreadGroups(thread_group_id, &child_thread_groups_ids);
-  expandBufAdd4BE(pReply, child_thread_groups_ids.size());
-  for (uint32_t i = 0; i < child_thread_groups_ids.size(); ++i) {
-    expandBufAddObjectId(pReply, child_thread_groups_ids[i]);
-  }
-
-  return ERR_NONE;
+  return Dbg::GetThreadGroupChildren(thread_group_id, pReply);
 }
 
 /*
@@ -1398,26 +1385,7 @@
  */
 static JdwpError SF_GetValues(JdwpState*, Request* request, ExpandBuf* pReply)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectId thread_id = request->ReadThreadId();
-  FrameId frame_id = request->ReadFrameId();
-  int32_t slot_count = request->ReadSigned32("slot count");
-
-  expandBufAdd4BE(pReply, slot_count);     /* "int values" */
-  for (int32_t i = 0; i < slot_count; ++i) {
-    uint32_t slot = request->ReadUnsigned32("slot");
-    JDWP::JdwpTag reqSigByte = request->ReadTag();
-
-    VLOG(jdwp) << "    --> slot " << slot << " " << reqSigByte;
-
-    size_t width = Dbg::GetTagWidth(reqSigByte);
-    uint8_t* ptr = expandBufAddSpace(pReply, width+1);
-    JdwpError error = Dbg::GetLocalValue(thread_id, frame_id, slot, reqSigByte, ptr, width);
-    if (error != ERR_NONE) {
-      return error;
-    }
-  }
-
-  return ERR_NONE;
+  return Dbg::GetLocalValues(request, pReply);
 }
 
 /*
@@ -1425,24 +1393,7 @@
  */
 static JdwpError SF_SetValues(JdwpState*, Request* request, ExpandBuf*)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectId thread_id = request->ReadThreadId();
-  FrameId frame_id = request->ReadFrameId();
-  int32_t slot_count = request->ReadSigned32("slot count");
-
-  for (int32_t i = 0; i < slot_count; ++i) {
-    uint32_t slot = request->ReadUnsigned32("slot");
-    JDWP::JdwpTag sigByte = request->ReadTag();
-    size_t width = Dbg::GetTagWidth(sigByte);
-    uint64_t value = request->ReadValue(width);
-
-    VLOG(jdwp) << "    --> slot " << slot << " " << sigByte << " " << value;
-    JdwpError error = Dbg::SetLocalValue(thread_id, frame_id, slot, sigByte, value, width);
-    if (error != ERR_NONE) {
-      return error;
-    }
-  }
-
-  return ERR_NONE;
+  return Dbg::SetLocalValues(request);
 }
 
 static JdwpError SF_ThisObject(JdwpState*, Request* request, ExpandBuf* reply)
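
The handler rewrites above all follow one convention: parsing and formatting move into Dbg:: helpers that return a JdwpError, so each command handler reduces to a single call whose error the dispatcher propagates. The StringToUtf8 change is the smallest instance; a generic sketch of the convention, with invented names, is:

    #include <cstdint>
    #include <string>

    enum Error { ERR_NONE, ERR_INVALID_OBJECT };

    // Old shape: returned the string directly and could not report a stale id.
    // New shape: status code plus out-parameter.
    Error StringToUtf8(uint64_t id, std::string* out) {
      if (id == 0) {
        return ERR_INVALID_OBJECT;
      }
      *out = "example";  // stand-in for the real object-registry lookup
      return ERR_NONE;
    }

    Error HandleStringValue(uint64_t id) {
      std::string str;
      Error error = StringToUtf8(id, &str);
      if (error != ERR_NONE) {
        return error;  // early-out, mirroring SR_Value above
      }
      // ... format str into the reply ...
      return ERR_NONE;
    }
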
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index ae17070..8447616 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -285,14 +285,17 @@
 }
 
 inline StackMap ArtMethod::GetStackMap(uint32_t native_pc_offset) {
+  return GetOptimizedCodeInfo().GetStackMapForNativePcOffset(native_pc_offset);
+}
+
+inline CodeInfo ArtMethod::GetOptimizedCodeInfo() {
   DCHECK(IsOptimized());
   const void* code_pointer = GetQuickOatCodePointer();
   DCHECK(code_pointer != nullptr);
   uint32_t offset =
       reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
   const void* data = reinterpret_cast<const void*>(reinterpret_cast<const uint8_t*>(code_pointer) - offset);
-  CodeInfo code_info(data);
-  return code_info.GetStackMapForNativePcOffset(native_pc_offset);
+  return CodeInfo(data);
 }
 
 inline void ArtMethod::SetOatNativeGcMapOffset(uint32_t gc_map_offset) {
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index d37aa57..de6ec05 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -155,7 +155,9 @@
     // Temporary solution for detecting if a method has been optimized: the compiler
     // does not create a GC map. Instead, the vmap table contains the stack map
     // (as in stack_map.h).
-    return (GetEntryPointFromQuickCompiledCode() != nullptr) && (GetNativeGcMap() == nullptr);
+    return (GetEntryPointFromQuickCompiledCode() != nullptr)
+        && (GetQuickOatCodePointer() != nullptr)
+        && (GetNativeGcMap() == nullptr);
   }
 
   bool IsPortableCompiled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -349,6 +351,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   StackMap GetStackMap(uint32_t native_pc_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  CodeInfo GetOptimizedCodeInfo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const uint8_t* GetNativeGcMap() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetFieldPtr<uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, gc_map_));
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index ff9dc38..65a7919 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -292,7 +292,7 @@
   std::unique_ptr<const OatFile> oat_file(OatFile::Open(oat_filename, oat_filename, nullptr,
                                                         false, &error_msg));
   if (oat_file.get() == nullptr) {
-    if (kVerboseLogging) {
+    if (kReasonLogging) {
       LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << oat_filename
           << "' for file location '" << filename << "': " << error_msg;
     }
@@ -319,13 +319,13 @@
         return kUpToDate;
       } else if (should_relocate_if_possible &&
                   ClassLinker::VerifyOatImageChecksum(oat_file.get(), target_instruction_set)) {
-        if (kVerboseLogging) {
+        if (kReasonLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename
                     << " needs to be relocated for " << filename;
         }
         return kPatchoatNeeded;
       } else {
-        if (kVerboseLogging) {
+        if (kReasonLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename
                     << " is out of date for " << filename;
         }
@@ -343,13 +343,13 @@
       } else if (location_checksum == oat_dex_file->GetDexFileLocationChecksum()
                   && should_relocate_if_possible
                   && ClassLinker::VerifyOatImageChecksum(oat_file.get(), target_instruction_set)) {
-        if (kVerboseLogging) {
+        if (kReasonLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename
                     << " needs to be relocated for " << filename;
         }
         return kPatchoatNeeded;
       } else {
-        if (kVerboseLogging) {
+        if (kReasonLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename
                     << " is out of date for " << filename;
         }
@@ -357,7 +357,7 @@
       }
     }
   } else {
-    if (kVerboseLogging) {
+    if (kReasonLogging) {
       LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename
                 << " does not contain " << filename;
     }
@@ -367,9 +367,10 @@
 
 static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename,
     const char* pkgname, const char* instruction_set, const jboolean defer) {
-  // TODO disable this logging.
-  const bool kVerboseLogging = false;  // Spammy logging.
-  const bool kReasonLogging = true;  // Logging of reason for returning JNI_TRUE.
+  // Spammy logging for the kUpToDate case.
+  const bool kVerboseLogging = false;
+  // Logging of reason for returning kDexoptNeeded or kPatchoatNeeded.
+  const bool kReasonLogging = true;
 
   if ((filename == nullptr) || !OS::FileExists(filename)) {
     LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist";
@@ -504,7 +505,9 @@
   std::string cache_dir;
   bool have_android_data = false;
   bool dalvik_cache_exists = false;
-  GetDalvikCache(instruction_set, false, &cache_dir, &have_android_data, &dalvik_cache_exists);
+  bool is_global_cache = false;
+  GetDalvikCache(instruction_set, false, &cache_dir, &have_android_data, &dalvik_cache_exists,
+                 &is_global_cache);
   std::string cache_filename;  // was cache_location
   bool have_cache_filename = false;
   if (dalvik_cache_exists) {
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index ceff206..d8a537f 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -60,11 +60,11 @@
 }
 
 static void VMDebug_startAllocCounting(JNIEnv*, jclass) {
-  Runtime::Current()->SetStatsEnabled(true);
+  Runtime::Current()->SetStatsEnabled(true, false);
 }
 
 static void VMDebug_stopAllocCounting(JNIEnv*, jclass) {
-  Runtime::Current()->SetStatsEnabled(false);
+  Runtime::Current()->SetStatsEnabled(false, false);
 }
 
 static jint VMDebug_getAllocCount(JNIEnv*, jclass, jint kind) {
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 6b4f764..2bd994d 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -80,7 +80,7 @@
     gc::kCollectorTypeCMS),
 #error "ART default GC type must be set"
 #endif
-    background_collector_type_(gc::kCollectorTypeHomogeneousSpaceCompact),
+    background_collector_type_(gc::kCollectorTypeNone),
                                                     // If background_collector_type_ is
                                                     // kCollectorTypeNone, it defaults to the
                                                     // collector_type_ after parsing options. If
@@ -696,6 +696,12 @@
       return false;
     }
   }
+  // If not set explicitly, the background collector type defaults to
+  // homogeneous space compaction, or to semi-space in low-memory mode.
+  if (background_collector_type_ == gc::kCollectorTypeNone) {
+    background_collector_type_ = low_memory_mode_ ?
+        gc::kCollectorTypeSS : gc::kCollectorTypeHomogeneousSpaceCompact;
+  }
 
   // If a reference to the dalvik core.jar snuck in, replace it with
   // the art specific version. This can happen with on device
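
The parsed_options.cc change uses kCollectorTypeNone as a "not set by the user" sentinel and resolves the real default only after all flags are parsed, so the choice can depend on low_memory_mode_. The deferred-default idiom in isolation (illustrative enum, not ART's):

    enum CollectorType { kNone, kSemiSpace, kHomogeneousSpaceCompact };

    struct Options {
      bool low_memory_mode = false;
      CollectorType background_collector = kNone;  // sentinel: not set by user

      // Called once parsing is finished, when low_memory_mode is known.
      void ResolveDefaults() {
        if (background_collector == kNone) {
          background_collector = low_memory_mode ? kSemiSpace
                                                 : kHomogeneousSpaceCompact;
        }
      }
    };
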
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 0edf116..3432aa8 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -72,6 +72,7 @@
 #include "reflection.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
+#include "sigchain.h"
 #include "signal_catcher.h"
 #include "signal_set.h"
 #include "handle_scope-inl.h"
@@ -567,13 +568,15 @@
   std::string cache_filename_unused;
   bool dalvik_cache_exists_unused;
   bool has_cache_unused;
+  bool is_global_cache_unused;
   bool found_image = gc::space::ImageSpace::FindImageFilename(image_location.c_str(),
                                                               kRuntimeISA,
                                                               &system_filename,
                                                               &has_system,
                                                               &cache_filename_unused,
                                                               &dalvik_cache_exists_unused,
-                                                              &has_cache_unused);
+                                                              &has_cache_unused,
+                                                              &is_global_cache_unused);
   *failures = 0;
   if (!found_image || !has_system) {
     return false;
@@ -740,6 +743,11 @@
       break;
   }
 
+  // Always initialize the signal chain so that any calls to sigaction get
+  // correctly routed to the next in the chain regardless of whether we
+  // have claimed the signal or not.
+  InitializeSignalChain();
+
   if (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_) {
     fault_manager.Init();
 
@@ -999,14 +1007,14 @@
   }
 }
 
-void Runtime::SetStatsEnabled(bool new_state) {
+void Runtime::SetStatsEnabled(bool new_state, bool suspended) {
   if (new_state == true) {
     GetStats()->Clear(~0);
     // TODO: wouldn't it make more sense to clear _all_ threads' stats?
     Thread::Current()->GetStats()->Clear(~0);
-    GetInstrumentation()->InstrumentQuickAllocEntryPoints();
+    GetInstrumentation()->InstrumentQuickAllocEntryPoints(suspended);
   } else {
-    GetInstrumentation()->UninstrumentQuickAllocEntryPoints();
+    GetInstrumentation()->UninstrumentQuickAllocEntryPoints(suspended);
   }
   stats_enabled_ = new_state;
 }
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 9a8235d..84e40ad 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -389,7 +389,7 @@
 
   void ResetStats(int kinds);
 
-  void SetStatsEnabled(bool new_state);
+  void SetStatsEnabled(bool new_state, bool suspended);
 
   enum class NativeBridgeAction {  // private
     kUnload,
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c54bebe..ae89c90 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -931,6 +931,13 @@
     return false;
   }
 
+  // Threads with no managed stack frames should be shown.
+  const ManagedStack* managed_stack = thread->GetManagedStack();
+  if (managed_stack == NULL || (managed_stack->GetTopQuickFrame() == NULL &&
+      managed_stack->GetTopShadowFrame() == NULL)) {
+    return true;
+  }
+
   // In some other native method? That's interesting.
   // We don't just check kNative because native methods will be in state kSuspended if they're
   // calling back into the VM, or kBlocked if they're blocked on a monitor, or one of the
@@ -961,7 +968,7 @@
     // If we're currently in native code, dump that stack before dumping the managed stack.
     if (dump_for_abort || ShouldShowNativeStack(this)) {
       DumpKernelStack(os, GetTid(), "  kernel: ", false);
-      DumpNativeStack(os, GetTid(), "  native: ", GetCurrentMethod(nullptr));
+      DumpNativeStack(os, GetTid(), "  native: ", GetCurrentMethod(nullptr, !dump_for_abort));
     }
     DumpJavaStack(os);
   } else {
diff --git a/runtime/thread.h b/runtime/thread.h
index d96b50b..164eb86 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1180,6 +1180,23 @@
   DISALLOW_COPY_AND_ASSIGN(Thread);
 };
 
+class ScopedAssertNoThreadSuspension {
+ public:
+  ScopedAssertNoThreadSuspension(Thread* self, const char* cause)
+      : self_(self), old_cause_(self->StartAssertNoThreadSuspension(cause)) {
+  }
+  ~ScopedAssertNoThreadSuspension() {
+    self_->EndAssertNoThreadSuspension(old_cause_);
+  }
+  Thread* Self() {
+    return self_;
+  }
+
+ private:
+  Thread* const self_;
+  const char* old_cause_;
+};
+
 std::ostream& operator<<(std::ostream& os, const Thread& thread);
 std::ostream& operator<<(std::ostream& os, const ThreadState& state);
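
The new ScopedAssertNoThreadSuspension pairs StartAssertNoThreadSuspension with EndAssertNoThreadSuspension through RAII, so the assertion is released on every return path. A hypothetical use site (the function name is illustrative):

    void WalkRawReferences(Thread* self) {
      // Released automatically when `sants` goes out of scope, even on
      // early returns, unlike manual Start/End pairs.
      ScopedAssertNoThreadSuspension sants(self, "Walking raw references");
      // ... code that must not suspend, e.g. while holding raw Object* ...
    }
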
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 2dbfb3e..ec5b775 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -728,11 +728,14 @@
       Thread::resume_cond_->Wait(self);
       if (self->GetSuspendCount() != 0) {
         // The condition was signaled but we're still suspended. This
-        // can happen if the debugger lets go while a SIGQUIT thread
+        // can happen when we suspend then resume all threads to
+        // update instrumentation or compute monitor info. This can
+        // also happen if the debugger lets go while a SIGQUIT thread
         // dump event is pending (assuming SignalCatcher was resumed for
         // just long enough to try to grab the thread-suspend lock).
-        LOG(WARNING) << *self << " still suspended after undo "
-                   << "(suspend count=" << self->GetSuspendCount() << ")";
+        VLOG(jdwp) << *self << " still suspended after undo "
+                   << "(suspend count=" << self->GetSuspendCount() << ", "
+                   << "debug suspend count=" << self->GetDebugSuspendCount() << ")";
       }
     }
     CHECK_EQ(self->GetSuspendCount(), 0);
@@ -872,14 +875,21 @@
     // thread_suspend_count_lock_ so that the unregistering thread cannot be suspended.
     // Note: deliberately not using MutexLock that could hold a stale self pointer.
     Locks::thread_list_lock_->ExclusiveLock(self);
-    CHECK(Contains(self));
-    Locks::thread_suspend_count_lock_->ExclusiveLock(self);
-    bool removed = false;
-    if (!self->IsSuspended()) {
-      list_.remove(self);
-      removed = true;
+    bool removed = true;
+    if (!Contains(self)) {
+      std::ostringstream os;
+      DumpNativeStack(os, GetTid(), "  native: ", nullptr);
+      LOG(ERROR) << "Request to unregister unattached thread\n" << os.str();
+    } else {
+      Locks::thread_suspend_count_lock_->ExclusiveLock(self);
+      if (!self->IsSuspended()) {
+        list_.remove(self);
+      } else {
+        // We failed to remove the thread due to a suspend request; loop and try again.
+        removed = false;
+      }
+      Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
     }
-    Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
     Locks::thread_list_lock_->ExclusiveUnlock(self);
     if (removed) {
       delete self;
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 6dcc5fe..b32e042 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -373,11 +373,9 @@
 
       // Enable count of allocs if specified in the flags.
      if ((flags & kTraceCountAllocs) != 0) {
-        runtime->SetStatsEnabled(true);
+        runtime->SetStatsEnabled(true, true);
       }
 
-
-
       if (sampling_enabled) {
         CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, NULL, &RunSamplingThread,
                                             reinterpret_cast<void*>(interval_us)),
@@ -492,7 +490,7 @@
   size_t final_offset = cur_offset_.LoadRelaxed();
 
   if ((flags_ & kTraceCountAllocs) != 0) {
-    Runtime::Current()->SetStatsEnabled(false);
+    Runtime::Current()->SetStatsEnabled(false, true);
   }
 
   std::set<mirror::ArtMethod*> visited_methods;
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 6135e5d..9157f6c 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1232,13 +1232,14 @@
 }
 
 void GetDalvikCache(const char* subdir, const bool create_if_absent, std::string* dalvik_cache,
-                    bool* have_android_data, bool* dalvik_cache_exists) {
+                    bool* have_android_data, bool* dalvik_cache_exists, bool* is_global_cache) {
   CHECK(subdir != nullptr);
   std::string error_msg;
   const char* android_data = GetAndroidDataSafe(&error_msg);
   if (android_data == nullptr) {
     *have_android_data = false;
     *dalvik_cache_exists = false;
+    *is_global_cache = false;
     return;
   } else {
     *have_android_data = true;
@@ -1246,7 +1247,8 @@
   const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
   *dalvik_cache = dalvik_cache_root + subdir;
   *dalvik_cache_exists = OS::DirectoryExists(dalvik_cache->c_str());
-  if (create_if_absent && !*dalvik_cache_exists && strcmp(android_data, "/data") != 0) {
+  *is_global_cache = strcmp(android_data, "/data") == 0;
+  if (create_if_absent && !*dalvik_cache_exists && !*is_global_cache) {
     // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
     *dalvik_cache_exists = ((mkdir(dalvik_cache_root.c_str(), 0700) == 0 || errno == EEXIST) &&
                             (mkdir(dalvik_cache->c_str(), 0700) == 0 || errno == EEXIST));
diff --git a/runtime/utils.h b/runtime/utils.h
index 50462b1..9ec6db1 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -449,8 +449,9 @@
 // Return true if we found the dalvik cache and stored it in the dalvik_cache argument.
 // have_android_data will be set to true if we have an ANDROID_DATA that exists,
 // dalvik_cache_exists will be true if there is a dalvik-cache directory that is present.
+// The flag is_global_cache tells whether this cache is /data/dalvik-cache.
 void GetDalvikCache(const char* subdir, bool create_if_absent, std::string* dalvik_cache,
-                    bool* have_android_data, bool* dalvik_cache_exists);
+                    bool* have_android_data, bool* dalvik_cache_exists, bool* is_global_cache);
 
 // Returns the absolute dalvik-cache path for a DexFile or OatFile. The path returned will be
 // rooted at cache_location.
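
Every GetDalvikCache caller gains one out-parameter; a caller that only needs to know whether it is looking at the global /data/dalvik-cache would look roughly like this (sketch, not an actual call site):

    std::string cache_dir;
    bool have_android_data = false;
    bool dalvik_cache_exists = false;
    bool is_global_cache = false;
    GetDalvikCache("arm", /* create_if_absent */ false, &cache_dir,
                   &have_android_data, &dalvik_cache_exists, &is_global_cache);
    if (dalvik_cache_exists && is_global_cache) {
      // /data/dalvik-cache needs special permissions; don't try to create
      // or write it from here.
    }
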
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 521a2dd..f28d488 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -311,9 +311,15 @@
   verifier->Verify();
   verifier->DumpFailures(os);
   os << verifier->info_messages_.str();
-  verifier->Dump(os);
-
-  return verifier;
+  // Only dump and return the verifier if there were no hard failures. Otherwise it may not be
+  // fully initialized, and querying any info is dangerous and can abort.
+  if (verifier->have_pending_hard_failure_) {
+    delete verifier;
+    return nullptr;
+  } else {
+    verifier->Dump(os);
+    return verifier;
+  }
 }
 
 MethodVerifier::MethodVerifier(Thread* self,
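
Returning nullptr on a hard failure, instead of a half-initialized verifier, moves the safety check to the caller. The contract in miniature (hypothetical names):

    #include <memory>
    #include <ostream>

    struct Verifier {
      bool have_pending_hard_failure = false;
      void Dump(std::ostream& os) const { os << "verifier state\n"; }
    };

    // Returns nullptr when verification failed hard; in that state the
    // object would be unsafe to query, so it is never handed out.
    std::unique_ptr<Verifier> VerifyAndDump(std::ostream& os, bool fail) {
      auto verifier = std::make_unique<Verifier>();
      verifier->have_pending_hard_failure = fail;
      if (verifier->have_pending_hard_failure) {
        return nullptr;  // caller must handle the failure path
      }
      verifier->Dump(os);
      return verifier;
    }
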
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index 29933cf..ff7b1f3 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -80,34 +80,34 @@
   const BooleanType& Boolean() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *BooleanType::GetInstance();
   }
-  const RegType& Byte() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const ByteType& Byte() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *ByteType::GetInstance();
   }
-  const RegType& Char() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const CharType& Char() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *CharType::GetInstance();
   }
-  const RegType& Short() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const ShortType& Short() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *ShortType::GetInstance();
   }
-  const RegType& Integer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const IntegerType& Integer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *IntegerType::GetInstance();
   }
-  const RegType& Float() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const FloatType& Float() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *FloatType::GetInstance();
   }
-  const RegType& LongLo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const LongLoType& LongLo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *LongLoType::GetInstance();
   }
-  const RegType& LongHi() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const LongHiType& LongHi() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *LongHiType::GetInstance();
   }
-  const RegType& DoubleLo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const DoubleLoType& DoubleLo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *DoubleLoType::GetInstance();
   }
-  const RegType& DoubleHi() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const DoubleHiType& DoubleHi() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *DoubleHiType::GetInstance();
   }
-  const RegType& Undefined() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const UndefinedType& Undefined() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *UndefinedType::GetInstance();
   }
   const ConflictType& Conflict() {
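
Narrowing the accessors from const RegType& to the concrete singleton types is source-compatible (a derived reference converts to a base reference implicitly) while letting new callers keep the precise type without a cast. The idea in isolation:

    struct RegType { virtual ~RegType() = default; };

    struct ByteType : RegType {
      static const ByteType* GetInstance() {
        static const ByteType instance{};
        return &instance;
      }
    };

    // Before: const RegType& Byte();  -- callers only ever saw the base type.
    // After: the declared return type is the most-derived type.
    const ByteType& Byte() { return *ByteType::GetInstance(); }

    void Demo() {
      const RegType& as_base = Byte();   // still fine: implicit upcast
      const ByteType& as_byte = Byte();  // now possible without a downcast
      (void)as_base;
      (void)as_byte;
    }
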
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index 7539990..74bfb7e 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -26,6 +26,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#include "sigchain.h"
+
 #if defined(__APPLE__)
 #define _NSIG NSIG
 #define sighandler_t sig_t
@@ -81,6 +83,9 @@
 
 // User's signal handlers
 static SignalAction user_sigactions[_NSIG];
+static bool initialized;
+static void* linked_sigaction_sym;
+static void* linked_sigprocmask_sym;
 
 static void log(const char* format, ...) {
   char buf[256];
@@ -102,6 +107,7 @@
   }
 }
 
+
 // Claim a signal chain for a particular signal.
 void ClaimSignalChain(int signal, struct sigaction* oldaction) {
   CheckSignalValid(signal);
@@ -163,14 +169,17 @@
   // Will only get here if the signal chain has not been claimed.  We want
   // to pass the sigaction on to the kernel via the real sigaction in libc.
 
-  void* linked_sigaction_sym = dlsym(RTLD_NEXT, "sigaction");
   if (linked_sigaction_sym == nullptr) {
-    linked_sigaction_sym = dlsym(RTLD_DEFAULT, "sigaction");
-    if (linked_sigaction_sym == nullptr ||
-        linked_sigaction_sym == reinterpret_cast<void*>(sigaction)) {
-      log("Unable to find next sigaction in signal chain");
-      abort();
-    }
+    // Perform lazy initialization.
+    // This will only occur outside of a signal context since we have
+    // not been initialized and therefore cannot be within the ART
+    // runtime.
+    InitializeSignalChain();
+  }
+
+  if (linked_sigaction_sym == nullptr) {
+    log("Unable to find next sigaction in signal chain");
+    abort();
   }
 
   typedef int (*SigAction)(int, const struct sigaction*, struct sigaction*);
@@ -198,14 +207,14 @@
   // Will only get here if the signal chain has not been claimed.  We want
   // to pass the sigaction on to the kernel via the real sigaction in libc.
 
-  void* linked_sigaction_sym = dlsym(RTLD_NEXT, "sigaction");
   if (linked_sigaction_sym == nullptr) {
-    linked_sigaction_sym = dlsym(RTLD_DEFAULT, "sigaction");
-    if (linked_sigaction_sym == nullptr ||
-        linked_sigaction_sym == reinterpret_cast<void*>(sigaction)) {
-      log("Unable to find next sigaction in signal chain");
-      abort();
-    }
+    // Perform lazy initialization.
+    InitializeSignalChain();
+  }
+
+  if (linked_sigaction_sym == nullptr) {
+    log("Unable to find next sigaction in signal chain");
+    abort();
   }
 
   typedef int (*SigAction)(int, const struct sigaction*, struct sigaction*);
@@ -235,14 +244,14 @@
     new_set_ptr = &tmpset;
   }
 
-  void* linked_sigprocmask_sym = dlsym(RTLD_NEXT, "sigprocmask");
   if (linked_sigprocmask_sym == nullptr) {
-    linked_sigprocmask_sym = dlsym(RTLD_DEFAULT, "sigprocmask");
-    if (linked_sigprocmask_sym == nullptr ||
-        linked_sigprocmask_sym == reinterpret_cast<void*>(sigprocmask)) {
-      log("Unable to find next sigprocmask in signal chain");
-      abort();
-    }
+    // Perform lazy initialization.
+    InitializeSignalChain();
+  }
+
+  if (linked_sigprocmask_sym == nullptr) {
+    log("Unable to find next sigprocmask in signal chain");
+    abort();
   }
 
   typedef int (*SigProcMask)(int how, const sigset_t*, sigset_t*);
@@ -250,5 +259,36 @@
   return linked_sigprocmask(how, new_set_ptr, bionic_old_set);
 }
 }   // extern "C"
+
+void InitializeSignalChain() {
+  // Warning:
+  // Don't call this from within a signal context, as it calls dlsym.
+  // Calling into the dynamic linker takes locks, and if a signal
+  // arrives while one of those locks is already held, dlsym will
+  // block trying to re-enter the mutex and we will never get out
+  // of it.
+  if (initialized) {
+    // Don't initialize twice.
+    return;
+  }
+  linked_sigaction_sym = dlsym(RTLD_NEXT, "sigaction");
+  if (linked_sigaction_sym == nullptr) {
+    linked_sigaction_sym = dlsym(RTLD_DEFAULT, "sigaction");
+    if (linked_sigaction_sym == nullptr ||
+        linked_sigaction_sym == reinterpret_cast<void*>(sigaction)) {
+      linked_sigaction_sym = nullptr;
+    }
+  }
+
+  linked_sigprocmask_sym = dlsym(RTLD_NEXT, "sigprocmask");
+  if (linked_sigprocmask_sym == nullptr) {
+    linked_sigprocmask_sym = dlsym(RTLD_DEFAULT, "sigprocmask");
+    if (linked_sigprocmask_sym == nullptr ||
+        linked_sigprocmask_sym == reinterpret_cast<void*>(sigprocmask)) {
+      linked_sigprocmask_sym = nullptr;
+    }
+  }
+  initialized = true;
+}
 }   // namespace art
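
The core of the sigchainlib change is caching the dlsym lookups in one place that never runs inside a signal handler, since dlsym can take dynamic-linker locks. The interposition pattern stripped to its essentials, as a stand-alone sketch assuming a GNU toolchain on Linux (link with -ldl; RTLD_NEXT requires _GNU_SOURCE, which g++ defines by default):

    #include <dlfcn.h>
    #include <signal.h>
    #include <cstdio>
    #include <cstdlib>

    using SigActionFn = int (*)(int, const struct sigaction*, struct sigaction*);
    static SigActionFn next_sigaction = nullptr;

    static void Initialize() {
      // Must run outside any signal handler: dlsym may take linker locks.
      next_sigaction =
          reinterpret_cast<SigActionFn>(dlsym(RTLD_NEXT, "sigaction"));
    }

    extern "C" int sigaction(int sig, const struct sigaction* act,
                             struct sigaction* old_act) {
      if (next_sigaction == nullptr) {
        Initialize();  // lazy path; safe only when not in a signal context
      }
      if (next_sigaction == nullptr) {
        std::fprintf(stderr, "unable to find next sigaction\n");
        std::abort();
      }
      return next_sigaction(sig, act, old_act);
    }
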
 
diff --git a/sigchainlib/sigchain.h b/sigchainlib/sigchain.h
index a4ce81c..5bc4026 100644
--- a/sigchainlib/sigchain.h
+++ b/sigchainlib/sigchain.h
@@ -21,6 +21,8 @@
 
 namespace art {
 
+void InitializeSignalChain();
+
 void ClaimSignalChain(int signal, struct sigaction* oldaction);
 
 void UnclaimSignalChain(int signal);
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 7929554..e914bd9 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -14,138 +14,57 @@
  * limitations under the License.
  */
 
-#include <stdio.h>
-#include <memory>
-
-#include "class_linker.h"
-#include "dex_file-inl.h"
-#include "gc_map.h"
-#include "mirror/art_method-inl.h"
-#include "mirror/class-inl.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/object-inl.h"
-#include "scoped_thread_state_change.h"
-#include "thread.h"
+#include "check_reference_map_visitor.h"
 #include "jni.h"
-#include "verifier/method_verifier.h"
 
 namespace art {
 
-#define IS_IN_REF_BITMAP(ref_bitmap, reg) \
-    (((reg) < m->GetCodeItem()->registers_size_) && \
-     ((*((ref_bitmap) + (reg)/8) >> ((reg) % 8) ) & 0x01))
+#define CHECK_REGS_CONTAIN_REFS(native_pc_offset, ...) do { \
+  int t[] = {__VA_ARGS__}; \
+  int t_size = sizeof(t) / sizeof(*t); \
+  CheckReferences(t, t_size, m->NativePcOffset(m->ToNativePc(native_pc_offset))); \
+} while (false);
 
-#define CHECK_REGS_CONTAIN_REFS(...)     \
-  do {                                   \
-    int t[] = {__VA_ARGS__};             \
-    int t_size = sizeof(t) / sizeof(*t);      \
-    for (int i = 0; i < t_size; ++i)          \
-      CHECK(IS_IN_REF_BITMAP(ref_bitmap, t[i])) \
-          << "Error: Reg @ " << i << "-th argument is not in GC map"; \
-  } while (false)
-
-struct ReferenceMap2Visitor : public StackVisitor {
-  explicit ReferenceMap2Visitor(Thread* thread)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, NULL) {
-  }
+struct ReferenceMap2Visitor : public CheckReferenceMapVisitor {
+  explicit ReferenceMap2Visitor(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : CheckReferenceMapVisitor(thread) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (CheckReferenceMapVisitor::VisitFrame()) {
+      return true;
+    }
     mirror::ArtMethod* m = GetMethod();
-    if (!m || m->IsNative() || m->IsRuntimeMethod() || IsShadowFrame()) {
-      return true;
-    }
-    LOG(INFO) << "At " << PrettyMethod(m, false);
-
-    NativePcOffsetToReferenceMap map(m->GetNativeGcMap());
-
-    if (m->IsCalleeSaveMethod()) {
-      LOG(WARNING) << "no PC for " << PrettyMethod(m);
-      return true;
-    }
-
-    const uint8_t* ref_bitmap = NULL;
     std::string m_name(m->GetName());
 
     // Given the method name and the number of times the method has been called,
     // we know the Dex registers with live reference values. Assert that what we
     // find is what is expected.
     if (m_name.compare("f") == 0) {
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x03U)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8);  // v8: this
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x06U)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 1);  // v8: this, v1: x
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x08U)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 3, 1);  // v8: this, v3: y, v1: x
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x0cU)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 3, 1);  // v8: this, v3: y, v1: x
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x0eU)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 3, 1);  // v8: this, v3: y, v1: x
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x10U)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 3, 1);  // v8: this, v3: y, v1: x
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x13U)));
-      CHECK(ref_bitmap);
+      CHECK_REGS_CONTAIN_REFS(0x03U, 8);  // v8: this
+      CHECK_REGS_CONTAIN_REFS(0x06U, 8, 1);  // v8: this, v1: x
+      CHECK_REGS_CONTAIN_REFS(0x08U, 8, 3, 1);  // v8: this, v3: y, v1: x
+      CHECK_REGS_CONTAIN_REFS(0x0cU, 8, 3, 1);  // v8: this, v3: y, v1: x
+      CHECK_REGS_CONTAIN_REFS(0x0eU, 8, 3, 1);  // v8: this, v3: y, v1: x
+      CHECK_REGS_CONTAIN_REFS(0x10U, 8, 3, 1);  // v8: this, v3: y, v1: x
      // v2 is added because of the instruction at DexPC 0024. An Object merged with 0 is still an Object. See:
       //   0024: move-object v3, v2
       //   0025: goto 0013
      // Detailed dex instructions for ReferenceMap.java are at the end of this function.
       // CHECK_REGS_CONTAIN_REFS(8, 3, 2, 1);  // v8: this, v3: y, v2: y, v1: x
       // We eliminate the non-live registers at a return, so only v3 is live:
-      CHECK_REGS_CONTAIN_REFS(3);  // v3: y
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x18U)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x1aU)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x1dU)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x1fU)));
-      CHECK(ref_bitmap);
+      CHECK_REGS_CONTAIN_REFS(0x13U);  // v3: y
+      CHECK_REGS_CONTAIN_REFS(0x18U, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x1aU, 8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x1dU, 8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
       // v5 is removed from the root set because there is a "merge" operation.
       // See 0015: if-nez v2, 001f.
-      CHECK_REGS_CONTAIN_REFS(8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x21U)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x27U)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x29U)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x2cU)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x2fU)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 4, 3, 2, 1);  // v8: this, v4: ex, v3: y, v2: y, v1: x
-
-      ref_bitmap = map.FindBitMap(m->NativePcOffset(m->ToNativePc(0x32U)));
-      CHECK(ref_bitmap);
-      CHECK_REGS_CONTAIN_REFS(8, 3, 2, 1, 0);  // v8: this, v3: y, v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x1fU, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x21U, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x27U, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
+      CHECK_REGS_CONTAIN_REFS(0x29U, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
+      CHECK_REGS_CONTAIN_REFS(0x2cU, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
+      CHECK_REGS_CONTAIN_REFS(0x2fU, 8, 4, 3, 2, 1);  // v8: this, v4: ex, v3: y, v2: y, v1: x
+      CHECK_REGS_CONTAIN_REFS(0x32U, 8, 3, 2, 1, 0);  // v8: this, v3: y, v2: y, v1: x, v0: ex
     }
 
     return true;
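
Both rewritten test helpers use the do { ... } while (false) wrapper so a multi-statement macro behaves as a single statement at the call site. A compact illustration of why the wrapper matters:

    #include <cassert>

    // Without the wrapper, a two-statement macro breaks under an unbraced if:
    //   #define CHECK2(a, b) assert(a); assert(b)
    //   if (cond) CHECK2(x, y);   // only assert(a) is guarded!
    // With it, the expansion is a single statement:
    #define CHECK2(a, b) do { assert(a); assert(b); } while (false)

    int main() {
      bool cond = true;
      int x = 1, y = 2;
      if (cond) CHECK2(x == 1, y == 2);  // both checks guarded, as intended
      return 0;
    }
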
diff --git a/test/004-StackWalk/stack_walk_jni.cc b/test/004-StackWalk/stack_walk_jni.cc
index 30a0d59..c40de7e 100644
--- a/test/004-StackWalk/stack_walk_jni.cc
+++ b/test/004-StackWalk/stack_walk_jni.cc
@@ -14,54 +14,29 @@
  * limitations under the License.
  */
 
-#include <stdio.h>
-#include <memory>
-
-#include "class_linker.h"
-#include "gc_map.h"
-#include "mirror/art_method-inl.h"
-#include "mirror/class-inl.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/object-inl.h"
+#include "check_reference_map_visitor.h"
 #include "jni.h"
-#include "scoped_thread_state_change.h"
 
 namespace art {
 
-#define REG(reg_bitmap, reg) \
-    (((reg) < m->GetCodeItem()->registers_size_) && \
-     ((*((reg_bitmap) + (reg)/8) >> ((reg) % 8) ) & 0x01))
-
-#define CHECK_REGS(...) if (!IsShadowFrame()) { \
-    int t[] = {__VA_ARGS__}; \
-    int t_size = sizeof(t) / sizeof(*t); \
-    for (int i = 0; i < t_size; ++i) \
-      CHECK(REG(reg_bitmap, t[i])) << "Error: Reg " << i << " is not in RegisterMap"; \
-  }
+#define CHECK_REGS(...) do { \
+  int t[] = {__VA_ARGS__}; \
+  int t_size = sizeof(t) / sizeof(*t); \
+  CheckReferences(t, t_size, GetNativePcOffset()); \
+} while (false);
 
 static int gJava_StackWalk_refmap_calls = 0;
 
-struct TestReferenceMapVisitor : public StackVisitor {
-  explicit TestReferenceMapVisitor(Thread* thread)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, NULL) {
-  }
+class TestReferenceMapVisitor : public CheckReferenceMapVisitor {
+ public:
+  explicit TestReferenceMapVisitor(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : CheckReferenceMapVisitor(thread) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtMethod* m = GetMethod();
-    CHECK(m != NULL);
-    LOG(INFO) << "At " << PrettyMethod(m, false);
-
-    if (m->IsCalleeSaveMethod() || m->IsNative()) {
-      LOG(WARNING) << "no PC for " << PrettyMethod(m);
-      CHECK_EQ(GetDexPc(), DexFile::kDexNoIndex);
+    if (CheckReferenceMapVisitor::VisitFrame()) {
       return true;
     }
-    const uint8_t* reg_bitmap = NULL;
-    if (!IsShadowFrame()) {
-      NativePcOffsetToReferenceMap map(m->GetNativeGcMap());
-      reg_bitmap = map.FindBitMap(GetNativePcOffset());
-    }
+    mirror::ArtMethod* m = GetMethod();
     StringPiece m_name(m->GetName());
 
     // Given the method name and the number of times the method has been called,
diff --git a/test/401-optimizing-compiler/src/Main.java b/test/401-optimizing-compiler/src/Main.java
index 2c6d1c2..07c407b 100644
--- a/test/401-optimizing-compiler/src/Main.java
+++ b/test/401-optimizing-compiler/src/Main.java
@@ -97,6 +97,11 @@
     if (exception == null) {
       throw new Error("Missing NullPointerException");
     }
+
+    result = $opt$InvokeVirtualMethod();
+    if (result != 42) {
+      throw new Error("Unexpected result: " + result);
+    }
   }
 
   public static void invokePrivate() {
@@ -205,5 +210,13 @@
     m.o = new Main();
   }
 
+  public static int $opt$InvokeVirtualMethod() {
+    return new Main().virtualMethod();
+  }
+
+  public int virtualMethod() {
+    return 42;
+  }
+
   Object o;
 }
diff --git a/test/408-move-bug/expected.txt b/test/408-move-bug/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/408-move-bug/expected.txt
diff --git a/test/408-move-bug/info.txt b/test/408-move-bug/info.txt
new file mode 100644
index 0000000..27a3dbc
--- /dev/null
+++ b/test/408-move-bug/info.txt
@@ -0,0 +1,2 @@
+Regression test for the register allocator in the optimizing
+compiler. Input moves were being overridden by sibling moves.
diff --git a/test/408-move-bug/src/Main.java b/test/408-move-bug/src/Main.java
new file mode 100644
index 0000000..420298b
--- /dev/null
+++ b/test/408-move-bug/src/Main.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    crash();
+    npe();
+  }
+
+  static void crash() {
+    boolean b = baz();
+    // Create many objects to starve registers.
+    Main foo1 = create();
+    Main foo2 = create();
+    Main foo3 = create();
+    Main foo4 = create();
+    foo1.otherField = null;
+    // On X86, we would force b to be in a byte register, which
+    // would generate moves. This code exposed a bug in the
+    // register allocator, where an input move was not just before
+    // the instruction itself, and its destination was overridden
+    // by another value.
+    foo1.field = b;
+    foo2.field = b;
+    foo3.field = b;
+    foo4.field = b;
+    foo1.lastField = b;
+  }
+
+  // Similar to `crash`, but this version used to trigger an NPE.
+  static void npe() {
+    boolean b = baz();
+    Main foo1 = create();
+    Main foo2 = create();
+    Main foo3 = create();
+    Main foo4 = create();
+    foo1.field = b;
+    foo2.field = b;
+    foo3.field = b;
+    foo4.field = b;
+    foo1.lastField = b;
+  }
+
+  static Main create() {
+    return new Main();
+  }
+
+  static boolean baz() {
+    return false;
+  }
+
+  boolean field;
+  Object otherField;
+  boolean lastField;
+}
diff --git a/test/409-materialized-condition/expected.txt b/test/409-materialized-condition/expected.txt
new file mode 100644
index 0000000..a0796cd
--- /dev/null
+++ b/test/409-materialized-condition/expected.txt
@@ -0,0 +1,5 @@
+foo1
+In do nothing.
+In if.
+foo2
+In do nothing.
diff --git a/test/409-materialized-condition/info.txt b/test/409-materialized-condition/info.txt
new file mode 100644
index 0000000..898560d
--- /dev/null
+++ b/test/409-materialized-condition/info.txt
@@ -0,0 +1 @@
+Test that materialized conditions are evaluated correctly.
diff --git a/test/409-materialized-condition/src/Main.java b/test/409-materialized-condition/src/Main.java
new file mode 100644
index 0000000..0c179a9
--- /dev/null
+++ b/test/409-materialized-condition/src/Main.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void doNothing(boolean b) {
+    System.out.println("In do nothing.");
+  }
+
+  public static void inIf() {
+    System.out.println("In if.");
+  }
+
+  public static int bar() {
+    return 42;
+  }
+
+  public static int foo1() {
+    int b = bar();
+    doNothing(b == 42);
+    // This second `b == 42` will be GVN'ed away.
+    if (b == 42) {
+      inIf();
+      return b;
+    }
+    return 0;
+  }
+
+  public static int foo2() {
+    int b = bar();
+    doNothing(b == 41);
+    // This second `b == 41` will be GVN'ed away.
+    if (b == 41) {
+      inIf();
+      return 0;
+    }
+    return b;
+  }
+
+  public static void main(String[] args) {
+    System.out.println("foo1");
+    int res = foo1();
+    if (res != 42) {
+      throw new Error("Unexpected return value for foo1: " + res + ", expected 42.");
+    }
+
+    System.out.println("foo2");
+    res = foo2();
+    if (res != 42) {
+      throw new Error("Unexpected return value for foo2: " + res + ", expected 42.");
+    }
+  }
+}