Merge "ART: Fixed subsequent CHECK-NOTs Checker bug"
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 769cd4c..a3323e1 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -16,6 +16,7 @@
 
 #include "base/logging.h"
 #include "calling_convention_arm.h"
+#include "handle_scope-inl.h"
 #include "utils/arm/managed_register_arm.h"
 
 namespace art {
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 29763a2..b9c8178 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -16,6 +16,7 @@
 
 #include "base/logging.h"
 #include "calling_convention_arm64.h"
+#include "handle_scope-inl.h"
 #include "utils/arm64/managed_register_arm64.h"
 
 namespace art {
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index f7a7be7..aefbf06 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -17,6 +17,7 @@
 #include "calling_convention_mips.h"
 
 #include "base/logging.h"
+#include "handle_scope-inl.h"
 #include "utils/mips/managed_register_mips.h"
 
 namespace art {
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 9bf7d0f..a5686e1 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -17,6 +17,7 @@
 #include "calling_convention_x86.h"
 
 #include "base/logging.h"
+#include "handle_scope-inl.h"
 #include "utils/x86/managed_register_x86.h"
 #include "utils.h"
 
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index a100552..bbdf1fe 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -17,6 +17,7 @@
 #include "calling_convention_x86_64.h"
 
 #include "base/logging.h"
+#include "handle_scope-inl.h"
 #include "utils/x86_64/managed_register_x86_64.h"
 #include "utils.h"
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 4d8154e..ada0fb7 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -620,6 +620,14 @@
         break;
       }
 
+      case Location::kFpuRegisterPair: {
+        stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.low());
+        stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.high());
+        ++i;
+        DCHECK_LT(i, environment_size);
+        break;
+      }
+
       default:
         LOG(FATAL) << "Unexpected kind " << location.GetKind();
     }
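
A minimal sketch (not part of the patch) of what the new kFpuRegisterPair case does: it records one dex-register entry per half of the pair and advances the vreg index past the slot the high half occupies. Entry and AddFpuPair are hypothetical stand-ins for the stack map stream API.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct Entry { bool in_fpu_register; uint32_t reg; };  // stand-in for one map row

// A 64-bit value held in FPU registers (low, high) fills vreg slots i and i+1.
void AddFpuPair(std::vector<Entry>* map, uint32_t low, uint32_t high,
                size_t* i, size_t environment_size) {
  map->push_back(Entry{true, low});
  map->push_back(Entry{true, high});
  ++*i;                           // skip the slot owned by the high half
  assert(*i < environment_size);  // mirrors the DCHECK_LT above
}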
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4205ebe..9880239 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -142,6 +142,7 @@
     UNIMPLEMENTED(FATAL);
     UNREACHABLE();
   }
+  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
 
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3b3fb64..d0a72bb 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -373,6 +373,16 @@
   return kArmWordSize;
 }
 
+size_t CodeGeneratorARM::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+  __ StoreSToOffset(static_cast<SRegister>(reg_id), SP, stack_index);
+  return kArmWordSize;
+}
+
+size_t CodeGeneratorARM::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+  __ LoadSFromOffset(static_cast<SRegister>(reg_id), SP, stack_index);
+  return kArmWordSize;
+}
+
 CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
                                    const ArmInstructionSetFeatures* isa_features)
     : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs),
@@ -802,7 +812,8 @@
         __ LoadImmediate(IP, value);
         __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
       }
-    } else if (const_to_move->IsLongConstant()) {
+    } else {
+      DCHECK(const_to_move->IsLongConstant()) << const_to_move;
       int64_t value = const_to_move->AsLongConstant()->GetValue();
       if (location.IsRegisterPair()) {
         __ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value));
@@ -2585,7 +2596,8 @@
                                                          Register out_hi) {
   if (offset != 0) {
     __ LoadImmediate(out_lo, offset);
-    __ add(addr, addr, ShifterOperand(out_lo));
+    __ add(IP, addr, ShifterOperand(out_lo));
+    addr = IP;
   }
   __ ldrexd(out_lo, out_hi, addr);
 }
@@ -2599,7 +2611,8 @@
   Label fail;
   if (offset != 0) {
     __ LoadImmediate(temp1, offset);
-    __ add(addr, addr, ShifterOperand(temp1));
+    __ add(IP, addr, ShifterOperand(temp1));
+    addr = IP;
   }
   __ Bind(&fail);
   // We need a load followed by store. (The address used in a STREX instruction must
@@ -2994,10 +3007,34 @@
       break;
     }
 
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
-      UNREACHABLE();
+    case Primitive::kPrimFloat: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      Location out = locations->Out();
+      DCHECK(out.IsFpuRegister());
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset);
+      } else {
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+        __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      Location out = locations->Out();
+      DCHECK(out.IsFpuRegisterPair());
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+      } else {
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
+        __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
+      }
+      break;
+    }
+
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
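
A minimal sketch of the address arithmetic both new cases share, assuming TIMES_4 and TIMES_8 are log2 scale factors (2 and 3): a constant index folds into a single immediate offset, while a register index is scaled and added into IP first.

#include <cstdint>

// element_shift is 2 for float (TIMES_4) and 3 for double (TIMES_8).
uintptr_t ArrayElementAddress(uintptr_t obj, uint32_t data_offset,
                              int32_t index, int element_shift) {
  return obj + data_offset + (static_cast<uintptr_t>(index) << element_shift);
}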
@@ -3114,12 +3151,36 @@
       break;
     }
 
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
-      UNREACHABLE();
+    case Primitive::kPrimFloat: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      Location value = locations->InAt(2);
+      DCHECK(value.IsFpuRegister());
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ StoreSToOffset(value.AsFpuRegister<SRegister>(), obj, offset);
+      } else {
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+        __ StoreSToOffset(value.AsFpuRegister<SRegister>(), IP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      Location value = locations->InAt(2);
+      DCHECK(value.IsFpuRegisterPair());
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+      } else {
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
+        __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
+      }
+      break;
+    }
+
     case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+      LOG(FATAL) << "Unreachable type " << value_type;
       UNREACHABLE();
   }
 }
@@ -3247,21 +3308,62 @@
     if (destination.IsRegister()) {
       __ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(),
                         SP, source.GetStackIndex());
+    } else if (destination.IsFpuRegister()) {
+      __ LoadSFromOffset(destination.AsFpuRegister<SRegister>(), SP, source.GetStackIndex());
     } else {
       DCHECK(destination.IsStackSlot());
       __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
       __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
     }
-  } else {
-    DCHECK(source.IsConstant());
-    DCHECK(source.GetConstant()->IsIntConstant());
-    int32_t value = source.GetConstant()->AsIntConstant()->GetValue();
-    if (destination.IsRegister()) {
-      __ LoadImmediate(destination.AsRegister<Register>(), value);
+  } else if (source.IsFpuRegister()) {
+    if (destination.IsFpuRegister()) {
+      __ vmovs(destination.AsFpuRegister<SRegister>(), source.AsFpuRegister<SRegister>());
     } else {
       DCHECK(destination.IsStackSlot());
-      __ LoadImmediate(IP, value);
+      __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex());
+    }
+  } else if (source.IsFpuRegisterPair()) {
+    if (destination.IsFpuRegisterPair()) {
+      __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+               FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
+    } else {
+      DCHECK(destination.IsDoubleStackSlot()) << destination;
+      __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()),
+                        SP, destination.GetStackIndex());
+    }
+  } else if (source.IsDoubleStackSlot()) {
+    if (destination.IsFpuRegisterPair()) {
+      __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+                         SP, source.GetStackIndex());
+    } else {
+      DCHECK(destination.IsDoubleStackSlot()) << destination;
+      __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
       __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+      __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize));
+      __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+    }
+  } else {
+    DCHECK(source.IsConstant()) << source;
+    HInstruction* constant = source.GetConstant();
+    if (constant->IsIntConstant()) {
+      int32_t value = constant->AsIntConstant()->GetValue();
+      if (destination.IsRegister()) {
+        __ LoadImmediate(destination.AsRegister<Register>(), value);
+      } else {
+        DCHECK(destination.IsStackSlot());
+        __ LoadImmediate(IP, value);
+        __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+      }
+    } else {
+      DCHECK(constant->IsFloatConstant());
+      float value = constant->AsFloatConstant()->GetValue();
+      if (destination.IsFpuRegister()) {
+        __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), value);
+      } else {
+        DCHECK(destination.IsStackSlot());
+        __ LoadImmediate(IP, bit_cast<int32_t, float>(value));
+        __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+      }
     }
   }
 }
@@ -3300,6 +3402,20 @@
     Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
     Exchange(source.GetStackIndex(), destination.GetStackIndex());
+  } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
+    __ vmovrs(IP, source.AsFpuRegister<SRegister>());
+    __ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>());
+    __ vmovsr(destination.AsFpuRegister<SRegister>(), IP);
+  } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
+    SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>()
+                                           : destination.AsFpuRegister<SRegister>();
+    int mem = source.IsFpuRegister()
+        ? destination.GetStackIndex()
+        : source.GetStackIndex();
+
+    __ vmovrs(IP, reg);
+    __ LoadSFromOffset(reg, SP, mem);
+    __ StoreToOffset(kStoreWord, IP, SP, mem);
   } else {
     LOG(FATAL) << "Unimplemented";
   }
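
A minimal sketch of the FPU-register/stack-slot exchange added above, in plain C++ terms; the scratch variable plays the role of IP.

// Swap a single-precision register with a stack slot using one scratch.
void ExchangeRegAndSlot(float& reg, float& slot) {
  float scratch = reg;  // vmovrs IP, reg
  reg = slot;           // LoadSFromOffset reg, SP, mem
  slot = scratch;       // StoreToOffset kStoreWord, IP, SP, mem
}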
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 40f4edc..c1b4eda 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -168,6 +168,8 @@
   void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
 
   size_t GetWordSize() const OVERRIDE {
     return kArmWordSize;
@@ -237,6 +239,10 @@
     return isa_features_;
   }
 
+  bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
+    return type == Primitive::kPrimDouble || type == Primitive::kPrimLong;
+  }
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 19488a4..e4da07b 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -267,6 +267,10 @@
 
   ParallelMoveResolverARM64* GetMoveResolver() { return &move_resolver_; }
 
+  bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+    return false;
+  }
+
  private:
   // Labels for each block that will be compiled.
   vixl::Label* block_labels_;
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 636f884..acde122 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -222,6 +222,10 @@
     block_labels_.SetSize(GetGraph()->GetBlocks().Size());
   }
 
+  bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
+    return type == Primitive::kPrimLong;
+  }
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 0708864..87f6b0f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -218,6 +218,10 @@
     block_labels_.SetSize(GetGraph()->GetBlocks().Size());
   }
 
+  bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+    return false;
+  }
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 9ed1e45..9e0a5b8 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -30,10 +30,12 @@
   HGraphVisualizerPrinter(HGraph* graph,
                           std::ostream& output,
                           const char* pass_name,
+                          bool is_after_pass,
                           const CodeGenerator& codegen)
       : HGraphVisitor(graph),
         output_(output),
         pass_name_(pass_name),
+        is_after_pass_(is_after_pass),
         codegen_(codegen),
         indent_(0) {}
 
@@ -136,6 +138,10 @@
       output_ << "invalid";
     } else if (location.IsStackSlot()) {
       output_ << location.GetStackIndex() << "(sp)";
+    } else if (location.IsFpuRegisterPair()) {
+      codegen_.DumpFloatingPointRegister(output_, location.low());
+      output_ << " and ";
+      codegen_.DumpFloatingPointRegister(output_, location.high());
     } else {
       DCHECK(location.IsDoubleStackSlot());
       output_ << "2x" << location.GetStackIndex() << "(sp)";
@@ -157,19 +163,19 @@
     output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
   }
 
-  void VisitIntConstant(HIntConstant *instruction) OVERRIDE {
+  void VisitIntConstant(HIntConstant* instruction) OVERRIDE {
     output_ << " " << instruction->GetValue();
   }
 
-  void VisitLongConstant(HLongConstant *instruction) OVERRIDE {
+  void VisitLongConstant(HLongConstant* instruction) OVERRIDE {
     output_ << " " << instruction->GetValue();
   }
 
-  void VisitFloatConstant(HFloatConstant *instruction) OVERRIDE {
+  void VisitFloatConstant(HFloatConstant* instruction) OVERRIDE {
     output_ << " " << instruction->GetValue();
   }
 
-  void VisitDoubleConstant(HDoubleConstant *instruction) OVERRIDE {
+  void VisitDoubleConstant(HDoubleConstant* instruction) OVERRIDE {
     output_ << " " << instruction->GetValue();
   }
 
@@ -224,7 +230,8 @@
 
   void Run() {
     StartTag("cfg");
-    PrintProperty("name", pass_name_);
+    std::string pass_desc = std::string(pass_name_) + (is_after_pass_ ? " (after)" : " (before)");
+    PrintProperty("name", pass_desc.c_str());
     VisitInsertionOrder();
     EndTag("cfg");
   }
@@ -275,6 +282,7 @@
  private:
   std::ostream& output_;
   const char* pass_name_;
+  const bool is_after_pass_;
   const CodeGenerator& codegen_;
   size_t indent_;
 
@@ -295,7 +303,7 @@
   }
 
   is_enabled_ = true;
-  HGraphVisualizerPrinter printer(graph_, *output_, "", codegen_);
+  HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_);
   printer.StartTag("compilation");
   printer.PrintProperty("name", method_name);
   printer.PrintProperty("method", method_name);
@@ -305,8 +313,7 @@
 
 void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) const {
   if (is_enabled_) {
-    std::string pass_desc = std::string(pass_name) + (is_after_pass ? " (after)" : " (before)");
-    HGraphVisualizerPrinter printer(graph_, *output_, pass_desc.c_str(), codegen_);
+    HGraphVisualizerPrinter printer(graph_, *output_, pass_name, is_after_pass, codegen_);
     printer.Run();
   }
 }
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 1ff26d9..7df99d4 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -160,6 +160,16 @@
     return GetPayload();
   }
 
+  int low() const {
+    DCHECK(IsPair());
+    return GetPayload() >> 16;
+  }
+
+  int high() const {
+    DCHECK(IsPair());
+    return GetPayload() & 0xFFFF;
+  }
+
   template <typename T>
   T AsRegister() const {
     DCHECK(IsRegister());
@@ -175,25 +185,41 @@
   template <typename T>
   T AsRegisterPairLow() const {
     DCHECK(IsRegisterPair());
-    return static_cast<T>(GetPayload() >> 16);
+    return static_cast<T>(low());
   }
 
   template <typename T>
   T AsRegisterPairHigh() const {
     DCHECK(IsRegisterPair());
-    return static_cast<T>(GetPayload() & 0xFFFF);
+    return static_cast<T>(high());
   }
 
   template <typename T>
   T AsFpuRegisterPairLow() const {
     DCHECK(IsFpuRegisterPair());
-    return static_cast<T>(GetPayload() >> 16);
+    return static_cast<T>(low());
   }
 
   template <typename T>
   T AsFpuRegisterPairHigh() const {
     DCHECK(IsFpuRegisterPair());
-    return static_cast<T>(GetPayload() & 0xFFFF);
+    return static_cast<T>(high());
+  }
+
+  bool IsPair() const {
+    return IsRegisterPair() || IsFpuRegisterPair();
+  }
+
+  Location ToLow() const {
+    return IsRegisterPair()
+        ? Location::RegisterLocation(low())
+        : Location::FpuRegisterLocation(low());
+  }
+
+  Location ToHigh() const {
+    return IsRegisterPair()
+        ? Location::RegisterLocation(high())
+        : Location::FpuRegisterLocation(high());
   }
 
   static uintptr_t EncodeStackIndex(intptr_t stack_index) {
@@ -264,6 +290,18 @@
     return value_ == other.value_;
   }
 
+  // Returns whether this location contains `other`.
+  bool Contains(Location other) const {
+    if (Equals(other)) return true;
+    if (IsRegisterPair() && other.IsRegister()) {
+      return low() == other.reg() || high() == other.reg();
+    }
+    if (IsFpuRegisterPair() && other.IsFpuRegister()) {
+      return low() == other.reg() || high() == other.reg();
+    }
+    return false;
+  }
+
   const char* DebugString() const {
     switch (GetKind()) {
       case kInvalid: return "I";
@@ -525,7 +563,8 @@
         && (output_.GetPolicy() == Location::kSameAsFirstInput)) {
       return false;
     }
-    if (inputs_.Get(input_index).IsRegister() || inputs_.Get(input_index).IsFpuRegister()) {
+    Location input = inputs_.Get(input_index);
+    if (input.IsRegister() || input.IsFpuRegister() || input.IsPair()) {
       return false;
     }
     return true;
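
A minimal sketch mirroring the accessors above (not ART's actual Location class): a pair location packs two register ids into one payload word, the low register in the upper 16 bits and the high register in the lower 16 bits, which is what lets Contains() reduce to a comparison against both halves.

#include <cassert>
#include <cstdint>

static uint32_t EncodePair(int low, int high) {
  return (static_cast<uint32_t>(low) << 16) | (static_cast<uint32_t>(high) & 0xFFFF);
}

int main() {
  uint32_t payload = EncodePair(4, 5);              // e.g. the S4/S5 pair
  assert(static_cast<int>(payload >> 16) == 4);     // low()
  assert(static_cast<int>(payload & 0xFFFF) == 5);  // high()
  return 0;
}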
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0fc1fd8..b98bc70 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2734,7 +2734,7 @@
 
   // True if this blocks a move from the given location.
   bool Blocks(Location loc) const {
-    return !IsEliminated() && source_.Equals(loc);
+    return !IsEliminated() && source_.Contains(loc);
   }
 
   // A move is redundant if it's been eliminated, if its source and
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index c1c805d..1efc52b 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -27,6 +27,12 @@
 static constexpr size_t kMaxLifetimePosition = -1;
 static constexpr size_t kDefaultNumberOfSpillSlots = 4;
 
+// For simplicity, we implement register pairs as (reg, reg + 1).
+// Note that this is a requirement for double registers on ARM, since we
+// allocate individual SRegisters.
+static int GetHighForLowRegister(int reg) { return reg + 1; }
+static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
+
 RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
                                      CodeGenerator* codegen,
                                      const SsaLivenessAnalysis& liveness)
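
A minimal sketch of the (reg, reg + 1) convention the helpers above encode. On ARM VFP, D<n> aliases S<2n> and S<2n+1>, so a double's low S-register is always even and its high half is always low + 1.

#include <cassert>

static int GetHighForLowRegister(int reg) { return reg + 1; }
static bool IsLowRegister(int reg) { return (reg & 1) == 0; }

int main() {
  for (int s = 0; s < 32; s += 2) {
    // S<s> and S<s+1> together back one D register.
    assert(IsLowRegister(s));
    assert(GetHighForLowRegister(s) == s + 1);
  }
  return 0;
}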
@@ -72,10 +78,16 @@
          !it.Done();
          it.Advance()) {
       HInstruction* current = it.Current();
-      if (current->GetType() == Primitive::kPrimLong ||
-          current->GetType() == Primitive::kPrimFloat ||
-          current->GetType() == Primitive::kPrimDouble) {
-        return false;
+      if (instruction_set == kX86) {
+        if (current->GetType() == Primitive::kPrimLong ||
+            current->GetType() == Primitive::kPrimFloat ||
+            current->GetType() == Primitive::kPrimDouble) {
+          return false;
+        }
+      } else if (instruction_set == kArm || instruction_set == kThumb2) {
+        if (current->GetType() == Primitive::kPrimLong) {
+          return false;
+        }
       }
     }
   }
@@ -130,7 +142,7 @@
       : physical_fp_register_intervals_.Get(reg);
   Primitive::Type type = location.IsRegister()
       ? Primitive::kPrimInt
-      : Primitive::kPrimDouble;
+      : Primitive::kPrimFloat;
   if (interval == nullptr) {
     interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
     if (location.IsRegister()) {
@@ -226,6 +238,12 @@
               LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
           temp_intervals_.Add(interval);
           interval->AddRange(position, position + 1);
+          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+            interval->AddHighInterval(true);
+            LiveInterval* high = interval->GetHighInterval();
+            temp_intervals_.Add(high);
+            unhandled_fp_intervals_.Add(high);
+          }
           unhandled_fp_intervals_.Add(interval);
           break;
         }
@@ -279,6 +297,9 @@
     Location input = locations->InAt(i);
     if (input.IsRegister() || input.IsFpuRegister()) {
       BlockRegister(input, position, position + 1);
+    } else if (input.IsPair()) {
+      BlockRegister(input.ToLow(), position, position + 1);
+      BlockRegister(input.ToHigh(), position, position + 1);
     }
   }
 
@@ -291,6 +312,10 @@
 
   DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek()));
 
+  if (codegen_->NeedsTwoRegisters(current->GetType())) {
+    current->AddHighInterval();
+  }
+
   // Some instructions define their output in fixed register/stack slot. We need
   // to ensure we know these locations before doing register allocation. For a
   // given register, we create an interval that covers these locations. The register
@@ -304,14 +329,30 @@
     if (first.IsRegister() || first.IsFpuRegister()) {
       current->SetFrom(position + 1);
       current->SetRegister(first.reg());
+    } else if (first.IsPair()) {
+      current->SetFrom(position + 1);
+      current->SetRegister(first.low());
+      LiveInterval* high = current->GetHighInterval();
+      high->SetRegister(first.high());
+      high->SetFrom(position + 1);
     }
   } else if (output.IsRegister() || output.IsFpuRegister()) {
     // Shift the interval's start by one to account for the blocked register.
     current->SetFrom(position + 1);
     current->SetRegister(output.reg());
     BlockRegister(output, position, position + 1);
+  } else if (output.IsPair()) {
+    current->SetFrom(position + 1);
+    current->SetRegister(output.low());
+    LiveInterval* high = current->GetHighInterval();
+    high->SetRegister(output.high());
+    high->SetFrom(position + 1);
+    BlockRegister(output.ToLow(), position, position + 1);
+    BlockRegister(output.ToHigh(), position, position + 1);
   } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
     current->SetSpillSlot(output.GetStackIndex());
+  } else {
+    DCHECK(output.IsUnallocated() || output.IsConstant());
   }
 
   // If needed, add interval to the list of unhandled intervals.
@@ -516,6 +557,7 @@
     LiveInterval* current = unhandled_->Pop();
     DCHECK(!current->IsFixed() && !current->HasSpillSlot());
     DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart());
+    DCHECK(!current->IsLowInterval() || unhandled_->Peek()->IsHighInterval());
 
     size_t position = current->GetStart();
 
@@ -566,6 +608,13 @@
       continue;
     }
 
+    if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
+      DCHECK(!current->HasRegister());
+      // Allocating the low part was unsuccessful. The split interval for the high part
+      // will be handled next (it is in the `unhandled_` list).
+      continue;
+    }
+
     // (4) Try to find an available register.
     bool success = TryAllocateFreeReg(current);
 
@@ -578,6 +627,9 @@
     //     intervals.
     if (success) {
       active_.Add(current);
+      if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
+        current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
+      }
     }
   }
 }
@@ -630,26 +682,31 @@
   if (current->HasRegister()) {
     // Some instructions have a fixed register output.
     reg = current->GetRegister();
-    DCHECK_NE(free_until[reg], 0u);
+    if (free_until[reg] == 0) {
+      DCHECK(current->IsHighInterval());
+      // AllocateBlockedReg will spill the holder of the register.
+      return false;
+    }
   } else {
+    DCHECK(!current->IsHighInterval());
     int hint = current->FindFirstRegisterHint(free_until);
     if (hint != kNoRegister) {
       DCHECK(!IsBlocked(hint));
       reg = hint;
+    } else if (current->IsLowInterval()) {
+      reg = FindAvailableRegisterPair(free_until);
     } else {
-      // Pick the register that is free the longest.
-      for (size_t i = 0; i < number_of_registers_; ++i) {
-        if (IsBlocked(i)) continue;
-        if (reg == -1 || free_until[i] > free_until[reg]) {
-          reg = i;
-          if (free_until[i] == kMaxLifetimePosition) break;
-        }
-      }
+      reg = FindAvailableRegister(free_until);
     }
   }
 
+  DCHECK_NE(reg, -1);
   // If we could not find a register, we need to spill.
-  if (reg == -1 || free_until[reg] == 0) {
+  if (free_until[reg] == 0) {
+    return false;
+  }
+
+  if (current->IsLowInterval() && free_until[GetHighForLowRegister(reg)] == 0) {
     return false;
   }
 
@@ -671,6 +728,40 @@
       : blocked_fp_registers_[reg];
 }
 
+int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use) const {
+  int reg = -1;
+  // Pick the register pair whose next use is furthest away.
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    if (IsBlocked(i)) continue;
+    if (!IsLowRegister(i)) continue;
+    int high_register = GetHighForLowRegister(i);
+    if (IsBlocked(high_register)) continue;
+    int existing_high_register = GetHighForLowRegister(reg);
+    if ((reg == -1) || (next_use[i] >= next_use[reg]
+                        && next_use[high_register] >= next_use[existing_high_register])) {
+      reg = i;
+      if (next_use[i] == kMaxLifetimePosition
+          && next_use[high_register] == kMaxLifetimePosition) {
+        break;
+      }
+    }
+  }
+  return reg;
+}
+
+int RegisterAllocator::FindAvailableRegister(size_t* next_use) const {
+  int reg = -1;
+  // Pick the register that is used the last.
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    if (IsBlocked(i)) continue;
+    if (reg == -1 || next_use[i] > next_use[reg]) {
+      reg = i;
+      if (next_use[i] == kMaxLifetimePosition) break;
+    }
+  }
+  return reg;
+}
+
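
A minimal sketch of the pair-selection loop above: scan only even registers whose odd partner is also unblocked, and keep the pair whose two halves both stay unused the longest. next_use and blocked stand in for the allocator's per-register state.

#include <cstddef>
#include <limits>

static const size_t kMaxLifetimePosition = std::numeric_limits<size_t>::max();

int FindPair(const size_t* next_use, const bool* blocked, size_t num_registers) {
  int reg = -1;
  for (size_t i = 0; i + 1 < num_registers; i += 2) {
    size_t high = i + 1;
    if (blocked[i] || blocked[high]) continue;
    if (reg == -1 || (next_use[i] >= next_use[reg] &&
                      next_use[high] >= next_use[reg + 1])) {
      reg = static_cast<int>(i);
      if (next_use[i] == kMaxLifetimePosition &&
          next_use[high] == kMaxLifetimePosition) {
        break;  // both halves free for the whole method; no better pair exists
      }
    }
  }
  return reg;
}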
 // Find the register that is used last, and spill the interval
 // that holds it. If the first use of `current` is after the next use of
 // that register, we spill `current` instead.
@@ -731,17 +822,20 @@
     }
   }
 
-  // Pick the register that is used the last.
   int reg = -1;
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    if (IsBlocked(i)) continue;
-    if (reg == -1 || next_use[i] > next_use[reg]) {
-      reg = i;
-      if (next_use[i] == kMaxLifetimePosition) break;
-    }
+  if (current->HasRegister()) {
+    DCHECK(current->IsHighInterval());
+    reg = current->GetRegister();
+  } else if (current->IsLowInterval()) {
+    reg = FindAvailableRegisterPair(next_use);
+  } else {
+    DCHECK(!current->IsHighInterval());
+    reg = FindAvailableRegister(next_use);
   }
 
-  if (first_register_use >= next_use[reg]) {
+  if ((first_register_use >= next_use[reg])
+      || (current->IsLowInterval() && first_register_use >= next_use[GetHighForLowRegister(reg)])) {
+    DCHECK(!current->IsHighInterval());
     // If the first use of that instruction is after the last use of the found
     // register, we split this interval just before its first register use.
     AllocateSpillSlotFor(current);
@@ -815,23 +909,49 @@
       break;
     }
   }
+
   array->InsertAt(insert_at, interval);
+  // Insert the high interval before the low one, so that the low is processed first.
+  if (interval->HasHighInterval()) {
+    array->InsertAt(insert_at, interval->GetHighInterval());
+  } else if (interval->HasLowInterval()) {
+    array->InsertAt(insert_at + 1, interval->GetLowInterval());
+  }
 }
 
 LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
-  DCHECK(position >= interval->GetStart());
+  DCHECK_GE(position, interval->GetStart());
   DCHECK(!interval->IsDeadAt(position));
   if (position == interval->GetStart()) {
     // Spill slot will be allocated when handling `interval` again.
     interval->ClearRegister();
+    if (interval->HasHighInterval()) {
+      interval->GetHighInterval()->ClearRegister();
+    } else if (interval->HasLowInterval()) {
+      interval->GetLowInterval()->ClearRegister();
+    }
     return interval;
   } else {
     LiveInterval* new_interval = interval->SplitAt(position);
+    if (interval->HasHighInterval()) {
+      LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
+      new_interval->SetHighInterval(high);
+      high->SetLowInterval(new_interval);
+    } else if (interval->HasLowInterval()) {
+      LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
+      new_interval->SetLowInterval(low);
+      low->SetHighInterval(new_interval);
+    }
     return new_interval;
   }
 }
 
 void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
+  if (interval->IsHighInterval()) {
+    // The low interval will contain the spill slot.
+    return;
+  }
+
   LiveInterval* parent = interval->GetParent();
 
   // An instruction gets a spill slot for its entire lifetime. If the parent
@@ -898,6 +1018,7 @@
 static bool IsValidDestination(Location destination) {
   return destination.IsRegister()
       || destination.IsFpuRegister()
+      || destination.IsFpuRegisterPair()
       || destination.IsStackSlot()
       || destination.IsDoubleStackSlot();
 }
@@ -905,7 +1026,6 @@
 void RegisterAllocator::AddInputMoveFor(HInstruction* user,
                                         Location source,
                                         Location destination) const {
-  DCHECK(IsValidDestination(destination));
   if (source.Equals(destination)) return;
 
   DCHECK(!user->IsPhi());
@@ -1075,9 +1195,7 @@
   if (current->HasSpillSlot() && current->HasRegister()) {
     // We spill eagerly, so move must be at definition.
     InsertMoveAfter(interval->GetDefinedBy(),
-                    interval->IsFloatingPoint()
-                        ? Location::FpuRegisterLocation(interval->GetRegister())
-                        : Location::RegisterLocation(interval->GetRegister()),
+                    interval->ToLocation(),
                     interval->NeedsTwoSpillSlots()
                         ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
                         : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
@@ -1148,6 +1266,11 @@
           locations->AddLiveRegister(source);
           break;
         }
+        case Location::kFpuRegisterPair: {
+          locations->AddLiveRegister(source.ToLow());
+          locations->AddLiveRegister(source.ToHigh());
+          break;
+        }
         case Location::kStackSlot:  // Fall-through
         case Location::kDoubleStackSlot:  // Fall-through
         case Location::kConstant: {
@@ -1307,6 +1430,10 @@
   size_t temp_index = 0;
   for (size_t i = 0; i < temp_intervals_.Size(); ++i) {
     LiveInterval* temp = temp_intervals_.Get(i);
+    if (temp->IsHighInterval()) {
+      // High intervals can be skipped; they are already handled by their low interval.
+      continue;
+    }
     HInstruction* at = liveness_.GetTempUser(temp);
     if (at != current) {
       temp_index = 0;
@@ -1320,14 +1447,14 @@
         break;
 
       case Primitive::kPrimDouble:
-        // TODO: Support the case of ARM, where a double value
-        // requires an FPU register pair (note that the ARM back end
-        // does not yet use this register allocator when a method uses
-        // floats or doubles).
-        DCHECK(codegen_->GetInstructionSet() != kArm
-               && codegen_->GetInstructionSet() != kThumb2);
-        locations->SetTempAt(
-            temp_index++, Location::FpuRegisterLocation(temp->GetRegister()));
+        if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+          Location location = Location::FpuRegisterPairLocation(
+              temp->GetRegister(), temp->GetHighInterval()->GetRegister());
+          locations->SetTempAt(temp_index++, location);
+        } else {
+          locations->SetTempAt(
+              temp_index++, Location::FpuRegisterLocation(temp->GetRegister()));
+        }
         break;
 
       default:
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index cbe741c..c152a8b 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -128,6 +128,8 @@
   bool ValidateInternal(bool log_fatal_on_failure) const;
   void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
   void DumpAllIntervals(std::ostream& stream) const;
+  int FindAvailableRegisterPair(size_t* next_use) const;
+  int FindAvailableRegister(size_t* next_use) const;
 
   ArenaAllocator* const allocator_;
   CodeGenerator* const codegen_;
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 660a5c5..d41157b 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -419,10 +419,21 @@
 }
 
 Location LiveInterval::ToLocation() const {
+  DCHECK(!IsHighInterval());
   if (HasRegister()) {
-    return IsFloatingPoint()
-        ? Location::FpuRegisterLocation(GetRegister())
-        : Location::RegisterLocation(GetRegister());
+    if (IsFloatingPoint()) {
+      if (HasHighInterval()) {
+        return Location::FpuRegisterPairLocation(GetRegister(), GetHighInterval()->GetRegister());
+      } else {
+        return Location::FpuRegisterLocation(GetRegister());
+      }
+    } else {
+      if (HasHighInterval()) {
+        return Location::RegisterPairLocation(GetRegister(), GetHighInterval()->GetRegister());
+      } else {
+        return Location::RegisterLocation(GetRegister());
+      }
+    }
   } else {
     HInstruction* defined_by = GetParent()->GetDefinedBy();
     if (defined_by->IsConstant()) {
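
A minimal sketch of the four-way mapping ToLocation() now implements for intervals that were assigned a register; the kind names are hypothetical stand-ins for the Location factory methods used above.

enum class LocationKind { kCore, kCorePair, kFpu, kFpuPair };

LocationKind ToLocationKind(bool is_floating_point, bool has_high_interval) {
  if (is_floating_point) {
    return has_high_interval ? LocationKind::kFpuPair : LocationKind::kFpu;
  }
  return has_high_interval ? LocationKind::kCorePair : LocationKind::kCore;
}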
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 2312389..74611e1 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -77,6 +77,15 @@
     stream << "[" << start_ << ", " << end_ << ")";
   }
 
+  LiveRange* Dup(ArenaAllocator* allocator) const {
+    return new (allocator) LiveRange(
+        start_, end_, next_ == nullptr ? nullptr : next_->Dup(allocator));
+  }
+
+  LiveRange* GetLastRange() {
+    return next_ == nullptr ? this : next_->GetLastRange();
+  }
+
  private:
   size_t start_;
   size_t end_;
@@ -123,6 +132,12 @@
     stream << position_;
   }
 
+  UsePosition* Dup(ArenaAllocator* allocator) const {
+    return new (allocator) UsePosition(
+        user_, input_index_, is_environment_, position_,
+        next_ == nullptr ? nullptr : next_->Dup(allocator));
+  }
+
  private:
   HInstruction* const user_;
   const size_t input_index_;
@@ -478,6 +493,8 @@
     }
     stream << "}";
     stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
+    stream << " is_high: " << IsHighInterval();
+    stream << " is_low: " << IsLowInterval();
   }
 
   LiveInterval* GetNextSibling() const { return next_sibling_; }
@@ -512,6 +529,58 @@
   // Returns whether `other` and `this` share the same kind of register.
   bool SameRegisterKind(Location other) const;
 
+  bool HasHighInterval() const {
+    return !IsHighInterval() && (GetParent()->high_or_low_interval_ != nullptr);
+  }
+
+  bool HasLowInterval() const {
+    return IsHighInterval();
+  }
+
+  LiveInterval* GetLowInterval() const {
+    DCHECK(HasLowInterval());
+    return high_or_low_interval_;
+  }
+
+  LiveInterval* GetHighInterval() const {
+    DCHECK(HasHighInterval());
+    return high_or_low_interval_;
+  }
+
+  bool IsHighInterval() const {
+    return GetParent()->is_high_interval_;
+  }
+
+  bool IsLowInterval() const {
+    return !IsHighInterval() && (GetParent()->high_or_low_interval_ != nullptr);
+  }
+
+  void SetLowInterval(LiveInterval* low) {
+    DCHECK(IsHighInterval());
+    high_or_low_interval_ = low;
+  }
+
+  void SetHighInterval(LiveInterval* high) {
+    DCHECK(IsLowInterval());
+    high_or_low_interval_ = high;
+  }
+
+  void AddHighInterval(bool is_temp = false) {
+    DCHECK_EQ(GetParent(), this);
+    DCHECK(!HasHighInterval());
+    DCHECK(!HasLowInterval());
+    high_or_low_interval_ = new (allocator_) LiveInterval(
+        allocator_, type_, defined_by_, false, kNoRegister, is_temp, false, true);
+    high_or_low_interval_->high_or_low_interval_ = this;
+    if (first_range_ != nullptr) {
+      high_or_low_interval_->first_range_ = first_range_->Dup(allocator_);
+      high_or_low_interval_->last_range_ = first_range_->GetLastRange();
+    }
+    if (first_use_ != nullptr) {
+      high_or_low_interval_->first_use_ = first_use_->Dup(allocator_);
+    }
+  }
+
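
A minimal sketch of the deep copy AddHighInterval() performs via Dup(): the synthesized high twin gets its own copies of the range and use lists, so later splits cannot alias the low interval's lists. Plain new stands in for the arena allocator.

#include <cstddef>

struct LiveRange {
  size_t start;
  size_t end;
  LiveRange* next;

  LiveRange* Dup() const {
    return new LiveRange{start, end, next == nullptr ? nullptr : next->Dup()};
  }
  LiveRange* GetLastRange() {
    return next == nullptr ? this : next->GetLastRange();
  }
};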
  private:
   LiveInterval(ArenaAllocator* allocator,
                Primitive::Type type,
@@ -519,7 +588,8 @@
                bool is_fixed = false,
                int reg = kNoRegister,
                bool is_temp = false,
-               bool is_slow_path_safepoint = false)
+               bool is_slow_path_safepoint = false,
+               bool is_high_interval = false)
       : allocator_(allocator),
         first_range_(nullptr),
         last_range_(nullptr),
@@ -532,6 +602,8 @@
         is_fixed_(is_fixed),
         is_temp_(is_temp),
         is_slow_path_safepoint_(is_slow_path_safepoint),
+        is_high_interval_(is_high_interval),
+        high_or_low_interval_(nullptr),
         defined_by_(defined_by) {}
 
   ArenaAllocator* const allocator_;
@@ -568,6 +640,13 @@
   // Whether the interval is for a safepoint that calls on slow path.
   const bool is_slow_path_safepoint_;
 
+  // Whether this interval is a synthesized interval for a register pair.
+  const bool is_high_interval_;
+
+  // If this interval needs a register pair, the high or low sibling interval.
+  // `is_high_interval_` tells whether this interval holds the low or the high half.
+  LiveInterval* high_or_low_interval_;
+
   // The instruction represented by this interval.
   HInstruction* const defined_by_;
 
diff --git a/compiler/optimizing/test/ConstantFolding.java b/compiler/optimizing/test/ConstantFolding.java
index 92f2a77..d08006b 100644
--- a/compiler/optimizing/test/ConstantFolding.java
+++ b/compiler/optimizing/test/ConstantFolding.java
@@ -22,13 +22,13 @@
    */
 
   // CHECK-START: int ConstantFolding.IntNegation() constant_folding (before)
-  // CHECK-DAG:     [[Const42:i[0-9]+]]  IntConstant 42
-  // CHECK-DAG:     [[Neg:i[0-9]+]]      Neg [ [[Const42]] ]
-  // CHECK-DAG:                          Return [ [[Neg]] ]
+  // CHECK-DAG:     [[Const42:i\d+]]  IntConstant 42
+  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Const42]] ]
+  // CHECK-DAG:                       Return [ [[Neg]] ]
 
   // CHECK-START: int ConstantFolding.IntNegation() constant_folding (after)
-  // CHECK-DAG:     [[ConstN42:i[0-9]+]] IntConstant -42
-  // CHECK-DAG:                          Return [ [[ConstN42]] ]
+  // CHECK-DAG:     [[ConstN42:i\d+]] IntConstant -42
+  // CHECK-DAG:                       Return [ [[ConstN42]] ]
 
   public static int IntNegation() {
     int x, y;
@@ -43,14 +43,14 @@
    */
 
   // CHECK-START: int ConstantFolding.IntAddition1() constant_folding (before)
-  // CHECK-DAG:     [[Const1:i[0-9]+]]  IntConstant 1
-  // CHECK-DAG:     [[Const2:i[0-9]+]]  IntConstant 2
-  // CHECK-DAG:     [[Add:i[0-9]+]]     Add [ [[Const1]] [[Const2]] ]
-  // CHECK-DAG:                         Return [ [[Add]] ]
+  // CHECK-DAG:     [[Const1:i\d+]]  IntConstant 1
+  // CHECK-DAG:     [[Const2:i\d+]]  IntConstant 2
+  // CHECK-DAG:     [[Add:i\d+]]     Add [ [[Const1]] [[Const2]] ]
+  // CHECK-DAG:                      Return [ [[Add]] ]
 
   // CHECK-START: int ConstantFolding.IntAddition1() constant_folding (after)
-  // CHECK-DAG:     [[Const3:i[0-9]+]]  IntConstant 3
-  // CHECK-DAG:                         Return [ [[Const3]] ]
+  // CHECK-DAG:     [[Const3:i\d+]]  IntConstant 3
+  // CHECK-DAG:                      Return [ [[Const3]] ]
 
   public static int IntAddition1() {
     int a, b, c;
@@ -66,18 +66,18 @@
   */
 
   // CHECK-START: int ConstantFolding.IntAddition2() constant_folding (before)
-  // CHECK-DAG:     [[Const1:i[0-9]+]]  IntConstant 1
-  // CHECK-DAG:     [[Const2:i[0-9]+]]  IntConstant 2
-  // CHECK-DAG:     [[Const5:i[0-9]+]]  IntConstant 5
-  // CHECK-DAG:     [[Const6:i[0-9]+]]  IntConstant 6
-  // CHECK-DAG:     [[Add1:i[0-9]+]]    Add [ [[Const1]] [[Const2]] ]
-  // CHECK-DAG:     [[Add2:i[0-9]+]]    Add [ [[Const5]] [[Const6]] ]
-  // CHECK-DAG:     [[Add3:i[0-9]+]]    Add [ [[Add1]] [[Add2]] ]
-  // CHECK-DAG:                         Return [ [[Add3]] ]
+  // CHECK-DAG:     [[Const1:i\d+]]  IntConstant 1
+  // CHECK-DAG:     [[Const2:i\d+]]  IntConstant 2
+  // CHECK-DAG:     [[Const5:i\d+]]  IntConstant 5
+  // CHECK-DAG:     [[Const6:i\d+]]  IntConstant 6
+  // CHECK-DAG:     [[Add1:i\d+]]    Add [ [[Const1]] [[Const2]] ]
+  // CHECK-DAG:     [[Add2:i\d+]]    Add [ [[Const5]] [[Const6]] ]
+  // CHECK-DAG:     [[Add3:i\d+]]    Add [ [[Add1]] [[Add2]] ]
+  // CHECK-DAG:                      Return [ [[Add3]] ]
 
   // CHECK-START: int ConstantFolding.IntAddition2() constant_folding (after)
-  // CHECK-DAG:     [[Const14:i[0-9]+]] IntConstant 14
-  // CHECK-DAG:                         Return [ [[Const14]] ]
+  // CHECK-DAG:     [[Const14:i\d+]] IntConstant 14
+  // CHECK-DAG:                      Return [ [[Const14]] ]
 
   public static int IntAddition2() {
     int a, b, c;
@@ -97,14 +97,14 @@
    */
 
   // CHECK-START: int ConstantFolding.IntSubtraction() constant_folding (before)
-  // CHECK-DAG:     [[Const5:i[0-9]+]]  IntConstant 5
-  // CHECK-DAG:     [[Const2:i[0-9]+]]  IntConstant 2
-  // CHECK-DAG:     [[Sub:i[0-9]+]]     Sub [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG:                         Return [ [[Sub]] ]
+  // CHECK-DAG:     [[Const5:i\d+]]  IntConstant 5
+  // CHECK-DAG:     [[Const2:i\d+]]  IntConstant 2
+  // CHECK-DAG:     [[Sub:i\d+]]     Sub [ [[Const5]] [[Const2]] ]
+  // CHECK-DAG:                      Return [ [[Sub]] ]
 
   // CHECK-START: int ConstantFolding.IntSubtraction() constant_folding (after)
-  // CHECK-DAG:     [[Const3:i[0-9]+]]  IntConstant 3
-  // CHECK-DAG:                         Return [ [[Const3]] ]
+  // CHECK-DAG:     [[Const3:i\d+]]  IntConstant 3
+  // CHECK-DAG:                      Return [ [[Const3]] ]
 
   public static int IntSubtraction() {
     int a, b, c;
@@ -120,14 +120,14 @@
    */
 
   // CHECK-START: long ConstantFolding.LongAddition() constant_folding (before)
-  // CHECK-DAG:     [[Const1:j[0-9]+]]  LongConstant 1
-  // CHECK-DAG:     [[Const2:j[0-9]+]]  LongConstant 2
-  // CHECK-DAG:     [[Add:j[0-9]+]]     Add [ [[Const1]] [[Const2]] ]
-  // CHECK-DAG:                         Return [ [[Add]] ]
+  // CHECK-DAG:     [[Const1:j\d+]]  LongConstant 1
+  // CHECK-DAG:     [[Const2:j\d+]]  LongConstant 2
+  // CHECK-DAG:     [[Add:j\d+]]     Add [ [[Const1]] [[Const2]] ]
+  // CHECK-DAG:                      Return [ [[Add]] ]
 
   // CHECK-START: long ConstantFolding.LongAddition() constant_folding (after)
-  // CHECK-DAG:     [[Const3:j[0-9]+]]  LongConstant 3
-  // CHECK-DAG:                         Return [ [[Const3]] ]
+  // CHECK-DAG:     [[Const3:j\d+]]  LongConstant 3
+  // CHECK-DAG:                      Return [ [[Const3]] ]
 
   public static long LongAddition() {
     long a, b, c;
@@ -143,14 +143,14 @@
    */
 
   // CHECK-START: long ConstantFolding.LongSubtraction() constant_folding (before)
-  // CHECK-DAG:     [[Const5:j[0-9]+]]  LongConstant 5
-  // CHECK-DAG:     [[Const2:j[0-9]+]]  LongConstant 2
-  // CHECK-DAG:     [[Sub:j[0-9]+]]     Sub [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG:                         Return [ [[Sub]] ]
+  // CHECK-DAG:     [[Const5:j\d+]]  LongConstant 5
+  // CHECK-DAG:     [[Const2:j\d+]]  LongConstant 2
+  // CHECK-DAG:     [[Sub:j\d+]]     Sub [ [[Const5]] [[Const2]] ]
+  // CHECK-DAG:                      Return [ [[Sub]] ]
 
   // CHECK-START: long ConstantFolding.LongSubtraction() constant_folding (after)
-  // CHECK-DAG:     [[Const3:j[0-9]+]]  LongConstant 3
-  // CHECK-DAG:                         Return [ [[Const3]] ]
+  // CHECK-DAG:     [[Const3:j\d+]]  LongConstant 3
+  // CHECK-DAG:                      Return [ [[Const3]] ]
 
   public static long LongSubtraction() {
     long a, b, c;
@@ -165,14 +165,14 @@
    */
 
   // CHECK-START: int ConstantFolding.StaticCondition() constant_folding (before)
-  // CHECK-DAG:     [[Const5:i[0-9]+]]  IntConstant 5
-  // CHECK-DAG:     [[Const2:i[0-9]+]]  IntConstant 2
-  // CHECK-DAG:     [[Cond:z[0-9]+]]    GreaterThanOrEqual [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG:                         If [ [[Cond]] ]
+  // CHECK-DAG:     [[Const5:i\d+]]  IntConstant 5
+  // CHECK-DAG:     [[Const2:i\d+]]  IntConstant 2
+  // CHECK-DAG:     [[Cond:z\d+]]    GreaterThanOrEqual [ [[Const5]] [[Const2]] ]
+  // CHECK-DAG:                      If [ [[Cond]] ]
 
   // CHECK-START: int ConstantFolding.StaticCondition() constant_folding (after)
-  // CHECK-DAG:     [[Const1:i[0-9]+]]  IntConstant 1
-  // CHECK-DAG:                         If [ [[Const1]] ]
+  // CHECK-DAG:     [[Const1:i\d+]]  IntConstant 1
+  // CHECK-DAG:                      If [ [[Const1]] ]
 
   public static int StaticCondition() {
     int a, b, c;
@@ -195,18 +195,18 @@
    */
 
   // CHECK-START: int ConstantFolding.JumpsAndConditionals(boolean) constant_folding (before)
-  // CHECK-DAG:     [[Const2:i[0-9]+]]  IntConstant 2
-  // CHECK-DAG:     [[Const5:i[0-9]+]]  IntConstant 5
-  // CHECK-DAG:     [[Add:i[0-9]+]]     Add [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG:     [[Sub:i[0-9]+]]     Sub [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG:     [[Phi:i[0-9]+]]     Phi [ [[Add]] [[Sub]] ]
-  // CHECK-DAG:                         Return [ [[Phi]] ]
+  // CHECK-DAG:     [[Const2:i\d+]]  IntConstant 2
+  // CHECK-DAG:     [[Const5:i\d+]]  IntConstant 5
+  // CHECK-DAG:     [[Add:i\d+]]     Add [ [[Const5]] [[Const2]] ]
+  // CHECK-DAG:     [[Sub:i\d+]]     Sub [ [[Const5]] [[Const2]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]     Phi [ [[Add]] [[Sub]] ]
+  // CHECK-DAG:                      Return [ [[Phi]] ]
 
   // CHECK-START: int ConstantFolding.JumpsAndConditionals(boolean) constant_folding (after)
-  // CHECK-DAG:     [[Const3:i[0-9]+]]  IntConstant 3
-  // CHECK-DAG:     [[Const7:i[0-9]+]]  IntConstant 7
-  // CHECK-DAG:     [[Phi:i[0-9]+]]     Phi [ [[Const7]] [[Const3]] ]
-  // CHECK-DAG:                         Return [ [[Phi]] ]
+  // CHECK-DAG:     [[Const3:i\d+]]  IntConstant 3
+  // CHECK-DAG:     [[Const7:i\d+]]  IntConstant 7
+  // CHECK-DAG:     [[Phi:i\d+]]     Phi [ [[Const7]] [[Const3]] ]
+  // CHECK-DAG:                      Return [ [[Phi]] ]
 
   public static int JumpsAndConditionals(boolean cond) {
     int a, b, c;
diff --git a/compiler/optimizing/test/Inliner.java b/compiler/optimizing/test/Inliner.java
index ce7409c..54cce62 100644
--- a/compiler/optimizing/test/Inliner.java
+++ b/compiler/optimizing/test/Inliner.java
@@ -17,12 +17,12 @@
 public class Inliner {
 
   // CHECK-START: void Inliner.InlineVoid() inliner (before)
-  // CHECK-DAG:     [[Const42:i[0-9]+]] IntConstant 42
-  // CHECK-DAG:                         InvokeStaticOrDirect
-  // CHECK-DAG:                         InvokeStaticOrDirect [ [[Const42]] ]
+  // CHECK-DAG:     [[Const42:i\d+]] IntConstant 42
+  // CHECK-DAG:                      InvokeStaticOrDirect
+  // CHECK-DAG:                      InvokeStaticOrDirect [ [[Const42]] ]
 
   // CHECK-START: void Inliner.InlineVoid() inliner (after)
-  // CHECK-NOT:                         InvokeStaticOrDirect
+  // CHECK-NOT:                      InvokeStaticOrDirect
 
   public static void InlineVoid() {
     returnVoid();
@@ -30,119 +30,119 @@
   }
 
   // CHECK-START: int Inliner.InlineParameter(int) inliner (before)
-  // CHECK-DAG:     [[Param:i[0-9]+]]  ParameterValue
-  // CHECK-DAG:     [[Result:i[0-9]+]] InvokeStaticOrDirect [ [[Param]] ]
-  // CHECK-DAG:                        Return [ [[Result]] ]
+  // CHECK-DAG:     [[Param:i\d+]]  ParameterValue
+  // CHECK-DAG:     [[Result:i\d+]] InvokeStaticOrDirect [ [[Param]] ]
+  // CHECK-DAG:                     Return [ [[Result]] ]
 
   // CHECK-START: int Inliner.InlineParameter(int) inliner (after)
-  // CHECK-DAG:     [[Param:i[0-9]+]]  ParameterValue
-  // CHECK-DAG:                        Return [ [[Param]] ]
+  // CHECK-DAG:     [[Param:i\d+]]  ParameterValue
+  // CHECK-DAG:                     Return [ [[Param]] ]
 
   public static int InlineParameter(int a) {
     return returnParameter(a);
   }
 
   // CHECK-START: long Inliner.InlineWideParameter(long) inliner (before)
-  // CHECK-DAG:     [[Param:j[0-9]+]]  ParameterValue
-  // CHECK-DAG:     [[Result:j[0-9]+]] InvokeStaticOrDirect [ [[Param]] ]
-  // CHECK-DAG:                        Return [ [[Result]] ]
+  // CHECK-DAG:     [[Param:j\d+]]  ParameterValue
+  // CHECK-DAG:     [[Result:j\d+]] InvokeStaticOrDirect [ [[Param]] ]
+  // CHECK-DAG:                     Return [ [[Result]] ]
 
   // CHECK-START: long Inliner.InlineWideParameter(long) inliner (after)
-  // CHECK-DAG:     [[Param:j[0-9]+]]  ParameterValue
-  // CHECK-DAG:                        Return [ [[Param]] ]
+  // CHECK-DAG:     [[Param:j\d+]]  ParameterValue
+  // CHECK-DAG:                     Return [ [[Param]] ]
 
   public static long InlineWideParameter(long a) {
     return returnWideParameter(a);
   }
 
   // CHECK-START: java.lang.Object Inliner.InlineReferenceParameter(java.lang.Object) inliner (before)
-  // CHECK-DAG:     [[Param:l[0-9]+]]  ParameterValue
-  // CHECK-DAG:     [[Result:l[0-9]+]] InvokeStaticOrDirect [ [[Param]] ]
-  // CHECK-DAG:                        Return [ [[Result]] ]
+  // CHECK-DAG:     [[Param:l\d+]]  ParameterValue
+  // CHECK-DAG:     [[Result:l\d+]] InvokeStaticOrDirect [ [[Param]] ]
+  // CHECK-DAG:                     Return [ [[Result]] ]
 
   // CHECK-START: java.lang.Object Inliner.InlineReferenceParameter(java.lang.Object) inliner (after)
-  // CHECK-DAG:     [[Param:l[0-9]+]]  ParameterValue
-  // CHECK-DAG:                        Return [ [[Param]] ]
+  // CHECK-DAG:     [[Param:l\d+]]  ParameterValue
+  // CHECK-DAG:                     Return [ [[Param]] ]
 
   public static Object InlineReferenceParameter(Object o) {
     return returnReferenceParameter(o);
   }
 
   // CHECK-START: int Inliner.InlineInt() inliner (before)
-  // CHECK-DAG:     [[Result:i[0-9]+]] InvokeStaticOrDirect
-  // CHECK-DAG:                        Return [ [[Result]] ]
+  // CHECK-DAG:     [[Result:i\d+]] InvokeStaticOrDirect
+  // CHECK-DAG:                     Return [ [[Result]] ]
 
   // CHECK-START: int Inliner.InlineInt() inliner (after)
-  // CHECK-DAG:     [[Const4:i[0-9]+]] IntConstant 4
-  // CHECK-DAG:                        Return [ [[Const4]] ]
+  // CHECK-DAG:     [[Const4:i\d+]] IntConstant 4
+  // CHECK-DAG:                     Return [ [[Const4]] ]
 
   public static int InlineInt() {
     return returnInt();
   }
 
   // CHECK-START: long Inliner.InlineWide() inliner (before)
-  // CHECK-DAG:     [[Result:j[0-9]+]] InvokeStaticOrDirect
-  // CHECK-DAG:                        Return [ [[Result]] ]
+  // CHECK-DAG:     [[Result:j\d+]] InvokeStaticOrDirect
+  // CHECK-DAG:                     Return [ [[Result]] ]
 
   // CHECK-START: long Inliner.InlineWide() inliner (after)
-  // CHECK-DAG:     [[Const8:j[0-9]+]] LongConstant 8
-  // CHECK-DAG:                        Return [ [[Const8]] ]
+  // CHECK-DAG:     [[Const8:j\d+]] LongConstant 8
+  // CHECK-DAG:                     Return [ [[Const8]] ]
 
   public static long InlineWide() {
     return returnWide();
   }
 
   // CHECK-START: int Inliner.InlineAdd() inliner (before)
-  // CHECK-DAG:     [[Const3:i[0-9]+]] IntConstant 3
-  // CHECK-DAG:     [[Const5:i[0-9]+]] IntConstant 5
-  // CHECK-DAG:     [[Result:i[0-9]+]] InvokeStaticOrDirect
-  // CHECK-DAG:                        Return [ [[Result]] ]
+  // CHECK-DAG:     [[Const3:i\d+]] IntConstant 3
+  // CHECK-DAG:     [[Const5:i\d+]] IntConstant 5
+  // CHECK-DAG:     [[Result:i\d+]] InvokeStaticOrDirect
+  // CHECK-DAG:                     Return [ [[Result]] ]
 
   // CHECK-START: int Inliner.InlineAdd() inliner (after)
-  // CHECK-DAG:     [[Const3:i[0-9]+]] IntConstant 3
-  // CHECK-DAG:     [[Const5:i[0-9]+]] IntConstant 5
-  // CHECK-DAG:     [[Add:i[0-9]+]]    Add [ [[Const3]] [[Const5]] ]
-  // CHECK-DAG:                        Return [ [[Add]] ]
+  // CHECK-DAG:     [[Const3:i\d+]] IntConstant 3
+  // CHECK-DAG:     [[Const5:i\d+]] IntConstant 5
+  // CHECK-DAG:     [[Add:i\d+]]    Add [ [[Const3]] [[Const5]] ]
+  // CHECK-DAG:                     Return [ [[Add]] ]
 
   public static int InlineAdd() {
     return returnAdd(3, 5);
   }
 
   // CHECK-START: int Inliner.InlineFieldAccess() inliner (before)
-  // CHECK-DAG:     [[After:i[0-9]+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                        Return [ [[After]] ]
+  // CHECK-DAG:     [[After:i\d+]]  InvokeStaticOrDirect
+  // CHECK-DAG:                     Return [ [[After]] ]
 
   // CHECK-START: int Inliner.InlineFieldAccess() inliner (after)
-  // CHECK-DAG:     [[Const1:i[0-9]+]] IntConstant 1
-  // CHECK-DAG:     [[Before:i[0-9]+]] StaticFieldGet
-  // CHECK-DAG:     [[After:i[0-9]+]]  Add [ [[Before]] [[Const1]] ]
-  // CHECK-DAG:                        StaticFieldSet [ {{l[0-9]+}} [[After]] ]
-  // CHECK-DAG:                        Return [ [[After]] ]
+  // CHECK-DAG:     [[Const1:i\d+]] IntConstant 1
+  // CHECK-DAG:     [[Before:i\d+]] StaticFieldGet
+  // CHECK-DAG:     [[After:i\d+]]  Add [ [[Before]] [[Const1]] ]
+  // CHECK-DAG:                     StaticFieldSet [ {{l\d+}} [[After]] ]
+  // CHECK-DAG:                     Return [ [[After]] ]
 
   // CHECK-START: int Inliner.InlineFieldAccess() inliner (after)
-  // CHECK-NOT:                        InvokeStaticOrDirect
+  // CHECK-NOT:                     InvokeStaticOrDirect
 
   public static int InlineFieldAccess() {
     return incCounter();
   }
 
   // CHECK-START: int Inliner.InlineWithControlFlow(boolean) inliner (before)
-  // CHECK-DAG:     [[Const1:i[0-9]+]] IntConstant 1
-  // CHECK-DAG:     [[Const3:i[0-9]+]] IntConstant 3
-  // CHECK-DAG:     [[Const5:i[0-9]+]] IntConstant 5
-  // CHECK-DAG:     [[Add:i[0-9]+]]    InvokeStaticOrDirect [ [[Const1]] [[Const3]] ]
-  // CHECK-DAG:     [[Sub:i[0-9]+]]    InvokeStaticOrDirect [ [[Const5]] [[Const3]] ]
-  // CHECK-DAG:     [[Phi:i[0-9]+]]    Phi [ [[Add]] [[Sub]] ]
-  // CHECK-DAG:                        Return [ [[Phi]] ]
+  // CHECK-DAG:     [[Const1:i\d+]] IntConstant 1
+  // CHECK-DAG:     [[Const3:i\d+]] IntConstant 3
+  // CHECK-DAG:     [[Const5:i\d+]] IntConstant 5
+  // CHECK-DAG:     [[Add:i\d+]]    InvokeStaticOrDirect [ [[Const1]] [[Const3]] ]
+  // CHECK-DAG:     [[Sub:i\d+]]    InvokeStaticOrDirect [ [[Const5]] [[Const3]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]    Phi [ [[Add]] [[Sub]] ]
+  // CHECK-DAG:                     Return [ [[Phi]] ]
 
   // CHECK-START: int Inliner.InlineWithControlFlow(boolean) inliner (after)
-  // CHECK-DAG:     [[Const1:i[0-9]+]] IntConstant 1
-  // CHECK-DAG:     [[Const3:i[0-9]+]] IntConstant 3
-  // CHECK-DAG:     [[Const5:i[0-9]+]] IntConstant 5
-  // CHECK-DAG:     [[Add:i[0-9]+]]    Add [ [[Const1]] [[Const3]] ]
-  // CHECK-DAG:     [[Sub:i[0-9]+]]    Sub [ [[Const5]] [[Const3]] ]
-  // CHECK-DAG:     [[Phi:i[0-9]+]]    Phi [ [[Add]] [[Sub]] ]
-  // CHECK-DAG:                        Return [ [[Phi]] ]
+  // CHECK-DAG:     [[Const1:i\d+]] IntConstant 1
+  // CHECK-DAG:     [[Const3:i\d+]] IntConstant 3
+  // CHECK-DAG:     [[Const5:i\d+]] IntConstant 5
+  // CHECK-DAG:     [[Add:i\d+]]    Add [ [[Const1]] [[Const3]] ]
+  // CHECK-DAG:     [[Sub:i\d+]]    Sub [ [[Const5]] [[Const3]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]    Phi [ [[Add]] [[Sub]] ]
+  // CHECK-DAG:                     Return [ [[Phi]] ]
 
   public static int InlineWithControlFlow(boolean cond) {
     int x, const1, const3, const5;
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 87b3813..d912276 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -534,6 +534,13 @@
 
   // Load and Store. May clobber IP.
   virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0;
+  void LoadSImmediate(SRegister sd, float value, Condition cond = AL) {
+    if (!vmovs(sd, value, cond)) {
+      LoadImmediate(IP, bit_cast<int32_t, float>(value), cond);
+      vmovsr(sd, IP, cond);
+    }
+  }
+
   virtual void MarkExceptionHandler(Label* label) = 0;
   virtual void LoadFromOffset(LoadOperandType type,
                               Register reg,
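
LoadSImmediate above first tries to encode the float directly as a VFP immediate (vmovs); only when that fails does it route the raw IEEE-754 bits through the scratch register IP and vmovsr. A minimal sketch of the bit reinterpretation that bit_cast<int32_t, float> is assumed to perform; the helper name here is illustrative, not ART's:

    #include <cstdint>
    #include <cstring>

    // Reinterpret a float's bit pattern as int32_t without violating strict
    // aliasing, mirroring what a bit_cast<int32_t, float> helper would do.
    int32_t FloatToBits(float value) {
      static_assert(sizeof(int32_t) == sizeof(float), "size mismatch");
      int32_t bits;
      std::memcpy(&bits, &value, sizeof(bits));
      return bits;
    }

    // Example: FloatToBits(1.0f) == 0x3f800000.
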
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 63009bf..4f279f2 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1014,17 +1014,16 @@
   bool Setup() {
     TimingLogger::ScopedTiming t("dex2oat Setup", timings_);
     RuntimeOptions runtime_options;
-    std::vector<const DexFile*> boot_class_path;
     art::MemMap::Init();  // For ZipEntry::ExtractToMemMap.
     if (boot_image_option_.empty()) {
-      size_t failure_count = OpenDexFiles(dex_filenames_, dex_locations_, boot_class_path);
-      if (failure_count > 0) {
-        LOG(ERROR) << "Failed to open some dex files: " << failure_count;
-        return false;
-      }
-      runtime_options.push_back(std::make_pair("bootclasspath", &boot_class_path));
+      std::string boot_class_path = "-Xbootclasspath:";
+      boot_class_path += Join(dex_filenames_, ':');
+      runtime_options.push_back(std::make_pair(boot_class_path, nullptr));
+      std::string boot_class_path_locations = "-Xbootclasspath-locations:";
+      boot_class_path_locations += Join(dex_locations_, ':');
+      runtime_options.push_back(std::make_pair(boot_class_path_locations, nullptr));
     } else {
-      runtime_options.push_back(std::make_pair(boot_image_option_.c_str(), nullptr));
+      runtime_options.push_back(std::make_pair(boot_image_option_, nullptr));
     }
     for (size_t i = 0; i < runtime_args_.size(); i++) {
       runtime_options.push_back(std::make_pair(runtime_args_[i], nullptr));
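
The rewritten Setup() above no longer opens the boot dex files itself; it hands the runtime a ':'-joined -Xbootclasspath argument plus a matching -Xbootclasspath-locations. A small sketch of the joining step, assuming Join simply concatenates with a separator:

    #include <string>
    #include <vector>

    // Sketch of a Join helper with the semantics assumed above: concatenate
    // the elements with the separator between consecutive entries.
    std::string Join(const std::vector<std::string>& parts, char sep) {
      std::string result;
      for (size_t i = 0; i < parts.size(); ++i) {
        if (i != 0) {
          result += sep;
        }
        result += parts[i];
      }
      return result;
    }

    // "-Xbootclasspath:" + Join({"/system/core.jar", "/system/ext.jar"}, ':')
    // yields "-Xbootclasspath:/system/core.jar:/system/ext.jar".
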
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 98fe079..75ba9dd 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -201,22 +201,17 @@
   int mkdir_result = mkdir(dalvik_cache_.c_str(), 0700);
   ASSERT_EQ(mkdir_result, 0);
 
-  MemMap::Init();  // For LoadExpectSingleDexFile
-
-  std::string error_msg;
-  java_lang_dex_file_ = LoadExpectSingleDexFile(GetLibCoreDexFileName().c_str());
-  boot_class_path_.push_back(java_lang_dex_file_);
-
   std::string min_heap_string(StringPrintf("-Xms%zdm", gc::Heap::kDefaultInitialSize / MB));
   std::string max_heap_string(StringPrintf("-Xmx%zdm", gc::Heap::kDefaultMaximumSize / MB));
 
   callbacks_.reset(new NoopCompilerCallbacks());
 
   RuntimeOptions options;
-  options.push_back(std::make_pair("bootclasspath", &boot_class_path_));
+  std::string boot_class_path_string = "-Xbootclasspath:" + GetLibCoreDexFileName();
+  options.push_back(std::make_pair(boot_class_path_string, nullptr));
   options.push_back(std::make_pair("-Xcheck:jni", nullptr));
-  options.push_back(std::make_pair(min_heap_string.c_str(), nullptr));
-  options.push_back(std::make_pair(max_heap_string.c_str(), nullptr));
+  options.push_back(std::make_pair(min_heap_string, nullptr));
+  options.push_back(std::make_pair(max_heap_string, nullptr));
   options.push_back(std::make_pair("compilercallbacks", callbacks_.get()));
   SetUpRuntimeOptions(&options);
   if (!Runtime::Create(options, false)) {
@@ -239,6 +234,11 @@
   // pool is created by the runtime.
   runtime_->GetHeap()->CreateThreadPool();
   runtime_->GetHeap()->VerifyHeap();  // Check for heap corruption before the test
+
+  // Get the boot class path from the runtime so it can be used in tests.
+  boot_class_path_ = class_linker_->GetBootClassPath();
+  ASSERT_FALSE(boot_class_path_.empty());
+  java_lang_dex_file_ = boot_class_path_[0];
 }
 
 void CommonRuntimeTest::ClearDirectory(const char* dirpath) {
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 8851185..35dc30f 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -116,7 +116,7 @@
   std::string android_data_;
   std::string dalvik_cache_;
   const DexFile* java_lang_dex_file_;  // owned by runtime_
-  std::vector<const DexFile*> boot_class_path_;
+  std::vector<const DexFile*> boot_class_path_;  // owned by runtime_
   std::unique_ptr<Runtime> runtime_;
   // Owned by the runtime
   ClassLinker* class_linker_;
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 5f5d3f7..fe1e3a4 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -4089,6 +4089,10 @@
   }
 }
 
+JDWP::JdwpState* Dbg::GetJdwpState() {
+  return gJdwpState;
+}
+
 int Dbg::DdmHandleHpifChunk(HpifWhen when) {
   if (when == HPIF_WHEN_NOW) {
     DdmSendHeapInfo(when);
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 9203163..8f0db76 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -647,6 +647,8 @@
   static void SetJdwpLocation(JDWP::JdwpLocation* location, mirror::ArtMethod* m, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static JDWP::JdwpState* GetJdwpState();
+
  private:
   static JDWP::JdwpError GetLocalValue(const StackVisitor& visitor,
                                        ScopedObjectAccessUnchecked& soa, int slot,
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index e73166b..7bc83ef 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -160,6 +160,12 @@
     return IndexToOffset<uint64_t>(Size() / sizeof(intptr_t));
   }
 
+  void SetHeapSize(size_t bytes) {
+    // TODO: Un-map the end of the mem map.
+    bitmap_size_ = OffsetToIndex(bytes) * sizeof(intptr_t);
+    CHECK_EQ(HeapSize(), bytes);
+  }
+
   uintptr_t HeapBegin() const {
     return heap_begin_;
   }
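
SetHeapSize above shrinks the bitmap by round-tripping the byte count through OffsetToIndex and then checking that HeapSize() lands exactly on the requested value. A sketch of the offset/index arithmetic this relies on; the constants are illustrative assumptions (one mark bit per kAlignment bytes of heap, packed into intptr_t words):

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kAlignment = 8;  // Assumed object alignment in bytes.
    constexpr size_t kBitsPerWord = sizeof(intptr_t) * 8;

    size_t OffsetToIndex(size_t offset_bytes) {
      return offset_bytes / kAlignment / kBitsPerWord;
    }

    size_t IndexToOffset(size_t index) {
      return index * kAlignment * kBitsPerWord;
    }

    // CHECK_EQ(HeapSize(), bytes) in SetHeapSize only holds when `bytes` is a
    // multiple of kAlignment * kBitsPerWord, i.e. when the round trip
    // IndexToOffset(OffsetToIndex(bytes)) == bytes.
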
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index cb9f111..681bfaa 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -251,10 +251,13 @@
  // Note: Freed bytes can be negative if we copy from a compacted space to a free-list backed
   // space.
   RecordFree(ObjectBytePair(from_objects - to_objects, from_bytes - to_bytes));
-  // Clear and protect the from space.
+  // Clear the from space. Protect it with PROT_READ here; if
+  // kProtectFromSpace is true, it will be protected with PROT_NONE later
+  // in FinishPhase() so that the rosalloc verification still works (it
+  // can read the metadata magic number).
   from_space_->Clear();
-  VLOG(heap) << "Protecting from_space_: " << *from_space_;
-  from_space_->GetMemMap()->Protect(kProtectFromSpace ? PROT_NONE : PROT_READ);
+  VLOG(heap) << "Protecting from_space_ with PROT_READ : " << *from_space_;
+  from_space_->GetMemMap()->Protect(PROT_READ);
   heap_->PreSweepingGcVerification(this);
   if (swap_semi_spaces_) {
     heap_->SwapSemiSpaces();
@@ -749,6 +752,10 @@
 
 void SemiSpace::FinishPhase() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
+  if (kProtectFromSpace) {
+    VLOG(heap) << "Protecting from_space_ with PROT_NONE : " << *from_space_;
+    from_space_->GetMemMap()->Protect(PROT_NONE);
+  }
   // Null the "to" and "from" spaces since compacting from one to the other isn't valid until
   // further action is done by the heap.
   to_space_ = nullptr;
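
The comment added above describes a two-stage protection of the from-space: PROT_READ immediately after the copy, so rosalloc verification can still read its metadata, then PROT_NONE in FinishPhase() once verification has run. A standalone sketch of that sequence with raw mprotect, assuming a page-aligned region:

    #include <cstddef>
    #include <sys/mman.h>

    // Stage 1: keep the region readable while verification runs.
    bool ProtectForVerification(void* begin, size_t size) {
      return mprotect(begin, size, PROT_READ) == 0;
    }

    // Stage 2: make it fully inaccessible once verification is done.
    bool ProtectAfterVerification(void* begin, size_t size) {
      return mprotect(begin, size, PROT_NONE) == 0;
    }
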
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 2575676..618f1cc 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1592,8 +1592,6 @@
   // Make sure that we will have enough room to copy.
   CHECK_GE(to_space->GetFootprintLimit(), from_space->GetFootprintLimit());
   Compact(to_space, from_space, kGcCauseHomogeneousSpaceCompact);
-  // Leave as prot read so that we can still run ROSAlloc verification on this space.
-  from_space->GetMemMap()->Protect(PROT_READ);
   const uint64_t space_size_after_compaction = to_space->Size();
   main_space_ = to_space;
   main_space_backup_.reset(from_space);
@@ -2978,6 +2976,20 @@
   }
 }
 
+void Heap::ClampGrowthLimit() {
+  capacity_ = growth_limit_;
+  for (const auto& space : continuous_spaces_) {
+    if (space->IsMallocSpace()) {
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      malloc_space->ClampGrowthLimit();
+    }
+  }
+  // The backup space isn't in continuous_spaces_ (for performance), so clamp it separately.
+  if (main_space_backup_.get() != nullptr) {
+    main_space_backup_->ClampGrowthLimit();
+  }
+}
+
 void Heap::ClearGrowthLimit() {
   growth_limit_ = capacity_;
   for (const auto& space : continuous_spaces_) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 1738124..fc61fc5 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -302,6 +302,10 @@
   // implement dalvik.system.VMRuntime.clearGrowthLimit.
   void ClearGrowthLimit();
 
+  // Make the current growth limit the new maximum capacity, unmapping pages at the end of
+  // spaces that will never be used. Used to implement dalvik.system.VMRuntime.clampGrowthLimit.
+  void ClampGrowthLimit();
+
   // Target ideal heap utilization ratio, implements
   // dalvik.system.VMRuntime.getTargetHeapUtilization.
   double GetTargetHeapUtilization() const {
@@ -902,7 +906,7 @@
   collector::GcType next_gc_type_;
 
   // Maximum size that the heap can reach.
-  const size_t capacity_;
+  size_t capacity_;
 
   // The size the heap is limited to. This is initially smaller than capacity, but for largeHeap
   // programs it is "cleared" making it the same as capacity.
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 7905bb4..9bbbb3c 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -248,6 +248,16 @@
   context->freed.bytes += space->FreeList(self, num_ptrs, ptrs);
 }
 
+void MallocSpace::ClampGrowthLimit() {
+  size_t new_capacity = Capacity();
+  CHECK_LE(new_capacity, NonGrowthLimitCapacity());
+  GetLiveBitmap()->SetHeapSize(new_capacity);
+  GetMarkBitmap()->SetHeapSize(new_capacity);
+  GetMemMap()->SetSize(new_capacity);
+  limit_ = Begin() + new_capacity;
+  CHECK(temp_bitmap_.get() == nullptr);
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 2fbd5f0..06239e5 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -110,6 +110,10 @@
     return GetMemMap()->Size();
   }
 
+  // Change the non-growth-limit capacity by shrinking or expanding the underlying mem map.
+  // Currently only shrinking is supported.
+  void ClampGrowthLimit();
+
   void Dump(std::ostream& os) const;
 
   void SetGrowthLimit(size_t growth_limit);
diff --git a/runtime/globals.h b/runtime/globals.h
index e531c3a..93026da 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -108,7 +108,7 @@
   kTraceClockSourceDual,  // Both wall and thread CPU clocks.
 };
 
-#if defined(HAVE_POSIX_CLOCKS)
+#if defined(__linux__)
 static constexpr TraceClockSource kDefaultTraceClockSource = kTraceClockSourceDual;
 #else
 static constexpr TraceClockSource kDefaultTraceClockSource = kTraceClockSourceWall;
diff --git a/runtime/handle_scope-inl.h b/runtime/handle_scope-inl.h
index 9ddaf61..222083b 100644
--- a/runtime/handle_scope-inl.h
+++ b/runtime/handle_scope-inl.h
@@ -21,12 +21,14 @@
 
 #include "handle.h"
 #include "thread.h"
+#include "verify_object-inl.h"
 
 namespace art {
 
 template<size_t kNumReferences>
 inline StackHandleScope<kNumReferences>::StackHandleScope(Thread* self, mirror::Object* fill_value)
     : HandleScope(self->GetTopHandleScope(), kNumReferences), self_(self), pos_(0) {
+  DCHECK_EQ(self, Thread::Current());
   static_assert(kNumReferences >= 1, "StackHandleScope must contain at least 1 reference");
   // TODO: Figure out how to use a compile assert.
   CHECK_EQ(&storage_[0], GetReferences());
@@ -42,6 +44,71 @@
   DCHECK_EQ(top_handle_scope, this);
 }
 
+inline size_t HandleScope::SizeOf(uint32_t num_references) {
+  size_t header_size = sizeof(HandleScope);
+  size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
+  return header_size + data_size;
+}
+
+inline size_t HandleScope::SizeOf(size_t pointer_size, uint32_t num_references) {
+  // Assume that the layout is packed.
+  size_t header_size = pointer_size + sizeof(number_of_references_);
+  size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
+  return header_size + data_size;
+}
+
+inline mirror::Object* HandleScope::GetReference(size_t i) const {
+  DCHECK_LT(i, number_of_references_);
+  return GetReferences()[i].AsMirrorPtr();
+}
+
+inline Handle<mirror::Object> HandleScope::GetHandle(size_t i) {
+  DCHECK_LT(i, number_of_references_);
+  return Handle<mirror::Object>(&GetReferences()[i]);
+}
+
+inline MutableHandle<mirror::Object> HandleScope::GetMutableHandle(size_t i) {
+  DCHECK_LT(i, number_of_references_);
+  return MutableHandle<mirror::Object>(&GetReferences()[i]);
+}
+
+inline void HandleScope::SetReference(size_t i, mirror::Object* object) {
+  DCHECK_LT(i, number_of_references_);
+  GetReferences()[i].Assign(object);
+}
+
+inline bool HandleScope::Contains(StackReference<mirror::Object>* handle_scope_entry) const {
+  // A HandleScope should always contain something. One created by the
+  // jni_compiler should have a jobject/jclass, as a native method is
+  // passed a this pointer or a class.
+  DCHECK_GT(number_of_references_, 0U);
+  return &GetReferences()[0] <= handle_scope_entry &&
+      handle_scope_entry <= &GetReferences()[number_of_references_ - 1];
+}
+
+template<size_t kNumReferences> template<class T>
+inline MutableHandle<T> StackHandleScope<kNumReferences>::NewHandle(T* object) {
+  SetReference(pos_, object);
+  MutableHandle<T> h(GetHandle<T>(pos_));
+  pos_++;
+  return h;
+}
+
+template<size_t kNumReferences> template<class T>
+inline HandleWrapper<T> StackHandleScope<kNumReferences>::NewHandleWrapper(T** object) {
+  SetReference(pos_, *object);
+  MutableHandle<T> h(GetHandle<T>(pos_));
+  pos_++;
+  return HandleWrapper<T>(object, h);
+}
+
+template<size_t kNumReferences>
+inline void StackHandleScope<kNumReferences>::SetReference(size_t i, mirror::Object* object) {
+  DCHECK_LT(i, kNumReferences);
+  VerifyObject(object);
+  GetReferences()[i].Assign(object);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_HANDLE_SCOPE_INL_H_
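
The two SizeOf overloads moved above compute the scope footprint from a packed layout: a link pointer, the reference count, then the stack references themselves. A sketch of the same arithmetic, with the 4-byte count and 32-bit compressed reference sizes stated as assumptions:

    #include <cstddef>
    #include <cstdint>

    // Packed HandleScope footprint: link pointer + count + references.
    // The 4-byte field sizes below are assumptions for illustration.
    size_t HandleScopeSizeOf(size_t pointer_size, uint32_t num_references) {
      const size_t header_size = pointer_size + sizeof(uint32_t);  // Link + count.
      const size_t data_size = 4u * num_references;                // 32-bit refs.
      return header_size + data_size;
    }

    // HandleScopeSizeOf(8, 2) == 8 + 4 + 8 == 20 bytes for a 64-bit target.
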
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index 2c4f0f9..782bbea 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -22,6 +22,7 @@
 #include "handle.h"
 #include "stack.h"
 #include "utils.h"
+#include "verify_object.h"
 
 namespace art {
 namespace mirror {
@@ -47,19 +48,10 @@
   // takes the pointer size explicitly so that at compile time we can cross-compile correctly.
 
   // Returns the size of a HandleScope containing num_references handles.
-  static size_t SizeOf(uint32_t num_references) {
-    size_t header_size = sizeof(HandleScope);
-    size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
-    return header_size + data_size;
-  }
+  static size_t SizeOf(uint32_t num_references);
 
   // Returns the size of a HandleScope containing num_references handles.
-  static size_t SizeOf(size_t pointer_size, uint32_t num_references) {
-    // Assume that the layout is packed.
-    size_t header_size = pointer_size + sizeof(number_of_references_);
-    size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
-    return header_size + data_size;
-  }
+  static size_t SizeOf(size_t pointer_size, uint32_t num_references);
 
   // Link to previous HandleScope or null.
   HandleScope* GetLink() const {
@@ -67,37 +59,18 @@
   }
 
   ALWAYS_INLINE mirror::Object* GetReference(size_t i) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK_LT(i, number_of_references_);
-    return GetReferences()[i].AsMirrorPtr();
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ALWAYS_INLINE Handle<mirror::Object> GetHandle(size_t i)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK_LT(i, number_of_references_);
-    return Handle<mirror::Object>(&GetReferences()[i]);
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ALWAYS_INLINE MutableHandle<mirror::Object> GetMutableHandle(size_t i)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK_LT(i, number_of_references_);
-    return MutableHandle<mirror::Object>(&GetReferences()[i]);
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ALWAYS_INLINE void SetReference(size_t i, mirror::Object* object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK_LT(i, number_of_references_);
-    GetReferences()[i].Assign(object);
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool Contains(StackReference<mirror::Object>* handle_scope_entry) const {
-    // A HandleScope should always contain something. One created by the
-    // jni_compiler should have a jobject/jclass as a native method is
-    // passed in a this pointer or a class
-    DCHECK_GT(number_of_references_, 0U);
-    return &GetReferences()[0] <= handle_scope_entry &&
-        handle_scope_entry <= &GetReferences()[number_of_references_ - 1];
-  }
+  ALWAYS_INLINE bool Contains(StackReference<mirror::Object>* handle_scope_entry) const;
 
   // Offset of link within HandleScope, used by generated code.
   static size_t LinkOffset(size_t pointer_size ATTRIBUTE_UNUSED) {
@@ -174,27 +147,14 @@
   ALWAYS_INLINE ~StackHandleScope();
 
   template<class T>
-  ALWAYS_INLINE MutableHandle<T> NewHandle(T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SetReference(pos_, object);
-    MutableHandle<T> h(GetHandle<T>(pos_));
-    pos_++;
-    return h;
-  }
+  ALWAYS_INLINE MutableHandle<T> NewHandle(T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<class T>
   ALWAYS_INLINE HandleWrapper<T> NewHandleWrapper(T** object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SetReference(pos_, *object);
-    MutableHandle<T> h(GetHandle<T>(pos_));
-    pos_++;
-    return HandleWrapper<T>(object, h);
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ALWAYS_INLINE void SetReference(size_t i, mirror::Object* object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK_LT(i, kNumReferences);
-    GetReferences()[i].Assign(object);
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   template<class T>
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 3069581..42d2610 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -48,6 +48,8 @@
 #include "gc/heap.h"
 #include "gc/space/space.h"
 #include "globals.h"
+#include "jdwp/jdwp.h"
+#include "jdwp/jdwp_priv.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
@@ -61,7 +63,7 @@
 
 namespace hprof {
 
-#define UNIQUE_ERROR -((((uintptr_t)__func__) << 16 | __LINE__) & (0x7fffffff))
+static constexpr bool kDirectStream = true;
 
 #define HPROF_TIME 0
 #define HPROF_NULL_STACK_TRACE   0
@@ -170,6 +172,8 @@
 typedef uint32_t HprofStringId;
 typedef uint32_t HprofClassObjectId;
 
+class Hprof;
+
 // Represents a top-level hprof record, whose serialized format is:
 // U1  TAG: denoting the type of the record
 // U4  TIME: number of microseconds since the time stamp in the header
@@ -177,7 +181,8 @@
 // U1* BODY: as many bytes as specified in the above uint32_t field
 class HprofRecord {
  public:
-  HprofRecord() : alloc_length_(128), fp_(nullptr), tag_(0), time_(0), length_(0), dirty_(false) {
+  explicit HprofRecord(Hprof* hprof) : alloc_length_(128), fp_(nullptr), tag_(0), time_(0),
+      length_(0), dirty_(false), hprof_(hprof) {
     body_ = reinterpret_cast<unsigned char*>(malloc(alloc_length_));
   }
 
@@ -185,161 +190,72 @@
     free(body_);
   }
 
-  int StartNewRecord(FILE* fp, uint8_t tag, uint32_t time) {
-    int rc = Flush();
-    if (rc != 0) {
-      return rc;
-    }
-
+  // Returns how many bytes were in the buffer (or written).
+  size_t StartNewRecord(FILE* fp, uint8_t tag, uint32_t time) WARN_UNUSED {
+    const size_t ret = Flush();
     fp_ = fp;
     tag_ = tag;
     time_ = time;
     length_ = 0;
     dirty_ = true;
-    return 0;
+    return ret;
   }
 
-  int Flush() {
-    if (dirty_) {
-      unsigned char headBuf[sizeof(uint8_t) + 2 * sizeof(uint32_t)];
+  // Returns how many bytes were in the buffer (or written).
+  size_t Flush() WARN_UNUSED;
 
-      headBuf[0] = tag_;
-      U4_TO_BUF_BE(headBuf, 1, time_);
-      U4_TO_BUF_BE(headBuf, 5, length_);
+  void AddU1(uint8_t value);
 
-      int nb = fwrite(headBuf, 1, sizeof(headBuf), fp_);
-      if (nb != sizeof(headBuf)) {
-        return UNIQUE_ERROR;
-      }
-      nb = fwrite(body_, 1, length_, fp_);
-      if (nb != static_cast<int>(length_)) {
-        return UNIQUE_ERROR;
-      }
-
-      dirty_ = false;
-    }
-    // TODO if we used less than half (or whatever) of allocLen, shrink the buffer.
-    return 0;
+  void AddU2(uint16_t value) {
+    AddU2List(&value, 1);
   }
 
-  int AddU1(uint8_t value) {
-    int err = GuaranteeRecordAppend(1);
-    if (UNLIKELY(err != 0)) {
-      return err;
-    }
-
-    body_[length_++] = value;
-    return 0;
+  void AddU4(uint32_t value) {
+    AddU4List(&value, 1);
   }
 
-  int AddU2(uint16_t value) {
-    return AddU2List(&value, 1);
+  void AddU8(uint64_t value) {
+    AddU8List(&value, 1);
   }
 
-  int AddU4(uint32_t value) {
-    return AddU4List(&value, 1);
-  }
-
-  int AddU8(uint64_t value) {
-    return AddU8List(&value, 1);
-  }
-
-  int AddObjectId(const mirror::Object* value) {
-    return AddU4(PointerToLowMemUInt32(value));
+  void AddObjectId(const mirror::Object* value) {
+    AddU4(PointerToLowMemUInt32(value));
   }
 
   // The ID for the synthetic object generated to account for class static overhead.
-  int AddClassStaticsId(const mirror::Class* value) {
-    return AddU4(1 | PointerToLowMemUInt32(value));
+  void AddClassStaticsId(const mirror::Class* value) {
+    AddU4(1 | PointerToLowMemUInt32(value));
   }
 
-  int AddJniGlobalRefId(jobject value) {
-    return AddU4(PointerToLowMemUInt32(value));
+  void AddJniGlobalRefId(jobject value) {
+    AddU4(PointerToLowMemUInt32(value));
   }
 
-  int AddClassId(HprofClassObjectId value) {
-    return AddU4(value);
+  void AddClassId(HprofClassObjectId value) {
+    AddU4(value);
   }
 
-  int AddStringId(HprofStringId value) {
-    return AddU4(value);
+  void AddStringId(HprofStringId value) {
+    AddU4(value);
   }
 
-  int AddU1List(const uint8_t* values, size_t numValues) {
-    int err = GuaranteeRecordAppend(numValues);
-    if (UNLIKELY(err != 0)) {
-      return err;
-    }
+  void AddU1List(const uint8_t* values, size_t numValues);
+  void AddU2List(const uint16_t* values, size_t numValues);
+  void AddU4List(const uint32_t* values, size_t numValues);
+  void UpdateU4(size_t offset, uint32_t new_value);
+  void AddU8List(const uint64_t* values, size_t numValues);
 
-    memcpy(body_ + length_, values, numValues);
-    length_ += numValues;
-    return 0;
-  }
-
-  int AddU2List(const uint16_t* values, size_t numValues) {
-    int err = GuaranteeRecordAppend(numValues * 2);
-    if (UNLIKELY(err != 0)) {
-      return err;
-    }
-
-    unsigned char* insert = body_ + length_;
-    for (size_t i = 0; i < numValues; ++i) {
-      U2_TO_BUF_BE(insert, 0, *values++);
-      insert += sizeof(*values);
-    }
-    length_ += numValues * 2;
-    return 0;
-  }
-
-  int AddU4List(const uint32_t* values, size_t numValues) {
-    int err = GuaranteeRecordAppend(numValues * 4);
-    if (UNLIKELY(err != 0)) {
-      return err;
-    }
-
-    unsigned char* insert = body_ + length_;
-    for (size_t i = 0; i < numValues; ++i) {
-      U4_TO_BUF_BE(insert, 0, *values++);
-      insert += sizeof(*values);
-    }
-    length_ += numValues * 4;
-    return 0;
-  }
-
-  void UpdateU4(size_t offset, uint32_t new_value) {
-    U4_TO_BUF_BE(body_, offset, new_value);
-  }
-
-  int AddU8List(const uint64_t* values, size_t numValues) {
-    int err = GuaranteeRecordAppend(numValues * 8);
-    if (err != 0) {
-      return err;
-    }
-
-    unsigned char* insert = body_ + length_;
-    for (size_t i = 0; i < numValues; ++i) {
-      U8_TO_BUF_BE(insert, 0, *values++);
-      insert += sizeof(*values);
-    }
-    length_ += numValues * 8;
-    return 0;
-  }
-
-  int AddIdList(mirror::ObjectArray<mirror::Object>* values)
+  void AddIdList(mirror::ObjectArray<mirror::Object>* values)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    int32_t length = values->GetLength();
+    const int32_t length = values->GetLength();
     for (int32_t i = 0; i < length; ++i) {
-      int err = AddObjectId(values->GetWithoutChecks(i));
-      if (UNLIKELY(err != 0)) {
-        return err;
-      }
+      AddObjectId(values->GetWithoutChecks(i));
     }
-    return 0;
   }
 
-  int AddUtf8String(const char* str) {
+  void AddUtf8String(const char* str) {
     // The terminating NUL character is NOT written.
-    return AddU1List((const uint8_t*)str, strlen(str));
+    AddU1List((const uint8_t*)str, strlen(str));
   }
 
   size_t Size() const {
@@ -347,25 +263,15 @@
   }
 
  private:
-  int GuaranteeRecordAppend(size_t nmore) {
-    size_t minSize = length_ + nmore;
-    if (minSize > alloc_length_) {
-      size_t newAllocLen = alloc_length_ * 2;
-      if (newAllocLen < minSize) {
-        newAllocLen = alloc_length_ + nmore + nmore/2;
-      }
-      unsigned char* newBody = (unsigned char*)realloc(body_, newAllocLen);
-      if (newBody != NULL) {
-        body_ = newBody;
-        alloc_length_ = newAllocLen;
-      } else {
-        // TODO: set an error flag so future ops will fail
-        return UNIQUE_ERROR;
-      }
+  void GuaranteeRecordAppend(size_t nmore) {
+    const size_t min_size = length_ + nmore;
+    if (min_size > alloc_length_) {
+      const size_t new_alloc_len = std::max(alloc_length_ * 2, min_size);
+      body_ = (unsigned char*)realloc(body_, new_alloc_len);
+      CHECK(body_ != nullptr);
+      alloc_length_ = new_alloc_len;
     }
-
     CHECK_LE(length_ + nmore, alloc_length_);
-    return 0;
   }
 
   size_t alloc_length_;
@@ -376,6 +282,7 @@
   uint32_t time_;
   size_t length_;
   bool dirty_;
+  Hprof* hprof_;
 
   DISALLOW_COPY_AND_ASSIGN(HprofRecord);
 };
@@ -387,57 +294,50 @@
         fd_(fd),
         direct_to_ddms_(direct_to_ddms),
         start_ns_(NanoTime()),
-        current_record_(),
+        current_record_(this),
         gc_thread_serial_number_(0),
         gc_scan_state_(0),
         current_heap_(HPROF_HEAP_DEFAULT),
         objects_in_segment_(0),
-        header_fp_(NULL),
-        header_data_ptr_(NULL),
+        header_fp_(nullptr),
+        header_data_ptr_(nullptr),
         header_data_size_(0),
-        body_fp_(NULL),
-        body_data_ptr_(NULL),
+        body_fp_(nullptr),
+        body_data_ptr_(nullptr),
         body_data_size_(0),
+        net_state_(nullptr),
         next_string_id_(0x400000) {
     LOG(INFO) << "hprof: heap dump \"" << filename_ << "\" starting...";
-
-    header_fp_ = open_memstream(&header_data_ptr_, &header_data_size_);
-    if (header_fp_ == NULL) {
-      PLOG(FATAL) << "header open_memstream failed";
-    }
-
-    body_fp_ = open_memstream(&body_data_ptr_, &body_data_size_);
-    if (body_fp_ == NULL) {
-      PLOG(FATAL) << "body open_memstream failed";
-    }
   }
 
   ~Hprof() {
-    if (header_fp_ != NULL) {
+    if (header_fp_ != nullptr) {
       fclose(header_fp_);
     }
-    if (body_fp_ != NULL) {
+    if (body_fp_ != nullptr) {
       fclose(body_fp_);
     }
     free(header_data_ptr_);
     free(body_data_ptr_);
   }
 
-  void Dump()
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) {
+  void ProcessBody() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    Runtime* runtime = Runtime::Current();
     // Walk the roots and the heap.
-    current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_SEGMENT, HPROF_TIME);
-    Runtime::Current()->VisitRoots(RootVisitor, this);
-    Thread* self = Thread::Current();
-    {
-      ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      Runtime::Current()->GetHeap()->VisitObjects(VisitObjectCallback, this);
+    total_body_bytes_ += current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_SEGMENT,
+                                                        HPROF_TIME);
+    runtime->VisitRoots(RootVisitor, this);
+    runtime->GetHeap()->VisitObjects(VisitObjectCallback, this);
+    total_body_bytes_ += current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_END,
+                                                        HPROF_TIME);
+    total_body_bytes_ += current_record_.Flush();
+    if (allow_writing_) {
+      fflush(body_fp_);
     }
-    current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_END, HPROF_TIME);
-    current_record_.Flush();
-    fflush(body_fp_);
+  }
 
+  void ProcessHeader() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Write the header.
     WriteFixedHeader();
     // Write the string and class tables, and any stack traces, to the header.
@@ -445,49 +345,112 @@
     WriteStringTable();
     WriteClassTable();
     WriteStackTraces();
-    current_record_.Flush();
-    fflush(header_fp_);
+    total_header_bytes_ += current_record_.Flush();
+    if (allow_writing_) {
+      fflush(header_fp_);
+    }
+  }
+
+  void ProcessHeapStreaming(size_t data_len, uint32_t chunk_type)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    total_body_bytes_ = 0;
+    total_header_bytes_ = 0;
+    allow_writing_ = true;
+    CHECK(direct_to_ddms_);
+    JDWP::JdwpState* state = Dbg::GetJdwpState();
+    CHECK(state != nullptr);
+    net_state_ = state->netState;
+    CHECK(net_state_ != nullptr);
+    // Hold the socket lock for the whole time since we want this to be atomic.
+    MutexLock mu(Thread::Current(), *net_state_->GetSocketLock());
+    total_body_bytes_ = 0;
+    total_header_bytes_ = 0;
+    constexpr size_t kChunkHeaderSize = kJDWPHeaderLen + 8;
+    uint8_t chunk_header[kChunkHeaderSize] = { 0 };
+    state->SetupChunkHeader(chunk_type, data_len, kChunkHeaderSize, chunk_header);
+    Write(chunk_header, kChunkHeaderSize, nullptr);  // Send the header chunk to DDMS.
+    ProcessHeader();
+    ProcessBody();
+    CHECK_EQ(total_body_bytes_ + total_header_bytes_, data_len);
+    net_state_ = nullptr;
+  }
+  void ProcessHeap(bool allow_writing) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    allow_writing_ = allow_writing;
+    total_body_bytes_ = 0;
+    total_header_bytes_ = 0;
+    if (allow_writing) {
+      header_fp_ = open_memstream(&header_data_ptr_, &header_data_size_);
+      CHECK(header_fp_ != nullptr) << "header open_memstream failed";
+      body_fp_ = open_memstream(&body_data_ptr_, &body_data_size_);
+      CHECK(body_fp_ != nullptr) << "body open_memstream failed";
+    }
+    ProcessBody();
+    ProcessHeader();
+  }
+
+  void Dump() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) {
+    {
+      ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+      // First pass to measure the size of the dump.
+      ProcessHeap(false);
+      const size_t header_bytes = total_header_bytes_;
+      const size_t body_bytes = total_body_bytes_;
+      if (direct_to_ddms_ && kDirectStream) {
+        ProcessHeapStreaming(header_bytes + body_bytes, CHUNK_TYPE("HPDS"));
+      } else {
+        ProcessHeap(true);
+        CHECK_EQ(header_data_size_, header_bytes);
+        CHECK_EQ(body_data_size_, body_bytes);
+      }
+      CHECK_EQ(total_header_bytes_, header_bytes);
+      CHECK_EQ(total_body_bytes_, body_bytes);
+    }
 
     bool okay = true;
-    if (direct_to_ddms_) {
-      // Send the data off to DDMS.
-      iovec iov[2];
-      iov[0].iov_base = header_data_ptr_;
-      iov[0].iov_len = header_data_size_;
-      iov[1].iov_base = body_data_ptr_;
-      iov[1].iov_len = body_data_size_;
-      Dbg::DdmSendChunkV(CHUNK_TYPE("HPDS"), iov, 2);
-    } else {
-      // Where exactly are we writing to?
-      int out_fd;
-      if (fd_ >= 0) {
-        out_fd = dup(fd_);
-        if (out_fd < 0) {
-          ThrowRuntimeException("Couldn't dump heap; dup(%d) failed: %s", fd_, strerror(errno));
-          return;
-        }
+    if (!kDirectStream) {
+      if (direct_to_ddms_) {
+        // Send the data off to DDMS.
+        iovec iov[2];
+        iov[0].iov_base = header_data_ptr_;
+        iov[0].iov_len = header_data_size_;
+        iov[1].iov_base = body_data_ptr_;
+        iov[1].iov_len = body_data_size_;
+        Dbg::DdmSendChunkV(CHUNK_TYPE("HPDS"), iov, 2);
       } else {
-        out_fd = open(filename_.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0644);
-        if (out_fd < 0) {
-          ThrowRuntimeException("Couldn't dump heap; open(\"%s\") failed: %s", filename_.c_str(),
-                                strerror(errno));
-          return;
+        // Where exactly are we writing to?
+        int out_fd;
+        if (fd_ >= 0) {
+          out_fd = dup(fd_);
+          if (out_fd < 0) {
+            ThrowRuntimeException("Couldn't dump heap; dup(%d) failed: %s", fd_, strerror(errno));
+            return;
+          }
+        } else {
+          out_fd = open(filename_.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0644);
+          if (out_fd < 0) {
+            ThrowRuntimeException("Couldn't dump heap; open(\"%s\") failed: %s", filename_.c_str(),
+                                  strerror(errno));
+            return;
+          }
         }
-      }
 
-      std::unique_ptr<File> file(new File(out_fd, filename_, true));
-      okay = file->WriteFully(header_data_ptr_, header_data_size_) &&
-             file->WriteFully(body_data_ptr_, body_data_size_);
-      if (okay) {
-        okay = file->FlushCloseOrErase() == 0;
-      } else {
-        file->Erase();
-      }
-      if (!okay) {
-        std::string msg(StringPrintf("Couldn't dump heap; writing \"%s\" failed: %s",
-                                     filename_.c_str(), strerror(errno)));
-        ThrowRuntimeException("%s", msg.c_str());
-        LOG(ERROR) << msg;
+        std::unique_ptr<File> file(new File(out_fd, filename_, true));
+        okay = file->WriteFully(header_data_ptr_, header_data_size_) &&
+               file->WriteFully(body_data_ptr_, body_data_size_);
+        if (okay) {
+          okay = file->FlushCloseOrErase() == 0;
+        } else {
+          file->Erase();
+        }
+        if (!okay) {
+          std::string msg(StringPrintf("Couldn't dump heap; writing \"%s\" failed: %s",
+                                       filename_.c_str(), strerror(errno)));
+          ThrowRuntimeException("%s", msg.c_str());
+          LOG(ERROR) << msg;
+        }
       }
     }
 
@@ -495,11 +458,32 @@
     if (okay) {
       uint64_t duration = NanoTime() - start_ns_;
       LOG(INFO) << "hprof: heap dump completed ("
-          << PrettySize(header_data_size_ + body_data_size_ + 1023)
+          << PrettySize(total_header_bytes_ + total_body_bytes_ + 1023)
           << ") in " << PrettyDuration(duration);
     }
   }
 
+  bool AllowWriting() const {
+    return allow_writing_;
+  }
+
+  size_t Write(const void* ptr, size_t len, FILE* fp) {
+    if (allow_writing_) {
+      if (net_state_ != nullptr) {
+        CHECK(fp == nullptr);
+        std::vector<iovec> iov;
+        iov.push_back(iovec());
+        iov[0].iov_base = const_cast<void*>(ptr);
+        iov[0].iov_len = len;
+        net_state_->WriteBufferedPacketLocked(iov);
+      } else {
+        const size_t n = fwrite(ptr, 1, len, fp);
+        CHECK_EQ(n, len);
+      }
+    }
+    return len;
+  }
+
  private:
   static void RootVisitor(mirror::Object** obj, void* arg, uint32_t thread_id, RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -511,8 +495,8 @@
 
   static void VisitObjectCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(obj != NULL);
-    DCHECK(arg != NULL);
+    DCHECK(obj != nullptr);
+    DCHECK(arg != nullptr);
     reinterpret_cast<Hprof*>(arg)->DumpHeapObject(obj);
   }
 
@@ -521,21 +505,14 @@
 
   int DumpHeapObject(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Finish() {
-  }
-
-  int WriteClassTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void WriteClassTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     HprofRecord* rec = &current_record_;
     uint32_t nextSerialNumber = 1;
 
     for (mirror::Class* c : classes_) {
       CHECK(c != nullptr);
-
-      int err = current_record_.StartNewRecord(header_fp_, HPROF_TAG_LOAD_CLASS, HPROF_TIME);
-      if (UNLIKELY(err != 0)) {
-        return err;
-      }
-
+      total_header_bytes_ += current_record_.StartNewRecord(header_fp_, HPROF_TAG_LOAD_CLASS,
+                                                            HPROF_TIME);
       // LOAD CLASS format:
       // U4: class serial number (always > 0)
       // ID: class object ID. We use the address of the class object structure as its ID.
@@ -546,44 +523,31 @@
       rec->AddU4(HPROF_NULL_STACK_TRACE);
       rec->AddStringId(LookupClassNameId(c));
     }
-
-    return 0;
   }
 
-  int WriteStringTable() {
+  void WriteStringTable() {
     HprofRecord* rec = &current_record_;
-
-    for (std::pair<std::string, HprofStringId> p : strings_) {
+    for (const std::pair<std::string, HprofStringId>& p : strings_) {
       const std::string& string = p.first;
-      size_t id = p.second;
+      const size_t id = p.second;
 
-      int err = current_record_.StartNewRecord(header_fp_, HPROF_TAG_STRING, HPROF_TIME);
-      if (err != 0) {
-        return err;
-      }
+      total_header_bytes_ += current_record_.StartNewRecord(header_fp_, HPROF_TAG_STRING,
+                                                            HPROF_TIME);
 
       // STRING format:
       // ID:  ID for this string
       // U1*: UTF8 characters for string (NOT NULL terminated)
       //      (the record format encodes the length)
-      err = rec->AddU4(id);
-      if (err != 0) {
-        return err;
-      }
-      err = rec->AddUtf8String(string.c_str());
-      if (err != 0) {
-        return err;
-      }
+      rec->AddU4(id);
+      rec->AddUtf8String(string.c_str());
     }
-
-    return 0;
   }
 
   void StartNewHeapDumpSegment() {
     // This flushes the old segment and starts a new one.
-    current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_SEGMENT, HPROF_TIME);
+    total_body_bytes_ += current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_SEGMENT,
+                                                        HPROF_TIME);
     objects_in_segment_ = 0;
-
     // Starting a new HEAP_DUMP resets the heap to default.
     current_heap_ = HPROF_HEAP_DEFAULT;
   }
@@ -591,22 +555,14 @@
   int MarkRootObject(const mirror::Object* obj, jobject jniObj);
 
   HprofClassObjectId LookupClassId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (c == nullptr) {
-      // c is the superclass of java.lang.Object or a primitive.
-      return 0;
-    }
-
-    {
+    if (c != nullptr) {
       auto result = classes_.insert(c);
       const mirror::Class* present = *result.first;
       CHECK_EQ(present, c);
+      // Make sure that we've assigned a string ID for this class' name
+      LookupClassNameId(c);
     }
-
-    // Make sure that we've assigned a string ID for this class' name
-    LookupClassNameId(c);
-
-    HprofClassObjectId result = PointerToLowMemUInt32(c);
-    return result;
+    return PointerToLowMemUInt32(c);
   }
 
   HprofStringId LookupStringId(mirror::String* string) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -633,41 +589,33 @@
 
   void WriteFixedHeader() {
     char magic[] = "JAVA PROFILE 1.0.3";
-    unsigned char buf[4];
-
+    unsigned char buf[4] = { 0 };
     // Write the file header.
     // U1: NUL-terminated magic string.
-    fwrite(magic, 1, sizeof(magic), header_fp_);
-
+    total_header_bytes_ += Write(magic, sizeof(magic), header_fp_);
     // U4: size of identifiers.  We're using addresses as IDs and our heap references are stored
     // as uint32_t.
     // Note of warning: hprof-conv hard-codes the size of identifiers to 4.
     static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(uint32_t),
                   "Unexpected HeapReference size");
     U4_TO_BUF_BE(buf, 0, sizeof(uint32_t));
-    fwrite(buf, 1, sizeof(uint32_t), header_fp_);
-
+    total_header_bytes_ += Write(buf, sizeof(uint32_t), header_fp_);
     // The current time, in milliseconds since 0:00 GMT, 1/1/70.
     timeval now;
-    uint64_t nowMs;
-    if (gettimeofday(&now, NULL) < 0) {
-      nowMs = 0;
-    } else {
-      nowMs = (uint64_t)now.tv_sec * 1000 + now.tv_usec / 1000;
-    }
-
+    const uint64_t nowMs = (gettimeofday(&now, NULL) < 0) ? 0 :
+        (uint64_t)now.tv_sec * 1000 + now.tv_usec / 1000;
     // U4: high word of the 64-bit time.
     U4_TO_BUF_BE(buf, 0, (uint32_t)(nowMs >> 32));
-    fwrite(buf, 1, sizeof(uint32_t), header_fp_);
-
+    total_header_bytes_ += Write(buf, sizeof(uint32_t), header_fp_);
     // U4: low word of the 64-bit time.
     U4_TO_BUF_BE(buf, 0, (uint32_t)(nowMs & 0xffffffffULL));
-    fwrite(buf, 1, sizeof(uint32_t), header_fp_);  // xxx fix the time
+    total_header_bytes_ += Write(buf, sizeof(uint32_t), header_fp_);  // xxx fix the time
   }
 
   void WriteStackTraces() {
     // Write a dummy stack trace record so the analysis tools don't freak out.
-    current_record_.StartNewRecord(header_fp_, HPROF_TAG_STACK_TRACE, HPROF_TIME);
+    total_header_bytes_ +=
+        current_record_.StartNewRecord(header_fp_, HPROF_TAG_STACK_TRACE, HPROF_TIME);
     current_record_.AddU4(HPROF_NULL_STACK_TRACE);
     current_record_.AddU4(HPROF_NULL_THREAD);
     current_record_.AddU4(0);    // no frames
@@ -680,6 +628,9 @@
   int fd_;
   bool direct_to_ddms_;
 
+  // Whether we are in writing mode (true) or size-calculating mode (false).
+  bool allow_writing_;
+
   uint64_t start_ns_;
 
   HprofRecord current_record_;
@@ -692,10 +643,14 @@
   FILE* header_fp_;
   char* header_data_ptr_;
   size_t header_data_size_;
+  size_t total_header_bytes_;
 
   FILE* body_fp_;
   char* body_data_ptr_;
   size_t body_data_size_;
+  size_t total_body_bytes_;
+
+  JDWP::JdwpNetStateBase* net_state_;
 
   std::set<mirror::Class*> classes_;
   HprofStringId next_string_id_;
@@ -1103,6 +1058,78 @@
   Runtime::Current()->GetThreadList()->ResumeAll();
 }
 
-}  // namespace hprof
+// Returns how many bytes were in the buffer (or written).
+size_t HprofRecord::Flush() {
+  size_t chars = 0;
+  if (dirty_) {
+    unsigned char headBuf[sizeof(uint8_t) + 2 * sizeof(uint32_t)];
+    headBuf[0] = tag_;
+    U4_TO_BUF_BE(headBuf, 1, time_);
+    U4_TO_BUF_BE(headBuf, 5, length_);
+    chars += hprof_->Write(headBuf, sizeof(headBuf), fp_);
+    chars += hprof_->Write(body_, length_, fp_);
+    dirty_ = false;
+  }
+  return chars;
+}
 
+void HprofRecord::AddU1(uint8_t value) {
+  if (hprof_->AllowWriting()) {
+    GuaranteeRecordAppend(1);
+    body_[length_] = value;
+  }
+  ++length_;
+}
+
+void HprofRecord::AddU1List(const uint8_t* values, size_t numValues) {
+  if (hprof_->AllowWriting()) {
+    GuaranteeRecordAppend(numValues);
+    memcpy(body_ + length_, values, numValues);
+  }
+  length_ += numValues;
+}
+
+void HprofRecord::AddU2List(const uint16_t* values, size_t numValues) {
+  if (hprof_->AllowWriting()) {
+    GuaranteeRecordAppend(numValues * 2);
+    unsigned char* insert = body_ + length_;
+    for (size_t i = 0; i < numValues; ++i) {
+      U2_TO_BUF_BE(insert, 0, *values++);
+      insert += sizeof(*values);
+    }
+  }
+  length_ += numValues * 2;
+}
+
+void HprofRecord::AddU4List(const uint32_t* values, size_t numValues) {
+  if (hprof_->AllowWriting()) {
+    GuaranteeRecordAppend(numValues * 4);
+    unsigned char* insert = body_ + length_;
+    for (size_t i = 0; i < numValues; ++i) {
+      U4_TO_BUF_BE(insert, 0, *values++);
+      insert += sizeof(*values);
+    }
+  }
+  length_ += numValues * 4;
+}
+
+void HprofRecord::UpdateU4(size_t offset, uint32_t new_value) {
+  if (hprof_->AllowWriting()) {
+    U4_TO_BUF_BE(body_, offset, new_value);
+  }
+}
+
+void HprofRecord::AddU8List(const uint64_t* values, size_t numValues) {
+  if (hprof_->AllowWriting()) {
+    GuaranteeRecordAppend(numValues * 8);
+    unsigned char* insert = body_ + length_;
+    for (size_t i = 0; i < numValues; ++i) {
+      U8_TO_BUF_BE(insert, 0, *values++);
+      insert += sizeof(*values);
+    }
+  }
+  length_ += numValues * 8;
+}
+
+}  // namespace hprof
 }  // namespace art
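
Dump() above now runs the whole dump twice: a first pass with writing disabled that only totals byte counts, then a second pass that actually streams, which is what lets the exact chunk length be sent to DDMS before any data. A minimal sketch of that measure-then-write pattern, with hypothetical names:

    #include <cassert>
    #include <cstddef>
    #include <cstdio>

    // Pass 1 (allow_writing == false) counts bytes without touching the
    // stream; pass 2 writes for real. Write() returns the same length in
    // both passes, so the first pass predicts the size of the second.
    class TwoPassWriter {
     public:
      explicit TwoPassWriter(bool allow_writing) : allow_writing_(allow_writing) {}

      size_t Write(const void* ptr, size_t len, FILE* fp) {
        if (allow_writing_) {
          const size_t n = fwrite(ptr, 1, len, fp);
          assert(n == len);
          (void)n;
        }
        return len;
      }

     private:
      const bool allow_writing_;
    };
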
diff --git a/runtime/java_vm_ext_test.cc b/runtime/java_vm_ext_test.cc
index 60c6a5c..2cbfa81 100644
--- a/runtime/java_vm_ext_test.cc
+++ b/runtime/java_vm_ext_test.cc
@@ -69,7 +69,12 @@
     } else {
       ok = vms_buf[0]->AttachCurrentThreadAsDaemon(&env, nullptr);
     }
-    EXPECT_EQ(gSmallStack ? JNI_ERR : JNI_OK, ok);
+    // TODO: Find a way to test with exact SMALL_STACK value, for which we would bail. The pthreads
+    //       spec says that the stack size argument is a lower bound, and bionic currently gives us
+    //       a chunk more on arm64.
+    if (!gSmallStack) {
+      EXPECT_EQ(JNI_OK, ok);
+    }
     if (ok == JNI_OK) {
       ok = vms_buf[0]->DetachCurrentThread();
       EXPECT_EQ(JNI_OK, ok);
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index aa0c103..9309ab5 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -252,6 +252,9 @@
   // Called if/when we realize we're talking to DDMS.
   void NotifyDdmsActive() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+
+  void SetupChunkHeader(uint32_t type, size_t data_len, size_t header_size, uint8_t* out_header);
+
   /*
    * Send up a chunk of DDM data.
    */
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index cb28ff0..a8eaa26 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -1257,6 +1257,22 @@
 }
 
 /*
+ * Set up the header for a chunk of DDM data.
+ */
+void JdwpState::SetupChunkHeader(uint32_t type, size_t data_len, size_t header_size,
+                                 uint8_t* out_header) {
+  CHECK_EQ(header_size, static_cast<size_t>(kJDWPHeaderLen + 8));
+  /* form the header (JDWP plus DDMS) */
+  Set4BE(out_header, header_size + data_len);
+  Set4BE(out_header + 4, NextRequestSerial());
+  Set1(out_header + 8, 0);     /* flags */
+  Set1(out_header + 9, kJDWPDdmCmdSet);
+  Set1(out_header + 10, kJDWPDdmCmd);
+  Set4BE(out_header + 11, type);
+  Set4BE(out_header + 15, data_len);
+}
+
+/*
  * Send up a chunk of DDM data.
  *
  * While this takes the form of a JDWP "event", it doesn't interact with
@@ -1264,7 +1280,7 @@
  * the fun event token gymnastics.
  */
 void JdwpState::DdmSendChunkV(uint32_t type, const iovec* iov, int iov_count) {
-  uint8_t header[kJDWPHeaderLen + 8];
+  uint8_t header[kJDWPHeaderLen + 8] = { 0 };
   size_t dataLen = 0;
 
   CHECK(iov != nullptr);
@@ -1282,14 +1298,7 @@
     dataLen += iov[i].iov_len;
   }
 
-  /* form the header (JDWP plus DDMS) */
-  Set4BE(header, sizeof(header) + dataLen);
-  Set4BE(header + 4, NextRequestSerial());
-  Set1(header + 8, 0);     /* flags */
-  Set1(header + 9, kJDWPDdmCmdSet);
-  Set1(header + 10, kJDWPDdmCmd);
-  Set4BE(header + 11, type);
-  Set4BE(header + 15, dataLen);
+  SetupChunkHeader(type, dataLen, sizeof(header), header);
 
   wrapiov[0].iov_base = header;
   wrapiov[0].iov_len = sizeof(header);
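
SetupChunkHeader above serializes the 11-byte JDWP packet header followed by the 8-byte DDMS chunk header (type and length), all big-endian. A sketch of the store helpers it assumes, plus the resulting layout; the 11-byte kJDWPHeaderLen is taken as given:

    #include <cstdint>

    // Big-endian stores with the semantics assumed by SetupChunkHeader.
    inline void Set1(uint8_t* buf, uint8_t val) { buf[0] = val; }

    inline void Set4BE(uint8_t* buf, uint32_t val) {
      buf[0] = static_cast<uint8_t>(val >> 24);
      buf[1] = static_cast<uint8_t>(val >> 16);
      buf[2] = static_cast<uint8_t>(val >> 8);
      buf[3] = static_cast<uint8_t>(val);
    }

    // Resulting 19-byte header layout:
    //   [0..3]   total length (header + data)
    //   [4..7]   request serial
    //   [8]      flags
    //   [9]      DDMS command set
    //   [10]     DDMS command
    //   [11..14] chunk type
    //   [15..18] chunk data length
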
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index bfd4252..40211de 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -135,6 +135,11 @@
  */
 ssize_t JdwpNetStateBase::WriteBufferedPacket(const std::vector<iovec>& iov) {
   MutexLock mu(Thread::Current(), socket_lock_);
+  return WriteBufferedPacketLocked(iov);
+}
+
+ssize_t JdwpNetStateBase::WriteBufferedPacketLocked(const std::vector<iovec>& iov) {
+  socket_lock_.AssertHeld(Thread::Current());
   return TEMP_FAILURE_RETRY(writev(clientSock, &iov[0], iov.size()));
 }
 
diff --git a/runtime/jdwp/jdwp_priv.h b/runtime/jdwp/jdwp_priv.h
index 29ad185..f290be0 100644
--- a/runtime/jdwp/jdwp_priv.h
+++ b/runtime/jdwp/jdwp_priv.h
@@ -71,6 +71,10 @@
 
   ssize_t WritePacket(ExpandBuf* pReply, size_t length) LOCKS_EXCLUDED(socket_lock_);
   ssize_t WriteBufferedPacket(const std::vector<iovec>& iov) LOCKS_EXCLUDED(socket_lock_);
+  Mutex* GetSocketLock() {
+    return &socket_lock_;
+  }
+  ssize_t WriteBufferedPacketLocked(const std::vector<iovec>& iov);
 
   int clientSock;  // Active connection to debugger.
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 8303f84..a722813 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -665,6 +665,19 @@
   maps_ = nullptr;
 }
 
+void MemMap::SetSize(size_t new_size) {
+  if (new_size == base_size_) {
+    return;
+  }
+  CHECK_ALIGNED(new_size, kPageSize);
+  CHECK_EQ(base_size_, size_) << "Unsupported";
+  CHECK_LE(new_size, base_size_);
+  CHECK_EQ(munmap(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(BaseBegin()) + new_size),
+                  base_size_ - new_size), 0) << new_size << " " << base_size_;
+  base_size_ = new_size;
+  size_ = new_size;
+}
+
 std::ostream& operator<<(std::ostream& os, const MemMap& mem_map) {
   os << StringPrintf("[MemMap: %p-%p prot=0x%x %s]",
                      mem_map.BaseBegin(), mem_map.BaseEnd(), mem_map.GetProtect(),
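
SetSize above shrinks a mapping in place by unmapping only its tail, which is what lets ClampGrowthLimit return never-to-be-used pages to the kernel without moving the space. A standalone sketch of the tail trim, assuming page-aligned sizes:

    #include <cstddef>
    #include <cstdint>
    #include <sys/mman.h>

    // Unmap the pages in [begin + new_size, begin + old_size). Only
    // shrinking is supported, matching MemMap::SetSize.
    bool TrimMappingTail(void* begin, size_t old_size, size_t new_size) {
      if (new_size == old_size) {
        return true;  // Nothing to release.
      }
      if (new_size > old_size) {
        return false;  // Growing is not supported.
      }
      uint8_t* tail = reinterpret_cast<uint8_t*>(begin) + new_size;
      return munmap(tail, old_size - new_size) == 0;
    }
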
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 9b003aa..dc337e0 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -107,6 +107,9 @@
     return size_;
   }
 
+  // Resize the mem-map by unmapping pages at the end. Currently only supports shrinking.
+  void SetSize(size_t new_size);
+
   uint8_t* End() const {
     return Begin() + Size();
   }
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 13f881d..4dddd38 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -19,6 +19,7 @@
 
 #include "array.h"
 
+#include "base/stringprintf.h"
 #include "class.h"
 #include "gc/heap-inl.h"
 #include "thread.h"
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index f503b35..471aa9c 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -134,6 +134,10 @@
   Runtime::Current()->GetHeap()->ClearGrowthLimit();
 }
 
+static void VMRuntime_clampGrowthLimit(JNIEnv*, jobject) {
+  Runtime::Current()->GetHeap()->ClampGrowthLimit();
+}
+
 static jboolean VMRuntime_isDebuggerActive(JNIEnv*, jobject) {
   return Dbg::IsDebuggerActive();
 }
@@ -577,6 +581,7 @@
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(VMRuntime, addressOf, "!(Ljava/lang/Object;)J"),
   NATIVE_METHOD(VMRuntime, bootClassPath, "()Ljava/lang/String;"),
+  NATIVE_METHOD(VMRuntime, clampGrowthLimit, "()V"),
   NATIVE_METHOD(VMRuntime, classPath, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, clearGrowthLimit, "()V"),
   NATIVE_METHOD(VMRuntime, concurrentGC, "()V"),
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 5f68d60..c056adc 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -26,7 +26,7 @@
 #include "ScopedUtfChars.h"
 #include "thread-inl.h"
 
-#if defined(HAVE_PRCTL)
+#if defined(__linux__)
 #include <sys/prctl.h>
 #endif
 
@@ -35,9 +35,9 @@
 namespace art {
 
 static void EnableDebugger() {
+#if defined(__linux__)
   // To let a non-privileged gdbserver attach to this
   // process, we must set our dumpable flag.
-#if defined(HAVE_PRCTL)
   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
     PLOG(ERROR) << "prctl(PR_SET_DUMPABLE) failed for pid " << getpid();
   }
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 1b992d5..4ba3cb9 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -34,7 +34,6 @@
 
 ParsedOptions::ParsedOptions()
     :
-    boot_class_path_(nullptr),
     check_jni_(kIsDebugBuild),                      // -Xcheck:jni is off by default for regular
                                                     // builds but on by default in debug builds.
     force_copy_(false),
@@ -288,6 +287,9 @@
     } else if (StartsWith(option, "-Xbootclasspath:")) {
       boot_class_path_string_ = option.substr(strlen("-Xbootclasspath:")).data();
       LOG(INFO) << "setting boot class path to " << boot_class_path_string_;
+    } else if (StartsWith(option, "-Xbootclasspath-locations:")) {
+      boot_class_path_locations_string_ = option.substr(
+          strlen("-Xbootclasspath-locations:")).data();
     } else if (option == "-classpath" || option == "-cp") {
       // TODO: support -Djava.class.path
       i++;
@@ -297,9 +299,6 @@
       }
       const StringPiece& value = options[i].first;
       class_path_string_ = value.data();
-    } else if (option == "bootclasspath") {
-      boot_class_path_
-          = reinterpret_cast<const std::vector<const DexFile*>*>(options[i].second);
     } else if (StartsWith(option, "-Ximage:")) {
       if (!ParseStringAfterChar(option, ':', &image_)) {
         return false;
@@ -720,6 +719,24 @@
     boot_class_path_string_.replace(core_jar_pos, core_jar.size(), core_libart_jar);
   }
 
+  if (!boot_class_path_locations_string_.empty()) {
+    std::vector<std::string> files;
+    Split(boot_class_path_string_, ':', &files);
+
+    std::vector<std::string> locations;
+    Split(boot_class_path_locations_string_, ':', &locations);
+
+    if (files.size() != locations.size()) {
+      Usage("The number of boot class path files does not match"
+          " the number of boot class path locations given\n"
+          "  boot class path files     (%zu): %s\n"
+          "  boot class path locations (%zu): %s\n",
+          files.size(), boot_class_path_string_.c_str(),
+          locations.size(), boot_class_path_locations_string_.c_str());
+      return false;
+    }
+  }
+
   if (compiler_callbacks_ == nullptr && image_.empty()) {
     image_ += GetAndroidRoot();
     image_ += "/framework/boot.art";
@@ -804,6 +821,8 @@
   UsageMessage(stream, "  -Xgc:[no]postverify_rosalloc\n");
   UsageMessage(stream, "  -Xgc:[no]presweepingverify\n");
   UsageMessage(stream, "  -Ximage:filename\n");
+  UsageMessage(stream, "  -Xbootclasspath-locations:bootclasspath\n"
+      "     (override the dex locations of the -Xbootclasspath files)\n");
   UsageMessage(stream, "  -XX:+DisableExplicitGC\n");
   UsageMessage(stream, "  -XX:ParallelGCThreads=integervalue\n");
   UsageMessage(stream, "  -XX:ConcGCThreads=integervalue\n");
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 9294868..c7162b8 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -40,8 +40,8 @@
   // returns null if problem parsing and ignore_unrecognized is false
   static ParsedOptions* Create(const RuntimeOptions& options, bool ignore_unrecognized);
 
-  const std::vector<const DexFile*>* boot_class_path_;
   std::string boot_class_path_string_;
+  std::string boot_class_path_locations_string_;
   std::string class_path_string_;
   std::string image_;
   bool check_jni_;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index a2c9f50..fb6034d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -683,6 +683,7 @@
 
 
 static size_t OpenDexFiles(const std::vector<std::string>& dex_filenames,
+                           const std::vector<std::string>& dex_locations,
                            const std::string& image_location,
                            std::vector<const DexFile*>& dex_files) {
   size_t failure_count = 0;
@@ -692,12 +693,13 @@
   failure_count = 0;
   for (size_t i = 0; i < dex_filenames.size(); i++) {
     const char* dex_filename = dex_filenames[i].c_str();
+    const char* dex_location = dex_locations[i].c_str();
     std::string error_msg;
     if (!OS::FileExists(dex_filename)) {
       LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
       continue;
     }
-    if (!DexFile::Open(dex_filename, dex_filename, &error_msg, &dex_files)) {
+    if (!DexFile::Open(dex_filename, dex_location, &error_msg, &dex_files)) {
       LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
       ++failure_count;
     }
@@ -858,17 +860,25 @@
 
   CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U);
   class_linker_ = new ClassLinker(intern_table_);
-  bool options_class_path_used = false;
   if (GetHeap()->HasImageSpace()) {
     class_linker_->InitFromImage();
     if (kIsDebugBuild) {
       GetHeap()->GetImageSpace()->VerifyImageAllocations();
     }
-  } else if (!IsCompiler() || !image_dex2oat_enabled_) {
+  } else {
     std::vector<std::string> dex_filenames;
     Split(boot_class_path_string_, ':', &dex_filenames);
+
+    std::vector<std::string> dex_locations;
+    if (options->boot_class_path_locations_string_.empty()) {
+      dex_locations = dex_filenames;
+    } else {
+      Split(options->boot_class_path_locations_string_, ':', &dex_locations);
+      CHECK_EQ(dex_filenames.size(), dex_locations.size());
+    }
+
     std::vector<const DexFile*> boot_class_path;
-    OpenDexFiles(dex_filenames, options->image_, boot_class_path);
+    OpenDexFiles(dex_filenames, dex_locations, options->image_, boot_class_path);
     class_linker_->InitWithoutImage(boot_class_path);
     // TODO: Should we move the following to InitWithoutImage?
     SetInstructionSet(kRuntimeISA);
@@ -878,18 +888,6 @@
         SetCalleeSaveMethod(CreateCalleeSaveMethod(), type);
       }
     }
-  } else {
-    CHECK(options->boot_class_path_ != nullptr);
-    CHECK_NE(options->boot_class_path_->size(), 0U);
-    class_linker_->InitWithoutImage(*options->boot_class_path_);
-    options_class_path_used = true;
-  }
-
-  if (!options_class_path_used) {
-    // If the class linker does not take ownership of the boot class path, wipe it to prevent leaks.
-    auto boot_class_path_vector_ptr =
-        const_cast<std::vector<const DexFile*>*>(options->boot_class_path_);
-    STLDeleteElements(boot_class_path_vector_ptr);
   }
 
   CHECK(class_linker_ != nullptr);
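
The pairing logic in OpenDexFiles, sketched standalone: each boot class path file is opened under a caller-supplied location string, and locations default to the file names when -Xbootclasspath-locations is absent. OpenOneDexFile is a hypothetical stand-in for DexFile::Open.

```cpp
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Hypothetical stand-in for DexFile::Open: open `filename` from disk but record it
// under `location`, the name image and oat files will refer to it by.
static bool OpenOneDexFile(const std::string& filename, const std::string& location) {
  (void)filename;
  (void)location;
  return true;  // Stub for the sketch.
}

size_t OpenBootDexFiles(const std::vector<std::string>& filenames,
                        std::vector<std::string> locations) {
  if (locations.empty()) {
    locations = filenames;  // Locations default to the on-disk file names.
  }
  assert(filenames.size() == locations.size());
  size_t failure_count = 0;
  for (size_t i = 0; i < filenames.size(); ++i) {
    if (!OpenOneDexFile(filenames[i], locations[i])) {
      ++failure_count;
    }
  }
  return failure_count;
}
```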
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d2d5be7..6a1aeb5 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -592,13 +592,13 @@
 }
 
 uint64_t Thread::GetCpuMicroTime() const {
-#if defined(HAVE_POSIX_CLOCKS)
+#if defined(__linux__)
   clockid_t cpu_clock_id;
   pthread_getcpuclockid(tlsPtr_.pthread_self, &cpu_clock_id);
   timespec now;
   clock_gettime(cpu_clock_id, &now);
   return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000) + now.tv_nsec / UINT64_C(1000);
-#else
+#else  // __APPLE__
   UNIMPLEMENTED(WARNING);
   return -1;
 #endif
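
The Linux-only branch above, as a self-contained function (name assumed): resolve the calling thread's CPU-time clock id, then sample it with clock_gettime.

```cpp
#include <pthread.h>

#include <cstdint>
#include <ctime>

uint64_t SelfCpuMicroTime() {
  clockid_t cpu_clock_id;
  pthread_getcpuclockid(pthread_self(), &cpu_clock_id);  // This thread's CPU-time clock.
  timespec now;
  clock_gettime(cpu_clock_id, &now);
  return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000) +
         now.tv_nsec / UINT64_C(1000);
}
```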
@@ -982,8 +982,9 @@
 void Thread::DumpJavaStack(std::ostream& os) const {
   // Dumping the Java stack involves the verifier for locks. The verifier operates under the
   // assumption that there is no exception pending on entry. Thus, stash any pending exception.
-  // TODO: Find a way to avoid const_cast.
-  StackHandleScope<3> scope(const_cast<Thread*>(this));
+  // Use Thread::Current() instead of 'this' in case a thread is dumping the stack of another
+  // suspended thread.
+  StackHandleScope<3> scope(Thread::Current());
   Handle<mirror::Throwable> exc;
   Handle<mirror::Object> throw_location_this_object;
   Handle<mirror::ArtMethod> throw_location_method;
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 9707c7b..20fbc37 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -150,6 +150,11 @@
   closedir(d);
 }
 
+// Dump checkpoint timeout in milliseconds. The host gets a larger timeout, as dumping there
+// will invoke addr2line when available.
+static constexpr uint32_t kDumpWaitTimeoutTarget = 10000;
+static constexpr uint32_t kDumpWaitTimeoutHost = 20000;
+
 // A closure used by Thread::Dump.
 class DumpCheckpoint FINAL : public Closure {
  public:
@@ -176,8 +181,8 @@
   void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
     Thread* self = Thread::Current();
     ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
-    const uint32_t kWaitTimeoutMs = 10000;
-    bool timed_out = barrier_.Increment(self, threads_running_checkpoint, kWaitTimeoutMs);
+    bool timed_out = barrier_.Increment(self, threads_running_checkpoint,
+        kIsTargetBuild ? kDumpWaitTimeoutTarget : kDumpWaitTimeoutHost);
     if (timed_out) {
       // Avoid a recursive abort.
       LOG((kIsDebugBuild && (gAborting == 0)) ? FATAL : ERROR)
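
A condensed view of the timeout selection, with kIsTargetBuild stubbed as a plain constant (in ART it is fixed by the build system):

```cpp
#include <cstdint>

static constexpr uint32_t kDumpWaitTimeoutTargetMs = 10000;
static constexpr uint32_t kDumpWaitTimeoutHostMs = 20000;
static constexpr bool kIsTargetBuild = false;  // Stubbed for the sketch.

// The host gets the larger budget because dumping there may shell out to addr2line.
constexpr uint32_t DumpWaitTimeoutMs() {
  return kIsTargetBuild ? kDumpWaitTimeoutTargetMs : kDumpWaitTimeoutHostMs;
}
```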
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 29a3b09..5066e03 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -150,7 +150,7 @@
 }
 
 void Trace::SetDefaultClockSource(TraceClockSource clock_source) {
-#if defined(HAVE_POSIX_CLOCKS)
+#if defined(__linux__)
   default_clock_source_ = clock_source;
 #else
   if (clock_source != kTraceClockSourceWall) {
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 7234ec0..d7d4ec2 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -39,17 +39,10 @@
 #include "scoped_thread_state_change.h"
 #include "utf-inl.h"
 
-#if !defined(HAVE_POSIX_CLOCKS)
-#include <sys/time.h>
-#endif
-
-#if defined(HAVE_PRCTL)
-#include <sys/prctl.h>
-#endif
-
 #if defined(__APPLE__)
 #include "AvailabilityMacros.h"  // For MAC_OS_X_VERSION_MAX_ALLOWED
 #include <sys/syscall.h>
+#include <sys/time.h>
 #endif
 
 #include <backtrace/Backtrace.h>  // For DumpNativeStack.
@@ -60,6 +53,10 @@
 
 namespace art {
 
+#if defined(__linux__)
+static constexpr bool kUseAddr2line = !kIsTargetBuild;
+#endif
+
 pid_t GetTid() {
 #if defined(__APPLE__)
   uint64_t owner;
@@ -164,11 +161,11 @@
 }
 
 uint64_t MilliTime() {
-#if defined(HAVE_POSIX_CLOCKS)
+#if defined(__linux__)
   timespec now;
   clock_gettime(CLOCK_MONOTONIC, &now);
   return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000) + now.tv_nsec / UINT64_C(1000000);
-#else
+#else  // __APPLE__
   timeval now;
   gettimeofday(&now, NULL);
   return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000) + now.tv_usec / UINT64_C(1000);
@@ -176,11 +173,11 @@
 }
 
 uint64_t MicroTime() {
-#if defined(HAVE_POSIX_CLOCKS)
+#if defined(__linux__)
   timespec now;
   clock_gettime(CLOCK_MONOTONIC, &now);
   return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000) + now.tv_nsec / UINT64_C(1000);
-#else
+#else  // __APPLE__
   timeval now;
   gettimeofday(&now, NULL);
   return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000) + now.tv_usec;
@@ -188,11 +185,11 @@
 }
 
 uint64_t NanoTime() {
-#if defined(HAVE_POSIX_CLOCKS)
+#if defined(__linux__)
   timespec now;
   clock_gettime(CLOCK_MONOTONIC, &now);
   return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000000) + now.tv_nsec;
-#else
+#else  // __APPLE__
   timeval now;
   gettimeofday(&now, NULL);
   return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000000) + now.tv_usec * UINT64_C(1000);
@@ -200,11 +197,11 @@
 }
 
 uint64_t ThreadCpuNanoTime() {
-#if defined(HAVE_POSIX_CLOCKS)
+#if defined(__linux__)
   timespec now;
   clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);
   return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000000) + now.tv_nsec;
-#else
+#else  // __APPLE__
   UNIMPLEMENTED(WARNING);
   return -1;
 #endif
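
All four clock functions above share one shape: CLOCK_MONOTONIC via clock_gettime on Linux, gettimeofday as the Apple fallback. A standalone sketch of that pattern, assuming only POSIX headers:

```cpp
#include <cstdint>
#include <ctime>
#if !defined(__linux__)
#include <sys/time.h>
#endif

uint64_t PortableNanoTime() {
#if defined(__linux__)
  timespec now;
  clock_gettime(CLOCK_MONOTONIC, &now);
  return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000000) + now.tv_nsec;
#else
  // Wall-clock fallback: microsecond resolution, and it can jump when the
  // system time is adjusted.
  timeval now;
  gettimeofday(&now, nullptr);
  return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000000) +
         now.tv_usec * UINT64_C(1000);
#endif
}
```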
@@ -1057,21 +1054,17 @@
   } else {
     s = thread_name + len - 15;
   }
-#if defined(__BIONIC__)
+#if defined(__linux__)
   // pthread_setname_np fails rather than truncating long strings.
-  char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
+  char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded in the kernel.
   strncpy(buf, s, sizeof(buf)-1);
   buf[sizeof(buf)-1] = '\0';
   errno = pthread_setname_np(pthread_self(), buf);
   if (errno != 0) {
     PLOG(WARNING) << "Unable to set the name of current thread to '" << buf << "'";
   }
-#elif defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1060
+#else  // __APPLE__
   pthread_setname_np(thread_name);
-#elif defined(HAVE_PRCTL)
-  prctl(PR_SET_NAME, (unsigned long) s, 0, 0, 0);  // NOLINT (unsigned long)
-#else
-  UNIMPLEMENTED(WARNING) << thread_name;
 #endif
 }
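
The 16-byte limit handled above, in isolation: Linux caps thread names at MAX_TASK_COMM_LEN (16 bytes including the trailing NUL), and glibc's pthread_setname_np rejects longer names instead of truncating them. Function name assumed.

```cpp
#define _GNU_SOURCE  // For pthread_setname_np on glibc.
#include <pthread.h>

#include <cstdio>
#include <cstring>

void SetNameTruncated(const char* name) {
  char buf[16];  // MAX_TASK_COMM_LEN, including the trailing NUL.
  strncpy(buf, name, sizeof(buf) - 1);
  buf[sizeof(buf) - 1] = '\0';
  int rc = pthread_setname_np(pthread_self(), buf);
  if (rc != 0) {
    fprintf(stderr, "pthread_setname_np failed: %d\n", rc);
  }
}
```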
 
@@ -1117,6 +1110,74 @@
   return "";
 }
 
+#if defined(__linux__)
+
+ALWAYS_INLINE
+static inline void WritePrefix(std::ostream* os, const char* prefix, bool odd) {
+  if (prefix != nullptr) {
+    *os << prefix;
+  }
+  *os << "  ";
+  if (!odd) {
+    *os << " ";
+  }
+}
+
+static bool RunCommand(std::string cmd, std::ostream* os, const char* prefix) {
+  FILE* stream = popen(cmd.c_str(), "r");
+  if (stream) {
+    if (os != nullptr) {
+      bool odd_line = true;               // We indent them differently.
+      bool wrote_prefix = false;          // Have we already written a prefix?
+      constexpr size_t kMaxBuffer = 128;  // Relatively small buffer, since we may be running
+                                          // on an alternate signal stack with limited space.
+      char buffer[kMaxBuffer];
+      while (!feof(stream)) {
+        if (fgets(buffer, kMaxBuffer, stream) != nullptr) {
+          // Split on newlines.
+          char* tmp = buffer;
+          for (;;) {
+            char* new_line = strchr(tmp, '\n');
+            if (new_line == nullptr) {
+              // Print the rest.
+              if (*tmp != 0) {
+                if (!wrote_prefix) {
+                  WritePrefix(os, prefix, odd_line);
+                }
+                wrote_prefix = true;
+                *os << tmp;
+              }
+              break;
+            }
+            if (!wrote_prefix) {
+              WritePrefix(os, prefix, odd_line);
+            }
+            char saved = *(new_line + 1);
+            *(new_line + 1) = 0;
+            *os << tmp;
+            *(new_line + 1) = saved;
+            tmp = new_line + 1;
+            odd_line = !odd_line;
+            wrote_prefix = false;
+          }
+        }
+      }
+    }
+    pclose(stream);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+static void Addr2line(const std::string& map_src, uintptr_t offset, std::ostream& os,
+                      const char* prefix) {
+  std::string cmdline(StringPrintf("addr2line --functions --inlines --demangle -e %s %zx",
+                                   map_src.c_str(), offset));
+  RunCommand(cmdline.c_str(), &os, prefix);
+}
+#endif
+
 void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix,
     mirror::ArtMethod* current_method, void* ucontext_ptr) {
 #if __linux__
@@ -1142,6 +1203,16 @@
     return;
   }
 
+  // Check whether we have and should use addr2line.
+  bool use_addr2line;
+  if (kUseAddr2line) {
+    // Try to run it to see whether we have it. Pass an argument so that it doesn't assume a.out
+    // and prints its usage to stderr.
+    use_addr2line = RunCommand("addr2line -h", nullptr, nullptr);
+  } else {
+    use_addr2line = false;
+  }
+
   for (Backtrace::const_iterator it = backtrace->begin();
        it != backtrace->end(); ++it) {
     // We produce output like this:
@@ -1153,6 +1224,7 @@
     // after the <RELATIVE_ADDR>. There can be any prefix data before the
     // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
     os << prefix << StringPrintf("#%02zu pc ", it->num);
+    bool try_addr2line = false;
     if (!it->map) {
       os << StringPrintf("%08" PRIxPTR "  ???", it->pc);
     } else {
@@ -1163,6 +1235,7 @@
         if (it->func_offset != 0) {
           os << "+" << it->func_offset;
         }
+        try_addr2line = true;
       } else if (current_method != nullptr &&
                  Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
                  current_method->PcIsWithinQuickCode(it->pc)) {
@@ -1175,9 +1248,12 @@
       os << ")";
     }
     os << "\n";
+    if (try_addr2line && use_addr2line) {
+      Addr2line(it->map->name, it->pc - it->map->start, os, prefix);
+    }
   }
 #else
-  UNUSED(os, tid, prefix, current_method);
+  UNUSED(os, tid, prefix, current_method, ucontext_ptr);
 #endif
 }
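
The popen pipeline above, condensed into one function (name assumed) that runs addr2line with the same flags as Addr2line and forwards its output, indented, to the given stream:

```cpp
#include <cinttypes>
#include <cstdio>
#include <ostream>
#include <string>

bool SymbolizeWithAddr2line(const std::string& binary, uintptr_t offset, std::ostream& os) {
  char cmd[512];
  snprintf(cmd, sizeof(cmd), "addr2line --functions --inlines --demangle -e %s %" PRIxPTR,
           binary.c_str(), offset);
  FILE* stream = popen(cmd, "r");
  if (stream == nullptr) {
    return false;
  }
  char buffer[128];  // Small buffer: this may run on an alternate signal stack.
  while (fgets(buffer, sizeof(buffer), stream) != nullptr) {
    os << "    " << buffer;  // Indent so symbol lines sit under their frame line.
  }
  pclose(stream);
  return true;
}
```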
 
diff --git a/tools/checker.py b/tools/checker.py
index 0813d0c..406a311 100755
--- a/tools/checker.py
+++ b/tools/checker.py
@@ -40,7 +40,7 @@
 #               later than lines matched against any preceding in-order checks.
 #               In other words, the order of output lines does not matter
 #               between consecutive DAG checks.
-#  - CHECK-NOT: Must not match any output line which appear in the output group
+#  - CHECK-NOT: Must not match any output line which appears in the output group
+#               later than lines matched against any preceding checks and
 #               earlier than lines matched against any subsequent checks.
 #               Surrounding non-negative checks (or boundaries of the group)
@@ -159,6 +159,23 @@
     """Supported language constructs."""
     Text, Pattern, VarRef, VarDef = range(4)
 
+  rStartOptional = r"("
+  rEndOptional = r")?"
+
+  rName = r"([a-zA-Z][a-zA-Z0-9]*)"
+  rRegex = r"(.+?)"
+  rPatternStartSym = r"(\{\{)"
+  rPatternEndSym = r"(\}\})"
+  rVariableStartSym = r"(\[\[)"
+  rVariableEndSym = r"(\]\])"
+  rVariableSeparator = r"(:)"
+
+  regexPattern = rPatternStartSym + rRegex + rPatternEndSym
+  regexVariable = rVariableStartSym + \
+                    rName + \
+                    (rStartOptional + rVariableSeparator + rRegex + rEndOptional) + \
+                  rVariableEndSym
+
   def __init__(self, variant, name, pattern):
     self.variant = variant
     self.name = name
@@ -170,22 +187,21 @@
 
   @staticmethod
   def parsePattern(patternElem):
-    return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:len(patternElem)-2])
+    return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:-2])
 
   @staticmethod
   def parseVariable(varElem):
     colonPos = varElem.find(":")
     if colonPos == -1:
       # Variable reference
-      name = varElem[2:len(varElem)-2]
+      name = varElem[2:-2]
       return CheckElement(CheckElement.Variant.VarRef, name, None)
     else:
       # Variable definition
       name = varElem[2:colonPos]
-      body = varElem[colonPos+1:len(varElem)-2]
+      body = varElem[colonPos+1:-2]
       return CheckElement(CheckElement.Variant.VarDef, name, body)
 
-
 class CheckLine(CommonEqualityMixin):
   """Representation of a single assertion in the check file formed of one or
      more regex elements. Matching against an output line is successful only
@@ -226,24 +242,6 @@
     starts = map(lambda m: len(string) if m is None else m.start(), matches)
     return min(starts)
 
-  # Returns the regex for finding a regex pattern in the check line.
-  def __getPatternRegex(self):
-    rStartSym = "\{\{"
-    rEndSym = "\}\}"
-    rBody = ".+?"
-    return rStartSym + rBody + rEndSym
-
-  # Returns the regex for finding a variable use in the check line.
-  def __getVariableRegex(self):
-    rStartSym = "\[\["
-    rEndSym = "\]\]"
-    rStartOptional = "("
-    rEndOptional = ")?"
-    rName = "[a-zA-Z][a-zA-Z0-9]*"
-    rSeparator = ":"
-    rBody = ".+?"
-    return rStartSym + rName + rStartOptional + rSeparator + rBody + rEndOptional + rEndSym
-
   # This method parses the content of a check line stripped of the initial
   # comment symbol and the CHECK keyword.
   def __parse(self, line):
@@ -251,9 +249,9 @@
     # Loop as long as there is something to parse.
     while line:
       # Search for the nearest occurrence of the special markers.
-      matchWhitespace = re.search("\s+", line)
-      matchPattern = re.search(self.__getPatternRegex(), line)
-      matchVariable = re.search(self.__getVariableRegex(), line)
+      matchWhitespace = re.search(r"\s+", line)
+      matchPattern = re.search(CheckElement.regexPattern, line)
+      matchVariable = re.search(CheckElement.regexVariable, line)
 
       # If one of the above was identified at the current position, extract them
       # from the line, parse them and add to the list of line parts.
@@ -262,7 +260,7 @@
         # a whitespace, we add a regex pattern for an arbitrary non-zero number
         # of whitespaces.
         line = line[matchWhitespace.end():]
-        lineParts.append(CheckElement.parsePattern("{{\s+}}"))
+        lineParts.append(CheckElement.parsePattern(r"{{\s+}}"))
       elif self.__isMatchAtStart(matchPattern):
         pattern = line[0:matchPattern.end()]
         line = line[matchPattern.end():]
@@ -536,21 +534,26 @@
   # followed by the CHECK keyword, given attribute and a colon at the very
   # beginning of the line. Whitespaces are ignored.
   def _extractLine(self, prefix, line):
-    ignoreWhitespace = "\s*"
-    commentSymbols = ["//", "#"]
-    prefixRegex = ignoreWhitespace + \
-                  "(" + "|".join(commentSymbols) + ")" + \
-                  ignoreWhitespace + \
-                  prefix + ":"
+    rIgnoreWhitespace = r"\s*"
+    rCommentSymbols = [r"//", r"#"]
+    regexPrefix = rIgnoreWhitespace + \
+                  r"(" + r"|".join(rCommentSymbols) + r")" + \
+                  rIgnoreWhitespace + \
+                  prefix + r":"
 
     # The 'match' function succeeds only if the pattern is matched at the
     # beginning of the line.
-    match = re.match(prefixRegex, line)
+    match = re.match(regexPrefix, line)
     if match is not None:
       return line[match.end():].strip()
     else:
       return None
 
+  # This function is invoked on each line of the check file and returns a pair
+  # which instructs the parser how the line should be handled. If the line is to
+  # be included in the current check group, it is returned in the first value.
+  # If the line starts a new check group, the name of the group is returned in
+  # the second value.
   def _processLine(self, line, lineNo):
     # Lines beginning with 'CHECK-START' start a new check group.
     startLine = self._extractLine(self.prefix + "-START", line)
@@ -578,6 +581,7 @@
   def _exceptionLineOutsideGroup(self, line, lineNo):
     Logger.fail("Check line not inside a group", self.fileName, lineNo)
 
+  # Constructs a check group from the parser-collected check lines.
   def _processGroup(self, name, lines, lineNo):
     checkLines = list(map(lambda line: CheckLine(line[0], line[1], self.fileName, line[2]), lines))
     return CheckGroup(name, checkLines, self.fileName, lineNo)
@@ -618,6 +622,11 @@
     self.state = OutputFile.ParsingState.OutsideBlock
     self.groups = self._parseStream(outputStream)
 
+  # This function is invoked on each line of the output file and returns a pair
+  # which instructs the parser how the line should be handled. If the line is to
+  # be included in the current group, it is returned in the first value. If the
+  # line starts a new output group, the name of the group is returned in the
+  # second value.
   def _processLine(self, line, lineNo):
     if self.state == OutputFile.ParsingState.StartingCfgBlock:
       # Previous line started a new 'cfg' block which means that this one must
@@ -663,6 +672,7 @@
       else:
         Logger.fail("Output line not inside a group", self.fileName, lineNo)
 
+  # Constructs an output group from the parser-collected output lines.
   def _processGroup(self, name, lines, lineNo):
     return OutputGroup(name, lines, self.fileName, lineNo + 1)
 
diff --git a/tools/checker_test.py b/tools/checker_test.py
index 2846a9c..3c659c2 100755
--- a/tools/checker_test.py
+++ b/tools/checker_test.py
@@ -23,7 +23,7 @@
 
 # The parent type of exception expected to be thrown by Checker during tests.
 # It must be specific enough to not cover exceptions thrown due to actual flaws
-# in Checker..
+# in Checker.
 CheckerException = SystemExit