diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 1e07430..086f298 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -84,7 +84,7 @@
 ART_TARGET_CLANG := false
 endif
 
-ifeq ($(TARGET_ARCH),mips)
+ifeq ($(TARGET_ARCH)|$(ART_TARGET_CLANG),mips|true)
   # b/18807290, Clang generated mips assembly code for array.cc
   # cannot be compiled by gas.
   # b/18789639, Clang assembler cannot compile inlined assembly code in
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index b059391..52a516c 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -929,6 +929,10 @@
     }
   } else {
     if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      if (!kArm32QuickCodeUseSoftFloat && arg.IsWide() && cur_core_reg_ == 0) {
+        // Skip r1, and use r2-r3 for the register pair.
+        cur_core_reg_++;
+      }
       result = coreArgMappingToPhysicalReg[cur_core_reg_++];
       if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
         result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 36d065f..73b68a5 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -1040,9 +1040,8 @@
     // Use LDREXD for the atomic load. (Expect displacement > 0, don't optimize for == 0.)
     RegStorage r_ptr = AllocTemp();
     OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
-    LIR* lir = NewLIR3(kThumb2Ldrexd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg());
+    load = NewLIR3(kThumb2Ldrexd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg());
     FreeTemp(r_ptr);
-    return lir;
   } else {
     load = LoadBaseDispBody(r_base, displacement, r_dest, size);
   }
@@ -1174,7 +1173,7 @@
     GenMemBarrier(kAnyStore);
   }
 
-  LIR* store;
+  LIR* null_ck_insn;
   if (is_volatile == kVolatile && (size == k64 || size == kDouble) &&
       !cu_->compiler_driver->GetInstructionSetFeatures()->
           AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd()) {
@@ -1191,17 +1190,16 @@
     RegStorage r_temp = AllocTemp();
     RegStorage r_temp_high = AllocTemp(false);  // We may not have another temp.
     if (r_temp_high.Valid()) {
-      NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg());
+      null_ck_insn = NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg());
       FreeTemp(r_temp_high);
       FreeTemp(r_temp);
     } else {
       // If we don't have another temp, clobber r_ptr in LDREXD and reload it.
-      NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_ptr.GetReg(), r_ptr.GetReg());
+      null_ck_insn = NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_ptr.GetReg(), r_ptr.GetReg());
       FreeTemp(r_temp);  // May need the temp for kOpAdd.
       OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
     }
-    store = NewLIR4(kThumb2Strexd, r_temp.GetReg(), r_src.GetLowReg(), r_src.GetHighReg(),
-                    r_ptr.GetReg());
+    NewLIR4(kThumb2Strexd, r_temp.GetReg(), r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg());
     OpCmpImmBranch(kCondNe, r_temp, 0, fail_target);
     FreeTemp(r_ptr);
   } else {
@@ -1210,7 +1208,7 @@
       size = k32;
     }
 
-    store = StoreBaseDispBody(r_base, displacement, r_src, size);
+    null_ck_insn = StoreBaseDispBody(r_base, displacement, r_src, size);
   }
 
   if (UNLIKELY(is_volatile == kVolatile)) {
@@ -1219,7 +1217,7 @@
     GenMemBarrier(kAnyAny);
   }
 
-  return store;
+  return null_ck_insn;
 }
 
 LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 3733507..d2b32b5 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -939,15 +939,15 @@
     }
     GenNullCheck(rl_obj.reg, opt_flags);
     int field_offset = field_info.FieldOffset().Int32Value();
-    LIR* store;
+    LIR* null_ck_insn;
     if (IsRef(size)) {
-      store = StoreRefDisp(rl_obj.reg, field_offset, rl_src.reg, field_info.IsVolatile() ?
+      null_ck_insn = StoreRefDisp(rl_obj.reg, field_offset, rl_src.reg, field_info.IsVolatile() ?
           kVolatile : kNotVolatile);
     } else {
-      store = StoreBaseDisp(rl_obj.reg, field_offset, rl_src.reg, size,
-                            field_info.IsVolatile() ? kVolatile : kNotVolatile);
+      null_ck_insn = StoreBaseDisp(rl_obj.reg, field_offset, rl_src.reg, size,
+                                   field_info.IsVolatile() ? kVolatile : kNotVolatile);
     }
-    MarkPossibleNullPointerExceptionAfter(opt_flags, store);
+    MarkPossibleNullPointerExceptionAfter(opt_flags, null_ck_insn);
     if (IsRef(size) && !mir_graph_->IsConstantNullRef(rl_src)) {
       MarkGCCard(opt_flags, rl_src.reg, rl_obj.reg);
     }
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index 0d1d9bf..4265ae1 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -434,7 +434,7 @@
  * anchor:
  *      ori  rAT, rAT, ((target-anchor) & 0xffff)
  *      addu rAT, rAT, rRA
- *      jr   rAT
+ *      jalr rZERO, rAT
  * hop:
  *
  * Orig unconditional branch
@@ -448,7 +448,7 @@
  * anchor:
  *      ori  rAT, rAT, ((target-anchor) & 0xffff)
  *      addu rAT, rAT, rRA
- *      jr   rAT
+ *      jalr rZERO, rAT
  *
  *
  * NOTE: An out-of-range bal isn't supported because it should
@@ -482,7 +482,7 @@
   if (!unconditional) {
     hop_target = RawLIR(dalvik_offset, kPseudoTargetLabel);
     LIR* hop_branch = RawLIR(dalvik_offset, opcode, lir->operands[0],
-                            lir->operands[1], 0, 0, 0, hop_target);
+                             lir->operands[1], 0, 0, 0, hop_target);
     InsertLIRBefore(lir, hop_branch);
   }
   LIR* curr_pc = RawLIR(dalvik_offset, kMipsCurrPC);
@@ -497,8 +497,8 @@
   InsertLIRBefore(lir, delta_lo);
   LIR* addu = RawLIR(dalvik_offset, kMipsAddu, rAT, rAT, rRA);
   InsertLIRBefore(lir, addu);
-  LIR* jr = RawLIR(dalvik_offset, kMipsJr, rAT);
-  InsertLIRBefore(lir, jr);
+  LIR* jalr = RawLIR(dalvik_offset, kMipsJalr, rZERO, rAT);
+  InsertLIRBefore(lir, jalr);
   if (!unconditional) {
     InsertLIRBefore(lir, hop_target);
   }
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 3bb81bf..51a8c98 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -58,7 +58,7 @@
  *   bne   r_val, r_key, loop
  *   lw    r_disp, -4(r_base)
  *   addu  rRA, r_disp
- *   jr    rRA
+ *   jalr  rZERO, rRA
  * done:
  *
  */
@@ -136,7 +136,7 @@
  *   bound check -> done
  *   lw    r_disp, [rRA, r_val]
  *   addu  rRA, r_disp
- *   jr    rRA
+ *   jalr  rZERO, rRA
  * done:
  */
 void MipsMir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 8f976df..9c3ce7b 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -216,7 +216,8 @@
     void ConvertShortToLongBranch(LIR* lir);
     RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2, bool is_div, int flags) OVERRIDE;
-    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) OVERRIDE;
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div)
+        OVERRIDE;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 0778c3b..aabef60 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -172,7 +172,7 @@
   if (r_dest.IsFloat() || r_src.IsFloat())
     return OpFpRegCopy(r_dest, r_src);
   LIR* res = RawLIR(current_dalvik_offset_, kMipsMove,
-            r_dest.GetReg(), r_src.GetReg());
+                    r_dest.GetReg(), r_src.GetReg());
   if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
     res->flags.is_nop = true;
   }
@@ -194,7 +194,7 @@
       if (src_fp) {
         OpRegCopy(r_dest, r_src);
       } else {
-         /* note the operands are swapped for the mtc1 instr */
+        /* note the operands are swapped for the mtc1 instr */
         NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
         NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
       }
@@ -240,7 +240,7 @@
 }
 
 RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
-                                    bool is_div) {
+                                   bool is_div) {
   NewLIR2(kMipsDiv, reg1.GetReg(), reg2.GetReg());
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (is_div) {
@@ -252,7 +252,7 @@
 }
 
 RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit,
-                                       bool is_div) {
+                                      bool is_div) {
   RegStorage t_reg = AllocTemp();
   NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit);
   NewLIR2(kMipsDiv, reg1.GetReg(), t_reg.GetReg());
@@ -501,7 +501,7 @@
  * Generate array load
  */
 void MipsMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_dest, int scale) {
+                              RegLocation rl_index, RegLocation rl_dest, int scale) {
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -570,7 +570,7 @@
  *
  */
 void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
+                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -632,7 +632,7 @@
   } else {
     rl_src = LoadValue(rl_src, reg_class);
     if (needs_range_check) {
-       GenArrayBoundsCheck(rl_index.reg, reg_len);
+      GenArrayBoundsCheck(rl_index.reg, reg_len);
       FreeTemp(reg_len);
     }
     StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index c22ba04..c819903 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -89,9 +89,9 @@
 
 // Convert k64BitSolo into k64BitPair
 RegStorage MipsMir2Lir::Solo64ToPair64(RegStorage reg) {
-    DCHECK(reg.IsDouble());
-    int reg_num = (reg.GetRegNum() & ~1) | RegStorage::kFloatingPoint;
-    return RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1);
+  DCHECK(reg.IsDouble());
+  int reg_num = (reg.GetRegNum() & ~1) | RegStorage::kFloatingPoint;
+  return RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1);
 }
 
 // Return a target-dependent special register.
@@ -223,78 +223,78 @@
       if (nc == '!') {
         strcpy(tbuf, "!");
       } else {
-         DCHECK_LT(fmt, fmt_end);
-         DCHECK_LT(static_cast<unsigned>(nc-'0'), 4u);
-         operand = lir->operands[nc-'0'];
-         switch (*fmt++) {
-           case 'b':
-             strcpy(tbuf, "0000");
-             for (i = 3; i >= 0; i--) {
-               tbuf[i] += operand & 1;
-               operand >>= 1;
-             }
-             break;
-           case 's':
-             snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
-             break;
-           case 'S':
-             DCHECK_EQ(RegStorage::RegNum(operand) & 1, 0);
-             snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
-             break;
-           case 'h':
-             snprintf(tbuf, arraysize(tbuf), "%04x", operand);
-             break;
-           case 'M':
-           case 'd':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand);
-             break;
-           case 'D':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand+1);
-             break;
-           case 'E':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
-             break;
-           case 'F':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
-             break;
-           case 't':
-             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
-                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
-                 lir->target);
-             break;
-           case 'T':
-             snprintf(tbuf, arraysize(tbuf), "0x%08x", operand << 2);
-             break;
-           case 'u': {
-             int offset_1 = lir->operands[0];
-             int offset_2 = NEXT_LIR(lir)->operands[0];
-             uintptr_t target =
-                 (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) & ~3) +
-                 (offset_1 << 21 >> 9) + (offset_2 << 1)) & 0xfffffffc;
-             snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void*>(target));
-             break;
+        DCHECK_LT(fmt, fmt_end);
+        DCHECK_LT(static_cast<unsigned>(nc-'0'), 4u);
+        operand = lir->operands[nc-'0'];
+        switch (*fmt++) {
+          case 'b':
+            strcpy(tbuf, "0000");
+            for (i = 3; i >= 0; i--) {
+              tbuf[i] += operand & 1;
+              operand >>= 1;
+            }
+            break;
+          case 's':
+            snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
+            break;
+          case 'S':
+            DCHECK_EQ(RegStorage::RegNum(operand) & 1, 0);
+            snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
+            break;
+          case 'h':
+            snprintf(tbuf, arraysize(tbuf), "%04x", operand);
+            break;
+          case 'M':
+          case 'd':
+            snprintf(tbuf, arraysize(tbuf), "%d", operand);
+            break;
+          case 'D':
+            snprintf(tbuf, arraysize(tbuf), "%d", operand+1);
+            break;
+          case 'E':
+            snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
+            break;
+          case 'F':
+            snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
+            break;
+          case 't':
+            snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                     reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
+                     lir->target);
+            break;
+          case 'T':
+            snprintf(tbuf, arraysize(tbuf), "0x%08x", operand << 2);
+            break;
+          case 'u': {
+            int offset_1 = lir->operands[0];
+            int offset_2 = NEXT_LIR(lir)->operands[0];
+            uintptr_t target =
+                (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) & ~3) +
+                    (offset_1 << 21 >> 9) + (offset_2 << 1)) & 0xfffffffc;
+            snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void*>(target));
+            break;
           }
 
-           /* Nothing to print for BLX_2 */
-           case 'v':
-             strcpy(tbuf, "see above");
-             break;
-           case 'r':
-             DCHECK(operand >= 0 && operand < MIPS_REG_COUNT);
-             strcpy(tbuf, mips_reg_name[operand]);
-             break;
-           case 'N':
-             // Placeholder for delay slot handling
-             strcpy(tbuf, ";  nop");
-             break;
-           default:
-             strcpy(tbuf, "DecodeError");
-             break;
-         }
-         buf += tbuf;
+          /* Nothing to print for BLX_2 */
+          case 'v':
+            strcpy(tbuf, "see above");
+            break;
+          case 'r':
+            DCHECK(operand >= 0 && operand < MIPS_REG_COUNT);
+            strcpy(tbuf, mips_reg_name[operand]);
+            break;
+          case 'N':
+            // Placeholder for delay slot handling
+            strcpy(tbuf, ";  nop");
+            break;
+          default:
+            strcpy(tbuf, "DecodeError");
+            break;
+        }
+        buf += tbuf;
       }
     } else {
-       buf += *fmt++;
+      buf += *fmt++;
     }
   }
   return buf;
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 18f1cde..15fc69d 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -125,7 +125,7 @@
       opcode = kMipsJalr;
       break;
     case kOpBx:
-      return NewLIR1(kMipsJr, r_dest_src.GetReg());
+      return NewLIR2(kMipsJalr, rZERO, r_dest_src.GetReg());
       break;
     default:
       LOG(FATAL) << "Bad case in OpReg";
@@ -228,17 +228,17 @@
       }
       break;
     case kOpLsl:
-        DCHECK(value >= 0 && value <= 31);
-        opcode = kMipsSll;
-        break;
+      DCHECK(value >= 0 && value <= 31);
+      opcode = kMipsSll;
+      break;
     case kOpLsr:
-        DCHECK(value >= 0 && value <= 31);
-        opcode = kMipsSrl;
-        break;
+      DCHECK(value >= 0 && value <= 31);
+      opcode = kMipsSrl;
+      break;
     case kOpAsr:
-        DCHECK(value >= 0 && value <= 31);
-        opcode = kMipsSra;
-        break;
+      DCHECK(value >= 0 && value <= 31);
+      opcode = kMipsSra;
+      break;
     case kOpAnd:
       if (IS_UIMM16((value))) {
         opcode = kMipsAndi;
@@ -324,7 +324,7 @@
       }
       return res;
     case kOp2Char:
-       return NewLIR3(kMipsAndi, r_dest_src1.GetReg(), r_src2.GetReg(), 0xFFFF);
+      return NewLIR3(kMipsAndi, r_dest_src1.GetReg(), r_src2.GetReg(), 0xFFFF);
     default:
       LOG(FATAL) << "Bad case in OpRegReg";
       UNREACHABLE();
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index c30cc04..a02e25e 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -106,36 +106,37 @@
 
   // All libcore references should resolve
   ScopedObjectAccess soa(Thread::Current());
-  const DexFile* dex = java_lang_dex_file_;
-  mirror::DexCache* dex_cache = class_linker_->FindDexCache(*dex);
-  EXPECT_EQ(dex->NumStringIds(), dex_cache->NumStrings());
+  ASSERT_TRUE(java_lang_dex_file_ != NULL);
+  const DexFile& dex = *java_lang_dex_file_;
+  mirror::DexCache* dex_cache = class_linker_->FindDexCache(dex);
+  EXPECT_EQ(dex.NumStringIds(), dex_cache->NumStrings());
   for (size_t i = 0; i < dex_cache->NumStrings(); i++) {
     const mirror::String* string = dex_cache->GetResolvedString(i);
     EXPECT_TRUE(string != NULL) << "string_idx=" << i;
   }
-  EXPECT_EQ(dex->NumTypeIds(), dex_cache->NumResolvedTypes());
+  EXPECT_EQ(dex.NumTypeIds(), dex_cache->NumResolvedTypes());
   for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
     mirror::Class* type = dex_cache->GetResolvedType(i);
     EXPECT_TRUE(type != NULL) << "type_idx=" << i
-                              << " " << dex->GetTypeDescriptor(dex->GetTypeId(i));
+                              << " " << dex.GetTypeDescriptor(dex.GetTypeId(i));
   }
-  EXPECT_EQ(dex->NumMethodIds(), dex_cache->NumResolvedMethods());
+  EXPECT_EQ(dex.NumMethodIds(), dex_cache->NumResolvedMethods());
   for (size_t i = 0; i < dex_cache->NumResolvedMethods(); i++) {
     mirror::ArtMethod* method = dex_cache->GetResolvedMethod(i);
     EXPECT_TRUE(method != NULL) << "method_idx=" << i
-                                << " " << dex->GetMethodDeclaringClassDescriptor(dex->GetMethodId(i))
-                                << " " << dex->GetMethodName(dex->GetMethodId(i));
+                                << " " << dex.GetMethodDeclaringClassDescriptor(dex.GetMethodId(i))
+                                << " " << dex.GetMethodName(dex.GetMethodId(i));
     EXPECT_TRUE(method->GetEntryPointFromQuickCompiledCode() != NULL) << "method_idx=" << i
                                            << " "
-                                           << dex->GetMethodDeclaringClassDescriptor(dex->GetMethodId(i))
-                                           << " " << dex->GetMethodName(dex->GetMethodId(i));
+                                           << dex.GetMethodDeclaringClassDescriptor(dex.GetMethodId(i))
+                                           << " " << dex.GetMethodName(dex.GetMethodId(i));
   }
-  EXPECT_EQ(dex->NumFieldIds(), dex_cache->NumResolvedFields());
+  EXPECT_EQ(dex.NumFieldIds(), dex_cache->NumResolvedFields());
   for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
     mirror::ArtField* field = dex_cache->GetResolvedField(i);
     EXPECT_TRUE(field != NULL) << "field_idx=" << i
-                               << " " << dex->GetFieldDeclaringClassDescriptor(dex->GetFieldId(i))
-                               << " " << dex->GetFieldName(dex->GetFieldId(i));
+                               << " " << dex.GetFieldDeclaringClassDescriptor(dex.GetFieldId(i))
+                               << " " << dex.GetFieldName(dex.GetFieldId(i));
   }
 
   // TODO check Class::IsVerified for all classes
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index fd20715..669c3bb 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -168,6 +168,13 @@
         } else {
           // FIXME: Pointer this returns as both reference and long.
           if (IsCurrentParamALong() && !IsCurrentParamAReference()) {  // Long.
+            if (gpr_index < arraysize(kHFCoreArgumentRegisters) - 1) {
+              // Skip R1, and use R2_R3 if the long is the first parameter.
+              if (gpr_index == 1) {
+                gpr_index++;
+              }
+            }
+
             // If it spans register and memory, we must use the value in memory.
             if (gpr_index < arraysize(kHFCoreArgumentRegisters) - 1) {
               entry_spills_.push_back(
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index d141538..b3ab370 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -39,10 +39,10 @@
 
   void CheckMethod(mirror::ArtMethod* method,
                    const OatFile::OatMethod& oat_method,
-                   const DexFile* dex_file)
+                   const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const CompiledMethod* compiled_method =
-        compiler_driver_->GetCompiledMethod(MethodReference(dex_file,
+        compiler_driver_->GetCompiledMethod(MethodReference(&dex_file,
                                                             method->GetDexMethodIndex()));
 
     if (compiled_method == nullptr) {
@@ -130,22 +130,23 @@
   ASSERT_EQ(4096U, oat_header.GetImageFileLocationOatDataBegin());
   ASSERT_EQ("lue.art", std::string(oat_header.GetStoreValueByKey(OatHeader::kImageLocationKey)));
 
-  const DexFile* dex_file = java_lang_dex_file_;
-  uint32_t dex_file_checksum = dex_file->GetLocationChecksum();
-  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file->GetLocation().c_str(),
+  ASSERT_TRUE(java_lang_dex_file_ != nullptr);
+  const DexFile& dex_file = *java_lang_dex_file_;
+  uint32_t dex_file_checksum = dex_file.GetLocationChecksum();
+  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file.GetLocation().c_str(),
                                                                     &dex_file_checksum);
   ASSERT_TRUE(oat_dex_file != nullptr);
-  CHECK_EQ(dex_file->GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum());
+  CHECK_EQ(dex_file.GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum());
   ScopedObjectAccess soa(Thread::Current());
-  for (size_t i = 0; i < dex_file->NumClassDefs(); i++) {
-    const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
-    const uint8_t* class_data = dex_file->GetClassData(class_def);
+  for (size_t i = 0; i < dex_file.NumClassDefs(); i++) {
+    const DexFile::ClassDef& class_def = dex_file.GetClassDef(i);
+    const uint8_t* class_data = dex_file.GetClassData(class_def);
     size_t num_virtual_methods = 0;
     if (class_data != nullptr) {
-      ClassDataItemIterator it(*dex_file, class_data);
+      ClassDataItemIterator it(dex_file, class_data);
       num_virtual_methods = it.NumVirtualMethods();
     }
-    const char* descriptor = dex_file->GetClassDescriptor(class_def);
+    const char* descriptor = dex_file.GetClassDescriptor(class_def);
     StackHandleScope<1> hs(soa.Self());
     mirror::Class* klass = class_linker->FindClass(soa.Self(), descriptor,
                                                    NullHandle<mirror::ClassLoader>());
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1cc2dcc..1862061 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -49,6 +49,9 @@
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
 
+static constexpr DRegister DTMP = D7;
+static constexpr SRegister STMP = S14;
+
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> {
  public:
   InvokeRuntimeCallingConvention()
@@ -472,6 +475,11 @@
   blocked_core_registers_[R10] = true;
   blocked_core_registers_[R11] = true;
 
+  // Don't allocate our temporary double register.
+  blocked_fpu_registers_[STMP] = true;
+  blocked_fpu_registers_[STMP + 1] = true;
+  DCHECK_EQ(FromLowSToD(STMP), DTMP);
+
   blocked_fpu_registers_[S16] = true;
   blocked_fpu_registers_[S17] = true;
   blocked_fpu_registers_[S18] = true;
@@ -590,9 +598,17 @@
       gp_index_ += 2;
       stack_index_ += 2;
       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
-        ArmManagedRegister pair = ArmManagedRegister::FromRegisterPair(
-            calling_convention.GetRegisterPairAt(index));
-        return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
+        if (calling_convention.GetRegisterAt(index) == R1) {
+          // Skip R1, and use R2_R3 instead.
+          gp_index_++;
+          index++;
+        }
+      }
+      if (index + 1 < calling_convention.GetNumberOfRegisters()) {
+        DCHECK_EQ(calling_convention.GetRegisterAt(index) + 1,
+                  calling_convention.GetRegisterAt(index + 1));
+        return Location::RegisterPairLocation(calling_convention.GetRegisterAt(index),
+                                              calling_convention.GetRegisterAt(index + 1));
       } else {
         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
       }
@@ -617,6 +633,9 @@
       if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
         uint32_t index = double_index_;
         double_index_ += 2;
+        DCHECK_EQ(calling_convention.GetFpuRegisterAt(index) + 1,
+                  calling_convention.GetFpuRegisterAt(index + 1));
+        DCHECK_EQ(calling_convention.GetFpuRegisterAt(index) & 1, 0);
         return Location::FpuRegisterPairLocation(
           calling_convention.GetFpuRegisterAt(index),
           calling_convention.GetFpuRegisterAt(index + 1));
@@ -3364,9 +3383,9 @@
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
     Exchange(source.GetStackIndex(), destination.GetStackIndex());
   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
-    __ vmovrs(IP, source.AsFpuRegister<SRegister>());
+    __ vmovs(STMP, source.AsFpuRegister<SRegister>());
     __ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>());
-    __ vmovsr(destination.AsFpuRegister<SRegister>(), IP);
+    __ vmovs(destination.AsFpuRegister<SRegister>(), STMP);
   } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
     SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>()
                                            : destination.AsFpuRegister<SRegister>();
@@ -3374,11 +3393,33 @@
         ? destination.GetStackIndex()
         : source.GetStackIndex();
 
-    __ vmovrs(IP, reg);
+    __ vmovs(STMP, reg);
     __ LoadSFromOffset(reg, SP, mem);
-    __ StoreToOffset(kStoreWord, IP, SP, mem);
+    __ StoreSToOffset(STMP, SP, mem);
+  } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
+    __ vmovd(DTMP, FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
+    __ vmovd(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()),
+             FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()));
+    __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), DTMP);
+  } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
+    DRegister reg = source.IsFpuRegisterPair()
+        ? FromLowSToD(source.AsFpuRegisterPairLow<SRegister>())
+        : FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>());
+    int mem = source.IsFpuRegisterPair()
+        ? destination.GetStackIndex()
+        : source.GetStackIndex();
+
+    __ vmovd(DTMP, reg);
+    __ LoadDFromOffset(reg, SP, mem);
+    __ StoreDToOffset(DTMP, SP, mem);
+  } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
+    // TODO: We could use DTMP and ask for a pair scratch register (float or core).
+    // This would save four instructions if two scratch registers are available, and
+    // two instructions if not.
+    Exchange(source.GetStackIndex(), destination.GetStackIndex());
+    Exchange(source.GetHighStackIndex(kArmWordSize), destination.GetHighStackIndex(kArmWordSize));
   } else {
-    LOG(FATAL) << "Unimplemented";
+    LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
   }
 }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index c1b4eda..8b29b15 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -33,7 +33,6 @@
 static constexpr size_t kArmWordSize = kArmPointerSize;
 
 static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
-static constexpr RegisterPair kParameterCorePairRegisters[] = { R1_R2, R2_R3 };
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
 static constexpr SRegister kParameterFpuRegisters[] =
     { S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 };
@@ -47,11 +46,6 @@
                           kParameterFpuRegisters,
                           kParameterFpuRegistersLength) {}
 
-  RegisterPair GetRegisterPairAt(size_t argument_index) {
-    DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
-    return kParameterCorePairRegisters[argument_index];
-  }
-
  private:
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 1e93ece..b8f5070 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -37,10 +37,12 @@
 
   // Perform the moves with constant sources.
   for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& move = *moves_.Get(i);
-    if (!move.IsEliminated()) {
-      DCHECK(move.GetSource().IsConstant());
+    MoveOperands* move = moves_.Get(i);
+    if (!move->IsEliminated()) {
+      DCHECK(move->GetSource().IsConstant());
       EmitMove(i);
+      // Eliminate the move, in case following moves need a scratch register.
+      move->Eliminate();
     }
   }
 
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 309425e..7ec1dd2 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -58,6 +58,9 @@
   };
 
   bool IsScratchLocation(Location loc);
+
+  // Allocate a scratch register for performing a move. The method will try to use
+  // a register that is the destination of a move, but that move has not been emitted yet.
   int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled);
 
   // Emit a move.
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 62629bc..210f7d7 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -31,9 +31,13 @@
     if (!message_.str().empty()) {
       message_ << " ";
     }
-    message_ << "("
-             << move->GetSource().reg()
-             << " -> "
+    message_ << "(";
+    if (move->GetSource().IsConstant()) {
+      message_ << "C";
+    } else {
+      message_ << move->GetSource().reg();
+    }
+    message_ << " -> "
              << move->GetDestination().reg()
              << ")";
   }
@@ -129,4 +133,21 @@
   }
 }
 
+TEST(ParallelMoveTest, ConstantLast) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  TestParallelMoveResolver resolver(&allocator);
+  HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+  moves->AddMove(new (&allocator) MoveOperands(
+      Location::ConstantLocation(new (&allocator) HIntConstant(0)),
+      Location::RegisterLocation(0),
+      nullptr));
+  moves->AddMove(new (&allocator) MoveOperands(
+      Location::RegisterLocation(1),
+      Location::RegisterLocation(2),
+      nullptr));
+  resolver.EmitNativeCode(moves);
+  ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str());
+}
+
 }  // namespace art
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 8001dcd..b5437b0 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -332,7 +332,7 @@
 }
 
 void MipsAssembler::Jr(Register rs) {
-  EmitR(0, rs, static_cast<Register>(0), static_cast<Register>(0), 0, 0x08);
+  EmitR(0, rs, static_cast<Register>(0), static_cast<Register>(0), 0, 0x09);  // Jalr zero, rs
   Nop();
 }
 
@@ -420,7 +420,7 @@
 }
 
 void MipsAssembler::Move(Register rt, Register rs) {
-  EmitI(0x8, rs, rt, 0);
+  EmitI(0x9, rs, rt, 0);    // Addiu
 }
 
 void MipsAssembler::Clear(Register rt) {
@@ -447,11 +447,11 @@
 }
 
 void MipsAssembler::AddConstant(Register rt, Register rs, int32_t value) {
-  Addi(rt, rs, value);
+  Addiu(rt, rs, value);
 }
 
 void MipsAssembler::LoadImmediate(Register rt, int32_t value) {
-  Addi(rt, ZERO, value);
+  Addiu(rt, ZERO, value);
 }
 
 void MipsAssembler::EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset,
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 4f279f2..56d4582 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -34,6 +34,7 @@
 #include <cutils/trace.h>
 
 #include "arch/instruction_set_features.h"
+#include "arch/mips/instruction_set_features_mips.h"
 #include "base/dumpable.h"
 #include "base/stl_util.h"
 #include "base/stringpiece.h"
@@ -445,7 +446,15 @@
       timings_(timings) {}
 
   ~Dex2Oat() {
-    LogCompletionTime();  // Needs to be before since it accesses the runtime.
+    // Free opened dex files before deleting the runtime_, because ~DexFile
+    // uses MemMap, which is shut down by ~Runtime.
+    class_path_files_.clear();
+    opened_dex_files_.clear();
+
+    // Log completion time before deleting the runtime_, because this accesses
+    // the runtime.
+    LogCompletionTime();
+
     if (kIsDebugBuild || (RUNNING_ON_VALGRIND != 0)) {
       delete runtime_;  // See field declaration for why this is manual.
     }
@@ -852,7 +861,13 @@
     }
 
     if (compiler_filter_string == nullptr) {
-      if (instruction_set_ == kMips64) {
+      if (instruction_set_ == kMips &&
+          instruction_set_features_->AsMipsInstructionSetFeatures()->IsR6()) {
+        // The instruction set is known to be kMips here, so the downcast is valid.
+        // For R6, only interpreter mode is working.
+        // TODO: fix compiler for Mips32r6.
+        compiler_filter_string = "interpret-only";
+      } else if (instruction_set_ == kMips64) {
         // TODO: fix compiler for Mips64.
         compiler_filter_string = "interpret-only";
       } else if (image_) {
@@ -1101,18 +1116,24 @@
               << error_msg;
           return false;
         }
-        if (!DexFile::OpenFromZip(*zip_archive.get(), zip_location_, &error_msg, &dex_files_)) {
+        if (!DexFile::OpenFromZip(*zip_archive.get(), zip_location_, &error_msg, &opened_dex_files_)) {
           LOG(ERROR) << "Failed to open dex from file descriptor for zip file '" << zip_location_
               << "': " << error_msg;
           return false;
         }
+        for (auto& dex_file : opened_dex_files_) {
+          dex_files_.push_back(dex_file.get());
+        }
         ATRACE_END();
       } else {
-        size_t failure_count = OpenDexFiles(dex_filenames_, dex_locations_, dex_files_);
+        size_t failure_count = OpenDexFiles(dex_filenames_, dex_locations_, &opened_dex_files_);
         if (failure_count > 0) {
           LOG(ERROR) << "Failed to open some dex files: " << failure_count;
           return false;
         }
+        for (auto& dex_file : opened_dex_files_) {
+          dex_files_.push_back(dex_file.get());
+        }
       }
 
       constexpr bool kSaveDexInput = false;
@@ -1186,9 +1207,13 @@
     Thread* self = Thread::Current();
     if (!boot_image_option_.empty()) {
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      std::vector<const DexFile*> class_path_files(dex_files_);
-      OpenClassPathFiles(runtime_->GetClassPathString(), class_path_files);
+      OpenClassPathFiles(runtime_->GetClassPathString(), dex_files_, &class_path_files_);
       ScopedObjectAccess soa(self);
+      std::vector<const DexFile*> class_path_files(dex_files_);
+      for (auto& class_path_file : class_path_files_) {
+        class_path_files.push_back(class_path_file.get());
+      }
+
       for (size_t i = 0; i < class_path_files.size(); i++) {
         class_linker->RegisterDexFile(*class_path_files[i]);
       }
@@ -1439,7 +1464,8 @@
  private:
   static size_t OpenDexFiles(const std::vector<const char*>& dex_filenames,
                              const std::vector<const char*>& dex_locations,
-                             std::vector<const DexFile*>& dex_files) {
+                             std::vector<std::unique_ptr<const DexFile>>* dex_files) {
+    DCHECK(dex_files != nullptr) << "OpenDexFiles out-param is NULL";
     size_t failure_count = 0;
     for (size_t i = 0; i < dex_filenames.size(); i++) {
       const char* dex_filename = dex_filenames[i];
@@ -1450,7 +1476,7 @@
         LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
         continue;
       }
-      if (!DexFile::Open(dex_filename, dex_location, &error_msg, &dex_files)) {
+      if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
         LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
         ++failure_count;
       }
@@ -1470,10 +1496,12 @@
     return false;
   }
 
-  // Appends to dex_files any elements of class_path that it doesn't already
-  // contain. This will open those dex files as necessary.
+  // Appends to opened_dex_files any elements of class_path that dex_files
+  // doesn't already contain. This will open those dex files as necessary.
   static void OpenClassPathFiles(const std::string& class_path,
-                                 std::vector<const DexFile*>& dex_files) {
+                                 std::vector<const DexFile*> dex_files,
+                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is NULL";
     std::vector<std::string> parsed;
     Split(class_path, ':', &parsed);
     // Take Locks::mutator_lock_ so that lock ordering on the ClassLinker::dex_lock_ is maintained.
@@ -1483,7 +1511,7 @@
         continue;
       }
       std::string error_msg;
-      if (!DexFile::Open(parsed[i].c_str(), parsed[i].c_str(), &error_msg, &dex_files)) {
+      if (!DexFile::Open(parsed[i].c_str(), parsed[i].c_str(), &error_msg, opened_dex_files)) {
         LOG(WARNING) << "Failed to open dex file '" << parsed[i] << "': " << error_msg;
       }
     }
@@ -1623,6 +1651,9 @@
   DexFileToMethodInlinerMap method_inliner_map_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 
+  // Ownership for the class path files.
+  std::vector<std::unique_ptr<const DexFile>> class_path_files_;
+
   // Not a unique_ptr as we want to just exit on non-debug builds, not bringing the runtime down
   // in an orderly fashion. The destructor takes care of deleting this.
   Runtime* runtime_;
@@ -1655,6 +1686,7 @@
   bool is_host_;
   std::string android_root_;
   std::vector<const DexFile*> dex_files_;
+  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
   std::unique_ptr<CompilerDriver> driver_;
   std::vector<std::string> verbose_methods_;
   bool dump_stats_;
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index 97c06f1..7442c70 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -138,7 +138,9 @@
   { kITypeMask, 41u << kOpcodeShift, "sh", "TO", },
   { kITypeMask, 43u << kOpcodeShift, "sw", "TO", },
   { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", },
+  { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", },
   { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", },
+  { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", },
 
   // Floating point.
   { kFpMask,                kCop1 | 0, "add", "fdst" },
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index de4ea36..931cca7 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1968,13 +1968,13 @@
   ScopedObjectAccess soa(self);
   ClassLinker* class_linker = runtime->GetClassLinker();
   class_linker->RegisterOatFile(oat_file);
-  std::vector<const DexFile*> dex_files;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
   for (const OatFile::OatDexFile* odf : oat_file->GetOatDexFiles()) {
     std::string error_msg;
-    const DexFile* dex_file = odf->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> dex_file = odf->OpenDexFile(&error_msg);
     CHECK(dex_file != nullptr) << error_msg;
     class_linker->RegisterDexFile(*dex_file);
-    dex_files.push_back(dex_file);
+    dex_files.push_back(std::move(dex_file));
   }
 
   // Need a class loader.
@@ -1983,7 +1983,11 @@
       soa.Env()->AllocObject(WellKnownClasses::dalvik_system_PathClassLoader));
   jobject class_loader = soa.Env()->NewGlobalRef(class_loader_local.get());
   // Fake that we're a compiler.
-  runtime->SetCompileTimeClassPath(class_loader, dex_files);
+  std::vector<const DexFile*> class_path;
+  for (auto& dex_file : dex_files) {
+    class_path.push_back(dex_file.get());
+  }
+  runtime->SetCompileTimeClassPath(class_loader, class_path);
 
   // Use the class loader while dumping.
   StackHandleScope<1> scope(self);
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index b6ec223..28f9668 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -48,7 +48,7 @@
 
 namespace art {
 
-static InstructionSet ElfISAToInstructionSet(Elf32_Word isa) {
+static InstructionSet ElfISAToInstructionSet(Elf32_Word isa, Elf32_Word e_flags) {
   switch (isa) {
     case EM_ARM:
       return kArm;
@@ -59,7 +59,12 @@
     case EM_X86_64:
       return kX86_64;
     case EM_MIPS:
-      return kMips;
+      if (((e_flags & EF_MIPS_ARCH) == EF_MIPS_ARCH_32R2) ||
+          ((e_flags & EF_MIPS_ARCH) == EF_MIPS_ARCH_32R6)) {
+        return kMips;
+      } else {
+        return kNone;
+      }
     default:
       return kNone;
   }
@@ -212,7 +217,7 @@
       LOG(ERROR) << "unable to read elf header";
       return false;
     }
-    isa = ElfISAToInstructionSet(elf_hdr.e_machine);
+    isa = ElfISAToInstructionSet(elf_hdr.e_machine, elf_hdr.e_flags);
   }
   const char* isa_name = GetInstructionSetString(isa);
   std::string image_filename;
diff --git a/runtime/arch/arm/quick_entrypoints_cc_arm.cc b/runtime/arch/arm/quick_entrypoints_cc_arm.cc
index e21e6c1..a3acd7e 100644
--- a/runtime/arch/arm/quick_entrypoints_cc_arm.cc
+++ b/runtime/arch/arm/quick_entrypoints_cc_arm.cc
@@ -75,7 +75,14 @@
         }
         break;
       case 'J':
+        if (gpr_index == 1 && !kArm32QuickCodeUseSoftFloat) {
+          // Don't use r1-r2 as a register pair, move to r2-r3 instead.
+          gpr_index++;
+        }
         if (gpr_index < arraysize(core_reg_args)) {
+          // Note that we don't need to do this if two registers are not available
+          // when !kArm32QuickCodeUseSoftFloat. We do it anyway to leave this
+          // code simple.
           core_reg_args[gpr_index++] = args[arg_index];
         }
         ++arg_index;
diff --git a/runtime/arch/memcmp16.h b/runtime/arch/memcmp16.h
index 4b9fb8e..c449a14 100644
--- a/runtime/arch/memcmp16.h
+++ b/runtime/arch/memcmp16.h
@@ -30,7 +30,7 @@
 //
 // In both cases, MemCmp16 is declared.
 
-#if defined(__aarch64__) || defined(__arm__) || defined(__mips) || defined(__i386__) || defined(__x86_64__)
+#if defined(__aarch64__) || defined(__arm__) || defined(__mips__) || defined(__i386__) || defined(__x86_64__)
 
 extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count);
 #define MemCmp16 __memcmp16
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index 11be2a8..00ab613 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -25,52 +25,82 @@
 namespace art {
 
 const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromVariant(
-    const std::string& variant ATTRIBUTE_UNUSED, std::string* error_msg ATTRIBUTE_UNUSED) {
-  if (variant != "default") {
-    std::ostringstream os;
-    LOG(WARNING) << "Unexpected CPU variant for Mips using defaults: " << variant;
-  }
+    const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED) {
+
   bool smp = true;  // Conservative default.
   bool fpu_32bit = true;
-  bool mips_isa_gte2 = true;
-  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2);
+  bool mips_isa_gte2 = false;
+  bool r6 = false;
+
+  // Override defaults based on variant string.
+  // Only care if it is R1, R2 or R6 and we assume all CPUs will have a FP unit.
+  constexpr const char* kMips32Prefix = "mips32r";
+  const size_t kPrefixLength = strlen(kMips32Prefix);
+  if (variant.compare(0, kPrefixLength, kMips32Prefix, kPrefixLength) == 0 &&
+      variant.size() > kPrefixLength) {
+    if (variant[kPrefixLength] >= '6') {
+      fpu_32bit = false;
+      r6 = true;
+    }
+    if (variant[kPrefixLength] >= '2') {
+      mips_isa_gte2 = true;
+    }
+  } else if (variant == "default") {
+    // Default variant is: smp = true, has fpu, is gte2, is not r6. This is the traditional
+    // setting.
+    mips_isa_gte2 = true;
+  } else {
+    LOG(WARNING) << "Unexpected CPU variant for Mips32 using defaults: " << variant;
+  }
+
+  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
 }
 
 const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromBitmap(uint32_t bitmap) {
   bool smp = (bitmap & kSmpBitfield) != 0;
   bool fpu_32bit = (bitmap & kFpu32Bitfield) != 0;
   bool mips_isa_gte2 = (bitmap & kIsaRevGte2Bitfield) != 0;
-  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2);
+  bool r6 = (bitmap & kR6) != 0;
+  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
 }
 
 const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromCppDefines() {
+  // Assume conservative defaults.
   const bool smp = true;
+  bool fpu_32bit = true;
+  bool mips_isa_gte2 = false;
+  bool r6 = false;
 
-  // TODO: here we assume the FPU is always 32-bit.
-  const bool fpu_32bit = true;
-
-#if __mips_isa_rev >= 2
-  const bool mips_isa_gte2 = true;
-#else
-  const bool mips_isa_gte2 = false;
+  // Override defaults based on the compiler's predefined architecture macros.
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS32R5) || defined(_MIPS_ARCH_MIPS32R6)
+  mips_isa_gte2 = true;
 #endif
 
-  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2);
+#if defined(_MIPS_ARCH_MIPS32R6)
+  r6 = true;
+  fpu_32bit = false;
+#endif
+
+  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
 }
 
 const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromCpuInfo() {
   // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
   // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
+  // Assume conservative defaults.
   bool smp = false;
+  bool fpu_32bit = true;
+  bool mips_isa_gte2 = false;
+  bool r6 = false;
 
-  // TODO: here we assume the FPU is always 32-bit.
-  const bool fpu_32bit = true;
+  // Override defaults based on the compiler's predefined architecture macros.
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS32R5) || defined(_MIPS_ARCH_MIPS32R6)
+  mips_isa_gte2 = true;
+#endif
 
-  // TODO: here we assume all MIPS processors are >= v2.
-#if __mips_isa_rev >= 2
-  const bool mips_isa_gte2 = true;
-#else
-  const bool mips_isa_gte2 = false;
+#if defined(_MIPS_ARCH_MIPS32R6)
+  r6 = true;
+  fpu_32bit = false;
 #endif
 
   std::ifstream in("/proc/cpuinfo");
@@ -89,7 +119,7 @@
   } else {
     LOG(ERROR) << "Failed to open /proc/cpuinfo";
   }
-  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2);
+  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
 }
 
 const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromHwcap() {
@@ -109,13 +139,15 @@
   const MipsInstructionSetFeatures* other_as_mips = other->AsMipsInstructionSetFeatures();
   return (IsSmp() == other->IsSmp()) &&
       (fpu_32bit_ == other_as_mips->fpu_32bit_) &&
-      (mips_isa_gte2_ == other_as_mips->mips_isa_gte2_);
+      (mips_isa_gte2_ == other_as_mips->mips_isa_gte2_) &&
+      (r6_ == other_as_mips->r6_);
 }
 
 uint32_t MipsInstructionSetFeatures::AsBitmap() const {
   return (IsSmp() ? kSmpBitfield : 0) |
       (fpu_32bit_ ? kFpu32Bitfield : 0) |
-      (mips_isa_gte2_ ? kIsaRevGte2Bitfield : 0);
+      (mips_isa_gte2_ ? kIsaRevGte2Bitfield : 0) |
+      (r6_ ? kR6 : 0);
 }
 
 std::string MipsInstructionSetFeatures::GetFeatureString() const {
@@ -135,6 +167,9 @@
   } else {
     result += ",-mips2";
   }
+  if (r6_) {
+    result += ",r6";
+  }  // Suppress non-r6.
   return result;
 }
 
@@ -142,6 +177,7 @@
     const bool smp, const std::vector<std::string>& features, std::string* error_msg) const {
   bool fpu_32bit = fpu_32bit_;
   bool mips_isa_gte2 = mips_isa_gte2_;
+  bool r6 = r6_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
     if (feature == "fpu32") {
@@ -152,12 +188,16 @@
       mips_isa_gte2 = true;
     } else if (feature == "-mips2") {
       mips_isa_gte2 = false;
+    } else if (feature == "r6") {
+      r6 = true;
+    } else if (feature == "-r6") {
+      r6 = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
     }
   }
-  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2);
+  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h
index f7c64fe..aac436e 100644
--- a/runtime/arch/mips/instruction_set_features_mips.h
+++ b/runtime/arch/mips/instruction_set_features_mips.h
@@ -67,6 +67,10 @@
     return fpu_32bit_;
   }
 
+  bool IsR6() const {
+    return r6_;
+  }
+
   virtual ~MipsInstructionSetFeatures() {}
 
  protected:
@@ -76,19 +80,21 @@
                                  std::string* error_msg) const OVERRIDE;
 
  private:
-  MipsInstructionSetFeatures(bool smp, bool fpu_32bit, bool mips_isa_gte2)
-      : InstructionSetFeatures(smp), fpu_32bit_(fpu_32bit),  mips_isa_gte2_(mips_isa_gte2) {
-  }
+  MipsInstructionSetFeatures(bool smp, bool fpu_32bit, bool mips_isa_gte2, bool r6)
+      : InstructionSetFeatures(smp), fpu_32bit_(fpu_32bit),  mips_isa_gte2_(mips_isa_gte2), r6_(r6)
+  {}
 
   // Bitmap positions for encoding features as a bitmap.
   enum {
     kSmpBitfield = 1,
     kFpu32Bitfield = 2,
     kIsaRevGte2Bitfield = 4,
+    kR6 = 8,
   };
 
   const bool fpu_32bit_;
   const bool mips_isa_gte2_;
+  const bool r6_;
 
   DISALLOW_COPY_AND_ASSIGN(MipsInstructionSetFeatures);
 };
diff --git a/runtime/arch/mips/jni_entrypoints_mips.S b/runtime/arch/mips/jni_entrypoints_mips.S
index 9a79467..fbc81d5 100644
--- a/runtime/arch/mips/jni_entrypoints_mips.S
+++ b/runtime/arch/mips/jni_entrypoints_mips.S
@@ -47,9 +47,9 @@
     addiu $sp, $sp, 32          # restore the stack
     .cfi_adjust_cfa_offset -32
     move  $t9, $v0              # put method code result in $t9
-    jr    $t9                   # leaf call to method's code
+    jalr  $zero, $t9            # leaf call to method's code
     nop
 .Lno_native_code_found:
-    jr    $ra
+    jalr  $zero, $ra
     nop
 END art_jni_dlsym_lookup_stub
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 509f991..666528a 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -154,7 +154,7 @@
 
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
-    jr     $ra
+    jalr   $zero, $ra
     nop
 .endm
 
@@ -274,7 +274,7 @@
 .macro DELIVER_PENDING_EXCEPTION
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME     # save callee saves for throw
     la      $t9, artDeliverPendingExceptionFromCode
-    jr      $t9                          # artDeliverPendingExceptionFromCode(Thread*)
+    jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
     move    $a0, rSELF                   # pass Thread::Current
 .endm
 
@@ -283,7 +283,7 @@
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     bnez   $t0, 1f                       # success if no exception is pending
     nop
-    jr     $ra
+    jalr   $zero, $ra
     nop
 1:
     DELIVER_PENDING_EXCEPTION
@@ -293,7 +293,7 @@
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     bnez   $v0, 1f                       # success?
     nop
-    jr     $ra                           # return on success
+    jalr   $zero, $ra                    # return on success
     nop
 1:
     DELIVER_PENDING_EXCEPTION
@@ -303,7 +303,7 @@
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     beqz   $v0, 1f                       # success?
     nop
-    jr     $ra                           # return on success
+    jalr   $zero, $ra                    # return on success
     nop
 1:
     DELIVER_PENDING_EXCEPTION
@@ -365,7 +365,7 @@
     lw      $ra, 124($a0)
     lw      $a0, 16($a0)
     move    $v0, $zero          # clear result registers r0 and r1
-    jr      $ra                 # do long jump
+    jalr    $zero, $ra          # do long jump
     move    $v1, $zero
 END art_quick_do_long_jump
 
@@ -377,7 +377,7 @@
 ENTRY art_quick_deliver_exception
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     la   $t9, artDeliverExceptionFromCode
-    jr   $t9                        # artDeliverExceptionFromCode(Throwable*, Thread*)
+    jalr $zero, $t9                 # artDeliverExceptionFromCode(Throwable*, Thread*)
     move $a1, rSELF                 # pass Thread::Current
 END art_quick_deliver_exception
 
@@ -388,7 +388,7 @@
 ENTRY art_quick_throw_null_pointer_exception
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     la   $t9, artThrowNullPointerExceptionFromCode
-    jr   $t9                        # artThrowNullPointerExceptionFromCode(Thread*)
+    jalr $zero, $t9                 # artThrowNullPointerExceptionFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_null_pointer_exception
 
@@ -399,7 +399,7 @@
 ENTRY art_quick_throw_div_zero
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     la   $t9, artThrowDivZeroFromCode
-    jr   $t9                        # artThrowDivZeroFromCode(Thread*)
+    jalr $zero, $t9                 # artThrowDivZeroFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_div_zero
 
@@ -410,7 +410,7 @@
 ENTRY art_quick_throw_array_bounds
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     la   $t9, artThrowArrayBoundsFromCode
-    jr   $t9                        # artThrowArrayBoundsFromCode(index, limit, Thread*)
+    jalr $zero, $t9                 # artThrowArrayBoundsFromCode(index, limit, Thread*)
     move $a2, rSELF                 # pass Thread::Current
 END art_quick_throw_array_bounds
 
@@ -421,7 +421,7 @@
 ENTRY art_quick_throw_stack_overflow
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     la   $t9, artThrowStackOverflowFromCode
-    jr   $t9                        # artThrowStackOverflowFromCode(Thread*)
+    jalr $zero, $t9                 # artThrowStackOverflowFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_stack_overflow
 
@@ -432,7 +432,7 @@
 ENTRY art_quick_throw_no_such_method
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     la   $t9, artThrowNoSuchMethodFromCode
-    jr   $t9                        # artThrowNoSuchMethodFromCode(method_idx, Thread*)
+    jalr $zero, $t9                 # artThrowNoSuchMethodFromCode(method_idx, Thread*)
     move $a1, rSELF                 # pass Thread::Current
 END art_quick_throw_no_such_method
 
@@ -465,7 +465,7 @@
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     beqz  $v0, 1f
     move  $t9, $v1                        # save $v0->code_
-    jr    $t9
+    jalr  $zero, $t9
     nop
 1:
     DELIVER_PENDING_EXCEPTION
@@ -540,11 +540,11 @@
     li    $t3, 70               # put char 'F' into t3
     beq   $t1, $t3, 1f          # branch if result type char == 'F'
     sw    $v0, 0($t0)           # store the result
-    jr    $ra
+    jalr  $zero, $ra
     sw    $v1, 4($t0)           # store the other half of the result
 1:
     SDu   $f0, $f1, 0, $t0, $t1 # store floating point result
-    jr    $ra
+    jalr  $zero, $ra
     nop
 END art_quick_invoke_stub
 
@@ -604,7 +604,7 @@
     addiu  $sp, $sp, 16
     beqz   $v0, .Lthrow_class_cast_exception
     lw     $ra, 12($sp)
-    jr     $ra
+    jalr   $zero, $ra
     addiu  $sp, $sp, 16
     .cfi_adjust_cfa_offset -16
 .Lthrow_class_cast_exception:
@@ -615,7 +615,7 @@
     .cfi_adjust_cfa_offset -16
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     la   $t9, artThrowClassCastException
-    jr   $t9                        # artThrowClassCastException (Class*, Class*, Thread*)
+    jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
 END art_quick_check_cast
 
@@ -657,13 +657,13 @@
     srl $t1, $a0, 7
     add $t1, $t1, $t0
     sb  $t0, ($t1)
-    jr  $ra
+    jalr $zero, $ra
     nop
 .Ldo_aput_null:
     sll $a1, $a1, 2
     add $t0, $a0, $a1
     sw  $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
-    jr  $ra
+    jalr $zero, $ra
     nop
 .Lcheck_assignability:
     addiu  $sp, $sp, -32
@@ -691,7 +691,7 @@
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     move $a1, $a2
     la   $t9, artThrowArrayStoreException
-    jr   $t9                        # artThrowArrayStoreException(Class*, Class*, Thread*)
+    jalr $zero, $t9                 # artThrowArrayStoreException(Class*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
 END art_quick_aput_obj
 
@@ -901,6 +901,7 @@
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
+                                         # 64 bit new_val is in a2:a3 pair
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     jal    artSet64StaticFromCode        # (field_idx, referrer, new_val, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -961,6 +962,7 @@
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
     lw     $t1, 0($sp)                   # load referrer's Method*
+                                         # 64 bit new_val is in a2:a3 pair
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
     sw     rSELF, 20($sp)                # pass Thread::Current
     jal    artSet64InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
@@ -1038,7 +1040,7 @@
     lh     $a0, THREAD_FLAGS_OFFSET(rSELF)
     bnez   $a0, 1f
     addiu  rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL   # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
-    jr     $ra
+    jalr   $zero, $ra
     nop
 1:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves for stack crawl
@@ -1062,7 +1064,7 @@
     bnez    $t0, 1f
     # don't care if $v0 and/or $v1 are modified, when exception branch taken
     MTD     $v0, $v1, $f0, $f1          # move float value to return value
-    jr      $ra
+    jalr    $zero, $ra
     nop
 1:
     DELIVER_PENDING_EXCEPTION
@@ -1079,7 +1081,7 @@
     add     $a0, $t0               # get address of target method
     lw      $a0, MIRROR_OBJECT_ARRAY_DATA_OFFSET($a0)  # load the target method
     la      $t9, art_quick_invoke_interface_trampoline
-    jr      $t9
+    jalr    $zero, $t9
 END art_quick_imt_conflict_trampoline
 
     .extern artQuickResolutionTrampoline
@@ -1092,7 +1094,7 @@
     lw      $a0, ARG_SLOT_SIZE($sp)       # load resolved method to $a0
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     move    $t9, $v0               # code pointer must be in $t9 to generate the global pointer
-    jr      $v0                    # tail call to method
+    jalr    $zero, $v0             # tail call to method
     nop
 1:
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
@@ -1150,7 +1152,7 @@
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
 
     MTD     $v0, $v1, $f0, $f1     # move float value to return value
-    jr      $ra
+    jalr    $zero, $ra
     nop
 
 1:
@@ -1171,7 +1173,7 @@
     bnez    $t0, 1f
     # don't care if $v0 and/or $v1 are modified, when exception branch taken
     MTD     $v0, $v1, $f0, $f1                  # move float value to return value
-    jr      $ra
+    jalr    $zero, $ra
     nop
 1:
     DELIVER_PENDING_EXCEPTION
@@ -1222,7 +1224,7 @@
     lw       $v0, 12($sp)   # restore return values
     lw       $v1, 8($sp)
     l.d      $f0, 0($sp)
-    jr       $t0            # return
+    jalr     $zero, $t0     # return
     addiu    $sp, $sp, 16   # remove temp storage from stack
     .cfi_adjust_cfa_offset -16
 END art_quick_instrumentation_exit
@@ -1263,7 +1265,7 @@
     move    $v1, $v0                         #  rhi<- rlo (if shift&0x20)
     move    $v0, $zero                       #  rlo<- 0 (if shift&0x20)
 
-1:  jr      $ra
+1:  jalr    $zero, $ra
     nop
 END art_quick_shl_long
 
@@ -1291,7 +1293,7 @@
     move    $v0, $v1                         #  rlo<- rhi (if shift&0x20)
     move    $v1, $a3                         #  rhi<- sign(ahi) (if shift&0x20)
 
-1:  jr      $ra
+1:  jalr    $zero, $ra
     nop
 END art_quick_shr_long
 
@@ -1319,7 +1321,7 @@
     move    $v0, $v1                         #  rlo<- rhi (if shift&0x20)
     move    $v1, $zero                       #  rhi<- 0 (if shift&0x20)
 
-1:  jr      $ra
+1:  jalr    $zero, $ra
     nop
 END art_quick_ushr_long
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d119a56..438cebf 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -246,7 +246,7 @@
   memset(find_array_class_cache_, 0, kFindArrayCacheSize * sizeof(mirror::Class*));
 }
 
-void ClassLinker::InitWithoutImage(const std::vector<const DexFile*>& boot_class_path) {
+void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path) {
   VLOG(startup) << "ClassLinker::Init";
   CHECK(!Runtime::Current()->GetHeap()->HasImageSpace()) << "Runtime has image. We should use it.";
 
@@ -405,9 +405,10 @@
   // DexCache instances. Needs to be after String, Field, Method arrays since AllocDexCache uses
   // these roots.
   CHECK_NE(0U, boot_class_path.size());
-  for (const DexFile* dex_file : boot_class_path) {
-    CHECK(dex_file != nullptr);
+  for (auto& dex_file : boot_class_path) {
+    CHECK(dex_file.get() != nullptr);
     AppendToBootClassPath(self, *dex_file);
+    opened_dex_files_.push_back(std::move(dex_file));
   }
 
   // now we can use FindSystemClass
@@ -794,7 +795,7 @@
                                          const uint32_t* dex_location_checksum,
                                          bool generated,
                                          std::vector<std::string>* error_msgs,
-                                         std::vector<const DexFile*>* dex_files) {
+                                         std::vector<std::unique_ptr<const DexFile>>* dex_files) {
   if (oat_file == nullptr) {
     return false;
   }
@@ -841,12 +842,12 @@
     }
 
     if (success) {
-      const DexFile* dex_file = oat_dex_file->OpenDexFile(&error_msg);
-      if (dex_file == nullptr) {
+      std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+      if (dex_file.get() == nullptr) {
         success = false;
         error_msgs->push_back(error_msg);
       } else {
-        dex_files->push_back(dex_file);
+        dex_files->push_back(std::move(dex_file));
       }
     }
 
@@ -864,14 +865,7 @@
   if (success) {
     return true;
   } else {
-    // Free all the dex files we have loaded.
-    auto it = dex_files->begin() + old_size;
-    auto it_end = dex_files->end();
-    for (; it != it_end; it++) {
-      delete *it;
-    }
-    dex_files->erase(dex_files->begin() + old_size, it_end);
-
+    dex_files->erase(dex_files->begin() + old_size, dex_files->end());
     return false;
   }
 }
@@ -882,7 +876,7 @@
 // multidex ahead of time.
 bool ClassLinker::OpenDexFilesFromOat(const char* dex_location, const char* oat_location,
                                       std::vector<std::string>* error_msgs,
-                                      std::vector<const DexFile*>* dex_files) {
+                                      std::vector<std::unique_ptr<const DexFile>>* dex_files) {
   // 1) Check whether we have an open oat file.
   // This requires a dex checksum, use the "primary" one.
   uint32_t dex_location_checksum;
@@ -1232,15 +1226,15 @@
                                 error_msg->c_str());
       return false;
     }
-    dex_file.reset(oat_dex_file->OpenDexFile(error_msg));
+    dex_file = oat_dex_file->OpenDexFile(error_msg);
   } else {
     bool verified = VerifyOatAndDexFileChecksums(oat_file, dex_location, *dex_location_checksum,
                                                  kRuntimeISA, error_msg);
     if (!verified) {
       return false;
     }
-    dex_file.reset(oat_file->GetOatDexFile(dex_location,
-                                           dex_location_checksum)->OpenDexFile(error_msg));
+    dex_file = oat_file->GetOatDexFile(dex_location,
+                                       dex_location_checksum)->OpenDexFile(error_msg);
   }
   return dex_file.get() != nullptr;
 }
@@ -1685,8 +1679,8 @@
                                                                      nullptr);
     CHECK(oat_dex_file != nullptr) << oat_file.GetLocation() << " " << dex_file_location;
     std::string error_msg;
-    const DexFile* dex_file = oat_dex_file->OpenDexFile(&error_msg);
-    if (dex_file == nullptr) {
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+    if (dex_file.get() == nullptr) {
       LOG(FATAL) << "Failed to open dex file " << dex_file_location
                  << " from within oat file " << oat_file.GetLocation()
                  << " error '" << error_msg << "'";
@@ -1695,7 +1689,8 @@
 
     CHECK_EQ(dex_file->GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum());
 
-    AppendToBootClassPath(*dex_file, dex_cache);
+    AppendToBootClassPath(*dex_file.get(), dex_cache);
+    opened_dex_files_.push_back(std::move(dex_file));
   }
 
   // Set classes on AbstractMethod early so that IsMethod tests can be performed during the live
@@ -1928,7 +1923,6 @@
   mirror::ShortArray::ResetArrayClass();
   mirror::Throwable::ResetClass();
   mirror::StackTraceElement::ResetClass();
-  STLDeleteElements(&boot_class_path_);
   STLDeleteElements(&oat_files_);
 }
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 6461835..6570c5f 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -105,7 +105,7 @@
   ~ClassLinker();
 
   // Initialize class linker by bootstraping from dex files.
-  void InitWithoutImage(const std::vector<const DexFile*>& boot_class_path)
+  void InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Initialize class linker from one or more images.
@@ -324,7 +324,7 @@
   // (if multidex) into the given vector.
   bool OpenDexFilesFromOat(const char* dex_location, const char* oat_location,
                            std::vector<std::string>* error_msgs,
-                           std::vector<const DexFile*>* dex_files)
+                           std::vector<std::unique_ptr<const DexFile>>* dex_files)
       LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
 
   // Returns true if the given oat file has the same image checksum as the image it is paired with.
@@ -722,6 +722,7 @@
   const void* GetRuntimeQuickGenericJniStub() const;
 
   std::vector<const DexFile*> boot_class_path_;
+  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
 
   mutable ReaderWriterMutex dex_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<size_t> new_dex_cache_roots_ GUARDED_BY(dex_lock_);
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 4f09460..6c7c1e2 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -342,28 +342,26 @@
     }
   }
 
-  void AssertDexFile(const DexFile* dex, mirror::ClassLoader* class_loader)
+  void AssertDexFile(const DexFile& dex, mirror::ClassLoader* class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ASSERT_TRUE(dex != nullptr);
-
     // Verify all the classes defined in this file
-    for (size_t i = 0; i < dex->NumClassDefs(); i++) {
-      const DexFile::ClassDef& class_def = dex->GetClassDef(i);
-      const char* descriptor = dex->GetClassDescriptor(class_def);
+    for (size_t i = 0; i < dex.NumClassDefs(); i++) {
+      const DexFile::ClassDef& class_def = dex.GetClassDef(i);
+      const char* descriptor = dex.GetClassDescriptor(class_def);
       AssertDexFileClass(class_loader, descriptor);
     }
     // Verify all the types referenced by this file
-    for (size_t i = 0; i < dex->NumTypeIds(); i++) {
-      const DexFile::TypeId& type_id = dex->GetTypeId(i);
-      const char* descriptor = dex->GetTypeDescriptor(type_id);
+    for (size_t i = 0; i < dex.NumTypeIds(); i++) {
+      const DexFile::TypeId& type_id = dex.GetTypeId(i);
+      const char* descriptor = dex.GetTypeDescriptor(type_id);
       AssertDexFileClass(class_loader, descriptor);
     }
     class_linker_->VisitRoots(TestRootVisitor, nullptr, kVisitRootFlagAllRoots);
     // Verify the dex cache has resolution methods in all resolved method slots
-    mirror::DexCache* dex_cache = class_linker_->FindDexCache(*dex);
+    mirror::DexCache* dex_cache = class_linker_->FindDexCache(dex);
     mirror::ObjectArray<mirror::ArtMethod>* resolved_methods = dex_cache->GetResolvedMethods();
     for (size_t i = 0; i < static_cast<size_t>(resolved_methods->GetLength()); i++) {
-      EXPECT_TRUE(resolved_methods->Get(i) != nullptr) << dex->GetLocation() << " i=" << i;
+      EXPECT_TRUE(resolved_methods->Get(i) != nullptr) << dex.GetLocation() << " i=" << i;
     }
   }
 
@@ -744,7 +742,8 @@
 
 TEST_F(ClassLinkerTest, LibCore) {
   ScopedObjectAccess soa(Thread::Current());
-  AssertDexFile(java_lang_dex_file_, nullptr);
+  ASSERT_TRUE(java_lang_dex_file_ != nullptr);
+  AssertDexFile(*java_lang_dex_file_, nullptr);
 }
 
 // The first reference array element must be a multiple of 4 bytes from the
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 75ba9dd..e017699 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -102,7 +102,11 @@
 }
 
 CommonRuntimeTest::CommonRuntimeTest() {}
-CommonRuntimeTest::~CommonRuntimeTest() {}
+CommonRuntimeTest::~CommonRuntimeTest() {
+  // Ensure the dex files are cleaned up before the runtime.
+  loaded_dex_files_.clear();
+  runtime_.reset();
+}
 
 void CommonRuntimeTest::SetUpAndroidRoot() {
   if (IsHost()) {
@@ -181,15 +185,15 @@
   return GetCoreFileLocation("oat");
 }
 
-const DexFile* CommonRuntimeTest::LoadExpectSingleDexFile(const char* location) {
-  std::vector<const DexFile*> dex_files;
+std::unique_ptr<const DexFile> CommonRuntimeTest::LoadExpectSingleDexFile(const char* location) {
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
   std::string error_msg;
   if (!DexFile::Open(location, location, &error_msg, &dex_files)) {
     LOG(FATAL) << "Could not open .dex file '" << location << "': " << error_msg << "\n";
-    return nullptr;
+    UNREACHABLE();
   } else {
     CHECK_EQ(1U, dex_files.size()) << "Expected only one dex file in " << location;
-    return dex_files[0];
+    return std::move(dex_files[0]);
   }
 }
 
@@ -222,6 +226,9 @@
   class_linker_ = runtime_->GetClassLinker();
   class_linker_->FixupDexCaches(runtime_->GetResolutionMethod());
   class_linker_->RunRootClinits();
+  boot_class_path_ = class_linker_->GetBootClassPath();
+  java_lang_dex_file_ = boot_class_path_[0];
+
 
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we
   // Runtime::Start, give it away now and then switch to a more managable ScopedObjectAccess.
@@ -285,8 +292,6 @@
   IcuCleanupFn icu_cleanup_fn = reinterpret_cast<IcuCleanupFn>(sym);
   (*icu_cleanup_fn)();
 
-  STLDeleteElements(&opened_dex_files_);
-
   Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption after the test
 }
 
@@ -323,7 +328,7 @@
 #define ART_TARGET_NATIVETEST_DIR_STRING ""
 #endif
 
-std::vector<const DexFile*> CommonRuntimeTest::OpenTestDexFiles(const char* name) {
+std::vector<std::unique_ptr<const DexFile>> CommonRuntimeTest::OpenTestDexFiles(const char* name) {
   CHECK(name != nullptr);
   std::string filename;
   if (IsHost()) {
@@ -336,28 +341,30 @@
   filename += name;
   filename += ".jar";
   std::string error_msg;
-  std::vector<const DexFile*> dex_files;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
   bool success = DexFile::Open(filename.c_str(), filename.c_str(), &error_msg, &dex_files);
   CHECK(success) << "Failed to open '" << filename << "': " << error_msg;
-  for (const DexFile* dex_file : dex_files) {
+  for (auto& dex_file : dex_files) {
     CHECK_EQ(PROT_READ, dex_file->GetPermissions());
     CHECK(dex_file->IsReadOnly());
   }
-  opened_dex_files_.insert(opened_dex_files_.end(), dex_files.begin(), dex_files.end());
   return dex_files;
 }
 
-const DexFile* CommonRuntimeTest::OpenTestDexFile(const char* name) {
-  std::vector<const DexFile*> vector = OpenTestDexFiles(name);
+std::unique_ptr<const DexFile> CommonRuntimeTest::OpenTestDexFile(const char* name) {
+  std::vector<std::unique_ptr<const DexFile>> vector = OpenTestDexFiles(name);
   EXPECT_EQ(1U, vector.size());
-  return vector[0];
+  return std::move(vector[0]);
 }
 
 jobject CommonRuntimeTest::LoadDex(const char* dex_name) {
-  std::vector<const DexFile*> dex_files = OpenTestDexFiles(dex_name);
+  std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles(dex_name);
+  std::vector<const DexFile*> class_path;
   CHECK_NE(0U, dex_files.size());
-  for (const DexFile* dex_file : dex_files) {
+  for (auto& dex_file : dex_files) {
+    class_path.push_back(dex_file.get());
     class_linker_->RegisterDexFile(*dex_file);
+    loaded_dex_files_.push_back(std::move(dex_file));
   }
   Thread* self = Thread::Current();
   JNIEnvExt* env = self->GetJniEnv();
@@ -365,7 +372,7 @@
       env->AllocObject(WellKnownClasses::dalvik_system_PathClassLoader));
   jobject class_loader = env->NewGlobalRef(class_loader_local.get());
   self->SetClassLoaderOverride(class_loader_local.get());
-  Runtime::Current()->SetCompileTimeClassPath(class_loader, dex_files);
+  Runtime::Current()->SetCompileTimeClassPath(class_loader, class_path);
   return class_loader;
 }
 
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 35dc30f..38a9733 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -87,7 +87,7 @@
   // File location to core.oat, e.g. $ANDROID_HOST_OUT/system/framework/core.oat
   static std::string GetCoreOatLocation();
 
-  const DexFile* LoadExpectSingleDexFile(const char* location);
+  std::unique_ptr<const DexFile> LoadExpectSingleDexFile(const char* location);
 
   virtual void SetUp();
 
@@ -106,26 +106,30 @@
 
   std::string GetTestAndroidRoot();
 
-  std::vector<const DexFile*> OpenTestDexFiles(const char* name)
+  std::vector<std::unique_ptr<const DexFile>> OpenTestDexFiles(const char* name)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const DexFile* OpenTestDexFile(const char* name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  std::unique_ptr<const DexFile> OpenTestDexFile(const char* name)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   jobject LoadDex(const char* dex_name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   std::string android_data_;
   std::string dalvik_cache_;
-  const DexFile* java_lang_dex_file_;  // owned by runtime_
-  std::vector<const DexFile*> boot_class_path_;  // owned by runtime_
+
   std::unique_ptr<Runtime> runtime_;
-  // Owned by the runtime
+
+  // The class_linker_, java_lang_dex_file_, and boot_class_path_ are all
+  // owned by the runtime.
   ClassLinker* class_linker_;
+  const DexFile* java_lang_dex_file_;
+  std::vector<const DexFile*> boot_class_path_;
 
  private:
   static std::string GetCoreFileLocation(const char* suffix);
 
   std::unique_ptr<CompilerCallbacks> callbacks_;
-  std::vector<const DexFile*> opened_dex_files_;
+  std::vector<std::unique_ptr<const DexFile>> loaded_dex_files_;
 };
 
 // Sets a CheckJni abort hook to catch failures. Note that this will cause CheckJNI to carry on
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index fe1e3a4..229a1af 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -819,10 +819,15 @@
     }
     gDebuggerActive = false;
   }
-  gRegistry->Clear();
-  gDebuggerConnected = false;
   CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   runtime->GetThreadList()->ResumeAll();
+
+  {
+    ScopedObjectAccess soa(self);
+    gRegistry->Clear();
+  }
+
+  gDebuggerConnected = false;
 }
 
 bool Dbg::IsDebuggerActive() {
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 3d4184b..3f6175f 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -125,7 +125,8 @@
 }
 
 bool DexFile::Open(const char* filename, const char* location, std::string* error_msg,
-                   std::vector<const DexFile*>* dex_files) {
+                   std::vector<std::unique_ptr<const DexFile>>* dex_files) {
+  DCHECK(dex_files != nullptr) << "DexFile::Open: out-param is NULL";
   uint32_t magic;
   ScopedFd fd(OpenAndReadMagic(filename, &magic, error_msg));
   if (fd.get() == -1) {
@@ -139,7 +140,7 @@
     std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), location, true,
                                                               error_msg));
     if (dex_file.get() != nullptr) {
-      dex_files->push_back(dex_file.release());
+      dex_files->push_back(std::move(dex_file));
       return true;
     } else {
       return false;
@@ -179,8 +180,8 @@
   }
 }
 
-const DexFile* DexFile::OpenFile(int fd, const char* location, bool verify,
-                                 std::string* error_msg) {
+std::unique_ptr<const DexFile> DexFile::OpenFile(int fd, const char* location, bool verify,
+                                                 std::string* error_msg) {
   CHECK(location != nullptr);
   std::unique_ptr<MemMap> map;
   {
@@ -224,13 +225,14 @@
     return nullptr;
   }
 
-  return dex_file.release();
+  return dex_file;
 }
 
 const char* DexFile::kClassesDex = "classes.dex";
 
 bool DexFile::OpenZip(int fd, const std::string& location, std::string* error_msg,
-                      std::vector<const  DexFile*>* dex_files) {
+                      std::vector<std::unique_ptr<const DexFile>>* dex_files) {
+  DCHECK(dex_files != nullptr) << "DexFile::OpenZip: out-param is NULL";
   std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, location.c_str(), error_msg));
   if (zip_archive.get() == nullptr) {
     DCHECK(!error_msg->empty());
@@ -239,10 +241,10 @@
   return DexFile::OpenFromZip(*zip_archive, location, error_msg, dex_files);
 }
 
-const DexFile* DexFile::OpenMemory(const std::string& location,
-                                   uint32_t location_checksum,
-                                   MemMap* mem_map,
-                                   std::string* error_msg) {
+std::unique_ptr<const DexFile> DexFile::OpenMemory(const std::string& location,
+                                                   uint32_t location_checksum,
+                                                   MemMap* mem_map,
+                                                   std::string* error_msg) {
   return OpenMemory(mem_map->Begin(),
                     mem_map->Size(),
                     location,
@@ -251,9 +253,9 @@
                     error_msg);
 }
 
-const DexFile* DexFile::Open(const ZipArchive& zip_archive, const char* entry_name,
-                             const std::string& location, std::string* error_msg,
-                             ZipOpenErrorCode* error_code) {
+std::unique_ptr<const DexFile> DexFile::Open(const ZipArchive& zip_archive, const char* entry_name,
+                                             const std::string& location, std::string* error_msg,
+                                             ZipOpenErrorCode* error_code) {
   CHECK(!location.empty());
   std::unique_ptr<ZipEntry> zip_entry(zip_archive.Find(entry_name, error_msg));
   if (zip_entry.get() == NULL) {
@@ -287,11 +289,13 @@
     return nullptr;
   }
   *error_code = ZipOpenErrorCode::kNoError;
-  return dex_file.release();
+  return dex_file;
 }
 
 bool DexFile::OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
-                          std::string* error_msg, std::vector<const DexFile*>* dex_files) {
+                          std::string* error_msg,
+                          std::vector<std::unique_ptr<const DexFile>>* dex_files) {
+  DCHECK(dex_files != nullptr) << "DexFile::OpenFromZip: out-param is NULL";
   ZipOpenErrorCode error_code;
   std::unique_ptr<const DexFile> dex_file(Open(zip_archive, kClassesDex, location, error_msg,
                                                &error_code));
@@ -299,7 +303,7 @@
     return false;
   } else {
     // Had at least classes.dex.
-    dex_files->push_back(dex_file.release());
+    dex_files->push_back(std::move(dex_file));
 
     // Now try some more.
     size_t i = 2;
@@ -318,7 +322,7 @@
         }
         break;
       } else {
-        dex_files->push_back(next_dex_file.release());
+        dex_files->push_back(std::move(next_dex_file));
       }
 
       i++;
@@ -329,18 +333,17 @@
 }
 
 
-const DexFile* DexFile::OpenMemory(const uint8_t* base,
-                                   size_t size,
-                                   const std::string& location,
-                                   uint32_t location_checksum,
-                                   MemMap* mem_map, std::string* error_msg) {
+std::unique_ptr<const DexFile> DexFile::OpenMemory(const uint8_t* base,
+                                                   size_t size,
+                                                   const std::string& location,
+                                                   uint32_t location_checksum,
+                                                   MemMap* mem_map, std::string* error_msg) {
   CHECK_ALIGNED(base, 4);  // various dex file structures must be word aligned
   std::unique_ptr<DexFile> dex_file(new DexFile(base, size, location, location_checksum, mem_map));
   if (!dex_file->Init(error_msg)) {
-    return nullptr;
-  } else {
-    return dex_file.release();
+    dex_file.reset();
   }
+  return std::unique_ptr<const DexFile>(dex_file.release());
 }
 
 DexFile::DexFile(const uint8_t* base, size_t size,
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index a71ca42..019c8e6 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -385,19 +385,20 @@
 
   // Opens .dex files found in the container, guessing the container format based on file extension.
   static bool Open(const char* filename, const char* location, std::string* error_msg,
-                   std::vector<const DexFile*>* dex_files);
+                   std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
   // Opens .dex file, backed by existing memory
-  static const DexFile* Open(const uint8_t* base, size_t size,
-                             const std::string& location,
-                             uint32_t location_checksum,
-                             std::string* error_msg) {
+  static std::unique_ptr<const DexFile> Open(const uint8_t* base, size_t size,
+                                             const std::string& location,
+                                             uint32_t location_checksum,
+                                             std::string* error_msg) {
     return OpenMemory(base, size, location, location_checksum, NULL, error_msg);
   }
 
   // Open all classesXXX.dex files from a zip archive.
   static bool OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
-                          std::string* error_msg, std::vector<const DexFile*>* dex_files);
+                          std::string* error_msg,
+                          std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
   // Closes a .dex file.
   virtual ~DexFile();
@@ -892,11 +893,12 @@
 
  private:
   // Opens a .dex file
-  static const DexFile* OpenFile(int fd, const char* location, bool verify, std::string* error_msg);
+  static std::unique_ptr<const DexFile> OpenFile(int fd, const char* location,
+                                                 bool verify, std::string* error_msg);
 
   // Opens dex files from within a .jar, .zip, or .apk file
   static bool OpenZip(int fd, const std::string& location, std::string* error_msg,
-                      std::vector<const DexFile*>* dex_files);
+                      std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
   enum class ZipOpenErrorCode {  // private
     kNoError,
@@ -909,23 +911,23 @@
 
   // Opens .dex file from the entry_name in a zip archive. error_code is undefined when non-nullptr
   // return.
-  static const DexFile* Open(const ZipArchive& zip_archive, const char* entry_name,
-                             const std::string& location, std::string* error_msg,
-                             ZipOpenErrorCode* error_code);
+  static std::unique_ptr<const DexFile> Open(const ZipArchive& zip_archive, const char* entry_name,
+                                             const std::string& location, std::string* error_msg,
+                                             ZipOpenErrorCode* error_code);
 
   // Opens a .dex file at the given address backed by a MemMap
-  static const DexFile* OpenMemory(const std::string& location,
-                                   uint32_t location_checksum,
-                                   MemMap* mem_map,
-                                   std::string* error_msg);
+  static std::unique_ptr<const DexFile> OpenMemory(const std::string& location,
+                                                   uint32_t location_checksum,
+                                                   MemMap* mem_map,
+                                                   std::string* error_msg);
 
   // Opens a .dex file at the given address, optionally backed by a MemMap
-  static const DexFile* OpenMemory(const uint8_t* dex_file,
-                                   size_t size,
-                                   const std::string& location,
-                                   uint32_t location_checksum,
-                                   MemMap* mem_map,
-                                   std::string* error_msg);
+  static std::unique_ptr<const DexFile> OpenMemory(const uint8_t* dex_file,
+                                                   size_t size,
+                                                   const std::string& location,
+                                                   uint32_t location_checksum,
+                                                   MemMap* mem_map,
+                                                   std::string* error_msg);
 
   DexFile(const uint8_t* base, size_t size,
           const std::string& location,
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 0b54d47..7f5a181 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -32,8 +32,8 @@
 
 TEST_F(DexFileTest, Open) {
   ScopedObjectAccess soa(Thread::Current());
-  const DexFile* dex(OpenTestDexFile("Nested"));
-  ASSERT_TRUE(dex != NULL);
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("Nested"));
+  ASSERT_TRUE(dex.get() != NULL);
 }
 
 static const uint8_t kBase64Map[256] = {
@@ -133,8 +133,8 @@
   "AAACAAAAQAEAAAEgAAACAAAAVAEAAAYgAAACAAAAiAEAAAEQAAABAAAAqAEAAAIgAAAPAAAArgEA"
   "AAMgAAACAAAAiAIAAAQgAAADAAAAlAIAAAAgAAACAAAAqwIAAAAQAAABAAAAxAIAAA==";
 
-static const DexFile* OpenDexFileBase64(const char* base64,
-                                        const char* location) {
+static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
+                                                        const char* location) {
   // decode base64
   CHECK(base64 != NULL);
   size_t length;
@@ -155,11 +155,11 @@
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
   std::string error_msg;
-  std::vector<const DexFile*> tmp;
+  std::vector<std::unique_ptr<const DexFile>> tmp;
   bool success = DexFile::Open(location, location, &error_msg, &tmp);
   CHECK(success) << error_msg;
   EXPECT_EQ(1U, tmp.size());
-  const DexFile* dex_file = tmp[0];
+  std::unique_ptr<const DexFile> dex_file = std::move(tmp[0]);
   EXPECT_EQ(PROT_READ, dex_file->GetPermissions());
   EXPECT_TRUE(dex_file->IsReadOnly());
   return dex_file;
@@ -198,7 +198,7 @@
 
 TEST_F(DexFileTest, GetLocationChecksum) {
   ScopedObjectAccess soa(Thread::Current());
-  const DexFile* raw(OpenTestDexFile("Main"));
+  std::unique_ptr<const DexFile> raw(OpenTestDexFile("Main"));
   EXPECT_NE(raw->GetHeader().checksum_, raw->GetLocationChecksum());
 }
 
@@ -213,8 +213,8 @@
 
 TEST_F(DexFileTest, ClassDefs) {
   ScopedObjectAccess soa(Thread::Current());
-  const DexFile* raw(OpenTestDexFile("Nested"));
-  ASSERT_TRUE(raw != NULL);
+  std::unique_ptr<const DexFile> raw(OpenTestDexFile("Nested"));
+  ASSERT_TRUE(raw.get() != nullptr);
   EXPECT_EQ(2U, raw->NumClassDefs());
 
   const DexFile::ClassDef& c0 = raw->GetClassDef(0);
@@ -226,8 +226,8 @@
 
 TEST_F(DexFileTest, GetMethodSignature) {
   ScopedObjectAccess soa(Thread::Current());
-  const DexFile* raw(OpenTestDexFile("GetMethodSignature"));
-  ASSERT_TRUE(raw != NULL);
+  std::unique_ptr<const DexFile> raw(OpenTestDexFile("GetMethodSignature"));
+  ASSERT_TRUE(raw.get() != nullptr);
   EXPECT_EQ(1U, raw->NumClassDefs());
 
   const DexFile::ClassDef& class_def = raw->GetClassDef(0);
@@ -276,8 +276,8 @@
 
 TEST_F(DexFileTest, FindStringId) {
   ScopedObjectAccess soa(Thread::Current());
-  const DexFile* raw(OpenTestDexFile("GetMethodSignature"));
-  ASSERT_TRUE(raw != NULL);
+  std::unique_ptr<const DexFile> raw(OpenTestDexFile("GetMethodSignature"));
+  ASSERT_TRUE(raw.get() != nullptr);
   EXPECT_EQ(1U, raw->NumClassDefs());
 
   const char* strings[] = { "LGetMethodSignature;", "Ljava/lang/Float;", "Ljava/lang/Object;",
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index ec1e5f0..00ca8a9 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -101,8 +101,9 @@
   return dst.release();
 }
 
-static const DexFile* OpenDexFileBase64(const char* base64, const char* location,
-                                        std::string* error_msg) {
+static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
+                                                        const char* location,
+                                                        std::string* error_msg) {
   // decode base64
   CHECK(base64 != NULL);
   size_t length;
@@ -122,11 +123,11 @@
 
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
-  std::vector<const DexFile*> tmp;
+  std::vector<std::unique_ptr<const DexFile>> tmp;
   bool success = DexFile::Open(location, location, error_msg, &tmp);
   CHECK(success) << error_msg;
   EXPECT_EQ(1U, tmp.size());
-  const DexFile* dex_file = tmp[0];
+  std::unique_ptr<const DexFile> dex_file = std::move(tmp[0]);
   EXPECT_EQ(PROT_READ, dex_file->GetPermissions());
   EXPECT_TRUE(dex_file->IsReadOnly());
   return dex_file;
@@ -166,8 +167,9 @@
   header->checksum_ = adler_checksum;
 }
 
-static const DexFile* FixChecksumAndOpen(uint8_t* bytes, size_t length, const char* location,
-                                         std::string* error_msg) {
+static std::unique_ptr<const DexFile> FixChecksumAndOpen(uint8_t* bytes, size_t length,
+                                                         const char* location,
+                                                         std::string* error_msg) {
   // Check data.
   CHECK(bytes != nullptr);
 
@@ -187,12 +189,12 @@
 
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
-  std::vector<const DexFile*> tmp;
+  std::vector<std::unique_ptr<const DexFile>> tmp;
   if (!DexFile::Open(location, location, error_msg, &tmp)) {
     return nullptr;
   }
   EXPECT_EQ(1U, tmp.size());
-  const DexFile* dex_file = tmp[0];
+  std::unique_ptr<const DexFile> dex_file = std::move(tmp[0]);
   EXPECT_EQ(PROT_READ, dex_file->GetPermissions());
   EXPECT_TRUE(dex_file->IsReadOnly());
   return dex_file;
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 4198905..b6df609 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -1332,7 +1332,10 @@
         break;
       }
       case EM_MIPS: {
-        elf_ISA = kMips;
+        if ((GetHeader().e_flags & EF_MIPS_ARCH) == EF_MIPS_ARCH_32R2 ||
+            (GetHeader().e_flags & EF_MIPS_ARCH) == EF_MIPS_ARCH_32R6) {
+          elf_ISA = kMips;
+        }
         break;
       }
     }
diff --git a/runtime/elf_utils.h b/runtime/elf_utils.h
index 7b00bad..3579e27 100644
--- a/runtime/elf_utils.h
+++ b/runtime/elf_utils.h
@@ -30,6 +30,7 @@
 #define EF_ARM_EABI_VER5 0x05000000
 #define EF_MIPS_ABI_O32 0x00001000
 #define EF_MIPS_ARCH_32R2 0x70000000
+#define EF_MIPS_ARCH_32R6 0x90000000
 
 #define EI_ABIVERSION 8
 #define EM_ARM 40
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 4bec70a..ac640b4 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -59,6 +59,7 @@
   // | S0         |
   // |            |    4x2 bytes padding
   // | Method*    |  <- sp
+  static constexpr bool kAlignPairRegister = !kArm32QuickCodeUseSoftFloat;
   static constexpr bool kQuickSoftFloatAbi = kArm32QuickCodeUseSoftFloat;
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = !kArm32QuickCodeUseSoftFloat;
   static constexpr size_t kNumQuickGprArgs = 3;
@@ -93,6 +94,7 @@
   // | D0         |
   // |            |    padding
   // | Method*    |  <- sp
+  static constexpr bool kAlignPairRegister = false;
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 7;  // 7 arguments passed in GPRs.
@@ -121,6 +123,7 @@
   // | A2         |    arg2
   // | A1         |    arg1
   // | A0/Method* |  <- sp
+  static constexpr bool kAlignPairRegister = false;
   static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs.
@@ -146,6 +149,7 @@
   // | EDX         |    arg2
   // | ECX         |    arg1
   // | EAX/Method* |  <- sp
+  static constexpr bool kAlignPairRegister = false;
   static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs.
@@ -184,6 +188,7 @@
   // | XMM0            |    float arg 1
   // | Padding         |
   // | RDI/Method*     |  <- sp
+  static constexpr bool kAlignPairRegister = false;
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 5;  // 5 arguments passed in GPRs.
@@ -370,6 +375,11 @@
         case Primitive::kPrimDouble:
         case Primitive::kPrimLong:
           if (kQuickSoftFloatAbi || (cur_type_ == Primitive::kPrimLong)) {
+            if (cur_type_ == Primitive::kPrimLong && kAlignPairRegister && gpr_index_ == 0) {
+              // Currently, this is only for ARM, where the first available parameter register
+              // is R1. So we skip it, and use R2 instead.
+              gpr_index_++;
+            }
             is_split_long_or_double_ = (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) &&
                 ((gpr_index_ + 1) == kNumQuickGprArgs);
             Visit();
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 42d2610..1716d5e 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -65,44 +65,15 @@
 
 static constexpr bool kDirectStream = true;
 
-#define HPROF_TIME 0
-#define HPROF_NULL_STACK_TRACE   0
-#define HPROF_NULL_THREAD        0
+static constexpr uint32_t kHprofTime = 0;
+static constexpr uint32_t kHprofNullStackTrace = 0;
+static constexpr uint32_t kHprofNullThread = 0;
 
-#define U2_TO_BUF_BE(buf, offset, value) \
-    do { \
-      unsigned char* buf_ = (unsigned char*)(buf); \
-      int offset_ = static_cast<int>(offset); \
-      uint16_t value_ = (uint16_t)(value); \
-      buf_[offset_ + 0] = (unsigned char)(value_ >>  8); \
-      buf_[offset_ + 1] = (unsigned char)(value_      ); \
-    } while (0)
+static constexpr size_t kMaxObjectsPerSegment = 128;
+static constexpr size_t kMaxBytesPerSegment = 4096;
 
-#define U4_TO_BUF_BE(buf, offset, value) \
-    do { \
-      unsigned char* buf_ = (unsigned char*)(buf); \
-      int offset_ = static_cast<int>(offset); \
-      uint32_t value_ = (uint32_t)(value); \
-      buf_[offset_ + 0] = (unsigned char)(value_ >> 24); \
-      buf_[offset_ + 1] = (unsigned char)(value_ >> 16); \
-      buf_[offset_ + 2] = (unsigned char)(value_ >>  8); \
-      buf_[offset_ + 3] = (unsigned char)(value_      ); \
-    } while (0)
-
-#define U8_TO_BUF_BE(buf, offset, value) \
-    do { \
-      unsigned char* buf_ = (unsigned char*)(buf); \
-      int offset_ = static_cast<int>(offset); \
-      uint64_t value_ = (uint64_t)(value); \
-      buf_[offset_ + 0] = (unsigned char)(value_ >> 56); \
-      buf_[offset_ + 1] = (unsigned char)(value_ >> 48); \
-      buf_[offset_ + 2] = (unsigned char)(value_ >> 40); \
-      buf_[offset_ + 3] = (unsigned char)(value_ >> 32); \
-      buf_[offset_ + 4] = (unsigned char)(value_ >> 24); \
-      buf_[offset_ + 5] = (unsigned char)(value_ >> 16); \
-      buf_[offset_ + 6] = (unsigned char)(value_ >>  8); \
-      buf_[offset_ + 7] = (unsigned char)(value_      ); \
-    } while (0)
+// The static field-name for the synthetic object generated to account for class static overhead.
+static constexpr const char* kStaticOverheadName = "$staticOverhead";
 
 enum HprofTag {
   HPROF_TAG_STRING = 0x01,
@@ -172,44 +143,43 @@
 typedef uint32_t HprofStringId;
 typedef uint32_t HprofClassObjectId;
 
-class Hprof;
-
-// Represents a top-level hprof record, whose serialized format is:
-// U1  TAG: denoting the type of the record
-// U4  TIME: number of microseconds since the time stamp in the header
-// U4  LENGTH: number of bytes that follow this uint32_t field and belong to this record
-// U1* BODY: as many bytes as specified in the above uint32_t field
-class HprofRecord {
+class EndianOutput {
  public:
-  explicit HprofRecord(Hprof* hprof) : alloc_length_(128), fp_(nullptr), tag_(0), time_(0),
-      length_(0), dirty_(false), hprof_(hprof) {
-    body_ = reinterpret_cast<unsigned char*>(malloc(alloc_length_));
+  EndianOutput() : length_(0), sum_length_(0), max_length_(0), started_(false) {}
+  virtual ~EndianOutput() {}
+
+  void StartNewRecord(uint8_t tag, uint32_t time) {
+    if (length_ > 0) {
+      EndRecord();
+    }
+    DCHECK_EQ(length_, 0U);
+    AddU1(tag);
+    AddU4(time);
+    AddU4(0xdeaddead);  // Length placeholder, patched in EndRecord().
+    started_ = true;
   }
 
-  ~HprofRecord() {
-    free(body_);
-  }
+  void EndRecord() {
+    // Replace length in header.
+    if (started_) {
+      UpdateU4(sizeof(uint8_t) + sizeof(uint32_t),
+               length_ - sizeof(uint8_t) - 2 * sizeof(uint32_t));
+    }
 
-  // Returns how many characters were in the buffer (or written).
-  size_t StartNewRecord(FILE* fp, uint8_t tag, uint32_t time) WARN_UNUSED {
-    const size_t ret = Flush();
-    fp_ = fp;
-    tag_ = tag;
-    time_ = time;
+    HandleEndRecord();
+
+    sum_length_ += length_;
+    max_length_ = std::max(max_length_, length_);
     length_ = 0;
-    dirty_ = true;
-    return ret;
+    started_ = false;
   }
 
-  // Returns how many characters were in the buffer (or written).
-  size_t Flush() WARN_UNUSED;
-
-  void AddU1(uint8_t value);
-
+  void AddU1(uint8_t value) {
+    AddU1List(&value, 1);
+  }
   void AddU2(uint16_t value) {
     AddU2List(&value, 1);
   }
-
   void AddU4(uint32_t value) {
     AddU4List(&value, 1);
   }
@@ -239,14 +209,28 @@
     AddU4(value);
   }
 
-  void AddU1List(const uint8_t* values, size_t numValues);
-  void AddU2List(const uint16_t* values, size_t numValues);
-  void AddU4List(const uint32_t* values, size_t numValues);
-  void UpdateU4(size_t offset, uint32_t new_value);
-  void AddU8List(const uint64_t* values, size_t numValues);
+  void AddU1List(const uint8_t* values, size_t count) {
+    HandleU1List(values, count);
+    length_ += count;
+  }
+  void AddU2List(const uint16_t* values, size_t count) {
+    HandleU2List(values, count);
+    length_ += count * sizeof(uint16_t);
+  }
+  void AddU4List(const uint32_t* values, size_t count) {
+    HandleU4List(values, count);
+    length_ += count * sizeof(uint32_t);
+  }
+  virtual void UpdateU4(size_t offset ATTRIBUTE_UNUSED, uint32_t new_value ATTRIBUTE_UNUSED) {
+    DCHECK_LE(offset, length_ - 4);
+  }
+  void AddU8List(const uint64_t* values, size_t count) {
+    HandleU8List(values, count);
+    length_ += count * sizeof(uint64_t);
+  }
 
   void AddIdList(mirror::ObjectArray<mirror::Object>* values)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const int32_t length = values->GetLength();
     for (int32_t i = 0; i < length; ++i) {
       AddObjectId(values->GetWithoutChecks(i));
@@ -258,35 +242,168 @@
     AddU1List((const uint8_t*)str, strlen(str));
   }
 
-  size_t Size() const {
+  size_t Length() const {
     return length_;
   }
 
- private:
-  void GuaranteeRecordAppend(size_t nmore) {
-    const size_t min_size = length_ + nmore;
-    if (min_size > alloc_length_) {
-      const size_t new_alloc_len = std::max(alloc_length_ * 2, min_size);
-      body_ = (unsigned char*)realloc(body_, new_alloc_len);
-      CHECK(body_ != nullptr);
-      alloc_length_ = new_alloc_len;
-    }
-    CHECK_LE(length_ + nmore, alloc_length_);
+  size_t SumLength() const {
+    return sum_length_;
   }
 
-  size_t alloc_length_;
-  unsigned char* body_;
+  size_t MaxLength() const {
+    return max_length_;
+  }
 
-  FILE* fp_;
-  uint8_t tag_;
-  uint32_t time_;
-  size_t length_;
-  bool dirty_;
-  Hprof* hprof_;
+ protected:
+  virtual void HandleU1List(const uint8_t* values ATTRIBUTE_UNUSED,
+                            size_t count ATTRIBUTE_UNUSED) {
+  }
+  virtual void HandleU2List(const uint16_t* values ATTRIBUTE_UNUSED,
+                            size_t count ATTRIBUTE_UNUSED) {
+  }
+  virtual void HandleU4List(const uint32_t* values ATTRIBUTE_UNUSED,
+                            size_t count ATTRIBUTE_UNUSED) {
+  }
+  virtual void HandleU8List(const uint64_t* values ATTRIBUTE_UNUSED,
+                            size_t count ATTRIBUTE_UNUSED) {
+  }
+  virtual void HandleEndRecord() {
+  }
 
-  DISALLOW_COPY_AND_ASSIGN(HprofRecord);
+  size_t length_;      // Current record size.
+  size_t sum_length_;  // Size of all data.
+  size_t max_length_;  // Maximum seen length.
+  bool started_;       // Was StartNewRecord called?
 };
 
+// Buffers the current record as big-endian bytes until EndRecord() hands it to HandleFlush().
+class EndianOutputBuffered : public EndianOutput {
+ public:
+  explicit EndianOutputBuffered(size_t reserve_size) {
+    buffer_.reserve(reserve_size);  // Capacity hint only; buffer_ can still grow.
+  }
+  virtual ~EndianOutputBuffered() {}
+
+  void UpdateU4(size_t offset, uint32_t new_value) OVERRIDE {
+    DCHECK_LE(offset, length_ - 4);  // The four bytes must lie inside the record.
+    buffer_[offset + 0] = static_cast<uint8_t>((new_value >> 24) & 0xFF);  // MSB first.
+    buffer_[offset + 1] = static_cast<uint8_t>((new_value >> 16) & 0xFF);
+    buffer_[offset + 2] = static_cast<uint8_t>((new_value >> 8)  & 0xFF);
+    buffer_[offset + 3] = static_cast<uint8_t>((new_value >> 0)  & 0xFF);
+  }
+
+ protected:
+  void HandleU1List(const uint8_t* values, size_t count) OVERRIDE {
+    DCHECK_EQ(length_, buffer_.size());  // length_ is advanced by the caller afterwards.
+    buffer_.insert(buffer_.end(), values, values + count);
+  }
+
+  void HandleU2List(const uint16_t* values, size_t count) OVERRIDE {
+    DCHECK_EQ(length_, buffer_.size());
+    for (size_t i = 0; i < count; ++i) {
+      uint16_t value = *values;
+      buffer_.push_back(static_cast<uint8_t>((value >> 8) & 0xFF));  // High byte first.
+      buffer_.push_back(static_cast<uint8_t>((value >> 0) & 0xFF));
+      values++;
+    }
+  }
+
+  void HandleU4List(const uint32_t* values, size_t count) OVERRIDE {
+    DCHECK_EQ(length_, buffer_.size());
+    for (size_t i = 0; i < count; ++i) {
+      uint32_t value = *values;
+      buffer_.push_back(static_cast<uint8_t>((value >> 24) & 0xFF));  // High byte first.
+      buffer_.push_back(static_cast<uint8_t>((value >> 16) & 0xFF));
+      buffer_.push_back(static_cast<uint8_t>((value >> 8)  & 0xFF));
+      buffer_.push_back(static_cast<uint8_t>((value >> 0)  & 0xFF));
+      values++;
+    }
+  }
+
+  void HandleU8List(const uint64_t* values, size_t count) OVERRIDE {
+    DCHECK_EQ(length_, buffer_.size());
+    for (size_t i = 0; i < count; ++i) {
+      uint64_t value = *values;
+      buffer_.push_back(static_cast<uint8_t>((value >> 56) & 0xFF));  // High byte first.
+      buffer_.push_back(static_cast<uint8_t>((value >> 48) & 0xFF));
+      buffer_.push_back(static_cast<uint8_t>((value >> 40) & 0xFF));
+      buffer_.push_back(static_cast<uint8_t>((value >> 32) & 0xFF));
+      buffer_.push_back(static_cast<uint8_t>((value >> 24) & 0xFF));
+      buffer_.push_back(static_cast<uint8_t>((value >> 16) & 0xFF));
+      buffer_.push_back(static_cast<uint8_t>((value >> 8)  & 0xFF));
+      buffer_.push_back(static_cast<uint8_t>((value >> 0)  & 0xFF));
+      values++;
+    }
+  }
+
+  void HandleEndRecord() OVERRIDE {
+    DCHECK_EQ(buffer_.size(), length_);
+    if (kIsDebugBuild && started_) {  // Re-read the length field at bytes 5..8 and verify it.
+      uint32_t stored_length =
+          static_cast<uint32_t>(buffer_[5]) << 24 |
+          static_cast<uint32_t>(buffer_[6]) << 16 |
+          static_cast<uint32_t>(buffer_[7]) << 8 |
+          static_cast<uint32_t>(buffer_[8]);
+      DCHECK_EQ(stored_length, length_ - sizeof(uint8_t) - 2 * sizeof(uint32_t));
+    }
+    HandleFlush(buffer_.data(), length_);
+    buffer_.clear();  // The next record starts from an empty buffer.
+  }
+
+  virtual void HandleFlush(const uint8_t* buffer ATTRIBUTE_UNUSED, size_t length ATTRIBUTE_UNUSED) {
+  }  // No-op by default; overridden by the concrete sinks.
+
+  std::vector<uint8_t> buffer_;  // Bytes of the record currently being built.
+};
+
+class FileEndianOutput FINAL : public EndianOutputBuffered {  // Flushes records to a File.
+ public:
+  FileEndianOutput(File* fp, size_t reserved_size)
+      : EndianOutputBuffered(reserved_size), fp_(fp), errors_(false) {
+    DCHECK(fp != nullptr);
+  }
+  ~FileEndianOutput() {
+  }
+
+  bool Errors() {  // True once any WriteFully() call in HandleFlush() has failed.
+    return errors_;
+  }
+
+ protected:
+  void HandleFlush(const uint8_t* buffer, size_t length) OVERRIDE {
+    if (!errors_) {  // After the first failure, all later data is dropped.
+      errors_ = !fp_->WriteFully(buffer, length);
+    }
+  }
+
+ private:
+  File* fp_;  // Not closed or deleted by this class.
+  bool errors_;  // Sticky write-failure flag.
+};
+
+class NetStateEndianOutput FINAL : public EndianOutputBuffered {  // Flushes records to net_state_.
+ public:
+  NetStateEndianOutput(JDWP::JdwpNetStateBase* net_state, size_t reserved_size)
+      : EndianOutputBuffered(reserved_size), net_state_(net_state) {
+    DCHECK(net_state != nullptr);
+  }
+  ~NetStateEndianOutput() {}
+
+ protected:
+  void HandleFlush(const uint8_t* buffer, size_t length) OVERRIDE {
+    std::vector<iovec> iov;  // Single-entry vector wrapping the record buffer.
+    iov.push_back(iovec());
+    iov[0].iov_base = const_cast<void*>(reinterpret_cast<const void*>(buffer));
+    iov[0].iov_len = length;
+    net_state_->WriteBufferedPacketLocked(iov);  // NOTE(review): result, if any, is ignored.
+  }
+
+ private:
+  JDWP::JdwpNetStateBase* net_state_;  // Not released by this class (empty destructor).
+};
+
+#define __ output->
+
 class Hprof {
  public:
   Hprof(const char* output_filename, int fd, bool direct_to_ddms)
@@ -294,265 +411,173 @@
         fd_(fd),
         direct_to_ddms_(direct_to_ddms),
         start_ns_(NanoTime()),
-        current_record_(this),
-        gc_thread_serial_number_(0),
-        gc_scan_state_(0),
         current_heap_(HPROF_HEAP_DEFAULT),
         objects_in_segment_(0),
-        header_fp_(nullptr),
-        header_data_ptr_(nullptr),
-        header_data_size_(0),
-        body_fp_(nullptr),
-        body_data_ptr_(nullptr),
-        body_data_size_(0),
-        net_state_(nullptr),
         next_string_id_(0x400000) {
     LOG(INFO) << "hprof: heap dump \"" << filename_ << "\" starting...";
   }
 
-  ~Hprof() {
-    if (header_fp_ != nullptr) {
-      fclose(header_fp_);
-    }
-    if (body_fp_ != nullptr) {
-      fclose(body_fp_);
-    }
-    free(header_data_ptr_);
-    free(body_data_ptr_);
-  }
-
-  void ProcessBody() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
-    Runtime* runtime = Runtime::Current();
-    // Walk the roots and the heap.
-    total_body_bytes_ += current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_SEGMENT,
-                                                        HPROF_TIME);
-    runtime->VisitRoots(RootVisitor, this);
-    runtime->GetHeap()->VisitObjects(VisitObjectCallback, this);
-    total_body_bytes_ += current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_END,
-                                                        HPROF_TIME);
-    total_body_bytes_ += current_record_.Flush();
-    if (allow_writing_) {
-      fflush(body_fp_);
-    }
-  }
-
-  void ProcessHeader() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Write the header.
-    WriteFixedHeader();
-    // Write the string and class tables, and any stack traces, to the header.
-    // (jhat requires that these appear before any of the data in the body that refers to them.)
-    WriteStringTable();
-    WriteClassTable();
-    WriteStackTraces();
-    total_header_bytes_ += current_record_.Flush();
-    if (allow_writing_) {
-      fflush(header_fp_);
-    }
-  }
-
-  void ProcessHeapStreaming(size_t data_len, uint32_t chunk_type)
+  void Dump()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
-    total_body_bytes_ = 0;
-    total_header_bytes_ = 0;
-    allow_writing_ = true;
-    CHECK(direct_to_ddms_);
-    JDWP::JdwpState* state = Dbg::GetJdwpState();
-    CHECK(state != nullptr);
-    net_state_ = state->netState;
-    CHECK(net_state_ != nullptr);
-    // Hold the socket lock for the whole tiem since we want this to be atomic.
-    MutexLock mu(Thread::Current(), *net_state_->GetSocketLock());
-    total_body_bytes_ = 0;
-    total_header_bytes_ = 0;
-    constexpr size_t kChunkHeaderSize = kJDWPHeaderLen + 8;
-    uint8_t chunk_header[kChunkHeaderSize] = { 0 };
-    state->SetupChunkHeader(chunk_type, data_len, kChunkHeaderSize, chunk_header);
-    Write(chunk_header, kChunkHeaderSize, nullptr);  // Send the header chunk to DDMS.
-    ProcessHeader();
-    ProcessBody();
-    CHECK_EQ(total_body_bytes_ + total_header_bytes_, data_len);
-    net_state_ = nullptr;
-  }
-  void ProcessHeap(bool allow_writing) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
-    allow_writing_ = allow_writing;
-    total_body_bytes_ = 0;
-    total_header_bytes_ = 0;
-    if (allow_writing) {
-      header_fp_ = open_memstream(&header_data_ptr_, &header_data_size_);
-      CHECK(header_fp_ != nullptr) << "header open_memstream failed";
-      body_fp_ = open_memstream(&body_data_ptr_, &body_data_size_);
-      CHECK(body_fp_ != nullptr) << "body open_memstream failed";
-    }
-    ProcessBody();
-    ProcessHeader();
-  }
-
-  void Dump() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    // First pass to measure the size of the dump.
+    size_t overall_size;
+    size_t max_length;
     {
-      ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-      // First pass to measure the size of the dump.
-      ProcessHeap(false);
-      const size_t header_bytes = total_header_bytes_;
-      const size_t body_bytes = total_body_bytes_;
-      if (direct_to_ddms_ && kDirectStream) {
-        ProcessHeapStreaming(header_bytes + body_bytes, CHUNK_TYPE("HPDS"));
-      } else {
-        ProcessHeap(true);
-        CHECK_EQ(header_data_size_, header_bytes);
-        CHECK_EQ(body_data_size_, body_bytes);
-      }
-      CHECK_EQ(total_header_bytes_, header_bytes);
-      CHECK_EQ(total_body_bytes_, body_bytes);
+      EndianOutput count_output;
+      ProcessHeap(&count_output, false);
+      overall_size = count_output.SumLength();
+      max_length = count_output.MaxLength();
     }
 
-    bool okay = true;
-    if (!kDirectStream) {
-      if (direct_to_ddms_) {
-        // Send the data off to DDMS.
-        iovec iov[2];
-        iov[0].iov_base = header_data_ptr_;
-        iov[0].iov_len = header_data_size_;
-        iov[1].iov_base = body_data_ptr_;
-        iov[1].iov_len = body_data_size_;
-        Dbg::DdmSendChunkV(CHUNK_TYPE("HPDS"), iov, 2);
+    bool okay;
+    if (direct_to_ddms_) {
+      if (kDirectStream) {
+        okay = DumpToDdmsDirect(overall_size, max_length, CHUNK_TYPE("HPDS"));
       } else {
-        // Where exactly are we writing to?
-        int out_fd;
-        if (fd_ >= 0) {
-          out_fd = dup(fd_);
-          if (out_fd < 0) {
-            ThrowRuntimeException("Couldn't dump heap; dup(%d) failed: %s", fd_, strerror(errno));
-            return;
-          }
-        } else {
-          out_fd = open(filename_.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0644);
-          if (out_fd < 0) {
-            ThrowRuntimeException("Couldn't dump heap; open(\"%s\") failed: %s", filename_.c_str(),
-                                  strerror(errno));
-            return;
-          }
-        }
-
-        std::unique_ptr<File> file(new File(out_fd, filename_, true));
-        okay = file->WriteFully(header_data_ptr_, header_data_size_) &&
-               file->WriteFully(body_data_ptr_, body_data_size_);
-        if (okay) {
-          okay = file->FlushCloseOrErase() == 0;
-        } else {
-          file->Erase();
-        }
-        if (!okay) {
-          std::string msg(StringPrintf("Couldn't dump heap; writing \"%s\" failed: %s",
-                                       filename_.c_str(), strerror(errno)));
-          ThrowRuntimeException("%s", msg.c_str());
-          LOG(ERROR) << msg;
-        }
+        okay = DumpToDdmsBuffered(overall_size, max_length);
       }
+    } else {
+      okay = DumpToFile(overall_size, max_length);
     }
 
-    // Throw out a log message for the benefit of "runhat".
     if (okay) {
       uint64_t duration = NanoTime() - start_ns_;
       LOG(INFO) << "hprof: heap dump completed ("
-          << PrettySize(total_header_bytes_ + total_body_bytes_ + 1023)
+          << PrettySize(RoundUp(overall_size, 1024))
           << ") in " << PrettyDuration(duration);
     }
   }
 
-  bool AllowWriting() const {
-    return allow_writing_;
-  }
-
-  size_t Write(const void* ptr, size_t len, FILE* fp) {
-    if (allow_writing_) {
-      if (net_state_ != nullptr) {
-        CHECK(fp == nullptr);
-        std::vector<iovec> iov;
-        iov.push_back(iovec());
-        iov[0].iov_base = const_cast<void*>(ptr);
-        iov[0].iov_len = len;
-        net_state_->WriteBufferedPacketLocked(iov);
-      } else {
-        const size_t n = fwrite(ptr, 1, len, fp);
-        CHECK_EQ(n, len);
-      }
-    }
-    return len;
-  }
-
  private:
+  struct Env {
+    Hprof* hprof;
+    EndianOutput* output;
+  };
+
   static void RootVisitor(mirror::Object** obj, void* arg, uint32_t thread_id, RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(arg != nullptr);
     DCHECK(obj != nullptr);
     DCHECK(*obj != nullptr);
-    reinterpret_cast<Hprof*>(arg)->VisitRoot(*obj, thread_id, root_type);
+    Env* env = reinterpret_cast<Env*>(arg);
+    env->hprof->VisitRoot(*obj, thread_id, root_type, env->output);
   }
 
   static void VisitObjectCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(obj != nullptr);
     DCHECK(arg != nullptr);
-    reinterpret_cast<Hprof*>(arg)->DumpHeapObject(obj);
+    Env* env = reinterpret_cast<Env*>(arg);
+    env->hprof->DumpHeapObject(obj, env->output);
   }
 
-  void VisitRoot(const mirror::Object* obj, uint32_t thread_id, RootType type)
+  void DumpHeapObject(mirror::Object* obj, EndianOutput* output)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int DumpHeapObject(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DumpHeapClass(mirror::Class* klass, EndianOutput* output)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void WriteClassTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    HprofRecord* rec = &current_record_;
+  void DumpHeapArray(mirror::Array* obj, mirror::Class* klass, EndianOutput* output)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass, EndianOutput* output)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void ProcessHeap(EndianOutput* output, bool header_first)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    // Reset current heap and object count, so that every pass starts from the same state.
+    current_heap_ = HPROF_HEAP_DEFAULT;
+    objects_in_segment_ = 0;
+
+    if (header_first) {  // The flag only selects the order of the two sections.
+      ProcessHeader(output);
+      ProcessBody(output);
+    } else {
+      ProcessBody(output);
+      ProcessHeader(output);
+    }
+  }
+
+  void ProcessBody(EndianOutput* output) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    Runtime* runtime = Runtime::Current();
+    // Walk the roots and the heap.
+    output->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
+
+    Env env = { this, output };  // Passed as the void* arg to both callbacks below.
+    runtime->VisitRoots(RootVisitor, &env);
+    runtime->GetHeap()->VisitObjects(VisitObjectCallback, &env);
+
+    output->StartNewRecord(HPROF_TAG_HEAP_DUMP_END, kHprofTime);  // Also ends the open segment.
+    output->EndRecord();  // Finish the end record; nothing is added to its body.
+  }
+
+  void ProcessHeader(EndianOutput* output) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Write the fixed file header (magic string, identifier size, timestamp).
+    WriteFixedHeader(output);
+    // Write the string and class tables, and any stack traces, to the header.
+    // (jhat requires that these appear before any of the data in the body that refers to them.)
+    WriteStringTable(output);
+    WriteClassTable(output);
+    WriteStackTraces(output);
+    output->EndRecord();  // Close the stack-trace record, which is still open here.
+  }
+
+  void WriteClassTable(EndianOutput* output) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t nextSerialNumber = 1;
 
     for (mirror::Class* c : classes_) {
       CHECK(c != nullptr);
-      total_header_bytes_ += current_record_.StartNewRecord(header_fp_, HPROF_TAG_LOAD_CLASS,
-                                                            HPROF_TIME);
+      output->StartNewRecord(HPROF_TAG_LOAD_CLASS, kHprofTime);
       // LOAD CLASS format:
       // U4: class serial number (always > 0)
       // ID: class object ID. We use the address of the class object structure as its ID.
       // U4: stack trace serial number
       // ID: class name string ID
-      rec->AddU4(nextSerialNumber++);
-      rec->AddObjectId(c);
-      rec->AddU4(HPROF_NULL_STACK_TRACE);
-      rec->AddStringId(LookupClassNameId(c));
+      __ AddU4(nextSerialNumber++);
+      __ AddObjectId(c);
+      __ AddU4(kHprofNullStackTrace);
+      __ AddStringId(LookupClassNameId(c));
     }
   }
 
-  void WriteStringTable() {
-    HprofRecord* rec = &current_record_;
+  void WriteStringTable(EndianOutput* output) {
     for (const std::pair<std::string, HprofStringId>& p : strings_) {
       const std::string& string = p.first;
       const size_t id = p.second;
 
-      total_header_bytes_ += current_record_.StartNewRecord(header_fp_, HPROF_TAG_STRING,
-                                                            HPROF_TIME);
+      output->StartNewRecord(HPROF_TAG_STRING, kHprofTime);
 
       // STRING format:
       // ID:  ID for this string
       // U1*: UTF8 characters for string (NOT NULL terminated)
       //      (the record format encodes the length)
-      rec->AddU4(id);
-      rec->AddUtf8String(string.c_str());
+      __ AddU4(id);
+      __ AddUtf8String(string.c_str());
     }
   }
 
-  void StartNewHeapDumpSegment() {
+  void StartNewHeapDumpSegment(EndianOutput* output) {
     // This flushes the old segment and starts a new one.
-    total_body_bytes_ += current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_SEGMENT,
-                                                        HPROF_TIME);
+    output->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
     objects_in_segment_ = 0;
     // Starting a new HEAP_DUMP resets the heap to default.
     current_heap_ = HPROF_HEAP_DEFAULT;
   }
 
-  int MarkRootObject(const mirror::Object* obj, jobject jniObj);
+  void CheckHeapSegmentConstraints(EndianOutput* output) {  // Caps objects and bytes per segment.
+    if (objects_in_segment_ >= kMaxObjectsPerSegment || output->Length() >= kMaxBytesPerSegment) {
+      StartNewHeapDumpSegment(output);  // Segment is full: roll over to a fresh one.
+    }
+  }
+
+  void VisitRoot(const mirror::Object* obj, uint32_t thread_id, RootType type, EndianOutput* output)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void MarkRootObject(const mirror::Object* obj, jobject jni_obj, HprofHeapTag heap_tag,
+                      uint32_t thread_serial, EndianOutput* output);
 
   HprofClassObjectId LookupClassId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (c != nullptr) {
@@ -587,38 +612,128 @@
     return LookupStringId(PrettyDescriptor(c));
   }
 
-  void WriteFixedHeader() {
-    char magic[] = "JAVA PROFILE 1.0.3";
-    unsigned char buf[4] = { 0 };
+  void WriteFixedHeader(EndianOutput* output) {
     // Write the file header.
     // U1: NUL-terminated magic string.
-    total_header_bytes_ += Write(magic, sizeof(magic), header_fp_);
+    const char magic[] = "JAVA PROFILE 1.0.3";
+    __ AddU1List(reinterpret_cast<const uint8_t*>(magic), sizeof(magic));
+
     // U4: size of identifiers.  We're using addresses as IDs and our heap references are stored
     // as uint32_t.
     // Note of warning: hprof-conv hard-codes the size of identifiers to 4.
     static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(uint32_t),
                   "Unexpected HeapReference size");
-    U4_TO_BUF_BE(buf, 0, sizeof(uint32_t));
-    total_header_bytes_ += Write(buf, sizeof(uint32_t), header_fp_);
+    __ AddU4(sizeof(uint32_t));
+
     // The current time, in milliseconds since 0:00 GMT, 1/1/70.
     timeval now;
-    const uint64_t nowMs = (gettimeofday(&now, NULL) < 0) ? 0 :
+    const uint64_t nowMs = (gettimeofday(&now, nullptr) < 0) ? 0 :
         (uint64_t)now.tv_sec * 1000 + now.tv_usec / 1000;
+    // TODO: It seems it would be correct to use U8.
     // U4: high word of the 64-bit time.
-    U4_TO_BUF_BE(buf, 0, (uint32_t)(nowMs >> 32));
-    total_header_bytes_ += Write(buf, sizeof(uint32_t), header_fp_);
+    __ AddU4(static_cast<uint32_t>(nowMs >> 32));
     // U4: low word of the 64-bit time.
-    U4_TO_BUF_BE(buf, 0, (uint32_t)(nowMs & 0xffffffffULL));
-    total_header_bytes_ += Write(buf, sizeof(uint32_t), header_fp_);  // xxx fix the time
+    __ AddU4(static_cast<uint32_t>(nowMs & 0xFFFFFFFF));
   }
 
-  void WriteStackTraces() {
+  void WriteStackTraces(EndianOutput* output) {
     // Write a dummy stack trace record so the analysis tools don't freak out.
-    total_header_bytes_ +=
-        current_record_.StartNewRecord(header_fp_, HPROF_TAG_STACK_TRACE, HPROF_TIME);
-    current_record_.AddU4(HPROF_NULL_STACK_TRACE);
-    current_record_.AddU4(HPROF_NULL_THREAD);
-    current_record_.AddU4(0);    // no frames
+    output->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime);
+    __ AddU4(kHprofNullStackTrace);
+    __ AddU4(kHprofNullThread);
+    __ AddU4(0);    // no frames
+  }
+
+  bool DumpToDdmsBuffered(size_t overall_size ATTRIBUTE_UNUSED, size_t max_length ATTRIBUTE_UNUSED)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    LOG(FATAL) << "Unimplemented";  // The buffered DDMS path is not supported.
+    UNREACHABLE();  // The code below is the old, now-stale implementation.
+    //        // Send the data off to DDMS.
+    //        iovec iov[2];
+    //        iov[0].iov_base = header_data_ptr_;
+    //        iov[0].iov_len = header_data_size_;
+    //        iov[1].iov_base = body_data_ptr_;
+    //        iov[1].iov_len = body_data_size_;
+    //        Dbg::DdmSendChunkV(CHUNK_TYPE("HPDS"), iov, 2);
+  }
+
+  bool DumpToFile(size_t overall_size, size_t max_length)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    // Where exactly are we writing to?
+    int out_fd;
+    if (fd_ >= 0) {  // A non-negative fd_ takes precedence over filename_.
+      out_fd = dup(fd_);  // Work on a duplicate of fd_.
+      if (out_fd < 0) {
+        ThrowRuntimeException("Couldn't dump heap; dup(%d) failed: %s", fd_, strerror(errno));
+        return false;
+      }
+    } else {
+      out_fd = open(filename_.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0644);
+      if (out_fd < 0) {
+        ThrowRuntimeException("Couldn't dump heap; open(\"%s\") failed: %s", filename_.c_str(),
+                              strerror(errno));
+        return false;
+      }
+    }
+
+    std::unique_ptr<File> file(new File(out_fd, filename_, true));
+    bool okay;
+    {
+      FileEndianOutput file_output(file.get(), max_length);  // max_length presizes the buffer.
+      ProcessHeap(&file_output, true);  // Header first, then body.
+      okay = !file_output.Errors();
+
+      if (okay) {
+        // Check for expected size.
+        CHECK_EQ(file_output.SumLength(), overall_size);
+      }
+    }
+
+    if (okay) {
+      okay = file->FlushCloseOrErase() == 0;
+    } else {
+      file->Erase();
+    }
+    if (!okay) {  // NOTE(review): errno below may be stale by now.
+      std::string msg(StringPrintf("Couldn't dump heap; writing \"%s\" failed: %s",
+                                   filename_.c_str(), strerror(errno)));
+      ThrowRuntimeException("%s", msg.c_str());
+      LOG(ERROR) << msg;
+    }
+
+    return okay;
+  }
+
+  bool DumpToDdmsDirect(size_t overall_size, size_t max_length, uint32_t chunk_type)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    CHECK(direct_to_ddms_);
+    JDWP::JdwpState* state = Dbg::GetJdwpState();
+    CHECK(state != nullptr);
+    JDWP::JdwpNetStateBase* net_state = state->netState;
+    CHECK(net_state != nullptr);
+
+    // Hold the socket lock for the whole time since we want this to be atomic.
+    MutexLock mu(Thread::Current(), *net_state->GetSocketLock());
+
+    // Prepare the Ddms chunk.
+    constexpr size_t kChunkHeaderSize = kJDWPHeaderLen + 8;
+    uint8_t chunk_header[kChunkHeaderSize] = { 0 };
+    state->SetupChunkHeader(chunk_type, overall_size, kChunkHeaderSize, chunk_header);
+
+    // Prepare the output and queue the chunk header; it is flushed when the first record ends.
+    NetStateEndianOutput net_output(net_state, max_length);
+    net_output.AddU1List(chunk_header, kChunkHeaderSize);
+
+    // Write the dump.
+    ProcessHeap(&net_output, true);
+
+    // Check for expected size.
+    CHECK_EQ(net_output.SumLength(), overall_size + kChunkHeaderSize);  // Header counted too.
+
+    return true;  // NOTE(review): write failures are not reported here.
   }
 
   // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored.
@@ -628,30 +743,11 @@
   int fd_;
   bool direct_to_ddms_;
 
-  // Whether or not we are in the size calculating mode or writing mode.
-  bool allow_writing_;
-
   uint64_t start_ns_;
 
-  HprofRecord current_record_;
-
-  uint32_t gc_thread_serial_number_;
-  uint8_t gc_scan_state_;
   HprofHeapId current_heap_;  // Which heap we're currently dumping.
   size_t objects_in_segment_;
 
-  FILE* header_fp_;
-  char* header_data_ptr_;
-  size_t header_data_size_;
-  size_t total_header_bytes_;
-
-  FILE* body_fp_;
-  char* body_data_ptr_;
-  size_t body_data_size_;
-  size_t total_body_bytes_;
-
-  JDWP::JdwpNetStateBase* net_state_;
-
   std::set<mirror::Class*> classes_;
   HprofStringId next_string_id_;
   SafeMap<std::string, HprofStringId> strings_;
@@ -659,56 +755,56 @@
   DISALLOW_COPY_AND_ASSIGN(Hprof);
 };
 
-#define OBJECTS_PER_SEGMENT     ((size_t)128)
-#define BYTES_PER_SEGMENT       ((size_t)4096)
-
-// The static field-name for the synthetic object generated to account for class static overhead.
-#define STATIC_OVERHEAD_NAME    "$staticOverhead"
-
-static HprofBasicType SignatureToBasicTypeAndSize(const char* sig, size_t* sizeOut) {
+static HprofBasicType SignatureToBasicTypeAndSize(const char* sig, size_t* size_out) {
   char c = sig[0];
   HprofBasicType ret;
   size_t size;
 
   switch (c) {
-  case '[':
-  case 'L': ret = hprof_basic_object;  size = 4; break;
-  case 'Z': ret = hprof_basic_boolean; size = 1; break;
-  case 'C': ret = hprof_basic_char;    size = 2; break;
-  case 'F': ret = hprof_basic_float;   size = 4; break;
-  case 'D': ret = hprof_basic_double;  size = 8; break;
-  case 'B': ret = hprof_basic_byte;    size = 1; break;
-  case 'S': ret = hprof_basic_short;   size = 2; break;
-  case 'I': ret = hprof_basic_int;     size = 4; break;
-  case 'J': ret = hprof_basic_long;    size = 8; break;
-  default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE();
+    case '[':
+    case 'L':
+      ret = hprof_basic_object;
+      size = 4;
+      break;
+    case 'Z':
+      ret = hprof_basic_boolean;
+      size = 1;
+      break;
+    case 'C':
+      ret = hprof_basic_char;
+      size = 2;
+      break;
+    case 'F':
+      ret = hprof_basic_float;
+      size = 4;
+      break;
+    case 'D':
+      ret = hprof_basic_double;
+      size = 8;
+      break;
+    case 'B':
+      ret = hprof_basic_byte;
+      size = 1;
+      break;
+    case 'S':
+      ret = hprof_basic_short;
+      size = 2;
+      break;
+    case 'I':
+      ret = hprof_basic_int;
+      size = 4;
+      break;
+    case 'J':
+      ret = hprof_basic_long;
+      size = 8;
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 
-  if (sizeOut != NULL) {
-    *sizeOut = size;
-  }
-
-  return ret;
-}
-
-static HprofBasicType PrimitiveToBasicTypeAndSize(Primitive::Type prim, size_t* sizeOut) {
-  HprofBasicType ret;
-  size_t size;
-
-  switch (prim) {
-  case Primitive::kPrimBoolean: ret = hprof_basic_boolean; size = 1; break;
-  case Primitive::kPrimChar:    ret = hprof_basic_char;    size = 2; break;
-  case Primitive::kPrimFloat:   ret = hprof_basic_float;   size = 4; break;
-  case Primitive::kPrimDouble:  ret = hprof_basic_double;  size = 8; break;
-  case Primitive::kPrimByte:    ret = hprof_basic_byte;    size = 1; break;
-  case Primitive::kPrimShort:   ret = hprof_basic_short;   size = 2; break;
-  case Primitive::kPrimInt:     ret = hprof_basic_int;     size = 4; break;
-  case Primitive::kPrimLong:    ret = hprof_basic_long;    size = 8; break;
-  default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE();
-  }
-
-  if (sizeOut != NULL) {
-    *sizeOut = size;
+  if (size_out != nullptr) {
+    *size_out = size;
   }
 
   return ret;
@@ -718,95 +814,94 @@
 // something when ctx->gc_scan_state_ is non-zero, which is usually
 // only true when marking the root set or unreachable
 // objects.  Used to add rootset references to obj.
-int Hprof::MarkRootObject(const mirror::Object* obj, jobject jniObj) {
-  HprofRecord* rec = &current_record_;
-  HprofHeapTag heapTag = (HprofHeapTag)gc_scan_state_;
-
-  if (heapTag == 0) {
-    return 0;
+void Hprof::MarkRootObject(const mirror::Object* obj, jobject jni_obj, HprofHeapTag heap_tag,
+                           uint32_t thread_serial, EndianOutput* output) {
+  if (heap_tag == 0) {
+    return;
   }
 
-  if (objects_in_segment_ >= OBJECTS_PER_SEGMENT || rec->Size() >= BYTES_PER_SEGMENT) {
-    StartNewHeapDumpSegment();
-  }
+  CheckHeapSegmentConstraints(output);
 
-  switch (heapTag) {
-  // ID: object ID
-  case HPROF_ROOT_UNKNOWN:
-  case HPROF_ROOT_STICKY_CLASS:
-  case HPROF_ROOT_MONITOR_USED:
-  case HPROF_ROOT_INTERNED_STRING:
-  case HPROF_ROOT_DEBUGGER:
-  case HPROF_ROOT_VM_INTERNAL:
-    rec->AddU1(heapTag);
-    rec->AddObjectId(obj);
-    break;
+  switch (heap_tag) {
+    // ID: object ID
+    case HPROF_ROOT_UNKNOWN:
+    case HPROF_ROOT_STICKY_CLASS:
+    case HPROF_ROOT_MONITOR_USED:
+    case HPROF_ROOT_INTERNED_STRING:
+    case HPROF_ROOT_DEBUGGER:
+    case HPROF_ROOT_VM_INTERNAL:
+      __ AddU1(heap_tag);
+      __ AddObjectId(obj);
+      break;
 
-  // ID: object ID
-  // ID: JNI global ref ID
-  case HPROF_ROOT_JNI_GLOBAL:
-    rec->AddU1(heapTag);
-    rec->AddObjectId(obj);
-    rec->AddJniGlobalRefId(jniObj);
-    break;
+      // ID: object ID
+      // ID: JNI global ref ID
+    case HPROF_ROOT_JNI_GLOBAL:
+      __ AddU1(heap_tag);
+      __ AddObjectId(obj);
+      __ AddJniGlobalRefId(jni_obj);
+      break;
 
-  // ID: object ID
-  // U4: thread serial number
-  // U4: frame number in stack trace (-1 for empty)
-  case HPROF_ROOT_JNI_LOCAL:
-  case HPROF_ROOT_JNI_MONITOR:
-  case HPROF_ROOT_JAVA_FRAME:
-    rec->AddU1(heapTag);
-    rec->AddObjectId(obj);
-    rec->AddU4(gc_thread_serial_number_);
-    rec->AddU4((uint32_t)-1);
-    break;
+      // ID: object ID
+      // U4: thread serial number
+      // U4: frame number in stack trace (-1 for empty)
+    case HPROF_ROOT_JNI_LOCAL:
+    case HPROF_ROOT_JNI_MONITOR:
+    case HPROF_ROOT_JAVA_FRAME:
+      __ AddU1(heap_tag);
+      __ AddObjectId(obj);
+      __ AddU4(thread_serial);
+      __ AddU4((uint32_t)-1);
+      break;
 
-  // ID: object ID
-  // U4: thread serial number
-  case HPROF_ROOT_NATIVE_STACK:
-  case HPROF_ROOT_THREAD_BLOCK:
-    rec->AddU1(heapTag);
-    rec->AddObjectId(obj);
-    rec->AddU4(gc_thread_serial_number_);
-    break;
+      // ID: object ID
+      // U4: thread serial number
+    case HPROF_ROOT_NATIVE_STACK:
+    case HPROF_ROOT_THREAD_BLOCK:
+      __ AddU1(heap_tag);
+      __ AddObjectId(obj);
+      __ AddU4(thread_serial);
+      break;
 
-  // ID: thread object ID
-  // U4: thread serial number
-  // U4: stack trace serial number
-  case HPROF_ROOT_THREAD_OBJECT:
-    rec->AddU1(heapTag);
-    rec->AddObjectId(obj);
-    rec->AddU4(gc_thread_serial_number_);
-    rec->AddU4((uint32_t)-1);    // xxx
-    break;
+      // ID: thread object ID
+      // U4: thread serial number
+      // U4: stack trace serial number
+    case HPROF_ROOT_THREAD_OBJECT:
+      __ AddU1(heap_tag);
+      __ AddObjectId(obj);
+      __ AddU4(thread_serial);
+      __ AddU4((uint32_t)-1);    // xxx
+      break;
 
-  case HPROF_CLASS_DUMP:
-  case HPROF_INSTANCE_DUMP:
-  case HPROF_OBJECT_ARRAY_DUMP:
-  case HPROF_PRIMITIVE_ARRAY_DUMP:
-  case HPROF_HEAP_DUMP_INFO:
-  case HPROF_PRIMITIVE_ARRAY_NODATA_DUMP:
-    // Ignored.
-    break;
+    case HPROF_CLASS_DUMP:
+    case HPROF_INSTANCE_DUMP:
+    case HPROF_OBJECT_ARRAY_DUMP:
+    case HPROF_PRIMITIVE_ARRAY_DUMP:
+    case HPROF_HEAP_DUMP_INFO:
+    case HPROF_PRIMITIVE_ARRAY_NODATA_DUMP:
+      // Ignored.
+      break;
 
-  case HPROF_ROOT_FINALIZING:
-  case HPROF_ROOT_REFERENCE_CLEANUP:
-  case HPROF_UNREACHABLE:
-    LOG(FATAL) << "obsolete tag " << static_cast<int>(heapTag);
-    break;
+    case HPROF_ROOT_FINALIZING:
+    case HPROF_ROOT_REFERENCE_CLEANUP:
+    case HPROF_UNREACHABLE:
+      LOG(FATAL) << "obsolete tag " << static_cast<int>(heap_tag);
+      break;
   }
 
   ++objects_in_segment_;
-  return 0;
 }
 
 static int StackTraceSerialNumber(const mirror::Object* /*obj*/) {
-  return HPROF_NULL_STACK_TRACE;
+  return kHprofNullStackTrace;
 }
 
-int Hprof::DumpHeapObject(mirror::Object* obj) {
-  HprofRecord* rec = &current_record_;
+void Hprof::DumpHeapObject(mirror::Object* obj, EndianOutput* output) {
+  // Ignore classes that are retired.
+  if (obj->IsClass() && obj->AsClass()->IsRetired()) {
+    return;
+  }
+
   gc::space::ContinuousSpace* space =
       Runtime::Current()->GetHeap()->FindContinuousSpaceFromObject(obj, true);
   HprofHeapId heap_type = HPROF_HEAP_APP;
@@ -817,17 +912,15 @@
       heap_type = HPROF_HEAP_IMAGE;
     }
   }
-  if (objects_in_segment_ >= OBJECTS_PER_SEGMENT || rec->Size() >= BYTES_PER_SEGMENT) {
-    StartNewHeapDumpSegment();
-  }
+  CheckHeapSegmentConstraints(output);
 
   if (heap_type != current_heap_) {
     HprofStringId nameId;
 
     // This object is in a different heap than the current one.
     // Emit a HEAP_DUMP_INFO tag to change heaps.
-    rec->AddU1(HPROF_HEAP_DUMP_INFO);
-    rec->AddU4(static_cast<uint32_t>(heap_type));   // uint32_t: heap type
+    __ AddU1(HPROF_HEAP_DUMP_INFO);
+    __ AddU4(static_cast<uint32_t>(heap_type));   // uint32_t: heap type
     switch (heap_type) {
     case HPROF_HEAP_APP:
       nameId = LookupStringId("app");
@@ -844,179 +937,195 @@
       nameId = LookupStringId("<ILLEGAL>");
       break;
     }
-    rec->AddStringId(nameId);
+    __ AddStringId(nameId);
     current_heap_ = heap_type;
   }
 
   mirror::Class* c = obj->GetClass();
-  if (c == NULL) {
+  if (c == nullptr) {
     // This object will bother HprofReader, because it has a NULL
     // class, so just don't dump it. It could be
     // gDvm.unlinkedJavaLangClass or it could be an object just
     // allocated which hasn't been initialized yet.
   } else {
     if (obj->IsClass()) {
-      mirror::Class* thisClass = obj->AsClass();
-      // obj is a ClassObject.
-      size_t sFieldCount = thisClass->NumStaticFields();
-      if (sFieldCount != 0) {
-        int byteLength = sFieldCount * sizeof(JValue);  // TODO bogus; fields are packed
-        // Create a byte array to reflect the allocation of the
-        // StaticField array at the end of this class.
-        rec->AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
-        rec->AddClassStaticsId(thisClass);
-        rec->AddU4(StackTraceSerialNumber(obj));
-        rec->AddU4(byteLength);
-        rec->AddU1(hprof_basic_byte);
-        for (int i = 0; i < byteLength; ++i) {
-          rec->AddU1(0);
-        }
-      }
-
-      rec->AddU1(HPROF_CLASS_DUMP);
-      rec->AddClassId(LookupClassId(thisClass));
-      rec->AddU4(StackTraceSerialNumber(thisClass));
-      rec->AddClassId(LookupClassId(thisClass->GetSuperClass()));
-      rec->AddObjectId(thisClass->GetClassLoader());
-      rec->AddObjectId(nullptr);    // no signer
-      rec->AddObjectId(nullptr);    // no prot domain
-      rec->AddObjectId(nullptr);    // reserved
-      rec->AddObjectId(nullptr);    // reserved
-      if (thisClass->IsClassClass()) {
-        // ClassObjects have their static fields appended, so aren't all the same size.
-        // But they're at least this size.
-        rec->AddU4(sizeof(mirror::Class));  // instance size
-      } else if (thisClass->IsArrayClass() || thisClass->IsPrimitive()) {
-        rec->AddU4(0);
-      } else {
-        rec->AddU4(thisClass->GetObjectSize());  // instance size
-      }
-
-      rec->AddU2(0);  // empty const pool
-
-      // Static fields
-      if (sFieldCount == 0) {
-        rec->AddU2((uint16_t)0);
-      } else {
-        rec->AddU2((uint16_t)(sFieldCount+1));
-        rec->AddStringId(LookupStringId(STATIC_OVERHEAD_NAME));
-        rec->AddU1(hprof_basic_object);
-        rec->AddClassStaticsId(thisClass);
-
-        for (size_t i = 0; i < sFieldCount; ++i) {
-          mirror::ArtField* f = thisClass->GetStaticField(i);
-
-          size_t size;
-          HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
-          rec->AddStringId(LookupStringId(f->GetName()));
-          rec->AddU1(t);
-          if (size == 1) {
-            rec->AddU1(static_cast<uint8_t>(f->Get32(thisClass)));
-          } else if (size == 2) {
-            rec->AddU2(static_cast<uint16_t>(f->Get32(thisClass)));
-          } else if (size == 4) {
-            rec->AddU4(f->Get32(thisClass));
-          } else if (size == 8) {
-            rec->AddU8(f->Get64(thisClass));
-          } else {
-            CHECK(false);
-          }
-        }
-      }
-
-      // Instance fields for this class (no superclass fields)
-      int iFieldCount = thisClass->IsObjectClass() ? 0 : thisClass->NumInstanceFields();
-      rec->AddU2((uint16_t)iFieldCount);
-      for (int i = 0; i < iFieldCount; ++i) {
-        mirror::ArtField* f = thisClass->GetInstanceField(i);
-        HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), NULL);
-        rec->AddStringId(LookupStringId(f->GetName()));
-        rec->AddU1(t);
-      }
+      DumpHeapClass(obj->AsClass(), output);
     } else if (c->IsArrayClass()) {
-      mirror::Array* aobj = obj->AsArray();
-      uint32_t length = aobj->GetLength();
-
-      if (obj->IsObjectArray()) {
-        // obj is an object array.
-        rec->AddU1(HPROF_OBJECT_ARRAY_DUMP);
-
-        rec->AddObjectId(obj);
-        rec->AddU4(StackTraceSerialNumber(obj));
-        rec->AddU4(length);
-        rec->AddClassId(LookupClassId(c));
-
-        // Dump the elements, which are always objects or NULL.
-        rec->AddIdList(aobj->AsObjectArray<mirror::Object>());
-      } else {
-        size_t size;
-        HprofBasicType t = PrimitiveToBasicTypeAndSize(c->GetComponentType()->GetPrimitiveType(), &size);
-
-        // obj is a primitive array.
-        rec->AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
-
-        rec->AddObjectId(obj);
-        rec->AddU4(StackTraceSerialNumber(obj));
-        rec->AddU4(length);
-        rec->AddU1(t);
-
-        // Dump the raw, packed element values.
-        if (size == 1) {
-          rec->AddU1List((const uint8_t*)aobj->GetRawData(sizeof(uint8_t), 0), length);
-        } else if (size == 2) {
-          rec->AddU2List((const uint16_t*)aobj->GetRawData(sizeof(uint16_t), 0), length);
-        } else if (size == 4) {
-          rec->AddU4List((const uint32_t*)aobj->GetRawData(sizeof(uint32_t), 0), length);
-        } else if (size == 8) {
-          rec->AddU8List((const uint64_t*)aobj->GetRawData(sizeof(uint64_t), 0), length);
-        }
-      }
+      DumpHeapArray(obj->AsArray(), c, output);
     } else {
-      // obj is an instance object.
-      rec->AddU1(HPROF_INSTANCE_DUMP);
-      rec->AddObjectId(obj);
-      rec->AddU4(StackTraceSerialNumber(obj));
-      rec->AddClassId(LookupClassId(c));
-
-      // Reserve some space for the length of the instance data, which we won't
-      // know until we're done writing it.
-      size_t size_patch_offset = rec->Size();
-      rec->AddU4(0x77777777);
-
-      // Write the instance data;  fields for this class, followed by super class fields,
-      // and so on. Don't write the klass or monitor fields of Object.class.
-      mirror::Class* sclass = c;
-      while (!sclass->IsObjectClass()) {
-        int ifieldCount = sclass->NumInstanceFields();
-        for (int i = 0; i < ifieldCount; ++i) {
-          mirror::ArtField* f = sclass->GetInstanceField(i);
-          size_t size;
-          SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
-          if (size == 1) {
-            rec->AddU1(f->Get32(obj));
-          } else if (size == 2) {
-            rec->AddU2(f->Get32(obj));
-          } else if (size == 4) {
-            rec->AddU4(f->Get32(obj));
-          } else {
-            CHECK_EQ(size, 8U);
-            rec->AddU8(f->Get64(obj));
-          }
-        }
-
-        sclass = sclass->GetSuperClass();
-      }
-
-      // Patch the instance field length.
-      rec->UpdateU4(size_patch_offset, rec->Size() - (size_patch_offset + 4));
+      DumpHeapInstanceObject(obj, c, output);
     }
   }
 
   ++objects_in_segment_;
-  return 0;
 }
 
-void Hprof::VisitRoot(const mirror::Object* obj, uint32_t thread_id, RootType type) {
+void Hprof::DumpHeapClass(mirror::Class* klass, EndianOutput* output) {
+  size_t sFieldCount = klass->NumStaticFields();
+  if (sFieldCount != 0) {
+    int byteLength = sFieldCount * sizeof(JValue);  // TODO bogus; fields are packed
+    // Create a byte array to reflect the allocation of the
+    // StaticField array at the end of this class.
+    __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
+    __ AddClassStaticsId(klass);
+    __ AddU4(StackTraceSerialNumber(klass));
+    __ AddU4(byteLength);
+    __ AddU1(hprof_basic_byte);
+    for (int i = 0; i < byteLength; ++i) {
+      __ AddU1(0);
+    }
+  }
+
+  __ AddU1(HPROF_CLASS_DUMP);
+  __ AddClassId(LookupClassId(klass));
+  __ AddU4(StackTraceSerialNumber(klass));
+  __ AddClassId(LookupClassId(klass->GetSuperClass()));
+  __ AddObjectId(klass->GetClassLoader());
+  __ AddObjectId(nullptr);    // no signer
+  __ AddObjectId(nullptr);    // no prot domain
+  __ AddObjectId(nullptr);    // reserved
+  __ AddObjectId(nullptr);    // reserved
+  if (klass->IsClassClass()) {
+    // ClassObjects have their static fields appended, so aren't all the same size.
+    // But they're at least this size.
+    __ AddU4(sizeof(mirror::Class));  // instance size
+  } else if (klass->IsArrayClass() || klass->IsPrimitive()) {
+    __ AddU4(0);
+  } else {
+    __ AddU4(klass->GetObjectSize());  // instance size
+  }
+
+  __ AddU2(0);  // empty const pool
+
+  // Static fields
+  if (sFieldCount == 0) {
+    __ AddU2((uint16_t)0);
+  } else {
+    __ AddU2((uint16_t)(sFieldCount+1));
+    __ AddStringId(LookupStringId(kStaticOverheadName));
+    __ AddU1(hprof_basic_object);
+    __ AddClassStaticsId(klass);
+
+    for (size_t i = 0; i < sFieldCount; ++i) {
+      mirror::ArtField* f = klass->GetStaticField(i);
+
+      size_t size;
+      HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
+      __ AddStringId(LookupStringId(f->GetName()));
+      __ AddU1(t);
+      switch (size) {
+        case 1:
+          __ AddU1(static_cast<uint8_t>(f->Get32(klass)));
+          break;
+        case 2:
+          __ AddU2(static_cast<uint16_t>(f->Get32(klass)));
+          break;
+        case 4:
+          __ AddU4(f->Get32(klass));
+          break;
+        case 8:
+          __ AddU8(f->Get64(klass));
+          break;
+        default:
+          LOG(FATAL) << "Unexpected size " << size;
+          UNREACHABLE();
+      }
+    }
+  }
+
+  // Instance fields for this class (no superclass fields)
+  int iFieldCount = klass->IsObjectClass() ? 0 : klass->NumInstanceFields();
+  __ AddU2((uint16_t)iFieldCount);
+  for (int i = 0; i < iFieldCount; ++i) {
+    mirror::ArtField* f = klass->GetInstanceField(i);
+    __ AddStringId(LookupStringId(f->GetName()));
+    HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), nullptr);
+    __ AddU1(t);
+  }
+}
+
+void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass, EndianOutput* output) {
+  uint32_t length = obj->GetLength();
+
+  if (obj->IsObjectArray()) {
+    // obj is an object array.
+    __ AddU1(HPROF_OBJECT_ARRAY_DUMP);
+
+    __ AddObjectId(obj);
+    __ AddU4(StackTraceSerialNumber(obj));
+    __ AddU4(length);
+    __ AddClassId(LookupClassId(klass));
+
+    // Dump the elements, which are always objects or NULL.
+    __ AddIdList(obj->AsObjectArray<mirror::Object>());
+  } else {
+    size_t size;
+    HprofBasicType t = SignatureToBasicTypeAndSize(
+        Primitive::Descriptor(klass->GetComponentType()->GetPrimitiveType()), &size);
+
+    // obj is a primitive array.
+    __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
+
+    __ AddObjectId(obj);
+    __ AddU4(StackTraceSerialNumber(obj));
+    __ AddU4(length);
+    __ AddU1(t);
+
+    // Dump the raw, packed element values.
+    if (size == 1) {
+      __ AddU1List(reinterpret_cast<const uint8_t*>(obj->GetRawData(sizeof(uint8_t), 0)), length);
+    } else if (size == 2) {
+      __ AddU2List(reinterpret_cast<const uint16_t*>(obj->GetRawData(sizeof(uint16_t), 0)), length);
+    } else if (size == 4) {
+      __ AddU4List(reinterpret_cast<const uint32_t*>(obj->GetRawData(sizeof(uint32_t), 0)), length);
+    } else if (size == 8) {
+      __ AddU8List(reinterpret_cast<const uint64_t*>(obj->GetRawData(sizeof(uint64_t), 0)), length);
+    }
+  }
+}
+
+void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass,
+                                   EndianOutput* output) {
+  // obj is an instance object.
+  __ AddU1(HPROF_INSTANCE_DUMP);
+  __ AddObjectId(obj);
+  __ AddU4(StackTraceSerialNumber(obj));
+  __ AddClassId(LookupClassId(klass));
+
+  // Reserve some space for the length of the instance data, which we won't
+  // know until we're done writing it.
+  size_t size_patch_offset = output->Length();
+  __ AddU4(0x77777777);
+
+  // Write the instance data;  fields for this class, followed by super class fields,
+  // and so on. Don't write the klass or monitor fields of Object.class.
+  while (!klass->IsObjectClass()) {
+    int ifieldCount = klass->NumInstanceFields();
+    for (int i = 0; i < ifieldCount; ++i) {
+      mirror::ArtField* f = klass->GetInstanceField(i);
+      size_t size;
+      SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
+      if (size == 1) {
+        __ AddU1(f->Get32(obj));
+      } else if (size == 2) {
+        __ AddU2(f->Get32(obj));
+      } else if (size == 4) {
+        __ AddU4(f->Get32(obj));
+      } else {
+        CHECK_EQ(size, 8U);
+        __ AddU8(f->Get64(obj));
+      }
+    }
+
+    klass = klass->GetSuperClass();
+  }
+
+  // Patch the instance field length.
+  __ UpdateU4(size_patch_offset, output->Length() - (size_patch_offset + 4));
+}
+
+void Hprof::VisitRoot(const mirror::Object* obj, uint32_t thread_id, RootType type,
+                      EndianOutput* output) {
   static const HprofHeapTag xlate[] = {
     HPROF_ROOT_UNKNOWN,
     HPROF_ROOT_JNI_GLOBAL,
@@ -1035,14 +1144,10 @@
     HPROF_ROOT_JNI_MONITOR,
   };
   CHECK_LT(type, sizeof(xlate) / sizeof(HprofHeapTag));
-  if (obj == NULL) {
+  if (obj == nullptr) {
     return;
   }
-  gc_scan_state_ = xlate[type];
-  gc_thread_serial_number_ = thread_id;
-  MarkRootObject(obj, 0);
-  gc_scan_state_ = 0;
-  gc_thread_serial_number_ = 0;
+  MarkRootObject(obj, 0, xlate[type], thread_id, output);
 }
 
 // If "direct_to_ddms" is true, the other arguments are ignored, and data is
@@ -1050,7 +1155,7 @@
 // If "fd" is >= 0, the output will be written to that file descriptor.
 // Otherwise, "filename" is used to create an output file.
 void DumpHeap(const char* filename, int fd, bool direct_to_ddms) {
-  CHECK(filename != NULL);
+  CHECK(filename != nullptr);
 
   Runtime::Current()->GetThreadList()->SuspendAll();
   Hprof hprof(filename, fd, direct_to_ddms);
@@ -1058,78 +1163,5 @@
   Runtime::Current()->GetThreadList()->ResumeAll();
 }
 
-// Returns how many characters were in the buffer (or written).
-size_t HprofRecord::Flush() {
-  size_t chars = 0;
-  if (dirty_) {
-    unsigned char headBuf[sizeof(uint8_t) + 2 * sizeof(uint32_t)];
-    headBuf[0] = tag_;
-    U4_TO_BUF_BE(headBuf, 1, time_);
-    U4_TO_BUF_BE(headBuf, 5, length_);
-    chars += hprof_->Write(headBuf, sizeof(headBuf), fp_);
-    chars += hprof_->Write(body_, length_, fp_);
-    dirty_ = false;
-  }
-  return chars;
-}
-
-void HprofRecord::AddU1(uint8_t value) {
-  if (hprof_->AllowWriting()) {
-    GuaranteeRecordAppend(1);
-    body_[length_] = value;
-  }
-  ++length_;
-}
-
-void HprofRecord::AddU1List(const uint8_t* values, size_t numValues) {
-  if (hprof_->AllowWriting()) {
-    GuaranteeRecordAppend(numValues);
-    memcpy(body_ + length_, values, numValues);
-  }
-  length_ += numValues;
-}
-
-void HprofRecord::AddU2List(const uint16_t* values, size_t numValues) {
-  if (hprof_->AllowWriting()) {
-    GuaranteeRecordAppend(numValues * 2);
-    unsigned char* insert = body_ + length_;
-    for (size_t i = 0; i < numValues; ++i) {
-      U2_TO_BUF_BE(insert, 0, *values++);
-      insert += sizeof(*values);
-    }
-  }
-  length_ += numValues * 2;
-}
-
-void HprofRecord::AddU4List(const uint32_t* values, size_t numValues) {
-  if (hprof_->AllowWriting()) {
-    GuaranteeRecordAppend(numValues * 4);
-    unsigned char* insert = body_ + length_;
-    for (size_t i = 0; i < numValues; ++i) {
-      U4_TO_BUF_BE(insert, 0, *values++);
-      insert += sizeof(*values);
-    }
-  }
-  length_ += numValues * 4;
-}
-
-void HprofRecord::UpdateU4(size_t offset, uint32_t new_value) {
-  if (hprof_->AllowWriting()) {
-    U4_TO_BUF_BE(body_, offset, new_value);
-  }
-}
-
-void HprofRecord::AddU8List(const uint64_t* values, size_t numValues) {
-  if (hprof_->AllowWriting()) {
-    GuaranteeRecordAppend(numValues * 8);
-    unsigned char* insert = body_ + length_;
-    for (size_t i = 0; i < numValues; ++i) {
-      U8_TO_BUF_BE(insert, 0, *values++);
-      insert += sizeof(*values);
-    }
-  }
-  length_ += numValues * 8;
-}
-
 }  // namespace hprof
 }  // namespace art
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index b294e48..a1d2a6c 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -333,15 +333,15 @@
   std::vector<std::string> class_path;
   Split(Runtime::Current()->GetClassPathString(), ':', &class_path);
   expandBufAdd4BE(pReply, class_path.size());
-  for (size_t i = 0; i < class_path.size(); ++i) {
-    expandBufAddUtf8String(pReply, class_path[i]);
+  for (const std::string& str : class_path) {
+    expandBufAddUtf8String(pReply, str);
   }
 
   std::vector<std::string> boot_class_path;
   Split(Runtime::Current()->GetBootClassPathString(), ':', &boot_class_path);
   expandBufAdd4BE(pReply, boot_class_path.size());
-  for (size_t i = 0; i < boot_class_path.size(); ++i) {
-    expandBufAddUtf8String(pReply, boot_class_path[i]);
+  for (const std::string& str : boot_class_path) {
+    expandBufAddUtf8String(pReply, str);
   }
 
   return ERR_NONE;
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index 20db368..e415c3d 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -104,7 +104,20 @@
 }
 
 void ObjectRegistry::Clear() {
-  Thread* self = Thread::Current();
+  Thread* const self = Thread::Current();
+
+  // We must not hold the mutator lock exclusively if we want to delete weak global
+  // references. Otherwise this can lead to a deadlock with a running GC:
+  // 1. GC thread disables access to weak global references, then releases
+  //    mutator lock.
+  // 2. JDWP thread takes mutator lock exclusively after suspending all
+  //    threads.
+  // 3. GC thread waits for shared mutator lock which is held by JDWP
+  //    thread.
+  // 4. JDWP thread clears weak global references but needs to wait for GC
+  //    thread to re-enable access to them.
+  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
+
   MutexLock mu(self, lock_);
   VLOG(jdwp) << "Object registry contained " << object_to_entry_.size() << " entries";
   // Delete all the JNI references.
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index ef6fc67..53e5534 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -34,6 +34,7 @@
 TEST_F(DexCacheTest, Open) {
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<1> hs(soa.Self());
+  ASSERT_TRUE(java_lang_dex_file_ != NULL);
   Handle<DexCache> dex_cache(
       hs.NewHandle(class_linker_->AllocDexCache(soa.Self(), *java_lang_dex_file_)));
   ASSERT_TRUE(dex_cache.Get() != NULL);
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 44c6d87..037072d 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -115,7 +115,8 @@
   }
 
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
-  std::unique_ptr<std::vector<const DexFile*>> dex_files(new std::vector<const DexFile*>());
+  std::unique_ptr<std::vector<std::unique_ptr<const DexFile>>> dex_files(
+      new std::vector<std::unique_ptr<const DexFile>>());
   std::vector<std::string> error_msgs;
 
   bool success = linker->OpenDexFilesFromOat(sourceName.c_str(), outputName.c_str(), &error_msgs,
@@ -143,9 +144,11 @@
   }
 }
 
-static std::vector<const DexFile*>* toDexFiles(jlong dex_file_address, JNIEnv* env) {
-  std::vector<const DexFile*>* dex_files = reinterpret_cast<std::vector<const DexFile*>*>(
-      static_cast<uintptr_t>(dex_file_address));
+static std::vector<std::unique_ptr<const DexFile>>*
+toDexFiles(jlong dex_file_address, JNIEnv* env) {
+  std::vector<std::unique_ptr<const DexFile>>* dex_files
+    = reinterpret_cast<std::vector<std::unique_ptr<const DexFile>>*>(
+        static_cast<uintptr_t>(dex_file_address));
   if (UNLIKELY(dex_files == nullptr)) {
     ScopedObjectAccess soa(env);
     ThrowNullPointerException(NULL, "dex_file == null");
@@ -154,27 +157,29 @@
 }
 
 static void DexFile_closeDexFile(JNIEnv* env, jclass, jlong cookie) {
-  std::unique_ptr<std::vector<const DexFile*>> dex_files(toDexFiles(cookie, env));
+  std::unique_ptr<std::vector<std::unique_ptr<const DexFile>>> dex_files(toDexFiles(cookie, env));
   if (dex_files.get() == nullptr) {
     return;
   }
   ScopedObjectAccess soa(env);
 
-  size_t index = 0;
-  for (const DexFile* dex_file : *dex_files) {
+  // The Runtime currently never unloads classes, which means any registered
+  // dex files must be kept around forever in case they are used. We
+  // accomplish this here by explicitly leaking those dex files that are
+  // registered.
+  //
+  // TODO: The Runtime should support unloading of classes and freeing of the
+  // dex files for those unloaded classes rather than leaking dex files here.
+  for (auto& dex_file : *dex_files) {
     if (Runtime::Current()->GetClassLinker()->IsDexFileRegistered(*dex_file)) {
-      (*dex_files)[index] = nullptr;
+      dex_file.release();
     }
-    index++;
   }
-
-  STLDeleteElements(dex_files.get());
-  // Unique_ptr will delete the vector itself.
 }
 
 static jclass DexFile_defineClassNative(JNIEnv* env, jclass, jstring javaName, jobject javaLoader,
                                         jlong cookie) {
-  std::vector<const DexFile*>* dex_files = toDexFiles(cookie, env);
+  std::vector<std::unique_ptr<const DexFile>>* dex_files = toDexFiles(cookie, env);
   if (dex_files == NULL) {
     VLOG(class_linker) << "Failed to find dex_file";
     return NULL;
@@ -186,7 +191,7 @@
   }
   const std::string descriptor(DotToDescriptor(class_name.c_str()));
   const size_t hash(ComputeModifiedUtf8Hash(descriptor.c_str()));
-  for (const DexFile* dex_file : *dex_files) {
+  for (auto& dex_file : *dex_files) {
     const DexFile::ClassDef* dex_class_def = dex_file->FindClassDef(descriptor.c_str(), hash);
     if (dex_class_def != nullptr) {
       ScopedObjectAccess soa(env);
@@ -218,13 +223,13 @@
 // Note: this can be an expensive call, as we sort out duplicates in MultiDex files.
 static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jlong cookie) {
   jobjectArray result = nullptr;
-  std::vector<const DexFile*>* dex_files = toDexFiles(cookie, env);
+  std::vector<std::unique_ptr<const DexFile>>* dex_files = toDexFiles(cookie, env);
 
   if (dex_files != nullptr) {
     // Push all class descriptors into a set. Use set instead of unordered_set as we want to
     // retrieve all in the end.
     std::set<const char*, CharPointerComparator> descriptors;
-    for (const DexFile* dex_file : *dex_files) {
+    for (auto& dex_file : *dex_files) {
       for (size_t i = 0; i < dex_file->NumClassDefs(); ++i) {
         const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
         const char* descriptor = dex_file->GetClassDescriptor(class_def);
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 1c6cc8b..358519b 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -454,7 +454,7 @@
   return reinterpret_cast<const DexFile::Header*>(dex_file_pointer_)->file_size_;
 }
 
-const DexFile* OatFile::OatDexFile::OpenDexFile(std::string* error_msg) const {
+std::unique_ptr<const DexFile> OatFile::OatDexFile::OpenDexFile(std::string* error_msg) const {
   return DexFile::Open(dex_file_pointer_, FileSize(), dex_file_location_,
                        dex_file_location_checksum_, error_msg);
 }
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 831ba1e..6ae3c3e 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -210,7 +210,7 @@
   class OatDexFile {
    public:
     // Opens the DexFile referred to by this OatDexFile from within the containing OatFile.
-    const DexFile* OpenDexFile(std::string* error_msg) const;
+    std::unique_ptr<const DexFile> OpenDexFile(std::string* error_msg) const;
 
     const OatFile* GetOatFile() const {
       return oat_file_;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index fb6034d..fabbbfb 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -625,8 +625,9 @@
 }
 
 static bool OpenDexFilesFromImage(const std::string& image_location,
-                                  std::vector<const DexFile*>& dex_files,
+                                  std::vector<std::unique_ptr<const DexFile>>* dex_files,
                                   size_t* failures) {
+  DCHECK(dex_files != nullptr) << "OpenDexFilesFromImage: out-param is NULL";
   std::string system_filename;
   bool has_system = false;
   std::string cache_filename_unused;
@@ -670,11 +671,11 @@
       *failures += 1;
       continue;
     }
-    const DexFile* dex_file = oat_dex_file->OpenDexFile(&error_msg);
-    if (dex_file == nullptr) {
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+    if (dex_file.get() == nullptr) {
       *failures += 1;
     } else {
-      dex_files.push_back(dex_file);
+      dex_files->push_back(std::move(dex_file));
     }
   }
   Runtime::Current()->GetClassLinker()->RegisterOatFile(oat_file.release());
@@ -685,7 +686,8 @@
 static size_t OpenDexFiles(const std::vector<std::string>& dex_filenames,
                            const std::vector<std::string>& dex_locations,
                            const std::string& image_location,
-                           std::vector<const DexFile*>& dex_files) {
+                           std::vector<std::unique_ptr<const DexFile>>* dex_files) {
+  DCHECK(dex_files != nullptr) << "OpenDexFiles: out-param is NULL";
   size_t failure_count = 0;
   if (!image_location.empty() && OpenDexFilesFromImage(image_location, dex_files, &failure_count)) {
     return failure_count;
@@ -699,7 +701,7 @@
       LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
       continue;
     }
-    if (!DexFile::Open(dex_filename, dex_location, &error_msg, &dex_files)) {
+    if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
       LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
       ++failure_count;
     }
@@ -865,6 +867,16 @@
     if (kIsDebugBuild) {
       GetHeap()->GetImageSpace()->VerifyImageAllocations();
     }
+    if (boot_class_path_string_.empty()) {
+      // The bootclasspath is not explicitly specified: construct it from the loaded dex files.
+      const std::vector<const DexFile*>& boot_class_path = GetClassLinker()->GetBootClassPath();
+      std::vector<std::string> dex_locations;
+      dex_locations.reserve(boot_class_path.size());
+      for (const DexFile* dex_file : boot_class_path) {
+        dex_locations.push_back(dex_file->GetLocation());
+      }
+      boot_class_path_string_ = Join(dex_locations, ':');
+    }
   } else {
     std::vector<std::string> dex_filenames;
     Split(boot_class_path_string_, ':', &dex_filenames);
@@ -877,9 +889,9 @@
       CHECK_EQ(dex_filenames.size(), dex_locations.size());
     }
 
-    std::vector<const DexFile*> boot_class_path;
-    OpenDexFiles(dex_filenames, dex_locations, options->image_, boot_class_path);
-    class_linker_->InitWithoutImage(boot_class_path);
+    std::vector<std::unique_ptr<const DexFile>> boot_class_path;
+    OpenDexFiles(dex_filenames, dex_locations, options->image_, &boot_class_path);
+    class_linker_->InitWithoutImage(std::move(boot_class_path));
     // TODO: Should we move the following to InitWithoutImage?
     SetInstructionSet(kRuntimeISA);
     for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index e319963..d58fe3c 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -425,6 +425,9 @@
       LOCKS_EXCLUDED(method_verifier_lock_);
 
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
+
+  // The caller is responsible for ensuring the class_path DexFiles remain
+  // valid as long as the Runtime object remains valid.
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
   void StartProfiler(const char* profile_output_filename);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 364b7c2..0f64883 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -152,8 +152,7 @@
 
 // Dump checkpoint timeout in milliseconds. Larger amount on the host, as dumping will invoke
 // addr2line when available.
-static constexpr uint32_t kDumpWaitTimeoutTarget = 10000;
-static constexpr uint32_t kDumpWaitTimeoutHost = 20000;
+static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 10000 : 20000;
 
 // A closure used by Thread::Dump.
 class DumpCheckpoint FINAL : public Closure {
@@ -181,8 +180,7 @@
   void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
     Thread* self = Thread::Current();
     ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
-    bool timed_out = barrier_.Increment(self, threads_running_checkpoint,
-        kIsTargetBuild ? kDumpWaitTimeoutTarget : kDumpWaitTimeoutHost);
+    bool timed_out = barrier_.Increment(self, threads_running_checkpoint, kDumpWaitTimeout);
     if (timed_out) {
       // Avoid a recursive abort.
       LOG((kIsDebugBuild && (gAborting == 0)) ? FATAL : ERROR)
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index 770ca7e..f67adc1 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -41,14 +41,12 @@
         << error_msg;
   }
 
-  void VerifyDexFile(const DexFile* dex)
+  void VerifyDexFile(const DexFile& dex)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ASSERT_TRUE(dex != NULL);
-
     // Verify all the classes defined in this file
-    for (size_t i = 0; i < dex->NumClassDefs(); i++) {
-      const DexFile::ClassDef& class_def = dex->GetClassDef(i);
-      const char* descriptor = dex->GetClassDescriptor(class_def);
+    for (size_t i = 0; i < dex.NumClassDefs(); i++) {
+      const DexFile::ClassDef& class_def = dex.GetClassDef(i);
+      const char* descriptor = dex.GetClassDescriptor(class_def);
       VerifyClass(descriptor);
     }
   }
@@ -56,7 +54,8 @@
 
 TEST_F(MethodVerifierTest, LibCore) {
   ScopedObjectAccess soa(Thread::Current());
-  VerifyDexFile(java_lang_dex_file_);
+  ASSERT_TRUE(java_lang_dex_file_ != nullptr);
+  VerifyDexFile(*java_lang_dex_file_);
 }
 
 }  // namespace verifier
diff --git a/test/122-npe/src/Main.java b/test/122-npe/src/Main.java
index 2fdcb9c..8f68205 100644
--- a/test/122-npe/src/Main.java
+++ b/test/122-npe/src/Main.java
@@ -191,6 +191,132 @@
     check(npe, thisLine += 7);
 
     try {
+      ((Value) null).volatileObjectField.toString();
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      ((Value) null).volatileObjectField = "Fisk";
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      useInt(((Value) null).volatileIntField);
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      ((Value) null).volatileIntField = 42;
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      useFloat(((Value) null).volatileFloatField);
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      ((Value) null).volatileFloatField = 42.0F;
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      useLong(((Value) null).volatileLongField);
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      ((Value) null).volatileLongField = 42L;
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      useDouble(((Value) null).volatileDoubleField);
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      ((Value) null).volatileDoubleField = 42.0d;
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      useInt(((Value) null).volatileByteField);
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      ((Value) null).volatileByteField = 42;
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      if (((Value) null).volatileBooleanField) { }
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      ((Value) null).volatileBooleanField = true;
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      useInt(((Value) null).volatileCharField);
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      ((Value) null).volatileCharField = '\u0042';
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      useInt(((Value) null).volatileShortField);
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
+      ((Value) null).volatileShortField = 42;
+    } catch (NullPointerException e) {
+      npe = e;
+    }
+    check(npe, thisLine += 7);
+
+    try {
       ((Object[]) null)[0].toString();
     } catch (NullPointerException e) {
       npe = e;
@@ -477,11 +603,22 @@
   static class Value {
     Object objectField;
     int intField;
-    float floatField; long longField;
+    float floatField;
+    long longField;
     double doubleField;
     byte byteField;
     boolean booleanField;
     char charField;
     short shortField;
+
+    volatile Object volatileObjectField;
+    volatile int volatileIntField;
+    volatile float volatileFloatField;
+    volatile long volatileLongField;
+    volatile double volatileDoubleField;
+    volatile byte volatileByteField;
+    volatile boolean volatileBooleanField;
+    volatile char volatileCharField;
+    volatile short volatileShortField;
   }
 }
diff --git a/test/130-hprof/expected.txt b/test/130-hprof/expected.txt
new file mode 100644
index 0000000..cc3d9f2
--- /dev/null
+++ b/test/130-hprof/expected.txt
@@ -0,0 +1 @@
+Generated data.
diff --git a/test/130-hprof/info.txt b/test/130-hprof/info.txt
new file mode 100644
index 0000000..64475ef
--- /dev/null
+++ b/test/130-hprof/info.txt
@@ -0,0 +1 @@
+Dump the heap for this test.
diff --git a/test/130-hprof/src/Main.java b/test/130-hprof/src/Main.java
new file mode 100644
index 0000000..67e5232
--- /dev/null
+++ b/test/130-hprof/src/Main.java
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.lang.ref.WeakReference;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+    private static final int TEST_LENGTH = 100;  // Length of every array built below.
+
+    private static boolean makeArray(int i) {
+        return i % 10 == 0;  // Every tenth index gets a nested array.
+    }
+
+    private static void fillArray(Object global[], Object local[], int i) {  // 'i' is unused.
+        // Crude linking: slot 0 points back at global, the rest alias global's entries.
+        local[0] = global;
+        for (int j = 1; j < local.length; j++) {
+            local[j] = global[j];
+        }
+    }
+
+    public static void main(String[] args) {
+        // Create some data: a nested Object[] at every tenth index, strings elsewhere.
+        Object data[] = new Object[TEST_LENGTH];
+        for (int i = 0; i < data.length; i++) {
+            if (makeArray(i)) {
+                data[i] = new Object[TEST_LENGTH];
+            } else {
+                data[i] = String.valueOf(i);
+            }
+        }
+        for (int i = 0; i < data.length; i++) {
+            if (makeArray(i)) {
+                Object data2[] = (Object[]) data[i];
+                fillArray(data, data2, i);
+            }
+        }
+        System.out.println("Generated data.");
+
+        File dumpFile = null;
+        File convFile = null;
+
+        try {
+            // Now dump the heap.
+            dumpFile = createDump();
+
+            // Run hprof-conv on it.
+            convFile = getConvFile();
+
+            File hprof_conv = getHprofConf();
+            try {
+                ProcessBuilder pb = new ProcessBuilder(
+                        hprof_conv.getAbsoluteFile().toString(),
+                        dumpFile.getAbsoluteFile().toString(),
+                        convFile.getAbsoluteFile().toString());
+                pb.redirectErrorStream(true);  // NOTE(review): output is never read.
+                Process process = pb.start();
+                int ret = process.waitFor();
+                if (ret != 0) {
+                    throw new RuntimeException("Exited abnormally with " + ret);
+                }
+            } catch (Exception exc) {
+                throw new RuntimeException(exc);
+            }
+        } finally {
+            // Delete the temporary files, whether or not the conversion succeeded.
+            if (dumpFile != null) {
+                dumpFile.delete();
+            }
+            if (convFile != null) {
+                convFile.delete();
+            }
+        }
+    }
+
+    private static File getHprofConf() {  // Builds the path of the hprof-conv binary.
+        // java.library.path points to the lib directory; hprof-conv is looked up in ../bin.
+        File libDir = new File(System.getProperty("java.library.path"));
+        return new File(new File(libDir.getParentFile(), "bin"), "hprof-conv");
+    }
+
+    private static File createDump() {  // Returns the dump file, or null on any failure.
+        java.lang.reflect.Method dumpHprofDataMethod = getDumpHprofDataMethod();
+        if (dumpHprofDataMethod != null) {
+            File f = getDumpFile();
+            try {
+                dumpHprofDataMethod.invoke(null, f.getAbsoluteFile().toString());
+                return f;
+            } catch (Exception exc) {
+                exc.printStackTrace(System.out);
+            }
+        } else {
+            System.out.println("Could not find dump method!");
+        }
+        return null;
+    }
+
+    /**
+     * Finds VMDebug.dumpHprofData() through reflection.  In the reference
+     * implementation this will not be available.
+     *
+     * @return the reflection object, or null if the method can't be found
+     */
+    private static Method getDumpHprofDataMethod() {
+        ClassLoader myLoader = Main.class.getClassLoader();
+        Class vmdClass;
+        try {
+            vmdClass = myLoader.loadClass("dalvik.system.VMDebug");
+        } catch (ClassNotFoundException cnfe) {
+            return null;
+        }
+
+        Method meth;
+        try {
+            meth = vmdClass.getMethod("dumpHprofData",
+                    new Class[] { String.class });
+        } catch (NoSuchMethodException nsme) {
+            System.err.println("Found VMDebug but not dumpHprofData method");
+            return null;
+        }
+
+        return meth;
+    }
+
+    private static File getDumpFile() {  // Returns null if the temp file cannot be created.
+        try {
+            return File.createTempFile("test-130-hprof", "dump");
+        } catch (Exception exc) {
+            return null;
+        }
+    }
+
+    private static File getConvFile() {  // Returns null if the temp file cannot be created.
+        try {
+            return File.createTempFile("test-130-hprof", "conv");
+        } catch (Exception exc) {
+            return null;
+        }
+    }
+}
diff --git a/test/439-swap-double/expected.txt b/test/439-swap-double/expected.txt
new file mode 100644
index 0000000..019c901
--- /dev/null
+++ b/test/439-swap-double/expected.txt
@@ -0,0 +1,4 @@
+-26.0
+-24.0
+-22.0
+-20.0
diff --git a/test/439-swap-double/info.txt b/test/439-swap-double/info.txt
new file mode 100644
index 0000000..23447d2
--- /dev/null
+++ b/test/439-swap-double/info.txt
@@ -0,0 +1,2 @@
+Test for the optimizing compiler's parallel swap support in
+the presence of register pairs (in this case, doubles on ARM).
diff --git a/test/439-swap-double/src/Main.java b/test/439-swap-double/src/Main.java
new file mode 100644
index 0000000..da11577
--- /dev/null
+++ b/test/439-swap-double/src/Main.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Test for the optimizing compiler's parallel swap support in
+// the presence of register pairs (in this case, doubles on ARM).
+public class Main {
+  public static void main(String[] args) {
+    new Main().foo();  // foo() is an instance method: it reads the double fields below.
+  }
+
+  public void foo() {
+    // Do multiple calls to force swapping of registers. Note that
+    // this depends on the calling convention, as a stack-only convention
+    // may not need the swapping.
+    callWithDoubles(a, b, c, d, e, f, g);  // Prints -26.0.
+    callWithDoubles(b, c, d, e, f, g, a);  // Prints -24.0.
+    callWithDoubles(c, d, e, f, g, a, b);  // Prints -22.0.
+    callWithDoubles(d, e, f, g, a, b, c);  // Prints -20.0.
+  }
+
+  public static void callWithDoubles(
+      double a, double b, double c, double d, double e, double f, double g) {
+    System.out.println(a - b - c - d - e - f - g);  // First argument minus all the others.
+  }
+
+  double a = 1.0;  // Distinct values, so each rotated call above prints a different result.
+  double b = 2.0;
+  double c = 3.0;
+  double d = 4.0;
+  double e = 5.0;
+  double f = 6.0;
+  double g = 7.0;
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index fd66a02..dc4ec66 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -229,6 +229,14 @@
     $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),115-native-bridge, \
     $(ALL_ADDRESS_SIZES))
 
+# 130-hprof dumps the heap and runs hprof-conv to check whether the file is somewhat readable. This
+# is only possible on the host, so every target variant of the test is marked as known broken.
+# TODO: Turn off all the other combinations; this is more about testing actual ART code. A gtest is
+#       very hard to write here, as (for a complete test) JDWP must be set up.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
+    $(PICTEST_TYPES),130-hprof,$(ALL_ADDRESS_SIZES))
+
 # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
 # Therefore we shouldn't run them in situations where we actually don't have these since they
 # explicitly test for them. These all also assume we have an image.
@@ -580,7 +588,7 @@
   endif
 $$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
 .PHONY: $$(run_test_rule_name)
-$$(run_test_rule_name): $(DX) $(HOST_OUT_EXECUTABLES)/jasmin $(HOST_OUT_EXECUTABLES)/smali $(HOST_OUT_EXECUTABLES)/dexmerger $$(prereq_rule)
+$$(run_test_rule_name): $(DX) $(HOST_OUT_EXECUTABLES)/jasmin $(HOST_OUT_EXECUTABLES)/smali $(HOST_OUT_EXECUTABLES)/dexmerger $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
 	  DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	    SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
