Merge "Move single-step control into thread." into dalvik-dev
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_APACHE2
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 0000000..faed58a
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,190 @@
+
+   Copyright (c) 2005-2013, The Android Open Source Project
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index b07753c..bed48ba 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -22,6 +22,7 @@
 	compiler/elf_writer_test.cc \
 	compiler/image_test.cc \
 	compiler/jni/jni_compiler_test.cc \
+	compiler/leb128_encoder_test.cc \
 	compiler/oat_test.cc \
 	compiler/output_stream_test.cc \
 	compiler/utils/dedupe_set_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index fc2f02b..b7dc9f6 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -23,6 +23,7 @@
 	dex/local_value_numbering.cc \
 	dex/arena_allocator.cc \
 	dex/arena_bit_vector.cc \
+	dex/quick/arm/arm_dex_file_method_inliner.cc \
 	dex/quick/arm/assemble_arm.cc \
 	dex/quick/arm/call_arm.cc \
 	dex/quick/arm/fp_arm.cc \
@@ -30,6 +31,8 @@
 	dex/quick/arm/target_arm.cc \
 	dex/quick/arm/utility_arm.cc \
 	dex/quick/codegen_util.cc \
+	dex/quick/dex_file_method_inliner.cc \
+	dex/quick/dex_file_to_method_inliner_map.cc \
 	dex/quick/gen_common.cc \
 	dex/quick/gen_invoke.cc \
 	dex/quick/gen_loadstore.cc \
@@ -38,6 +41,7 @@
 	dex/quick/mips/call_mips.cc \
 	dex/quick/mips/fp_mips.cc \
 	dex/quick/mips/int_mips.cc \
+	dex/quick/mips/mips_dex_file_method_inliner.cc \
 	dex/quick/mips/target_mips.cc \
 	dex/quick/mips/utility_mips.cc \
 	dex/quick/mir_to_lir.cc \
@@ -48,6 +52,7 @@
 	dex/quick/x86/int_x86.cc \
 	dex/quick/x86/target_x86.cc \
 	dex/quick/x86/utility_x86.cc \
+	dex/quick/x86/x86_dex_file_method_inliner.cc \
 	dex/portable/mir_to_gbc.cc \
 	dex/dex_to_dex_compiler.cc \
 	dex/mir_dataflow.cc \
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index 546ce4a..3798b45 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -94,7 +94,7 @@
 
   UniquePtr<MIRGraph> mir_graph;   // MIR container.
   UniquePtr<Backend> cg;           // Target-specific codegen.
-  base::TimingLogger timings;
+  TimingLogger timings;
 };
 
 }  // namespace art
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index b8cd67e..e53d636 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -31,6 +31,8 @@
 #include "llvm/llvm_compilation_unit.h"
 #endif
 
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+
 namespace {
 #if !defined(ART_USE_PORTABLE_COMPILER)
   pthread_once_t llvm_multi_init = PTHREAD_ONCE_INIT;
@@ -61,14 +63,20 @@
 LLVMInfo::~LLVMInfo() {
 }
 
+QuickCompilerContext::QuickCompilerContext(CompilerDriver& compiler)
+  : inliner_map_(new DexFileToMethodInlinerMap(&compiler)) {
+}
+
+QuickCompilerContext::~QuickCompilerContext() {
+}
+
 extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver& compiler) {
   CHECK(compiler.GetCompilerContext() == NULL);
-  LLVMInfo* llvm_info = new LLVMInfo();
-  compiler.SetCompilerContext(llvm_info);
+  compiler.SetCompilerContext(new QuickCompilerContext(compiler));
 }
 
 extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver& compiler) {
-  delete reinterpret_cast<LLVMInfo*>(compiler.GetCompilerContext());
+  delete reinterpret_cast<QuickCompilerContext*>(compiler.GetCompilerContext());
   compiler.SetCompilerContext(NULL);
 }
 
@@ -84,6 +92,7 @@
   // (1 << kBBOpt) |
   // (1 << kMatch) |
   // (1 << kPromoteCompilerTemps) |
+  // (1 << kSuppressExceptionEdges) |
   0;
 
 static uint32_t kCompilerDebugFlags = 0 |     // Enable debug/testing modes
@@ -157,7 +166,7 @@
   if (enable_debug & (1 << kDebugTimings)) {
     timings.EndSplit();
     LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
-    LOG(INFO) << Dumpable<base::TimingLogger>(timings);
+    LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
 }
 
@@ -212,7 +221,9 @@
 
   if (compiler_backend == kPortable) {
     // Fused long branches not currently useful in bitcode.
-    cu.disable_opt |= (1 << kBranchFusing);
+    cu.disable_opt |=
+        (1 << kBranchFusing) |
+        (1 << kSuppressExceptionEdges);
   }
 
   if (cu.instruction_set == kMips) {
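Note on the hunk above: the quick backend's compiler context now owns a DexFileToMethodInlinerMap through QuickCompilerContext instead of a bare LLVMInfo, and ArtUnInitQuickCompilerContext() deletes it under the new type. A minimal sketch of how a consumer is expected to reach the map, using only the accessors visible in this patch (GetCompilerContext() on the driver, GetInlinerMap() on the context):

    // Minimal sketch: recovering the inliner map from the driver-owned context.
    // The cast mirrors ArtUnInitQuickCompilerContext() above; GetInlinerMap() is
    // declared in frontend.h later in this patch.
    art::DexFileToMethodInlinerMap* InlinerMapOf(art::CompilerDriver& driver) {
      auto* context = reinterpret_cast<art::QuickCompilerContext*>(driver.GetCompilerContext());
      return (context != nullptr) ? context->GetInlinerMap() : nullptr;
    }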
diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h
index 43f6855..4a863f5 100644
--- a/compiler/dex/frontend.h
+++ b/compiler/dex/frontend.h
@@ -56,6 +56,7 @@
   kMatch,
   kPromoteCompilerTemps,
   kBranchFusing,
+  kSuppressExceptionEdges,
 };
 
 // Force code generation paths for testing.
@@ -81,6 +82,9 @@
   kDebugTimings
 };
 
+class DexFileToMethodInlinerMap;
+class CompilerDriver;
+
 class LLVMInfo {
   public:
     LLVMInfo();
@@ -109,6 +113,19 @@
     UniquePtr<art::llvm::IRBuilder> ir_builder_;
 };
 
+class QuickCompilerContext {
+  public:
+    explicit QuickCompilerContext(CompilerDriver& compiler);
+    ~QuickCompilerContext();
+
+    DexFileToMethodInlinerMap* GetInlinerMap() {
+      return inliner_map_.get();
+    }
+
+  private:
+    UniquePtr<DexFileToMethodInlinerMap> inliner_map_;
+};
+
 struct CompilationUnit;
 struct BasicBlock;
 
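The kSuppressExceptionEdges entry added to this header follows the existing disable_opt convention: a set bit disables the optimization, so exception-edge suppression is active by default and the portable path in frontend.cc switches it off explicitly. A one-line illustration of that convention (the helper name is hypothetical, not part of the patch):

    // Hypothetical helper illustrating the disable_opt convention used by the new flag.
    inline bool ExceptionEdgeSuppressionEnabled(uint32_t disable_opt) {
      return (disable_opt & (1u << art::kSuppressExceptionEdges)) == 0;  // bit set == disabled
    }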
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index 35d2923..75883b7 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -380,7 +380,9 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Establish value number for loaded register. Note use of memory version.
@@ -419,7 +421,9 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Rev the memory version
@@ -443,7 +447,9 @@
         } else {
           null_checked_.insert(base);
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         uint16_t memory_version = GetMemoryVersion(base, field_ref);
         if (opcode == Instruction::IGET_WIDE) {
@@ -473,7 +479,9 @@
         } else {
           null_checked_.insert(base);
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         AdvanceMemoryVersion(base, field_ref);
       }
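Context for the three identical guards above: with exception edges suppressed (see the kSuppressExceptionEdges changes in mir_graph.cc below), a potentially-throwing instruction outside a try block is no longer split into a check/work pair, so mir->meta.throw_insn is left unset (NULL) and the flag propagation has to be skipped. The repeated pattern, pulled out as a sketch:

    // The guard pattern repeated above, factored out for readability (sketch only;
    // the real code keeps it inline at each of the three sites).
    static void PropagateThrowFlags(art::MIR* mir) {
      if (mir->meta.throw_insn != NULL) {
        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
      }
    }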
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 11e19dc..d359ee2 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1243,12 +1243,13 @@
     if (mir->ssa_rep == NULL) {
       continue;
     }
-    // Each level of nesting adds *16 to count, up to 3 levels deep.
-    uint32_t weight = std::min(3U, static_cast<uint32_t>(bb->nesting_depth) * 4);
+    // Each level of nesting adds 100 to the use weight, up to 3 levels deep.
+    uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
+    uint32_t weight = std::max(1U, depth * 100);
     for (int i = 0; i < mir->ssa_rep->num_uses; i++) {
       int s_reg = mir->ssa_rep->uses[i];
       raw_use_counts_.Increment(s_reg);
-      use_counts_.Put(s_reg, use_counts_.Get(s_reg) + (1 << weight));
+      use_counts_.Put(s_reg, use_counts_.Get(s_reg) + weight);
     }
     if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) {
       int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
@@ -1267,7 +1268,7 @@
         }
         if (uses_method_star) {
           raw_use_counts_.Increment(method_sreg_);
-          use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + (1 << weight));
+          use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + weight);
         }
       }
     }
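A quick comparison of the two weighting schemes for the register-promotion use counts (illustration only, mirroring the expressions just above): the old scheme shifted by min(3, 4*depth), so a use counted 1 outside loops and 8 at any loop depth; the new scheme counts max(1, min(3, depth)*100), i.e. 1, 100, 200 or 300, so deeper nesting keeps increasing a register's weight instead of saturating after the first loop level.

    #include <algorithm>
    #include <cstdint>

    // Worked comparison of the two weighting schemes above (illustration only).
    uint32_t OldUseWeight(uint32_t depth) {  // depth 0 -> 1, depth >= 1 -> 8
      return 1u << std::min(3u, depth * 4);
    }
    uint32_t NewUseWeight(uint32_t depth) {  // depth 0,1,2,3+ -> 1, 100, 200, 300
      return std::max(1u, std::min(3u, depth) * 100);
    }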
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index cf758fc..deaf2ff 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -365,8 +365,8 @@
 }
 
 /* Process instructions with the kSwitch flag */
-void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
-                                int flags) {
+BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
+                                       int width, int flags) {
   const uint16_t* switch_data =
       reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB);
   int size;
@@ -437,6 +437,7 @@
                                             /* create */ true, /* immed_pred_block_p */ NULL);
   cur_block->fall_through = fallthrough_block->id;
   fallthrough_block->predecessors->Insert(cur_block->id);
+  return cur_block;
 }
 
 /* Process instructions with the kThrow flag */
@@ -444,6 +445,9 @@
                                       int width, int flags, ArenaBitVector* try_block_addr,
                                       const uint16_t* code_ptr, const uint16_t* code_end) {
   bool in_try_block = try_block_addr->IsBitSet(cur_offset);
+  bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW);
+  bool build_all_edges =
+      (cu_->disable_opt & (1 << kSuppressExceptionEdges)) || is_throw || in_try_block;
 
   /* In try block */
   if (in_try_block) {
@@ -473,7 +477,7 @@
       cur_block->successor_blocks->Insert(successor_block_info);
       catch_block->predecessors->Insert(cur_block->id);
     }
-  } else {
+  } else if (build_all_edges) {
     BasicBlock *eh_block = NewMemBB(kExceptionHandling, num_blocks_++);
     cur_block->taken = eh_block->id;
     block_list_.Insert(eh_block);
@@ -481,7 +485,7 @@
     eh_block->predecessors->Insert(cur_block->id);
   }
 
-  if (insn->dalvikInsn.opcode == Instruction::THROW) {
+  if (is_throw) {
     cur_block->explicit_throw = true;
     if (code_ptr < code_end) {
       // Force creation of new block following THROW via side-effect
@@ -494,6 +498,16 @@
     }
   }
 
+  if (!build_all_edges) {
+    /*
+     * Even though there is an exception edge here, control cannot return to this
+     * method.  Thus, for the purposes of dataflow analysis and optimization, we can
+     * ignore the edge.  Doing this reduces compile time, and increases the scope
+     * of the basic-block level optimization pass.
+     */
+    return cur_block;
+  }
+
   /*
    * Split the potentially-throwing instruction into two parts.
    * The first half will be a pseudo-op that captures the exception
@@ -695,7 +709,7 @@
       cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_,
                                   code_ptr, code_end);
     } else if (flags & Instruction::kSwitch) {
-      ProcessCanSwitch(cur_block, insn, current_offset_, width, flags);
+      cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, flags);
     }
     current_offset_ += width;
     BasicBlock *next_block = FindBlock(current_offset_, /* split */ false, /* create */
@@ -1100,6 +1114,7 @@
 void MIRGraph::DumpMIRGraph() {
   BasicBlock* bb;
   const char* block_type_names[] = {
+    "Null Block",
     "Entry Block",
     "Code Block",
     "Exit Block",
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index a69dde0..8c20728 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -698,8 +698,8 @@
   void ProcessTryCatchBlocks();
   BasicBlock* ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
                                int flags, const uint16_t* code_ptr, const uint16_t* code_end);
-  void ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
-                        int flags);
+  BasicBlock* ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
+                               int flags);
   BasicBlock* ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
                               int flags, ArenaBitVector* try_block_addr, const uint16_t* code_ptr,
                               const uint16_t* code_end);
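The new control flow in ProcessCanThrow() (mir_graph.cc above) boils down to one predicate: full exception edges, and the check/work instruction split that follows them, are built only for explicit throws, instructions inside try blocks, or when kSuppressExceptionEdges has been disabled; otherwise the edge is skipped because control can never return to the method through it, which shrinks the graph and widens basic blocks. A condensed restatement, as a sketch:

    // Condensed restatement of the new decision in ProcessCanThrow (sketch only).
    bool BuildAllExceptionEdges(const art::CompilationUnit* cu, const art::MIR* insn,
                                bool in_try_block) {
      bool is_throw = (insn->dalvikInsn.opcode == art::Instruction::THROW);
      bool suppression_disabled = (cu->disable_opt & (1u << art::kSuppressExceptionEdges)) != 0;
      // When this returns false, control cannot come back into the method through the
      // exception edge, so dataflow analysis and optimization may ignore it.
      return suppression_disabled || is_throw || in_try_block;
    }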
diff --git a/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc b/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc
new file mode 100644
index 0000000..a8ae3cd
--- /dev/null
+++ b/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "dex/compiler_enums.h"
+
+#include "arm_dex_file_method_inliner.h"
+
+namespace art {
+
+const DexFileMethodInliner::IntrinsicDef ArmDexFileMethodInliner::kIntrinsicMethods[] = {
+#define INTRINSIC(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, d } }
+
+    INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
+    INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+
+    INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, kWord),
+    INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, kLong),
+    INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
+
+    INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangStrictMath, Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
+    INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
+
+    INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
+    INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
+    INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
+    INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
+    INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
+
+    INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
+
+    INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, kWord),
+    INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, kLong),
+    INTRINSIC(LibcoreIoMemory, PeekShortNative, J_S, kIntrinsicPeek, kSignedHalf),
+    INTRINSIC(LibcoreIoMemory, PokeByte, JB_V, kIntrinsicPoke, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, kWord),
+    INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, kLong),
+    INTRINSIC(LibcoreIoMemory, PokeShortNative, JS_V, kIntrinsicPoke, kSignedHalf),
+
+    INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas32,
+              kIntrinsicFlagDontNeedWriteBarrier),
+    INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas32,
+              kIntrinsicFlagNeedWriteBarrier),
+
+#define UNSAFE_GET_PUT(type, code, type_flags) \
+    INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              type_flags & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              (type_flags | kIntrinsicFlagIsVolatile) & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags), \
+    INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsVolatile), \
+    INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsOrdered)
+
+    UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
+    UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
+    UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
+#undef UNSAFE_GET_PUT
+
+#undef INTRINSIC
+};
+
+ArmDexFileMethodInliner::ArmDexFileMethodInliner() {
+}
+
+ArmDexFileMethodInliner::~ArmDexFileMethodInliner() {
+}
+
+void ArmDexFileMethodInliner::FindIntrinsics(const DexFile* dex_file) {
+  IndexCache cache;
+  DoFindIntrinsics(dex_file, &cache, kIntrinsicMethods, arraysize(kIntrinsicMethods));
+}
+
+}  // namespace art
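For readers tracing the table above, here is one row expanded by hand; the struct shapes are stand-ins inferred from the INTRINSIC macro body, not the real declarations in dex_file_method_inliner.h:

    // Stand-in shapes inferred from the macro: an IntrinsicDef pairs a method-reference
    // key (class/name/proto cache indices) with an opcode + data payload.
    struct MethodRefSketch    { int class_idx; int name_idx; int proto_idx; };
    struct IntrinsicSketch    { int opcode; uint32_t data; };
    struct IntrinsicDefSketch { MethodRefSketch method_ref; IntrinsicSketch intrinsic; };

    // INTRINSIC(JavaLangMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin) expands to:
    //   { { kClassCacheJavaLangMath, kNameCacheMin, kProtoCacheII_I },
    //     { kIntrinsicMinMaxInt, kIntrinsicFlagMin } }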
diff --git a/compiler/dex/quick/arm/arm_dex_file_method_inliner.h b/compiler/dex/quick/arm/arm_dex_file_method_inliner.h
new file mode 100644
index 0000000..3428391
--- /dev/null
+++ b/compiler/dex/quick/arm/arm_dex_file_method_inliner.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_ARM_ARM_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_ARM_ARM_DEX_FILE_METHOD_INLINER_H_
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class ArmDexFileMethodInliner : public DexFileMethodInliner {
+  public:
+    ArmDexFileMethodInliner();
+    ~ArmDexFileMethodInliner();
+
+    void FindIntrinsics(const DexFile* dex_file);
+
+  private:
+    static const IntrinsicDef kIntrinsicMethods[];
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_ARM_ARM_DEX_FILE_METHOD_INLINER_H_
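The per-ISA subclass declared above is what DexFileToMethodInlinerMap is expected to hand out for ARM; a hypothetical construction sketch (the real selection logic lives in dex_file_to_method_inliner_map.cc, which is not part of this excerpt):

    // Hypothetical per-ISA factory; the real logic is in dex_file_to_method_inliner_map.cc.
    art::DexFileMethodInliner* CreateInlinerFor(art::InstructionSet isa) {
      switch (isa) {
        case art::kArm:
        case art::kThumb2:
          return new art::ArmDexFileMethodInliner();
        default:
          return nullptr;  // mips/x86 have their own *_dex_file_method_inliner.cc (added above)
      }
    }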
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index ffaaf84..8cd7c94 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -334,7 +334,7 @@
   kThumb2VcvtDF,     // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0].
   kThumb2Vsqrts,     // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0].
   kThumb2Vsqrtd,     // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0].
-  kThumb2MovImmShift,  // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
+  kThumb2MovI8M,     // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
   kThumb2MovImm16,   // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8.
   kThumb2StrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
   kThumb2LdrRRI12,   // ldr(Imm,T3) rd,[rn,#imm12] [111110001101] rn[19..16] rt[15..12] imm12[11..0].
@@ -346,14 +346,14 @@
   kThumb2MovRR,      // mov rd, rm [11101010010011110000] rd[11..8] [0000] rm[3..0].
   kThumb2Vmovs,      // vmov.f32 vd, vm [111011101] D [110000] vd[15..12] [101001] M [0] vm[3..0].
   kThumb2Vmovd,      // vmov.f64 vd, vm [111011101] D [110000] vd[15..12] [101101] M [0] vm[3..0].
-  kThumb2Ldmia,      // ldmia  [111010001001[ rn[19..16] mask[15..0].
-  kThumb2Stmia,      // stmia  [111010001000[ rn[19..16] mask[15..0].
+  kThumb2Ldmia,      // ldmia  [111010001001] rn[19..16] mask[15..0].
+  kThumb2Stmia,      // stmia  [111010001000] rn[19..16] mask[15..0].
   kThumb2AddRRR,     // add [111010110000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2SubRRR,     // sub [111010111010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2SbcRRR,     // sbc [111010110110] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2CmpRR,      // cmp [111010111011] rn[19..16] [0000] [1111] [0000] rm[3..0].
-  kThumb2SubRRI12,   // sub rd, rn, #imm12 [11110] i [01010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2MvnImm12,   // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8.
+  kThumb2SubRRI12,   // sub rd, rn, #imm12 [11110] i [101010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2MvnI8M,     // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8.
   kThumb2Sel,        // sel rd, rn, rm [111110101010] rn[19-16] rd[11-8] rm[3-0].
   kThumb2Ubfx,       // ubfx rd,rn,#lsb,#width [111100111100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
   kThumb2Sbfx,       // ubfx rd,rn,#lsb,#width [111100110100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
@@ -373,7 +373,8 @@
   kThumb2StrbRRI12,  // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0].
   kThumb2Pop,        // pop   [1110100010111101] list[15-0].
   kThumb2Push,       // push  [1110100100101101] list[15-0].
-  kThumb2CmpRI12,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
+  kThumb2CmpRI8M,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
+  kThumb2CmnRI8M,    // cmn rn, #<const> [11110] i [010001] rn[19-16] [0] imm3 [1111] imm8[7..0].
   kThumb2AdcRRR,     // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2AndRRR,     // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2BicRRR,     // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
@@ -383,7 +384,7 @@
   kThumb2SdivRRR,    // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
   kThumb2UdivRRR,    // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
   kThumb2MnvRR,      // mvn [11101010011011110] rd[11-8] [0000] rm[3..0].
-  kThumb2RsubRRI8,   // rsub [111100011100] rn[19..16] [0000] rd[11..8] imm8[7..0].
+  kThumb2RsubRRI8M,  // rsb rd, rn, #<const> [11110] i [011101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2NegRR,      // actually rsub rd, rn, #0.
   kThumb2OrrRRR,     // orr [111010100100] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2TstRR,      // tst [111010100001] rn[19..16] [0000] [1111] [0000] rm[3..0].
@@ -395,14 +396,14 @@
   kThumb2LsrRRI5,    // lsr [11101010010011110] imm[14.12] rd[11..8] [01] rm[3..0].
   kThumb2AsrRRI5,    // asr [11101010010011110] imm[14.12] rd[11..8] [10] rm[3..0].
   kThumb2RorRRI5,    // ror [11101010010011110] imm[14.12] rd[11..8] [11] rm[3..0].
-  kThumb2BicRRI8,    // bic [111100000010] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2AndRRI8,    // bic [111100000000] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2OrrRRI8,    // orr [111100000100] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2EorRRI8,    // eor [111100001000] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2AddRRI8,    // add [111100001000] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2AdcRRI8,    // adc [111100010101] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2SubRRI8,    // sub [111100011011] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2SbcRRI8,    // sbc [111100010111] rn[19..16] [0] imm3 rd[11..8] imm8.
+  kThumb2BicRRI8M,   // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AndRRI8M,   // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2OrrRRI8M,   // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2EorRRI8M,   // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AddRRI8M,   // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AdcRRI8M,   // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2SubRRI8M,   // sub rd, rn, #<const> [11110] i [011011] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2SbcRRI8M,   // sbc rd, rn, #<const> [11110] i [010111] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2RevRR,      // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0]
   kThumb2RevshRR,    // rev [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0]
   kThumb2It,         // it [10111111] firstcond[7-4] mask[3-0].
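The *I8M suffix adopted above marks operands that take Thumb-2's "modified immediate" form (an 8-bit pattern either replicated across the word or rotated into place), as opposed to the plain 8- and 12-bit immediates the old names suggested. A rough sketch of the encodability test, stated as a boolean predicate rather than ART's actual ModifiedImmediate() encoder, which also returns the packed i:imm3:a:bcdefgh bits:

    #include <cstdint>

    // Sketch of the predicate behind ModifiedImmediate(): can 'value' be encoded as a
    // Thumb-2 modified immediate (the operand form the new *I8M opcode names refer to)?
    bool IsThumb2ModifiedImmediate(uint32_t value) {
      uint32_t b = value & 0xff;
      if (value <= 0xff) return true;                                    // 00000000 00000000 00000000 abcdefgh
      if (value == ((b << 16) | b)) return true;                         // 00000000 abcdefgh 00000000 abcdefgh
      if (value == ((b << 24) | (b << 16) | (b << 8) | b)) return true;  // abcdefgh abcdefgh abcdefgh abcdefgh
      b = (value >> 8) & 0xff;
      if (value == ((b << 24) | (b << 8))) return true;                  // abcdefgh 00000000 abcdefgh 00000000
      // Otherwise: an 8-bit pattern 1bcdefgh rotated right by 8..31, i.e. every set bit
      // must lie within the 8-bit window anchored at the highest set bit.
      int top = 31;
      while (((value >> top) & 1) == 0) top--;  // value > 0xff here, so top >= 8
      return (value & ~(0xffu << (top - 7))) == 0;
    }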
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 3d0f263..1c81a5a 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -489,7 +489,7 @@
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MovImmShift, 0xf04f0000, /* no setflags encoding */
+    ENCODING_MAP(kThumb2MovI8M, 0xf04f0000, /* no setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
                  "mov", "!0C, #!1m", 4, kFixupNone),
@@ -573,8 +573,8 @@
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
                  "sub", "!0C,!1C,#!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MvnImm12,  0xf06f0000, /* no setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
+    ENCODING_MAP(kThumb2MvnI8M,  0xf06f0000, /* no setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
                  "mvn", "!0C, #!1n", 4, kFixupNone),
     ENCODING_MAP(kThumb2Sel,       0xfaa0f080,
@@ -656,11 +656,16 @@
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
                  | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop),
-    ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00,
+    ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00,
                  kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_USE0 | SETS_CCODES,
                  "cmp", "!0C, #!1m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2CmnRI8M, 0xf1100f00,
+                 kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmn", "!0C, #!1m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AdcRRR,  0xeb500000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
@@ -699,11 +704,11 @@
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "mvn", "!0C, !1C, shift !2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RsubRRI8,       0xf1d00000,
+    ENCODING_MAP(kThumb2RsubRRI8M,       0xf1d00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "rsb", "!0C,!1C,#!2m", 4, kFixupNone),
+                 "rsbs", "!0C,!1C,#!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2NegRR,       0xf1d00000, /* instance of rsub */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -750,38 +755,38 @@
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "ror", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2BicRRI8,  0xf0200000,
+    ENCODING_MAP(kThumb2BicRRI8M,  0xf0200000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "bic", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AndRRI8,  0xf0000000,
+    ENCODING_MAP(kThumb2AndRRI8M,  0xf0000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "and", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2OrrRRI8,  0xf0400000,
+    ENCODING_MAP(kThumb2OrrRRI8M,  0xf0400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "orr", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2EorRRI8,  0xf0800000,
+    ENCODING_MAP(kThumb2EorRRI8M,  0xf0800000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "eor", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AddRRI8,  0xf1100000,
+    ENCODING_MAP(kThumb2AddRRI8M,  0xf1100000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
                  "adds", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AdcRRI8,  0xf1500000,
+    ENCODING_MAP(kThumb2AdcRRI8M,  0xf1500000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
                  "adcs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SubRRI8,  0xf1b00000,
+    ENCODING_MAP(kThumb2SubRRI8M,  0xf1b00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
                  "subs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SbcRRI8,  0xf1700000,
+    ENCODING_MAP(kThumb2SbcRRI8M,  0xf1700000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 480e021..1575ece 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -274,7 +274,7 @@
   NewLIR0(kThumb2Fmstat);
 
   OpIT((default_result == -1) ? kCondGt : kCondMi, "");
-  NewLIR2(kThumb2MovImmShift, rl_result.low_reg,
+  NewLIR2(kThumb2MovI8M, rl_result.low_reg,
           ModifiedImmediate(-default_result));  // Must not alter ccodes
   GenBarrier();
 
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 42bf3d4..9f84b03 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -97,7 +97,7 @@
   LIR* branch3 = OpCondBranch(kCondEq, NULL);
 
   OpIT(kCondHi, "E");
-  NewLIR2(kThumb2MovImmShift, t_reg, ModifiedImmediate(-1));
+  NewLIR2(kThumb2MovI8M, t_reg, ModifiedImmediate(-1));
   LoadConstant(t_reg, 1);
   GenBarrier();
 
@@ -299,7 +299,6 @@
 LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, int check_value,
                                 LIR* target) {
   LIR* branch;
-  int mod_imm;
   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
   /*
    * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
@@ -317,16 +316,7 @@
     branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                      reg, 0);
   } else {
-    mod_imm = ModifiedImmediate(check_value);
-    if (ARM_LOWREG(reg) && ((check_value & 0xff) == check_value)) {
-      NewLIR2(kThumbCmpRI8, reg, check_value);
-    } else if (mod_imm >= 0) {
-      NewLIR2(kThumb2CmpRI12, reg, mod_imm);
-    } else {
-      int t_reg = AllocTemp();
-      LoadConstant(t_reg, check_value);
-      OpRegReg(kOpCmp, reg, t_reg);
-    }
+    OpRegImm(kOpCmp, reg, check_value);
     branch = NewLIR2(kThumbBCond, 0, arm_cond);
   }
   branch->target = target;
@@ -1124,8 +1114,8 @@
   switch (opcode) {
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
-      NewLIR3(kThumb2AddRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
-      NewLIR3(kThumb2AdcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+      NewLIR3(kThumb2AddRRI8M, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+      NewLIR3(kThumb2AdcRRI8M, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
       break;
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
@@ -1152,8 +1142,8 @@
       break;
     case Instruction::SUB_LONG_2ADDR:
     case Instruction::SUB_LONG:
-      NewLIR3(kThumb2SubRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
-      NewLIR3(kThumb2SbcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+      NewLIR3(kThumb2SubRRI8M, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+      NewLIR3(kThumb2SbcRRI8M, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
       break;
     default:
       LOG(FATAL) << "Unexpected opcode " << opcode;
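With the hand-rolled immediate handling removed above, OpCmpImmBranch keeps only the cbz/cbnz fast path for zero checks and funnels every other constant through OpRegImm(kOpCmp, ...), which (per the utility_arm.cc changes below) can now also fall back to cmn when only the negated value has a modified-immediate encoding. The dispatch decision, as an illustration only (the exact guard on the cbz path is not fully shown in this excerpt):

    // Illustration of the dispatch now used by OpCmpImmBranch: cbz/cbnz only works for a
    // zero check on a low register with an eq/ne condition; everything else becomes a
    // cmp/cmn (via OpRegImm) followed by a conditional branch.
    enum class CmpBranchKind { kCbzCbnz, kCmpThenBranch };

    CmpBranchKind ChooseCmpBranch(bool low_reg, int check_value, bool cond_is_eq_or_ne) {
      return (check_value == 0 && low_reg && cond_is_eq_or_ne) ? CmpBranchKind::kCbzCbnz
                                                               : CmpBranchKind::kCmpThenBranch;
    }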
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index d631cf7..8a8b168 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -184,12 +184,12 @@
   /* Check Modified immediate special cases */
   mod_imm = ModifiedImmediate(value);
   if (mod_imm >= 0) {
-    res = NewLIR2(kThumb2MovImmShift, r_dest, mod_imm);
+    res = NewLIR2(kThumb2MovI8M, r_dest, mod_imm);
     return res;
   }
   mod_imm = ModifiedImmediate(~value);
   if (mod_imm >= 0) {
-    res = NewLIR2(kThumb2MvnImm12, r_dest, mod_imm);
+    res = NewLIR2(kThumb2MvnI8M, r_dest, mod_imm);
     return res;
   }
   /* 16-bit immediate? */
@@ -446,7 +446,6 @@
   ArmOpcode alt_opcode = kThumbBkpt;
   bool all_low_regs = (ARM_LOWREG(r_dest) && ARM_LOWREG(r_src1));
   int32_t mod_imm = ModifiedImmediate(value);
-  int32_t mod_imm_neg = ModifiedImmediate(-value);
 
   switch (op) {
     case kOpLsl:
@@ -482,47 +481,55 @@
         else
           opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3;
         return NewLIR3(opcode, r_dest, r_src1, abs_value);
-      } else if ((abs_value & 0xff) == abs_value) {
+      } else if ((abs_value & 0x3ff) == abs_value) {
         if (op == kOpAdd)
           opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12;
         else
           opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12;
         return NewLIR3(opcode, r_dest, r_src1, abs_value);
       }
-      if (mod_imm_neg >= 0) {
-        op = (op == kOpAdd) ? kOpSub : kOpAdd;
-        mod_imm = mod_imm_neg;
+      if (mod_imm < 0) {
+        mod_imm = ModifiedImmediate(-value);
+        if (mod_imm >= 0) {
+          op = (op == kOpAdd) ? kOpSub : kOpAdd;
+        }
       }
       if (op == kOpSub) {
-        opcode = kThumb2SubRRI8;
+        opcode = kThumb2SubRRI8M;
         alt_opcode = kThumb2SubRRR;
       } else {
-        opcode = kThumb2AddRRI8;
+        opcode = kThumb2AddRRI8M;
         alt_opcode = kThumb2AddRRR;
       }
       break;
     case kOpRsub:
-      opcode = kThumb2RsubRRI8;
+      opcode = kThumb2RsubRRI8M;
       alt_opcode = kThumb2RsubRRR;
       break;
     case kOpAdc:
-      opcode = kThumb2AdcRRI8;
+      opcode = kThumb2AdcRRI8M;
       alt_opcode = kThumb2AdcRRR;
       break;
     case kOpSbc:
-      opcode = kThumb2SbcRRI8;
+      opcode = kThumb2SbcRRI8M;
       alt_opcode = kThumb2SbcRRR;
       break;
     case kOpOr:
-      opcode = kThumb2OrrRRI8;
+      opcode = kThumb2OrrRRI8M;
       alt_opcode = kThumb2OrrRRR;
       break;
     case kOpAnd:
-      opcode = kThumb2AndRRI8;
+      if (mod_imm < 0) {
+        mod_imm = ModifiedImmediate(~value);
+        if (mod_imm >= 0) {
+          return NewLIR3(kThumb2BicRRI8M, r_dest, r_src1, mod_imm);
+        }
+      }
+      opcode = kThumb2AndRRI8M;
       alt_opcode = kThumb2AndRRR;
       break;
     case kOpXor:
-      opcode = kThumb2EorRRI8;
+      opcode = kThumb2EorRRI8M;
       alt_opcode = kThumb2EorRRR;
       break;
     case kOpMul:
@@ -531,15 +538,19 @@
       alt_opcode = kThumb2MulRRR;
       break;
     case kOpCmp: {
-      int mod_imm = ModifiedImmediate(value);
       LIR* res;
       if (mod_imm >= 0) {
-        res = NewLIR2(kThumb2CmpRI12, r_src1, mod_imm);
+        res = NewLIR2(kThumb2CmpRI8M, r_src1, mod_imm);
       } else {
-        int r_tmp = AllocTemp();
-        res = LoadConstant(r_tmp, value);
-        OpRegReg(kOpCmp, r_src1, r_tmp);
-        FreeTemp(r_tmp);
+        mod_imm = ModifiedImmediate(-value);
+        if (mod_imm >= 0) {
+          res = NewLIR2(kThumb2CmnRI8M, r_src1, mod_imm);
+        } else {
+          int r_tmp = AllocTemp();
+          res = LoadConstant(r_tmp, value);
+          OpRegReg(kOpCmp, r_src1, r_tmp);
+          FreeTemp(r_tmp);
+        }
       }
       return res;
     }
@@ -585,13 +596,10 @@
       }
       break;
     case kOpCmp:
-      if (ARM_LOWREG(r_dest_src1) && short_form) {
-        opcode = (short_form) ?  kThumbCmpRI8 : kThumbCmpRR;
-      } else if (ARM_LOWREG(r_dest_src1)) {
-        opcode = kThumbCmpRR;
+      if (!neg && short_form) {
+        opcode = kThumbCmpRI8;
       } else {
         short_form = false;
-        opcode = kThumbCmpHL;
       }
       break;
     default:
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index dfbc887..4bc0b35 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -920,7 +920,8 @@
       core_spill_mask_(0),
       fp_spill_mask_(0),
       first_lir_insn_(NULL),
-      last_lir_insn_(NULL) {
+      last_lir_insn_(NULL),
+      inliner_(nullptr) {
   promotion_map_ = static_cast<PromotionMap*>
       (arena_->Alloc((cu_->num_dalvik_registers  + cu_->num_compiler_temps + 1) *
                       sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc));
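The inliner_ member initialized to nullptr above is Mir2Lir's cached handle on the DexFileMethodInliner for the dex file being compiled. The expected call pattern during invoke lowering (a sketch; the actual call site in gen_invoke.cc is not part of this excerpt) is to try the intrinsic expansion first and fall back to a normal call:

    // Sketch of the intended consumer of inliner_ (the real call site is not shown here).
    // 'this' is the Mir2Lir backend; 'info' describes the invoke being lowered.
    if (inliner_ != nullptr && inliner_->GenIntrinsic(this, info)) {
      return;  // the intrinsic was expanded inline; skip the regular call sequence
    }
    // ... otherwise emit the normal invoke sequence ...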
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
new file mode 100644
index 0000000..6a760fa
--- /dev/null
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -0,0 +1,345 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include "base/macros.h"
+#include "dex/mir_graph.h"
+
+#include "dex_file_method_inliner.h"
+
+namespace art {
+
+const char* DexFileMethodInliner::kClassCacheNames[] = {
+    "Z",                       // kClassCacheBoolean
+    "B",                       // kClassCacheByte
+    "C",                       // kClassCacheChar
+    "S",                       // kClassCacheShort
+    "I",                       // kClassCacheInt
+    "J",                       // kClassCacheLong
+    "F",                       // kClassCacheFloat
+    "D",                       // kClassCacheDouble
+    "V",                       // kClassCacheVoid
+    "Ljava/lang/Object;",      // kClassCacheJavaLangObject
+    "Ljava/lang/String;",      // kClassCacheJavaLangString
+    "Ljava/lang/Double;",      // kClassCacheJavaLangDouble
+    "Ljava/lang/Float;",       // kClassCacheJavaLangFloat
+    "Ljava/lang/Integer;",     // kClassCacheJavaLangInteger
+    "Ljava/lang/Long;",        // kClassCacheJavaLangLong
+    "Ljava/lang/Short;",       // kClassCacheJavaLangShort
+    "Ljava/lang/Math;",        // kClassCacheJavaLangMath
+    "Ljava/lang/StrictMath;",  // kClassCacheJavaLangStrictMath
+    "Ljava/lang/Thread;",      // kClassCacheJavaLangThread
+    "Llibcore/io/Memory;",     // kClassCacheLibcoreIoMemory
+    "Lsun/misc/Unsafe;",       // kClassCacheSunMiscUnsafe
+};
+
+const char* DexFileMethodInliner::kNameCacheNames[] = {
+    "reverseBytes",          // kNameCacheReverseBytes
+    "doubleToRawLongBits",   // kNameCacheDoubleToRawLongBits
+    "longBitsToDouble",      // kNameCacheLongBitsToDouble
+    "floatToRawIntBits",     // kNameCacheFloatToRawIntBits
+    "intBitsToFloat",        // kNameCacheIntBitsToFloat
+    "abs",                   // kNameCacheAbs
+    "max",                   // kNameCacheMax
+    "min",                   // kNameCacheMin
+    "sqrt",                  // kNameCacheSqrt
+    "charAt",                // kNameCacheCharAt
+    "compareTo",             // kNameCacheCompareTo
+    "isEmpty",               // kNameCacheIsEmpty
+    "indexOf",               // kNameCacheIndexOf
+    "length",                // kNameCacheLength
+    "currentThread",         // kNameCacheCurrentThread
+    "peekByte",              // kNameCachePeekByte
+    "peekIntNative",         // kNameCachePeekIntNative
+    "peekLongNative",        // kNameCachePeekLongNative
+    "peekShortNative",       // kNameCachePeekShortNative
+    "pokeByte",              // kNameCachePokeByte
+    "pokeIntNative",         // kNameCachePokeIntNative
+    "pokeLongNative",        // kNameCachePokeLongNative
+    "pokeShortNative",       // kNameCachePokeShortNative
+    "compareAndSwapInt",     // kNameCacheCompareAndSwapInt
+    "compareAndSwapObject",  // kNameCacheCompareAndSwapObject
+    "getInt",                // kNameCacheGetInt
+    "getIntVolatile",        // kNameCacheGetIntVolatile
+    "putInt",                // kNameCachePutInt
+    "putIntVolatile",        // kNameCachePutIntVolatile
+    "putOrderedInt",         // kNameCachePutOrderedInt
+    "getLong",               // kNameCacheGetLong
+    "getLongVolatile",       // kNameCacheGetLongVolatile
+    "putLong",               // kNameCachePutLong
+    "putLongVolatile",       // kNameCachePutLongVolatile
+    "putOrderedLong",        // kNameCachePutOrderedLong
+    "getObject",             // kNameCacheGetObject
+    "getObjectVolatile",     // kNameCacheGetObjectVolatile
+    "putObject",             // kNameCachePutObject
+    "putObjectVolatile",     // kNameCachePutObjectVolatile
+    "putOrderedObject",      // kNameCachePutOrderedObject
+};
+
+const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = {
+    // kProtoCacheI_I
+    { kClassCacheInt, 1, { kClassCacheInt } },
+    // kProtoCacheJ_J
+    { kClassCacheLong, 1, { kClassCacheLong } },
+    // kProtoCacheS_S
+    { kClassCacheShort, 1, { kClassCacheShort } },
+    // kProtoCacheD_D
+    { kClassCacheDouble, 1, { kClassCacheDouble } },
+    // kProtoCacheD_J
+    { kClassCacheLong, 1, { kClassCacheDouble } },
+    // kProtoCacheJ_D
+    { kClassCacheDouble, 1, { kClassCacheLong } },
+    // kProtoCacheF_I
+    { kClassCacheInt, 1, { kClassCacheFloat } },
+    // kProtoCacheI_F
+    { kClassCacheFloat, 1, { kClassCacheInt } },
+    // kProtoCacheII_I
+    { kClassCacheInt, 2, { kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheI_C
+    { kClassCacheChar, 1, { kClassCacheInt } },
+    // kProtoCacheString_I
+    { kClassCacheInt, 1, { kClassCacheJavaLangString } },
+    // kProtoCache_Z
+    { kClassCacheBoolean, 0, { } },
+    // kProtoCache_I
+    { kClassCacheInt, 0, { } },
+    // kProtoCache_Thread
+    { kClassCacheJavaLangThread, 0, { } },
+    // kProtoCacheJ_B
+    { kClassCacheByte, 1, { kClassCacheLong } },
+    // kProtoCacheJ_I
+    { kClassCacheInt, 1, { kClassCacheLong } },
+    // kProtoCacheJ_S
+    { kClassCacheShort, 1, { kClassCacheLong } },
+    // kProtoCacheJB_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheByte } },
+    // kProtoCacheJI_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheInt } },
+    // kProtoCacheJJ_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheLong } },
+    // kProtoCacheJS_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheShort } },
+    // kProtoCacheObjectJII_Z
+    { kClassCacheBoolean, 4, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheObjectJObjectObject_Z
+    { kClassCacheBoolean, 4, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheJavaLangObject, kClassCacheJavaLangObject } },
+    // kProtoCacheObjectJ_I
+    { kClassCacheInt, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJI_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheInt } },
+    // kProtoCacheObjectJ_J
+    { kClassCacheLong, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJJ_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheLong } },
+    // kProtoCacheObjectJ_Object
+    { kClassCacheJavaLangObject, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJObject_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheJavaLangObject } },
+};
+
+DexFileMethodInliner::~DexFileMethodInliner() {
+}
+
+DexFileMethodInliner::DexFileMethodInliner()
+    : dex_file_(NULL) {
+  COMPILE_ASSERT(kClassCacheFirst == 0, kClassCacheFirst_not_0);
+  COMPILE_ASSERT(arraysize(kClassCacheNames) == kClassCacheLast, bad_arraysize_kClassCacheNames);
+  COMPILE_ASSERT(kNameCacheFirst == 0, kNameCacheFirst_not_0);
+  COMPILE_ASSERT(arraysize(kNameCacheNames) == kNameCacheLast, bad_arraysize_kNameCacheNames);
+  COMPILE_ASSERT(kProtoCacheFirst == 0, kProtoCacheFirst_not_0);
+  COMPILE_ASSERT(arraysize(kProtoCacheDefs) == kProtoCacheLast, bad_arraysize_kProtoCacheDefs);
+}
+
+bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index) const {
+  return intrinsics_.find(method_index) != intrinsics_.end();
+}
+
+bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) const {
+  auto it = intrinsics_.find(info->index);
+  if (it == intrinsics_.end()) {
+    return false;
+  }
+  const Intrinsic& intrinsic = it->second;
+  switch (intrinsic.opcode) {
+    case kIntrinsicDoubleCvt:
+      return backend->GenInlinedDoubleCvt(info);
+    case kIntrinsicFloatCvt:
+      return backend->GenInlinedFloatCvt(info);
+    case kIntrinsicReverseBytes:
+      return backend->GenInlinedReverseBytes(info, static_cast<OpSize>(intrinsic.data));
+    case kIntrinsicAbsInt:
+      return backend->GenInlinedAbsInt(info);
+    case kIntrinsicAbsLong:
+      return backend->GenInlinedAbsLong(info);
+    case kIntrinsicMinMaxInt:
+      return backend->GenInlinedMinMaxInt(info, intrinsic.data & kIntrinsicFlagMin);
+    case kIntrinsicSqrt:
+      return backend->GenInlinedSqrt(info);
+    case kIntrinsicCharAt:
+      return backend->GenInlinedCharAt(info);
+    case kIntrinsicCompareTo:
+      return backend->GenInlinedStringCompareTo(info);
+    case kIntrinsicIsEmptyOrLength:
+      return backend->GenInlinedStringIsEmptyOrLength(info, intrinsic.data & kIntrinsicFlagIsEmpty);
+    case kIntrinsicIndexOf:
+      return backend->GenInlinedIndexOf(info, intrinsic.data & kIntrinsicFlagBase0);
+    case kIntrinsicCurrentThread:
+      return backend->GenInlinedCurrentThread(info);
+    case kIntrinsicPeek:
+      return backend->GenInlinedPeek(info, static_cast<OpSize>(intrinsic.data));
+    case kIntrinsicPoke:
+      return backend->GenInlinedPoke(info, static_cast<OpSize>(intrinsic.data));
+    case kIntrinsicCas32:
+      return backend->GenInlinedCas32(info, intrinsic.data & kIntrinsicFlagNeedWriteBarrier);
+    case kIntrinsicUnsafeGet:
+      return backend->GenInlinedUnsafeGet(info, intrinsic.data & kIntrinsicFlagIsLong,
+                                          intrinsic.data & kIntrinsicFlagIsVolatile);
+    case kIntrinsicUnsafePut:
+      return backend->GenInlinedUnsafePut(info, intrinsic.data & kIntrinsicFlagIsLong,
+                                          intrinsic.data & kIntrinsicFlagIsObject,
+                                          intrinsic.data & kIntrinsicFlagIsVolatile,
+                                          intrinsic.data & kIntrinsicFlagIsOrdered);
+    default:
+      LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
+      return false;  // avoid warning "control reaches end of non-void function"
+  }
+}
+
+uint32_t DexFileMethodInliner::FindClassIndex(const DexFile* dex_file, IndexCache* cache,
+                                              ClassCacheIndex index) {
+  uint32_t* class_index = &cache->class_indexes[index];
+  if (*class_index != kIndexUnresolved) {
+    return *class_index;
+  }
+
+  const DexFile::StringId* string_id = dex_file->FindStringId(kClassCacheNames[index]);
+  if (string_id == nullptr) {
+    *class_index = kIndexNotFound;
+    return *class_index;
+  }
+  uint32_t string_index = dex_file->GetIndexForStringId(*string_id);
+
+  const DexFile::TypeId* type_id = dex_file->FindTypeId(string_index);
+  if (type_id == nullptr) {
+    *class_index = kIndexNotFound;
+    return *class_index;
+  }
+  *class_index = dex_file->GetIndexForTypeId(*type_id);
+  return *class_index;
+}
+
+uint32_t DexFileMethodInliner::FindNameIndex(const DexFile* dex_file, IndexCache* cache,
+                                             NameCacheIndex index) {
+  uint32_t* name_index = &cache->name_indexes[index];
+  if (*name_index != kIndexUnresolved) {
+    return *name_index;
+  }
+
+  const DexFile::StringId* string_id = dex_file->FindStringId(kNameCacheNames[index]);
+  if (string_id == nullptr) {
+    *name_index = kIndexNotFound;
+    return *name_index;
+  }
+  *name_index = dex_file->GetIndexForStringId(*string_id);
+  return *name_index;
+}
+
+uint32_t DexFileMethodInliner::FindProtoIndex(const DexFile* dex_file, IndexCache* cache,
+                                              ProtoCacheIndex index) {
+  uint32_t* proto_index = &cache->proto_indexes[index];
+  if (*proto_index != kIndexUnresolved) {
+    return *proto_index;
+  }
+
+  const ProtoDef& proto_def = kProtoCacheDefs[index];
+  uint32_t return_index = FindClassIndex(dex_file, cache, proto_def.return_type);
+  if (return_index == kIndexNotFound) {
+    *proto_index = kIndexNotFound;
+    return *proto_index;
+  }
+  uint16_t return_type = static_cast<uint16_t>(return_index);
+  DCHECK_EQ(static_cast<uint32_t>(return_type), return_index);
+
+  uint32_t signature_length = proto_def.param_count;
+  uint16_t signature_type_idxs[kProtoMaxParams];
+  for (uint32_t i = 0; i != signature_length; ++i) {
+    uint32_t param_index = FindClassIndex(dex_file, cache, proto_def.params[i]);
+    if (param_index == kIndexNotFound) {
+      *proto_index = kIndexNotFound;
+      return *proto_index;
+    }
+    signature_type_idxs[i] = static_cast<uint16_t>(param_index);
+    DCHECK_EQ(static_cast<uint32_t>(signature_type_idxs[i]), param_index);
+  }
+
+  const DexFile::ProtoId* proto_id = dex_file->FindProtoId(return_type, signature_type_idxs,
+                                                           signature_length);
+  if (proto_id == nullptr) {
+    *proto_index = kIndexNotFound;
+    return *proto_index;
+  }
+  *proto_index = dex_file->GetIndexForProtoId(*proto_id);
+  return *proto_index;
+}
+
+uint32_t DexFileMethodInliner::FindMethodIndex(const DexFile* dex_file, IndexCache* cache,
+                                               const MethodDef& method_def) {
+  uint32_t declaring_class_index = FindClassIndex(dex_file, cache, method_def.declaring_class);
+  if (declaring_class_index == kIndexNotFound) {
+    return kIndexNotFound;
+  }
+  uint32_t name_index = FindNameIndex(dex_file, cache, method_def.name);
+  if (name_index == kIndexNotFound) {
+    return kIndexNotFound;
+  }
+  uint32_t proto_index = FindProtoIndex(dex_file, cache, method_def.proto);
+  if (proto_index == kIndexNotFound) {
+    return kIndexNotFound;
+  }
+  const DexFile::MethodId* method_id =
+      dex_file->FindMethodId(dex_file->GetTypeId(declaring_class_index),
+                             dex_file->GetStringId(name_index),
+                             dex_file->GetProtoId(proto_index));
+  if (method_id == nullptr) {
+    return kIndexNotFound;
+  }
+  return dex_file->GetIndexForMethodId(*method_id);
+}
+
+DexFileMethodInliner::IndexCache::IndexCache() {
+  std::fill_n(class_indexes, arraysize(class_indexes), kIndexUnresolved);
+  std::fill_n(name_indexes, arraysize(name_indexes), kIndexUnresolved);
+  std::fill_n(proto_indexes, arraysize(proto_indexes), kIndexUnresolved);
+}
+
+void DexFileMethodInliner::DoFindIntrinsics(const DexFile* dex_file, IndexCache* cache,
+                                            const IntrinsicDef* defs, uint32_t def_count) {
+  DCHECK(dex_file != nullptr);
+  DCHECK(dex_file_ == nullptr);
+  for (uint32_t i = 0u; i != def_count; ++i) {
+    uint32_t method_id = FindMethodIndex(dex_file, cache, defs[i].method_def);
+    if (method_id != kIndexNotFound) {
+      DCHECK(intrinsics_.find(method_id) == intrinsics_.end());
+      intrinsics_[method_id] = defs[i].intrinsic;
+    }
+  }
+  dex_file_ = dex_file;
+}
+
+}  // namespace art
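
As a minimal, self-contained sketch of the dispatch pattern implemented above: the inliner keeps an index-keyed map of {opcode, data} records, and GenIntrinsic decodes the packed data word into arguments for a backend helper. The types and helper functions below are illustrative stand-ins, not the ART classes from this patch.

    #include <cstdint>
    #include <map>

    // Stand-ins for the Intrinsic record and the kIntrinsicMinMaxInt flag bit.
    enum class Opcode { kMinMaxInt, kSqrt };
    constexpr uint32_t kFlagMin = 1u;
    struct Record { Opcode opcode; uint32_t data; };

    // Placeholder backend hooks; in the patch these are Mir2Lir::GenInlinedMinMaxInt() etc.
    static bool GenMinMax(bool is_min) { (void)is_min; return true; }
    static bool GenSqrt() { return true; }

    // Same shape as DexFileMethodInliner::GenIntrinsic: find the method index,
    // then decode the packed data word into arguments for the backend helper.
    static bool Dispatch(const std::map<uint32_t, Record>& intrinsics, uint32_t method_index) {
      auto it = intrinsics.find(method_index);
      if (it == intrinsics.end()) {
        return false;  // not an intrinsic; the caller emits a normal invoke
      }
      const Record& r = it->second;
      switch (r.opcode) {
        case Opcode::kMinMaxInt:
          return GenMinMax((r.data & kFlagMin) != 0);  // the low bit selects min vs. max
        case Opcode::kSqrt:
          return GenSqrt();
      }
      return false;
    }
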
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
new file mode 100644
index 0000000..95b8dd3
--- /dev/null
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -0,0 +1,318 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
+
+#include <stdint.h>
+#include <map>
+
+namespace art {
+
+class CallInfo;
+class DexFile;
+class Mir2Lir;
+
+enum IntrinsicOpcode {
+  kIntrinsicDoubleCvt,
+  kIntrinsicFloatCvt,
+  kIntrinsicReverseBytes,
+  kIntrinsicAbsInt,
+  kIntrinsicAbsLong,
+  kIntrinsicMinMaxInt,
+  kIntrinsicSqrt,
+  kIntrinsicCharAt,
+  kIntrinsicCompareTo,
+  kIntrinsicIsEmptyOrLength,
+  kIntrinsicIndexOf,
+  kIntrinsicCurrentThread,
+  kIntrinsicPeek,
+  kIntrinsicPoke,
+  kIntrinsicCas32,
+  kIntrinsicUnsafeGet,
+  kIntrinsicUnsafePut,
+};
+
+enum IntrinsicFlags {
+  kIntrinsicFlagNone = 0,
+
+  // kIntrinsicMinMaxInt
+  kIntrinsicFlagMax = kIntrinsicFlagNone,
+  kIntrinsicFlagMin = 1,
+
+  // kIntrinsicIsEmptyOrLength
+  kIntrinsicFlagLength  = kIntrinsicFlagNone,
+  kIntrinsicFlagIsEmpty = 1,
+
+  // kIntrinsicIndexOf
+  kIntrinsicFlagBase0 = 1,
+
+  // kIntrinsicCas32
+  kIntrinsicFlagDontNeedWriteBarrier = 0,
+  kIntrinsicFlagNeedWriteBarrier = 1,
+
+  // kIntrinsicUnsafeGet, kIntrinsicUnsafePut
+  kIntrinsicFlagIsLong     = 1,
+  kIntrinsicFlagIsVolatile = 2,
+  // kIntrinsicUnsafePut
+  kIntrinsicFlagIsObject   = 4,
+  kIntrinsicFlagIsOrdered  = 8,
+};
+
+struct Intrinsic {
+  IntrinsicOpcode opcode;
+  uint32_t data;
+};
+
+/**
+ * Handles inlining of methods from a particular DexFile.
+ *
+ * Intrinsics are a special case of inline methods. The DexFile indices for
+ * all the supported intrinsic methods are looked up once by the FindIntrinsics
+ * function and cached by this class for quick lookup by the method index.
+ *
+ * TODO: Detect short methods (at least getters, setters and empty functions)
+ * from the verifier and mark them for inlining. Inline these methods early
+ * during compilation to allow further optimizations. Similarly, provide
+ * additional information about intrinsics to the early phases of compilation.
+ */
+class DexFileMethodInliner {
+  public:
+    virtual ~DexFileMethodInliner();
+
+    /**
+     * Find all known intrinsic methods in the dex_file and cache their indices.
+     */
+    virtual void FindIntrinsics(const DexFile* dex_file) = 0;
+
+    /**
+     * Check whether a particular method index corresponds to an intrinsic function.
+     */
+    bool IsIntrinsic(uint32_t method_index) const;
+
+    /**
+     * Generate code for an intrinsic function invocation.
+     *
+     * TODO: This should be target-specific. For the time being,
+     * it's shared since it dispatches everything to the backend.
+     */
+    bool GenIntrinsic(Mir2Lir* backend, CallInfo* info) const;
+
+  protected:
+    DexFileMethodInliner();
+
+    /**
+     * To avoid multiple lookups of a class by its descriptor, we cache its
+     * type index in the IndexCache. These are the indexes into the IndexCache
+     * class_indexes array.
+     */
+    enum ClassCacheIndex : uint8_t {  // uint8_t to save space, make larger if needed
+      kClassCacheFirst = 0,
+      kClassCacheBoolean = kClassCacheFirst,
+      kClassCacheByte,
+      kClassCacheChar,
+      kClassCacheShort,
+      kClassCacheInt,
+      kClassCacheLong,
+      kClassCacheFloat,
+      kClassCacheDouble,
+      kClassCacheVoid,
+      kClassCacheJavaLangObject,
+      kClassCacheJavaLangString,
+      kClassCacheJavaLangDouble,
+      kClassCacheJavaLangFloat,
+      kClassCacheJavaLangInteger,
+      kClassCacheJavaLangLong,
+      kClassCacheJavaLangShort,
+      kClassCacheJavaLangMath,
+      kClassCacheJavaLangStrictMath,
+      kClassCacheJavaLangThread,
+      kClassCacheLibcoreIoMemory,
+      kClassCacheSunMiscUnsafe,
+      kClassCacheLast
+    };
+
+    /**
+     * To avoid multiple lookups of a method name string, we cache its string
+     * index in the IndexCache. These are the indexes into the IndexCache
+     * name_indexes array.
+     */
+    enum NameCacheIndex : uint8_t {  // uint8_t to save space, make larger if needed
+      kNameCacheFirst = 0,
+      kNameCacheReverseBytes = kNameCacheFirst,
+      kNameCacheDoubleToRawLongBits,
+      kNameCacheLongBitsToDouble,
+      kNameCacheFloatToRawIntBits,
+      kNameCacheIntBitsToFloat,
+      kNameCacheAbs,
+      kNameCacheMax,
+      kNameCacheMin,
+      kNameCacheSqrt,
+      kNameCacheCharAt,
+      kNameCacheCompareTo,
+      kNameCacheIsEmpty,
+      kNameCacheIndexOf,
+      kNameCacheLength,
+      kNameCacheCurrentThread,
+      kNameCachePeekByte,
+      kNameCachePeekIntNative,
+      kNameCachePeekLongNative,
+      kNameCachePeekShortNative,
+      kNameCachePokeByte,
+      kNameCachePokeIntNative,
+      kNameCachePokeLongNative,
+      kNameCachePokeShortNative,
+      kNameCacheCompareAndSwapInt,
+      kNameCacheCompareAndSwapObject,
+      kNameCacheGetInt,
+      kNameCacheGetIntVolatile,
+      kNameCachePutInt,
+      kNameCachePutIntVolatile,
+      kNameCachePutOrderedInt,
+      kNameCacheGetLong,
+      kNameCacheGetLongVolatile,
+      kNameCachePutLong,
+      kNameCachePutLongVolatile,
+      kNameCachePutOrderedLong,
+      kNameCacheGetObject,
+      kNameCacheGetObjectVolatile,
+      kNameCachePutObject,
+      kNameCachePutObjectVolatile,
+      kNameCachePutOrderedObject,
+      kNameCacheLast
+    };
+
+    /**
+     * To avoid multiple lookups of a method signature, we cache its proto
+     * index in the IndexCache. These are the indexes into the IndexCache
+     * proto_indexes array.
+     */
+    enum ProtoCacheIndex : uint8_t {  // uint8_t to save space, make larger if needed
+      kProtoCacheFirst = 0,
+      kProtoCacheI_I = kProtoCacheFirst,
+      kProtoCacheJ_J,
+      kProtoCacheS_S,
+      kProtoCacheD_D,
+      kProtoCacheD_J,
+      kProtoCacheJ_D,
+      kProtoCacheF_I,
+      kProtoCacheI_F,
+      kProtoCacheII_I,
+      kProtoCacheI_C,
+      kProtoCacheString_I,
+      kProtoCache_Z,
+      kProtoCache_I,
+      kProtoCache_Thread,
+      kProtoCacheJ_B,
+      kProtoCacheJ_I,
+      kProtoCacheJ_S,
+      kProtoCacheJB_V,
+      kProtoCacheJI_V,
+      kProtoCacheJJ_V,
+      kProtoCacheJS_V,
+      kProtoCacheObjectJII_Z,
+      kProtoCacheObjectJObjectObject_Z,
+      kProtoCacheObjectJ_I,
+      kProtoCacheObjectJI_V,
+      kProtoCacheObjectJ_J,
+      kProtoCacheObjectJJ_V,
+      kProtoCacheObjectJ_Object,
+      kProtoCacheObjectJObject_V,
+      kProtoCacheLast
+    };
+
+    /**
+     * The maximum number of method parameters we support in the ProtoDef.
+     */
+    static constexpr uint32_t kProtoMaxParams = 6;
+
+    /**
+     * The method signature (proto) definition using cached class indexes.
+     * The return_type and params are used with the IndexCache to look up
+     * appropriate class indexes to be passed to DexFile::FindProtoId().
+     */
+    struct ProtoDef {
+      ClassCacheIndex return_type;
+      uint8_t param_count;
+      ClassCacheIndex params[kProtoMaxParams];
+    };
+
+    /**
+     * The method definition using cached class, name and proto indexes.
+     * The class index, method name index and proto index are used with
+     * IndexCache to look up appropriate parameters for DexFile::FindMethodId().
+     */
+    struct MethodDef {
+      ClassCacheIndex declaring_class;
+      NameCacheIndex name;
+      ProtoCacheIndex proto;
+    };
+
+    /**
+     * The definition of an intrinsic function binds the method definition
+     * to an Intrinsic.
+     */
+    struct IntrinsicDef {
+      MethodDef method_def;
+      Intrinsic intrinsic;
+    };
+
+    /**
+     * Cache for class, method name and method signature indexes used during
+     * intrinsic function lookup to avoid multiple lookups of the same items.
+     *
+     * Many classes have multiple intrinsics and/or they are used in multiple
+     * method signatures and we want to avoid repeated lookups since they are
+     * not exactly cheap. The method names and method signatures are sometimes
+     * reused and therefore cached as well.
+     */
+    struct IndexCache {
+      IndexCache();
+
+      uint32_t class_indexes[kClassCacheLast - kClassCacheFirst];
+      uint32_t name_indexes[kNameCacheLast - kNameCacheFirst];
+      uint32_t proto_indexes[kProtoCacheLast - kProtoCacheFirst];
+    };
+
+    static const char* kClassCacheNames[];
+    static const char* kNameCacheNames[];
+    static const ProtoDef kProtoCacheDefs[];
+
+    static const uint32_t kIndexNotFound = static_cast<uint32_t>(-1);
+    static const uint32_t kIndexUnresolved = static_cast<uint32_t>(-2);
+
+    static uint32_t FindClassIndex(const DexFile* dex_file, IndexCache* cache,
+                                   ClassCacheIndex index);
+    static uint32_t FindNameIndex(const DexFile* dex_file, IndexCache* cache,
+                                  NameCacheIndex index);
+    static uint32_t FindProtoIndex(const DexFile* dex_file, IndexCache* cache,
+                                   ProtoCacheIndex index);
+    static uint32_t FindMethodIndex(const DexFile* dex_file, IndexCache* cache,
+                                    const MethodDef& method_def);
+
+    void DoFindIntrinsics(const DexFile* dex_file, IndexCache* cache,
+                          const IntrinsicDef* defs, uint32_t def_count);
+
+    /*
+     * Maps method indexes (for the particular DexFile) to Intrinsic definitions.
+     */
+    std::map<uint32_t, Intrinsic> intrinsics_;
+    const DexFile* dex_file_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
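
The three static Find*Index helpers declared above share one memoization scheme: a cache slot starts out as kIndexUnresolved, a successful lookup stores the real index, and a failed lookup is remembered as kIndexNotFound so it is never retried. A small sketch of that scheme with a generic resolver (the function and variable names are illustrative only):

    #include <cstdint>
    #include <functional>

    constexpr uint32_t kNotFound = static_cast<uint32_t>(-1);    // lookup failed; cached as a miss
    constexpr uint32_t kUnresolved = static_cast<uint32_t>(-2);  // lookup not attempted yet

    // Resolve through the cache slot at most once; hits and misses are both cached.
    uint32_t CachedLookup(uint32_t* slot, const std::function<uint32_t()>& resolve) {
      if (*slot == kUnresolved) {
        *slot = resolve();  // e.g. DexFile::FindStringId() + GetIndexForStringId(), or kNotFound
      }
      return *slot;
    }
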
diff --git a/compiler/dex/quick/dex_file_to_method_inliner_map.cc b/compiler/dex/quick/dex_file_to_method_inliner_map.cc
new file mode 100644
index 0000000..56a42bc
--- /dev/null
+++ b/compiler/dex/quick/dex_file_to_method_inliner_map.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <utility>
+#include "thread.h"
+#include "thread-inl.h"
+#include "base/mutex.h"
+#include "base/mutex-inl.h"
+#include "base/logging.h"
+#include "driver/compiler_driver.h"
+#include "dex/quick/arm/arm_dex_file_method_inliner.h"
+#include "dex/quick/mips/mips_dex_file_method_inliner.h"
+#include "dex/quick/x86/x86_dex_file_method_inliner.h"
+
+#include "dex_file_to_method_inliner_map.h"
+
+namespace art {
+
+DexFileToMethodInlinerMap::DexFileToMethodInlinerMap(const CompilerDriver* compiler)
+    : compiler_(compiler),
+      mutex_("inline_helper_mutex") {
+}
+
+DexFileToMethodInlinerMap::~DexFileToMethodInlinerMap() {
+  for (auto& entry : inliners_) {
+    delete entry.second;
+  }
+}
+
+const DexFileMethodInliner& DexFileToMethodInlinerMap::GetMethodInliner(const DexFile* dex_file) {
+  Thread* self = Thread::Current();
+  {
+    ReaderMutexLock lock(self, mutex_);
+    auto it = inliners_.find(dex_file);
+    if (it != inliners_.end()) {
+      return *it->second;
+    }
+  }
+
+  WriterMutexLock lock(self, mutex_);
+  DexFileMethodInliner** inliner = &inliners_[dex_file];  // inserts new entry if not found
+  if (*inliner) {
+    return **inliner;
+  }
+  switch (compiler_->GetInstructionSet()) {
+    case kThumb2:
+      *inliner = new ArmDexFileMethodInliner;
+      break;
+    case kX86:
+      *inliner = new X86DexFileMethodInliner;
+      break;
+    case kMips:
+      *inliner = new MipsDexFileMethodInliner;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected instruction set: " << compiler_->GetInstructionSet();
+  }
+  DCHECK(*inliner != nullptr);
+  // TODO: per-dex file locking for the intrinsics container filling.
+  (*inliner)->FindIntrinsics(dex_file);
+  return **inliner;
+}
+
+}  // namespace art
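
GetMethodInliner above follows a read-mostly, double-checked pattern: probe the map under a reader lock, and take the writer lock only to create and register the per-DexFile inliner, re-checking the slot after the lock switch. A self-contained sketch of the same pattern, with std::shared_mutex standing in for ART's ReaderWriterMutex and placeholder key/value types:

    #include <map>
    #include <memory>
    #include <mutex>
    #include <shared_mutex>

    struct Inliner { int dummy = 0; };  // placeholder for DexFileMethodInliner

    class InlinerMap {
     public:
      const Inliner& Get(const void* key) {
        {
          std::shared_lock<std::shared_mutex> read_lock(mutex_);  // fast path: shared access
          auto it = map_.find(key);
          if (it != map_.end()) {
            return *it->second;
          }
        }
        std::unique_lock<std::shared_mutex> write_lock(mutex_);  // slow path: exclusive access
        std::unique_ptr<Inliner>& slot = map_[key];  // inserts an empty slot if absent
        if (slot == nullptr) {  // re-check: another thread may have won the race between locks
          slot.reset(new Inliner);
        }
        return *slot;
      }

     private:
      std::shared_mutex mutex_;
      std::map<const void*, std::unique_ptr<Inliner>> map_;
    };

The write path reuses operator[] so the race re-check and the insertion share a single map lookup, mirroring the inliners_[dex_file] line in the patch.
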
diff --git a/compiler/dex/quick/dex_file_to_method_inliner_map.h b/compiler/dex/quick/dex_file_to_method_inliner_map.h
new file mode 100644
index 0000000..77f2648
--- /dev/null
+++ b/compiler/dex/quick/dex_file_to_method_inliner_map.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_DEX_FILE_TO_METHOD_INLINER_MAP_H_
+#define ART_COMPILER_DEX_QUICK_DEX_FILE_TO_METHOD_INLINER_MAP_H_
+
+#include <map>
+#include <vector>
+#include "base/macros.h"
+#include "base/mutex.h"
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class CompilerDriver;
+class DexFile;
+
+/**
+ * Map each DexFile to its DexFileMethodInliner.
+ *
+ * The method inliner is created and initialized the first time it's requested
+ * for a particular DexFile.
+ */
+class DexFileToMethodInlinerMap {
+  public:
+    explicit DexFileToMethodInlinerMap(const CompilerDriver* compiler);
+    ~DexFileToMethodInlinerMap();
+
+    const DexFileMethodInliner& GetMethodInliner(const DexFile* dex_file) LOCKS_EXCLUDED(mutex_);
+
+  private:
+    const CompilerDriver* const compiler_;
+    ReaderWriterMutex mutex_;
+    std::map<const DexFile*, DexFileMethodInliner*> inliners_ GUARDED_BY(mutex_);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_DEX_FILE_TO_METHOD_INLINER_MAP_H_
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index d1a9a13..469c577 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -15,6 +15,9 @@
  */
 
 #include "dex/compiler_ir.h"
+#include "dex/frontend.h"
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex_file-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "invoke_type.h"
@@ -1227,201 +1230,16 @@
   return true;
 }
 
-bool Mir2Lir::GenIntrinsic(CallInfo* info) {
-  if (info->opt_flags & MIR_INLINED) {
-    return false;
-  }
-  /*
-   * TODO: move these to a target-specific structured constant array
-   * and use a generic match function.  The list of intrinsics may be
-   * slightly different depending on target.
-   * TODO: Fold this into a matching function that runs during
-   * basic block building.  This should be part of the action for
-   * small method inlining and recognition of the special object init
-   * method.  By doing this during basic block construction, we can also
-   * take advantage of/generate new useful dataflow info.
-   */
-  const DexFile::MethodId& target_mid = cu_->dex_file->GetMethodId(info->index);
-  const DexFile::TypeId& declaring_type = cu_->dex_file->GetTypeId(target_mid.class_idx_);
-  StringPiece tgt_methods_declaring_class(
-      cu_->dex_file->StringDataByIdx(declaring_type.descriptor_idx_));
-  if (tgt_methods_declaring_class.starts_with("Ljava/lang/")) {
-    tgt_methods_declaring_class.remove_prefix(sizeof("Ljava/lang/") - 1);
-    if (tgt_methods_declaring_class.starts_with("Double;")) {
-      std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-      if (tgt_method == "long java.lang.Double.doubleToRawLongBits(double)") {
-        return GenInlinedDoubleCvt(info);
-      }
-      if (tgt_method == "double java.lang.Double.longBitsToDouble(long)") {
-        return GenInlinedDoubleCvt(info);
-      }
-    } else if (tgt_methods_declaring_class.starts_with("Float;")) {
-      std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-      if (tgt_method == "int java.lang.Float.floatToRawIntBits(float)") {
-        return GenInlinedFloatCvt(info);
-      }
-      if (tgt_method == "float java.lang.Float.intBitsToFloat(int)") {
-        return GenInlinedFloatCvt(info);
-      }
-    } else if (tgt_methods_declaring_class.starts_with("Integer;")) {
-      std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-      if (tgt_method == "int java.lang.Integer.reverseBytes(int)") {
-        return GenInlinedReverseBytes(info, kWord);
-      }
-    } else if (tgt_methods_declaring_class.starts_with("Long;")) {
-      std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-      if (tgt_method == "long java.lang.Long.reverseBytes(long)") {
-        return GenInlinedReverseBytes(info, kLong);
-      }
-    } else if (tgt_methods_declaring_class.starts_with("Math;") ||
-               tgt_methods_declaring_class.starts_with("StrictMath;")) {
-      std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-      if (tgt_method == "int java.lang.Math.abs(int)" ||
-          tgt_method == "int java.lang.StrictMath.abs(int)") {
-        return GenInlinedAbsInt(info);
-      }
-      if (tgt_method == "long java.lang.Math.abs(long)" ||
-          tgt_method == "long java.lang.StrictMath.abs(long)") {
-        return GenInlinedAbsLong(info);
-      }
-      if (tgt_method == "int java.lang.Math.max(int, int)" ||
-          tgt_method == "int java.lang.StrictMath.max(int, int)") {
-        return GenInlinedMinMaxInt(info, false /* is_min */);
-      }
-      if (tgt_method == "int java.lang.Math.min(int, int)" ||
-          tgt_method == "int java.lang.StrictMath.min(int, int)") {
-        return GenInlinedMinMaxInt(info, true /* is_min */);
-      }
-      if (tgt_method == "double java.lang.Math.sqrt(double)" ||
-          tgt_method == "double java.lang.StrictMath.sqrt(double)") {
-        return GenInlinedSqrt(info);
-      }
-    } else if (tgt_methods_declaring_class.starts_with("Short;")) {
-      std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-      if (tgt_method == "short java.lang.Short.reverseBytes(short)") {
-        return GenInlinedReverseBytes(info, kSignedHalf);
-      }
-    } else if (tgt_methods_declaring_class.starts_with("String;")) {
-      std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-      if (tgt_method == "char java.lang.String.charAt(int)") {
-        return GenInlinedCharAt(info);
-      }
-      if (tgt_method == "int java.lang.String.compareTo(java.lang.String)") {
-        return GenInlinedStringCompareTo(info);
-      }
-      if (tgt_method == "boolean java.lang.String.is_empty()") {
-        return GenInlinedStringIsEmptyOrLength(info, true /* is_empty */);
-      }
-      if (tgt_method == "int java.lang.String.index_of(int, int)") {
-        return GenInlinedIndexOf(info, false /* base 0 */);
-      }
-      if (tgt_method == "int java.lang.String.index_of(int)") {
-        return GenInlinedIndexOf(info, true /* base 0 */);
-      }
-      if (tgt_method == "int java.lang.String.length()") {
-        return GenInlinedStringIsEmptyOrLength(info, false /* is_empty */);
-      }
-    } else if (tgt_methods_declaring_class.starts_with("Thread;")) {
-      std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-      if (tgt_method == "java.lang.Thread java.lang.Thread.currentThread()") {
-        return GenInlinedCurrentThread(info);
-      }
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Llibcore/io/Memory;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "byte libcore.io.Memory.peekByte(long)") {
-      return GenInlinedPeek(info, kSignedByte);
-    }
-    if (tgt_method == "int libcore.io.Memory.peekIntNative(long)") {
-      return GenInlinedPeek(info, kWord);
-    }
-    if (tgt_method == "long libcore.io.Memory.peekLongNative(long)") {
-      return GenInlinedPeek(info, kLong);
-    }
-    if (tgt_method == "short libcore.io.Memory.peekShortNative(long)") {
-      return GenInlinedPeek(info, kSignedHalf);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeByte(long, byte)") {
-      return GenInlinedPoke(info, kSignedByte);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeIntNative(long, int)") {
-      return GenInlinedPoke(info, kWord);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeLongNative(long, long)") {
-      return GenInlinedPoke(info, kLong);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeShortNative(long, short)") {
-      return GenInlinedPoke(info, kSignedHalf);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Lsun/misc/Unsafe;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") {
-      return GenInlinedCas32(info, false);
-    }
-    if (tgt_method == "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") {
-      return GenInlinedCas32(info, true);
-    }
-    if (tgt_method == "int sun.misc.Unsafe.getInt(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, false /* is_volatile */);
-    }
-    if (tgt_method == "int sun.misc.Unsafe.getIntVolatile(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, true /* is_volatile */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putInt(java.lang.Object, long, int)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putIntVolatile(java.lang.Object, long, int)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
-                                 true /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putOrderedInt(java.lang.Object, long, int)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, true /* is_ordered */);
-    }
-    if (tgt_method == "long sun.misc.Unsafe.getLong(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, true /* is_long */, false /* is_volatile */);
-    }
-    if (tgt_method == "long sun.misc.Unsafe.getLongVolatile(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, true /* is_long */, true /* is_volatile */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putLong(java.lang.Object, long, long)") {
-      return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putLongVolatile(java.lang.Object, long, long)") {
-      return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
-                                 true /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putOrderedLong(java.lang.Object, long, long)") {
-      return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, true /* is_ordered */);
-    }
-    if (tgt_method == "java.lang.Object sun.misc.Unsafe.getObject(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, false /* is_volatile */);
-    }
-    if (tgt_method == "java.lang.Object sun.misc.Unsafe.getObjectVolatile(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, true /* is_volatile */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */,
-                                 false /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putObjectVolatile(java.lang.Object, long, java.lang.Object)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */,
-                                 true /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */,
-                                 false /* is_volatile */, true /* is_ordered */);
-    }
-  }
-  return false;
-}
-
 void Mir2Lir::GenInvoke(CallInfo* info) {
-  if (GenIntrinsic(info)) {
-    return;
+  if (!(info->opt_flags & MIR_INLINED)) {
+    if (inliner_ == nullptr) {
+      QuickCompilerContext* context = reinterpret_cast<QuickCompilerContext*>(
+          cu_->compiler_driver->GetCompilerContext());
+      inliner_ = &context->GetInlinerMap()->GetMethodInliner(cu_->dex_file);
+    }
+    if (inliner_->GenIntrinsic(this, info)) {
+      return;
+    }
   }
   InvokeType original_type = info->type;  // avoiding mutation by ComputeInvokeInfo
   int call_state = 0;
diff --git a/compiler/dex/quick/mips/mips_dex_file_method_inliner.cc b/compiler/dex/quick/mips/mips_dex_file_method_inliner.cc
new file mode 100644
index 0000000..e5345ec
--- /dev/null
+++ b/compiler/dex/quick/mips/mips_dex_file_method_inliner.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "dex/compiler_enums.h"
+
+#include "mips_dex_file_method_inliner.h"
+
+namespace art {
+
+const DexFileMethodInliner::IntrinsicDef MipsDexFileMethodInliner::kIntrinsicMethods[] = {
+#define INTRINSIC(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, d } }
+
+    // INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
+    // INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    // INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
+    // INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+
+    // INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, kWord),
+    // INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, kLong),
+    // INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
+
+    // INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
+    // INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
+    // INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
+    // INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    // INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    // INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    // INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    // INTRINSIC(JavaLangStrictMath, Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    // INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
+    // INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
+
+    // INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
+    // INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    // INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
+    // INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
+    // INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
+    // INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
+
+    INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
+
+    INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
+    // INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, kWord),
+    // INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, kLong),
+    // INTRINSIC(LibcoreIoMemory, PeekShortNative, J_S, kIntrinsicPeek, kSignedHalf),
+    INTRINSIC(LibcoreIoMemory, PokeByte, JB_V, kIntrinsicPoke, kSignedByte),
+    // INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, kWord),
+    // INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, kLong),
+    // INTRINSIC(LibcoreIoMemory, PokeShortNative, JS_V, kIntrinsicPoke, kSignedHalf),
+
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas32,
+    //           kIntrinsicFlagDontNeedWriteBarrier),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas32,
+    //           kIntrinsicFlagNeedWriteBarrier),
+
+#define UNSAFE_GET_PUT(type, code, type_flags) \
+    INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              type_flags & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              (type_flags | kIntrinsicFlagIsVolatile) & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags), \
+    INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsVolatile), \
+    INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsOrdered)
+
+    // UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
+    // UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
+    // UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
+#undef UNSAFE_GET_PUT
+
+#undef INTRINSIC
+};
+
+MipsDexFileMethodInliner::MipsDexFileMethodInliner() {
+}
+
+MipsDexFileMethodInliner::~MipsDexFileMethodInliner() {
+}
+
+void MipsDexFileMethodInliner::FindIntrinsics(const DexFile* dex_file) {
+  IndexCache cache;
+  DoFindIntrinsics(dex_file, &cache, kIntrinsicMethods, arraysize(kIntrinsicMethods));
+}
+
+}  // namespace art
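
For reference, the INTRINSIC macro used in these tables is a thin wrapper around an IntrinsicDef aggregate initializer; token pasting adds the kClassCache/kNameCache/kProtoCache prefixes. Expanding the first active MIPS entry by hand gives:

    // INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0)
    // expands to the IntrinsicDef initializer:
    { { kClassCacheJavaLangThread, kNameCacheCurrentThread, kProtoCache_Thread },
      { kIntrinsicCurrentThread, 0 } },
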
diff --git a/compiler/dex/quick/mips/mips_dex_file_method_inliner.h b/compiler/dex/quick/mips/mips_dex_file_method_inliner.h
new file mode 100644
index 0000000..8fe7ec7
--- /dev/null
+++ b/compiler/dex/quick/mips/mips_dex_file_method_inliner.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_MIPS_MIPS_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_MIPS_MIPS_DEX_FILE_METHOD_INLINER_H_
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class MipsDexFileMethodInliner : public DexFileMethodInliner {
+  public:
+    MipsDexFileMethodInliner();
+    ~MipsDexFileMethodInliner();
+
+    void FindIntrinsics(const DexFile* dex_file);
+
+  private:
+    static const IntrinsicDef kIntrinsicMethods[];
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_MIPS_MIPS_DEX_FILE_METHOD_INLINER_H_
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 1a30b7a..f567b5c 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -198,6 +198,10 @@
     SetupRegMask(&lir->u.m.use_mask, lir->operands[3]);
   }
 
+  if (flags & REG_USE4) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[4]);
+  }
+
   if (flags & SETS_CCODES) {
     lir->u.m.def_mask |= ENCODE_CCODE;
   }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4c56b74..58a77c7 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -106,6 +106,7 @@
 struct LIR;
 struct RegLocation;
 struct RegisterInfo;
+class DexFileMethodInliner;
 class MIRGraph;
 class Mir2Lir;
 
@@ -555,7 +556,6 @@
     bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
     bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
                              bool is_volatile, bool is_ordered);
-    bool GenIntrinsic(CallInfo* info);
     int LoadArgRegs(CallInfo* info, int call_state,
                     NextCallInsn next_call_insn,
                     const MethodReference& target_method,
@@ -837,6 +837,8 @@
     unsigned int fp_spill_mask_;
     LIR* first_lir_insn_;
     LIR* last_lir_insn_;
+    // Lazily retrieved method inliner for intrinsics.
+    const DexFileMethodInliner* inliner_;
 };  // Class Mir2Lir
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/x86_dex_file_method_inliner.cc b/compiler/dex/quick/x86/x86_dex_file_method_inliner.cc
new file mode 100644
index 0000000..87f881a
--- /dev/null
+++ b/compiler/dex/quick/x86/x86_dex_file_method_inliner.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "dex/compiler_enums.h"
+
+#include "x86_dex_file_method_inliner.h"
+
+namespace art {
+
+const DexFileMethodInliner::IntrinsicDef X86DexFileMethodInliner::kIntrinsicMethods[] = {
+#define INTRINSIC(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, d } }
+
+    INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
+    INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+
+    INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, kWord),
+    INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, kLong),
+    INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
+
+    INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangStrictMath, Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    // INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
+    // INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
+
+    INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
+    INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
+    INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
+    INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
+    INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
+
+    INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
+
+    INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, kWord),
+    INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, kLong),
+    INTRINSIC(LibcoreIoMemory, PeekShortNative, J_S, kIntrinsicPeek, kSignedHalf),
+    INTRINSIC(LibcoreIoMemory, PokeByte, JB_V, kIntrinsicPoke, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, kWord),
+    INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, kLong),
+    INTRINSIC(LibcoreIoMemory, PokeShortNative, JS_V, kIntrinsicPoke, kSignedHalf),
+
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas32,
+    //           kIntrinsicFlagDontNeedWriteBarrier),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas32,
+    //           kIntrinsicFlagNeedWriteBarrier),
+
+#define UNSAFE_GET_PUT(type, code, type_flags) \
+    INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              type_flags & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              (type_flags | kIntrinsicFlagIsVolatile) & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags), \
+    INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsVolatile), \
+    INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsOrdered)
+
+    UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
+    UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
+
+    // UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
+    // PutObject: "TODO: fix X86, it exhausts registers for card marking."
+    INTRINSIC(SunMiscUnsafe, GetObject, ObjectJ_Object, kIntrinsicUnsafeGet,
+              kIntrinsicFlagNone),
+    INTRINSIC(SunMiscUnsafe, GetObjectVolatile, ObjectJ_Object, kIntrinsicUnsafeGet,
+              kIntrinsicFlagIsVolatile),
+#undef UNSAFE_GET_PUT
+
+#undef INTRINSIC
+};
+
+X86DexFileMethodInliner::X86DexFileMethodInliner() {
+}
+
+X86DexFileMethodInliner::~X86DexFileMethodInliner() {
+}
+
+void X86DexFileMethodInliner::FindIntrinsics(const DexFile* dex_file) {
+  IndexCache cache;
+  DoFindIntrinsics(dex_file, &cache, kIntrinsicMethods, arraysize(kIntrinsicMethods));
+}
+
+}  // namespace art
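
Similarly, each UNSAFE_GET_PUT instantiation produces five INTRINSIC entries. Expanding the first two entries of UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong) by hand, with the flag values from the header (IsLong = 1, IsVolatile = 2, IsObject = 4), gives:

    // Get ## type -> GetLong, ObjectJ_ ## code -> ObjectJ_J; gets mask off the object bit.
    { { kClassCacheSunMiscUnsafe, kNameCacheGetLong, kProtoCacheObjectJ_J },
      { kIntrinsicUnsafeGet, kIntrinsicFlagIsLong & ~kIntrinsicFlagIsObject } },           // data == 1
    { { kClassCacheSunMiscUnsafe, kNameCacheGetLongVolatile, kProtoCacheObjectJ_J },
      { kIntrinsicUnsafeGet,
        (kIntrinsicFlagIsLong | kIntrinsicFlagIsVolatile) & ~kIntrinsicFlagIsObject } },   // data == 3
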
diff --git a/compiler/dex/quick/x86/x86_dex_file_method_inliner.h b/compiler/dex/quick/x86/x86_dex_file_method_inliner.h
new file mode 100644
index 0000000..7813e44
--- /dev/null
+++ b/compiler/dex/quick/x86/x86_dex_file_method_inliner.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_X86_X86_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_X86_X86_DEX_FILE_METHOD_INLINER_H_
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class X86DexFileMethodInliner : public DexFileMethodInliner {
+  public:
+    X86DexFileMethodInliner();
+    ~X86DexFileMethodInliner();
+
+    void FindIntrinsics(const DexFile* dex_file);
+
+  private:
+    static const IntrinsicDef kIntrinsicMethods[];
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_X86_X86_DEX_FILE_METHOD_INLINER_H_
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d74383e..b9df1d6 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -503,7 +503,7 @@
 
 void CompilerDriver::CompileAll(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
-                                base::TimingLogger& timings) {
+                                TimingLogger& timings) {
   DCHECK(!Runtime::Current()->IsStarted());
   UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
   PreCompile(class_loader, dex_files, *thread_pool.get(), timings);
@@ -546,7 +546,7 @@
   }
 }
 
-void CompilerDriver::CompileOne(const mirror::ArtMethod* method, base::TimingLogger& timings) {
+void CompilerDriver::CompileOne(const mirror::ArtMethod* method, TimingLogger& timings) {
   DCHECK(!Runtime::Current()->IsStarted());
   Thread* self = Thread::Current();
   jobject jclass_loader;
@@ -591,7 +591,7 @@
 }
 
 void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool& thread_pool, base::TimingLogger& timings) {
+                             ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -600,7 +600,7 @@
 }
 
 void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                ThreadPool& thread_pool, TimingLogger& timings) {
   LoadImageClasses(timings);
 
   Resolve(class_loader, dex_files, thread_pool, timings);
@@ -685,7 +685,7 @@
 }
 
 // Make a list of descriptors for classes to include in the image
-void CompilerDriver::LoadImageClasses(base::TimingLogger& timings)
+void CompilerDriver::LoadImageClasses(TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_) {
   if (!IsImage()) {
     return;
@@ -773,7 +773,7 @@
   MaybeAddToImageClasses(object->GetClass(), compiler_driver->image_classes_.get());
 }
 
-void CompilerDriver::UpdateImageClasses(base::TimingLogger& timings) {
+void CompilerDriver::UpdateImageClasses(TimingLogger& timings) {
   if (IsImage()) {
     timings.NewSplit("UpdateImageClasses");
 
@@ -1613,7 +1613,7 @@
 }
 
 void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
-                                    ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                    ThreadPool& thread_pool, TimingLogger& timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: we could resolve strings here, although the string table is largely filled with class
@@ -1632,7 +1632,7 @@
 }
 
 void CompilerDriver::Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                            ThreadPool& thread_pool, base::TimingLogger& timings) {
+                            ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -1686,7 +1686,7 @@
 }
 
 void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
-                                   ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                   ThreadPool& thread_pool, TimingLogger& timings) {
   timings.NewSplit("Verify Dex File");
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool);
@@ -2192,7 +2192,7 @@
 }
 
 void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
-                                       ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                       ThreadPool& thread_pool, TimingLogger& timings) {
   timings.NewSplit("InitializeNoClinit");
 #ifndef NDEBUG
   // Sanity check blacklist descriptors.
@@ -2210,7 +2210,7 @@
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                       ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -2219,7 +2219,7 @@
 }
 
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                       ThreadPool& thread_pool, base::TimingLogger& timings) {
+                       ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -2300,7 +2300,7 @@
 }
 
 void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
-                                    ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                    ThreadPool& thread_pool, TimingLogger& timings) {
   timings.NewSplit("Compile Dex File");
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
                                      &dex_file, thread_pool);
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 9bfea6f..7e81849 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -98,11 +98,11 @@
   ~CompilerDriver();
 
   void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  base::TimingLogger& timings)
+                  TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Compile a single Method
-  void CompileOne(const mirror::ArtMethod* method, base::TimingLogger& timings)
+  void CompileOne(const mirror::ArtMethod* method, TimingLogger& timings)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const InstructionSet& GetInstructionSet() const {
@@ -340,43 +340,43 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  ThreadPool& thread_pool, base::TimingLogger& timings)
+                  ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  void LoadImageClasses(base::TimingLogger& timings);
+  void LoadImageClasses(TimingLogger& timings);
 
   // Attempt to resolve all type, methods, fields, and strings
   // referenced from code in the dex file following PathClassLoader
   // ordering semantics.
   void Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool& thread_pool, base::TimingLogger& timings)
+               ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void ResolveDexFile(jobject class_loader, const DexFile& dex_file,
-                      ThreadPool& thread_pool, base::TimingLogger& timings)
+                      ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-              ThreadPool& thread_pool, base::TimingLogger& timings);
+              ThreadPool& thread_pool, TimingLogger& timings);
   void VerifyDexFile(jobject class_loader, const DexFile& dex_file,
-                     ThreadPool& thread_pool, base::TimingLogger& timings)
+                     ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   void InitializeClasses(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                         ThreadPool& thread_pool, base::TimingLogger& timings)
+                         ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void InitializeClasses(jobject class_loader, const DexFile& dex_file,
-                         ThreadPool& thread_pool, base::TimingLogger& timings)
+                         ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_, compiled_classes_lock_);
 
-  void UpdateImageClasses(base::TimingLogger& timings)
+  void UpdateImageClasses(TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   static void FindClinitImageClassesCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool& thread_pool, base::TimingLogger& timings);
+               ThreadPool& thread_pool, TimingLogger& timings);
   void CompileDexFile(jobject class_loader, const DexFile& dex_file,
-                      ThreadPool& thread_pool, base::TimingLogger& timings)
+                      ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags,
                      InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx,
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index bfc93b3..a5eb94f 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -36,12 +36,13 @@
 class CompilerDriverTest : public CommonTest {
  protected:
   void CompileAll(jobject class_loader) LOCKS_EXCLUDED(Locks::mutator_lock_) {
-    base::TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
+    TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
     timings.StartSplit("CompileAll");
     compiler_driver_->CompileAll(class_loader,
                                  Runtime::Current()->GetCompileTimeClassPath(class_loader),
                                  timings);
     MakeAllExecutable(class_loader);
+    timings.EndSplit();
   }
 
   void EnsureCompiled(jobject class_loader, const char* class_name, const char* method,
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 9d9c064..c71cc97 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -46,7 +46,7 @@
     {
       jobject class_loader = NULL;
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      base::TimingLogger timings("ImageTest::WriteRead", false, false);
+      TimingLogger timings("ImageTest::WriteRead", false, false);
       timings.StartSplit("CompileAll");
 #if defined(ART_USE_PORTABLE_COMPILER)
       // TODO: we disable this for portable so the test executes in a reasonable amount of time.
@@ -60,13 +60,14 @@
 
       ScopedObjectAccess soa(Thread::Current());
       OatWriter oat_writer(class_linker->GetBootClassPath(),
-                           0, 0, "", compiler_driver_.get());
+                           0, 0, "", compiler_driver_.get(), &timings);
       bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
                                                 !kIsTargetBuild,
                                                 class_linker->GetBootClassPath(),
                                                 oat_writer,
                                                 tmp_elf.GetFile());
       ASSERT_TRUE(success);
+      timings.EndSplit();
     }
   }
  // Workaround bug that mcld::Linker::emit closes tmp_elf by reopening as tmp_oat.
@@ -98,7 +99,7 @@
     gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
     ASSERT_FALSE(space->IsImageSpace());
     ASSERT_TRUE(space != NULL);
-    ASSERT_TRUE(space->IsDlMallocSpace());
+    ASSERT_TRUE(space->IsMallocSpace());
     ASSERT_GE(sizeof(image_header) + space->Size(), static_cast<size_t>(file->GetLength()));
   }
 
@@ -140,7 +141,7 @@
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
   ASSERT_TRUE(heap->HasImageSpace());
-  ASSERT_TRUE(heap->GetNonMovingSpace()->IsDlMallocSpace());
+  ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
 
   gc::space::ImageSpace* image_space = heap->GetImageSpace();
   image_space->VerifyImageAllocations();
diff --git a/compiler/leb128_encoder_test.cc b/compiler/leb128_encoder_test.cc
new file mode 100644
index 0000000..4fa8075
--- /dev/null
+++ b/compiler/leb128_encoder_test.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/histogram-inl.h"
+#include "common_test.h"
+#include "leb128.h"
+#include "leb128_encoder.h"
+
+namespace art {
+
+class Leb128Test : public CommonTest {};
+
+struct DecodeUnsignedLeb128TestCase {
+  uint32_t decoded;
+  uint8_t leb128_data[5];
+};
+
+static DecodeUnsignedLeb128TestCase uleb128_tests[] = {
+    {0,          {0, 0, 0, 0, 0}},
+    {1,          {1, 0, 0, 0, 0}},
+    {0x7F,       {0x7F, 0, 0, 0, 0}},
+    {0x80,       {0x80, 1, 0, 0, 0}},
+    {0x81,       {0x81, 1, 0, 0, 0}},
+    {0xFF,       {0xFF, 1, 0, 0, 0}},
+    {0x4000,     {0x80, 0x80, 1, 0, 0}},
+    {0x4001,     {0x81, 0x80, 1, 0, 0}},
+    {0x4081,     {0x81, 0x81, 1, 0, 0}},
+    {0x0FFFFFFF, {0xFF, 0xFF, 0xFF, 0x7F, 0}},
+    {0xFFFFFFFF, {0xFF, 0xFF, 0xFF, 0xFF, 0xF}},
+};
+
+TEST_F(Leb128Test, Singles) {
+  // Test individual encodings.
+  for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
+    UnsignedLeb128EncodingVector builder;
+    builder.PushBack(uleb128_tests[i].decoded);
+    const uint8_t* data_ptr = &uleb128_tests[i].leb128_data[0];
+    const uint8_t* encoded_data_ptr = &builder.GetData()[0];
+    for (size_t j = 0; j < 5; ++j) {
+      if (j < builder.GetData().size()) {
+        EXPECT_EQ(data_ptr[j], encoded_data_ptr[j]) << " i = " << i << " j = " << j;
+      } else {
+        EXPECT_EQ(data_ptr[j], 0U) << " i = " << i << " j = " << j;
+      }
+    }
+    EXPECT_EQ(DecodeUnsignedLeb128(&data_ptr), uleb128_tests[i].decoded) << " i = " << i;
+  }
+}
+
+TEST_F(Leb128Test, Stream) {
+  // Encode a number of entries.
+  UnsignedLeb128EncodingVector builder;
+  for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
+    builder.PushBack(uleb128_tests[i].decoded);
+  }
+  const uint8_t* encoded_data_ptr = &builder.GetData()[0];
+  for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
+    const uint8_t* data_ptr = &uleb128_tests[i].leb128_data[0];
+    for (size_t j = 0; j < 5; ++j) {
+      if (data_ptr[j] != 0) {
+        EXPECT_EQ(data_ptr[j], encoded_data_ptr[j]) << " i = " << i << " j = " << j;
+      }
+    }
+    EXPECT_EQ(DecodeUnsignedLeb128(&encoded_data_ptr), uleb128_tests[i].decoded) << " i = " << i;
+  }
+}
+
+TEST_F(Leb128Test, Speed) {
+  UniquePtr<Histogram<uint64_t> > enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
+  UniquePtr<Histogram<uint64_t> > dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
+  UnsignedLeb128EncodingVector builder;
+  // Push back 1024 chunks of 1024 values measuring encoding speed.
+  uint64_t last_time = NanoTime();
+  for (size_t i = 0; i < 1024; i++) {
+    for (size_t j = 0; j < 1024; j++) {
+      builder.PushBack((i * 1024) + j);
+    }
+    uint64_t cur_time = NanoTime();
+    enc_hist->AddValue(cur_time - last_time);
+    last_time = cur_time;
+  }
+  // Verify encoding and measure decode speed.
+  const uint8_t* encoded_data_ptr = &builder.GetData()[0];
+  last_time = NanoTime();
+  for (size_t i = 0; i < 1024; i++) {
+    for (size_t j = 0; j < 1024; j++) {
+      EXPECT_EQ(DecodeUnsignedLeb128(&encoded_data_ptr), (i * 1024) + j);
+    }
+    uint64_t cur_time = NanoTime();
+    dec_hist->AddValue(cur_time - last_time);
+    last_time = cur_time;
+  }
+
+  Histogram<uint64_t>::CumulativeData enc_data;
+  enc_hist->CreateHistogram(&enc_data);
+  enc_hist->PrintConfidenceIntervals(std::cout, 0.99, enc_data);
+
+  Histogram<uint64_t>::CumulativeData dec_data;
+  dec_hist->CreateHistogram(&dec_data);
+  dec_hist->PrintConfidenceIntervals(std::cout, 0.99, dec_data);
+}
+
+}  // namespace art
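For reference, the encoding these new tests exercise stores a value seven bits at a time, least-significant group first, with the top bit set on every byte except the last; the table above follows from that rule (for example 0x4081 becomes 0x81, 0x81, 0x01). A minimal standalone sketch of the scheme, deliberately not ART's UnsignedLeb128EncodingVector and assuming only the C++ standard library:

#include <cstdint>
#include <vector>

// Append the unsigned LEB128 encoding of 'value' to 'out': seven payload bits
// per byte, low bits first, continuation bit (0x80) on every byte but the last.
static void EncodeUnsignedLeb128(uint32_t value, std::vector<uint8_t>* out) {
  do {
    uint8_t byte = value & 0x7F;
    value >>= 7;
    if (value != 0) {
      byte |= 0x80;
    }
    out->push_back(byte);
  } while (value != 0);
}

// Decode one value, advancing '*data' past the bytes consumed.
static uint32_t DecodeUnsignedLeb128Sketch(const uint8_t** data) {
  uint32_t result = 0;
  uint32_t shift = 0;
  uint8_t byte;
  do {
    byte = *(*data)++;
    result |= static_cast<uint32_t>(byte & 0x7F) << shift;
    shift += 7;
  } while ((byte & 0x80) != 0);
  return result;
}

For 0xFFFFFFFF this yields the five bytes 0xFF 0xFF 0xFF 0xFF 0x0F shown in the last row of the test table.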
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index c423f34..fd0a69d 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -67,6 +67,7 @@
 };
 
 TEST_F(OatTest, WriteRead) {
+  TimingLogger timings("CommonTest::WriteRead", false, false);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: make selectable
@@ -82,7 +83,7 @@
                                             insn_features, false, NULL, 2, true));
   jobject class_loader = NULL;
   if (kCompile) {
-    base::TimingLogger timings("OatTest::WriteRead", false, false);
+    TimingLogger timings("OatTest::WriteRead", false, false);
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), timings);
   }
 
@@ -92,7 +93,8 @@
                        42U,
                        4096U,
                        "lue.art",
-                       compiler_driver_.get());
+                       compiler_driver_.get(),
+                       &timings);
   bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
                                             !kIsTargetBuild,
                                             class_linker->GetBootClassPath(),
@@ -101,7 +103,6 @@
   ASSERT_TRUE(success);
 
   if (kCompile) {  // OatWriter strips the code, regenerate to compare
-    base::TimingLogger timings("CommonTest::WriteRead", false, false);
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), timings);
   }
   std::string error_msg;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 28fb147..8382469 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -40,7 +40,8 @@
                      uint32_t image_file_location_oat_checksum,
                      uint32_t image_file_location_oat_begin,
                      const std::string& image_file_location,
-                     const CompilerDriver* compiler)
+                     const CompilerDriver* compiler,
+                     TimingLogger* timings)
   : compiler_driver_(compiler),
     dex_files_(&dex_files),
     image_file_location_oat_checksum_(image_file_location_oat_checksum),
@@ -77,12 +78,31 @@
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0) {
-  size_t offset = InitOatHeader();
-  offset = InitOatDexFiles(offset);
-  offset = InitDexFiles(offset);
-  offset = InitOatClasses(offset);
-  offset = InitOatCode(offset);
-  offset = InitOatCodeDexFiles(offset);
+  size_t offset;
+  {
+    TimingLogger::ScopedSplit split("InitOatHeader", timings);
+    offset = InitOatHeader();
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatDexFiles", timings);
+    offset = InitOatDexFiles(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitDexFiles", timings);
+    offset = InitDexFiles(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatClasses", timings);
+    offset = InitOatClasses(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatCode", timings);
+    offset = InitOatCode(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatCodeDexFiles", timings);
+    offset = InitOatCodeDexFiles(offset);
+  }
   size_ = offset;
 
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
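The blocks introduced above rely on TimingLogger::ScopedSplit as an RAII timer: constructing it opens a named split and leaving the block closes it, so each Init phase is measured without explicit Start/End calls. As a rough illustration of that shape only (ScopedTimer and its sink are invented for this sketch; the real class additionally handles nested splits and systrace via ATRACE), a self-contained guard could look like this:

#include <chrono>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Records (label, elapsed nanoseconds) into 'sink' when the scope ends.
class ScopedTimer {
 public:
  ScopedTimer(const char* label, std::vector<std::pair<std::string, uint64_t>>* sink)
      : label_(label), sink_(sink), start_(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    uint64_t ns = static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now() - start_).count());
    sink_->emplace_back(label_, ns);
  }
 private:
  const char* const label_;
  std::vector<std::pair<std::string, uint64_t>>* const sink_;
  const std::chrono::steady_clock::time_point start_;
};

Wrapping each phase in its own block, exactly as the constructor above does, keeps the measured region obvious and exception-safe.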
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5d947cf..64275e6 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -67,7 +67,8 @@
             uint32_t image_file_location_oat_checksum,
             uint32_t image_file_location_oat_begin,
             const std::string& image_file_location,
-            const CompilerDriver* compiler);
+            const CompilerDriver* compiler,
+            TimingLogger* timings);
 
   const OatHeader& GetOatHeader() const {
     return *oat_header_;
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 3781921..8b232700 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -242,7 +242,7 @@
                                       bool image,
                                       UniquePtr<CompilerDriver::DescriptorSet>& image_classes,
                                       bool dump_stats,
-                                      base::TimingLogger& timings) {
+                                      TimingLogger& timings) {
     // SirtRef and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = NULL;
     Thread* self = Thread::Current();
@@ -280,6 +280,7 @@
     uint32_t image_file_location_oat_checksum = 0;
     uint32_t image_file_location_oat_data_begin = 0;
     if (!driver->IsImage()) {
+      TimingLogger::ScopedSplit split("Loading image checksum", &timings);
       gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
       image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum();
       image_file_location_oat_data_begin =
@@ -294,8 +295,10 @@
                          image_file_location_oat_checksum,
                          image_file_location_oat_data_begin,
                          image_file_location,
-                         driver.get());
+                         driver.get(),
+                         &timings);
 
+    TimingLogger::ScopedSplit split("Writing ELF", &timings);
     if (!driver->WriteElf(android_root, is_host, dex_files, oat_writer, oat_file)) {
       LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath();
       return NULL;
@@ -600,7 +603,7 @@
 }
 
 static int dex2oat(int argc, char** argv) {
-  base::TimingLogger timings("compiler", false, false);
+  TimingLogger timings("compiler", false, false);
 
   InitLogging(argv);
 
@@ -1091,7 +1094,7 @@
 
   if (is_host) {
     if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
-      LOG(INFO) << Dumpable<base::TimingLogger>(timings);
+      LOG(INFO) << Dumpable<TimingLogger>(timings);
     }
     return EXIT_SUCCESS;
   }
@@ -1133,7 +1136,7 @@
   timings.EndSplit();
 
   if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
-    LOG(INFO) << Dumpable<base::TimingLogger>(timings);
+    LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
 
   // Everything was successfully written, do an explicit exit here to avoid running Runtime
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 65f2383..936fb07 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -427,9 +427,9 @@
           args << Rt << "," << Rd << ", [" << Rn;
           const char *sign = U ? "+" : "-";
           if (P == 0 && W == 1) {
-            args << "], #" << sign << imm8;
+            args << "], #" << sign << (imm8 << 2);
           } else {
-            args << ", #" << sign << imm8 << "]";
+            args << ", #" << sign << (imm8 << 2) << "]";
             if (W == 1) {
               args << "!";
             }
@@ -854,7 +854,7 @@
           } else if (op3 == 0x4) {
             opcode << "teq";
           } else if (op3 == 0x8) {
-            opcode << "cmw";
+            opcode << "cmn.w";
           } else {
             opcode << "cmp.w";
           }
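The shifts added above account for how these Thumb-2 doubleword load/store encodings store their offset: the 8-bit immediate counts words, so the byte offset shown in the disassembly is imm8 * 4. A one-line illustrative sketch of the displayed value (the helper name is made up):

// Offset as now printed for these encodings: the immediate counts 4-byte words.
static uint32_t DisplayedByteOffset(uint32_t imm8) {
  return imm8 << 2;  // e.g. imm8 == 3 prints as #12
}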
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 97cbdd9..2aa59d5 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -42,6 +42,7 @@
 	dex_instruction.cc \
 	elf_file.cc \
 	gc/allocator/dlmalloc.cc \
+	gc/allocator/rosalloc.cc \
 	gc/accounting/card_table.cc \
 	gc/accounting/gc_allocator.cc \
 	gc/accounting/heap_bitmap.cc \
@@ -53,10 +54,13 @@
 	gc/collector/semi_space.cc \
 	gc/collector/sticky_mark_sweep.cc \
 	gc/heap.cc \
+	gc/reference_queue.cc \
 	gc/space/bump_pointer_space.cc \
 	gc/space/dlmalloc_space.cc \
 	gc/space/image_space.cc \
 	gc/space/large_object_space.cc \
+	gc/space/malloc_space.cc \
+	gc/space/rosalloc_space.cc \
 	gc/space/space.cc \
 	hprof/hprof.cc \
 	image.cc \
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index 0345266..9e08ae6 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -170,20 +170,20 @@
   os << FormatDuration(Max() * kAdjust, unit) << "\n";
 }
 
-template <class Value> inline void Histogram<Value>::CreateHistogram(CumulativeData& out_data) {
+template <class Value> inline void Histogram<Value>::CreateHistogram(CumulativeData* out_data) {
   DCHECK_GT(sample_size_, 0ull);
-  out_data.freq_.clear();
-  out_data.perc_.clear();
+  out_data->freq_.clear();
+  out_data->perc_.clear();
   uint64_t accumulated = 0;
-  out_data.freq_.push_back(accumulated);
-  out_data.perc_.push_back(0.0);
+  out_data->freq_.push_back(accumulated);
+  out_data->perc_.push_back(0.0);
   for (size_t idx = 0; idx < frequency_.size(); idx++) {
     accumulated += frequency_[idx];
-    out_data.freq_.push_back(accumulated);
-    out_data.perc_.push_back(static_cast<double>(accumulated) / static_cast<double>(sample_size_));
+    out_data->freq_.push_back(accumulated);
+    out_data->perc_.push_back(static_cast<double>(accumulated) / static_cast<double>(sample_size_));
   }
-  DCHECK_EQ(out_data.freq_.back(), sample_size_);
-  DCHECK_LE(std::abs(out_data.perc_.back() - 1.0), 0.001);
+  DCHECK_EQ(out_data->freq_.back(), sample_size_);
+  DCHECK_LE(std::abs(out_data->perc_.back() - 1.0), 0.001);
 }
 
 template <class Value>
diff --git a/runtime/base/histogram.h b/runtime/base/histogram.h
index 2a02cf4..e22b6e1 100644
--- a/runtime/base/histogram.h
+++ b/runtime/base/histogram.h
@@ -47,7 +47,7 @@
   // cumulative_freq[i] = sum(frequency[j] : 0 < j < i )
   // Accumulative summation of percentiles; which is the frequency / SampleSize
   // cumulative_perc[i] = sum(frequency[j] / SampleSize : 0 < j < i )
-  void CreateHistogram(CumulativeData& data);
+  void CreateHistogram(CumulativeData* data);
   // Reset the cumulative values, next time CreateHistogram is called it will recreate the cache.
   void Reset();
   double Mean() const;
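With CreateHistogram() now taking a pointer, mutation of the out-parameter is visible at the call site. A minimal usage sketch mirroring the updated tests (the label, bucket width, and values are placeholders, and the stack construction is only for brevity):

#include <iostream>
#include "base/histogram-inl.h"

namespace art {

void DumpHistogramExample() {
  Histogram<uint64_t> hist("ExampleTimings", 5);  // placeholder label and bucket width
  hist.AddValue(42);
  hist.AddValue(128);

  Histogram<uint64_t>::CumulativeData data;
  hist.CreateHistogram(&data);                           // out-parameter now passed by pointer
  hist.PrintConfidenceIntervals(std::cout, 0.99, data);  // consumes the cumulative data
}

}  // namespace art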
diff --git a/runtime/base/histogram_test.cc b/runtime/base/histogram_test.cc
index 534440c..9d371f5 100644
--- a/runtime/base/histogram_test.cc
+++ b/runtime/base/histogram_test.cc
@@ -85,7 +85,7 @@
   hist->AddValue(145);
   hist->AddValue(155);
 
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
   EXPECT_EQ(875, static_cast<int>(PerValue * 10));
 }
@@ -117,7 +117,7 @@
   hist->AddValue(200);
   hist->AddValue(205);
   hist->AddValue(212);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
 
   std::string text;
@@ -132,7 +132,6 @@
 
 TEST(Histtest, Reset) {
   UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("Reset", 5));
-  Histogram<uint64_t>::CumulativeData data;
 
   double PerValue;
   hist->AddValue(0);
@@ -160,7 +159,8 @@
   hist->AddValue(200);
   hist->AddValue(205);
   hist->AddValue(212);
-  hist->CreateHistogram(data);
+  Histogram<uint64_t>::CumulativeData data;
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
 
   std::string text;
@@ -185,7 +185,7 @@
   hist->AddValue(68);
   hist->AddValue(75);
   hist->AddValue(93);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   hist->AddValue(110);
   hist->AddValue(121);
   hist->AddValue(132);
@@ -194,14 +194,14 @@
   hist->AddValue(155);
   hist->AddValue(163);
   hist->AddValue(168);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   hist->AddValue(175);
   hist->AddValue(182);
   hist->AddValue(193);
   hist->AddValue(200);
   hist->AddValue(205);
   hist->AddValue(212);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
   std::stringstream stream;
   std::string expected("MultipleCreateHist:\t99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
@@ -217,7 +217,7 @@
   Histogram<uint64_t>::CumulativeData data;
 
   hist->AddValue(1);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   std::stringstream stream;
   std::string expected = "SingleValue:\t99% C.I. 1us-1us Avg: 1us Max: 1us\n";
   hist->PrintConfidenceIntervals(stream, 0.99, data);
@@ -234,7 +234,7 @@
   for (uint64_t idx = 0ull; idx < 150ull; idx++) {
     hist->AddValue(0);
   }
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   per_995 = hist->Percentile(0.995, data);
   EXPECT_EQ(per_995, 0);
   hist->Reset();
@@ -243,7 +243,7 @@
       hist->AddValue(val);
     }
   }
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   per_005 = hist->Percentile(0.005, data);
   per_995 = hist->Percentile(0.995, data);
   EXPECT_EQ(1, per_005);
@@ -260,7 +260,7 @@
     }
   }
   hist->AddValue(10000);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   std::stringstream stream;
   std::string expected = "SpikyValues:\t99% C.I. 0.089us-2541.825us Avg: 95.033us Max: 10000us\n";
   hist->PrintConfidenceIntervals(stream, 0.99, data);
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index 45a546f..bebbd70 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -74,12 +74,11 @@
   return total;
 }
 
-void CumulativeLogger::AddLogger(const base::TimingLogger &logger) {
+void CumulativeLogger::AddLogger(const TimingLogger &logger) {
   MutexLock mu(Thread::Current(), lock_);
-  const base::TimingLogger::SplitTimings& splits = logger.GetSplits();
-  for (base::TimingLogger::SplitTimingsIterator it = splits.begin(), end = splits.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  const TimingLogger::SplitTimings& splits = logger.GetSplits();
+  for (auto it = splits.begin(), end = splits.end(); it != end; ++it) {
+    TimingLogger::SplitTiming split = *it;
     uint64_t split_time = split.first;
     const char* split_name = split.second;
     AddPair(split_name, split_time);
@@ -101,7 +100,8 @@
   delta_time /= kAdjust;
 
   if (histograms_.find(label) == histograms_.end()) {
-    // TODO: Shoud this be a defined constant so we we know out of which orifice 16 and 100 were picked?
+    // TODO: Should this be a defined constant so we know out of which orifice 16 and 100 were
+    //       picked?
     const size_t max_buckets = Runtime::Current()->GetHeap()->IsLowMemoryMode() ? 16 : 100;
     // TODO: Should this be a defined constant so we know 50 of WTF?
     histograms_[label] = new Histogram<uint64_t>(label.c_str(), 50, max_buckets);
@@ -115,7 +115,7 @@
   for (CumulativeLogger::HistogramsIterator it = histograms_.begin(), end = histograms_.end();
        it != end; ++it) {
     Histogram<uint64_t>::CumulativeData cumulative_data;
-    it->second->CreateHistogram(cumulative_data);
+    it->second->CreateHistogram(&cumulative_data);
     it->second->PrintConfidenceIntervals(os, 0.99, cumulative_data);
     // Reset cumulative values to save memory. We don't expect DumpHistogram to be called often, so
     // it is not performance critical.
@@ -123,9 +123,6 @@
   os << "Done Dumping histograms \n";
 }
 
-
-namespace base {
-
 TimingLogger::TimingLogger(const char* name, bool precise, bool verbose)
     : name_(name), precise_(precise), verbose_(verbose), current_split_(NULL) {
 }
@@ -136,33 +133,35 @@
 }
 
 void TimingLogger::StartSplit(const char* new_split_label) {
-  DCHECK(new_split_label != NULL) << "Starting split (" << new_split_label << ") with null label.";
-  TimingLogger::ScopedSplit* explicit_scoped_split = new TimingLogger::ScopedSplit(new_split_label, this);
+  DCHECK(new_split_label != nullptr) << "Starting split with null label.";
+  TimingLogger::ScopedSplit* explicit_scoped_split =
+      new TimingLogger::ScopedSplit(new_split_label, this);
   explicit_scoped_split->explicit_ = true;
 }
 
 void TimingLogger::EndSplit() {
-  CHECK(current_split_ != NULL) << "Ending a non-existent split.";
-  DCHECK(current_split_->label_ != NULL);
-  DCHECK(current_split_->explicit_ == true) << "Explicitly ending scoped split: " << current_split_->label_;
-
+  CHECK(current_split_ != nullptr) << "Ending a non-existent split.";
+  DCHECK(current_split_->label_ != nullptr);
+  DCHECK(current_split_->explicit_ == true)
+      << "Explicitly ending scoped split: " << current_split_->label_;
   delete current_split_;
+  // TODO: current_split_ = nullptr;
 }
 
 // Ends the current split and starts the one given by the label.
 void TimingLogger::NewSplit(const char* new_split_label) {
-  CHECK(current_split_ != NULL) << "Inserting a new split (" << new_split_label
-                                << ") into a non-existent split.";
-  DCHECK(new_split_label != NULL) << "New split (" << new_split_label << ") with null label.";
-
-  current_split_->TailInsertSplit(new_split_label);
+  if (current_split_ == nullptr) {
+    StartSplit(new_split_label);
+  } else {
+    DCHECK(new_split_label != nullptr) << "New split (" << new_split_label << ") with null label.";
+    current_split_->TailInsertSplit(new_split_label);
+  }
 }
 
 uint64_t TimingLogger::GetTotalNs() const {
   uint64_t total_ns = 0;
-  for (base::TimingLogger::SplitTimingsIterator it = splits_.begin(), end = splits_.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
+    TimingLogger::SplitTiming split = *it;
     total_ns += split.first;
   }
   return total_ns;
@@ -171,9 +170,8 @@
 void TimingLogger::Dump(std::ostream &os) const {
   uint64_t longest_split = 0;
   uint64_t total_ns = 0;
-  for (base::TimingLogger::SplitTimingsIterator it = splits_.begin(), end = splits_.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
+    TimingLogger::SplitTiming split = *it;
     uint64_t split_time = split.first;
     longest_split = std::max(longest_split, split_time);
     total_ns += split_time;
@@ -182,9 +180,8 @@
   TimeUnit tu = GetAppropriateTimeUnit(longest_split);
   uint64_t divisor = GetNsToTimeUnitDivisor(tu);
   // Print formatted splits.
-  for (base::TimingLogger::SplitTimingsIterator it = splits_.begin(), end = splits_.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
+    const TimingLogger::SplitTiming& split = *it;
     uint64_t split_time = split.first;
     if (!precise_ && divisor >= 1000) {
       // Make the fractional part 0.
@@ -231,7 +228,7 @@
     LOG(INFO) << "End: " << label_ << " " << PrettyDuration(split_time);
   }
 
-  // If one or more enclosed explcitly started splits are not terminated we can
+  // If one or more enclosed explicitly started splits are not terminated we can
   // either fail or "unwind" the stack of splits in the timing logger to 'this'
   // (by deleting the intervening scoped splits). This implements the latter.
   TimingLogger::ScopedSplit* current = timing_logger_->current_split_;
@@ -293,5 +290,4 @@
   ATRACE_BEGIN(label_);
 }
 
-}  // namespace base
 }  // namespace art
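One behavioural consequence of the NewSplit() change above: a logger no longer has to call StartSplit() before its first NewSplit(); when no split is current, NewSplit() simply opens one. A short usage sketch with made-up phase names, using the same calls the tests and dex2oat use:

#include "base/logging.h"
#include "base/timing_logger.h"

namespace art {

void TimePhasesExample() {
  TimingLogger timings("example", /* precise */ false, /* verbose */ false);
  timings.NewSplit("Phase1");  // No current split, so this now behaves like StartSplit().
  // ... phase 1 work ...
  timings.NewSplit("Phase2");  // Ends "Phase1" and starts "Phase2".
  // ... phase 2 work ...
  timings.EndSplit();          // Ends "Phase2".
  LOG(INFO) << Dumpable<TimingLogger>(timings);  // Same dump path dex2oat uses.
}

}  // namespace art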
diff --git a/runtime/base/timing_logger.h b/runtime/base/timing_logger.h
index 501d2d7..f1f7855 100644
--- a/runtime/base/timing_logger.h
+++ b/runtime/base/timing_logger.h
@@ -26,10 +26,7 @@
 #include <map>
 
 namespace art {
-
-namespace base {
-  class TimingLogger;
-}  // namespace base
+class TimingLogger;
 
 class CumulativeLogger {
  public:
@@ -44,7 +41,7 @@
   // Allow the name to be modified, particularly when the cumulative logger is a field within a
   // parent class that is unable to determine the "name" of a sub-class.
   void SetName(const std::string& name);
-  void AddLogger(const base::TimingLogger& logger) LOCKS_EXCLUDED(lock_);
+  void AddLogger(const TimingLogger& logger) LOCKS_EXCLUDED(lock_);
   size_t GetIterations() const;
 
  private:
@@ -65,19 +62,17 @@
   DISALLOW_COPY_AND_ASSIGN(CumulativeLogger);
 };
 
-namespace base {
-
-
 // A timing logger that knows when a split starts for the purposes of logging tools, like systrace.
 class TimingLogger {
  public:
   // Splits are nanosecond times and split names.
   typedef std::pair<uint64_t, const char*> SplitTiming;
   typedef std::vector<SplitTiming> SplitTimings;
-  typedef std::vector<SplitTiming>::const_iterator SplitTimingsIterator;
 
   explicit TimingLogger(const char* name, bool precise, bool verbose);
-
+  ~TimingLogger() {
+    // TODO: DCHECK(current_split_ == nullptr) << "Forgot to end split: " << current_split_->label_;
+  }
   // Clears current splits and labels.
   void Reset();
 
@@ -143,7 +138,7 @@
   friend class ScopedSplit;
  protected:
   // The name of the timing logger.
-  const char* name_;
+  const char* const name_;
 
   // Do we want to print the exactly recorded split (true) or round down to the time unit being
   // used (false).
@@ -162,7 +157,6 @@
   DISALLOW_COPY_AND_ASSIGN(TimingLogger);
 };
 
-}  // namespace base
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_TIMING_LOGGER_H_
diff --git a/runtime/base/timing_logger_test.cc b/runtime/base/timing_logger_test.cc
index 8f28e48..03cc9cc 100644
--- a/runtime/base/timing_logger_test.cc
+++ b/runtime/base/timing_logger_test.cc
@@ -26,13 +26,13 @@
 
 TEST_F(TimingLoggerTest, StartEnd) {
   const char* split1name = "First Split";
-  base::TimingLogger timings("StartEnd", true, false);
+  TimingLogger timings("StartEnd", true, false);
 
   timings.StartSplit(split1name);
 
   timings.EndSplit();  // Ends split1.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(1U, splits.size());
   EXPECT_STREQ(splits[0].second, split1name);
@@ -43,7 +43,7 @@
   const char* split1name = "First Split";
   const char* split2name = "Second Split";
   const char* split3name = "Third Split";
-  base::TimingLogger timings("StartNewEnd", true, false);
+  TimingLogger timings("StartNewEnd", true, false);
 
   timings.StartSplit(split1name);
 
@@ -53,7 +53,7 @@
 
   timings.EndSplit();  // Ends split3.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(3U, splits.size());
   EXPECT_STREQ(splits[0].second, split1name);
@@ -67,7 +67,7 @@
   const char* split3name = "Third Split";
   const char* split4name = "Fourth Split";
   const char* split5name = "Fifth Split";
-  base::TimingLogger timings("StartNewEndNested", true, false);
+  TimingLogger timings("StartNewEndNested", true, false);
 
   timings.StartSplit(split1name);
 
@@ -85,7 +85,7 @@
 
   timings.EndSplit();  // Ends split2.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(5U, splits.size());
   EXPECT_STREQ(splits[0].second, split1name);
@@ -101,25 +101,25 @@
   const char* innersplit1 = "Inner Split 1";
   const char* innerinnersplit1 = "Inner Inner Split 1";
   const char* innersplit2 = "Inner Split 2";
-  base::TimingLogger timings("Scoped", true, false);
+  TimingLogger timings("Scoped", true, false);
 
   {
-      base::TimingLogger::ScopedSplit outer(outersplit, &timings);
+      TimingLogger::ScopedSplit outer(outersplit, &timings);
 
       {
-          base::TimingLogger::ScopedSplit inner1(innersplit1, &timings);
+          TimingLogger::ScopedSplit inner1(innersplit1, &timings);
 
           {
-              base::TimingLogger::ScopedSplit innerinner1(innerinnersplit1, &timings);
+              TimingLogger::ScopedSplit innerinner1(innerinnersplit1, &timings);
           }  // Ends innerinnersplit1.
       }  // Ends innersplit1.
 
       {
-          base::TimingLogger::ScopedSplit inner2(innersplit2, &timings);
+          TimingLogger::ScopedSplit inner2(innersplit2, &timings);
       }  // Ends innersplit2.
   }  // Ends outersplit.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(4U, splits.size());
   EXPECT_STREQ(splits[0].second, innerinnersplit1);
@@ -134,12 +134,12 @@
   const char* innersplit = "Inner Split";
   const char* innerinnersplit1 = "Inner Inner Split 1";
   const char* innerinnersplit2 = "Inner Inner Split 2";
-  base::TimingLogger timings("Scoped", true, false);
+  TimingLogger timings("Scoped", true, false);
 
   timings.StartSplit(outersplit);
 
   {
-      base::TimingLogger::ScopedSplit inner(innersplit, &timings);
+      TimingLogger::ScopedSplit inner(innersplit, &timings);
 
       timings.StartSplit(innerinnersplit1);
 
@@ -148,7 +148,7 @@
 
   timings.EndSplit();  // Ends outersplit.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(4U, splits.size());
   EXPECT_STREQ(splits[0].second, innerinnersplit1);
diff --git a/runtime/common_test.h b/runtime/common_test.h
index 7cc29a1..57cf71a 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -237,7 +237,6 @@
 // input 'str' is a comma separated list of feature names.  Parse it and
 // return the InstructionSetFeatures object.
 static InstructionSetFeatures ParseFeatureList(std::string str) {
-  LOG(INFO) << "Parsing features " << str;
   InstructionSetFeatures result;
   typedef std::vector<std::string> FeatureList;
   FeatureList features;
@@ -582,10 +581,11 @@
 
   void CompileMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(method != NULL);
-    base::TimingLogger timings("CommonTest::CompileMethod", false, false);
+    TimingLogger timings("CommonTest::CompileMethod", false, false);
     timings.StartSplit("CompileOne");
     compiler_driver_->CompileOne(method, timings);
     MakeExecutable(method);
+    timings.EndSplit();
   }
 
   void CompileDirectMethod(SirtRef<mirror::ClassLoader>& class_loader, const char* class_name,
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index d63cb1c..bcee0764 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -107,26 +107,26 @@
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::PostLocationEvent(method, 0, this_object, Dbg::kMethodEntry);
+    Dbg::PostLocationEvent(method, 0, this_object, Dbg::kMethodEntry, nullptr);
   }
 
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
                             const mirror::ArtMethod* method,
                             uint32_t dex_pc, const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    UNUSED(return_value);
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::PostLocationEvent(method, dex_pc, this_object, Dbg::kMethodExit);
+    Dbg::PostLocationEvent(method, dex_pc, this_object, Dbg::kMethodExit, &return_value);
   }
 
-  virtual void MethodUnwind(Thread* thread, const mirror::ArtMethod* method,
-                            uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                            const mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // We're not recorded to listen to this kind of event, so complain.
     LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method)
-        << " " << dex_pc;
+               << " " << dex_pc;
   }
 
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
@@ -1179,41 +1179,37 @@
   return accessFlags;
 }
 
-static const uint16_t kEclipseWorkaroundSlot = 1000;
-
 /*
- * Eclipse appears to expect that the "this" reference is in slot zero.
- * If it's not, the "variables" display will show two copies of "this",
- * possibly because it gets "this" from SF.ThisObject and then displays
- * all locals with nonzero slot numbers.
- *
- * So, we remap the item in slot 0 to 1000, and remap "this" to zero.  On
- * SF.GetValues / SF.SetValues we map them back.
- *
- * TODO: jdb uses the value to determine whether a variable is a local or an argument,
- * by checking whether it's less than the number of arguments. To make that work, we'd
- * have to "mangle" all the arguments to come first, not just the implicit argument 'this'.
+ * Circularly shifts registers so that arguments come first. Debuggers
+ * expect slots to begin with arguments, but dex code places them at
+ * the end.
  */
-static uint16_t MangleSlot(uint16_t slot, const char* name) {
-  uint16_t newSlot = slot;
-  if (strcmp(name, "this") == 0) {
-    newSlot = 0;
-  } else if (slot == 0) {
-    newSlot = kEclipseWorkaroundSlot;
+static uint16_t MangleSlot(uint16_t slot, mirror::ArtMethod* m)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+  uint16_t ins_size = code_item->ins_size_;
+  uint16_t locals_size = code_item->registers_size_ - ins_size;
+  if (slot >= locals_size) {
+    return slot - locals_size;
+  } else {
+    return slot + ins_size;
   }
-  return newSlot;
 }
 
+/*
+ * Circularly shifts registers so that arguments come last. Reverts
+ * slots to dex style argument placement.
+ */
 static uint16_t DemangleSlot(uint16_t slot, mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (slot == kEclipseWorkaroundSlot) {
-    return 0;
-  } else if (slot == 0) {
-    const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
-    CHECK(code_item != NULL) << PrettyMethod(m);
-    return code_item->registers_size_ - code_item->ins_size_;
+  const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+  uint16_t ins_size = code_item->ins_size_;
+  uint16_t locals_size = code_item->registers_size_ - ins_size;
+  if (slot < ins_size) {
+    return slot + locals_size;
+  } else {
+    return slot - ins_size;
   }
-  return slot;
 }
 
 JDWP::JdwpError Dbg::OutputDeclaredFields(JDWP::RefTypeId class_id, bool with_generic, JDWP::ExpandBuf* pReply) {
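To make the remapping above concrete: for a code item with registers_size_ = 5 and ins_size_ = 2, dex keeps the two arguments in registers 3 and 4, while JDWP clients expect them in slots 0 and 1; the locals in registers 0..2 then follow as slots 2..4, and the two functions invert each other. A standalone sketch of the same arithmetic, written against plain integers so the round trip is easy to check by hand:

#include <cstdint>

// Same arithmetic as MangleSlot()/DemangleSlot() above, parameterized so it
// can be checked for registers_size = 5, ins_size = 2.
static uint16_t Mangle(uint16_t slot, uint16_t registers_size, uint16_t ins_size) {
  uint16_t locals_size = registers_size - ins_size;
  return slot >= locals_size ? slot - locals_size : slot + ins_size;
}

static uint16_t Demangle(uint16_t slot, uint16_t registers_size, uint16_t ins_size) {
  uint16_t locals_size = registers_size - ins_size;
  return slot < ins_size ? slot + locals_size : slot - ins_size;
}

// Mangle(3, 5, 2) == 0 and Mangle(4, 5, 2) == 1: arguments move to the front.
// Mangle(0, 5, 2) == 2 and Mangle(2, 5, 2) == 4: locals follow.
// Demangle(Mangle(s, 5, 2), 5, 2) == s for every s in [0, 5).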
@@ -1332,16 +1328,18 @@
 
 void Dbg::OutputVariableTable(JDWP::RefTypeId, JDWP::MethodId method_id, bool with_generic, JDWP::ExpandBuf* pReply) {
   struct DebugCallbackContext {
+    mirror::ArtMethod* method;
     JDWP::ExpandBuf* pReply;
     size_t variable_count;
     bool with_generic;
 
-    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress, const char* name, const char* descriptor, const char* signature) {
+    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress, const char* name, const char* descriptor, const char* signature)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       DebugCallbackContext* pContext = reinterpret_cast<DebugCallbackContext*>(context);
 
-      VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d", pContext->variable_count, startAddress, endAddress - startAddress, name, descriptor, signature, slot, MangleSlot(slot, name));
+      VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d", pContext->variable_count, startAddress, endAddress - startAddress, name, descriptor, signature, slot, MangleSlot(slot, pContext->method));
 
-      slot = MangleSlot(slot, name);
+      slot = MangleSlot(slot, pContext->method);
 
       expandBufAdd8BE(pContext->pReply, startAddress);
       expandBufAddUtf8String(pContext->pReply, name);
@@ -1369,6 +1367,7 @@
   expandBufAdd4BE(pReply, 0);
 
   DebugCallbackContext context;
+  context.method = m;
   context.pReply = pReply;
   context.variable_count = 0;
   context.with_generic = with_generic;
@@ -1379,6 +1378,13 @@
   JDWP::Set4BE(expandBufGetBuffer(pReply) + variable_count_offset, context.variable_count);
 }
 
+void Dbg::OutputMethodReturnValue(JDWP::MethodId method_id, const JValue* return_value,
+                                  JDWP::ExpandBuf* pReply) {
+  mirror::ArtMethod* m = FromMethodId(method_id);
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(MethodHelper(m).GetShorty());
+  OutputJValue(tag, return_value, pReply);
+}
+
 JDWP::JdwpError Dbg::GetBytecodes(JDWP::RefTypeId, JDWP::MethodId method_id,
                                   std::vector<uint8_t>& bytecodes)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1447,25 +1453,18 @@
   }
 
   JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
-
-  if (IsPrimitiveTag(tag)) {
-    expandBufAdd1(pReply, tag);
-    if (tag == JDWP::JT_BOOLEAN || tag == JDWP::JT_BYTE) {
-      expandBufAdd1(pReply, f->Get32(o));
-    } else if (tag == JDWP::JT_CHAR || tag == JDWP::JT_SHORT) {
-      expandBufAdd2BE(pReply, f->Get32(o));
-    } else if (tag == JDWP::JT_FLOAT || tag == JDWP::JT_INT) {
-      expandBufAdd4BE(pReply, f->Get32(o));
-    } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
-      expandBufAdd8BE(pReply, f->Get64(o));
-    } else {
-      LOG(FATAL) << "Unknown tag: " << tag;
-    }
+  JValue field_value;
+  if (tag == JDWP::JT_VOID) {
+    LOG(FATAL) << "Unknown tag: " << tag;
+  } else if (!IsPrimitiveTag(tag)) {
+    field_value.SetL(f->GetObject(o));
+  } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
+    field_value.SetJ(f->Get64(o));
   } else {
-    mirror::Object* value = f->GetObject(o);
-    expandBufAdd1(pReply, TagFromObject(value));
-    expandBufAddObjectId(pReply, gRegistry->Add(value));
+    field_value.SetI(f->Get32(o));
   }
+  Dbg::OutputJValue(tag, &field_value, pReply);
+
   return JDWP::ERR_NONE;
 }
 
@@ -1543,6 +1542,27 @@
   return s->ToModifiedUtf8();
 }
 
+void Dbg::OutputJValue(JDWP::JdwpTag tag, const JValue* return_value, JDWP::ExpandBuf* pReply) {
+  if (IsPrimitiveTag(tag)) {
+    expandBufAdd1(pReply, tag);
+    if (tag == JDWP::JT_BOOLEAN || tag == JDWP::JT_BYTE) {
+      expandBufAdd1(pReply, return_value->GetI());
+    } else if (tag == JDWP::JT_CHAR || tag == JDWP::JT_SHORT) {
+      expandBufAdd2BE(pReply, return_value->GetI());
+    } else if (tag == JDWP::JT_FLOAT || tag == JDWP::JT_INT) {
+      expandBufAdd4BE(pReply, return_value->GetI());
+    } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
+      expandBufAdd8BE(pReply, return_value->GetJ());
+    } else {
+      CHECK_EQ(tag, JDWP::JT_VOID);
+    }
+  } else {
+    mirror::Object* value = return_value->GetL();
+    expandBufAdd1(pReply, TagFromObject(value));
+    expandBufAddObjectId(pReply, gRegistry->Add(value));
+  }
+}
+
 JDWP::JdwpError Dbg::GetThreadName(JDWP::ObjectId thread_id, std::string& name) {
   ScopedObjectAccessUnchecked soa(Thread::Current());
   MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
@@ -2212,8 +2232,8 @@
   visitor.WalkStack();
 }
 
-void Dbg::PostLocationEvent(const mirror::ArtMethod* m, int dex_pc,
-                            mirror::Object* this_object, int event_flags) {
+void Dbg::PostLocationEvent(const mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
+                            int event_flags, const JValue* return_value) {
   mirror::Class* c = m->GetDeclaringClass();
 
   JDWP::JdwpLocation location;
@@ -2228,7 +2248,7 @@
   if (gRegistry->Contains(this_object)) {
     this_id = gRegistry->Add(this_object);
   }
-  gJdwpState->PostLocationEvent(&location, this_id, event_flags);
+  gJdwpState->PostLocationEvent(&location, this_id, event_flags, return_value);
 }
 
 void Dbg::PostException(Thread* thread, const ThrowLocation& throw_location,
@@ -2341,7 +2361,7 @@
   // If there's something interesting going on, see if it matches one
   // of the debugger filters.
   if (event_flags != 0) {
-    Dbg::PostLocationEvent(m, dex_pc, this_object, event_flags);
+    Dbg::PostLocationEvent(m, dex_pc, this_object, event_flags, nullptr);
   }
 }
 
@@ -3401,6 +3421,14 @@
   JDWP::Set4BE(&heap_id[0], 1);  // Heap id (bogus; we only have one heap).
   Dbg::DdmSendChunk(native ? CHUNK_TYPE("NHST") : CHUNK_TYPE("HPST"), sizeof(heap_id), heap_id);
 
+  Thread* self = Thread::Current();
+
+  // To allow the Walk/InspectAll() below to exclusively lock the mutator
+  // lock, temporarily release our shared access to it here by transitioning
+  // to the suspended state.
+  Locks::mutator_lock_->AssertSharedHeld(self);
+  self->TransitionFromRunnableToSuspended(kSuspended);
+
   // Send a series of heap segment chunks.
   HeapChunkContext context((what == HPSG_WHAT_MERGED_OBJECTS), native);
   if (native) {
@@ -3408,18 +3436,21 @@
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
-    Thread* self = Thread::Current();
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     typedef std::vector<gc::space::ContinuousSpace*>::const_iterator It;
     for (It cur = spaces.begin(), end = spaces.end(); cur != end; ++cur) {
-      if ((*cur)->IsDlMallocSpace()) {
-        (*cur)->AsDlMallocSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
+      if ((*cur)->IsMallocSpace()) {
+        (*cur)->AsMallocSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
       }
     }
     // Walk the large objects, these are not in the AllocSpace.
     heap->GetLargeObjectsSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
   }
 
+  // Re-acquire shared access to the mutator lock.
+  self->TransitionFromSuspendedToRunnable();
+  Locks::mutator_lock_->AssertSharedHeld(self);
+
   // Finally, send a heap end chunk.
   Dbg::DdmSendChunk(native ? CHUNK_TYPE("NHEN") : CHUNK_TYPE("HPEN"), sizeof(heap_id), heap_id);
 }
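The transition pair added around the heap walk is a recurring pattern: a thread holding the mutator lock for shared access has to move to the suspended state before anything downstream can lock that lock exclusively, then move back afterwards. A sketch of the same shape as an RAII guard (the class below is hypothetical and only wraps the exact calls used in the diff; it is not an existing ART helper):

#include "thread.h"

namespace art {

// Hypothetical guard: drops to the suspended state on construction and
// restores the runnable state on destruction, so early returns stay safe.
class ScopedDropMutatorLock {
 public:
  explicit ScopedDropMutatorLock(Thread* self) : self_(self) {
    Locks::mutator_lock_->AssertSharedHeld(self_);
    self_->TransitionFromRunnableToSuspended(kSuspended);
  }
  ~ScopedDropMutatorLock() {
    self_->TransitionFromSuspendedToRunnable();
    Locks::mutator_lock_->AssertSharedHeld(self_);
  }
 private:
  Thread* const self_;
};

}  // namespace art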
diff --git a/runtime/debugger.h b/runtime/debugger.h
index a774d55..0a7cf5a 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -264,6 +264,9 @@
   static void OutputVariableTable(JDWP::RefTypeId ref_type_id, JDWP::MethodId id, bool with_generic,
                                   JDWP::ExpandBuf* pReply)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void OutputMethodReturnValue(JDWP::MethodId method_id, const JValue* return_value,
+                                      JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError GetBytecodes(JDWP::RefTypeId class_id, JDWP::MethodId method_id,
                                       std::vector<uint8_t>& bytecodes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -288,6 +291,8 @@
 
   static std::string StringToUtf8(JDWP::ObjectId string_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void OutputJValue(JDWP::JdwpTag tag, const JValue* return_value, JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
    * Thread, ThreadGroup, Frame
@@ -361,7 +366,8 @@
     kMethodExit     = 0x08,
   };
   static void PostLocationEvent(const mirror::ArtMethod* method, int pcOffset,
-                                mirror::Object* thisPtr, int eventFlags)
+                                mirror::Object* thisPtr, int eventFlags,
+                                const JValue* return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostException(Thread* thread, const ThrowLocation& throw_location,
                             mirror::ArtMethod* catch_method,
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index a02823e..0ddcdf3 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -510,7 +510,8 @@
 }
 
 const DexFile::ProtoId* DexFile::FindProtoId(uint16_t return_type_idx,
-                                         const std::vector<uint16_t>& signature_type_idxs) const {
+                                             const uint16_t* signature_type_idxs,
+                                             uint32_t signature_length) const {
   int32_t lo = 0;
   int32_t hi = NumProtoIds() - 1;
   while (hi >= lo) {
@@ -520,7 +521,7 @@
     if (compare == 0) {
       DexFileParameterIterator it(*this, proto);
       size_t i = 0;
-      while (it.HasNext() && i < signature_type_idxs.size() && compare == 0) {
+      while (it.HasNext() && i < signature_length && compare == 0) {
         compare = signature_type_idxs[i] - it.GetTypeIdx();
         it.Next();
         i++;
@@ -528,7 +529,7 @@
       if (compare == 0) {
         if (it.HasNext()) {
           compare = -1;
-        } else if (i < signature_type_idxs.size()) {
+        } else if (i < signature_length) {
           compare = 1;
         }
       }
@@ -721,9 +722,9 @@
   for (;;)  {
     uint8_t opcode = *stream++;
     uint16_t reg;
-    uint16_t name_idx;
-    uint16_t descriptor_idx;
-    uint16_t signature_idx = 0;
+    uint32_t name_idx;
+    uint32_t descriptor_idx;
+    uint32_t signature_idx = 0;
 
     switch (opcode) {
       case DBG_END_SEQUENCE:
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 51ab8d8..84026a4 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -661,7 +661,11 @@
 
   // Looks up a proto id for a given return type and signature type list
   const ProtoId* FindProtoId(uint16_t return_type_idx,
-                             const std::vector<uint16_t>& signature_type_idxs_) const;
+                             const uint16_t* signature_type_idxs, uint32_t signature_length) const;
+  const ProtoId* FindProtoId(uint16_t return_type_idx,
+                             const std::vector<uint16_t>& signature_type_idxs) const {
+    return FindProtoId(return_type_idx, &signature_type_idxs[0], signature_type_idxs.size());
+  }
 
   // Given a signature place the type ids into the given vector, returns true on success
   bool CreateTypeList(const StringPiece& signature, uint16_t* return_type_idx,
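The raw-pointer overload lets a caller that already holds an array of type indices skip building a std::vector just for the lookup, and the vector overload now simply forwards to it. A minimal usage sketch (dex_file stands for an already-opened dex file, and the index values are placeholders):

#include <vector>
#include "dex_file.h"

namespace art {

const DexFile::ProtoId* FindProtoBothWays(const DexFile& dex_file) {
  uint16_t return_type_idx = 0;                   // placeholder index
  const uint16_t signature_type_idxs[] = {1, 2};  // placeholder indices

  // Raw-array form: no temporary vector needed.
  const DexFile::ProtoId* proto =
      dex_file.FindProtoId(return_type_idx, signature_type_idxs, 2);

  // Vector form: unchanged for existing callers; it forwards to the raw-array form.
  std::vector<uint16_t> sig(signature_type_idxs, signature_type_idxs + 2);
  const DexFile::ProtoId* same_proto = dex_file.FindProtoId(return_type_idx, sig);

  return proto != nullptr ? proto : same_proto;  // either may be null for bogus indices
}

}  // namespace art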
diff --git a/runtime/gc/accounting/mod_union_table-inl.h b/runtime/gc/accounting/mod_union_table-inl.h
index fb425df..19c6768 100644
--- a/runtime/gc/accounting/mod_union_table-inl.h
+++ b/runtime/gc/accounting/mod_union_table-inl.h
@@ -37,7 +37,7 @@
     typedef std::vector<space::ContinuousSpace*>::const_iterator It;
     for (It it = spaces.begin(); it != spaces.end(); ++it) {
       if ((*it)->Contains(ref)) {
-        return (*it)->IsDlMallocSpace();
+        return (*it)->IsMallocSpace();
       }
     }
     // Assume it points to a large object.
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
new file mode 100644
index 0000000..9e65894
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -0,0 +1,1630 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/mutex-inl.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "rosalloc.h"
+
+#include <map>
+#include <list>
+#include <vector>
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+// If true, log verbose details of operations.
+static const bool kTraceRosAlloc = false;
+// If true, check that the returned memory is actually zero.
+static const bool kCheckZeroMemory = kIsDebugBuild;
+
+extern "C" void* art_heap_rosalloc_morecore(RosAlloc* rosalloc, intptr_t increment);
+
+size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
+size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
+size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
+size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
+size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
+size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
+bool RosAlloc::initialized_ = false;
+
+RosAlloc::RosAlloc(void* base, size_t capacity)
+    : base_(reinterpret_cast<byte*>(base)), footprint_(capacity),
+      capacity_(capacity),
+      lock_("rosalloc global lock", kRosAllocGlobalLock),
+      bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock) {
+  DCHECK(RoundUp(capacity, kPageSize) == capacity);
+  if (!initialized_) {
+    Initialize();
+  }
+  VLOG(heap) << "RosAlloc base="
+             << std::hex << (intptr_t)base_ << ", end="
+             << std::hex << (intptr_t)(base_ + capacity_)
+             << ", capacity=" << std::dec << capacity_;
+  memset(current_runs_, 0, sizeof(current_runs_));
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    size_bracket_locks_[i] = new Mutex("a rosalloc size bracket lock",
+                                       kRosAllocBracketLock);
+  }
+  size_t num_of_pages = capacity_ / kPageSize;
+  page_map_.resize(num_of_pages);
+  free_page_run_size_map_.resize(num_of_pages);
+
+  FreePageRun* free_pages = reinterpret_cast<FreePageRun*>(base_);
+  if (kIsDebugBuild) {
+    free_pages->magic_num_ = kMagicNumFree;
+  }
+  free_pages->SetByteSize(this, capacity_);
+  DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
+  free_pages->ReleasePages(this);
+  free_page_runs_.insert(free_pages);
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
+              << reinterpret_cast<intptr_t>(free_pages)
+              << " into free_page_runs_";
+  }
+}
+
+void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
+  lock_.AssertHeld(self);
+  DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
+  FreePageRun* res = NULL;
+  size_t req_byte_size = num_pages * kPageSize;
+  // Find the lowest address free page run that's large enough.
+  for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
+    FreePageRun* fpr = *it;
+    DCHECK(fpr->IsFree());
+    size_t fpr_byte_size = fpr->ByteSize(this);
+    DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
+    if (req_byte_size <= fpr_byte_size) {
+      // Found one.
+      free_page_runs_.erase(it++);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x"
+                  << std::hex << reinterpret_cast<intptr_t>(fpr)
+                  << " from free_page_runs_";
+      }
+      if (req_byte_size < fpr_byte_size) {
+        // Split.
+        FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
+        if (kIsDebugBuild) {
+          remainder->magic_num_ = kMagicNumFree;
+        }
+        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
+        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        // Don't need to call madvise on remainder here.
+        free_page_runs_.insert(remainder);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(remainder)
+                    << " into free_page_runs_";
+        }
+        fpr->SetByteSize(this, req_byte_size);
+        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+      }
+      res = fpr;
+      break;
+    } else {
+      ++it;
+    }
+  }
+
+  // Failed to allocate pages. Grow the footprint, if possible.
+  if (UNLIKELY(res == NULL && capacity_ > footprint_)) {
+    FreePageRun* last_free_page_run = NULL;
+    size_t last_free_page_run_size;
+    auto it = free_page_runs_.rbegin();
+    if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
+      // There is a free page run at the end.
+      DCHECK(last_free_page_run->IsFree());
+      DCHECK(page_map_[ToPageMapIndex(last_free_page_run)] == kPageMapEmpty);
+      last_free_page_run_size = last_free_page_run->ByteSize(this);
+    } else {
+      // There is no free page run at the end.
+      last_free_page_run_size = 0;
+    }
+    DCHECK_LT(last_free_page_run_size, req_byte_size);
+    if (capacity_ - footprint_ + last_free_page_run_size >= req_byte_size) {
+      // If we grow the heap, we can allocate it.
+      size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
+                                  capacity_ - footprint_);
+      DCHECK_EQ(increment % kPageSize, static_cast<size_t>(0));
+      size_t new_footprint = footprint_ + increment;
+      size_t new_num_of_pages = new_footprint / kPageSize;
+      DCHECK_LT(page_map_.size(), new_num_of_pages);
+      DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
+      page_map_.resize(new_num_of_pages);
+      free_page_run_size_map_.resize(new_num_of_pages);
+      art_heap_rosalloc_morecore(this, increment);
+      if (last_free_page_run_size > 0) {
+        // There was a free page run at the end. Expand its size.
+        DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
+        last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
+        DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        DCHECK(last_free_page_run->End(this) == base_ + new_footprint);
+      } else {
+        // Otherwise, insert a new free page run at the end.
+        FreePageRun* new_free_page_run = reinterpret_cast<FreePageRun*>(base_ + footprint_);
+        if (kIsDebugBuild) {
+          new_free_page_run->magic_num_ = kMagicNumFree;
+        }
+        new_free_page_run->SetByteSize(this, increment);
+        DCHECK_EQ(new_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        free_page_runs_.insert(new_free_page_run);
+        DCHECK(*free_page_runs_.rbegin() == new_free_page_run);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AlloPages() : Grew the heap by inserting run 0x"
+                    << std::hex << reinterpret_cast<intptr_t>(new_free_page_run)
+                    << " into free_page_runs_";
+        }
+      }
+      DCHECK_LE(footprint_ + increment, capacity_);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : increased the footprint from "
+                  << footprint_ << " to " << new_footprint;
+      }
+      footprint_ = new_footprint;
+
+      // And retry the last free page run.
+      it = free_page_runs_.rbegin();
+      DCHECK(it != free_page_runs_.rend());
+      FreePageRun* fpr = *it;
+      if (kIsDebugBuild && last_free_page_run_size > 0) {
+        DCHECK(last_free_page_run != NULL);
+        DCHECK_EQ(last_free_page_run, fpr);
+      }
+      size_t fpr_byte_size = fpr->ByteSize(this);
+      DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
+      DCHECK_LE(req_byte_size, fpr_byte_size);
+      free_page_runs_.erase(fpr);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
+                  << " from free_page_runs_";
+      }
+      if (req_byte_size < fpr_byte_size) {
+        // Split if there's a remainder.
+        FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
+        if (kIsDebugBuild) {
+          remainder->magic_num_ = kMagicNumFree;
+        }
+        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
+        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        free_page_runs_.insert(remainder);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(remainder)
+                    << " into free_page_runs_";
+        }
+        fpr->SetByteSize(this, req_byte_size);
+        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+      }
+      res = fpr;
+    }
+  }
+  if (LIKELY(res != NULL)) {
+    // Update the page map.
+    size_t page_map_idx = ToPageMapIndex(res);
+    for (size_t i = 0; i < num_pages; i++) {
+      DCHECK(page_map_[page_map_idx + i] == kPageMapEmpty);
+    }
+    switch (page_map_type) {
+    case kPageMapRun:
+      page_map_[page_map_idx] = kPageMapRun;
+      for (size_t i = 1; i < num_pages; i++) {
+        page_map_[page_map_idx + i] = kPageMapRunPart;
+      }
+      break;
+    case kPageMapLargeObject:
+      page_map_[page_map_idx] = kPageMapLargeObject;
+      for (size_t i = 1; i < num_pages; i++) {
+        page_map_[page_map_idx + i] = kPageMapLargeObjectPart;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unreachable - page map type: " << page_map_type;
+      break;
+    }
+    if (kIsDebugBuild) {
+      // Clear the first page, which isn't madvised away in the debug build
+      // because it holds the magic number.
+      memset(res, 0, kPageSize);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocPages() : 0x" << std::hex << reinterpret_cast<intptr_t>(res)
+                << "-0x" << (reinterpret_cast<intptr_t>(res) + num_pages * kPageSize)
+                << "(" << std::dec << (num_pages * kPageSize) << ")";
+    }
+    return res;
+  }
+
+  // Fail.
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::AllocPages() : NULL";
+  }
+  return nullptr;
+}
+
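+// Returns the pages backing ptr to the free page run set with the global lock
+// held. Clears the corresponding page map entries to count the pages, turns
+// them into a free page run, coalesces it with adjacent free page runs in both
+// directions, then releases the underlying pages and inserts the result into
+// free_page_runs_.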
+void RosAlloc::FreePages(Thread* self, void* ptr) {
+  lock_.AssertHeld(self);
+  size_t pm_idx = ToPageMapIndex(ptr);
+  DCHECK(pm_idx < page_map_.size());
+  byte pm_type = page_map_[pm_idx];
+  DCHECK(pm_type == kPageMapRun || pm_type == kPageMapLargeObject);
+  byte pm_part_type;
+  switch (pm_type) {
+  case kPageMapRun:
+    pm_part_type = kPageMapRunPart;
+    break;
+  case kPageMapLargeObject:
+    pm_part_type = kPageMapLargeObjectPart;
+    break;
+  default:
+    pm_part_type = kPageMapEmpty;
+    LOG(FATAL) << "Unreachable - RosAlloc::FreePages() : " << "pm_idx=" << pm_idx << ", pm_type="
+               << static_cast<int>(pm_type) << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    return;
+  }
+  // Update the page map and count the number of pages.
+  size_t num_pages = 1;
+  page_map_[pm_idx] = kPageMapEmpty;
+  size_t idx = pm_idx + 1;
+  size_t end = page_map_.size();
+  while (idx < end && page_map_[idx] == pm_part_type) {
+    page_map_[idx] = kPageMapEmpty;
+    num_pages++;
+    idx++;
+  }
+
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreePages() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+              << "-0x" << (reinterpret_cast<intptr_t>(ptr) + num_pages * kPageSize)
+              << "(" << std::dec << (num_pages * kPageSize) << ")";
+  }
+
+  // Turn it into a free run.
+  FreePageRun* fpr = reinterpret_cast<FreePageRun*>(ptr);
+  if (kIsDebugBuild) {
+    fpr->magic_num_ = kMagicNumFree;
+  }
+  fpr->SetByteSize(this, num_pages * kPageSize);
+  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+
+  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
+  if (!free_page_runs_.empty()) {
+    // Try to coalesce in the higher address direction.
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
+                << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
+                << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
+                << (fpr->End(this) == End() ? page_map_.size() : ToPageMapIndex(fpr->End(this))) << "]";
+    }
+    auto higher_it = free_page_runs_.upper_bound(fpr);
+    if (higher_it != free_page_runs_.end()) {
+      for (auto it = higher_it; it != free_page_runs_.end(); ) {
+        FreePageRun* h = *it;
+        DCHECK_EQ(h->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec << ToPageMapIndex(h) << "] -0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(h->End(this)) << " [" << std::dec
+                    << (h->End(this) == End() ? page_map_.size() : ToPageMapIndex(h->End(this))) << "]";
+        }
+        if (fpr->End(this) == h->Begin()) {
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Success";
+          }
+          free_page_runs_.erase(it++);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(h)
+                      << " from free_page_runs_";
+          }
+          fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
+          DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        } else {
+          // Not adjacent. Stop.
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Fail";
+          }
+          break;
+        }
+      }
+    }
+    // Try to coalesce in the lower address direction.
+    auto lower_it = free_page_runs_.upper_bound(fpr);
+    if (lower_it != free_page_runs_.begin()) {
+      --lower_it;
+      for (auto it = lower_it; ; ) {
+        // We want to try to coalesce with the first element but
+        // there's no "<=" operator for the iterator.
+        bool to_exit_loop = it == free_page_runs_.begin();
+
+        FreePageRun* l = *it;
+        DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec << ToPageMapIndex(l) << "] -0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(l->End(this)) << " [" << std::dec
+                    << (l->End(this) == End() ? page_map_.size() : ToPageMapIndex(l->End(this))) << "]";
+        }
+        if (l->End(this) == fpr->Begin()) {
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Success";
+          }
+          free_page_runs_.erase(it--);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(l)
+                      << " from free_page_runs_";
+          }
+          l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
+          DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+          fpr = l;
+        } else {
+          // Not adjacent. Stop.
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Fail";
+          }
+          break;
+        }
+        if (to_exit_loop) {
+          break;
+        }
+      }
+    }
+  }
+
+  // Insert it.
+  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
+  fpr->ReleasePages(this);
+  free_page_runs_.insert(fpr);
+  DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
+              << " into free_page_runs_";
+  }
+}
+
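+// Allocation entry point. Sizes above kLargeSizeThreshold are treated as large
+// objects and take whole pages directly from AllocPages(); smaller sizes are
+// rounded up to a size bracket and served from runs via AllocFromRun().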
+void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) {
+  if (UNLIKELY(size > kLargeSizeThreshold)) {
+    size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
+    void* r;
+    {
+      MutexLock mu(self, lock_);
+      r = AllocPages(self, num_pages, kPageMapLargeObject);
+    }
+    if (bytes_allocated != NULL) {
+      *bytes_allocated = num_pages * kPageSize;
+    }
+    if (kTraceRosAlloc) {
+      if (r != NULL) {
+        LOG(INFO) << "RosAlloc::Alloc() : (large obj) 0x" << std::hex << reinterpret_cast<intptr_t>(r)
+                  << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
+                  << "(" << std::dec << (num_pages * kPageSize) << ")";
+      } else {
+        LOG(INFO) << "RosAlloc::Alloc() : (large obj) NULL";
+      }
+    }
+    // Check if the returned memory is really all zero.
+    if (kCheckZeroMemory && r != NULL) {
+      byte* bytes = reinterpret_cast<byte*>(r);
+      for (size_t i = 0; i < size; ++i) {
+        DCHECK_EQ(bytes[i], 0);
+      }
+    }
+    return r;
+  }
+  void* m = AllocFromRun(self, size, bytes_allocated);
+  // Check if the returned memory is really all zero.
+  if (kCheckZeroMemory && m != NULL) {
+    byte* bytes = reinterpret_cast<byte*>(m);
+    for (size_t i = 0; i < size; ++i) {
+      DCHECK_EQ(bytes[i], 0);
+    }
+  }
+  return m;
+}
+
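+// Shared free path. Classifies ptr via the page map under the global lock:
+// large objects have their pages freed immediately, while run slots are mapped
+// back to their run header and freed via FreeFromRun() after the global lock
+// is released.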
+void RosAlloc::FreeInternal(Thread* self, void* ptr) {
+  DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+  size_t pm_idx = RoundDownToPageMapIndex(ptr);
+  bool free_from_run = false;
+  Run* run = NULL;
+  {
+    MutexLock mu(self, lock_);
+    DCHECK(pm_idx < page_map_.size());
+    byte page_map_entry = page_map_[pm_idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreeInternal() : " << std::hex << ptr << ", pm_idx=" << std::dec << pm_idx
+                << ", page_map_entry=" << static_cast<int>(page_map_entry);
+    }
+    switch (page_map_[pm_idx]) {
+      case kPageMapEmpty:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+      case kPageMapLargeObject:
+        FreePages(self, ptr);
+        return;
+      case kPageMapLargeObjectPart:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+      case kPageMapRun:
+      case kPageMapRunPart: {
+        free_from_run = true;
+        size_t pi = pm_idx;
+        DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
+        // Find the beginning of the run.
+        while (page_map_[pi] != kPageMapRun) {
+          pi--;
+          DCHECK(pi < capacity_ / kPageSize);
+        }
+        DCHECK(page_map_[pi] == kPageMapRun);
+        run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+        DCHECK(run->magic_num_ == kMagicNum);
+        break;
+      }
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+    }
+  }
+  if (LIKELY(free_from_run)) {
+    DCHECK(run != NULL);
+    FreeFromRun(self, ptr, run);
+  }
+}
+
+void RosAlloc::Free(Thread* self, void* ptr) {
+  ReaderMutexLock rmu(self, bulk_free_lock_);
+  FreeInternal(self, ptr);
+}
+
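+// Returns a run to allocate from for the given size bracket: the lowest-address
+// run from the non-full run set if one exists, otherwise a freshly allocated
+// and initialized run (or NULL if the page allocation fails).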
+RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
+  Run* new_run;
+  size_t num_pages = numOfPages[idx];
+  // Get the lowest address non-full run from the binary tree.
+  Run* temp = NULL;
+  std::set<Run*>* bt = &non_full_runs_[idx];
+  std::set<Run*>::iterator found = bt->lower_bound(temp);
+  if (found != bt->end()) {
+    // If there's one, use it as the current run.
+    Run* non_full_run = *found;
+    DCHECK(non_full_run != NULL);
+    new_run = non_full_run;
+    DCHECK_EQ(new_run->is_thread_local_, 0);
+    bt->erase(found);
+    DCHECK_EQ(non_full_run->is_thread_local_, 0);
+  } else {
+    // If there's none, allocate a new run and use it as the
+    // current run.
+    {
+      MutexLock mu(self, lock_);
+      new_run = reinterpret_cast<Run*>(AllocPages(self, num_pages, kPageMapRun));
+    }
+    if (new_run == NULL) {
+      return NULL;
+    }
+    if (kIsDebugBuild) {
+      new_run->magic_num_ = kMagicNum;
+    }
+    new_run->size_bracket_idx_ = idx;
+    new_run->top_slot_idx_ = 0;
+    new_run->ClearBitMaps();
+    new_run->to_be_bulk_freed_ = false;
+  }
+  return new_run;
+}
+
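+// Allocates a slot of the given size from a run. Small brackets
+// (idx <= kMaxThreadLocalSizeBracketIdx) use a per-thread run so the fast path
+// avoids taking the bracket lock; larger brackets share a per-bracket current
+// run under the bracket lock. Runs that fill up are replaced via RefillRun()
+// and, in debug builds, tracked in full_runs_.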
+void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
+  DCHECK(size <= kLargeSizeThreshold);
+  size_t bracket_size;
+  size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
+  DCHECK_EQ(idx, SizeToIndex(size));
+  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+  DCHECK_EQ(bracket_size, bracketSizes[idx]);
+  DCHECK(size <= bracket_size);
+  DCHECK(size > 512 || bracket_size - size < 16);
+
+  void* slot_addr;
+
+  if (LIKELY(idx <= kMaxThreadLocalSizeBracketIdx)) {
+    // Use a thread-local run.
+    Run* thread_local_run = reinterpret_cast<Run*>(self->rosalloc_runs_[idx]);
+    if (UNLIKELY(thread_local_run == NULL)) {
+      MutexLock mu(self, *size_bracket_locks_[idx]);
+      thread_local_run = RefillRun(self, idx);
+      if (UNLIKELY(thread_local_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+      thread_local_run->is_thread_local_ = 1;
+      self->rosalloc_runs_[idx] = thread_local_run;
+      DCHECK(!thread_local_run->IsFull());
+    }
+
+    DCHECK(thread_local_run != NULL);
+    DCHECK_NE(thread_local_run->is_thread_local_, 0);
+    slot_addr = thread_local_run->AllocSlot();
+
+    if (UNLIKELY(slot_addr == NULL)) {
+      // The run got full. Try to free slots.
+      DCHECK(thread_local_run->IsFull());
+      MutexLock mu(self, *size_bracket_locks_[idx]);
+      bool is_all_free_after_merge;
+      if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
+        // Some slot got freed. Keep it.
+        DCHECK(!thread_local_run->IsFull());
+        DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
+        if (is_all_free_after_merge) {
+          // Reinstate the bump index mode if it's all free.
+          DCHECK_EQ(thread_local_run->top_slot_idx_, numOfSlots[idx]);
+          thread_local_run->top_slot_idx_ = 0;
+        }
+      } else {
+        // No slots got freed. Try to refill the thread-local run.
+        DCHECK(thread_local_run->IsFull());
+        self->rosalloc_runs_[idx] = NULL;
+        thread_local_run->is_thread_local_ = 0;
+        if (kIsDebugBuild) {
+          full_runs_[idx].insert(thread_local_run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(thread_local_run)
+                      << " into full_runs_[" << std::dec << idx << "]";
+          }
+        }
+        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+        DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
+        thread_local_run = RefillRun(self, idx);
+        if (UNLIKELY(thread_local_run == NULL)) {
+          return NULL;
+        }
+        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+        DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+        thread_local_run->is_thread_local_ = 1;
+        self->rosalloc_runs_[idx] = thread_local_run;
+        DCHECK(!thread_local_run->IsFull());
+      }
+
+      DCHECK(thread_local_run != NULL);
+      DCHECK(!thread_local_run->IsFull());
+      DCHECK_NE(thread_local_run->is_thread_local_, 0);
+      slot_addr = thread_local_run->AllocSlot();
+      // Must succeed now with a new run.
+      DCHECK(slot_addr != NULL);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
+                << "(" << std::dec << (bracket_size) << ")";
+    }
+  } else {
+    // Use the (shared) current run.
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    Run* current_run = current_runs_[idx];
+    if (UNLIKELY(current_run == NULL)) {
+      current_run = RefillRun(self, idx);
+      if (UNLIKELY(current_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
+      current_run->is_thread_local_ = 0;
+      current_runs_[idx] = current_run;
+      DCHECK(!current_run->IsFull());
+    }
+    DCHECK(current_run != NULL);
+    slot_addr = current_run->AllocSlot();
+    if (UNLIKELY(slot_addr == NULL)) {
+      // The current run got full. Try to refill it.
+      DCHECK(current_run->IsFull());
+      current_runs_[idx] = NULL;
+      if (kIsDebugBuild) {
+        // Insert it into full_runs and set the current run to NULL.
+        full_runs_[idx].insert(current_run);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run)
+                    << " into full_runs_[" << std::dec << idx << "]";
+        }
+      }
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
+      current_run = RefillRun(self, idx);
+      if (UNLIKELY(current_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(current_run != NULL);
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
+      current_run->is_thread_local_ = 0;
+      current_runs_[idx] = current_run;
+      DCHECK(!current_run->IsFull());
+      slot_addr = current_run->AllocSlot();
+      // Must succeed now with a new run.
+      DCHECK(slot_addr != NULL);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
+                << "(" << std::dec << (bracket_size) << ")";
+    }
+  }
+  if (LIKELY(bytes_allocated != NULL)) {
+    *bytes_allocated = bracket_size;
+  }
+  memset(slot_addr, 0, size);
+  return slot_addr;
+}
+
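+// Frees a slot that belongs to the given run. For a thread-local run only the
+// thread-local free bit map is marked (the owning thread merges it later); for
+// a shared run the slot is freed in the alloc bit map and the run is either
+// returned to the page allocator (if it became completely free) or moved into
+// the non-full run set (if it was full).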
+void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
+  DCHECK(run->magic_num_ == kMagicNum);
+  DCHECK(run < ptr && ptr < run->End());
+  size_t idx = run->size_bracket_idx_;
+  MutexLock mu(self, *size_bracket_locks_[idx]);
+  bool run_was_full = false;
+  if (kIsDebugBuild) {
+    run_was_full = run->IsFull();
+  }
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr);
+  }
+  if (LIKELY(run->is_thread_local_ != 0)) {
+    // It's a thread-local run. Just mark the thread-local free bit map and return.
+    DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
+    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+    run->MarkThreadLocalFreeBitMap(ptr);
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex
+                << reinterpret_cast<intptr_t>(run);
+    }
+    // A thread local run will be kept as a thread local even if it's become all free.
+    return;
+  }
+  // Free the slot in the run.
+  run->FreeSlot(ptr);
+  std::set<Run*>* non_full_runs = &non_full_runs_[idx];
+  if (run->IsAllFree()) {
+    // It has just become completely free. Free the pages of this run.
+    std::set<Run*>::iterator pos = non_full_runs->find(run);
+    if (pos != non_full_runs->end()) {
+      non_full_runs->erase(pos);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(run) << " from non_full_runs_";
+      }
+    }
+    if (run == current_runs_[idx]) {
+      current_runs_[idx] = NULL;
+    }
+    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+    {
+      MutexLock mu(self, lock_);
+      FreePages(self, run);
+    }
+  } else {
+    // It is not completely free. If it isn't the current run and isn't
+    // already in the non-full run set (i.e., it was full), insert it
+    // into the non-full run set.
+    if (run != current_runs_[idx]) {
+      hash_set<Run*, hash_run, eq_run>* full_runs =
+          kIsDebugBuild ? &full_runs_[idx] : NULL;
+      std::set<Run*>::iterator pos = non_full_runs->find(run);
+      if (pos == non_full_runs->end()) {
+        DCHECK(run_was_full);
+        DCHECK(full_runs->find(run) != full_runs->end());
+        if (kIsDebugBuild) {
+          full_runs->erase(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run) << " from full_runs_";
+          }
+        }
+        non_full_runs->insert(run);
+        DCHECK(!run->IsFull());
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreeFromRun() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(run)
+                    << " into non_full_runs_[" << std::dec << idx << "]";
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::Run::Dump() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  std::string bit_map_str;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = alloc_bit_map_[v];
+    if (v != num_vec - 1) {
+      bit_map_str.append(StringPrintf("%x-", vec));
+    } else {
+      bit_map_str.append(StringPrintf("%x", vec));
+    }
+  }
+  LOG(INFO) << "Run : " << std::hex << reinterpret_cast<intptr_t>(this)
+            << std::dec << ", idx=" << idx << ", bit_map=" << bit_map_str;
+}
+
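+// Allocates one slot within this run. While in bump index mode
+// (top_slot_idx_ < num_slots) the next slot is handed out directly; once the
+// bump index is exhausted, the alloc bit map is scanned 32 bits at a time for
+// a clear bit using __builtin_ffs().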
+void* RosAlloc::Run::AllocSlot() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  DCHECK_LE(top_slot_idx_, num_slots);
+  if (LIKELY(top_slot_idx_ < num_slots)) {
+    // If it's in bump index mode, grab the top slot and increment the top index.
+    size_t slot_idx = top_slot_idx_;
+    byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+    }
+    top_slot_idx_++;
+    size_t vec_idx = slot_idx / 32;
+    size_t vec_off = slot_idx % 32;
+    uint32_t* vec = &alloc_bit_map_[vec_idx];
+    DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+    *vec |= 1 << vec_off;
+    DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+    return slot_addr;
+  }
+  // Not in bump index mode. Search the alloc bit map for an empty slot.
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slot_idx = 0;
+  bool found_slot = false;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t *vecp = &alloc_bit_map_[v];
+    uint32_t ffz1 = __builtin_ffs(~*vecp);
+    uint32_t ffz;
+    // TODO: Use LIKELY or UNLIKELY here?
+    if (LIKELY(ffz1 > 0 && (ffz = ffz1 - 1) + v * 32 < num_slots)) {
+      // Found an empty slot. Set the bit.
+      DCHECK_EQ((*vecp & (1 << ffz)), static_cast<uint32_t>(0));
+      *vecp |= (1 << ffz);
+      DCHECK_NE((*vecp & (1 << ffz)), static_cast<uint32_t>(0));
+      slot_idx = ffz + v * 32;
+      found_slot = true;
+      break;
+    }
+  }
+  if (LIKELY(found_slot)) {
+    byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+    }
+    return slot_addr;
+  }
+  return NULL;
+}
+
+inline void RosAlloc::Run::FreeSlot(void* ptr) {
+  DCHECK_EQ(is_thread_local_, 0);
+  byte idx = size_bracket_idx_;
+  size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
+  DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+  size_t slot_idx = offset_from_slot_base / bracketSizes[idx];
+  DCHECK(slot_idx < numOfSlots[idx]);
+  size_t vec_idx = slot_idx / 32;
+  if (kIsDebugBuild) {
+    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
+    DCHECK(vec_idx < num_vec);
+  }
+  size_t vec_off = slot_idx % 32;
+  uint32_t* vec = &alloc_bit_map_[vec_idx];
+  DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  *vec &= ~(1 << vec_off);
+  DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+  }
+}
+
+inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
+  DCHECK_NE(is_thread_local_, 0);
+  // Free slots in the alloc bit map based on the thread local free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  bool changed = false;
+  uint32_t* vecp = &alloc_bit_map_[0];
+  uint32_t* tl_free_vecp = &thread_local_free_bit_map()[0];
+  bool is_all_free_after = true;
+  for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
+    uint32_t tl_free_vec = *tl_free_vecp;
+    uint32_t vec_before = *vecp;
+    uint32_t vec_after;
+    if (tl_free_vec != 0) {
+      vec_after = vec_before & ~tl_free_vec;
+      *vecp = vec_after;
+      changed = true;
+      *tl_free_vecp = 0;  // clear the thread local free bit map.
+    } else {
+      vec_after = vec_before;
+    }
+    if (vec_after != 0) {
+      is_all_free_after = false;
+    }
+    DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
+  }
+  *is_all_free_after_out = is_all_free_after;
+  // Return true if at least one bit was set in the thread-local free bit
+  // map and at least one bit in the alloc bit map changed.
+  return changed;
+}
+
+inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
+  DCHECK_EQ(is_thread_local_, 0);
+  // Free slots in the alloc bit map based on the bulk free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  uint32_t* vecp = &alloc_bit_map_[0];
+  uint32_t* free_vecp = &bulk_free_bit_map()[0];
+  for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
+    uint32_t free_vec = *free_vecp;
+    if (free_vec != 0) {
+      *vecp &= ~free_vec;
+      *free_vecp = 0;  // clear the bulk free bit map.
+    }
+    DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0));
+  }
+}
+
+inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
+  DCHECK_NE(is_thread_local_, 0);
+  // Union the bulk free bit map into the thread-local free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  uint32_t* to_vecp = &thread_local_free_bit_map()[0];
+  uint32_t* from_vecp = &bulk_free_bit_map()[0];
+  for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
+    uint32_t from_vec = *from_vecp;
+    if (from_vec != 0) {
+      *to_vecp |= from_vec;
+      *from_vecp = 0;  // clear the from free bit map.
+    }
+    DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
+  }
+}
+
+inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
+  DCHECK_NE(is_thread_local_, 0);
+  MarkFreeBitMapShared(ptr, thread_local_free_bit_map(), "MarkThreadLocalFreeBitMap");
+}
+
+inline void RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
+  MarkFreeBitMapShared(ptr, bulk_free_bit_map(), "MarkFreeBitMap");
+}
+
+inline void RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
+                                              const char* caller_name) {
+  byte idx = size_bracket_idx_;
+  size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
+  DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+  size_t slot_idx = offset_from_slot_base / bracketSizes[idx];
+  DCHECK(slot_idx < numOfSlots[idx]);
+  size_t vec_idx = slot_idx / 32;
+  if (kIsDebugBuild) {
+    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
+    DCHECK(vec_idx < num_vec);
+  }
+  size_t vec_off = slot_idx % 32;
+  uint32_t* vec = &free_bit_map_base[vec_idx];
+  DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  *vec |= 1 << vec_off;
+  DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
+              << reinterpret_cast<intptr_t>(ptr)
+              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+  }
+}
+
+inline bool RosAlloc::Run::IsAllFree() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = alloc_bit_map_[v];
+    if (vec != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline bool RosAlloc::Run::IsFull() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots);
+    uint32_t vec = alloc_bit_map_[v];
+    uint32_t mask = (num_slots - slots >= 32) ? static_cast<uint32_t>(-1)
+        : (1 << (num_slots - slots)) - 1;
+    DCHECK(num_slots - slots >= 32 ? mask == static_cast<uint32_t>(-1) : true);
+    if (vec != mask) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline void RosAlloc::Run::ClearBitMaps() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  memset(alloc_bit_map_, 0, sizeof(uint32_t) * num_vec * 3);
+}
+
+void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                                    void* arg) {
+  size_t idx = size_bracket_idx_;
+  byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
+  size_t num_slots = numOfSlots[idx];
+  size_t bracket_size = IndexToBracketSize(idx);
+  DCHECK_EQ(slot_base + num_slots * bracket_size, reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize);
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots);
+    uint32_t vec = alloc_bit_map_[v];
+    size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
+    for (size_t i = 0; i < end; ++i) {
+      bool is_allocated = ((vec >> i) & 0x1) != 0;
+      byte* slot_addr = slot_base + (slots + i) * bracket_size;
+      if (is_allocated) {
+        handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
+      } else {
+        handler(slot_addr, slot_addr + bracket_size, 0, arg);
+      }
+    }
+  }
+}
+
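+// Frees a batch of pointers (the GC uses this instead of Free()). Phase one
+// marks each pointer in its run's bulk free bit map (large objects are freed
+// immediately); phase two, per affected run and under the bracket lock, merges
+// the bulk free bits into the alloc bit map (or the thread-local free bit map
+// for thread-local runs) and moves the run between the full/non-full sets or
+// frees its pages as needed.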
+void RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
+  if (false) {
+    // Used only to test Free() as GC uses only BulkFree().
+    for (size_t i = 0; i < num_ptrs; ++i) {
+      FreeInternal(self, ptrs[i]);
+    }
+    return;
+  }
+
+  WriterMutexLock wmu(self, bulk_free_lock_);
+
+  // First mark slots to free in the bulk free bit map without locking the
+  // size bracket locks. On host, hash_set is faster than vector + flag.
+#ifdef HAVE_ANDROID_OS
+  std::vector<Run*> runs;
+#else
+  hash_set<Run*, hash_run, eq_run> runs;
+#endif
+  {
+    for (size_t i = 0; i < num_ptrs; i++) {
+      void* ptr = ptrs[i];
+      ptrs[i] = NULL;
+      DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+      size_t pm_idx = RoundDownToPageMapIndex(ptr);
+      bool free_from_run = false;
+      Run* run = NULL;
+      {
+        MutexLock mu(self, lock_);
+        DCHECK(pm_idx < page_map_.size());
+        byte page_map_entry = page_map_[pm_idx];
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
+                    << std::dec << pm_idx
+                    << ", page_map_entry=" << static_cast<int>(page_map_entry);
+        }
+        if (LIKELY(page_map_entry == kPageMapRun)) {
+          free_from_run = true;
+          run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum);
+        } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
+          free_from_run = true;
+          size_t pi = pm_idx;
+          DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
+          // Find the beginning of the run.
+          while (page_map_[pi] != kPageMapRun) {
+            pi--;
+            DCHECK(pi < capacity_ / kPageSize);
+          }
+          DCHECK(page_map_[pi] == kPageMapRun);
+          run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum);
+        } else if (page_map_entry == kPageMapLargeObject) {
+          FreePages(self, ptr);
+        } else {
+          LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
+        }
+      }
+      if (LIKELY(free_from_run)) {
+        DCHECK(run != NULL);
+        // Set the bit in the bulk free bit map.
+        run->MarkBulkFreeBitMap(ptr);
+#ifdef HAVE_ANDROID_OS
+        if (!run->to_be_bulk_freed_) {
+          run->to_be_bulk_freed_ = true;
+          runs.push_back(run);
+        }
+#else
+        runs.insert(run);
+#endif
+      }
+    }
+  }
+
+  // Now, iterate over the affected runs and update the alloc bit map
+  // based on the bulk free bit map (for non-thread-local runs) and
+  // union the bulk free bit map into the thread-local free bit map
+  // (for thread-local runs.)
+#ifdef HAVE_ANDROID_OS
+  typedef std::vector<Run*>::iterator It;
+#else
+  typedef hash_set<Run*, hash_run, eq_run>::iterator It;
+#endif
+  for (It it = runs.begin(); it != runs.end(); ++it) {
+    Run* run = *it;
+#ifdef HAVE_ANDROID_OS
+    DCHECK(run->to_be_bulk_freed_);
+    run->to_be_bulk_freed_ = false;
+#endif
+    size_t idx = run->size_bracket_idx_;
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    if (run->is_thread_local_ != 0) {
+      DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
+      DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+      run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
+                  << std::hex << reinterpret_cast<intptr_t>(run);
+      }
+      DCHECK_NE(run->is_thread_local_, 0);
+      // A thread local run will be kept as a thread local even if
+      // it's become all free.
+    } else {
+      bool run_was_full = run->IsFull();
+      run->MergeBulkFreeBitMapIntoAllocBitMap();
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(run);
+      }
+      // Check if the run should be moved to non_full_runs_ or
+      // free_page_runs_.
+      std::set<Run*>* non_full_runs = &non_full_runs_[idx];
+      hash_set<Run*, hash_run, eq_run>* full_runs =
+          kIsDebugBuild ? &full_runs_[idx] : NULL;
+      if (run->IsAllFree()) {
+        // It has just become completely free. Free the pages of the
+        // run.
+        bool run_was_current = run == current_runs_[idx];
+        if (run_was_current) {
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          // If it was a current run, reuse it.
+        } else if (run_was_full) {
+          // If it was full, remove it from the full run set (debug
+          // only.)
+          if (kIsDebugBuild) {
+            hash_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
+            DCHECK(pos != full_runs->end());
+            full_runs->erase(pos);
+            if (kTraceRosAlloc) {
+              LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                        << reinterpret_cast<intptr_t>(run)
+                        << " from full_runs_";
+            }
+            DCHECK(full_runs->find(run) == full_runs->end());
+          }
+        } else {
+          // If it was in a non full run set, remove it from the set.
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) != non_full_runs->end());
+          non_full_runs->erase(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run)
+                      << " from non_full_runs_";
+          }
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+        }
+        if (!run_was_current) {
+          MutexLock mu(self, lock_);
+          FreePages(self, run);
+        }
+      } else {
+        // It is not completely free. If it isn't the current run and isn't
+        // already in the non-full run set (i.e., it was full), insert
+        // it into the non-full run set.
+        if (run == current_runs_[idx]) {
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          DCHECK(full_runs->find(run) == full_runs->end());
+          // If it was a current run, keep it.
+        } else if (run_was_full) {
+          // If it was full, remove it from the full run set (debug
+          // only) and insert into the non-full run set.
+          DCHECK(full_runs->find(run) != full_runs->end());
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          if (kIsDebugBuild) {
+            full_runs->erase(run);
+            if (kTraceRosAlloc) {
+              LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                        << reinterpret_cast<intptr_t>(run)
+                        << " from full_runs_";
+            }
+          }
+          non_full_runs->insert(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::BulkFree() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run)
+                      << " into non_full_runs_[" << std::dec << idx;
+          }
+        } else {
+          // It was not full, so leave it in the non-full run set.
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) != non_full_runs->end());
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::DumpPageMap(Thread* self) {
+  MutexLock mu(self, lock_);
+  size_t end = page_map_.size();
+  FreePageRun* curr_fpr = NULL;
+  size_t curr_fpr_size = 0;
+  size_t remaining_curr_fpr_size = 0;
+  size_t num_running_empty_pages = 0;
+  for (size_t i = 0; i < end; ++i) {
+    byte pm = page_map_[i];
+    switch (pm) {
+      case kPageMapEmpty: {
+        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+        if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
+          // Encountered a fresh free page run.
+          DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+          DCHECK(fpr->IsFree());
+          DCHECK(curr_fpr == NULL);
+          DCHECK_EQ(curr_fpr_size, static_cast<size_t>(0));
+          curr_fpr = fpr;
+          curr_fpr_size = fpr->ByteSize(this);
+          DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
+          remaining_curr_fpr_size = curr_fpr_size - kPageSize;
+          LOG(INFO) << "[" << i << "]=Empty (FPR start)"
+                    << " fpr_size=" << curr_fpr_size
+                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          if (remaining_curr_fpr_size == 0) {
+            // Reset at the end of the current free page run.
+            curr_fpr = NULL;
+            curr_fpr_size = 0;
+          }
+          LOG(INFO) << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr);
+          DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
+        } else {
+          // Still part of the current free page run.
+          DCHECK_NE(num_running_empty_pages, static_cast<size_t>(0));
+          DCHECK(curr_fpr != NULL && curr_fpr_size > 0 && remaining_curr_fpr_size > 0);
+          DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
+          DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
+          remaining_curr_fpr_size -= kPageSize;
+          LOG(INFO) << "[" << i << "]=Empty (FPR part)"
+                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          if (remaining_curr_fpr_size == 0) {
+            // Reset at the end of the current free page run.
+            curr_fpr = NULL;
+            curr_fpr_size = 0;
+          }
+        }
+        num_running_empty_pages++;
+        break;
+      }
+      case kPageMapLargeObject: {
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Large (start)";
+        break;
+      }
+      case kPageMapLargeObjectPart:
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Large (part)";
+        break;
+      case kPageMapRun: {
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+        size_t idx = run->size_bracket_idx_;
+        LOG(INFO) << "[" << i << "]=Run (start)"
+                  << " idx=" << idx
+                  << " numOfPages=" << numOfPages[idx]
+                  << " thread_local=" << static_cast<int>(run->is_thread_local_)
+                  << " is_all_free=" << (run->IsAllFree() ? 1 : 0);
+        break;
+      }
+      case kPageMapRunPart:
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Run (part)";
+        break;
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+    }
+  }
+}
+
+size_t RosAlloc::UsableSize(void* ptr) {
+  DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+  size_t pm_idx = RoundDownToPageMapIndex(ptr);
+  MutexLock mu(Thread::Current(), lock_);
+  switch (page_map_[pm_idx]) {
+  case kPageMapEmpty:
+    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    break;
+  case kPageMapLargeObject: {
+    size_t num_pages = 1;
+    size_t idx = pm_idx + 1;
+    size_t end = page_map_.size();
+    while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
+      num_pages++;
+      idx++;
+    }
+    return num_pages * kPageSize;
+  }
+  case kPageMapLargeObjectPart:
+    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    break;
+  case kPageMapRun:
+  case kPageMapRunPart: {
+    // Find the beginning of the run.
+    while (page_map_[pm_idx] != kPageMapRun) {
+      pm_idx--;
+      DCHECK(pm_idx < capacity_ / kPageSize);
+    }
+    DCHECK(page_map_[pm_idx] == kPageMapRun);
+    Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+    DCHECK(run->magic_num_ == kMagicNum);
+    size_t idx = run->size_bracket_idx_;
+    size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+        - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
+    DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+    return IndexToBracketSize(idx);
+  }
+  default:
+    LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+    break;
+  }
+  return 0;
+}
+
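+// Shrinks the footprint by releasing the free page run at the end of the
+// space, if any, back to the system via a negative morecore increment.
+// Returns true if any memory was released.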
+bool RosAlloc::Trim() {
+  MutexLock mu(Thread::Current(), lock_);
+  FreePageRun* last_free_page_run;
+  DCHECK_EQ(footprint_ % kPageSize, static_cast<size_t>(0));
+  auto it = free_page_runs_.rbegin();
+  if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
+    // Remove the last free page run, if any.
+    DCHECK(last_free_page_run->IsFree());
+    DCHECK(page_map_[ToPageMapIndex(last_free_page_run)] == kPageMapEmpty);
+    DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+    DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
+    free_page_runs_.erase(last_free_page_run);
+    size_t decrement = last_free_page_run->ByteSize(this);
+    size_t new_footprint = footprint_ - decrement;
+    DCHECK_EQ(new_footprint % kPageSize, static_cast<size_t>(0));
+    size_t new_num_of_pages = new_footprint / kPageSize;
+    DCHECK_GE(page_map_.size(), new_num_of_pages);
+    page_map_.resize(new_num_of_pages);
+    DCHECK_EQ(page_map_.size(), new_num_of_pages);
+    free_page_run_size_map_.resize(new_num_of_pages);
+    DCHECK_EQ(free_page_run_size_map_.size(), new_num_of_pages);
+    art_heap_rosalloc_morecore(this, -(static_cast<intptr_t>(decrement)));
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Trim() : decreased the footprint from "
+                << footprint_ << " to " << new_footprint;
+    }
+    DCHECK_LT(new_footprint, footprint_);
+    DCHECK_LT(new_footprint, capacity_);
+    footprint_ = new_footprint;
+    return true;
+  }
+  return false;
+}
+
+void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                          void* arg) {
+  // Note: no need to use this to release pages as we already do so in FreePages().
+  if (handler == NULL) {
+    return;
+  }
+  MutexLock mu(Thread::Current(), lock_);
+  size_t pm_end = page_map_.size();
+  size_t i = 0;
+  while (i < pm_end) {
+    byte pm = page_map_[i];
+    switch (pm) {
+      case kPageMapEmpty: {
+        // The start of a free page run.
+        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+        DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+        size_t fpr_size = fpr->ByteSize(this);
+        DCHECK(IsAligned<kPageSize>(fpr_size));
+        void* start = fpr;
+        void* end = reinterpret_cast<byte*>(start) + fpr_size;
+        handler(start, end, 0, arg);
+        size_t num_pages = fpr_size / kPageSize;
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapEmpty);
+          }
+        }
+        i += fpr_size / kPageSize;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapLargeObject: {
+        // The start of a large object.
+        size_t num_pages = 1;
+        size_t idx = i + 1;
+        while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
+          num_pages++;
+          idx++;
+        }
+        void* start = base_ + i * kPageSize;
+        void* end = base_ + (i + num_pages) * kPageSize;
+        size_t used_bytes = num_pages * kPageSize;
+        handler(start, end, used_bytes, arg);
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapLargeObjectPart);
+          }
+        }
+        i += num_pages;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapLargeObjectPart:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+      case kPageMapRun: {
+        // The start of a run.
+        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+        DCHECK(run->magic_num_ == kMagicNum);
+        run->InspectAllSlots(handler, arg);
+        size_t num_pages = numOfPages[run->size_bracket_idx_];
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapRunPart);
+          }
+        }
+        i += num_pages;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapRunPart:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+    }
+  }
+}
+
+size_t RosAlloc::Footprint() {
+  MutexLock mu(Thread::Current(), lock_);
+  return footprint_;
+}
+
+size_t RosAlloc::FootprintLimit() {
+  MutexLock mu(Thread::Current(), lock_);
+  return capacity_;
+}
+
+void RosAlloc::SetFootprintLimit(size_t new_capacity) {
+  MutexLock mu(Thread::Current(), lock_);
+  DCHECK_EQ(RoundUp(new_capacity, kPageSize), new_capacity);
+  // Only growing is supported here. But Trim() is supported.
+  if (capacity_ < new_capacity) {
+    capacity_ = new_capacity;
+    VLOG(heap) << "new capacity=" << capacity_;
+  }
+}
+
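+// Detaches the given thread's thread-local runs. Each run has its thread-local
+// free bit map merged into its alloc bit map and loses its thread-local status;
+// it is then recorded in full_runs_ (debug builds) if full, freed back to the
+// page allocator if completely free, or inserted into the non-full run set.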
+void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
+  Thread* self = Thread::Current();
+  for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    Run* thread_local_run = reinterpret_cast<Run*>(thread->rosalloc_runs_[idx]);
+    if (thread_local_run != NULL) {
+      DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
+      DCHECK_NE(thread_local_run->is_thread_local_, 0);
+      thread->rosalloc_runs_[idx] = NULL;
+      // Note the thread local run may not be full here.
+      bool dont_care;
+      thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
+      thread_local_run->is_thread_local_ = 0;
+      thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
+      DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+      if (thread_local_run->IsFull()) {
+        if (kIsDebugBuild) {
+          full_runs_[idx].insert(thread_local_run);
+          DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(thread_local_run)
+                      << " into full_runs_[" << std::dec << idx << "]";
+          }
+        }
+      } else if (thread_local_run->IsAllFree()) {
+        MutexLock mu(self, lock_);
+        FreePages(self, thread_local_run);
+      } else {
+        non_full_runs_[idx].insert(thread_local_run);
+        DCHECK(non_full_runs_[idx].find(thread_local_run) != non_full_runs_[idx].end());
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(thread_local_run)
+                    << " into non_full_runs_[" << std::dec << idx << "]";
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::RevokeAllThreadLocalRuns() {
+  // This is called when mutator threads won't allocate, such as during
+  // Zygote creation or a GC pause.
+  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+  std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+  for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
+    Thread* t = *it;
+    RevokeThreadLocalRuns(t);
+  }
+}
+
+void RosAlloc::Initialize() {
+  // Check the consistency of the number of size brackets.
+  DCHECK_EQ(Thread::kRosAllocNumOfSizeBrackets, kNumOfSizeBrackets);
+  // bracketSizes.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    if (i < kNumOfSizeBrackets - 2) {
+      bracketSizes[i] = 16 * (i + 1);
+    } else if (i == kNumOfSizeBrackets - 2) {
+      bracketSizes[i] = 1 * KB;
+    } else {
+      DCHECK(i == kNumOfSizeBrackets - 1);
+      bracketSizes[i] = 2 * KB;
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "bracketSizes[" << i << "]=" << bracketSizes[i];
+    }
+  }
+  // numOfPages.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    if (i < 4) {
+      numOfPages[i] = 1;
+    } else if (i < 8) {
+      numOfPages[i] = 2;
+    } else if (i < 16) {
+      numOfPages[i] = 4;
+    } else if (i < 32) {
+      numOfPages[i] = 8;
+    } else if (i == 32) {
+      DCHECK(i == kNumOfSizeBrackets - 2);
+      numOfPages[i] = 16;
+    } else {
+      DCHECK(i == kNumOfSizeBrackets - 1);
+      numOfPages[i] = 32;
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
+    }
+  }
+  // Compute numOfSlots, headerSizes, and the bit map offsets.
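+  // For illustration only (assuming a 4 KB kPageSize): for the 16-byte
+  // bracket, the run is one page, the fixed header is 8 bytes and each of
+  // the three bit maps takes 32 bytes, so the header rounds up to 112 bytes
+  // and the run holds (4096 - 112) / 16 = 249 slots.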
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    size_t bracket_size = bracketSizes[i];
+    size_t run_size = kPageSize * numOfPages[i];
+    size_t max_num_of_slots = run_size / bracket_size;
+    // Compute the actual number of slots by taking the header and
+    // alignment into account.
+    size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
+    DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
+    size_t header_size = 0;
+    size_t bulk_free_bit_map_offset = 0;
+    size_t thread_local_free_bit_map_offset = 0;
+    size_t num_of_slots = 0;
+    // Search for the maximum number of slots that allows enough space
+    // for the header (including the bit maps.)
+    for (int s = max_num_of_slots; s >= 0; s--) {
+      size_t tmp_slots_size = bracket_size * s;
+      size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
+      size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
+      size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
+      size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
+      size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
+      size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
+      // Align up the unaligned header size. bracket_size may not be a power of two.
+      size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
+          tmp_unaligned_header_size :
+          tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
+      DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
+      DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
+      if (tmp_slots_size + tmp_header_size <= run_size) {
+        // Found the right number of slots, that is, there was enough
+        // space for the header (including the bit maps.)
+        num_of_slots = s;
+        header_size = tmp_header_size;
+        bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
+        thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
+        break;
+      }
+    }
+    DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
+    // Add the padding for the alignment remainder.
+    header_size += run_size % bracket_size;
+    DCHECK(header_size + num_of_slots * bracket_size == run_size);
+    numOfSlots[i] = num_of_slots;
+    headerSizes[i] = header_size;
+    bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
+    threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
+                << ", headerSizes[" << i << "]=" << headerSizes[i]
+                << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
+                << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];;
+    }
+  }
+}
+
+void RosAlloc::BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  if (used_bytes == 0) {
+    return;
+  }
+  size_t* bytes_allocated = reinterpret_cast<size_t*>(arg);
+  *bytes_allocated += used_bytes;
+}
+
+void RosAlloc::ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  if (used_bytes == 0) {
+    return;
+  }
+  size_t* objects_allocated = reinterpret_cast<size_t*>(arg);
+  ++(*objects_allocated);
+}
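+
+// Example use of the callbacks above (illustrative only; 'rosalloc' stands
+// for any RosAlloc instance):
+//   size_t bytes = 0;
+//   size_t objects = 0;
+//   rosalloc->InspectAll(RosAlloc::BytesAllocatedCallback, &bytes);
+//   rosalloc->InspectAll(RosAlloc::ObjectsAllocatedCallback, &objects);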
+
+}  // namespace allocator
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
new file mode 100644
index 0000000..c81306f
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc.h
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
+#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
+
+#include <set>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <sys/mman.h>
+#include <vector>
+
+#include "base/mutex.h"
+#include "base/logging.h"
+#include "globals.h"
+#include "utils.h"
+
+// Boilerplate for using hash_map/hash_set on both host and device.
+#ifdef HAVE_ANDROID_OS
+#include <hash_map>
+#include <hash_set>
+using std::hash_map;
+using std::hash_set;
+#else  // HAVE_ANDROID_OS
+#ifdef __DEPRECATED
+#define ROSALLOC_OLD__DEPRECATED __DEPRECATED
+#undef __DEPRECATED
+#endif
+#include <ext/hash_map>
+#include <ext/hash_set>
+#ifdef ROSALLOC_OLD__DEPRECATED
+#define __DEPRECATED ROSALLOC_OLD__DEPRECATED
+#undef ROSALLOC_OLD__DEPRECATED
+#endif
+using __gnu_cxx::hash_map;
+using __gnu_cxx::hash_set;
+#endif  // HAVE_ANDROID_OS
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+// A Runs-of-slots memory allocator.
+class RosAlloc {
+ private:
+  // Represents a run of free pages.
+  class FreePageRun {
+   public:
+    byte magic_num_;  // The magic number used for debugging only.
+
+    bool IsFree() const {
+      if (kIsDebugBuild) {
+        return magic_num_ == kMagicNumFree;
+      }
+      return true;
+    }
+    size_t ByteSize(RosAlloc* rosalloc) const EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      const byte* fpr_base = reinterpret_cast<const byte*>(this);
+      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
+      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
+      DCHECK_GE(byte_size, static_cast<size_t>(0));
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      return byte_size;
+    }
+    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
+        EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      byte* fpr_base = reinterpret_cast<byte*>(this);
+      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
+      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
+    }
+    void* Begin() {
+      return reinterpret_cast<void*>(this);
+    }
+    void* End(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      byte* fpr_base = reinterpret_cast<byte*>(this);
+      byte* end = fpr_base + ByteSize(rosalloc);
+      return end;
+    }
+    void ReleasePages(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      size_t byte_size = ByteSize(rosalloc);
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      if (kIsDebugBuild) {
+        // Exclude the first page that stores the magic number.
+        DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
+        byte_size -= kPageSize;
+        if (byte_size > 0) {
+          madvise(reinterpret_cast<byte*>(this) + kPageSize, byte_size, MADV_DONTNEED);
+        }
+      } else {
+        madvise(this, byte_size, MADV_DONTNEED);
+      }
+    }
+  };
+
+  // Represents a run of memory slots of the same size.
+  //
+  // A run's memory layout:
+  //
+  // +-------------------+
+  // | magic_num         |
+  // +-------------------+
+  // | size_bracket_idx  |
+  // +-------------------+
+  // | is_thread_local   |
+  // +-------------------+
+  // | to_be_bulk_freed  |
+  // +-------------------+
+  // | top_slot_idx      |
+  // +-------------------+
+  // |                   |
+  // | alloc bit map     |
+  // |                   |
+  // +-------------------+
+  // |                   |
+  // | bulk free bit map |
+  // |                   |
+  // +-------------------+
+  // |                   |
+  // | thread-local free |
+  // | bit map           |
+  // |                   |
+  // +-------------------+
+  // | padding due to    |
+  // | alignment         |
+  // +-------------------+
+  // | slot 0            |
+  // +-------------------+
+  // | slot 1            |
+  // +-------------------+
+  // | slot 2            |
+  // +-------------------+
+  // ...
+  // +-------------------+
+  // | last slot         |
+  // +-------------------+
+  //
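+  // For illustration (following from the offsets computed in Initialize()):
+  // slot i of a run with size bracket index idx starts at
+  //   reinterpret_cast<byte*>(run) + headerSizes[idx] + i * bracketSizes[idx]
+  // and the whole run spans numOfPages[idx] pages.
+  //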
+  class Run {
+   public:
+    byte magic_num_;             // The magic number used for debugging.
+    byte size_bracket_idx_;      // The index of the size bracket of this run.
+    byte is_thread_local_;       // True if this run is used as a thread-local run.
+    byte to_be_bulk_freed_;      // Used within BulkFree() to flag a run that's involved with a bulk free.
+    uint32_t top_slot_idx_;      // The top slot index when this run is in bump index mode.
+    uint32_t alloc_bit_map_[0];  // The bit map that indicates whether each slot is in use.
+
+    // bulk_free_bit_map_[] : The bit map that is used for GC to
+    // temporarily mark the slots to free without using a lock. After
+    // all the slots to be freed in a run are marked, all those slots
+    // get freed in bulk with one locking per run, as opposed to one
+    // locking per slot to minimize the lock contention. This is used
+    // within BulkFree().
+
+    // thread_local_free_bit_map_[] : The bit map that is used for GC
+    // to temporarily mark the slots to free in a thread-local run
+    // without using a lock (without synchronizing the thread that
+    // owns the thread-local run.) When the thread-local run becomes
+    // full, the thread will check this bit map and update the
+    // allocation bit map of the run (that is, the slots get freed.)
+
+    // Returns the byte size of the header, excluding the bit maps.
+    static size_t fixed_header_size() {
+      Run temp;
+      size_t size = reinterpret_cast<byte*>(&temp.alloc_bit_map_) - reinterpret_cast<byte*>(&temp);
+      DCHECK_EQ(size, static_cast<size_t>(8));
+      return size;
+    }
+    // Returns the base address of the bulk free bit map.
+    uint32_t* bulk_free_bit_map() {
+      return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
+    }
+    // Returns the base address of the thread local free bit map.
+    uint32_t* thread_local_free_bit_map() {
+      return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
+    }
+    void* End() {
+      return reinterpret_cast<byte*>(this) + kPageSize * numOfPages[size_bracket_idx_];
+    }
+    // Frees slots in the allocation bit map with regard to the
+    // thread-local free bit map. Used when a thread-local run becomes
+    // full.
+    bool MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out);
+    // Frees slots in the allocation bit map with regard to the bulk
+    // free bit map. Used in a bulk free.
+    void MergeBulkFreeBitMapIntoAllocBitMap();
+    // Unions the slots to be freed in the bulk free bit map into the
+    // thread-local free bit map. In a bulk free, as a two-step process,
+    // the GC first records all the slots to free in a run in the bulk
+    // free bit map, where it can write without a lock, and later
+    // acquires a lock once per run to union those bits into the
+    // thread-local free bit map.
+    void UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+    // Allocates a slot in a run.
+    void* AllocSlot();
+    // Frees a slot in a run. This is used in a non-bulk free.
+    void FreeSlot(void* ptr);
+    // Marks the slots to free in the bulk free bit map.
+    void MarkBulkFreeBitMap(void* ptr);
+    // Marks the slots to free in the thread-local free bit map.
+    void MarkThreadLocalFreeBitMap(void* ptr);
+    // Returns true if none of the slots in the run are in use.
+    bool IsAllFree();
+    // Returns true if all the slots in the run are in use.
+    bool IsFull();
+    // Clears all the bit maps.
+    void ClearBitMaps();
+    // Iterates over all the slots and applies the given function.
+    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
+    // Dumps the run metadata for debugging.
+    void Dump();
+
+   private:
+    // The common part of MarkBulkFreeBitMap() and MarkThreadLocalFreeBitMap().
+    void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
+  };
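+
+  // Bulk free, in outline (an illustrative summary of the comments above):
+  // the GC first calls MarkBulkFreeBitMap() on each pointer, which only
+  // writes the run's bulk free bit map and needs no per-slot locking. Then,
+  // once per run, the recorded bits are either merged into the allocation
+  // bit map (MergeBulkFreeBitMapIntoAllocBitMap()) or, for a thread-local
+  // run, unioned into the thread-local free bit map
+  // (UnionBulkFreeBitMapToThreadLocalFreeBitMap()).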
+
+  // The magic number for a run.
+  static const byte kMagicNum = 42;
+  // The magic number for free pages.
+  static const byte kMagicNumFree = 43;
+  // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
+  static const size_t kNumOfSizeBrackets = 34;
+  // The number of smaller size brackets that are 16 bytes apart.
+  static const size_t kNumOfQuantumSizeBrackets = 32;
+  // The sizes (the slot sizes, in bytes) of the size brackets.
+  static size_t bracketSizes[kNumOfSizeBrackets];
+  // The numbers of pages that are used for runs for each size bracket.
+  static size_t numOfPages[kNumOfSizeBrackets];
+  // The numbers of slots of the runs for each size bracket.
+  static size_t numOfSlots[kNumOfSizeBrackets];
+  // The header sizes in bytes of the runs for each size bracket.
+  static size_t headerSizes[kNumOfSizeBrackets];
+  // The byte offsets of the bulk free bit maps of the runs for each size bracket.
+  static size_t bulkFreeBitMapOffsets[kNumOfSizeBrackets];
+  // The byte offsets of the thread-local free bit maps of the runs for each size bracket.
+  static size_t threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
+
+  // Initialize the run specs (the above arrays).
+  static void Initialize();
+  static bool initialized_;
+
+  // Returns the bracket size in bytes for the given index.
+  static size_t IndexToBracketSize(size_t idx) {
+    DCHECK(idx < kNumOfSizeBrackets);
+    return bracketSizes[idx];
+  }
+  // Returns the index of the size bracket from the bracket size.
+  static size_t BracketSizeToIndex(size_t size) {
+    DCHECK(16 <= size && ((size < 1 * KB && size % 16 == 0) || size == 1 * KB || size == 2 * KB));
+    size_t idx;
+    if (UNLIKELY(size == 1 * KB)) {
+      idx = kNumOfSizeBrackets - 2;
+    } else if (UNLIKELY(size == 2 * KB)) {
+      idx = kNumOfSizeBrackets - 1;
+    } else {
+      DCHECK(size < 1 * KB);
+      DCHECK_EQ(size % 16, static_cast<size_t>(0));
+      idx = size / 16 - 1;
+    }
+    DCHECK(bracketSizes[idx] == size);
+    return idx;
+  }
+  // Rounds up the size to the nearest bracket size.
+  static size_t RoundToBracketSize(size_t size) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      return RoundUp(size, 16);
+    } else if (512 < size && size <= 1 * KB) {
+      return 1 * KB;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      return 2 * KB;
+    }
+  }
+  // Returns the size bracket index from the byte size with rounding.
+  static size_t SizeToIndex(size_t size) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      return RoundUp(size, 16) / 16 - 1;
+    } else if (512 < size && size <= 1 * KB) {
+      return kNumOfSizeBrackets - 2;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      return kNumOfSizeBrackets - 1;
+    }
+  }
+  // A combination of SizeToIndex() and RoundToBracketSize().
+  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      size_t bracket_size = RoundUp(size, 16);
+      *bracket_size_out = bracket_size;
+      size_t idx = bracket_size / 16 - 1;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    } else if (512 < size && size <= 1 * KB) {
+      size_t bracket_size = 1024;
+      *bracket_size_out = bracket_size;
+      size_t idx = kNumOfSizeBrackets - 2;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      size_t bracket_size = 2048;
+      *bracket_size_out = bracket_size;
+      size_t idx = kNumOfSizeBrackets - 1;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    }
+  }
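+  // For example (illustrative), SizeToIndexAndBracketSize() maps a 40-byte
+  // request to the 48-byte bracket (index 2), a 600-byte request to the
+  // 1 KB bracket (index kNumOfSizeBrackets - 2), and a 1.5 KB request to
+  // the 2 KB bracket (index kNumOfSizeBrackets - 1).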
+  // Returns the page map index from an address. Requires that the
+  // address is page size aligned.
+  size_t ToPageMapIndex(const void* addr) const {
+    DCHECK(base_ <= addr && addr < base_ + capacity_);
+    size_t byte_offset = reinterpret_cast<const byte*>(addr) - base_;
+    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
+    return byte_offset / kPageSize;
+  }
+  // Returns the page map index from an address, rounding down.
+  size_t RoundDownToPageMapIndex(void* addr) {
+    DCHECK(base_ <= addr && addr < reinterpret_cast<byte*>(base_) + capacity_);
+    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
+  }
+
+  // A memory allocation request larger than this size is treated as a large
+  // object and allocated at page granularity.
+  static const size_t kLargeSizeThreshold = 2048;
+
+  // We use thread-local runs for the size brackets whose indexes
+  // are less than or equal to this index. We use shared (current)
+  // runs for the rest.
+  static const size_t kMaxThreadLocalSizeBracketIdx = 10;
+
+  struct hash_run {
+    size_t operator()(const RosAlloc::Run* r) const {
+      return reinterpret_cast<size_t>(r);
+    }
+  };
+
+  struct eq_run {
+    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
+      return r1 == r2;
+    }
+  };
+
+  // The base address of the memory region that's managed by this allocator.
+  byte* base_;
+
+  // The footprint in bytes of the currently allocated portion of the
+  // memory region.
+  size_t footprint_;
+
+  // The maximum footprint. The address, base_ + capacity_, indicates
+  // the end of the memory region that's managed by this allocator.
+  size_t capacity_;
+
+  // The run sets that hold the runs whose slots are not all
+  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
+  std::set<Run*> non_full_runs_[kNumOfSizeBrackets];
+  // The run sets that hold the runs whose slots are all full. This is
+  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
+  hash_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
+  // The set of free pages.
+  std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
+  // The free page run whose end address is the end of the memory
+  // region that's managed by this allocator, if any.
+  FreePageRun* last_free_page_run_;
+  // The current runs where the allocations are first attempted for
+  // the size brackets that do not use thread-local
+  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
+  Run* current_runs_[kNumOfSizeBrackets];
+  // The mutexes, one per size bracket.
+  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
+  // The types of page map entries.
+  enum {
+    kPageMapEmpty           = 0,  // Not allocated.
+    kPageMapRun             = 1,  // The beginning of a run.
+    kPageMapRunPart         = 2,  // The non-beginning part of a run.
+    kPageMapLargeObject     = 3,  // The beginning of a large object.
+    kPageMapLargeObjectPart = 4,  // The non-beginning part of a large object.
+  };
+  // The table that indicates what each page is currently used for.
+  std::vector<byte> page_map_ GUARDED_BY(lock_);
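+  // For example (illustrative), a three-page large object followed by a
+  // one-page run and a free page would appear in page_map_ as
+  //   { kPageMapLargeObject, kPageMapLargeObjectPart, kPageMapLargeObjectPart,
+  //     kPageMapRun, kPageMapEmpty }.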
+  // The table that indicates the size of free page runs. These sizes are
+  // stored here rather than in the free page headers so that the backing
+  // pages can be released.
+  std::vector<size_t> free_page_run_size_map_ GUARDED_BY(lock_);
+  // The global lock. Used to guard the page map, the free page set,
+  // and the footprint.
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // The reader-writer lock to allow one bulk free at a time while
+  // allowing multiple individual frees at the same time.
+  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // The base address of the memory region that's managed by this allocator.
+  byte* Begin() { return base_; }
+  // The end address of the memory region that's managed by this allocator.
+  byte* End() { return base_ + capacity_; }
+
+  // Page-granularity alloc/free
+  void* AllocPages(Thread* self, size_t num_pages, byte page_map_type)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // Allocate/free a run slot.
+  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(lock_);
+  void FreeFromRun(Thread* self, void* ptr, Run* run)
+      LOCKS_EXCLUDED(lock_);
+
+  // Used to acquire a new/reused run for a size bracket. Used when a
+  // thread-local or current run gets full.
+  Run* RefillRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);
+
+  // The internal implementation of the non-bulk Free().
+  void FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);
+
+ public:
+  RosAlloc(void* base, size_t capacity);
+  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(lock_);
+  void Free(Thread* self, void* ptr)
+      LOCKS_EXCLUDED(bulk_free_lock_);
+  void BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
+      LOCKS_EXCLUDED(bulk_free_lock_);
+  // Returns the size of the allocated slot for a given allocated memory chunk.
+  size_t UsableSize(void* ptr);
+  // Returns the size of the allocated slot for a given size.
+  size_t UsableSize(size_t bytes) {
+    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
+      return RoundUp(bytes, kPageSize);
+    } else {
+      return RoundToBracketSize(bytes);
+    }
+  }
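+  // For example (illustrative, assuming a 4 KB kPageSize), UsableSize(40)
+  // returns 48 and UsableSize(3000) returns 4096.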
+  // Try to reduce the current footprint by releasing the free page
+  // run at the end of the memory region, if any.
+  bool Trim();
+  // Iterates over all the memory slots and applies the given function.
+  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                  void* arg)
+      LOCKS_EXCLUDED(lock_);
+  // Returns the current footprint.
+  size_t Footprint() LOCKS_EXCLUDED(lock_);
+  // Returns the current capacity, i.e. the maximum footprint.
+  size_t FootprintLimit() LOCKS_EXCLUDED(lock_);
+  // Update the current capacity.
+  void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_);
+  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
+  void RevokeThreadLocalRuns(Thread* thread);
+  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
+  void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+  // Dumps the page map for debugging.
+  void DumpPageMap(Thread* self);
+
+  // Callbacks for InspectAll that will count the number of bytes
+  // allocated and objects allocated, respectively.
+  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+};
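+
+// A minimal usage sketch (illustrative only; 'base' is assumed to be a
+// page-aligned mapping of 'capacity' bytes, 'self' the current Thread, and
+// error handling is omitted):
+//   RosAlloc rosalloc(base, capacity);
+//   size_t bytes_allocated = 0;
+//   void* p = rosalloc.Alloc(self, 40, &bytes_allocated);  // 48-byte bracket.
+//   rosalloc.Free(self, p);
+//   rosalloc.Trim();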
+
+}  // namespace allocator
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 1789103..56d9ef4 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -83,6 +83,7 @@
     thread_list->SuspendAll();
     MarkingPhase();
     ReclaimPhase();
+    GetHeap()->RevokeAllThreadLocalBuffers();
     thread_list->ResumeAll();
     ATRACE_END();
     uint64_t pause_end = NanoTime();
@@ -101,6 +102,9 @@
       ATRACE_END();
       ATRACE_BEGIN("All mutator threads suspended");
       done = HandleDirtyObjectsPhase();
+      if (done) {
+        GetHeap()->RevokeAllThreadLocalBuffers();
+      }
       ATRACE_END();
       uint64_t pause_end = NanoTime();
       ATRACE_BEGIN("Resuming mutator threads");
@@ -135,7 +139,7 @@
       if (live_bitmap != mark_bitmap) {
         heap_->GetLiveBitmap()->ReplaceBitmap(live_bitmap, mark_bitmap);
         heap_->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-        space->AsDlMallocSpace()->SwapBitmaps();
+        space->AsMallocSpace()->SwapBitmaps();
       }
     }
   }
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 6111c2f..a80f593 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -64,7 +64,7 @@
 
   void RegisterPause(uint64_t nano_length);
 
-  base::TimingLogger& GetTimings() {
+  TimingLogger& GetTimings() {
     return timings_;
   }
 
@@ -131,7 +131,7 @@
   const bool verbose_;
 
   uint64_t duration_ns_;
-  base::TimingLogger timings_;
+  TimingLogger timings_;
 
   // Cumulative statistics.
   uint64_t total_time_ns_;
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 56dc0e5..58068b1 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -157,7 +157,7 @@
 
 void MarkSweep::InitializePhase() {
   timings_.Reset();
-  base::TimingLogger::ScopedSplit split("InitializePhase", &timings_);
+  TimingLogger::ScopedSplit split("InitializePhase", &timings_);
   mark_stack_ = heap_->mark_stack_.get();
   DCHECK(mark_stack_ != nullptr);
   SetImmuneRange(nullptr, nullptr);
@@ -185,14 +185,14 @@
 }
 
 void MarkSweep::ProcessReferences(Thread* self) {
-  base::TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
+  TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  ProcessReferences(&soft_reference_list_, clear_soft_references_, &weak_reference_list_,
-                    &finalizer_reference_list_, &phantom_reference_list_);
+  GetHeap()->ProcessReferences(timings_, clear_soft_references_, &IsMarkedCallback,
+                               &RecursiveMarkObjectCallback, this);
 }
 
 bool MarkSweep::HandleDirtyObjectsPhase() {
-  base::TimingLogger::ScopedSplit split("HandleDirtyObjectsPhase", &timings_);
+  TimingLogger::ScopedSplit split("HandleDirtyObjectsPhase", &timings_);
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
 
@@ -238,7 +238,7 @@
 }
 
 void MarkSweep::MarkingPhase() {
-  base::TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
+  TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
   Thread* self = Thread::Current();
 
   BindBitmaps();
@@ -272,7 +272,7 @@
     if (IsImmuneSpace(space)) {
       const char* name = space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
           "UpdateAndMarkImageModUnionTable";
-      base::TimingLogger::ScopedSplit split(name, &timings_);
+      TimingLogger::ScopedSplit split(name, &timings_);
       accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
       CHECK(mod_union_table != nullptr);
       mod_union_table->UpdateAndMarkReferences(MarkRootCallback, this);
@@ -297,7 +297,7 @@
 }
 
 void MarkSweep::ReclaimPhase() {
-  base::TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
+  TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
   Thread* self = Thread::Current();
 
   if (!IsConcurrent()) {
@@ -312,7 +312,7 @@
   if (IsConcurrent()) {
     Runtime::Current()->AllowNewSystemWeaks();
 
-    base::TimingLogger::ScopedSplit split("UnMarkAllocStack", &timings_);
+    TimingLogger::ScopedSplit split("UnMarkAllocStack", &timings_);
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     accounting::ObjectStack* allocation_stack = GetHeap()->allocation_stack_.get();
     // The allocation stack contains things allocated since the start of the GC. These may have been
@@ -363,7 +363,7 @@
 }
 
 void MarkSweep::FindDefaultMarkBitmap() {
-  base::TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
+  TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
     if (bitmap != nullptr &&
@@ -407,6 +407,13 @@
   }
 }
 
+mirror::Object* MarkSweep::RecursiveMarkObjectCallback(mirror::Object* obj, void* arg) {
+  MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
+  mark_sweep->MarkObject(obj);
+  mark_sweep->ProcessMarkStack(true);
+  return obj;
+}
+
 inline void MarkSweep::UnMarkObjectNonNull(const Object* obj) {
   DCHECK(!IsImmune(obj));
   // Try to take advantage of locality of references within a space, failing this find the space
@@ -606,8 +613,8 @@
 }
 
 void MarkSweep::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsDlMallocSpace());
-  space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+  CHECK(space->IsMallocSpace());
+  space::MallocSpace* alloc_space = space->AsMallocSpace();
   accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
   accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
   GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
@@ -932,7 +939,7 @@
 // Populates the mark stack based on the set of marked objects and
 // recursively marks until the mark stack is emptied.
 void MarkSweep::RecursiveMark() {
-  base::TimingLogger::ScopedSplit split("RecursiveMark", &timings_);
+  TimingLogger::ScopedSplit split("RecursiveMark", &timings_);
   // RecursiveMark will build the lists of known instances of the Reference classes.
   // See DelayReferenceReferent for details.
   CHECK(soft_reference_list_ == NULL);
@@ -992,7 +999,7 @@
   ProcessMarkStack(false);
 }
 
-mirror::Object* MarkSweep::SystemWeakIsMarkedCallback(Object* object, void* arg) {
+mirror::Object* MarkSweep::IsMarkedCallback(Object* object, void* arg) {
   if (reinterpret_cast<MarkSweep*>(arg)->IsMarked(object)) {
     return object;
   }
@@ -1013,7 +1020,7 @@
 void MarkSweep::SweepSystemWeaks() {
   Runtime* runtime = Runtime::Current();
   timings_.StartSplit("SweepSystemWeaks");
-  runtime->SweepSystemWeaks(SystemWeakIsMarkedCallback, this);
+  runtime->SweepSystemWeaks(IsMarkedCallback, this);
   timings_.EndSplit();
 }
 
@@ -1119,7 +1126,7 @@
 }
 
 void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitmaps) {
-  space::DlMallocSpace* space = heap_->GetNonMovingSpace();
+  space::MallocSpace* space = heap_->GetNonMovingSpace();
   timings_.StartSplit("SweepArray");
   // Newly allocated objects MUST be in the alloc space and those are the only objects which we are
   // going to free.
@@ -1198,14 +1205,14 @@
 
 void MarkSweep::Sweep(bool swap_bitmaps) {
   DCHECK(mark_stack_->IsEmpty());
-  base::TimingLogger::ScopedSplit("Sweep", &timings_);
+  TimingLogger::ScopedSplit("Sweep", &timings_);
 
   const bool partial = (GetGcType() == kGcTypePartial);
   SweepCallbackContext scc;
   scc.mark_sweep = this;
   scc.self = Thread::Current();
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (!space->IsDlMallocSpace()) {
+    if (!space->IsMallocSpace()) {
       continue;
     }
     // We always sweep always collect spaces.
@@ -1217,19 +1224,19 @@
     if (sweep_space) {
       uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
       uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-      scc.space = space->AsDlMallocSpace();
+      scc.space = space->AsMallocSpace();
       accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
       accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
       if (swap_bitmaps) {
         std::swap(live_bitmap, mark_bitmap);
       }
       if (!space->IsZygoteSpace()) {
-        base::TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
+        TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
         // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
         accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
                                            &SweepCallback, reinterpret_cast<void*>(&scc));
       } else {
-        base::TimingLogger::ScopedSplit split("SweepZygote", &timings_);
+        TimingLogger::ScopedSplit split("SweepZygote", &timings_);
         // Zygote sweep takes care of dirtying cards and clearing live bits, does not free actual
         // memory.
         accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
@@ -1242,7 +1249,7 @@
 }
 
 void MarkSweep::SweepLargeObjects(bool swap_bitmaps) {
-  base::TimingLogger::ScopedSplit("SweepLargeObjects", &timings_);
+  TimingLogger::ScopedSplit("SweepLargeObjects", &timings_);
   // Sweep large objects
   space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
   accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
@@ -1267,7 +1274,7 @@
 
 void MarkSweep::CheckReference(const Object* obj, const Object* ref, MemberOffset offset, bool is_static) {
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace() && space->Contains(ref)) {
+    if (space->IsMallocSpace() && space->Contains(ref)) {
       DCHECK(IsMarked(obj));
 
       bool is_marked = IsMarked(ref);
@@ -1314,40 +1321,7 @@
   DCHECK(klass != nullptr);
   DCHECK(klass->IsReferenceClass());
   DCHECK(obj != NULL);
-  Object* referent = heap_->GetReferenceReferent(obj);
-  if (referent != NULL && !IsMarked(referent)) {
-    if (kCountJavaLangRefs) {
-      ++reference_count_;
-    }
-    Thread* self = Thread::Current();
-    // TODO: Remove these locks, and use atomic stacks for storing references?
-    // We need to check that the references haven't already been enqueued since we can end up
-    // scanning the same reference multiple times due to dirty cards.
-    if (klass->IsSoftReferenceClass()) {
-      MutexLock mu(self, *heap_->GetSoftRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &soft_reference_list_);
-      }
-    } else if (klass->IsWeakReferenceClass()) {
-      MutexLock mu(self, *heap_->GetWeakRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &weak_reference_list_);
-      }
-    } else if (klass->IsFinalizerReferenceClass()) {
-      MutexLock mu(self, *heap_->GetFinalizerRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &finalizer_reference_list_);
-      }
-    } else if (klass->IsPhantomReferenceClass()) {
-      MutexLock mu(self, *heap_->GetPhantomRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &phantom_reference_list_);
-      }
-    } else {
-      LOG(FATAL) << "Invalid reference type " << PrettyClass(klass)
-                 << " " << std::hex << klass->GetAccessFlags();
-    }
-  }
+  heap_->DelayReferenceReferent(klass, obj, IsMarkedCallback, this);
 }
 
 class MarkObjectVisitor {
@@ -1435,43 +1409,6 @@
   timings_.EndSplit();
 }
 
-// Walks the reference list marking any references subject to the
-// reference clearing policy.  References with a black referent are
-// removed from the list.  References with white referents biased
-// toward saving are blackened and also removed from the list.
-void MarkSweep::PreserveSomeSoftReferences(Object** list) {
-  DCHECK(list != NULL);
-  Object* clear = NULL;
-  size_t counter = 0;
-
-  DCHECK(mark_stack_->IsEmpty());
-
-  timings_.StartSplit("PreserveSomeSoftReferences");
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent == NULL) {
-      // Referent was cleared by the user during marking.
-      continue;
-    }
-    bool is_marked = IsMarked(referent);
-    if (!is_marked && ((++counter) & 1)) {
-      // Referent is white and biased toward saving, mark it.
-      MarkObject(referent);
-      is_marked = true;
-    }
-    if (!is_marked) {
-      // Referent is white, queue it for clearing.
-      heap_->EnqueuePendingReference(ref, &clear);
-    }
-  }
-  *list = clear;
-  timings_.EndSplit();
-
-  // Restart the mark with the newly black references added to the root set.
-  ProcessMarkStack(true);
-}
-
 inline bool MarkSweep::IsMarked(const Object* object) const
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
   if (IsImmune(object)) {
@@ -1484,103 +1421,11 @@
   return heap_->GetMarkBitmap()->Test(object);
 }
 
-// Unlink the reference list clearing references objects with white
-// referents.  Cleared references registered to a reference queue are
-// scheduled for appending by the heap worker thread.
-void MarkSweep::ClearWhiteReferences(Object** list) {
-  DCHECK(list != NULL);
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent != NULL && !IsMarked(referent)) {
-      // Referent is white, clear it.
-      heap_->ClearReferenceReferent(ref);
-      if (heap_->IsEnqueuable(ref)) {
-        heap_->EnqueueReference(ref, &cleared_reference_list_);
-      }
-    }
-  }
-  DCHECK(*list == NULL);
-}
-
-// Enqueues finalizer references with white referents.  White
-// referents are blackened, moved to the zombie field, and the
-// referent field is cleared.
-void MarkSweep::EnqueueFinalizerReferences(Object** list) {
-  DCHECK(list != NULL);
-  timings_.StartSplit("EnqueueFinalizerReferences");
-  MemberOffset zombie_offset = heap_->GetFinalizerReferenceZombieOffset();
-  bool has_enqueued = false;
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent != NULL && !IsMarked(referent)) {
-      MarkObject(referent);
-      // If the referent is non-null the reference must queuable.
-      DCHECK(heap_->IsEnqueuable(ref));
-      ref->SetFieldObject(zombie_offset, referent, false);
-      heap_->ClearReferenceReferent(ref);
-      heap_->EnqueueReference(ref, &cleared_reference_list_);
-      has_enqueued = true;
-    }
-  }
-  timings_.EndSplit();
-  if (has_enqueued) {
-    ProcessMarkStack(true);
-  }
-  DCHECK(*list == NULL);
-}
-
-// Process reference class instances and schedule finalizations.
-void MarkSweep::ProcessReferences(Object** soft_references, bool clear_soft,
-                                  Object** weak_references,
-                                  Object** finalizer_references,
-                                  Object** phantom_references) {
-  CHECK(soft_references != NULL);
-  CHECK(weak_references != NULL);
-  CHECK(finalizer_references != NULL);
-  CHECK(phantom_references != NULL);
-  CHECK(mark_stack_->IsEmpty());
-
-  // Unless we are in the zygote or required to clear soft references
-  // with white references, preserve some white referents.
-  if (!clear_soft && !Runtime::Current()->IsZygote()) {
-    PreserveSomeSoftReferences(soft_references);
-  }
-
-  timings_.StartSplit("ProcessReferences");
-  // Clear all remaining soft and weak references with white
-  // referents.
-  ClearWhiteReferences(soft_references);
-  ClearWhiteReferences(weak_references);
-  timings_.EndSplit();
-
-  // Preserve all white objects with finalize methods and schedule
-  // them for finalization.
-  EnqueueFinalizerReferences(finalizer_references);
-
-  timings_.StartSplit("ProcessReferences");
-  // Clear all f-reachable soft and weak references with white
-  // referents.
-  ClearWhiteReferences(soft_references);
-  ClearWhiteReferences(weak_references);
-
-  // Clear all phantom references with white referents.
-  ClearWhiteReferences(phantom_references);
-
-  // At this point all reference lists should be empty.
-  DCHECK(*soft_references == NULL);
-  DCHECK(*weak_references == NULL);
-  DCHECK(*finalizer_references == NULL);
-  DCHECK(*phantom_references == NULL);
-  timings_.EndSplit();
-}
-
 void MarkSweep::UnBindBitmaps() {
-  base::TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace()) {
-      space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+    if (space->IsMallocSpace()) {
+      space::MallocSpace* alloc_space = space->AsMallocSpace();
       if (alloc_space->temp_bitmap_.get() != NULL) {
         // At this point, the temp_bitmap holds our old mark bitmap.
         accounting::SpaceBitmap* new_bitmap = alloc_space->temp_bitmap_.release();
@@ -1594,13 +1439,9 @@
 }
 
 void MarkSweep::FinishPhase() {
-  base::TimingLogger::ScopedSplit split("FinishPhase", &timings_);
+  TimingLogger::ScopedSplit split("FinishPhase", &timings_);
   // Can't enqueue references if we hold the mutator lock.
-  Object* cleared_references = GetClearedReferences();
   Heap* heap = GetHeap();
-  timings_.NewSplit("EnqueueClearedReferences");
-  heap->EnqueueClearedReferences(&cleared_references);
-
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
 
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index cc58412..53d85b0 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -189,6 +189,10 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                             Locks::mutator_lock_);
 
+  static mirror::Object* RecursiveMarkObjectCallback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
@@ -212,10 +216,7 @@
   // Returns true if the object has its bit set in the mark bitmap.
   bool IsMarked(const mirror::Object* object) const;
 
-  static mirror::Object* SystemWeakIsMarkedCallback(mirror::Object* object, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  static mirror::Object* SystemWeakIsMarkedArrayCallback(mirror::Object* object, void* arg)
+  static mirror::Object* IsMarkedCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   static void VerifyImageRootVisitor(mirror::Object* root, void* arg)
@@ -349,13 +350,6 @@
   void ClearWhiteReferences(mirror::Object** list)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  void ProcessReferences(mirror::Object** soft_references, bool clear_soft_references,
-                         mirror::Object** weak_references,
-                         mirror::Object** finalizer_references,
-                         mirror::Object** phantom_references)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Whether or not we count how many of each type of object were scanned.
   static const bool kCountScannedTypes = false;
 
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index d833631..00794d6 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -139,7 +139,7 @@
 
 void SemiSpace::InitializePhase() {
   timings_.Reset();
-  base::TimingLogger::ScopedSplit split("InitializePhase", &timings_);
+  TimingLogger::ScopedSplit split("InitializePhase", &timings_);
   mark_stack_ = heap_->mark_stack_.get();
   DCHECK(mark_stack_ != nullptr);
   immune_begin_ = nullptr;
@@ -156,16 +156,16 @@
 }
 
 void SemiSpace::ProcessReferences(Thread* self) {
-  base::TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
+  TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  ProcessReferences(&soft_reference_list_, clear_soft_references_, &weak_reference_list_,
-                    &finalizer_reference_list_, &phantom_reference_list_);
+  GetHeap()->ProcessReferences(timings_, clear_soft_references_, &MarkedForwardingAddressCallback,
+                               &RecursiveMarkObjectCallback, this);
 }
 
 void SemiSpace::MarkingPhase() {
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
-  base::TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
+  TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
   // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
   // wrong space.
   heap_->SwapSemiSpaces();
@@ -198,7 +198,7 @@
       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
       CHECK(table != nullptr);
       // TODO: Improve naming.
-      base::TimingLogger::ScopedSplit split(
+      TimingLogger::ScopedSplit split(
           space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
                                    "UpdateAndMarkImageModUnionTable",
                                    &timings_);
@@ -218,7 +218,7 @@
 }
 
 void SemiSpace::ReclaimPhase() {
-  base::TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
+  TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
   Thread* self = Thread::Current();
   ProcessReferences(self);
   {
@@ -344,6 +344,15 @@
   return ret;
 }
 
+Object* SemiSpace::RecursiveMarkObjectCallback(Object* root, void* arg) {
+  DCHECK(root != nullptr);
+  DCHECK(arg != nullptr);
+  SemiSpace* semi_space = reinterpret_cast<SemiSpace*>(arg);
+  mirror::Object* ret = semi_space->MarkObject(root);
+  semi_space->ProcessMarkStack(true);
+  return ret;
+}
+
 Object* SemiSpace::MarkRootCallback(Object* root, void* arg) {
   DCHECK(root != nullptr);
   DCHECK(arg != nullptr);
@@ -359,8 +368,8 @@
 }
 
 void SemiSpace::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsDlMallocSpace());
-  space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+  CHECK(space->IsMallocSpace());
+  space::MallocSpace* alloc_space = space->AsMallocSpace();
   accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
   accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
   GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
@@ -374,13 +383,13 @@
   return obj;
 }
 
-mirror::Object* SemiSpace::SystemWeakIsMarkedCallback(Object* object, void* arg) {
+mirror::Object* SemiSpace::MarkedForwardingAddressCallback(Object* object, void* arg) {
   return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
 }
 
 void SemiSpace::SweepSystemWeaks() {
   timings_.StartSplit("SweepSystemWeaks");
-  Runtime::Current()->SweepSystemWeaks(SystemWeakIsMarkedCallback, this);
+  Runtime::Current()->SweepSystemWeaks(MarkedForwardingAddressCallback, this);
   timings_.EndSplit();
 }
 
@@ -417,14 +426,14 @@
 
 void SemiSpace::Sweep(bool swap_bitmaps) {
   DCHECK(mark_stack_->IsEmpty());
-  base::TimingLogger::ScopedSplit("Sweep", &timings_);
+  TimingLogger::ScopedSplit("Sweep", &timings_);
 
   const bool partial = (GetGcType() == kGcTypePartial);
   SweepCallbackContext scc;
   scc.mark_sweep = this;
   scc.self = Thread::Current();
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (!space->IsDlMallocSpace()) {
+    if (!space->IsMallocSpace()) {
       continue;
     }
     // We always sweep always collect spaces.
@@ -433,22 +442,22 @@
       // We sweep full collect spaces when the GC isn't a partial GC (ie its full).
       sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect);
     }
-    if (sweep_space && space->IsDlMallocSpace()) {
+    if (sweep_space && space->IsMallocSpace()) {
       uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
       uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-      scc.space = space->AsDlMallocSpace();
+      scc.space = space->AsMallocSpace();
       accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
       accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
       if (swap_bitmaps) {
         std::swap(live_bitmap, mark_bitmap);
       }
       if (!space->IsZygoteSpace()) {
-        base::TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
+        TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
         // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
         accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
                                            &SweepCallback, reinterpret_cast<void*>(&scc));
       } else {
-        base::TimingLogger::ScopedSplit split("SweepZygote", &timings_);
+        TimingLogger::ScopedSplit split("SweepZygote", &timings_);
         // Zygote sweep takes care of dirtying cards and clearing live bits, does not free actual
         // memory.
         accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
@@ -461,7 +470,7 @@
 }
 
 void SemiSpace::SweepLargeObjects(bool swap_bitmaps) {
-  base::TimingLogger::ScopedSplit("SweepLargeObjects", &timings_);
+  TimingLogger::ScopedSplit("SweepLargeObjects", &timings_);
   // Sweep large objects
   space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
   accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
@@ -487,45 +496,7 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void SemiSpace::DelayReferenceReferent(mirror::Class* klass, Object* obj) {
-  DCHECK(klass != nullptr);
-  DCHECK(klass->IsReferenceClass());
-  DCHECK(obj != nullptr);
-  Object* referent = heap_->GetReferenceReferent(obj);
-  if (referent != nullptr) {
-    Object* forward_address = GetMarkedForwardAddress(referent);
-    if (forward_address == nullptr) {
-      Thread* self = Thread::Current();
-      // TODO: Remove these locks, and use atomic stacks for storing references?
-      // We need to check that the references haven't already been enqueued since we can end up
-      // scanning the same reference multiple times due to dirty cards.
-      if (klass->IsSoftReferenceClass()) {
-        MutexLock mu(self, *heap_->GetSoftRefQueueLock());
-        if (!heap_->IsEnqueued(obj)) {
-          heap_->EnqueuePendingReference(obj, &soft_reference_list_);
-        }
-      } else if (klass->IsWeakReferenceClass()) {
-        MutexLock mu(self, *heap_->GetWeakRefQueueLock());
-        if (!heap_->IsEnqueued(obj)) {
-          heap_->EnqueuePendingReference(obj, &weak_reference_list_);
-        }
-      } else if (klass->IsFinalizerReferenceClass()) {
-        MutexLock mu(self, *heap_->GetFinalizerRefQueueLock());
-        if (!heap_->IsEnqueued(obj)) {
-          heap_->EnqueuePendingReference(obj, &finalizer_reference_list_);
-        }
-      } else if (klass->IsPhantomReferenceClass()) {
-        MutexLock mu(self, *heap_->GetPhantomRefQueueLock());
-        if (!heap_->IsEnqueued(obj)) {
-          heap_->EnqueuePendingReference(obj, &phantom_reference_list_);
-        }
-      } else {
-        LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
-                   << klass->GetAccessFlags();
-      }
-    } else if (referent != forward_address) {
-      heap_->SetReferenceReferent(obj, forward_address);
-    }
-  }
+  heap_->DelayReferenceReferent(klass, obj, MarkedForwardingAddressCallback, this);
 }
 
 // Visit all of the references of an object and update.
@@ -555,48 +526,6 @@
   timings_.EndSplit();
 }
 
-// Walks the reference list marking any references subject to the
-// reference clearing policy.  References with a black referent are
-// removed from the list.  References with white referents biased
-// toward saving are blackened and also removed from the list.
-void SemiSpace::PreserveSomeSoftReferences(Object** list) {
-  DCHECK(list != NULL);
-  Object* clear = NULL;
-  size_t counter = 0;
-  DCHECK(mark_stack_->IsEmpty());
-  timings_.StartSplit("PreserveSomeSoftReferences");
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent == NULL) {
-      // Referent was cleared by the user during marking.
-      continue;
-    }
-    Object* forward_address = GetMarkedForwardAddress(referent);
-    bool is_marked = forward_address != nullptr;
-    if (!is_marked && ((++counter) & 1)) {
-      // Referent is white and biased toward saving, mark it.
-      forward_address = MarkObject(referent);
-      if (referent != forward_address) {
-        // Update the referent if we moved it.
-        heap_->SetReferenceReferent(ref, forward_address);
-      }
-    } else {
-      if (!is_marked) {
-        // Referent is white, queue it for clearing.
-        heap_->EnqueuePendingReference(ref, &clear);
-      } else if (referent != forward_address) {
-        CHECK(forward_address != nullptr);
-        heap_->SetReferenceReferent(ref, forward_address);
-      }
-    }
-  }
-  *list = clear;
-  timings_.EndSplit();
-  // Restart the mark with the newly black references added to the root set.
-  ProcessMarkStack(true);
-}
-
 inline Object* SemiSpace::GetMarkedForwardAddress(mirror::Object* obj) const
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
   // All immune objects are assumed marked.
@@ -618,117 +547,11 @@
   return heap_->GetMarkBitmap()->Test(obj) ? obj : nullptr;
 }
 
-// Unlink the reference list clearing references objects with white
-// referents.  Cleared references registered to a reference queue are
-// scheduled for appending by the heap worker thread.
-void SemiSpace::ClearWhiteReferences(Object** list) {
-  DCHECK(list != NULL);
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent != nullptr) {
-      Object* forward_address = GetMarkedForwardAddress(referent);
-      if (forward_address == nullptr) {
-        // Referent is white, clear it.
-        heap_->ClearReferenceReferent(ref);
-        if (heap_->IsEnqueuable(ref)) {
-          heap_->EnqueueReference(ref, &cleared_reference_list_);
-        }
-      } else if (referent != forward_address) {
-        heap_->SetReferenceReferent(ref, forward_address);
-      }
-    }
-  }
-  DCHECK(*list == NULL);
-}
-
-// Enqueues finalizer references with white referents.  White
-// referents are blackened, moved to the zombie field, and the
-// referent field is cleared.
-void SemiSpace::EnqueueFinalizerReferences(Object** list) {
-  // *list = NULL;
-  // return;
-  DCHECK(list != NULL);
-  timings_.StartSplit("EnqueueFinalizerReferences");
-  MemberOffset zombie_offset = heap_->GetFinalizerReferenceZombieOffset();
-  bool has_enqueued = false;
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent != nullptr) {
-      Object* forward_address = GetMarkedForwardAddress(referent);
-      // Not marked.
-      if (forward_address == nullptr) {
-        forward_address = MarkObject(referent);
-        // If the referent is non-null the reference must queuable.
-        DCHECK(heap_->IsEnqueuable(ref));
-        // Move the referent to the zombie field.
-        ref->SetFieldObject(zombie_offset, forward_address, false);
-        heap_->ClearReferenceReferent(ref);
-        heap_->EnqueueReference(ref, &cleared_reference_list_);
-        has_enqueued = true;
-      } else if (referent != forward_address) {
-        heap_->SetReferenceReferent(ref, forward_address);
-      }
-    }
-  }
-  timings_.EndSplit();
-  if (has_enqueued) {
-    ProcessMarkStack(true);
-  }
-  DCHECK(*list == NULL);
-}
-
-// Process reference class instances and schedule finalizations.
-void SemiSpace::ProcessReferences(Object** soft_references, bool clear_soft,
-                                  Object** weak_references,
-                                  Object** finalizer_references,
-                                  Object** phantom_references) {
-  CHECK(soft_references != NULL);
-  CHECK(weak_references != NULL);
-  CHECK(finalizer_references != NULL);
-  CHECK(phantom_references != NULL);
-  CHECK(mark_stack_->IsEmpty());
-
-  // Unless we are in the zygote or required to clear soft references
-  // with white references, preserve some white referents.
-  if (!clear_soft && !Runtime::Current()->IsZygote()) {
-    PreserveSomeSoftReferences(soft_references);
-  }
-
-  timings_.StartSplit("ProcessReferences");
-  // Clear all remaining soft and weak references with white
-  // referents.
-  ClearWhiteReferences(soft_references);
-  ClearWhiteReferences(weak_references);
-  timings_.EndSplit();
-
-  // Preserve all white objects with finalize methods and schedule
-  // them for finalization.
-  EnqueueFinalizerReferences(finalizer_references);
-
-  timings_.StartSplit("ProcessReferences");
-  // Clear all f-reachable soft and weak references with white
-  // referents.
-  ClearWhiteReferences(soft_references);
-  ClearWhiteReferences(weak_references);
-
-  // Clear all phantom references with white referents.
-  ClearWhiteReferences(phantom_references);
-
-  // At this point all reference lists should be empty.
-  DCHECK(*soft_references == NULL);
-  DCHECK(*weak_references == NULL);
-  DCHECK(*finalizer_references == NULL);
-  DCHECK(*phantom_references == NULL);
-  timings_.EndSplit();
-}
-
 void SemiSpace::UnBindBitmaps() {
-  base::TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace()) {
-      space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+    if (space->IsMallocSpace()) {
+      space::MallocSpace* alloc_space = space->AsMallocSpace();
       if (alloc_space->HasBoundBitmaps()) {
         alloc_space->UnBindBitmaps();
         heap_->GetMarkBitmap()->ReplaceBitmap(alloc_space->GetLiveBitmap(),
@@ -749,13 +572,9 @@
 }
 
 void SemiSpace::FinishPhase() {
-  base::TimingLogger::ScopedSplit split("FinishPhase", &timings_);
+  TimingLogger::ScopedSplit split("FinishPhase", &timings_);
   // Can't enqueue references if we hold the mutator lock.
-  Object* cleared_references = GetClearedReferences();
   Heap* heap = GetHeap();
-  timings_.NewSplit("EnqueueClearedReferences");
-  heap->EnqueueClearedReferences(&cleared_references);
-
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
 
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 13d5195..0f0cae1 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -150,12 +150,15 @@
   static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
+  static mirror::Object* RecursiveMarkObjectCallback(mirror::Object* root, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
  protected:
   // Returns null if the object is not marked, otherwise returns the forwarding address (same as
   // object for non movable things).
   mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const;
 
-  static mirror::Object* SystemWeakIsMarkedCallback(mirror::Object* object, void* arg)
+  static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Marks or unmarks a large object based on whether or not set is true. If set is true, then we
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index b27b8df..ee6077a 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -38,7 +38,7 @@
   // know what was allocated since the last GC. A side-effect of binding the allocation space mark
   // and live bitmap is that marking the objects will place them in the live bitmap.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace() &&
+    if (space->IsMallocSpace() &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
       BindLiveToMarkBitmap(space);
     }
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 1d3c0d8..e6829e2 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -23,6 +23,7 @@
 #include "gc/space/bump_pointer_space-inl.h"
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/large_object_space.h"
+#include "gc/space/rosalloc_space-inl.h"
 #include "object_utils.h"
 #include "runtime.h"
 #include "thread.h"
@@ -41,7 +42,15 @@
                                                                    &obj, &bytes_allocated);
   if (LIKELY(!large_object_allocation)) {
     // Non-large object allocation.
-    obj = AllocateUninstrumented(self, non_moving_space_, byte_count, &bytes_allocated);
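+    // Dispatch on the concrete space type so the matching TryToAllocateUninstrumented
+    // specialization is used.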
+    if (!kUseRosAlloc) {
+      DCHECK(non_moving_space_->IsDlMallocSpace());
+      obj = AllocateUninstrumented(self, reinterpret_cast<space::DlMallocSpace*>(non_moving_space_),
+                                   byte_count, &bytes_allocated);
+    } else {
+      DCHECK(non_moving_space_->IsRosAllocSpace());
+      obj = AllocateUninstrumented(self, reinterpret_cast<space::RosAllocSpace*>(non_moving_space_),
+                                   byte_count, &bytes_allocated);
+    }
     // Ensure that we did not allocate into a zygote space.
     DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
   }
@@ -131,6 +140,16 @@
   return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
 }
 
+// RosAllocSpace-specific version.
+inline mirror::Object* Heap::TryToAllocateUninstrumented(Thread* self, space::RosAllocSpace* space, size_t alloc_size,
+                                                         bool grow, size_t* bytes_allocated) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
+    return NULL;
+  }
+  DCHECK(!running_on_valgrind_);
+  return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
+}
+
 template <class T>
 inline mirror::Object* Heap::AllocateUninstrumented(Thread* self, T* space, size_t alloc_size,
                                                     size_t* bytes_allocated) {
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 70df3d3..763bfe9 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -41,6 +41,7 @@
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
+#include "gc/space/rosalloc_space-inl.h"
 #include "gc/space/space-inl.h"
 #include "heap-inl.h"
 #include "image.h"
@@ -82,10 +83,11 @@
       long_gc_log_threshold_(long_gc_log_threshold),
       ignore_max_footprint_(ignore_max_footprint),
       have_zygote_space_(false),
-      soft_ref_queue_lock_(NULL),
-      weak_ref_queue_lock_(NULL),
-      finalizer_ref_queue_lock_(NULL),
-      phantom_ref_queue_lock_(NULL),
+      soft_reference_queue_(this),
+      weak_reference_queue_(this),
+      finalizer_reference_queue_(this),
+      phantom_reference_queue_(this),
+      cleared_references_(this),
       is_gc_running_(false),
       last_gc_type_(collector::kGcTypeNone),
       next_gc_type_(collector::kGcTypePartial),
@@ -166,9 +168,13 @@
   }
 
   const char* name = Runtime::Current()->IsZygote() ? "zygote space" : "alloc space";
-  non_moving_space_ = space::DlMallocSpace::Create(name, initial_size, growth_limit, capacity,
-                                                   requested_alloc_space_begin);
-
+  if (!kUseRosAlloc) {
+    non_moving_space_ = space::DlMallocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                     requested_alloc_space_begin);
+  } else {
+    non_moving_space_ = space::RosAllocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                     requested_alloc_space_begin);
+  }
   if (kMovingCollector) {
     // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
     // TODO: Having 3+ spaces as big as the large heap size can cause virtual memory fragmentation
@@ -233,13 +239,6 @@
   gc_complete_lock_ = new Mutex("GC complete lock");
   gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable",
                                                 *gc_complete_lock_));
-
-  // Create the reference queue locks, this is required so for parallel object scanning in the GC.
-  soft_ref_queue_lock_ = new Mutex("Soft reference queue lock");
-  weak_ref_queue_lock_ = new Mutex("Weak reference queue lock");
-  finalizer_ref_queue_lock_ = new Mutex("Finalizer reference queue lock");
-  phantom_ref_queue_lock_ = new Mutex("Phantom reference queue lock");
-
   last_gc_time_ns_ = NanoTime();
   last_gc_size_ = GetBytesAllocated();
 
@@ -488,8 +487,8 @@
     }
 
     continuous_spaces_.push_back(continuous_space);
-    if (continuous_space->IsDlMallocSpace()) {
-      non_moving_space_ = continuous_space->AsDlMallocSpace();
+    if (continuous_space->IsMallocSpace()) {
+      non_moving_space_ = continuous_space->AsMallocSpace();
     }
 
     // Ensure that spaces remain sorted in increasing order of start address.
@@ -507,7 +506,7 @@
       } else if (space->IsZygoteSpace()) {
         CHECK(!seen_alloc);
         seen_zygote = true;
-      } else if (space->IsDlMallocSpace()) {
+      } else if (space->IsMallocSpace()) {
         seen_alloc = true;
       }
     }
@@ -599,10 +598,6 @@
   STLDeleteElements(&continuous_spaces_);
   STLDeleteElements(&discontinuous_spaces_);
   delete gc_complete_lock_;
-  delete soft_ref_queue_lock_;
-  delete weak_ref_queue_lock_;
-  delete finalizer_ref_queue_lock_;
-  delete phantom_ref_queue_lock_;
   VLOG(heap) << "Finished ~Heap()";
 }
 
@@ -640,6 +635,106 @@
   return FindDiscontinuousSpaceFromObject(obj, true);
 }
 
+struct SoftReferenceArgs {
+  RootVisitor* is_marked_callback_;
+  RootVisitor* recursive_mark_callback_;
+  void* arg_;
+};
+
+mirror::Object* Heap::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) {
+  SoftReferenceArgs* args  = reinterpret_cast<SoftReferenceArgs*>(arg);
+  // TODO: Don't preserve all soft references.
+  return args->recursive_mark_callback_(obj, args->arg_);
+}
+
+// Process reference class instances and schedule finalizations.
+void Heap::ProcessReferences(TimingLogger& timings, bool clear_soft,
+                             RootVisitor* is_marked_callback,
+                             RootVisitor* recursive_mark_object_callback, void* arg) {
+  // Unless we are in the zygote or required to clear soft references with white references,
+  // preserve some white referents.
+  if (!clear_soft && !Runtime::Current()->IsZygote()) {
+    SoftReferenceArgs soft_reference_args;
+    soft_reference_args.is_marked_callback_ = is_marked_callback;
+    soft_reference_args.recursive_mark_callback_ = recursive_mark_object_callback;
+    soft_reference_args.arg_ = arg;
+    soft_reference_queue_.PreserveSomeSoftReferences(&PreserveSoftReferenceCallback,
+                                                     &soft_reference_args);
+  }
+  timings.StartSplit("ProcessReferences");
+  // Clear all remaining soft and weak references with white referents.
+  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  timings.EndSplit();
+  // Preserve all white objects with finalize methods and schedule them for finalization.
+  timings.StartSplit("EnqueueFinalizerReferences");
+  finalizer_reference_queue_.EnqueueFinalizerReferences(cleared_references_, is_marked_callback,
+                                                        recursive_mark_object_callback, arg);
+  timings.EndSplit();
+  timings.StartSplit("ProcessReferences");
+  // Clear all f-reachable soft and weak references with white referents.
+  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  // Clear all phantom references with white referents.
+  phantom_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  // At this point all reference queues other than the cleared references should be empty.
+  DCHECK(soft_reference_queue_.IsEmpty());
+  DCHECK(weak_reference_queue_.IsEmpty());
+  DCHECK(finalizer_reference_queue_.IsEmpty());
+  DCHECK(phantom_reference_queue_.IsEmpty());
+  timings.EndSplit();
+}
+
+bool Heap::IsEnqueued(mirror::Object* ref) const {
+  // Since the references are stored as cyclic lists, once a reference has been enqueued its
+  // pending next field will always be non-null.
+  return ref->GetFieldObject<mirror::Object*>(GetReferencePendingNextOffset(), false) != nullptr;
+}
+
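+// Returns true if the reference object has not yet been enqueued.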
+bool Heap::IsEnqueuable(const mirror::Object* ref) const {
+  DCHECK(ref != nullptr);
+  const mirror::Object* queue =
+      ref->GetFieldObject<mirror::Object*>(GetReferenceQueueOffset(), false);
+  const mirror::Object* queue_next =
+      ref->GetFieldObject<mirror::Object*>(GetReferenceQueueNextOffset(), false);
+  return queue != nullptr && queue_next == nullptr;
+}
+
+// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
+// marked, put it on the appropriate list in the heap for later processing.
+void Heap::DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj,
+                                  RootVisitor mark_visitor, void* arg) {
+  DCHECK(klass != nullptr);
+  DCHECK(klass->IsReferenceClass());
+  DCHECK(obj != nullptr);
+  mirror::Object* referent = GetReferenceReferent(obj);
+  if (referent != nullptr) {
+    mirror::Object* forward_address = mark_visitor(referent, arg);
+    // Null means that the object is not currently marked.
+    if (forward_address == nullptr) {
+      Thread* self = Thread::Current();
+      // TODO: Remove these locks, and use atomic stacks for storing references?
+      // We need to check that the references haven't already been enqueued since we can end up
+      // scanning the same reference multiple times due to dirty cards.
+      if (klass->IsSoftReferenceClass()) {
+        soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else if (klass->IsWeakReferenceClass()) {
+        weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else if (klass->IsFinalizerReferenceClass()) {
+        finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else if (klass->IsPhantomReferenceClass()) {
+        phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else {
+        LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
+                   << klass->GetAccessFlags();
+      }
+    } else if (referent != forward_address) {
+      // Referent is already marked but was moved; update the reference to the forwarding address.
+      SetReferenceReferent(obj, forward_address);
+    }
+  }
+}
+
 space::ImageSpace* Heap::GetImageSpace() const {
   for (const auto& space : continuous_spaces_) {
     if (space->IsImageSpace()) {
@@ -667,8 +762,15 @@
   if (!large_object_allocation && total_bytes_free >= byte_count) {
     size_t max_contiguous_allocation = 0;
     for (const auto& space : continuous_spaces_) {
-      if (space->IsDlMallocSpace()) {
-        space->AsDlMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+      if (space->IsMallocSpace()) {
+        // To allow Walk/InspectAll() to exclusively lock the mutator lock, temporarily
+        // release our shared access to it here by transitioning to the suspended state.
+        Locks::mutator_lock_->AssertSharedHeld(self);
+        self->TransitionFromRunnableToSuspended(kSuspended);
+        space->AsMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+        self->TransitionFromSuspendedToRunnable();
+        Locks::mutator_lock_->AssertSharedHeld(self);
       }
     }
     oss << "; failed due to fragmentation (largest possible contiguous allocation "
@@ -744,7 +846,15 @@
                                                                  &bytes_allocated);
   if (LIKELY(!large_object_allocation)) {
     // Non-large object allocation.
-    obj = AllocateInstrumented(self, non_moving_space_, byte_count, &bytes_allocated);
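+    // Dispatch on the concrete space type so the matching TryToAllocateInstrumented
+    // specialization is used.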
+    if (!kUseRosAlloc) {
+      DCHECK(non_moving_space_->IsDlMallocSpace());
+      obj = AllocateInstrumented(self, reinterpret_cast<space::DlMallocSpace*>(non_moving_space_),
+                                 byte_count, &bytes_allocated);
+    } else {
+      DCHECK(non_moving_space_->IsRosAllocSpace());
+      obj = AllocateInstrumented(self, reinterpret_cast<space::RosAllocSpace*>(non_moving_space_),
+                                 byte_count, &bytes_allocated);
+    }
     // Ensure that we did not allocate into a zygote space.
     DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
   }
@@ -776,8 +886,8 @@
   uint64_t total_alloc_space_size = 0;
   uint64_t managed_reclaimed = 0;
   for (const auto& space : continuous_spaces_) {
-    if (space->IsDlMallocSpace() && !space->IsZygoteSpace()) {
-      gc::space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+    if (space->IsMallocSpace() && !space->IsZygoteSpace()) {
+      gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
       total_alloc_space_size += alloc_space->Size();
       managed_reclaimed += alloc_space->Trim();
     }
@@ -843,7 +953,6 @@
     if (i > 0) {
       NanoSleep(MsToNs(10));
     }
-
     if (search_allocation_stack) {
       if (sorted) {
         if (allocation_stack_->ContainsSorted(const_cast<mirror::Object*>(obj))) {
@@ -1012,6 +1121,19 @@
   }
 }
 
+// RosAllocSpace-specific version.
+inline mirror::Object* Heap::TryToAllocateInstrumented(Thread* self, space::RosAllocSpace* space, size_t alloc_size,
+                                                       bool grow, size_t* bytes_allocated) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
+    return NULL;
+  }
+  if (LIKELY(!running_on_valgrind_)) {
+    return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
+  } else {
+    return space->Alloc(self, alloc_size, bytes_allocated);
+  }
+}
+
 template <class T>
 inline mirror::Object* Heap::AllocateInstrumented(Thread* self, T* space, size_t alloc_size,
                                                   size_t* bytes_allocated) {
@@ -1301,14 +1423,14 @@
   }
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
   // the remaining available heap memory.
-  space::DlMallocSpace* zygote_space = non_moving_space_;
+  space::MallocSpace* zygote_space = non_moving_space_;
   non_moving_space_ = zygote_space->CreateZygoteSpace("alloc space");
   non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   // Change the GC retention policy of the zygote space to only collect when full.
   zygote_space->SetGcRetentionPolicy(space::kGcRetentionPolicyFullCollect);
   AddSpace(non_moving_space_);
   have_zygote_space_ = true;
-  zygote_space->InvalidateMSpace();
+  zygote_space->InvalidateAllocator();
   // Create the zygote space mod union table.
   accounting::ModUnionTable* mod_union_table =
       new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space);
@@ -1442,6 +1564,9 @@
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
 
+  // Enqueue cleared references.
+  EnqueueClearedReferences();
+
   // Grow the heap so that we know when to perform the next GC.
   GrowForUtilization(gc_type, collector->GetDurationNs());
 
@@ -1474,7 +1599,7 @@
                   << PrettySize(total_memory) << ", " << "paused " << pause_string.str()
                   << " total " << PrettyDuration((duration / 1000) * 1000);
         if (VLOG_IS_ON(heap)) {
-            LOG(INFO) << Dumpable<base::TimingLogger>(collector->GetTimings());
+            LOG(INFO) << Dumpable<TimingLogger>(collector->GetTimings());
         }
     }
   }
@@ -1547,8 +1672,8 @@
 
         // Attempt to find the class inside of the recently freed objects.
         space::ContinuousSpace* ref_space = heap_->FindContinuousSpaceFromObject(ref, true);
-        if (ref_space != nullptr && ref_space->IsDlMallocSpace()) {
-          space::DlMallocSpace* space = ref_space->AsDlMallocSpace();
+        if (ref_space != nullptr && ref_space->IsMallocSpace()) {
+          space::MallocSpace* space = ref_space->AsMallocSpace();
           mirror::Class* ref_class = space->FindRecentFreedObject(ref);
           if (ref_class != nullptr) {
             LOG(ERROR) << "Reference " << ref << " found as a recently freed object with class "
@@ -1808,17 +1933,17 @@
   return it->second;
 }
 
-void Heap::ProcessCards(base::TimingLogger& timings) {
+void Heap::ProcessCards(TimingLogger& timings) {
   // Clear cards and keep track of cards cleared in the mod-union table.
   for (const auto& space : continuous_spaces_) {
     accounting::ModUnionTable* table = FindModUnionTableFromSpace(space);
     if (table != nullptr) {
       const char* name = space->IsZygoteSpace() ? "ZygoteModUnionClearCards" :
           "ImageModUnionClearCards";
-      base::TimingLogger::ScopedSplit split(name, &timings);
+      TimingLogger::ScopedSplit split(name, &timings);
       table->ClearCards();
     } else if (space->GetType() != space::kSpaceTypeBumpPointerSpace) {
-      base::TimingLogger::ScopedSplit split("AllocSpaceClearCards", &timings);
+      TimingLogger::ScopedSplit split("AllocSpaceClearCards", &timings);
       // No mod union table for the AllocSpace. Age the cards so that the GC knows that these cards
       // were dirty before the GC started.
       // TODO: Don't need to use atomic.
@@ -2072,72 +2197,6 @@
   return reference->GetFieldObject<mirror::Object*>(reference_referent_offset_, true);
 }
 
-void Heap::ClearReferenceReferent(mirror::Object* reference) {
-  DCHECK(reference != NULL);
-  DCHECK_NE(reference_referent_offset_.Uint32Value(), 0U);
-  reference->SetFieldObject(reference_referent_offset_, nullptr, true);
-}
-
-// Returns true if the reference object has not yet been enqueued.
-bool Heap::IsEnqueuable(const mirror::Object* ref) {
-  DCHECK(ref != NULL);
-  const mirror::Object* queue =
-      ref->GetFieldObject<mirror::Object*>(reference_queue_offset_, false);
-  const mirror::Object* queue_next =
-      ref->GetFieldObject<mirror::Object*>(reference_queueNext_offset_, false);
-  return (queue != NULL) && (queue_next == NULL);
-}
-
-void Heap::EnqueueReference(mirror::Object* ref, mirror::Object** cleared_reference_list) {
-  DCHECK(ref != NULL);
-  CHECK(ref->GetFieldObject<mirror::Object*>(reference_queue_offset_, false) != NULL);
-  CHECK(ref->GetFieldObject<mirror::Object*>(reference_queueNext_offset_, false) == NULL);
-  EnqueuePendingReference(ref, cleared_reference_list);
-}
-
-bool Heap::IsEnqueued(mirror::Object* ref) {
-  // Since the references are stored as cyclic lists it means that once enqueued, the pending next
-  // will always be non-null.
-  return ref->GetFieldObject<mirror::Object*>(GetReferencePendingNextOffset(), false) != nullptr;
-}
-
-void Heap::EnqueuePendingReference(mirror::Object* ref, mirror::Object** list) {
-  DCHECK(ref != NULL);
-  DCHECK(list != NULL);
-  if (*list == NULL) {
-    // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref;
-    ref->SetFieldObject(reference_pendingNext_offset_, ref, false);
-    *list = ref;
-  } else {
-    mirror::Object* head =
-        (*list)->GetFieldObject<mirror::Object*>(reference_pendingNext_offset_, false);
-    ref->SetFieldObject(reference_pendingNext_offset_, head, false);
-    (*list)->SetFieldObject(reference_pendingNext_offset_, ref, false);
-  }
-}
-
-mirror::Object* Heap::DequeuePendingReference(mirror::Object** list) {
-  DCHECK(list != NULL);
-  DCHECK(*list != NULL);
-  mirror::Object* head = (*list)->GetFieldObject<mirror::Object*>(reference_pendingNext_offset_,
-                                                                  false);
-  mirror::Object* ref;
-
-  // Note: the following code is thread-safe because it is only called from ProcessReferences which
-  // is single threaded.
-  if (*list == head) {
-    ref = *list;
-    *list = NULL;
-  } else {
-    mirror::Object* next = head->GetFieldObject<mirror::Object*>(reference_pendingNext_offset_,
-                                                                 false);
-    (*list)->SetFieldObject(reference_pendingNext_offset_, next, false);
-    ref = head;
-  }
-  ref->SetFieldObject(reference_pendingNext_offset_, NULL, false);
-  return ref;
-}
-
 void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
   ScopedObjectAccess soa(self);
   JValue result;
@@ -2168,20 +2227,18 @@
   }
 }
 
-void Heap::EnqueueClearedReferences(mirror::Object** cleared) {
-  DCHECK(cleared != nullptr);
-  mirror::Object* list = *cleared;
-  if (list != nullptr) {
+void Heap::EnqueueClearedReferences() {
+  if (!cleared_references_.IsEmpty()) {
     // When a runtime isn't started there are no reference queues to care about so ignore.
     if (LIKELY(Runtime::Current()->IsStarted())) {
       ScopedObjectAccess soa(Thread::Current());
       JValue result;
       ArgArray arg_array(NULL, 0);
-      arg_array.Append(reinterpret_cast<uint32_t>(list));
+      arg_array.Append(reinterpret_cast<uint32_t>(cleared_references_.GetList()));
       soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
           arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
     }
-    *cleared = nullptr;
+    cleared_references_.Clear();
   }
 }
 
@@ -2256,6 +2313,14 @@
   }
 }
 
+void Heap::RevokeThreadLocalBuffers(Thread* thread) {
+  non_moving_space_->RevokeThreadLocalBuffers(thread);
+}
+
+void Heap::RevokeAllThreadLocalBuffers() {
+  non_moving_space_->RevokeAllThreadLocalBuffers();
+}
+
 bool Heap::IsGCRequestPending() const {
   return concurrent_start_bytes_ != std::numeric_limits<size_t>::max();
 }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 0fa000f..3da3943 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -31,6 +31,7 @@
 #include "jni.h"
 #include "locks.h"
 #include "offsets.h"
+#include "reference_queue.h"
 #include "root_visitor.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -68,6 +69,8 @@
   class DlMallocSpace;
   class ImageSpace;
   class LargeObjectSpace;
+  class MallocSpace;
+  class RosAllocSpace;
   class Space;
   class SpaceTest;
   class ContinuousMemMapAllocSpace;
@@ -105,6 +108,9 @@
 };
 static constexpr HeapVerificationMode kDesiredHeapVerification = kNoHeapVerification;
 
+// If true, use rosalloc/RosAllocSpace instead of dlmalloc/DlMallocSpace
+static constexpr bool kUseRosAlloc = true;
+
 class Heap {
  public:
   // If true, measure the total allocation time.
@@ -289,32 +295,26 @@
                            MemberOffset reference_queueNext_offset,
                            MemberOffset reference_pendingNext_offset,
                            MemberOffset finalizer_reference_zombie_offset);
-
-  void SetReferenceReferent(mirror::Object* reference, mirror::Object* referent)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::Object* GetReferenceReferent(mirror::Object* reference)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void ClearReferenceReferent(mirror::Object* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // Returns true if the reference object has not yet been enqueued.
-  bool IsEnqueuable(const mirror::Object* ref);
-  void EnqueueReference(mirror::Object* ref, mirror::Object** list)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsEnqueued(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void EnqueuePendingReference(mirror::Object* ref, mirror::Object** list)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::Object* DequeuePendingReference(mirror::Object** list)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  MemberOffset GetReferencePendingNextOffset() {
-    DCHECK_NE(reference_pendingNext_offset_.Uint32Value(), 0U);
+  MemberOffset GetReferenceReferentOffset() const {
+    return reference_referent_offset_;
+  }
+  MemberOffset GetReferenceQueueOffset() const {
+    return reference_queue_offset_;
+  }
+  MemberOffset GetReferenceQueueNextOffset() const {
+    return reference_queueNext_offset_;
+  }
+  MemberOffset GetReferencePendingNextOffset() const {
     return reference_pendingNext_offset_;
   }
-
-  MemberOffset GetFinalizerReferenceZombieOffset() {
-    DCHECK_NE(finalizer_reference_zombie_offset_.Uint32Value(), 0U);
+  MemberOffset GetFinalizerReferenceZombieOffset() const {
     return finalizer_reference_zombie_offset_;
   }
+  static mirror::Object* PreserveSoftReferenceCallback(mirror::Object* obj, void* arg);
+  void ProcessReferences(TimingLogger& timings, bool clear_soft, RootVisitor* is_marked_callback,
+                         RootVisitor* recursive_mark_object_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Enable verification of object references when the runtime is sufficiently initialized.
   void EnableObjectValidation() {
@@ -418,6 +418,9 @@
   // Trim the managed and native heaps by releasing unused memory back to the OS.
   void Trim();
 
+  void RevokeThreadLocalBuffers(Thread* thread);
+  void RevokeAllThreadLocalBuffers();
+
   accounting::HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     return live_bitmap_.get();
   }
@@ -452,7 +455,7 @@
   // Assumes there is only one image space.
   space::ImageSpace* GetImageSpace() const;
 
-  space::DlMallocSpace* GetNonMovingSpace() const {
+  space::MallocSpace* GetNonMovingSpace() const {
     return non_moving_space_;
   }
 
@@ -460,22 +463,6 @@
     return large_object_space_;
   }
 
-  Mutex* GetSoftRefQueueLock() {
-    return soft_ref_queue_lock_;
-  }
-
-  Mutex* GetWeakRefQueueLock() {
-    return weak_ref_queue_lock_;
-  }
-
-  Mutex* GetFinalizerRefQueueLock() {
-    return finalizer_ref_queue_lock_;
-  }
-
-  Mutex* GetPhantomRefQueueLock() {
-    return phantom_ref_queue_lock_;
-  }
-
   void DumpSpaces(std::ostream& stream = LOG(INFO));
 
   // GC performance measuring
@@ -560,6 +547,12 @@
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Try to allocate a number of bytes, this function never does any GCs. RosAllocSpace-specialized version.
+  mirror::Object* TryToAllocateInstrumented(Thread* self, space::RosAllocSpace* space, size_t alloc_size,
+                                            bool grow, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   mirror::Object* TryToAllocateUninstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
                                               bool grow, size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
@@ -570,13 +563,30 @@
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  mirror::Object* TryToAllocateUninstrumented(Thread* self, space::RosAllocSpace* space, size_t alloc_size,
+                                              bool grow, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsOutOfMemoryOnAllocation(size_t alloc_size, bool grow);
 
   // Pushes a list of cleared references out to the managed heap.
-  void EnqueueClearedReferences(mirror::Object** cleared_references);
-
+  void SetReferenceReferent(mirror::Object* reference, mirror::Object* referent)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* GetReferenceReferent(mirror::Object* reference)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ClearReferenceReferent(mirror::Object* reference)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetReferenceReferent(reference, nullptr);
+  }
+  void EnqueueClearedReferences();
+  // Returns true if the reference object has not yet been enqueued.
+  bool IsEnqueuable(const mirror::Object* ref) const;
+  bool IsEnqueued(mirror::Object* ref) const;
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj, RootVisitor mark_visitor,
+                              void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Print a reference queue.
   void PrintReferenceQueue(std::ostream& os, mirror::Object** queue);
 
@@ -638,7 +648,7 @@
   void SwapStacks();
 
   // Clear cards and update the mod union table.
-  void ProcessCards(base::TimingLogger& timings);
+  void ProcessCards(TimingLogger& timings);
 
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_;
@@ -651,7 +661,7 @@
 
   // A space where non-movable objects are allocated, when compaction is enabled it contains
   // Classes, ArtMethods, ArtFields, and non moving objects.
-  space::DlMallocSpace* non_moving_space_;
+  space::MallocSpace* non_moving_space_;
 
   // The large object space we are currently allocating into.
   space::LargeObjectSpace* large_object_space_;
@@ -699,12 +709,12 @@
   Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   UniquePtr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_);
 
-  // Mutexes held when adding references to reference queues.
-  // TODO: move to a UniquePtr, currently annotalysis is confused that UniquePtr isn't lockable.
-  Mutex* soft_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Mutex* weak_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Mutex* finalizer_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Mutex* phantom_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // Reference queues.
+  ReferenceQueue soft_reference_queue_;
+  ReferenceQueue weak_reference_queue_;
+  ReferenceQueue finalizer_reference_queue_;
+  ReferenceQueue phantom_reference_queue_;
+  ReferenceQueue cleared_references_;
 
   // True while the garbage collector is running.
   volatile bool is_gc_running_ GUARDED_BY(gc_complete_lock_);
@@ -819,16 +829,12 @@
 
   // offset of java.lang.ref.Reference.referent
   MemberOffset reference_referent_offset_;
-
   // offset of java.lang.ref.Reference.queue
   MemberOffset reference_queue_offset_;
-
   // offset of java.lang.ref.Reference.queueNext
   MemberOffset reference_queueNext_offset_;
-
   // offset of java.lang.ref.Reference.pendingNext
   MemberOffset reference_pendingNext_offset_;
-
   // offset of java.lang.ref.FinalizerReference.zombie
   MemberOffset finalizer_reference_zombie_offset_;
 
@@ -861,6 +867,7 @@
 
   friend class collector::MarkSweep;
   friend class collector::SemiSpace;
+  friend class ReferenceQueue;
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
new file mode 100644
index 0000000..d006349
--- /dev/null
+++ b/runtime/gc/reference_queue.cc
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "reference_queue.h"
+
+#include "accounting/card_table-inl.h"
+#include "heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+
+namespace art {
+namespace gc {
+
+ReferenceQueue::ReferenceQueue(Heap* heap)
+    : lock_("reference queue lock"),
+      heap_(heap),
+      list_(nullptr) {
+}
+
+void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Object* ref) {
+  DCHECK(ref != NULL);
+  MutexLock mu(self, lock_);
+  if (!heap_->IsEnqueued(ref)) {
+    EnqueuePendingReference(ref);
+  }
+}
+
+void ReferenceQueue::EnqueueReference(mirror::Object* ref) {
+  CHECK(heap_->IsEnqueuable(ref));
+  EnqueuePendingReference(ref);
+}
+
+void ReferenceQueue::EnqueuePendingReference(mirror::Object* ref) {
+  DCHECK(ref != NULL);
+  MemberOffset pending_next_offset = heap_->GetReferencePendingNextOffset();
+  DCHECK_NE(pending_next_offset.Uint32Value(), 0U);
+  if (IsEmpty()) {
+    // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref;
+    ref->SetFieldObject(pending_next_offset, ref, false);
+    list_ = ref;
+  } else {
+    mirror::Object* head =
+        list_->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+    ref->SetFieldObject(pending_next_offset, head, false);
+    list_->SetFieldObject(pending_next_offset, ref, false);
+  }
+}
+
+mirror::Object* ReferenceQueue::DequeuePendingReference() {
+  DCHECK(!IsEmpty());
+  MemberOffset pending_next_offset = heap_->GetReferencePendingNextOffset();
+  mirror::Object* head = list_->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+  DCHECK(head != nullptr);
+  mirror::Object* ref;
+  // Note: the following code is thread-safe because it is only called from ProcessReferences which
+  // is single threaded.
+  if (list_ == head) {
+    ref = list_;
+    list_ = nullptr;
+  } else {
+    mirror::Object* next = head->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+    list_->SetFieldObject(pending_next_offset, next, false);
+    ref = head;
+  }
+  ref->SetFieldObject(pending_next_offset, nullptr, false);
+  return ref;
+}
+
+void ReferenceQueue::Dump(std::ostream& os) const {
+  mirror::Object* cur = list_;
+  os << "Reference starting at list_=" << list_ << "\n";
+  while (cur != nullptr) {
+    mirror::Object* pending_next =
+        cur->GetFieldObject<mirror::Object*>(heap_->GetReferencePendingNextOffset(), false);
+    os << "PendingNext=" << pending_next;
+    if (cur->GetClass()->IsFinalizerReferenceClass()) {
+      os << " Zombie=" <<
+          cur->GetFieldObject<mirror::Object*>(heap_->GetFinalizerReferenceZombieOffset(), false);
+    }
+    os << "\n";
+    cur = pending_next;
+  }
+}
+
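+// Dequeues every reference: white referents are cleared and, when enqueuable, the reference is
+// moved to cleared_references; moved referents are updated in place.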
+void ReferenceQueue::ClearWhiteReferences(ReferenceQueue& cleared_references, RootVisitor visitor,
+                                          void* arg) {
+  while (!IsEmpty()) {
+    mirror::Object* ref = DequeuePendingReference();
+    mirror::Object* referent = heap_->GetReferenceReferent(ref);
+    if (referent != nullptr) {
+      mirror::Object* forward_address = visitor(referent, arg);
+      if (forward_address == nullptr) {
+        // Referent is white, clear it.
+        heap_->ClearReferenceReferent(ref);
+        if (heap_->IsEnqueuable(ref)) {
+          cleared_references.EnqueuePendingReference(ref);
+        }
+      } else if (referent != forward_address) {
+        // Object moved, need to update the referent.
+        heap_->SetReferenceReferent(ref, forward_address);
+      }
+    }
+  }
+}
+
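+// Dequeues every finalizer reference: white referents are marked, stored in the zombie field and
+// cleared, and the reference is enqueued on cleared_references for finalization.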
+void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
+                                                RootVisitor is_marked_callback,
+                                                RootVisitor recursive_mark_callback, void* arg) {
+  while (!IsEmpty()) {
+    mirror::Object* ref = DequeuePendingReference();
+    mirror::Object* referent = heap_->GetReferenceReferent(ref);
+    if (referent != nullptr) {
+      mirror::Object* forward_address = is_marked_callback(referent, arg);
+      // If the referent isn't marked, mark it and move it to the zombie field.
+      if (forward_address == nullptr) {
+        forward_address = recursive_mark_callback(referent, arg);
+        // If the referent is non-null the reference must be enqueuable.
+        DCHECK(heap_->IsEnqueuable(ref));
+        // Move the updated referent to the zombie field.
+        ref->SetFieldObject(heap_->GetFinalizerReferenceZombieOffset(), forward_address, false);
+        heap_->ClearReferenceReferent(ref);
+        cleared_references.EnqueueReference(ref);
+      } else if (referent != forward_address) {
+        heap_->SetReferenceReferent(ref, forward_address);
+      }
+    }
+  }
+}
+
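+// Gives preserve_callback a chance to mark each soft referent; references whose referents are not
+// preserved stay on this queue so they can be cleared later.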
+void ReferenceQueue::PreserveSomeSoftReferences(RootVisitor preserve_callback, void* arg) {
+  ReferenceQueue cleared(heap_);
+  while (!IsEmpty()) {
+    mirror::Object* ref = DequeuePendingReference();
+    mirror::Object* referent = heap_->GetReferenceReferent(ref);
+    if (referent != nullptr) {
+      mirror::Object* forward_address = preserve_callback(referent, arg);
+      if (forward_address == nullptr) {
+        // Either the reference isn't marked or we don't wish to preserve it.
+        cleared.EnqueuePendingReference(ref);
+      } else {
+        heap_->SetReferenceReferent(ref, forward_address);
+      }
+    }
+  }
+  list_ = cleared.GetList();
+}
+
+}  // namespace gc
+}  // namespace art
+
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
new file mode 100644
index 0000000..89589c3
--- /dev/null
+++ b/runtime/gc/reference_queue.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_REFERENCE_QUEUE_H_
+#define ART_RUNTIME_GC_REFERENCE_QUEUE_H_
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+#include "atomic_integer.h"
+#include "base/timing_logger.h"
+#include "globals.h"
+#include "gtest/gtest.h"
+#include "jni.h"
+#include "locks.h"
+#include "offsets.h"
+#include "root_visitor.h"
+#include "thread_pool.h"
+
+namespace art {
+namespace gc {
+
+class Heap;
+
+// Used to temporarily store java.lang.ref.Reference(s) during GC and prior to queueing on the
+// appropriate java.lang.ref.ReferenceQueue. The linked list is maintained in the
+// java.lang.ref.Reference objects.
+class ReferenceQueue {
+ public:
+  explicit ReferenceQueue(Heap* heap);
+  // Enqueue a reference if it is not already enqueued. Thread safe to call from multiple threads
+  // since it uses a lock to avoid a race between checking for the references presence and adding
+  // it.
+  void AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Object* ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
+  // Enqueue a reference; unlike EnqueuePendingReference, EnqueueReference checks that the
+  // reference is enqueuable. Not thread safe, used when mutators are paused to minimize lock
+  // overhead.
+  void EnqueueReference(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void EnqueuePendingReference(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* DequeuePendingReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Enqueues finalizer references with white referents.  White referents are blackened, moved to the
+  // zombie field, and the referent field is cleared.
+  void EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
+                                  RootVisitor is_marked_callback,
+                                  RootVisitor recursive_mark_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Walks the reference list marking any references subject to the reference clearing policy.
+  // References with a black referent are removed from the list.  References with white referents
+  // biased toward saving are blackened and also removed from the list.
+  void PreserveSomeSoftReferences(RootVisitor preserve_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Unlink the reference list, clearing reference objects with white referents.  Cleared references
+  // registered to a reference queue are scheduled for appending by the heap worker thread.
+  void ClearWhiteReferences(ReferenceQueue& cleared_references, RootVisitor visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Dump(std::ostream& os) const
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsEmpty() const {
+    return list_ == nullptr;
+  }
+  void Clear() {
+    list_ = nullptr;
+  }
+  mirror::Object* GetList() {
+    return list_;
+  }
+
+ private:
+  // Lock, used for parallel GC reference enqueuing. It allows for multiple threads simultaneously
+  // calling AtomicEnqueueIfNotEnqueued.
+  Mutex lock_;
+  // The heap contains the reference offsets.
+  Heap* const heap_;
+  // The actual reference list. Not a root since it will be nullptr when the GC is not running.
+  mirror::Object* list_;
+};
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_REFERENCE_QUEUE_H_
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
index fb2c66b..c14a4e1 100644
--- a/runtime/gc/space/dlmalloc_space-inl.h
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_
 
 #include "dlmalloc_space.h"
+#include "thread.h"
 
 namespace art {
 namespace gc {
@@ -28,7 +29,7 @@
   mirror::Object* obj;
   {
     MutexLock mu(self, lock_);
-    obj = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
+    obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
   }
   if (LIKELY(obj != NULL)) {
     // Zero freshly allocated memory, done while not holding the space's lock.
@@ -37,8 +38,9 @@
   return obj;
 }
 
-inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated) {
-  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes));
+inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes,
+                                                               size_t* bytes_allocated) {
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_for_alloc_, num_bytes));
   if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
       CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 1c7aa22..b067bbc 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -13,13 +13,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "dlmalloc_space.h"
+
 #include "dlmalloc_space-inl.h"
 #include "gc/accounting/card_table.h"
 #include "gc/heap.h"
+#include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
 #include "thread.h"
+#include "thread_list.h"
 #include "utils.h"
 
 #include <valgrind.h>
@@ -29,16 +33,6 @@
 namespace gc {
 namespace space {
 
-// TODO: Remove define macro
-#define CHECK_MEMORY_CALL(call, args, what) \
-  do { \
-    int rc = call args; \
-    if (UNLIKELY(rc != 0)) { \
-      errno = rc; \
-      PLOG(FATAL) << # call << " failed for " << what; \
-    } \
-  } while (false)
-
 static const bool kPrefetchDuringDlMallocFreeList = true;
 
 // Number of bytes to use as a red zone (rdz). A red zone of this size will be placed before and
@@ -114,81 +108,38 @@
   DISALLOW_COPY_AND_ASSIGN(ValgrindDlMallocSpace);
 };
 
-size_t DlMallocSpace::bitmap_index_ = 0;
-
 DlMallocSpace::DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin,
-                       byte* end, byte* limit, size_t growth_limit)
-    : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
-      recent_free_pos_(0), total_bytes_freed_(0), total_objects_freed_(0),
-      lock_("allocation space lock", kAllocSpaceLock), mspace_(mspace),
-      growth_limit_(growth_limit) {
+                             byte* end, byte* limit, size_t growth_limit)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit),
+      total_bytes_freed_(0), total_objects_freed_(0), mspace_(mspace), mspace_for_alloc_(mspace) {
   CHECK(mspace != NULL);
-  size_t bitmap_index = bitmap_index_++;
-  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
-  live_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
-  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
-  for (auto& freed : recent_freed_objects_) {
-    freed.first = nullptr;
-    freed.second = nullptr;
-  }
 }
 
-DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t
-                                     growth_limit, size_t capacity, byte* requested_begin) {
-  // Memory we promise to dlmalloc before it asks for morecore.
-  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
-  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
-  // size of the large allocation) will be greater than the footprint limit.
-  size_t starting_size = kPageSize;
+DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                                     size_t capacity, byte* requested_begin) {
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
-    VLOG(startup) << "Space::CreateAllocSpace entering " << name
+    VLOG(startup) << "DlMallocSpace::Create entering " << name
                   << " initial_size=" << PrettySize(initial_size)
                   << " growth_limit=" << PrettySize(growth_limit)
                   << " capacity=" << PrettySize(capacity)
                   << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
   }
 
-  // Sanity check arguments
-  if (starting_size > initial_size) {
-    initial_size = starting_size;
-  }
-  if (initial_size > growth_limit) {
-    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the initial size ("
-        << PrettySize(initial_size) << ") is larger than its capacity ("
-        << PrettySize(growth_limit) << ")";
+  // Memory we promise to dlmalloc before it asks for morecore.
+  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
+  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
+  // size of the large allocation) will be greater than the footprint limit.
+  size_t starting_size = kPageSize;
+  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
+                                 requested_begin);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
+               << PrettySize(capacity);
     return NULL;
   }
-  if (growth_limit > capacity) {
-    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the growth limit capacity ("
-        << PrettySize(growth_limit) << ") is larger than the capacity ("
-        << PrettySize(capacity) << ")";
-    return NULL;
-  }
-
-  // Page align growth limit and capacity which will be used to manage mmapped storage
-  growth_limit = RoundUp(growth_limit, kPageSize);
-  capacity = RoundUp(capacity, kPageSize);
-
-  std::string error_msg;
-  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
-                                                 PROT_READ | PROT_WRITE, &error_msg));
-  if (mem_map.get() == NULL) {
-    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
-        << PrettySize(capacity) << ": " << error_msg;
-    return NULL;
-  }
-
-  void* mspace = CreateMallocSpace(mem_map->Begin(), starting_size, initial_size);
+  void* mspace = CreateMspace(mem_map->Begin(), starting_size, initial_size);
   if (mspace == NULL) {
     LOG(ERROR) << "Failed to initialize mspace for alloc space (" << name << ")";
     return NULL;
@@ -201,24 +152,23 @@
   }
 
   // Everything is set so record in immutable structure and leave
-  MemMap* mem_map_ptr = mem_map.release();
   DlMallocSpace* space;
-  byte* begin = mem_map_ptr->Begin();
+  byte* begin = mem_map->Begin();
   if (RUNNING_ON_VALGRIND > 0) {
-    space = new ValgrindDlMallocSpace(name, mem_map_ptr, mspace, begin, end, begin + capacity,
+    space = new ValgrindDlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity,
                                       growth_limit, initial_size);
   } else {
-    space = new DlMallocSpace(name, mem_map_ptr, mspace, begin, end, begin + capacity, growth_limit);
+    space = new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit);
   }
   // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
-    LOG(INFO) << "Space::CreateAllocSpace exiting (" << PrettyDuration(NanoTime() - start_time)
+    LOG(INFO) << "DlMallocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
         << " ) " << *space;
   }
   return space;
 }
 
-void* DlMallocSpace::CreateMallocSpace(void* begin, size_t morecore_start, size_t initial_size) {
+void* DlMallocSpace::CreateMspace(void* begin, size_t morecore_start, size_t initial_size) {
   // clear errno to allow PLOG on error
   errno = 0;
   // create mspace using our backing storage starting at begin and with a footprint of
@@ -234,14 +184,6 @@
   return msp;
 }
 
-void DlMallocSpace::SwapBitmaps() {
-  live_bitmap_.swap(mark_bitmap_);
-  // Swap names to get more descriptive diagnostics.
-  std::string temp_name(live_bitmap_->GetName());
-  live_bitmap_->SetName(mark_bitmap_->GetName());
-  mark_bitmap_->SetName(temp_name);
-}
-
 mirror::Object* DlMallocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
   return AllocNonvirtual(self, num_bytes, bytes_allocated);
 }
@@ -250,11 +192,11 @@
   mirror::Object* result;
   {
     MutexLock mu(self, lock_);
-    // Grow as much as possible within the mspace.
+    // Grow as much as possible within the space.
     size_t max_allowed = Capacity();
     mspace_set_footprint_limit(mspace_, max_allowed);
     // Try the allocation.
-    result = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
+    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
     // Shrink back down as small as possible.
     size_t footprint = mspace_footprint(mspace_);
     mspace_set_footprint_limit(mspace_, footprint);
@@ -268,83 +210,9 @@
   return result;
 }
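AllocWithGrowth above temporarily raises the footprint limit to the full (possibly growth-limited) capacity, attempts the allocation, and then clamps the limit back to whatever dlmalloc actually obtained. A condensed illustration of the pattern as it runs under lock_; the direct mspace_malloc call stands in for AllocWithoutGrowthLocked:

    mspace_set_footprint_limit(mspace_, Capacity());                 // allow growth up to the limit
    void* raw = mspace_malloc(mspace_, num_bytes);                   // may trigger art_heap_morecore
    mspace_set_footprint_limit(mspace_, mspace_footprint(mspace_));  // shrink back to what is in use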
 
-void DlMallocSpace::SetGrowthLimit(size_t growth_limit) {
-  growth_limit = RoundUp(growth_limit, kPageSize);
-  growth_limit_ = growth_limit;
-  if (Size() > growth_limit_) {
-    end_ = begin_ + growth_limit;
-  }
-}
-
-DlMallocSpace* DlMallocSpace::CreateZygoteSpace(const char* alloc_space_name) {
-  end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
-  DCHECK(IsAligned<kPageSize>(begin_));
-  DCHECK(IsAligned<kPageSize>(end_));
-  size_t size = RoundUp(Size(), kPageSize);
-  // Trim the heap so that we minimize the size of the Zygote space.
-  Trim();
-  // TODO: Not hardcode these in?
-  const size_t starting_size = kPageSize;
-  const size_t initial_size = 2 * MB;
-  // Remaining size is for the new alloc space.
-  const size_t growth_limit = growth_limit_ - size;
-  const size_t capacity = Capacity() - size;
-  VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
-             << "End " << reinterpret_cast<const void*>(end_) << "\n"
-             << "Size " << size << "\n"
-             << "GrowthLimit " << growth_limit_ << "\n"
-             << "Capacity " << Capacity();
-  SetGrowthLimit(RoundUp(size, kPageSize));
-  SetFootprintLimit(RoundUp(size, kPageSize));
-  // FIXME: Do we need reference counted pointers here?
-  // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
-  VLOG(heap) << "Creating new AllocSpace: ";
-  VLOG(heap) << "Size " << GetMemMap()->Size();
-  VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit);
-  VLOG(heap) << "Capacity " << PrettySize(capacity);
-  // Remap the tail.
-  std::string error_msg;
-  UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
-                                                    PROT_READ | PROT_WRITE, &error_msg));
-  CHECK(mem_map.get() != nullptr) << error_msg;
-  void* mspace = CreateMallocSpace(end_, starting_size, initial_size);
-  // Protect memory beyond the initial size.
-  byte* end = mem_map->Begin() + starting_size;
-  if (capacity - initial_size > 0) {
-    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
-  }
-  DlMallocSpace* alloc_space =
-      new DlMallocSpace(alloc_space_name, mem_map.release(), mspace, end_, end, limit_,
-                        growth_limit);
-  SetLimit(End());
-  live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
-  CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
-  CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  VLOG(heap) << "zygote space creation done";
-  return alloc_space;
-}
-
-mirror::Class* DlMallocSpace::FindRecentFreedObject(const mirror::Object* obj) {
-  size_t pos = recent_free_pos_;
-  // Start at the most recently freed object and work our way back since there may be duplicates
-  // caused by dlmalloc reusing memory.
-  if (kRecentFreeCount > 0) {
-    for (size_t i = 0; i + 1 < kRecentFreeCount + 1; ++i) {
-      pos = pos != 0 ? pos - 1 : kRecentFreeMask;
-      if (recent_freed_objects_[pos].first == obj) {
-        return recent_freed_objects_[pos].second;
-      }
-    }
-  }
-  return nullptr;
-}
-
-void DlMallocSpace::RegisterRecentFree(mirror::Object* ptr) {
-  recent_freed_objects_[recent_free_pos_] = std::make_pair(ptr, ptr->GetClass());
-  recent_free_pos_ = (recent_free_pos_ + 1) & kRecentFreeMask;
+MallocSpace* DlMallocSpace::CreateInstance(const std::string& name, MemMap* mem_map, void* allocator, byte* begin, byte* end,
+                                           byte* limit, size_t growth_limit) {
+  return new DlMallocSpace(name, mem_map, allocator, begin, end, limit, growth_limit);
 }
 
 size_t DlMallocSpace::Free(Thread* self, mirror::Object* ptr) {
@@ -411,40 +279,11 @@
 // Callback from dlmalloc when it needs to increase the footprint
 extern "C" void* art_heap_morecore(void* mspace, intptr_t increment) {
   Heap* heap = Runtime::Current()->GetHeap();
-  DCHECK_EQ(heap->GetNonMovingSpace()->GetMspace(), mspace);
+  DCHECK(heap->GetNonMovingSpace()->IsDlMallocSpace());
+  DCHECK_EQ(heap->GetNonMovingSpace()->AsDlMallocSpace()->GetMspace(), mspace);
   return heap->GetNonMovingSpace()->MoreCore(increment);
 }
 
-void* DlMallocSpace::MoreCore(intptr_t increment) {
-  lock_.AssertHeld(Thread::Current());
-  byte* original_end = end_;
-  if (increment != 0) {
-    VLOG(heap) << "DlMallocSpace::MoreCore " << PrettySize(increment);
-    byte* new_end = original_end + increment;
-    if (increment > 0) {
-      // Should never be asked to increase the allocation beyond the capacity of the space. Enforced
-      // by mspace_set_footprint_limit.
-      CHECK_LE(new_end, Begin() + Capacity());
-      CHECK_MEMORY_CALL(mprotect, (original_end, increment, PROT_READ | PROT_WRITE), GetName());
-    } else {
-      // Should never be asked for negative footprint (ie before begin)
-      CHECK_GT(original_end + increment, Begin());
-      // Advise we don't need the pages and protect them
-      // TODO: by removing permissions to the pages we may be causing TLB shoot-down which can be
-      // expensive (note the same isn't true for giving permissions to a page as the protected
-      // page shouldn't be in a TLB). We should investigate performance impact of just
-      // removing ignoring the memory protection change here and in Space::CreateAllocSpace. It's
-      // likely just a useful debug feature.
-      size_t size = -increment;
-      CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
-      CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
-    }
-    // Update end_
-    end_ = new_end;
-  }
-  return original_end;
-}
-
 // Virtual functions can't get inlined.
 inline size_t DlMallocSpace::InternalAllocationSize(const mirror::Object* obj) {
   return AllocationSizeNonvirtual(obj);
@@ -481,32 +320,9 @@
   return mspace_footprint_limit(mspace_);
 }
 
-// Returns the old mark bitmap.
-accounting::SpaceBitmap* DlMallocSpace::BindLiveToMarkBitmap() {
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
-  temp_bitmap_.reset(mark_bitmap);
-  mark_bitmap_.reset(live_bitmap);
-  return mark_bitmap;
-}
-
-bool DlMallocSpace::HasBoundBitmaps() const {
-  return temp_bitmap_.get() != nullptr;
-}
-
-void DlMallocSpace::UnBindBitmaps() {
-  CHECK(HasBoundBitmaps());
-  // At this point, the temp_bitmap holds our old mark bitmap.
-  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
-  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
-  mark_bitmap_.reset(new_bitmap);
-  DCHECK(temp_bitmap_.get() == NULL);
-}
-
-
 void DlMallocSpace::SetFootprintLimit(size_t new_size) {
   MutexLock mu(Thread::Current(), lock_);
-  VLOG(heap) << "DLMallocSpace::SetFootprintLimit " << PrettySize(new_size);
+  VLOG(heap) << "DlMallocSpace::SetFootprintLimit " << PrettySize(new_size);
   // Compare against the actual footprint, rather than the Size(), because the heap may not have
   // grown all the way to the allowed size yet.
   size_t current_space_size = mspace_footprint(mspace_);
@@ -517,36 +333,26 @@
   mspace_set_footprint_limit(mspace_, new_size);
 }
 
-void DlMallocSpace::Dump(std::ostream& os) const {
-  os << GetType()
-      << " begin=" << reinterpret_cast<void*>(Begin())
-      << ",end=" << reinterpret_cast<void*>(End())
-      << ",size=" << PrettySize(Size()) << ",capacity=" << PrettySize(Capacity())
-      << ",name=\"" << GetName() << "\"]";
-}
-
 uint64_t DlMallocSpace::GetBytesAllocated() {
-  if (mspace_ != nullptr) {
-    MutexLock mu(Thread::Current(), lock_);
-    size_t bytes_allocated = 0;
-    mspace_inspect_all(mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
-    return bytes_allocated;
-  } else {
-    return Size();
-  }
+  MutexLock mu(Thread::Current(), lock_);
+  size_t bytes_allocated = 0;
+  mspace_inspect_all(mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
+  return bytes_allocated;
 }
 
 uint64_t DlMallocSpace::GetObjectsAllocated() {
-  if (mspace_ != nullptr) {
-    MutexLock mu(Thread::Current(), lock_);
-    size_t objects_allocated = 0;
-    mspace_inspect_all(mspace_, DlmallocObjectsAllocatedCallback, &objects_allocated);
-    return objects_allocated;
-  } else {
-    return 0;
-  }
+  MutexLock mu(Thread::Current(), lock_);
+  size_t objects_allocated = 0;
+  mspace_inspect_all(mspace_, DlmallocObjectsAllocatedCallback, &objects_allocated);
+  return objects_allocated;
 }
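GetBytesAllocated and GetObjectsAllocated rely on mspace_inspect_all walking every chunk under lock_ and invoking a handler per chunk. A sketch of what such a handler looks like; the real DlmallocBytesAllocatedCallback is defined elsewhere in the runtime and is assumed to behave roughly like this:

    // Handler signature expected by mspace_inspect_all; used_bytes is zero for free chunks.
    static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
      *reinterpret_cast<size_t*>(arg) += used_bytes;
    }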
 
+#ifndef NDEBUG
+void DlMallocSpace::CheckMoreCoreForPrecondition() {
+  lock_.AssertHeld(Thread::Current());
+}
+#endif
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 59dafe3..d18d4ad 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_H_
 
 #include "gc/allocator/dlmalloc.h"
+#include "malloc_space.h"
 #include "space.h"
 
 namespace art {
@@ -30,33 +31,18 @@
 namespace space {
 
 // An alloc space is a space where objects may be allocated and garbage collected.
-class DlMallocSpace : public ContinuousMemMapAllocSpace {
+class DlMallocSpace : public MallocSpace {
  public:
-  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
-
-  SpaceType GetType() const {
-    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
-      return kSpaceTypeZygoteSpace;
-    } else {
-      return kSpaceTypeAllocSpace;
-    }
-  }
-
-  // Create a AllocSpace with the requested sizes. The requested
+  // Create a DlMallocSpace with the requested sizes. The requested
   // base address is not guaranteed to be granted, if it is required,
-  // the caller should call Begin on the returned space to confirm
-  // the request was granted.
+  // the caller should call Begin on the returned space to confirm the
+  // request was granted.
   static DlMallocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
                                size_t capacity, byte* requested_begin);
 
-  // Allocate num_bytes without allowing the underlying mspace to grow.
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
-
-  // Allocate num_bytes allowing the underlying mspace to grow.
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-
-  // Return the storage space required by obj.
   virtual size_t AllocationSize(const mirror::Object* obj);
   virtual size_t Free(Thread* self, mirror::Object* ptr);
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
@@ -64,17 +50,19 @@
   mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
 
   size_t AllocationSizeNonvirtual(const mirror::Object* obj) {
-    return mspace_usable_size(const_cast<void*>(reinterpret_cast<const void*>(obj))) +
-        kChunkOverhead;
+    void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
+    return mspace_usable_size(obj_ptr) + kChunkOverhead;
   }
 
-  void* MoreCore(intptr_t increment);
+#ifndef NDEBUG
+  // Override only in the debug build.
+  void CheckMoreCoreForPrecondition();
+#endif
 
   void* GetMspace() const {
     return mspace_;
   }
 
-  // Hands unused pages back to the system.
   size_t Trim();
 
   // Perform a mspace_inspect_all which calls back for each allocation chunk. The chunk may not be
@@ -93,39 +81,8 @@
   // allocations fail we GC before increasing the footprint limit and allowing the mspace to grow.
   void SetFootprintLimit(size_t limit);
 
-  // Removes the fork time growth limit on capacity, allowing the application to allocate up to the
-  // maximum reserved size of the heap.
-  void ClearGrowthLimit() {
-    growth_limit_ = NonGrowthLimitCapacity();
-  }
-
-  // Override capacity so that we only return the possibly limited capacity
-  size_t Capacity() const {
-    return growth_limit_;
-  }
-
-  // The total amount of memory reserved for the alloc space.
-  size_t NonGrowthLimitCapacity() const {
-    return GetMemMap()->Size();
-  }
-
-  accounting::SpaceBitmap* GetLiveBitmap() const {
-    return live_bitmap_.get();
-  }
-
-  accounting::SpaceBitmap* GetMarkBitmap() const {
-    return mark_bitmap_.get();
-  }
-
-  void Dump(std::ostream& os) const;
-
-  void SetGrowthLimit(size_t growth_limit);
-
-  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
-  void SwapBitmaps();
-
-  // Turn ourself into a zygote space and return a new alloc space which has our unused memory.
-  DlMallocSpace* CreateZygoteSpace(const char* alloc_space_name);
+  MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                              byte* begin, byte* end, byte* limit, size_t growth_limit);
 
   uint64_t GetBytesAllocated();
   uint64_t GetObjectsAllocated();
@@ -136,18 +93,18 @@
     return GetObjectsAllocated() + total_objects_freed_;
   }
 
-  // Returns the old mark bitmap.
-  accounting::SpaceBitmap* BindLiveToMarkBitmap();
-  bool HasBoundBitmaps() const;
-  void UnBindBitmaps();
-
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
-  // Used to ensure that failure happens when you free / allocate into an invalidated space. If we
-  // don't do this we may get heap corruption instead of a segfault at null.
-  void InvalidateMSpace() {
-    mspace_ = nullptr;
+  virtual void InvalidateAllocator() {
+    mspace_for_alloc_ = nullptr;
+  }
+
+  virtual bool IsDlMallocSpace() const {
+    return true;
+  }
+  virtual DlMallocSpace* AsDlMallocSpace() {
+    return this;
   }
 
  protected:
@@ -156,45 +113,28 @@
 
  private:
   size_t InternalAllocationSize(const mirror::Object* obj);
-  mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated)
+
+  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  bool Init(size_t initial_size, size_t maximum_size, size_t growth_size, byte* requested_base);
-  void RegisterRecentFree(mirror::Object* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  static void* CreateMallocSpace(void* base, size_t morecore_start, size_t initial_size);
 
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
-
-  // Recent allocation buffer.
-  static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
-  static constexpr size_t kRecentFreeMask = kRecentFreeCount - 1;
-  std::pair<const mirror::Object*, mirror::Class*> recent_freed_objects_[kRecentFreeCount];
-  size_t recent_free_pos_;
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) {
+    return CreateMspace(base, morecore_start, initial_size);
+  }
+  static void* CreateMspace(void* base, size_t morecore_start, size_t initial_size);
 
   // Approximate number of bytes and objects which have been deallocated in the space.
   size_t total_bytes_freed_;
   size_t total_objects_freed_;
 
-  static size_t bitmap_index_;
-
   // The boundary tag overhead.
   static const size_t kChunkOverhead = kWordSize;
 
-  // Used to ensure mutual exclusion when the allocation spaces data structures are being modified.
-  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-
   // Underlying malloc space
-  void* mspace_;
+  void* const mspace_;
 
-  // The capacity of the alloc space until such time that ClearGrowthLimit is called.
-  // The underlying mem_map_ controls the maximum size we allow the heap to grow to. The growth
-  // limit is a value <= to the mem_map_ capacity used for ergonomic reasons because of the zygote.
-  // Prior to forking the zygote the heap will have a maximally sized mem_map_ but the growth_limit_
-  // will be set to a lower value. The growth_limit_ is used as the capacity of the alloc_space_,
-  // however, capacity normally can't vary. In the case of the growth_limit_ it can be cleared
-  // one time by a call to ClearGrowthLimit.
-  size_t growth_limit_;
+  // An mspace pointer used for allocation. Equal to what mspace_
+  // points to, or nullptr after InvalidateAllocator() is called.
+  void* mspace_for_alloc_;
 
   friend class collector::MarkSweep;
 
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index 07fb288..d374ad3 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -116,7 +116,8 @@
   virtual ~FreeListSpace();
   static FreeListSpace* Create(const std::string& name, byte* requested_begin, size_t capacity);
 
-  size_t AllocationSize(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  size_t AllocationSize(const mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* obj);
   bool Contains(const mirror::Object* obj) const;
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
new file mode 100644
index 0000000..785b5ed
--- /dev/null
+++ b/runtime/gc/space/malloc_space.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "malloc_space.h"
+
+#include "gc/accounting/card_table.h"
+#include "gc/heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "utils.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+size_t MallocSpace::bitmap_index_ = 0;
+
+MallocSpace::MallocSpace(const std::string& name, MemMap* mem_map,
+                         byte* begin, byte* end, byte* limit, size_t growth_limit)
+    : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
+      recent_free_pos_(0), lock_("allocation space lock", kAllocSpaceLock),
+      growth_limit_(growth_limit) {
+  size_t bitmap_index = bitmap_index_++;
+  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
+  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
+  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
+  live_bitmap_.reset(accounting::SpaceBitmap::Create(
+      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+      Begin(), Capacity()));
+  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
+  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
+      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+      Begin(), Capacity()));
+  DCHECK(mark_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
+  for (auto& freed : recent_freed_objects_) {
+    freed.first = nullptr;
+    freed.second = nullptr;
+  }
+}
+
+MemMap* MallocSpace::CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
+                                  size_t* growth_limit, size_t* capacity, byte* requested_begin) {
+  // Sanity check arguments
+  if (starting_size > *initial_size) {
+    *initial_size = starting_size;
+  }
+  if (*initial_size > *growth_limit) {
+    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the initial size ("
+        << PrettySize(*initial_size) << ") is larger than its capacity ("
+        << PrettySize(*growth_limit) << ")";
+    return NULL;
+  }
+  if (*growth_limit > *capacity) {
+    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the growth limit capacity ("
+        << PrettySize(*growth_limit) << ") is larger than the capacity ("
+        << PrettySize(*capacity) << ")";
+    return NULL;
+  }
+
+  // Page align growth limit and capacity which will be used to manage mmapped storage
+  *growth_limit = RoundUp(*growth_limit, kPageSize);
+  *capacity = RoundUp(*capacity, kPageSize);
+
+  std::string error_msg;
+  MemMap* mem_map = MemMap::MapAnonymous(name.c_str(), requested_begin, *capacity,
+                                         PROT_READ | PROT_WRITE, &error_msg);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
+               << PrettySize(*capacity) << ": " << error_msg;
+    return NULL;
+  }
+  return mem_map;
+}
+
+void MallocSpace::SwapBitmaps() {
+  live_bitmap_.swap(mark_bitmap_);
+  // Swap names to get more descriptive diagnostics.
+  std::string temp_name(live_bitmap_->GetName());
+  live_bitmap_->SetName(mark_bitmap_->GetName());
+  mark_bitmap_->SetName(temp_name);
+}
+
+mirror::Class* MallocSpace::FindRecentFreedObject(const mirror::Object* obj) {
+  size_t pos = recent_free_pos_;
+  // Start at the most recently freed object and work our way back since there may be duplicates
+  // caused by dlmalloc reusing memory.
+  if (kRecentFreeCount > 0) {
+    for (size_t i = 0; i + 1 < kRecentFreeCount + 1; ++i) {
+      pos = pos != 0 ? pos - 1 : kRecentFreeMask;
+      if (recent_freed_objects_[pos].first == obj) {
+        return recent_freed_objects_[pos].second;
+      }
+    }
+  }
+  return nullptr;
+}
+
+void MallocSpace::RegisterRecentFree(mirror::Object* ptr) {
+  recent_freed_objects_[recent_free_pos_] = std::make_pair(ptr, ptr->GetClass());
+  recent_free_pos_ = (recent_free_pos_ + 1) & kRecentFreeMask;
+}
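FindRecentFreedObject and RegisterRecentFree above implement a small power-of-two ring buffer: registration overwrites the oldest slot and advances with a mask, while lookup walks backwards from the newest entry because the allocator may hand the same address out repeatedly. A self-contained sketch of the same indexing scheme; the names and element type here are illustrative, not part of the runtime:

    #include <cstddef>
    #include <utility>

    static constexpr size_t kCount = 16;  // must be a power of two
    static constexpr size_t kMask = kCount - 1;
    static std::pair<const void*, int> recent[kCount];
    static size_t pos = 0;

    void Record(const void* ptr, int tag) {
      recent[pos] = std::make_pair(ptr, tag);  // overwrite the oldest slot
      pos = (pos + 1) & kMask;                 // advance with wrap-around
    }

    int FindMostRecent(const void* ptr) {
      size_t i = pos;
      for (size_t n = 0; n < kCount; ++n) {
        i = (i != 0) ? i - 1 : kMask;          // step backwards, wrapping at zero
        if (recent[i].first == ptr) {
          return recent[i].second;
        }
      }
      return -1;                               // not among the last kCount frees
    }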
+
+void MallocSpace::SetGrowthLimit(size_t growth_limit) {
+  growth_limit = RoundUp(growth_limit, kPageSize);
+  growth_limit_ = growth_limit;
+  if (Size() > growth_limit_) {
+    end_ = begin_ + growth_limit;
+  }
+}
+
+void* MallocSpace::MoreCore(intptr_t increment) {
+  CheckMoreCoreForPrecondition();
+  byte* original_end = end_;
+  if (increment != 0) {
+    VLOG(heap) << "MallocSpace::MoreCore " << PrettySize(increment);
+    byte* new_end = original_end + increment;
+    if (increment > 0) {
+      // Should never be asked to increase the allocation beyond the capacity of the space. Enforced
+      // by mspace_set_footprint_limit.
+      CHECK_LE(new_end, Begin() + Capacity());
+      CHECK_MEMORY_CALL(mprotect, (original_end, increment, PROT_READ | PROT_WRITE), GetName());
+    } else {
+      // Should never be asked for a negative footprint (i.e. before begin). Zero footprint is ok.
+      CHECK_GE(original_end + increment, Begin());
+      // Advise the kernel that we don't need the pages, then protect them.
+      // TODO: by removing permissions to the pages we may be causing TLB shoot-down which can be
+      // expensive (note the same isn't true for giving permissions to a page, as the protected
+      // page shouldn't be in a TLB). We should investigate the performance impact of just
+      // ignoring the memory protection change here and in Space::CreateAllocSpace. It's
+      // likely just a useful debug feature.
+      size_t size = -increment;
+      CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
+      CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
+    }
+    // Update end_
+    end_ = new_end;
+  }
+  return original_end;
+}
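MoreCore follows an sbrk-style contract: it returns the old end of the space and then moves end_ by increment, adjusting page protections in the direction of the move. A worked example, assuming 4 KiB pages and an arbitrary illustrative end_ address:

    //   end_ = 0x10000000
    //   MoreCore(+8192) -> returns 0x10000000, end_ becomes 0x10002000 (two new pages made RW)
    //   MoreCore(-4096) -> returns 0x10002000, end_ becomes 0x10001000 (one page madvised, PROT_NONE)
    //   MoreCore(0)     -> returns end_ unchanged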
+
+// Returns the old mark bitmap.
+accounting::SpaceBitmap* MallocSpace::BindLiveToMarkBitmap() {
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
+  temp_bitmap_.reset(mark_bitmap);
+  mark_bitmap_.reset(live_bitmap);
+  return mark_bitmap;
+}
+
+bool MallocSpace::HasBoundBitmaps() const {
+  return temp_bitmap_.get() != nullptr;
+}
+
+void MallocSpace::UnBindBitmaps() {
+  CHECK(HasBoundBitmaps());
+  // At this point, the temp_bitmap holds our old mark bitmap.
+  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
+  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
+  mark_bitmap_.reset(new_bitmap);
+  DCHECK(temp_bitmap_.get() == NULL);
+}
+
+MallocSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name) {
+  // For RosAlloc, revoke thread local runs before creating a new
+  // alloc space so that we won't mix thread local runs from different
+  // alloc spaces.
+  RevokeAllThreadLocalBuffers();
+  end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
+  DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
+  DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
+  DCHECK(IsAligned<kPageSize>(begin_));
+  DCHECK(IsAligned<kPageSize>(end_));
+  size_t size = RoundUp(Size(), kPageSize);
+  // Trim the heap so that we minimize the size of the Zygote space.
+  Trim();
+  // TODO: Not hardcode these in?
+  const size_t starting_size = kPageSize;
+  const size_t initial_size = 2 * MB;
+  // Remaining size is for the new alloc space.
+  const size_t growth_limit = growth_limit_ - size;
+  const size_t capacity = Capacity() - size;
+  VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
+             << "End " << reinterpret_cast<const void*>(end_) << "\n"
+             << "Size " << size << "\n"
+             << "GrowthLimit " << growth_limit_ << "\n"
+             << "Capacity " << Capacity();
+  SetGrowthLimit(RoundUp(size, kPageSize));
+  SetFootprintLimit(RoundUp(size, kPageSize));
+  // FIXME: Do we need reference counted pointers here?
+  // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
+  VLOG(heap) << "Creating new AllocSpace: ";
+  VLOG(heap) << "Size " << GetMemMap()->Size();
+  VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit);
+  VLOG(heap) << "Capacity " << PrettySize(capacity);
+  // Remap the tail.
+  std::string error_msg;
+  UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
+                                                    PROT_READ | PROT_WRITE, &error_msg));
+  CHECK(mem_map.get() != nullptr) << error_msg;
+  void* allocator = CreateAllocator(end_, starting_size, initial_size);
+  // Protect memory beyond the initial size.
+  byte* end = mem_map->Begin() + starting_size;
+  if (capacity - initial_size > 0) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
+  }
+  MallocSpace* alloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator,
+                                            end_, end, limit_, growth_limit);
+  SetLimit(End());
+  live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
+  CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
+  mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
+  CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
+  VLOG(heap) << "zygote space creation done";
+  return alloc_space;
+}
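The size bookkeeping in CreateZygoteSpace is easiest to follow with concrete numbers. A hypothetical example, assuming the pre-fork space has a growth_limit_ of 64 MB and currently holds 8 MB of page-aligned data:

    //   size         = RoundUp(Size(), kPageSize)  =  8 MB  (kept by the zygote space)
    //   growth_limit = growth_limit_ - size        = 56 MB  (for the new alloc space)
    //   capacity     = Capacity() - size           = 56 MB  (Capacity() returns growth_limit_)
    // The zygote space's own growth and footprint limits are then clamped to the 8 MB it uses,
    // and the tail of the mem map beyond end_ is remapped for the new alloc space.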
+
+void MallocSpace::Dump(std::ostream& os) const {
+  os << GetType()
+      << " begin=" << reinterpret_cast<void*>(Begin())
+      << ",end=" << reinterpret_cast<void*>(End())
+      << ",size=" << PrettySize(Size()) << ",capacity=" << PrettySize(Capacity())
+      << ",name=\"" << GetName() << "\"]";
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
new file mode 100644
index 0000000..189f01c
--- /dev/null
+++ b/runtime/gc/space/malloc_space.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
+
+#include "space.h"
+
+namespace art {
+namespace gc {
+
+namespace collector {
+  class MarkSweep;
+}  // namespace collector
+
+namespace space {
+
+// TODO: Remove define macro
+#define CHECK_MEMORY_CALL(call, args, what) \
+  do { \
+    int rc = call args; \
+    if (UNLIKELY(rc != 0)) { \
+      errno = rc; \
+      PLOG(FATAL) << # call << " failed for " << what; \
+    } \
+  } while (false)
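For reference, a call such as CHECK_MEMORY_CALL(mprotect, (begin, size, PROT_NONE), GetName()) expands to roughly the following; the macro copies the non-zero return value into errno before the fatal PLOG:

    do {
      int rc = mprotect (begin, size, PROT_NONE);
      if (UNLIKELY(rc != 0)) {
        errno = rc;
        PLOG(FATAL) << "mprotect" << " failed for " << GetName();
      }
    } while (false);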
+
+// const bool kUseRosAlloc = true;
+
+// A common parent of DlMallocSpace and RosAllocSpace.
+class MallocSpace : public ContinuousMemMapAllocSpace {
+ public:
+  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
+
+  SpaceType GetType() const {
+    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
+      return kSpaceTypeZygoteSpace;
+    } else {
+      return kSpaceTypeAllocSpace;
+    }
+  }
+
+  // Allocate num_bytes allowing the underlying space to grow.
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
+                                          size_t* bytes_allocated) = 0;
+  // Allocate num_bytes without allowing the underlying space to grow.
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
+  // Return the storage space required by obj.
+  virtual size_t AllocationSize(const mirror::Object* obj) = 0;
+  virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
+
+#ifndef NDEBUG
+  virtual void CheckMoreCoreForPrecondition() {}  // to be overridden in the debug build.
+#else
+  void CheckMoreCoreForPrecondition() {}  // no-op in the non-debug build.
+#endif
+
+  void* MoreCore(intptr_t increment);
+
+  // Hands unused pages back to the system.
+  virtual size_t Trim() = 0;
+
+  // Perform a mspace_inspect_all which calls back for each allocation chunk. The chunk may not be
+  // in use, indicated by num_bytes equaling zero.
+  virtual void Walk(WalkCallback callback, void* arg) = 0;
+
+  // Returns the number of bytes that the space has currently obtained from the system. This is
+  // greater than or equal to the amount of live data in the space.
+  virtual size_t GetFootprint() = 0;
+
+  // Returns the number of bytes that the heap is allowed to obtain from the system via MoreCore.
+  virtual size_t GetFootprintLimit() = 0;
+
+  // Set the maximum number of bytes that the heap is allowed to obtain from the system via
+  // MoreCore. Note this is used to stop the mspace growing beyond the limit to Capacity. When
+  // allocations fail we GC before increasing the footprint limit and allowing the mspace to grow.
+  virtual void SetFootprintLimit(size_t limit) = 0;
+
+  // Removes the fork time growth limit on capacity, allowing the application to allocate up to the
+  // maximum reserved size of the heap.
+  void ClearGrowthLimit() {
+    growth_limit_ = NonGrowthLimitCapacity();
+  }
+
+  // Override capacity so that we only return the possibly limited capacity
+  size_t Capacity() const {
+    return growth_limit_;
+  }
+
+  // The total amount of memory reserved for the alloc space.
+  size_t NonGrowthLimitCapacity() const {
+    return GetMemMap()->Size();
+  }
+
+  accounting::SpaceBitmap* GetLiveBitmap() const {
+    return live_bitmap_.get();
+  }
+
+  accounting::SpaceBitmap* GetMarkBitmap() const {
+    return mark_bitmap_.get();
+  }
+
+  void Dump(std::ostream& os) const;
+
+  void SetGrowthLimit(size_t growth_limit);
+
+  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
+  void SwapBitmaps();
+
+  virtual MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                                      byte* begin, byte* end, byte* limit, size_t growth_limit) = 0;
+
+  // Turn ourself into a zygote space and return a new alloc space which has our unused memory.
+  MallocSpace* CreateZygoteSpace(const char* alloc_space_name);
+
+  virtual uint64_t GetBytesAllocated() = 0;
+  virtual uint64_t GetObjectsAllocated() = 0;
+  virtual uint64_t GetTotalBytesAllocated() = 0;
+  virtual uint64_t GetTotalObjectsAllocated() = 0;
+
+  // Returns the old mark bitmap.
+  accounting::SpaceBitmap* BindLiveToMarkBitmap();
+  bool HasBoundBitmaps() const;
+  void UnBindBitmaps();
+
+  // Returns the class of a recently freed object.
+  mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
+
+  // Used to ensure that failure happens when you free / allocate into an invalidated space. If we
+  // don't do this we may get heap corruption instead of a segfault at null.
+  virtual void InvalidateAllocator() = 0;
+
+ protected:
+  MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
+              byte* limit, size_t growth_limit);
+
+  static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
+                              size_t* growth_limit, size_t* capacity, byte* requested_begin);
+
+  virtual void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) = 0;
+
+  void RegisterRecentFree(mirror::Object* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+
+  // Recent allocation buffer.
+  static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
+  static constexpr size_t kRecentFreeMask = kRecentFreeCount - 1;
+  std::pair<const mirror::Object*, mirror::Class*> recent_freed_objects_[kRecentFreeCount];
+  size_t recent_free_pos_;
+
+  static size_t bitmap_index_;
+
+  // Used to ensure mutual exclusion when the allocation spaces data structures are being modified.
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // The capacity of the alloc space until such time that ClearGrowthLimit is called.
+  // The underlying mem_map_ controls the maximum size we allow the heap to grow to. The growth
+  // limit is a value <= the mem_map_ capacity, used for ergonomic reasons because of the zygote.
+  // Prior to forking the zygote the heap will have a maximally sized mem_map_, but the growth_limit_
+  // will be set to a lower value. The growth_limit_ is used as the capacity of the alloc_space_;
+  // however, capacity normally can't vary. In the case of the growth_limit_ it can be cleared
+  // one time by a call to ClearGrowthLimit.
+  size_t growth_limit_;
+
+  friend class collector::MarkSweep;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MallocSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h
new file mode 100644
index 0000000..0fe5dd7
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space-inl.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
+#define ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
+
+#include "rosalloc_space.h"
+#include "thread.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+inline mirror::Object* RosAllocSpace::AllocNonvirtual(Thread* self, size_t num_bytes,
+                                                      size_t* bytes_allocated) {
+  mirror::Object* obj;
+  obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+  // RosAlloc zeroes memory internally.
+  return obj;
+}
+
+inline mirror::Object* RosAllocSpace::AllocWithoutGrowthLocked(Thread* self, size_t num_bytes,
+                                                               size_t* bytes_allocated) {
+  size_t rosalloc_size = 0;
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(
+      rosalloc_for_alloc_->Alloc(self, num_bytes,
+                                 &rosalloc_size));
+  if (LIKELY(result != NULL)) {
+    if (kDebugSpaces) {
+      CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
+            << ") not in bounds of allocation space " << *this;
+    }
+    DCHECK(bytes_allocated != NULL);
+    *bytes_allocated = rosalloc_size;
+  }
+  return result;
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
new file mode 100644
index 0000000..a3d2dfa
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rosalloc_space.h"
+
+#include "rosalloc_space-inl.h"
+#include "gc/accounting/card_table.h"
+#include "gc/heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "utils.h"
+
+#include <valgrind.h>
+#include <memcheck/memcheck.h>
+
+namespace art {
+namespace gc {
+namespace space {
+
+static const bool kPrefetchDuringRosAllocFreeList = true;
+
+RosAllocSpace::RosAllocSpace(const std::string& name, MemMap* mem_map,
+                             art::gc::allocator::RosAlloc* rosalloc, byte* begin, byte* end,
+                             byte* limit, size_t growth_limit)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit), rosalloc_(rosalloc),
+      rosalloc_for_alloc_(rosalloc) {
+  CHECK(rosalloc != NULL);
+}
+
+RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                                     size_t capacity, byte* requested_begin) {
+  uint64_t start_time = 0;
+  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
+    start_time = NanoTime();
+    VLOG(startup) << "RosAllocSpace::Create entering " << name
+                  << " initial_size=" << PrettySize(initial_size)
+                  << " growth_limit=" << PrettySize(growth_limit)
+                  << " capacity=" << PrettySize(capacity)
+                  << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
+  }
+
+  // Memory we promise to rosalloc before it asks for morecore.
+  // Note: making this value large means that large allocations are unlikely to succeed, as rosalloc
+  // will request this memory from sys_alloc, which will fail because the footprint (this value plus
+  // the size of the large allocation) would exceed the footprint limit.
+  size_t starting_size = kPageSize;
+  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
+                                 requested_begin);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
+               << PrettySize(capacity);
+    return NULL;
+  }
+  allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size);
+  if (rosalloc == NULL) {
+    LOG(ERROR) << "Failed to initialize rosalloc for alloc space (" << name << ")";
+    return NULL;
+  }
+
+  // Protect memory beyond the initial size.
+  byte* end = mem_map->Begin() + starting_size;
+  if (capacity - initial_size > 0) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), name);
+  }
+
+  // Everything is set so record in immutable structure and leave
+  RosAllocSpace* space;
+  byte* begin = mem_map->Begin();
+  if (RUNNING_ON_VALGRIND > 0) {
+    // TODO: support valgrind.
+    LOG(FATAL) << "Unimplemented";
+    space = NULL;
+  } else {
+    space = new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
+  }
+  // We start out with only the initial size possibly containing objects.
+  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
+    LOG(INFO) << "RosAllocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
+        << " ) " << *space;
+  }
+  return space;
+}
+
+allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_start, size_t initial_size) {
+  // clear errno to allow PLOG on error
+  errno = 0;
+  // create rosalloc using our backing storage starting at begin and
+  // with a footprint of morecore_start. When morecore_start bytes of
+  // memory are exhausted, morecore will be called.
+  allocator::RosAlloc* rosalloc = new art::gc::allocator::RosAlloc(begin, morecore_start);
+  if (rosalloc != NULL) {
+    rosalloc->SetFootprintLimit(initial_size);
+  } else {
+    PLOG(ERROR) << "RosAlloc::Create failed";
+  }
+  return rosalloc;
+}
+
+mirror::Object* RosAllocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+  return AllocNonvirtual(self, num_bytes, bytes_allocated);
+}
+
+mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+  mirror::Object* result;
+  {
+    MutexLock mu(self, lock_);
+    // Grow as much as possible within the space.
+    size_t max_allowed = Capacity();
+    rosalloc_->SetFootprintLimit(max_allowed);
+    // Try the allocation.
+    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+    // Shrink back down as small as possible.
+    size_t footprint = rosalloc_->Footprint();
+    rosalloc_->SetFootprintLimit(footprint);
+  }
+  // Note RosAlloc zeroes memory internally.
+  // Return the new allocation or NULL.
+  CHECK(!kDebugSpaces || result == NULL || Contains(result));
+  return result;
+}
+
+MallocSpace* RosAllocSpace::CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                                           byte* begin, byte* end, byte* limit, size_t growth_limit) {
+  return new RosAllocSpace(name, mem_map, reinterpret_cast<allocator::RosAlloc*>(allocator),
+                           begin, end, limit, growth_limit);
+}
+
+size_t RosAllocSpace::Free(Thread* self, mirror::Object* ptr) {
+  if (kDebugSpaces) {
+    CHECK(ptr != NULL);
+    CHECK(Contains(ptr)) << "Free (" << ptr << ") not in bounds of heap " << *this;
+  }
+  const size_t bytes_freed = InternalAllocationSize(ptr);
+  total_bytes_freed_atomic_.fetch_add(bytes_freed);
+  ++total_objects_freed_atomic_;
+  if (kRecentFreeCount > 0) {
+    MutexLock mu(self, lock_);
+    RegisterRecentFree(ptr);
+  }
+  rosalloc_->Free(self, ptr);
+  return bytes_freed;
+}
+
+size_t RosAllocSpace::FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+  DCHECK(ptrs != NULL);
+
+  // Don't need the lock to calculate the size of the freed pointers.
+  size_t bytes_freed = 0;
+  for (size_t i = 0; i < num_ptrs; i++) {
+    mirror::Object* ptr = ptrs[i];
+    const size_t look_ahead = 8;
+    if (kPrefetchDuringRosAllocFreeList && i + look_ahead < num_ptrs) {
+      __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + look_ahead]));
+    }
+    bytes_freed += InternalAllocationSize(ptr);
+  }
+
+  if (kRecentFreeCount > 0) {
+    MutexLock mu(self, lock_);
+    for (size_t i = 0; i < num_ptrs; i++) {
+      RegisterRecentFree(ptrs[i]);
+    }
+  }
+
+  if (kDebugSpaces) {
+    size_t num_broken_ptrs = 0;
+    for (size_t i = 0; i < num_ptrs; i++) {
+      if (!Contains(ptrs[i])) {
+        num_broken_ptrs++;
+        LOG(ERROR) << "FreeList[" << i << "] (" << ptrs[i] << ") not in bounds of heap " << *this;
+      } else {
+        size_t size = rosalloc_->UsableSize(ptrs[i]);
+        memset(ptrs[i], 0xEF, size);
+      }
+    }
+    CHECK_EQ(num_broken_ptrs, 0u);
+  }
+
+  rosalloc_->BulkFree(self, reinterpret_cast<void**>(ptrs), num_ptrs);
+  total_bytes_freed_atomic_.fetch_add(bytes_freed);
+  total_objects_freed_atomic_.fetch_add(num_ptrs);
+  return bytes_freed;
+}
+
+// Callback from rosalloc when it needs to increase the footprint
+extern "C" void* art_heap_rosalloc_morecore(allocator::RosAlloc* rosalloc, intptr_t increment) {
+  Heap* heap = Runtime::Current()->GetHeap();
+  DCHECK(heap->GetNonMovingSpace()->IsRosAllocSpace());
+  DCHECK_EQ(heap->GetNonMovingSpace()->AsRosAllocSpace()->GetRosAlloc(), rosalloc);
+  return heap->GetNonMovingSpace()->MoreCore(increment);
+}
+
+// Virtual functions can't get inlined.
+inline size_t RosAllocSpace::InternalAllocationSize(const mirror::Object* obj) {
+  return AllocationSizeNonvirtual(obj);
+}
+
+size_t RosAllocSpace::AllocationSize(const mirror::Object* obj) {
+  return InternalAllocationSize(obj);
+}
+
+size_t RosAllocSpace::Trim() {
+  MutexLock mu(Thread::Current(), lock_);
+  // Trim to release memory at the end of the space.
+  rosalloc_->Trim();
+  // No inspect_all necessary here as trimming of pages is built-in.
+  return 0;
+}
+
+void RosAllocSpace::Walk(void(*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                         void* arg) {
+  InspectAllRosAlloc(callback, arg);
+  callback(NULL, NULL, 0, arg);  // Indicate end of a space.
+}
+
+size_t RosAllocSpace::GetFootprint() {
+  MutexLock mu(Thread::Current(), lock_);
+  return rosalloc_->Footprint();
+}
+
+size_t RosAllocSpace::GetFootprintLimit() {
+  MutexLock mu(Thread::Current(), lock_);
+  return rosalloc_->FootprintLimit();
+}
+
+void RosAllocSpace::SetFootprintLimit(size_t new_size) {
+  MutexLock mu(Thread::Current(), lock_);
+  VLOG(heap) << "RosAllocSpace::SetFootprintLimit " << PrettySize(new_size);
+  // Compare against the actual footprint, rather than the Size(), because the heap may not have
+  // grown all the way to the allowed size yet.
+  size_t current_space_size = rosalloc_->Footprint();
+  if (new_size < current_space_size) {
+    // Don't let the space grow any more.
+    new_size = current_space_size;
+  }
+  rosalloc_->SetFootprintLimit(new_size);
+}
+
+uint64_t RosAllocSpace::GetBytesAllocated() {
+  size_t bytes_allocated = 0;
+  InspectAllRosAlloc(art::gc::allocator::RosAlloc::BytesAllocatedCallback, &bytes_allocated);
+  return bytes_allocated;
+}
+
+uint64_t RosAllocSpace::GetObjectsAllocated() {
+  size_t objects_allocated = 0;
+  InspectAllRosAlloc(art::gc::allocator::RosAlloc::ObjectsAllocatedCallback, &objects_allocated);
+  return objects_allocated;
+}
+
+void RosAllocSpace::InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                                       void* arg) NO_THREAD_SAFETY_ANALYSIS {
+  // TODO: NO_THREAD_SAFETY_ANALYSIS.
+  Thread* self = Thread::Current();
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    // The mutators are already suspended. For example, a call path
+    // from SignalCatcher::HandleSigQuit().
+    rosalloc_->InspectAll(callback, arg);
+  } else {
+    // The mutators are not suspended yet.
+    DCHECK(!Locks::mutator_lock_->IsSharedHeld(self));
+    ThreadList* tl = Runtime::Current()->GetThreadList();
+    tl->SuspendAll();
+    {
+      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+      MutexLock mu2(self, *Locks::thread_list_lock_);
+      rosalloc_->InspectAll(callback, arg);
+    }
+    tl->ResumeAll();
+  }
+}
+
+void RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) {
+  rosalloc_->RevokeThreadLocalRuns(thread);
+}
+
+void RosAllocSpace::RevokeAllThreadLocalBuffers() {
+  rosalloc_->RevokeAllThreadLocalRuns();
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
new file mode 100644
index 0000000..6311580
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
+
+#include "gc/allocator/rosalloc.h"
+#include "malloc_space.h"
+#include "space.h"
+
+namespace art {
+namespace gc {
+
+namespace collector {
+  class MarkSweep;
+}  // namespace collector
+
+namespace space {
+
+// An alloc space is a space where objects may be allocated and garbage collected.
+class RosAllocSpace : public MallocSpace {
+ public:
+  // Create a RosAllocSpace with the requested sizes. The requested
+  // base address is not guaranteed to be granted, if it is required,
+  // the caller should call Begin on the returned space to confirm the
+  // request was granted.
+  static RosAllocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                               size_t capacity, byte* requested_begin);
+
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
+                                          size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+  virtual size_t AllocationSize(const mirror::Object* obj);
+  virtual size_t Free(Thread* self, mirror::Object* ptr);
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
+
+  mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+
+  size_t AllocationSizeNonvirtual(const mirror::Object* obj)
+      NO_THREAD_SAFETY_ANALYSIS {
+    // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held.
+    void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
+    // obj is a valid object. Use its class in the header to get the size.
+    size_t size = obj->SizeOf();
+    size_t size_by_size = rosalloc_->UsableSize(size);
+    if (kIsDebugBuild) {
+      size_t size_by_ptr = rosalloc_->UsableSize(obj_ptr);
+      if (size_by_size != size_by_ptr) {
+        LOG(INFO) << "Found a bad sized obj of size " << size
+                  << " at " << std::hex << reinterpret_cast<intptr_t>(obj_ptr) << std::dec
+                  << " size_by_size=" << size_by_size << " size_by_ptr=" << size_by_ptr;
+      }
+      DCHECK_EQ(size_by_size, size_by_ptr);
+    }
+    return size_by_size;
+  }
+
+  art::gc::allocator::RosAlloc* GetRosAlloc() {
+    return rosalloc_;
+  }
+
+  size_t Trim();
+  void Walk(WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_);
+  size_t GetFootprint();
+  size_t GetFootprintLimit();
+  void SetFootprintLimit(size_t limit);
+
+  MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                              byte* begin, byte* end, byte* limit, size_t growth_limit);
+
+  uint64_t GetBytesAllocated();
+  uint64_t GetObjectsAllocated();
+  uint64_t GetTotalBytesAllocated() {
+    return GetBytesAllocated() + total_bytes_freed_atomic_;
+  }
+  uint64_t GetTotalObjectsAllocated() {
+    return GetObjectsAllocated() + total_objects_freed_atomic_;
+  }
+
+  void RevokeThreadLocalBuffers(Thread* thread);
+  void RevokeAllThreadLocalBuffers();
+
+  // Returns the class of a recently freed object.
+  mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
+
+  virtual void InvalidateAllocator() {
+    rosalloc_for_alloc_ = NULL;
+  }
+
+  virtual bool IsRosAllocSpace() const {
+    return true;
+  }
+  virtual RosAllocSpace* AsRosAllocSpace() {
+    return this;
+  }
+
+ protected:
+  RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
+                byte* begin, byte* end, byte* limit, size_t growth_limit);
+
+ private:
+  size_t InternalAllocationSize(const mirror::Object* obj);
+  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) {
+    return CreateRosAlloc(base, morecore_start, initial_size);
+  }
+  static allocator::RosAlloc* CreateRosAlloc(void* base, size_t morecore_start, size_t initial_size);
+
+  void InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                          void* arg)
+      LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
+
+  // Approximate number of bytes and objects which have been deallocated in the space.
+  AtomicInteger total_bytes_freed_atomic_;
+  AtomicInteger total_objects_freed_atomic_;
+
+  // Underlying rosalloc.
+  art::gc::allocator::RosAlloc* const rosalloc_;
+
+  // A rosalloc pointer used for allocation. Equal to what rosalloc_
+  // points to, or nullptr after InvalidateAllocator() is called.
+  art::gc::allocator::RosAlloc* rosalloc_for_alloc_;
+
+  friend class collector::MarkSweep;
+
+  DISALLOW_COPY_AND_ASSIGN(RosAllocSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
diff --git a/runtime/gc/space/space-inl.h b/runtime/gc/space/space-inl.h
index f1031ff..0c1d7a2 100644
--- a/runtime/gc/space/space-inl.h
+++ b/runtime/gc/space/space-inl.h
@@ -31,9 +31,10 @@
   return down_cast<ImageSpace*>(down_cast<MemMapSpace*>(this));
 }
 
-inline DlMallocSpace* Space::AsDlMallocSpace() {
-  DCHECK(IsDlMallocSpace());
-  return down_cast<DlMallocSpace*>(down_cast<MemMapSpace*>(this));
+inline MallocSpace* Space::AsMallocSpace() {
+  DCHECK(GetType() == kSpaceTypeAllocSpace || GetType() == kSpaceTypeZygoteSpace);
+  DCHECK(IsDlMallocSpace() || IsRosAllocSpace());
+  return down_cast<MallocSpace*>(down_cast<MemMapSpace*>(this));
 }
 
 inline LargeObjectSpace* Space::AsLargeObjectSpace() {
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 4c05dde..38b602e 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -44,8 +44,10 @@
 
 class AllocSpace;
 class ContinuousSpace;
-class DlMallocSpace;
 class DiscontinuousSpace;
+class MallocSpace;
+class DlMallocSpace;
+class RosAllocSpace;
 class ImageSpace;
 class LargeObjectSpace;
 
@@ -106,11 +108,26 @@
   ImageSpace* AsImageSpace();
 
   // Is this a dlmalloc backed allocation space?
-  bool IsDlMallocSpace() const {
+  bool IsMallocSpace() const {
     SpaceType type = GetType();
     return type == kSpaceTypeAllocSpace || type == kSpaceTypeZygoteSpace;
   }
-  DlMallocSpace* AsDlMallocSpace();
+  MallocSpace* AsMallocSpace();
+
+  virtual bool IsDlMallocSpace() const {
+    return false;
+  }
+  virtual DlMallocSpace* AsDlMallocSpace() {
+    LOG(FATAL) << "Unreachable";
+    return NULL;
+  }
+  virtual bool IsRosAllocSpace() const {
+    return false;
+  }
+  virtual RosAllocSpace* AsRosAllocSpace() {
+    LOG(FATAL) << "Unreachable";
+    return NULL;
+  }
 
   // Is this the space allocated into by the Zygote and no-longer in use?
   bool IsZygoteSpace() const {
@@ -195,6 +212,16 @@
   // Returns how many bytes were freed.
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
 
+  // Revoke any sort of thread-local buffers that are used to speed up
+  // allocations for the given thread, if the alloc space
+  // implementation uses any. No-op by default.
+  virtual void RevokeThreadLocalBuffers(Thread* /*thread*/) {}
+
+  // Revoke any sort of thread-local buffers that are used to speed up
+  // allocations for all threads, if the alloc space
+  // implementation uses any. No-op by default.
+  virtual void RevokeAllThreadLocalBuffers() {}
+
  protected:
   AllocSpace() {}
   virtual ~AllocSpace() {}
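
The virtual IsRosAllocSpace()/AsRosAllocSpace() pair added to Space above is a test-then-downcast idiom: the base class answers false and aborts, while the concrete space overrides both. A standalone sketch with placeholder classes (not the full ART hierarchy):

#include <cstdlib>
#include <iostream>

class RosAllocSpace;  // forward declaration, as in space.h

// Placeholder hierarchy illustrating the Is*/As* virtual downcast idiom.
class Space {
 public:
  virtual ~Space() {}
  virtual bool IsRosAllocSpace() const { return false; }
  virtual RosAllocSpace* AsRosAllocSpace() {
    std::cerr << "Unreachable: not a RosAllocSpace" << std::endl;
    std::abort();
    return NULL;  // not reached
  }
};

class RosAllocSpace : public Space {
 public:
  bool IsRosAllocSpace() const { return true; }
  RosAllocSpace* AsRosAllocSpace() { return this; }
};

// Callers test with Is*() before downcasting with As*(); no RTTI needed.
void VisitSpace(Space* space) {
  if (space->IsRosAllocSpace()) {
    RosAllocSpace* ros = space->AsRosAllocSpace();
    (void)ros;  // Use the rosalloc-specific interface here.
  }
}
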
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index 383714b..6b597ae 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -20,6 +20,8 @@
 #include "common_test.h"
 #include "globals.h"
 #include "UniquePtr.h"
+#include "mirror/array-inl.h"
+#include "mirror/object-inl.h"
 
 #include <stdint.h>
 
@@ -34,8 +36,25 @@
   void SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size);
 
   void AddSpace(ContinuousSpace* space) {
+    // For RosAlloc, revoke the thread-local runs before moving on to a
+    // new alloc space.
+    Runtime::Current()->GetHeap()->RevokeAllThreadLocalBuffers();
     Runtime::Current()->GetHeap()->AddSpace(space);
   }
+  void InstallClass(mirror::Object* o, size_t size) NO_THREAD_SAFETY_ANALYSIS {
+    // Note that the minimum size, which is the size of a zero-length byte array, is 12 bytes.
+    EXPECT_GE(size, static_cast<size_t>(12));
+    SirtRef<mirror::ClassLoader> null_loader(Thread::Current(), NULL);
+    mirror::Class* byte_array_class = Runtime::Current()->GetClassLinker()->FindClass("[B", null_loader);
+    EXPECT_TRUE(byte_array_class != NULL);
+    o->SetClass(byte_array_class);
+    mirror::Array* arr = o->AsArray();
+    // The array header is sizeof(mirror::Object) plus a 4-byte length field.
+    size_t header_size = arr->DataOffset(1).Uint32Value();
+    int32_t length = size - header_size;
+    arr->SetLength(length);
+    EXPECT_EQ(arr->SizeOf(), size);
+  }
 };
 
 static size_t test_rand(size_t* seed) {
@@ -87,7 +106,7 @@
 // the GC works with the ZygoteSpace.
 TEST_F(SpaceTest, ZygoteSpace) {
     size_t dummy = 0;
-    DlMallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
+    MallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
     ASSERT_TRUE(space != NULL);
 
     // Make space findable to the heap, will also delete space when runtime is cleaned up
@@ -97,6 +116,7 @@
     // Succeeds, fits without adjusting the footprint limit.
     mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy);
     EXPECT_TRUE(ptr1 != NULL);
+    InstallClass(ptr1, 1 * MB);
 
     // Fails, requires a higher footprint limit.
     mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -107,6 +127,7 @@
     mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated);
     EXPECT_TRUE(ptr3 != NULL);
     EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+    InstallClass(ptr3, 8 * MB);
 
     // Fails, requires a higher footprint limit.
     mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
@@ -123,8 +144,9 @@
     EXPECT_LE(8U * MB, free3);
 
     // Succeeds, now that memory has been freed.
-    void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
+    mirror::Object* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
     EXPECT_TRUE(ptr6 != NULL);
+    InstallClass(ptr6, 9 * MB);
 
     // Final clean up.
     size_t free1 = space->AllocationSize(ptr1);
@@ -141,6 +163,7 @@
     // Succeeds, fits without adjusting the footprint limit.
     ptr1 = space->Alloc(self, 1 * MB, &dummy);
     EXPECT_TRUE(ptr1 != NULL);
+    InstallClass(ptr1, 1 * MB);
 
     // Fails, requires a higher footprint limit.
     ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -149,6 +172,7 @@
     // Succeeds, adjusts the footprint.
     ptr3 = space->AllocWithGrowth(self, 2 * MB, &dummy);
     EXPECT_TRUE(ptr3 != NULL);
+    InstallClass(ptr3, 2 * MB);
     space->Free(self, ptr3);
 
     // Final clean up.
@@ -169,6 +193,7 @@
   // Succeeds, fits without adjusting the footprint limit.
   mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy);
   EXPECT_TRUE(ptr1 != NULL);
+  InstallClass(ptr1, 1 * MB);
 
   // Fails, requires a higher footprint limit.
   mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -179,6 +204,7 @@
   mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated);
   EXPECT_TRUE(ptr3 != NULL);
   EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  InstallClass(ptr3, 8 * MB);
 
   // Fails, requires a higher footprint limit.
   mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
@@ -195,8 +221,9 @@
   EXPECT_LE(8U * MB, free3);
 
   // Succeeds, now that memory has been freed.
-  void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
+  mirror::Object* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
   EXPECT_TRUE(ptr6 != NULL);
+  InstallClass(ptr6, 9 * MB);
 
   // Final clean up.
   size_t free1 = space->AllocationSize(ptr1);
@@ -278,8 +305,9 @@
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->Alloc(self, 16, &allocation_size);
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
     EXPECT_TRUE(lots_of_objects[i] != NULL);
+    InstallClass(lots_of_objects[i], 16);
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
   // Release memory and check pointers are NULL
@@ -292,8 +320,9 @@
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->AllocWithGrowth(self, 1024, &allocation_size);
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
     EXPECT_TRUE(lots_of_objects[i] != NULL);
+    InstallClass(lots_of_objects[i], 1024);
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
   // Release memory and check pointers are NULL
@@ -310,22 +339,20 @@
     // No allocation can succeed
     return;
   }
-  // Mspace for raw dlmalloc operations
-  void* mspace = space->GetMspace();
 
-  // mspace's footprint equals amount of resources requested from system
-  size_t footprint = mspace_footprint(mspace);
+  // The space's footprint equals the amount of resources requested from the system
+  size_t footprint = space->GetFootprint();
 
-  // mspace must at least have its book keeping allocated
+  // The space must at least have its bookkeeping allocated
   EXPECT_GT(footprint, 0u);
 
-  // mspace but it shouldn't exceed the initial size
+  // But it shouldn't exceed the initial size
   EXPECT_LE(footprint, growth_limit);
 
   // space's size shouldn't exceed the initial size
   EXPECT_LE(space->Size(), growth_limit);
 
-  // this invariant should always hold or else the mspace has grown to be larger than what the
+  // this invariant should always hold or else the space has grown to be larger than what the
   // space believes its size is (which will break invariants)
   EXPECT_GE(space->Size(), footprint);
 
@@ -345,8 +372,9 @@
         alloc_size = object_size;
       } else {
         alloc_size = test_rand(&rand_seed) % static_cast<size_t>(-object_size);
-        if (alloc_size < 8) {
-          alloc_size = 8;
+        // Note that the minimum size, which is the size of a zero-length byte array, is 12 bytes.
+        if (alloc_size < 12) {
+          alloc_size = 12;
         }
       }
       mirror::Object* object;
@@ -356,9 +384,10 @@
       } else {
         object = space->AllocWithGrowth(self, alloc_size, &bytes_allocated);
       }
-      footprint = mspace_footprint(mspace);
+      footprint = space->GetFootprint();
       EXPECT_GE(space->Size(), footprint);  // invariant
       if (object != NULL) {  // allocation succeeded
+        InstallClass(object, alloc_size);
         lots_of_objects.get()[i] = object;
         size_t allocation_size = space->AllocationSize(object);
         EXPECT_EQ(bytes_allocated, allocation_size);
@@ -395,7 +424,7 @@
     space->Trim();
 
     // Bounds sanity
-    footprint = mspace_footprint(mspace);
+    footprint = space->GetFootprint();
     EXPECT_LE(amount_allocated, growth_limit);
     EXPECT_GE(footprint, amount_allocated);
     EXPECT_LE(footprint, growth_limit);
@@ -421,13 +450,21 @@
       space->Free(self, object);
       lots_of_objects.get()[i] = NULL;
       amount_allocated -= allocation_size;
-      footprint = mspace_footprint(mspace);
+      footprint = space->GetFootprint();
       EXPECT_GE(space->Size(), footprint);  // invariant
     }
 
     free_increment >>= 1;
   }
 
+  // The space is now empty, just before the large object allocation
+  // below. For RosAlloc, revoke the thread-local runs, which are kept
+  // even when empty for performance reasons, so that they don't
+  // cause the following large object allocation to fail due to
+  // fragmentation. Note that they are normally revoked at each GC,
+  // but no GC happens here.
+  space->RevokeAllThreadLocalBuffers();
+
   // All memory was released, try a large allocation to check freed memory is being coalesced
   mirror::Object* large_object;
   size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4);
@@ -438,9 +475,10 @@
     large_object = space->AllocWithGrowth(self, three_quarters_space, &bytes_allocated);
   }
   EXPECT_TRUE(large_object != NULL);
+  InstallClass(large_object, three_quarters_space);
 
   // Sanity check footprint
-  footprint = mspace_footprint(mspace);
+  footprint = space->GetFootprint();
   EXPECT_LE(footprint, growth_limit);
   EXPECT_GE(space->Size(), footprint);
   EXPECT_LE(space->Size(), growth_limit);
@@ -449,7 +487,7 @@
   space->Free(self, large_object);
 
   // Sanity check footprint
-  footprint = mspace_footprint(mspace);
+  footprint = space->GetFootprint();
   EXPECT_LE(footprint, growth_limit);
   EXPECT_GE(space->Size(), footprint);
   EXPECT_LE(space->Size(), growth_limit);
@@ -488,8 +526,8 @@
   }
 
 // Each size test is its own test so that we get a fresh heap each time
-TEST_F(SpaceTest, SizeFootPrintGrowthLimitAndTrim_AllocationsOf_8B) {
-  SizeFootPrintGrowthLimitAndTrimDriver(8);
+TEST_F(SpaceTest, SizeFootPrintGrowthLimitAndTrim_AllocationsOf_12B) {
+  SizeFootPrintGrowthLimitAndTrimDriver(12);
 }
 TEST_SizeFootPrintGrowthLimitAndTrim(16B, 16)
 TEST_SizeFootPrintGrowthLimitAndTrim(24B, 24)
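
InstallClass() above turns a raw allocation into a valid byte array by setting the array length to the allocation size minus the array header. Assuming an 8-byte object header plus a 4-byte length field, which is consistent with the 12-byte minimum the test enforces, the arithmetic reduces to:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Assumed layout for the sketch: an 8-byte object header followed by a
// 4-byte length field, which is where the test's 12-byte minimum comes from.
constexpr size_t kObjectHeaderSize = 8;
constexpr size_t kArrayLengthFieldSize = 4;
constexpr size_t kByteArrayHeaderSize = kObjectHeaderSize + kArrayLengthFieldSize;  // 12

// For an allocation of `size` bytes, the byte-array length that makes the
// array's SizeOf() report exactly `size`, as InstallClass() computes it.
int32_t ByteArrayLengthFor(size_t size) {
  assert(size >= kByteArrayHeaderSize);
  return static_cast<int32_t>(size - kByteArrayHeaderSize);
}

int main() {
  assert(ByteArrayLengthFor(12) == 0);            // zero-length byte array
  assert(ByteArrayLengthFor(1024) == 1024 - 12);  // 1 KB allocation
  return 0;
}
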
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 0f4fa4e..b3b4731 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -44,6 +44,8 @@
 
 namespace instrumentation {
 
+const bool kVerboseInstrumentation = false;
+
 // Do we want to deoptimize for method entry and exit listeners or just try to intercept
 // invocations? Deoptimization forces all code to run in the interpreter and considerably hurts the
 // application's performance.
@@ -57,10 +59,7 @@
 
 bool Instrumentation::InstallStubsForClass(mirror::Class* klass) {
   bool uninstall = !entry_exit_stubs_installed_ && !interpreter_stubs_installed_;
-  ClassLinker* class_linker = NULL;
-  if (uninstall) {
-    class_linker = Runtime::Current()->GetClassLinker();
-  }
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   bool is_initialized = klass->IsInitialized();
   for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
     mirror::ArtMethod* method = klass->GetDirectMethod(i);
@@ -76,7 +75,14 @@
         }
       } else {  // !uninstall
         if (!interpreter_stubs_installed_ || method->IsNative()) {
-          new_code = GetQuickInstrumentationEntryPoint();
+          // Do not overwrite the resolution trampoline. When the trampoline initializes the method's
+          // class, all its static methods' code will be set to the instrumentation entry point.
+          // For more details, see ClassLinker::FixupStaticTrampolines.
+          if (is_initialized || !method->IsStatic() || method->IsConstructor()) {
+            new_code = GetQuickInstrumentationEntryPoint();
+          } else {
+            new_code = GetResolutionTrampoline(class_linker);
+          }
         } else {
           new_code = GetCompiledCodeToInterpreterBridge();
         }
@@ -448,7 +454,14 @@
     method->SetEntryPointFromCompiledCode(code);
   } else {
     if (!interpreter_stubs_installed_ || method->IsNative()) {
-      method->SetEntryPointFromCompiledCode(GetQuickInstrumentationEntryPoint());
+      // Do not overwrite the resolution trampoline. When the trampoline initializes the method's
+      // class, all its static methods' code will be set to the instrumentation entry point.
+      // For more details, see ClassLinker::FixupStaticTrampolines.
+      if (code == GetResolutionTrampoline(Runtime::Current()->GetClassLinker())) {
+        method->SetEntryPointFromCompiledCode(code);
+      } else {
+        method->SetEntryPointFromCompiledCode(GetQuickInstrumentationEntryPoint());
+      }
     } else {
       method->SetEntryPointFromCompiledCode(GetCompiledCodeToInterpreterBridge());
     }
@@ -501,7 +514,7 @@
                                         uint32_t dex_pc) const {
   if (have_method_unwind_listeners_) {
     for (InstrumentationListener* listener : method_unwind_listeners_) {
-      listener->MethodUnwind(thread, method, dex_pc);
+      listener->MethodUnwind(thread, this_object, method, dex_pc);
     }
   }
 }
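
MethodUnwindEvent() now forwards the receiver to each listener, so every InstrumentationListener implementation gains a this_object parameter. A sketch of a listener written against the new shape, with placeholder structs standing in for the ART thread and mirror types:

#include <cstddef>
#include <cstdint>
#include <iostream>

// Placeholder stand-ins for art::Thread, art::mirror::Object and
// art::mirror::ArtMethod, just to show the callback shape.
struct Thread {};
struct Object {};
struct ArtMethod { const char* name; };

// Minimal listener interface mirroring the updated MethodUnwind signature.
class InstrumentationListener {
 public:
  virtual ~InstrumentationListener() {}
  virtual void MethodUnwind(Thread* thread, Object* this_object,
                            const ArtMethod* method, uint32_t dex_pc) = 0;
};

class LoggingListener : public InstrumentationListener {
 public:
  virtual void MethodUnwind(Thread* /*thread*/, Object* this_object,
                            const ArtMethod* method, uint32_t dex_pc) {
    // The receiver is now available to the listener (NULL for static methods).
    std::cout << "unwind " << method->name << " at dex pc " << dex_pc
              << (this_object != NULL ? "" : " (static)") << std::endl;
  }
};
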
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 25a4eec..6bfc2d7 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -36,8 +36,6 @@
 
 namespace instrumentation {
 
-const bool kVerboseInstrumentation = false;
-
 // Interpreter handler tables.
 enum InterpreterHandlerTable {
   kMainHandlerTable = 0,          // Main handler table: no suspend check, no instrumentation.
@@ -68,8 +66,9 @@
 
   // Call-back for when a method is popped due to an exception throw. A method will either cause a
   // MethodExited call-back or a MethodUnwind call-back when its activation is removed.
-  virtual void MethodUnwind(Thread* thread, const mirror::ArtMethod* method,
-                            uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                            const mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when the dex pc moves in a method.
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index a1657d0..fd78bf2 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -31,6 +31,7 @@
 struct iovec;
 
 namespace art {
+  union JValue;
 namespace mirror {
   class ArtMethod;
 }  // namespace mirror
@@ -185,8 +186,11 @@
    * issuing a MethodEntry on a native method.
    *
    * "eventFlags" indicates the types of events that have occurred.
+   *
+   * "returnValue" is non-null for MethodExit events only.
    */
-  bool PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags)
+  bool PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags,
+                         const JValue* returnValue)
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 345549d..61bd1ed 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -719,7 +719,8 @@
  *  - Single-step to a line with a breakpoint.  Should get a single
  *    event message with both events in it.
  */
-bool JdwpState::PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags) {
+bool JdwpState::PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags,
+                                  const JValue* returnValue) {
   ModBasket basket;
   basket.pLoc = pLoc;
   basket.classId = pLoc->class_id;
@@ -771,9 +772,7 @@
     }
     if ((eventFlags & Dbg::kMethodExit) != 0) {
       FindMatchingEvents(EK_METHOD_EXIT, &basket, match_list, &match_count);
-
-      // TODO: match EK_METHOD_EXIT_WITH_RETURN_VALUE too; we need to include the 'value', though.
-      // FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, &basket, match_list, &match_count);
     }
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
@@ -792,6 +791,9 @@
         expandBufAdd4BE(pReq, match_list[i]->requestId);
         expandBufAdd8BE(pReq, basket.threadId);
         expandBufAddLocation(pReq, *pLoc);
+        if (match_list[i]->eventKind == EK_METHOD_EXIT_WITH_RETURN_VALUE) {
+          Dbg::OutputMethodReturnValue(pLoc->method_id, returnValue, pReq);
+        }
       }
     }
 
diff --git a/runtime/locks.h b/runtime/locks.h
index 2262218..2308e95 100644
--- a/runtime/locks.h
+++ b/runtime/locks.h
@@ -37,6 +37,9 @@
   kThreadSuspendCountLock,
   kAbortLock,
   kJdwpSocketLock,
+  kRosAllocGlobalLock,
+  kRosAllocBracketLock,
+  kRosAllocBulkFreeLock,
   kAllocSpaceLock,
   kMarkSweepMarkStackLock,
   kDefaultMutexLevel,
diff --git a/runtime/mapping_table.h b/runtime/mapping_table.h
index 9955f30..c468c1e 100644
--- a/runtime/mapping_table.h
+++ b/runtime/mapping_table.h
@@ -30,7 +30,7 @@
 
   uint32_t TotalSize() const PURE {
     const uint8_t* table = encoded_table_;
-    if (table == NULL) {
+    if (table == nullptr) {
       return 0;
     } else {
       return DecodeUnsignedLeb128(&table);
@@ -39,7 +39,7 @@
 
   uint32_t DexToPcSize() const PURE {
     const uint8_t* table = encoded_table_;
-    if (table == NULL) {
+    if (table == nullptr) {
       return 0;
     } else {
       uint32_t total_size = DecodeUnsignedLeb128(&table);
@@ -50,9 +50,11 @@
 
   const uint8_t* FirstDexToPcPtr() const {
     const uint8_t* table = encoded_table_;
-    if (table != NULL) {
-      DecodeUnsignedLeb128(&table);  // Total_size, unused.
+    if (table != nullptr) {
+      uint32_t total_size = DecodeUnsignedLeb128(&table);
       uint32_t pc_to_dex_size = DecodeUnsignedLeb128(&table);
+      // We must have dex to pc entries or else the loop will go beyond the end of the table.
+      DCHECK_GT(total_size, pc_to_dex_size);
       for (uint32_t i = 0; i < pc_to_dex_size; ++i) {
         DecodeUnsignedLeb128(&table);  // Move ptr past native PC.
         DecodeUnsignedLeb128(&table);  // Move ptr past dex PC.
@@ -64,15 +66,15 @@
   class DexToPcIterator {
    public:
     DexToPcIterator(const MappingTable* table, uint32_t element) :
-        table_(table), element_(element), end_(table_->DexToPcSize()), encoded_table_ptr_(NULL),
+        table_(table), element_(element), end_(table_->DexToPcSize()), encoded_table_ptr_(nullptr),
         native_pc_offset_(0), dex_pc_(0) {
-      if (element == 0) {
-        encoded_table_ptr_ = table_->FirstDexToPcPtr();
+      if (element == 0) {  // An iterator positioned at the start.
         if (end_ > 0) {
+          encoded_table_ptr_ = table_->FirstDexToPcPtr();
           native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
           dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
         }
-      } else {
+      } else {  // An iterator positioned at the end.
         DCHECK_EQ(table_->DexToPcSize(), element);
       }
     }
@@ -102,7 +104,7 @@
     const MappingTable* const table_;  // The original table.
     uint32_t element_;  // A value in the range 0 to end_.
     const uint32_t end_;  // Equal to table_->DexToPcSize().
-    const uint8_t* encoded_table_ptr_;  // Either NULL or points to encoded data after this entry.
+    const uint8_t* encoded_table_ptr_;  // Either nullptr or points to encoded data after this entry.
     uint32_t native_pc_offset_;  // The current value of native pc offset.
     uint32_t dex_pc_;  // The current value of dex pc.
   };
@@ -118,7 +120,7 @@
 
   uint32_t PcToDexSize() const PURE {
     const uint8_t* table = encoded_table_;
-    if (table == NULL) {
+    if (table == nullptr) {
       return 0;
     } else {
       DecodeUnsignedLeb128(&table);  // Total_size, unused.
@@ -129,7 +131,7 @@
 
   const uint8_t* FirstPcToDexPtr() const {
     const uint8_t* table = encoded_table_;
-    if (table != NULL) {
+    if (table != nullptr) {
       DecodeUnsignedLeb128(&table);  // Total_size, unused.
       DecodeUnsignedLeb128(&table);  // PC to Dex size, unused.
     }
@@ -139,15 +141,15 @@
   class PcToDexIterator {
    public:
     PcToDexIterator(const MappingTable* table, uint32_t element) :
-        table_(table), element_(element), end_(table_->PcToDexSize()), encoded_table_ptr_(NULL),
+        table_(table), element_(element), end_(table_->PcToDexSize()), encoded_table_ptr_(nullptr),
         native_pc_offset_(0), dex_pc_(0) {
-      if (element == 0) {
-        encoded_table_ptr_ = table_->FirstPcToDexPtr();
+      if (element == 0) {  // An iterator positioned at the start.
         if (end_ > 0) {
+          encoded_table_ptr_ = table_->FirstPcToDexPtr();
           native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
           dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
         }
-      } else {
+      } else {  // An iterator positioned at the end.
         DCHECK_EQ(table_->PcToDexSize(), element);
       }
     }
@@ -177,7 +179,7 @@
     const MappingTable* const table_;  // The original table.
     uint32_t element_;  // A value in the range 0 to PcToDexSize.
     const uint32_t end_;  // Equal to table_->PcToDexSize().
-    const uint8_t* encoded_table_ptr_;  // Either NULL or points to encoded data after this entry.
+    const uint8_t* encoded_table_ptr_;  // Either nullptr or points to encoded data after this entry.
     uint32_t native_pc_offset_;  // The current value of native pc offset.
     uint32_t dex_pc_;  // The current value of dex pc.
   };
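
The iterators above read a table that is just a sequence of ULEB128 values: total_size, pc_to_dex_size, the pc-to-dex (native pc, dex pc) pairs, then the remaining dex-to-pc pairs. A self-contained sketch of that walk, with a local decoder standing in for the runtime's DecodeUnsignedLeb128 helper:

#include <cstdint>
#include <cstdio>

// Minimal ULEB128 decoder for the sketch; the runtime has its own helper
// with the same behavior.
uint32_t DecodeUleb128(const uint8_t** data) {
  uint32_t result = 0;
  int shift = 0;
  uint8_t byte;
  do {
    byte = *(*data)++;
    result |= static_cast<uint32_t>(byte & 0x7f) << shift;
    shift += 7;
  } while ((byte & 0x80) != 0);
  return result;
}

// Walk an encoded mapping table as the accessors above read it: total_size,
// pc_to_dex_size, the pc-to-dex pairs, then the remaining dex-to-pc pairs.
void DumpMappingTable(const uint8_t* table) {
  if (table == NULL) {
    return;
  }
  uint32_t total_size = DecodeUleb128(&table);
  uint32_t pc_to_dex_size = DecodeUleb128(&table);
  for (uint32_t i = 0; i < pc_to_dex_size; ++i) {
    uint32_t native_pc = DecodeUleb128(&table);
    uint32_t dex_pc = DecodeUleb128(&table);
    std::printf("pc->dex: native 0x%x -> dex 0x%x\n", native_pc, dex_pc);
  }
  for (uint32_t i = 0; i < total_size - pc_to_dex_size; ++i) {
    uint32_t native_pc = DecodeUleb128(&table);
    uint32_t dex_pc = DecodeUleb128(&table);
    std::printf("dex->pc: dex 0x%x -> native 0x%x\n", dex_pc, native_pc);
  }
}
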
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 96c3e78..66fa100 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -267,14 +267,14 @@
     if (space->IsImageSpace()) {
       // Currently don't include the image space.
     } else if (space->IsZygoteSpace()) {
-      gc::space::DlMallocSpace* dlmalloc_space = space->AsDlMallocSpace();
-      zygoteSize += dlmalloc_space->GetFootprint();
-      zygoteUsed += dlmalloc_space->GetBytesAllocated();
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      zygoteSize += malloc_space->GetFootprint();
+      zygoteUsed += malloc_space->GetBytesAllocated();
     } else {
       // This is the alloc space.
-      gc::space::DlMallocSpace* dlmalloc_space = space->AsDlMallocSpace();
-      allocSize += dlmalloc_space->GetFootprint();
-      allocUsed += dlmalloc_space->GetBytesAllocated();
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      allocSize += malloc_space->GetFootprint();
+      allocUsed += malloc_space->GetBytesAllocated();
     }
   }
   typedef std::vector<gc::space::DiscontinuousSpace*>::const_iterator It2;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 8e39023..08c0ba0 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -124,6 +124,11 @@
   heap_->WaitForGcToComplete(self);
   heap_->DeleteThreadPool();
 
+  // For RosAlloc, revoke thread-local runs. Note that tests
+  // (common_test.h) repeatedly allocate and delete Runtime
+  // objects.
+  heap_->RevokeAllThreadLocalBuffers();
+
   // Make sure our internal threads are dead before we start tearing down things they're using.
   Dbg::StopJdwp();
   delete signal_catcher_;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index bcfc51b..1add507 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -931,6 +931,7 @@
   state_and_flags_.as_struct.flags = 0;
   state_and_flags_.as_struct.state = kNative;
   memset(&held_mutexes_[0], 0, sizeof(held_mutexes_));
+  memset(rosalloc_runs_, 0, sizeof(rosalloc_runs_));
 }
 
 bool Thread::IsStillStarting() const {
@@ -1024,6 +1025,8 @@
   delete name_;
   delete stack_trace_sample_;
 
+  Runtime::Current()->GetHeap()->RevokeThreadLocalBuffers(this);
+
   TearDownAlternateSignalStack();
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 745b1ac..db2f7b4 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -798,6 +798,15 @@
 
   friend class ScopedThreadStateChange;
 
+ public:
+  // Thread-local rosalloc runs. There are 34 size brackets
+  // (RosAlloc::kNumOfSizeBrackets). We can't refer to the RosAlloc
+  // class here due to a circular header dependency, so the constant
+  // is duplicated and the two values are checked for equality when
+  // RosAlloc is initialized.
+  static const size_t kRosAllocNumOfSizeBrackets = 34;
+  void* rosalloc_runs_[kRosAllocNumOfSizeBrackets];
+
   DISALLOW_COPY_AND_ASSIGN(Thread);
 };
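
The duplicated bracket count above is only safe because it is cross-checked against RosAlloc::kNumOfSizeBrackets when the allocator starts up. One way to express that check is sketched below; the real check in rosalloc is not part of this hunk, so the static_assert here is illustrative only.

#include <cstddef>

namespace sketch {

struct Thread {
  // Duplicated constant; thread.h cannot include the rosalloc header.
  static const size_t kRosAllocNumOfSizeBrackets = 34;
  void* rosalloc_runs_[kRosAllocNumOfSizeBrackets];
};

struct RosAlloc {
  // The allocator's own bracket count.
  static const size_t kNumOfSizeBrackets = 34;
};

// A mismatch fails the build here; a runtime check at allocator
// initialization achieves the same goal.
static_assert(Thread::kRosAllocNumOfSizeBrackets == RosAlloc::kNumOfSizeBrackets,
              "thread-local run array must match RosAlloc's bracket count");

}  // namespace sketch
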
 
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index aca0561..67fcd58 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -43,6 +43,8 @@
 
 ThreadPoolWorker::~ThreadPoolWorker() {
   CHECK_PTHREAD_CALL(pthread_join, (pthread_, NULL), "thread pool worker shutdown");
+  // TODO: Delete this when the race condition in pthread_join is fixed.
+  usleep(500);
 }
 
 void ThreadPoolWorker::Run() {
diff --git a/runtime/trace.cc b/runtime/trace.cc
index ec95a87..da2c80a 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -570,7 +570,8 @@
                       thread_clock_diff, wall_clock_diff);
 }
 
-void Trace::MethodUnwind(Thread* thread, const mirror::ArtMethod* method, uint32_t dex_pc) {
+void Trace::MethodUnwind(Thread* thread, mirror::Object* this_object,
+                         const mirror::ArtMethod* method, uint32_t dex_pc) {
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
diff --git a/runtime/trace.h b/runtime/trace.h
index ffcb36d..9be015a 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -79,7 +79,8 @@
                             const mirror::ArtMethod* method, uint32_t dex_pc,
                             const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void MethodUnwind(Thread* thread, const mirror::ArtMethod* method, uint32_t dex_pc)
+  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                            const mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
                           const mirror::ArtMethod* method, uint32_t new_dex_pc)
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 9cd8f73..c0e0370 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1029,26 +1029,6 @@
   return true;
 }
 
-static const std::vector<uint8_t>* CreateLengthPrefixedDexGcMap(
-    const std::vector<uint8_t>& gc_map) {
-  std::vector<uint8_t>* length_prefixed_gc_map = new std::vector<uint8_t>;
-  length_prefixed_gc_map->reserve(gc_map.size() + 4);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0xff000000) >> 24);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0x00ff0000) >> 16);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0x0000ff00) >> 8);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0x000000ff) >> 0);
-  length_prefixed_gc_map->insert(length_prefixed_gc_map->end(),
-                                 gc_map.begin(),
-                                 gc_map.end());
-  DCHECK_EQ(gc_map.size() + 4, length_prefixed_gc_map->size());
-  DCHECK_EQ(gc_map.size(),
-            static_cast<size_t>((length_prefixed_gc_map->at(0) << 24) |
-                                (length_prefixed_gc_map->at(1) << 16) |
-                                (length_prefixed_gc_map->at(2) << 8) |
-                                (length_prefixed_gc_map->at(3) << 0)));
-  return length_prefixed_gc_map;
-}
-
 bool MethodVerifier::VerifyCodeFlow() {
   uint16_t registers_size = code_item_->registers_size_;
   uint32_t insns_size = code_item_->insns_size_in_code_units_;
@@ -1088,16 +1068,15 @@
     bool compile = IsCandidateForCompilation(ref, method_access_flags_);
     if (compile) {
       /* Generate a register map and add it to the method. */
-      UniquePtr<const std::vector<uint8_t> > map(GenerateGcMap());
-      if (map.get() == NULL) {
+      const std::vector<uint8_t>* dex_gc_map = GenerateLengthPrefixedGcMap();
+      if (dex_gc_map == NULL) {
         DCHECK_NE(failures_.size(), 0U);
         return false;  // Not a real failure, but a failure to encode
       }
       if (kIsDebugBuild) {
-        VerifyGcMap(*map);
+        VerifyLengthPrefixedGcMap(*dex_gc_map);
       }
-      const std::vector<uint8_t>* dex_gc_map = CreateLengthPrefixedDexGcMap(*(map.get()));
-      verifier::MethodVerifier::SetDexGcMap(ref, *dex_gc_map);
+      verifier::MethodVerifier::SetDexGcMap(ref, dex_gc_map);
     }
 
     if (has_check_casts_) {
@@ -4077,7 +4056,7 @@
   return pc_to_concrete_method_map.release();
 }
 
-const std::vector<uint8_t>* MethodVerifier::GenerateGcMap() {
+const std::vector<uint8_t>* MethodVerifier::GenerateLengthPrefixedGcMap() {
   size_t num_entries, ref_bitmap_bits, pc_bits;
   ComputeGcMapSizes(&num_entries, &ref_bitmap_bits, &pc_bits);
   // There's a single byte to encode the size of each bitmap
@@ -4115,7 +4094,12 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Failed to encode GC map (size=" << table_size << ")";
     return NULL;
   }
-  table->reserve(table_size);
+  table->reserve(table_size + 4);  // table_size plus the length prefix
+  // Write table size
+  table->push_back((table_size & 0xff000000) >> 24);
+  table->push_back((table_size & 0x00ff0000) >> 16);
+  table->push_back((table_size & 0x0000ff00) >> 8);
+  table->push_back((table_size & 0x000000ff) >> 0);
   // Write table header
   table->push_back(format | ((ref_bitmap_bytes >> DexPcToReferenceMap::kRegMapFormatShift) &
                              ~DexPcToReferenceMap::kRegMapFormatMask));
@@ -4133,14 +4117,18 @@
       line->WriteReferenceBitMap(*table, ref_bitmap_bytes);
     }
   }
-  DCHECK_EQ(table->size(), table_size);
+  DCHECK_EQ(table->size(), table_size + 4);  // table_size plus the length prefix
   return table;
 }
 
-void MethodVerifier::VerifyGcMap(const std::vector<uint8_t>& data) {
+void MethodVerifier::VerifyLengthPrefixedGcMap(const std::vector<uint8_t>& data) {
   // Check that for every GC point there is a map entry, there aren't entries for non-GC points,
   // that the table data is well formed and all references are marked (or not) in the bitmap
-  DexPcToReferenceMap map(&data[0], data.size());
+  DCHECK_GE(data.size(), 4u);
+  size_t table_size = data.size() - 4u;
+  DCHECK_EQ(table_size, static_cast<size_t>((data[0] << 24) | (data[1] << 16) |
+                                            (data[2] << 8) | (data[3] << 0)));
+  DexPcToReferenceMap map(&data[4], table_size);
   size_t map_index = 0;
   for (size_t i = 0; i < code_item_->insns_size_in_code_units_; i++) {
     const uint8_t* reg_bitmap = map.FindBitMap(i, false);
@@ -4166,7 +4154,7 @@
   }
 }
 
-void MethodVerifier::SetDexGcMap(MethodReference ref, const std::vector<uint8_t>& gc_map) {
+void MethodVerifier::SetDexGcMap(MethodReference ref, const std::vector<uint8_t>* gc_map) {
   DCHECK(Runtime::Current()->IsCompiler());
   {
     WriterMutexLock mu(Thread::Current(), *dex_gc_maps_lock_);
@@ -4175,7 +4163,7 @@
       delete it->second;
       dex_gc_maps_->erase(it);
     }
-    dex_gc_maps_->Put(ref, &gc_map);
+    dex_gc_maps_->Put(ref, gc_map);
   }
   DCHECK(GetDexGcMap(ref) != NULL);
 }
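
The GC map now carries its own size: GenerateLengthPrefixedGcMap() writes a 4-byte big-endian length before the table and VerifyLengthPrefixedGcMap() re-reads and checks it. Stripped of the verifier context, the round trip is just:

#include <cassert>
#include <cstdint>
#include <vector>

// Prepend the 4-byte big-endian size, as GenerateLengthPrefixedGcMap() does
// before emitting the table header and entries.
void WriteLengthPrefix(std::vector<uint8_t>* table, uint32_t table_size) {
  table->push_back((table_size & 0xff000000) >> 24);
  table->push_back((table_size & 0x00ff0000) >> 16);
  table->push_back((table_size & 0x0000ff00) >> 8);
  table->push_back((table_size & 0x000000ff) >> 0);
}

// Recover the size and check it against the payload, as
// VerifyLengthPrefixedGcMap() does before wrapping the data in a reader.
uint32_t ReadLengthPrefix(const std::vector<uint8_t>& data) {
  assert(data.size() >= 4u);
  uint32_t table_size = (static_cast<uint32_t>(data[0]) << 24) |
                        (static_cast<uint32_t>(data[1]) << 16) |
                        (static_cast<uint32_t>(data[2]) << 8) |
                        (static_cast<uint32_t>(data[3]) << 0);
  assert(table_size == data.size() - 4u);
  return table_size;
}

int main() {
  std::vector<uint8_t> table;
  WriteLengthPrefix(&table, 3u);
  table.push_back(0xAA);  // stand-in bytes for the real GC map payload
  table.push_back(0xBB);
  table.push_back(0xCC);
  assert(ReadLengthPrefix(table) == 3u);
  return 0;
}
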
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 8a663f9..f72898e 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -614,10 +614,10 @@
    * encode it in some clever fashion.
    * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
    */
-  const std::vector<uint8_t>* GenerateGcMap();
+  const std::vector<uint8_t>* GenerateLengthPrefixedGcMap();
 
   // Verify that the GC map associated with method_ is well formed
-  void VerifyGcMap(const std::vector<uint8_t>& data);
+  void VerifyLengthPrefixedGcMap(const std::vector<uint8_t>& data);
 
   // Compute sizes for GC map data
   void ComputeGcMapSizes(size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
@@ -629,7 +629,7 @@
       MethodReferenceComparator> DexGcMapTable;
   static ReaderWriterMutex* dex_gc_maps_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   static DexGcMapTable* dex_gc_maps_ GUARDED_BY(dex_gc_maps_lock_);
-  static void SetDexGcMap(MethodReference ref, const std::vector<uint8_t>& dex_gc_map)
+  static void SetDexGcMap(MethodReference ref, const std::vector<uint8_t>* dex_gc_map)
       LOCKS_EXCLUDED(dex_gc_maps_lock_);
 
 
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index a62e835..9c9673a 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -262,11 +262,11 @@
     FloatType::Destroy();
     DoubleLoType::Destroy();
     DoubleHiType::Destroy();
-    for (uint16_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
+    for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
       PreciseConstType* type = small_precise_constants_[value - kMinSmallConstant];
       delete type;
+      small_precise_constants_[value - kMinSmallConstant] = nullptr;
     }
-
     RegTypeCache::primitive_initialized_ = false;
     RegTypeCache::primitive_count_ = 0;
   }