Merge "Optimizing: Simplify UShr+And, Shr+And."
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 41e9744..96e13ac 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -158,6 +158,7 @@
     $(LIBART_COMPILER_SRC_FILES_mips) \
 	jni/quick/mips64/calling_convention_mips64.cc \
 	optimizing/code_generator_mips64.cc \
+	optimizing/intrinsics_mips64.cc \
 	utils/mips64/assembler_mips64.cc \
 	utils/mips64/managed_register_mips64.cc \
 
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 7082bed..d5ac341 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1126,7 +1126,7 @@
     for (size_t i = 0 ; i < core_vmap_table_.size(); ++i) {
       // Copy, stripping out the phys register sort key.
       vmap_encoder.PushBackUnsigned(
-          ~(-1 << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment));
+          ~(~0u << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment));
     }
     // Push a marker to take place of lr.
     vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
@@ -1141,7 +1141,7 @@
       for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) {
         // Copy, stripping out the phys register sort key.
         vmap_encoder.PushBackUnsigned(
-            ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment));
+            ~(~0u << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment));
       }
     }
   } else {
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index b5ecf9c..1cd742a 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -391,9 +391,9 @@
     Instruction::IGET_SHORT_QUICK,
     Instruction::INVOKE_LAMBDA,
     Instruction::UNUSED_F4,
-    Instruction::UNUSED_F5,
+    Instruction::CAPTURE_VARIABLE,
     Instruction::CREATE_LAMBDA,
-    Instruction::UNUSED_F7,
+    Instruction::LIBERATE_VARIABLE,
     Instruction::BOX_LAMBDA,
     Instruction::UNBOX_LAMBDA,
     Instruction::UNUSED_FA,
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index e19e74f..7ae405a 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -1693,8 +1693,14 @@
   } else {
     // Chained cmp-and-branch, starting from starting_key.
     for (size_t i = 1; i <= num_entries; i++) {
-      BuildSwitchCaseHelper(instruction, i, i == num_entries, table, value,
-                            starting_key + i - 1, table.GetEntryAt(i), dex_pc);
+      BuildSwitchCaseHelper(instruction,
+                            i,
+                            i == num_entries,
+                            table,
+                            value,
+                            starting_key + i - 1,
+                            table.GetEntryAt(i),
+                            dex_pc);
     }
   }
 }
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 3c6a41d..be05691 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -203,13 +203,13 @@
 
 void CodeGenerator::GenerateSlowPaths() {
   size_t code_start = 0;
-  for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) {
+  for (SlowPathCode* slow_path : slow_paths_) {
     if (disasm_info_ != nullptr) {
       code_start = GetAssembler()->CodeSize();
     }
-    slow_paths_.Get(i)->EmitNativeCode(this);
+    slow_path->EmitNativeCode(this);
     if (disasm_info_ != nullptr) {
-      disasm_info_->AddSlowPathInterval(slow_paths_.Get(i), code_start, GetAssembler()->CodeSize());
+      disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize());
     }
   }
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a1c6db0..b58a3ff 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -261,7 +261,7 @@
   bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;
 
   void AddSlowPath(SlowPathCode* slow_path) {
-    slow_paths_.Add(slow_path);
+    slow_paths_.push_back(slow_path);
   }
 
   void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; }
@@ -441,10 +441,12 @@
         graph_(graph),
         compiler_options_(compiler_options),
         src_map_(nullptr),
-        slow_paths_(graph->GetArena(), 8),
+        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         current_block_index_(0),
         is_leaf_(true),
-        requires_current_method_(false) {}
+        requires_current_method_(false) {
+    slow_paths_.reserve(8);
+  }
 
   // Register allocation logic.
   void AllocateRegistersLocally(HInstruction* instruction) const;
@@ -485,8 +487,20 @@
     return instruction_set == kX86 || instruction_set == kX86_64;
   }
 
-  // Arm64 has its own type for a label, so we need to templatize this method
+  // Arm64 has its own type for a label, so we need to templatize these methods
   // to share the logic.
+
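+  // Allocates one default-constructed label per basic block in the compiler arena; the
+  // returned array is indexed by block id.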
+  template <typename LabelType>
+  LabelType* CommonInitializeLabels() {
+    size_t size = GetGraph()->GetBlocks().size();
+    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
+                                                                      kArenaAllocCodeGenerator);
+    for (size_t i = 0; i != size; ++i) {
+      new(labels + i) LabelType();
+    }
+    return labels;
+  }
+
   template <typename LabelType>
   LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
     block = FirstNonEmptyBlock(block);
@@ -539,7 +553,7 @@
 
   // Native to dex_pc map used for native debugging/profiling tools.
   DefaultSrcMap* src_map_;
-  GrowableArray<SlowPathCode*> slow_paths_;
+  ArenaVector<SlowPathCode*> slow_paths_;
 
   // The current block index in `block_order_` of the block
   // we are generating code for.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d7b1d24..da7a675 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -432,7 +432,7 @@
                                         arraysize(kFpuCalleeSaves)),
                     compiler_options,
                     stats),
-      block_labels_(graph->GetArena(), 0),
+      block_labels_(nullptr),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
@@ -459,8 +459,8 @@
   for (HBasicBlock* block : *block_order_) {
     // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid
     // FirstNonEmptyBlock() which could lead to adjusting a label more than once.
-    DCHECK_LT(static_cast<size_t>(block->GetBlockId()), block_labels_.Size());
-    Label* block_label = &block_labels_.GetRawStorage()[block->GetBlockId()];
+    DCHECK_LT(block->GetBlockId(), GetGraph()->GetBlocks().size());
+    Label* block_label = &block_labels_[block->GetBlockId()];
     DCHECK_EQ(block_label->IsBound(), !block->IsSingleJump());
     if (block_label->IsBound()) {
       __ AdjustLabelPosition(block_label);
@@ -4034,7 +4034,8 @@
 }
 
 void ParallelMoveResolverARM::EmitMove(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
@@ -4166,7 +4167,8 @@
 }
 
 void ParallelMoveResolverARM::EmitSwap(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 1d98789..111112e 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -299,11 +299,11 @@
   void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
 
   Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
+    return CommonGetLabelOf<Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_.SetSize(GetGraph()->GetBlocks().size());
+    block_labels_ = CommonInitializeLabels<Label>();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -335,7 +335,7 @@
   Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
 
   // Labels for each block that will be compiled.
-  GrowableArray<Label> block_labels_;
+  Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index d175532..31900d5 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -610,7 +610,8 @@
 }
 
 void ParallelMoveResolverARM64::EmitMove(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   codegen_->MoveLocation(move->GetDestination(), move->GetSource());
 }
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 8967108..7178081 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -326,12 +326,7 @@
   }
 
   void Initialize() OVERRIDE {
-    HGraph* graph = GetGraph();
-    int length = graph->GetBlocks().size();
-    block_labels_ = graph->GetArena()->AllocArray<vixl::Label>(length);
-    for (int i = 0; i < length; ++i) {
-      new(block_labels_ + i) vixl::Label();
-    }
+    block_labels_ = CommonInitializeLabels<vixl::Label>();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -400,7 +395,7 @@
   };
 
   // Labels for each block that will be compiled.
-  vixl::Label* block_labels_;
+  vixl::Label* block_labels_;  // Indexed by block id.
   vixl::Label frame_entry_label_;
 
   LocationsBuilderARM64 location_builder_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 8fdd56e..c9f8493 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -20,7 +20,9 @@
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
+#include "intrinsics_mips64.h"
 #include "art_method.h"
+#include "code_generator_utils.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "offsets.h"
@@ -36,7 +38,6 @@
 static constexpr GpuRegister kMethodRegisterArgument = A0;
 
 // We need extra temporary/scratch registers (in addition to AT) in some cases.
-static constexpr GpuRegister TMP = T8;
 static constexpr FpuRegister FTMP = F8;
 
 // ART Thread Register.
@@ -430,7 +431,7 @@
                                         arraysize(kFpuCalleeSaves)),
                     compiler_options,
                     stats),
-      block_labels_(graph->GetArena(), 0),
+      block_labels_(nullptr),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
@@ -452,12 +453,14 @@
 }
 
 void ParallelMoveResolverMIPS64::EmitMove(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType());
 }
 
 void ParallelMoveResolverMIPS64::EmitSwap(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType());
 }
 
@@ -2395,7 +2398,11 @@
 }
 
 void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  // TODO intrinsic function
+  IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
@@ -2404,7 +2411,11 @@
   // invokes must have been pruned by art::PrepareForRegisterAllocation.
   DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
-  // TODO - intrinsic function
+  IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 
   // While SetupBlockedRegisters() blocks registers S2-S8 due to their
@@ -2419,10 +2430,10 @@
   }
 }
 
-static bool TryGenerateIntrinsicCode(HInvoke* invoke,
-                                     CodeGeneratorMIPS64* codegen ATTRIBUTE_UNUSED) {
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) {
   if (invoke->GetLocations()->Intrinsified()) {
-    // TODO - intrinsic function
+    IntrinsicCodeGeneratorMIPS64 intrinsic(codegen);
+    intrinsic.Dispatch(invoke);
     return true;
   }
   return false;
@@ -2531,7 +2542,10 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  // TODO: Try to generate intrinsics code.
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
   GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index f66ecb3..16461d6 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -270,11 +270,11 @@
   }
 
   Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
+    return CommonGetLabelOf<Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_.SetSize(GetGraph()->GetBlocks().size());
+    block_labels_ = CommonInitializeLabels<Label>();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -315,7 +315,7 @@
 
  private:
   // Labels for each block that will be compiled.
-  GrowableArray<Label> block_labels_;
+  Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
   LocationsBuilderMIPS64 location_builder_;
   InstructionCodeGeneratorMIPS64 instruction_visitor_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index ab3d1d1..277f6b4 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -470,7 +470,7 @@
                     0,
                     compiler_options,
                     stats),
-      block_labels_(graph->GetArena(), 0),
+      block_labels_(nullptr),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
@@ -4630,7 +4630,8 @@
 }
 
 void ParallelMoveResolverX86::EmitMove(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
@@ -4782,7 +4783,8 @@
 }
 
 void ParallelMoveResolverX86::EmitSwap(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f38e1ea..2c2fc65 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -316,11 +316,11 @@
                   bool value_can_be_null);
 
   Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
+    return CommonGetLabelOf<Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_.SetSize(GetGraph()->GetBlocks().size());
+    block_labels_ = CommonInitializeLabels<Label>();
   }
 
   bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
@@ -356,7 +356,7 @@
 
  private:
   // Labels for each block that will be compiled.
-  GrowableArray<Label> block_labels_;
+  Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
   LocationsBuilderX86 location_builder_;
   InstructionCodeGeneratorX86 instruction_visitor_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cfce7a0..453c6fd 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -620,7 +620,7 @@
                                           arraysize(kFpuCalleeSaves)),
                       compiler_options,
                       stats),
-        block_labels_(graph->GetArena(), 0),
+        block_labels_(nullptr),
         location_builder_(graph, this),
         instruction_visitor_(graph, this),
         move_resolver_(graph->GetArena(), this),
@@ -4373,7 +4373,8 @@
 }
 
 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
@@ -4531,7 +4532,8 @@
 }
 
 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1ec3580..197ce63 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -286,11 +286,11 @@
   void Move(Location destination, Location source);
 
   Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
+    return CommonGetLabelOf<Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_.SetSize(GetGraph()->GetBlocks().size());
+    block_labels_ = CommonInitializeLabels<Label>();
   }
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
@@ -334,7 +334,7 @@
   };
 
   // Labels for each block that will be compiled.
-  GrowableArray<Label> block_labels_;
+  Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
   LocationsBuilderX86_64 location_builder_;
   InstructionCodeGeneratorX86_64 instruction_visitor_;
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index b322759..007d0e3 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -56,7 +56,11 @@
       if (switch_input->IsIntConstant()) {
         int32_t switch_value = switch_input->AsIntConstant()->GetValue();
         int32_t start_value = switch_instruction->GetStartValue();
-        uint32_t switch_index = static_cast<uint32_t>(switch_value - start_value);
+        // Note: Though the spec forbids packed-switch values from wrapping around, we leave
+        // that task to the verifier and use unsigned arithmetic with its "modulo 2^32"
+        // semantics to check whether the value is in range, wrapped or not.
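+        // For example, with start_value 0x7FFFFFFE and a wrapped switch_value of 0x80000000,
+        // the unsigned difference is 2, which is then checked against GetNumEntries() below.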
+        uint32_t switch_index =
+            static_cast<uint32_t>(switch_value) - static_cast<uint32_t>(start_value);
         if (switch_index < switch_instruction->GetNumEntries()) {
           live_successors = live_successors.SubArray(switch_index, 1u);
           DCHECK_EQ(live_successors[0], block->GetSuccessor(switch_index));
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index d05c514..2c6c3b7 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -374,6 +374,11 @@
         << instance_of->MustDoNullCheck() << std::noboolalpha;
   }
 
+  void VisitArraySet(HArraySet* array_set) OVERRIDE {
+    StartAttributeStream("value_can_be_null") << std::boolalpha
+        << array_set->GetValueCanBeNull() << std::noboolalpha;
+  }
+
   void VisitInvoke(HInvoke* invoke) OVERRIDE {
     StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex();
     StartAttributeStream("method_name") << PrettyMethod(
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index abdda13..22bca2f 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -71,7 +71,8 @@
   void VisitXor(HXor* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
   void VisitFakeString(HFakeString* fake_string) OVERRIDE;
-  bool IsDominatedByInputNullCheck(HInstruction* instr);
+
+  bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
@@ -187,14 +188,18 @@
   }
 }
 
-bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* instr) {
-  HInstruction* input = instr->InputAt(0);
+bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInstruction* at) const {
+  if (!input->CanBeNull()) {
+    return true;
+  }
+
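+  // Otherwise, treat the input as non-null at `at` only if some null check on it strictly
+  // dominates `at`.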
   for (HUseIterator<HInstruction*> it(input->GetUses()); !it.Done(); it.Advance()) {
     HInstruction* use = it.Current()->GetUser();
-    if (use->IsNullCheck() && use->StrictlyDominates(instr)) {
+    if (use->IsNullCheck() && use->StrictlyDominates(at)) {
       return true;
     }
   }
+
   return false;
 }
 
@@ -231,7 +236,7 @@
 
 void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
   HInstruction* object = check_cast->InputAt(0);
-  if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) {
+  if (CanEnsureNotNullAt(object, check_cast)) {
     check_cast->ClearMustDoNullCheck();
   }
 
@@ -267,7 +272,7 @@
 void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
   HInstruction* object = instruction->InputAt(0);
   bool can_be_null = true;
-  if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) {
+  if (CanEnsureNotNullAt(object, instruction)) {
     can_be_null = false;
     instruction->ClearMustDoNullCheck();
   }
@@ -305,14 +310,14 @@
 
 void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   if ((instruction->GetValue()->GetType() == Primitive::kPrimNot)
-      && !instruction->GetValue()->CanBeNull()) {
+      && CanEnsureNotNullAt(instruction->GetValue(), instruction)) {
     instruction->ClearValueCanBeNull();
   }
 }
 
 void InstructionSimplifierVisitor::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   if ((instruction->GetValue()->GetType() == Primitive::kPrimNot)
-      && !instruction->GetValue()->CanBeNull()) {
+      && CanEnsureNotNullAt(instruction->GetValue(), instruction)) {
     instruction->ClearValueCanBeNull();
   }
 }
@@ -437,7 +442,7 @@
     instruction->ClearNeedsTypeCheck();
   }
 
-  if (!value->CanBeNull()) {
+  if (CanEnsureNotNullAt(value, instruction)) {
     instruction->ClearValueCanBeNull();
   }
 }
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index b71fdb8..9564622 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -90,7 +90,7 @@
 }
 
 static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) {
-  if (instruction_set == kMips || instruction_set == kMips64) {
+  if (instruction_set == kMips) {
     return Intrinsics::kNone;
   }
   switch (method.opcode) {
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
new file mode 100644
index 0000000..52e2cbe
--- /dev/null
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -0,0 +1,782 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_mips64.h"
+
+#include "arch/mips64/instruction_set_features_mips64.h"
+#include "art_method.h"
+#include "code_generator_mips64.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/mips64/assembler_mips64.h"
+#include "utils/mips64/constants_mips64.h"
+
+namespace art {
+
+namespace mips64 {
+
+IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen)
+  : arena_(codegen->GetGraph()->GetArena()) {
+}
+
+Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() {
+  return reinterpret_cast<Mips64Assembler*>(codegen_->GetAssembler());
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() {
+  return codegen_->GetGraph()->GetArena();
+}
+
+bool IntrinsicLocationsBuilderMIPS64::TryDispatch(HInvoke* invoke) {
+  Dispatch(invoke);
+  LocationSummary* res = invoke->GetLocations();
+  return res != nullptr && res->Intrinsified();
+}
+
+#define __ assembler->
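+// With this shorthand, `__ Dmfc1(out, in)` below expands to `assembler->Dmfc1(out, in)`,
+// so the generators read like assembly listings.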
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  FpuRegister in  = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
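+  // Dmfc1/Mfc1 copy the raw FPU register bits into a GPR without conversion, which is
+  // exactly the "raw bits" semantics these intrinsics require.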
+  if (is64bit) {
+    __ Dmfc1(out, in);
+  } else {
+    __ Mfc1(out, in);
+  }
+}
+
+// long java.lang.Double.doubleToRawLongBits(double)
+void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+
+// int java.lang.Float.floatToRawIntBits(float)
+void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  if (is64bit) {
+    __ Dmtc1(in, out);
+  } else {
+    __ Mtc1(in, out);
+  }
+}
+
+// double java.lang.Double.longBitsToDouble(long)
+void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+// float java.lang.Float.intBitsToFloat(int)
+void IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenReverseBytes(LocationSummary* locations,
+                            Primitive::Type type,
+                            Mips64Assembler* assembler) {
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
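+  // Each case combines a byte swap within halfwords with a halfword (or word) permutation
+  // to reverse the byte order of the requested width; the 16-bit case also sign-extends.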
+  switch (type) {
+    case Primitive::kPrimShort:
+      __ Dsbh(out, in);
+      __ Seh(out, out);
+      break;
+    case Primitive::kPrimInt:
+      __ Rotr(out, in, 16);
+      __ Wsbh(out, out);
+      break;
+    case Primitive::kPrimLong:
+      __ Dsbh(out, in);
+      __ Dshd(out, out);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
+      UNREACHABLE();
+  }
+}
+
+// int java.lang.Integer.reverseBytes(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.reverseBytes(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+// short java.lang.Short.reverseBytes(short)
+void IntrinsicLocationsBuilderMIPS64::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitShortReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+static void GenCountZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (is64bit) {
+    __ Dclz(out, in);
+  } else {
+    __ Clz(out, in);
+  }
+}
+
+// int java.lang.Integer.numberOfLeadingZeros(int i)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  GenCountZeroes(invoke->GetLocations(), false, GetAssembler());
+}
+
+// int java.lang.Long.numberOfLeadingZeros(long i)
+void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  GenCountZeroes(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenReverse(LocationSummary* locations,
+                       Primitive::Type type,
+                       Mips64Assembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
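+  // Reverse the byte order first (as in GenReverseBytes above), then Bitswap/Dbitswap
+  // reverses the bits within each byte, yielding a full bit reversal.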
+  if (type == Primitive::kPrimInt) {
+    __ Rotr(out, in, 16);
+    __ Wsbh(out, out);
+    __ Bitswap(out, out);
+  } else {
+    __ Dsbh(out, in);
+    __ Dshd(out, out);
+    __ Dbitswap(out, out);
+  }
+}
+
+// int java.lang.Integer.reverse(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.reverse(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  if (is64bit) {
+    __ AbsD(out, in);
+  } else {
+    __ AbsS(out, in);
+  }
+}
+
+// double java.lang.Math.abs(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+// float java.lang.Math.abs(float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
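+  // Branchless abs: AT becomes 0 for non-negative inputs and all ones for negative inputs
+  // (arithmetic shift of the sign bit), so (in ^ AT) - AT leaves non-negative values
+  // unchanged and negates negative ones.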
+  if (is64bit) {
+    __ Dsra32(AT, in, 31);
+    __ Xor(out, in, AT);
+    __ Dsubu(out, out, AT);
+  } else {
+    __ Sra(AT, in, 31);
+    __ Xor(out, in, AT);
+    __ Subu(out, out, AT);
+  }
+}
+
+// int java.lang.Math.abs(int)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) {
+  CreateIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+}
+
+// long java.lang.Math.abs(long)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) {
+  CreateIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenMinMaxFP(LocationSummary* locations,
+                        bool is_min,
+                        bool is_double,
+                        Mips64Assembler* assembler) {
+  FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  if (is_double) {
+    if (is_min) {
+      __ MinD(out, lhs, rhs);
+    } else {
+      __ MaxD(out, lhs, rhs);
+    }
+  } else {
+    if (is_min) {
+      __ MinS(out, lhs, rhs);
+    } else {
+      __ MaxS(out, lhs, rhs);
+    }
+  }
+}
+
+static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+// double java.lang.Math.min(double, double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+// float java.lang.Math.min(float, float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+// double java.lang.Math.max(double, double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+// float java.lang.Math.max(float, float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+static void GenMinMax(LocationSummary* locations,
+                      bool is_min,
+                      Mips64Assembler* assembler) {
+  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
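+  // The R6 select instructions write zero when their condition is not met, so after the
+  // Seleqz/Selnez pair exactly one of `out` and AT holds the chosen operand and the final
+  // Or() merges them. The two branches differ only in operand order, so that an input
+  // aliased with `out` is read before it can be clobbered.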
+  if (out == lhs) {
+    __ Slt(AT, rhs, lhs);
+    if (is_min) {
+      __ Seleqz(out, lhs, AT);
+      __ Selnez(AT, rhs, AT);
+    } else {
+      __ Selnez(out, lhs, AT);
+      __ Seleqz(AT, rhs, AT);
+    }
+  } else {
+    __ Slt(AT, lhs, rhs);
+    if (is_min) {
+      __ Seleqz(out, rhs, AT);
+      __ Selnez(AT, lhs, AT);
+    } else {
+      __ Selnez(out, rhs, AT);
+      __ Seleqz(AT, lhs, AT);
+    }
+  }
+  __ Or(out, out, AT);
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+// int java.lang.Math.min(int, int)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+}
+
+// long java.lang.Math.min(long, long)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+}
+
+// int java.lang.Math.max(int, int)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+}
+
+// long java.lang.Math.max(long, long)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+}
+
+// double java.lang.Math.sqrt(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathSqrt(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathSqrt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  __ SqrtD(out, in);
+}
+
+static void CreateFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+// double java.lang.Math.rint(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRint(HInvoke* invoke) {
+  CreateFPToFP(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  __ RintD(out, in);
+}
+
+// double java.lang.Math.floor(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathFloor(HInvoke* invoke) {
+  CreateFPToFP(arena_, invoke);
+}
+
+// 0x200 - +zero
+// 0x040 - +infinity
+// 0x020 - -zero
+// 0x004 - -infinity
+// 0x002 - quiet NaN
+// 0x001 - signaling NaN
+const constexpr uint16_t CLASS_MASK = 0x267;
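+// CLASS_MASK is the bitwise OR of the class bits above, so (class(in) & CLASS_MASK) != 0
+// exactly when the input is a zero, an infinity or a NaN.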
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  Label done;
+
+  // double floor(double in) {
+  //     if in.isNaN || in.isInfinite || in.isZero {
+  //         return in;
+  //     }
+  __ ClassD(out, in);
+  __ Dmfc1(AT, out);
+  __ Andi(AT, AT, CLASS_MASK);       // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
+  __ MovD(out, in);
+  __ Bnezc(AT, &done);
+
+  //     Long outLong = floor(in);
+  //     if outLong == Long.MAX_VALUE {
+  //         // floor() has almost certainly returned a value which
+  //         // can't be successfully represented as a signed 64-bit
+  //         // number.  Java expects that the input value will be
+  //         // returned in these cases.
+  //         // There is also a small probability that floor(in)
+  //         // correctly truncates the input value to Long.MAX_VALUE.  In
+  //         // that case, this exception handling code still does the
+  //         // correct thing.
+  //         return in;
+  //     }
+  __ FloorLD(out, in);
+  __ Dmfc1(AT, out);
+  __ MovD(out, in);
+  __ LoadConst64(TMP, kPrimLongMax);
+  __ Beqc(AT, TMP, &done);
+
+  //     double out = outLong;
+  //     return out;
+  __ Dmtc1(AT, out);
+  __ Cvtdl(out, out);
+  __ Bind(&done);
+  // }
+}
+
+// double java.lang.Math.ceil(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathCeil(HInvoke* invoke) {
+  CreateFPToFP(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  Label done;
+
+  // double ceil(double in) {
+  //     if in.isNaN || in.isInfinite || in.isZero {
+  //         return in;
+  //     }
+  __ ClassD(out, in);
+  __ Dmfc1(AT, out);
+  __ Andi(AT, AT, CLASS_MASK);       // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
+  __ MovD(out, in);
+  __ Bnezc(AT, &done);
+
+  //     Long outLong = ceil(in);
+  //     if outLong == Long.MAX_VALUE {
+  //         // ceil() has almost certainly returned a value which
+  //         // can't be successfully represented as a signed 64-bit
+  //         // number.  Java expects that the input value will be
+  //         // returned in these cases.
+  //         // There is also a small probability that ceil(in)
+  //         // correctly rounds up the input value to Long.MAX_VALUE.  In
+  //         // that case, this exception handling code still does the
+  //         // correct thing.
+  //         return in;
+  //     }
+  __ CeilLD(out, in);
+  __ Dmfc1(AT, out);
+  __ MovD(out, in);
+  __ LoadConst64(TMP, kPrimLongMax);
+  __ Beqc(AT, TMP, &done);
+
+  //     double out = outLong;
+  //     return out;
+  __ Dmtc1(AT, out);
+  __ Cvtdl(out, out);
+  __ Bind(&done);
+  // }
+}
+
+// byte libcore.io.Memory.peekByte(long address)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekByte(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>();
+
+  __ Lb(out, adr, 0);
+}
+
+// short libcore.io.Memory.peekShort(long address)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>();
+
+  __ Lh(out, adr, 0);
+}
+
+// int libcore.io.Memory.peekInt(long address)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>();
+
+  __ Lw(out, adr, 0);
+}
+
+// long libcore.io.Memory.peekLong(long address)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>();
+
+  __ Ld(out, adr, 0);
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+// void libcore.io.Memory.pokeByte(long address, byte value)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeByte(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>();
+
+  __ Sb(val, adr, 0);
+}
+
+// void libcore.io.Memory.pokeShort(long address, short value)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>();
+
+  __ Sh(val, adr, 0);
+}
+
+// void libcore.io.Memory.pokeInt(long address, int value)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>();
+
+  __ Sw(val, adr, 0);
+}
+
+// void libcore.io.Memory.pokeLong(long address, long value)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>();
+
+  __ Sd(val, adr, 0);
+}
+
+// Unimplemented intrinsics.
+
+#define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
+void IntrinsicLocationsBuilderMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}                                                                                      \
+void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
+}
+
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
+
+UNIMPLEMENTED_INTRINSIC(ThreadCurrentThread)
+UNIMPLEMENTED_INTRINSIC(UnsafeGet)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetLongVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetObject)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetObjectVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafePut)
+UNIMPLEMENTED_INTRINSIC(UnsafePutOrdered)
+UNIMPLEMENTED_INTRINSIC(UnsafePutVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafePutObject)
+UNIMPLEMENTED_INTRINSIC(UnsafePutObjectOrdered)
+UNIMPLEMENTED_INTRINSIC(UnsafePutObjectVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafePutLong)
+UNIMPLEMENTED_INTRINSIC(UnsafePutLongOrdered)
+UNIMPLEMENTED_INTRINSIC(UnsafePutLongVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
+UNIMPLEMENTED_INTRINSIC(StringCharAt)
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringEquals)
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes)
+UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars)
+UNIMPLEMENTED_INTRINSIC(StringNewStringFromString)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros)
+
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+
+#undef UNIMPLEMENTED_INTRINSIC
+
+#undef __
+
+}  // namespace mips64
+}  // namespace art
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
new file mode 100644
index 0000000..1481d24
--- /dev/null
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace mips64 {
+
+class CodeGeneratorMIPS64;
+class Mips64Assembler;
+
+class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen);
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+  // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+  // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+  // the invoke.
+  bool TryDispatch(HInvoke* invoke);
+
+ private:
+  ArenaAllocator* arena_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64);
+};
+
+class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicCodeGeneratorMIPS64(CodeGeneratorMIPS64* codegen) : codegen_(codegen) {}
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+  Mips64Assembler* GetAssembler();
+
+  ArenaAllocator* GetAllocator();
+
+  CodeGeneratorMIPS64* codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS64);
+};
+
+}  // namespace mips64
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 2eeba18..76bd595 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -22,7 +22,6 @@
 #include "base/bit_field.h"
 #include "base/bit_vector.h"
 #include "base/value_object.h"
-#include "utils/growable_array.h"
 
 namespace art {
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ef89932..989970f 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -22,7 +22,6 @@
 #include "base/bit_utils.h"
 #include "base/stl_util.h"
 #include "mirror/class-inl.h"
-#include "utils/growable_array.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6b0ccf8..486968c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -35,7 +35,6 @@
 #include "offsets.h"
 #include "primitive.h"
 #include "utils/arena_bit_vector.h"
-#include "utils/growable_array.h"
 
 namespace art {
 
@@ -2409,7 +2408,9 @@
 // will be the block containing the next Dex opcode.
 class HPackedSwitch : public HTemplateInstruction<1> {
  public:
-  HPackedSwitch(int32_t start_value, uint32_t num_entries, HInstruction* input,
+  HPackedSwitch(int32_t start_value,
+                uint32_t num_entries,
+                HInstruction* input,
                 uint32_t dex_pc = kNoDexPc)
     : HTemplateInstruction(SideEffects::None(), dex_pc),
       start_value_(start_value),
@@ -2430,8 +2431,8 @@
   DECLARE_INSTRUCTION(PackedSwitch);
 
  private:
-  int32_t start_value_;
-  uint32_t num_entries_;
+  const int32_t start_value_;
+  const uint32_t num_entries_;
 
   DISALLOW_COPY_AND_ASSIGN(HPackedSwitch);
 };
@@ -5054,7 +5055,10 @@
 class HParallelMove : public HTemplateInstruction<0> {
  public:
   explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc), moves_(arena, kDefaultNumberOfMoves) {}
+      : HTemplateInstruction(SideEffects::None(), dex_pc),
+        moves_(arena->Adapter(kArenaAllocMoveOperands)) {
+    moves_.reserve(kDefaultNumberOfMoves);
+  }
 
   void AddMove(Location source,
                Location destination,
@@ -5064,15 +5068,15 @@
     DCHECK(destination.IsValid());
     if (kIsDebugBuild) {
       if (instruction != nullptr) {
-        for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
-          if (moves_.Get(i).GetInstruction() == instruction) {
+        for (const MoveOperands& move : moves_) {
+          if (move.GetInstruction() == instruction) {
             // Special case the situation where the move is for the spill slot
             // of the instruction.
             if ((GetPrevious() == instruction)
                 || ((GetPrevious() == nullptr)
                     && instruction->IsPhi()
                     && instruction->GetBlock() == GetBlock())) {
-              DCHECK_NE(destination.GetKind(), moves_.Get(i).GetDestination().GetKind())
+              DCHECK_NE(destination.GetKind(), move.GetDestination().GetKind())
                   << "Doing parallel moves for the same instruction.";
             } else {
               DCHECK(false) << "Doing parallel moves for the same instruction.";
@@ -5080,26 +5084,27 @@
           }
         }
       }
-      for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
-        DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination()))
+      for (const MoveOperands& move : moves_) {
+        DCHECK(!destination.OverlapsWith(move.GetDestination()))
             << "Overlapped destination for two moves in a parallel move: "
-            << moves_.Get(i).GetSource() << " ==> " << moves_.Get(i).GetDestination() << " and "
+            << move.GetSource() << " ==> " << move.GetDestination() << " and "
             << source << " ==> " << destination;
       }
     }
-    moves_.Add(MoveOperands(source, destination, type, instruction));
+    moves_.emplace_back(source, destination, type, instruction);
   }
 
-  MoveOperands* MoveOperandsAt(size_t index) const {
-    return moves_.GetRawStorage() + index;
+  MoveOperands* MoveOperandsAt(size_t index) {
+    DCHECK_LT(index, moves_.size());
+    return &moves_[index];
   }
 
-  size_t NumMoves() const { return moves_.Size(); }
+  size_t NumMoves() const { return moves_.size(); }
 
   DECLARE_INSTRUCTION(ParallelMove);
 
  private:
-  GrowableArray<MoveOperands> moves_;
+  ArenaVector<MoveOperands> moves_;
 
   DISALLOW_COPY_AND_ASSIGN(HParallelMove);
 };
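
The HParallelMove hunk above switches the move list from GrowableArray to an ArenaVector built with the arena's STL adapter. A minimal sketch of the same container pattern follows, using std::vector as a stand-in so it compiles on its own; MoveOperands and kDefaultNumberOfMoves are simplified placeholders here, not the real ART types.

// Sketch only: models the Add/Get/Size -> emplace_back/operator[]/size migration.
#include <cassert>
#include <cstddef>
#include <vector>

struct MoveOperands {  // simplified stand-in for art::MoveOperands
  int source;
  int destination;
  MoveOperands(int src, int dst) : source(src), destination(dst) {}
};

class ParallelMoveSketch {
 public:
  ParallelMoveSketch() {
    moves_.reserve(kDefaultNumberOfMoves);       // replaces GrowableArray's init_length
  }

  void AddMove(int source, int destination) {
    moves_.emplace_back(source, destination);    // was moves_.Add(MoveOperands(...))
  }

  MoveOperands* MoveOperandsAt(size_t index) {
    assert(index < moves_.size());               // was GetRawStorage() + index
    return &moves_[index];
  }

  size_t NumMoves() const { return moves_.size(); }  // was moves_.Size()

 private:
  static constexpr size_t kDefaultNumberOfMoves = 4;
  std::vector<MoveOperands> moves_;              // ArenaVector<MoveOperands> in ART
};
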
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index f9d812f..fce7769 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -16,6 +16,8 @@
 #include <iostream>
 
 #include "parallel_move_resolver.h"
+
+#include "base/stl_util.h"
 #include "nodes.h"
 
 namespace art {
@@ -28,19 +30,19 @@
   for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
     MoveOperands* move = parallel_move->MoveOperandsAt(i);
     if (!move->IsRedundant()) {
-      moves_.Add(move);
+      moves_.push_back(move);
     }
   }
 }
 
 void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
-  DCHECK(moves_.IsEmpty());
+  DCHECK(moves_.empty());
   // Build up a worklist of moves.
   BuildInitialMoveList(parallel_move);
 
   // Move stack/stack slot to take advantage of a free register on constrained machines.
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& move = *moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    const MoveOperands& move = *moves_[i];
     // Ignore constants and moves already eliminated.
     if (move.IsEliminated() || move.GetSource().IsConstant()) {
       continue;
@@ -52,8 +54,8 @@
     }
   }
 
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& move = *moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    const MoveOperands& move = *moves_[i];
     // Skip constants to perform them last.  They don't block other moves
     // and skipping such moves with register destinations keeps those
     // registers free for the whole algorithm.
@@ -63,8 +65,8 @@
   }
 
   // Perform the moves with constant sources.
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    MoveOperands* move = moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    MoveOperands* move = moves_[i];
     if (!move->IsEliminated()) {
       DCHECK(move->GetSource().IsConstant());
       EmitMove(i);
@@ -73,7 +75,7 @@
     }
   }
 
-  moves_.Reset();
+  moves_.clear();
 }
 
 Location LowOf(Location location) {
@@ -123,7 +125,8 @@
   // which means that a call to PerformMove could change any source operand
   // in the move graph.
 
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   DCHECK(!move->IsPending());
   if (move->IsRedundant()) {
     // Because we swap register pairs first, following, un-pending
@@ -143,8 +146,8 @@
   // as this one's destination blocks this one so recursively perform all
   // such moves.
   MoveOperands* required_swap = nullptr;
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& other_move = *moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    const MoveOperands& other_move = *moves_[i];
     if (other_move.Blocks(destination) && !other_move.IsPending()) {
       // Though PerformMove can change any source operand in the move graph,
       // calling `PerformMove` cannot create a blocking move via a swap
@@ -163,7 +166,7 @@
         // at the next moves. Swapping is not blocked by anything, it just
         // updates the other moves' sources.
         break;
-      } else if (required_swap == moves_.Get(i)) {
+      } else if (required_swap == moves_[i]) {
         // If `other_move` was swapped, we iterate again to find a new
         // potential cycle.
         required_swap = nullptr;
@@ -171,7 +174,7 @@
       } else if (required_swap != nullptr) {
         // A move is required to swap. We walk back the cycle to find the
         // move by just returning from this `PerforrmMove`.
-        moves_.Get(index)->ClearPending(destination);
+        moves_[index]->ClearPending(destination);
         return required_swap;
       }
     }
@@ -197,14 +200,13 @@
     DCHECK_EQ(required_swap, move);
     do_swap = true;
   } else {
-    for (size_t i = 0; i < moves_.Size(); ++i) {
-      const MoveOperands& other_move = *moves_.Get(i);
-      if (other_move.Blocks(destination)) {
-        DCHECK(other_move.IsPending());
-        if (!move->Is64BitMove() && other_move.Is64BitMove()) {
+    for (MoveOperands* other_move : moves_) {
+      if (other_move->Blocks(destination)) {
+        DCHECK(other_move->IsPending());
+        if (!move->Is64BitMove() && other_move->Is64BitMove()) {
           // We swap 64bits moves before swapping 32bits moves. Go back from the
           // cycle by returning the move that must be swapped.
-          return moves_.Get(i);
+          return other_move;
         }
         do_swap = true;
         break;
@@ -220,12 +222,11 @@
     Location source = move->GetSource();
     Location swap_destination = move->GetDestination();
     move->Eliminate();
-    for (size_t i = 0; i < moves_.Size(); ++i) {
-      const MoveOperands& other_move = *moves_.Get(i);
-      if (other_move.Blocks(source)) {
-        UpdateSourceOf(moves_.Get(i), source, swap_destination);
-      } else if (other_move.Blocks(swap_destination)) {
-        UpdateSourceOf(moves_.Get(i), swap_destination, source);
+    for (MoveOperands* other_move : moves_) {
+      if (other_move->Blocks(source)) {
+        UpdateSourceOf(other_move, source, swap_destination);
+      } else if (other_move->Blocks(swap_destination)) {
+        UpdateSourceOf(other_move, swap_destination, source);
       }
     }
     // If the swap was required because of a 64bits move in the middle of a cycle,
@@ -242,14 +243,14 @@
 }
 
 bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    if (moves_.Get(i)->Blocks(loc)) {
+  for (MoveOperands* move : moves_) {
+    if (move->Blocks(loc)) {
       return false;
     }
   }
 
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    if (moves_.Get(i)->GetDestination().Equals(loc)) {
+  for (MoveOperands* move : moves_) {
+    if (move->GetDestination().Equals(loc)) {
       return true;
     }
   }
@@ -302,8 +303,8 @@
 
 void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
   DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
-  DCHECK(moves_.IsEmpty());
-  DCHECK(scratches_.IsEmpty());
+  DCHECK(moves_.empty());
+  DCHECK(scratches_.empty());
 
   // Backend dependent initialization.
   PrepareForEmitNativeCode();
@@ -311,8 +312,8 @@
   // Build up a worklist of moves.
   BuildInitialMoveList(parallel_move);
 
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& move = *moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    const MoveOperands& move = *moves_[i];
     // Skip constants to perform them last. They don't block other moves and
     // skipping such moves with register destinations keeps those registers
     // free for the whole algorithm.
@@ -324,8 +325,8 @@
   // Perform the moves with constant sources and register destinations with UpdateMoveSource()
   // to reduce the number of literal loads. Stack destinations are skipped since we won't benefit
   // from changing the constant sources to stack locations.
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    MoveOperands* move = moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    MoveOperands* move = moves_[i];
     Location destination = move->GetDestination();
     if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
       Location source = move->GetSource();
@@ -344,8 +345,8 @@
   }
 
   // Perform the rest of the moves.
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    MoveOperands* move = moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    MoveOperands* move = moves_[i];
     if (!move->IsEliminated()) {
       EmitMove(i);
       move->Eliminate();
@@ -358,19 +359,18 @@
   // Backend dependent cleanup.
   FinishEmitNativeCode();
 
-  moves_.Reset();
-  scratches_.Reset();
+  moves_.clear();
+  scratches_.clear();
 }
 
 Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
-  for (size_t i = 0; i < scratches_.Size(); ++i) {
-    Location loc = scratches_.Get(i);
+  for (Location loc : scratches_) {
     if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
       return loc;
     }
   }
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    Location loc = moves_.Get(i)->GetDestination();
+  for (MoveOperands* move : moves_) {
+    Location loc = move->GetDestination();
     if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
       return loc;
     }
@@ -380,18 +380,18 @@
 
 void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
   if (kIsDebugBuild) {
-    for (size_t i = 0; i < scratches_.Size(); ++i) {
-      DCHECK(!loc.Equals(scratches_.Get(i)));
+    for (Location scratch : scratches_) {
+      CHECK(!loc.Equals(scratch));
     }
   }
-  scratches_.Add(loc);
+  scratches_.push_back(loc);
 }
 
 void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
   DCHECK(!IsBlockedByMoves(loc));
-  for (size_t i = 0; i < scratches_.Size(); ++i) {
-    if (loc.Equals(scratches_.Get(i))) {
-      scratches_.DeleteAt(i);
+  for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) {
+    if (loc.Equals(*it)) {
+      scratches_.erase(it);
       break;
     }
   }
@@ -406,7 +406,8 @@
   // we will update source operand in the move graph to reduce dependencies in
   // the graph.
 
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   DCHECK(!move->IsPending());
   DCHECK(!move->IsEliminated());
   if (move->IsRedundant()) {
@@ -433,8 +434,8 @@
   // dependencies. Any unperformed, unpending move with a source the same
   // as this one's destination blocks this one so recursively perform all
   // such moves.
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& other_move = *moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    const MoveOperands& other_move = *moves_[i];
     if (other_move.Blocks(destination) && !other_move.IsPending()) {
       PerformMove(i);
     }
@@ -490,8 +491,11 @@
     move->Eliminate();
     UpdateMoveSource(pending_source, pending_destination);
     // Free any unblocked locations in the scratch location list.
-    for (size_t i = 0; i < scratches_.Size(); ++i) {
-      Location scratch = scratches_.Get(i);
+    // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop.
+    // FIXME: If FreeScratchLocation() removes the location from scratches_,
+    // we skip the next location. This happens for arm64.
+    for (size_t i = 0; i < scratches_.size(); ++i) {
+      Location scratch = scratches_[i];
       // Only scratch overlapping with performed move source can be unblocked.
       if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
         FreeScratchLocation(pending_source);
@@ -512,8 +516,7 @@
   // This is not something we must do, but we can use fewer scratch locations with
   // this trick. For example, we can avoid using additional scratch locations for
   // moves (0 -> 1), (1 -> 2), (1 -> 0).
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    MoveOperands* move = moves_.Get(i);
+  for (MoveOperands* move : moves_) {
     if (move->GetSource().Equals(from)) {
       move->SetSource(to);
     }
@@ -522,16 +525,15 @@
 
 void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
     Location destination, Primitive::Type type) {
-  pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr));
+  pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr));
 }
 
 void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
-  pending_moves_.Delete(move);
+  RemoveElement(pending_moves_, move);
 }
 
 MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
-  for (size_t i = 0; i < pending_moves_.Size(); ++i) {
-    MoveOperands* move = pending_moves_.Get(i);
+  for (MoveOperands* move : pending_moves_) {
     Location destination = move->GetDestination();
     // Only moves with destination overlapping with input loc can be unblocked.
     if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
@@ -542,13 +544,13 @@
 }
 
 bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
-  for (size_t i = 0; i < pending_moves_.Size(); ++i) {
-    if (pending_moves_.Get(i)->Blocks(loc)) {
+  for (MoveOperands* move : pending_moves_) {
+    if (move->Blocks(loc)) {
       return true;
     }
   }
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    if (moves_.Get(i)->Blocks(loc)) {
+  for (MoveOperands* move : moves_) {
+    if (move->Blocks(loc)) {
       return true;
     }
   }
@@ -558,7 +560,7 @@
 // So far it is only used for debugging purposes to make sure all pending moves
 // have been performed.
 size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
-  return pending_moves_.Size();
+  return pending_moves_.size();
 }
 
 }  // namespace art
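
The resolver rewritten above breaks dependency cycles with a swap and then rewrites the sources of the remaining moves (UpdateSourceOf). The following is a small self-contained sketch of that idea on plain integer registers, assuming every move is a single 32-bit copy; it resolves the parallel move (0 -> 1), (1 -> 2), (1 -> 0), the example used elsewhere in this file. It is an illustration of the technique, not ART code.

#include <cstdio>
#include <utility>
#include <vector>

struct Move { int src; int dst; };

void ResolveWithSwaps(std::vector<int>& regs, std::vector<Move> moves) {
  bool progress = true;
  while (!moves.empty()) {
    if (progress) {
      progress = false;
      // Emit any move whose destination is not the source of another pending move.
      for (size_t i = 0; i < moves.size(); ++i) {
        bool blocked = false;
        for (const Move& other : moves) {
          if (&other != &moves[i] && other.src == moves[i].dst) blocked = true;
        }
        if (!blocked) {
          regs[moves[i].dst] = regs[moves[i].src];   // plain copy
          moves.erase(moves.begin() + i);
          progress = true;
          break;
        }
      }
    } else {
      // Every remaining move is blocked: break the cycle with a swap and
      // update the sources of the other moves (UpdateSourceOf analogue).
      Move m = moves.back();
      moves.pop_back();
      std::swap(regs[m.src], regs[m.dst]);
      for (Move& other : moves) {
        if (other.src == m.src) other.src = m.dst;
        else if (other.src == m.dst) other.src = m.src;
      }
      progress = true;
    }
  }
}

int main() {
  std::vector<int> regs = {10, 20, 30};              // r0=10, r1=20, r2=30
  ResolveWithSwaps(regs, {{0, 1}, {1, 2}, {1, 0}});
  std::printf("r0=%d r1=%d r2=%d\n", regs[0], regs[1], regs[2]);  // r0=20 r1=10 r2=20
  return 0;
}
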
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 9ede910..4278861 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -17,8 +17,8 @@
 #ifndef ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_
 #define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_
 
+#include "base/arena_containers.h"
 #include "base/value_object.h"
-#include "utils/growable_array.h"
 #include "locations.h"
 #include "primitive.h"
 
@@ -31,7 +31,10 @@
 // have their own subclass that implements corresponding virtual functions.
 class ParallelMoveResolver : public ValueObject {
  public:
-  explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {}
+  explicit ParallelMoveResolver(ArenaAllocator* allocator)
+      : moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)) {
+    moves_.reserve(32);
+  }
   virtual ~ParallelMoveResolver() {}
 
   // Resolve a set of parallel moves, emitting assembler instructions.
@@ -41,7 +44,7 @@
   // Build the initial list of moves.
   void BuildInitialMoveList(HParallelMove* parallel_move);
 
-  GrowableArray<MoveOperands*> moves_;
+  ArenaVector<MoveOperands*> moves_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
@@ -120,8 +123,13 @@
 class ParallelMoveResolverNoSwap : public ParallelMoveResolver {
  public:
   explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator)
-      : ParallelMoveResolver(allocator), scratches_(allocator, 32),
-        pending_moves_(allocator, 8), allocator_(allocator) {}
+      : ParallelMoveResolver(allocator),
+        scratches_(allocator->Adapter(kArenaAllocParallelMoveResolver)),
+        pending_moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)),
+        allocator_(allocator) {
+    scratches_.reserve(32);
+    pending_moves_.reserve(8);
+  }
   virtual ~ParallelMoveResolverNoSwap() {}
 
   // Resolve a set of parallel moves, emitting assembler instructions.
@@ -160,7 +168,7 @@
   void RemoveScratchLocation(Location loc);
 
   // List of scratch locations.
-  GrowableArray<Location> scratches_;
+  ArenaVector<Location> scratches_;
 
  private:
   // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy
@@ -183,7 +191,7 @@
   size_t GetNumberOfPendingMoves();
 
   // Additional pending moves which might be added to resolve dependency cycle.
-  GrowableArray<MoveOperands*> pending_moves_;
+  ArenaVector<MoveOperands*> pending_moves_;
 
   // Used to allocate pending MoveOperands.
   ArenaAllocator* const allocator_;
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index f8f7010..da91cb8 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -56,7 +56,8 @@
       : ParallelMoveResolverWithSwap(allocator) {}
 
   void EmitMove(size_t index) OVERRIDE {
-    MoveOperands* move = moves_.Get(index);
+    DCHECK_LT(index, moves_.size());
+    MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
     }
@@ -68,7 +69,8 @@
   }
 
   void EmitSwap(size_t index) OVERRIDE {
-    MoveOperands* move = moves_.Get(index);
+    DCHECK_LT(index, moves_.size());
+    MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
     }
@@ -127,7 +129,8 @@
   void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {}
 
   void EmitMove(size_t index) OVERRIDE {
-    MoveOperands* move = moves_.Get(index);
+    DCHECK_LT(index, moves_.size());
+    MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
     }
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index ad8c682..40c75af 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -56,6 +56,24 @@
   DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling);
 };
 
+static bool HasConflictingEquivalent(HPhi* phi) {
+  if (phi->GetNext() == nullptr) {
+    return false;
+  }
+  HPhi* next = phi->GetNext()->AsPhi();
+  if (next->GetRegNumber() == phi->GetRegNumber()) {
+    if (next->GetType() == Primitive::kPrimVoid) {
+      // We only get a void type for an equivalent phi we processed and found out
+      // it was conflicting.
+      return true;
+    } else {
+      // Go to the next phi, in case it is also an equivalent.
+      return HasConflictingEquivalent(next);
+    }
+  }
+  return false;
+}
+
 bool DeadPhiHandling::UpdateType(HPhi* phi) {
   if (phi->IsDead()) {
     // Phi was rendered dead while waiting in the worklist because it was replaced
@@ -87,21 +105,26 @@
     if (new_type == Primitive::kPrimVoid) {
       new_type = input_type;
     } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) {
+      if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) {
+        // If we already asked for an equivalent of the input phi, but that equivalent
+        // ended up conflicting, make this phi conflicting too.
+        conflict = true;
+        break;
+      }
       HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input);
       if (equivalent == nullptr) {
         conflict = true;
         break;
-      } else {
-        phi->ReplaceInput(equivalent, i);
-        if (equivalent->IsPhi()) {
-          DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot);
-          // We created a new phi, but that phi has the same inputs as the old phi. We
-          // add it to the worklist to ensure its inputs can also be converted to reference.
-          // If not, it will remain dead, and the algorithm will make the current phi dead
-          // as well.
-          equivalent->AsPhi()->SetLive();
-          AddToWorklist(equivalent->AsPhi());
-        }
+      }
+      phi->ReplaceInput(equivalent, i);
+      if (equivalent->IsPhi()) {
+        DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot);
+        // We created a new phi, but that phi has the same inputs as the old phi. We
+        // add it to the worklist to ensure its inputs can also be converted to reference.
+        // If not, it will remain dead, and the algorithm will make the current phi dead
+        // as well.
+        equivalent->AsPhi()->SetLive();
+        AddToWorklist(equivalent->AsPhi());
       }
     } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) {
       new_type = Primitive::kPrimNot;
@@ -145,8 +168,14 @@
     if (phi->IsDead() && phi->HasEnvironmentUses()) {
       phi->SetLive();
       if (block->IsLoopHeader()) {
-        // Give a type to the loop phi, to guarantee convergence of the algorithm.
-        phi->SetType(phi->InputAt(0)->GetType());
+        // Give a type to the loop phi to guarantee convergence of the algorithm.
+        // Note that the dead phi may already have a type if it is an equivalent
+        // generated for a typed LoadLocal. In that case we do not change the
+        // type because it could lead to an unsupported PrimNot/Float/Double ->
+        // PrimInt/Long transition and create same type equivalents.
+        if (phi->GetType() == Primitive::kPrimVoid) {
+          phi->SetType(phi->InputAt(0)->GetType());
+        }
         AddToWorklist(phi);
       } else {
         // Because we are doing a reverse post order visit, all inputs of
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 1f0bac5..f27cecc 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -30,8 +30,8 @@
   current_entry_.sp_mask = sp_mask;
   current_entry_.num_dex_registers = num_dex_registers;
   current_entry_.inlining_depth = inlining_depth;
-  current_entry_.dex_register_locations_start_index = dex_register_locations_.Size();
-  current_entry_.inline_infos_start_index = inline_infos_.Size();
+  current_entry_.dex_register_locations_start_index = dex_register_locations_.size();
+  current_entry_.inline_infos_start_index = inline_infos_.size();
   current_entry_.dex_register_map_hash = 0;
   current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
   if (num_dex_registers != 0) {
@@ -55,7 +55,7 @@
 
 void StackMapStream::EndStackMapEntry() {
   current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap();
-  stack_maps_.Add(current_entry_);
+  stack_maps_.push_back(current_entry_);
   current_entry_ = StackMapEntry();
 }
 
@@ -73,12 +73,12 @@
     auto it = location_catalog_entries_indices_.Find(location);
     if (it != location_catalog_entries_indices_.end()) {
       // Retrieve the index from the hash map.
-      dex_register_locations_.Add(it->second);
+      dex_register_locations_.push_back(it->second);
     } else {
       // Create a new entry in the location catalog and the hash map.
-      size_t index = location_catalog_entries_.Size();
-      location_catalog_entries_.Add(location);
-      dex_register_locations_.Add(index);
+      size_t index = location_catalog_entries_.size();
+      location_catalog_entries_.push_back(location);
+      dex_register_locations_.push_back(index);
       location_catalog_entries_indices_.Insert(std::make_pair(location, index));
     }
 
@@ -108,7 +108,7 @@
   current_inline_info_.dex_pc = dex_pc;
   current_inline_info_.invoke_type = invoke_type;
   current_inline_info_.num_dex_registers = num_dex_registers;
-  current_inline_info_.dex_register_locations_start_index = dex_register_locations_.Size();
+  current_inline_info_.dex_register_locations_start_index = dex_register_locations_.size();
   if (num_dex_registers != 0) {
     current_inline_info_.live_dex_registers_mask =
         new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
@@ -123,14 +123,14 @@
   DCHECK_EQ(current_dex_register_, current_inline_info_.num_dex_registers)
       << "Inline information contains fewer registers than expected";
   in_inline_frame_ = false;
-  inline_infos_.Add(current_inline_info_);
+  inline_infos_.push_back(current_inline_info_);
   current_inline_info_ = InlineInfoEntry();
 }
 
 uint32_t StackMapStream::ComputeMaxNativePcOffset() const {
   uint32_t max_native_pc_offset = 0u;
-  for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) {
-    max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset);
+  for (const StackMapEntry& entry : stack_maps_) {
+    max_native_pc_offset = std::max(max_native_pc_offset, entry.native_pc_offset);
   }
   return max_native_pc_offset;
 }
@@ -147,7 +147,7 @@
                                                           dex_pc_max_,
                                                           max_native_pc_offset,
                                                           register_mask_max_);
-  stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize();
+  stack_maps_size_ = stack_maps_.size() * stack_map_encoding_.ComputeStackMapSize();
   dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
 
   // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned.
@@ -170,33 +170,28 @@
 
 size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const {
   size_t size = DexRegisterLocationCatalog::kFixedSize;
-  for (size_t location_catalog_entry_index = 0;
-       location_catalog_entry_index < location_catalog_entries_.Size();
-       ++location_catalog_entry_index) {
-    DexRegisterLocation dex_register_location =
-        location_catalog_entries_.Get(location_catalog_entry_index);
+  for (const DexRegisterLocation& dex_register_location : location_catalog_entries_) {
     size += DexRegisterLocationCatalog::EntrySize(dex_register_location);
   }
   return size;
 }
 
 size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers,
-                                                 const BitVector& live_dex_registers_mask) const {
+                                                 const BitVector* live_dex_registers_mask) const {
+  // For num_dex_registers == 0u, live_dex_registers_mask may be null.
+  if (num_dex_registers == 0u) {
+    return 0u;  // No register map will be emitted.
+  }
+  DCHECK(live_dex_registers_mask != nullptr);
+
   // Size of the map in bytes.
   size_t size = DexRegisterMap::kFixedSize;
   // Add the live bit mask for the Dex register liveness.
   size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers);
   // Compute the size of the set of live Dex register entries.
-  size_t number_of_live_dex_registers = 0;
-  for (size_t dex_register_number = 0;
-       dex_register_number < num_dex_registers;
-       ++dex_register_number) {
-    if (live_dex_registers_mask.IsBitSet(dex_register_number)) {
-      ++number_of_live_dex_registers;
-    }
-  }
+  size_t number_of_live_dex_registers = live_dex_registers_mask->NumSetBits();
   size_t map_entries_size_in_bits =
-      DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size())
+      DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.size())
       * number_of_live_dex_registers;
   size_t map_entries_size_in_bytes =
       RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte;
@@ -207,24 +202,24 @@
 size_t StackMapStream::ComputeDexRegisterMapsSize() const {
   size_t size = 0;
   size_t inline_info_index = 0;
-  for (size_t i = 0; i < stack_maps_.Size(); ++i) {
-    StackMapEntry entry = stack_maps_.Get(i);
+  for (const StackMapEntry& entry : stack_maps_) {
     if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) {
-      size += ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask);
+      size += ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask);
     } else {
       // Entries with the same dex map will have the same offset.
     }
     for (size_t j = 0; j < entry.inlining_depth; ++j) {
-      InlineInfoEntry inline_entry = inline_infos_.Get(inline_info_index++);
+      DCHECK_LT(inline_info_index, inline_infos_.size());
+      InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
       size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers,
-                                        *inline_entry.live_dex_registers_mask);
+                                        inline_entry.live_dex_registers_mask);
     }
   }
   return size;
 }
 
 size_t StackMapStream::ComputeInlineInfoSize() const {
-  return inline_infos_.Size() * InlineInfo::SingleEntrySize()
+  return inline_infos_.size() * InlineInfo::SingleEntrySize()
     // For encoding the depth.
     + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
 }
@@ -244,19 +239,18 @@
       inline_infos_start_, inline_info_size_);
 
   code_info.SetEncoding(stack_map_encoding_);
-  code_info.SetNumberOfStackMaps(stack_maps_.Size());
+  code_info.SetNumberOfStackMaps(stack_maps_.size());
   DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_);
 
   // Set the Dex register location catalog.
-  code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.Size());
+  code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.size());
   MemoryRegion dex_register_location_catalog_region = region.Subregion(
       dex_register_location_catalog_start_, dex_register_location_catalog_size_);
   DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
   // Offset in `dex_register_location_catalog` where to store the next
   // register location.
   size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize;
-  for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) {
-    DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i);
+  for (DexRegisterLocation dex_register_location : location_catalog_entries_) {
     dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location);
     location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location);
   }
@@ -265,9 +259,9 @@
 
   uintptr_t next_dex_register_map_offset = 0;
   uintptr_t next_inline_info_offset = 0;
-  for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) {
+  for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) {
     StackMap stack_map = code_info.GetStackMapAt(i, stack_map_encoding_);
-    StackMapEntry entry = stack_maps_.Get(i);
+    StackMapEntry entry = stack_maps_[i];
 
     stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc);
     stack_map.SetNativePcOffset(stack_map_encoding_, entry.native_pc_offset);
@@ -291,7 +285,7 @@
         // New dex registers maps should be added to the stack map.
         MemoryRegion register_region = dex_register_locations_region.Subregion(
             next_dex_register_map_offset,
-            ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask));
+            ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask));
         next_dex_register_map_offset += register_region.size();
         DexRegisterMap dex_register_map(register_region);
         stack_map.SetDexRegisterMapOffset(
@@ -318,8 +312,9 @@
           stack_map_encoding_, inline_region.start() - dex_register_locations_region.start());
 
       inline_info.SetDepth(entry.inlining_depth);
+      DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
       for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
-        InlineInfoEntry inline_entry = inline_infos_.Get(depth + entry.inline_infos_start_index);
+        InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
         inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index);
         inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc);
         inline_info.SetInvokeTypeAtDepth(depth, inline_entry.invoke_type);
@@ -331,7 +326,7 @@
           MemoryRegion register_region = dex_register_locations_region.Subregion(
               next_dex_register_map_offset,
               ComputeDexRegisterMapSize(inline_entry.num_dex_registers,
-                                        *inline_entry.live_dex_registers_mask));
+                                        inline_entry.live_dex_registers_mask));
           next_dex_register_map_offset += register_region.size();
           DexRegisterMap dex_register_map(register_region);
           inline_info.SetDexRegisterMapOffsetAtDepth(
@@ -357,42 +352,43 @@
                                           uint32_t start_index_in_dex_register_locations) const {
   dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask);
   // Set the dex register location mapping data.
-  for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
-       dex_register_number < num_dex_registers;
-       ++dex_register_number) {
-    if (live_dex_registers_mask.IsBitSet(dex_register_number)) {
-      size_t location_catalog_entry_index = dex_register_locations_.Get(
-          start_index_in_dex_register_locations + index_in_dex_register_locations);
-      dex_register_map.SetLocationCatalogEntryIndex(
-          index_in_dex_register_locations,
-          location_catalog_entry_index,
-          num_dex_registers,
-          location_catalog_entries_.Size());
-      ++index_in_dex_register_locations;
-    }
+  size_t number_of_live_dex_registers = live_dex_registers_mask.NumSetBits();
+  DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size());
+  DCHECK_LE(start_index_in_dex_register_locations,
+            dex_register_locations_.size() - number_of_live_dex_registers);
+  for (size_t index_in_dex_register_locations = 0;
+       index_in_dex_register_locations != number_of_live_dex_registers;
+       ++index_in_dex_register_locations) {
+    size_t location_catalog_entry_index = dex_register_locations_[
+        start_index_in_dex_register_locations + index_in_dex_register_locations];
+    dex_register_map.SetLocationCatalogEntryIndex(
+        index_in_dex_register_locations,
+        location_catalog_entry_index,
+        num_dex_registers,
+        location_catalog_entries_.size());
   }
 }
 
 size_t StackMapStream::FindEntryWithTheSameDexMap() {
-  size_t current_entry_index = stack_maps_.Size();
+  size_t current_entry_index = stack_maps_.size();
   auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash);
   if (entries_it == dex_map_hash_to_stack_map_indices_.end()) {
     // We don't have a perfect hash function, so we need a list to collect all stack maps
     // which might have the same dex register map.
-    GrowableArray<uint32_t> stack_map_indices(allocator_, 1);
-    stack_map_indices.Add(current_entry_index);
-    dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices);
+    ArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream));
+    stack_map_indices.push_back(current_entry_index);
+    dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash,
+                                           std::move(stack_map_indices));
     return kNoSameDexMapFound;
   }
 
   // We might have collisions, so we need to check whether or not we really have a match.
-  for (size_t i = 0; i < entries_it->second.Size(); i++) {
-    size_t test_entry_index = entries_it->second.Get(i);
-    if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) {
+  for (uint32_t test_entry_index : entries_it->second) {
+    if (HaveTheSameDexMaps(GetStackMap(test_entry_index), current_entry_)) {
       return test_entry_index;
     }
   }
-  entries_it->second.Add(current_entry_index);
+  entries_it->second.push_back(current_entry_index);
   return kNoSameDexMapFound;
 }
 
@@ -406,21 +402,22 @@
   if (a.num_dex_registers != b.num_dex_registers) {
     return false;
   }
-
-  int index_in_dex_register_locations = 0;
-  for (uint32_t i = 0; i < a.num_dex_registers; i++) {
-    if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) {
+  if (a.num_dex_registers != 0u) {
+    DCHECK(a.live_dex_registers_mask != nullptr);
+    DCHECK(b.live_dex_registers_mask != nullptr);
+    if (!a.live_dex_registers_mask->Equal(b.live_dex_registers_mask)) {
       return false;
     }
-    if (a.live_dex_registers_mask->IsBitSet(i)) {
-      size_t a_loc = dex_register_locations_.Get(
-          a.dex_register_locations_start_index + index_in_dex_register_locations);
-      size_t b_loc = dex_register_locations_.Get(
-          b.dex_register_locations_start_index + index_in_dex_register_locations);
-      if (a_loc != b_loc) {
-        return false;
-      }
-      ++index_in_dex_register_locations;
+    size_t number_of_live_dex_registers = a.live_dex_registers_mask->NumSetBits();
+    DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size());
+    DCHECK_LE(a.dex_register_locations_start_index,
+              dex_register_locations_.size() - number_of_live_dex_registers);
+    DCHECK_LE(b.dex_register_locations_start_index,
+              dex_register_locations_.size() - number_of_live_dex_registers);
+    auto a_begin = dex_register_locations_.begin() + a.dex_register_locations_start_index;
+    auto b_begin = dex_register_locations_.begin() + b.dex_register_locations_start_index;
+    if (!std::equal(a_begin, a_begin + number_of_live_dex_registers, b_begin)) {
+      return false;
     }
   }
   return true;
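
HaveTheSameDexMaps() above now checks that the live-register masks are equal and then compares the two subranges of dex_register_locations_ with std::equal, instead of walking the mask bit by bit. A minimal sketch of that comparison, with std::bitset and std::vector standing in for ArenaBitVector and ArenaVector so it is self-contained; EntrySketch is a simplified placeholder, not the real StackMapEntry.

#include <algorithm>
#include <bitset>
#include <cstddef>
#include <vector>

struct EntrySketch {
  size_t start_index;            // dex_register_locations_start_index
  std::bitset<16> live_mask;     // stand-in for the live_dex_registers_mask bit vector
};

bool HaveTheSameDexMapsSketch(const std::vector<size_t>& locations,
                              const EntrySketch& a, const EntrySketch& b) {
  if (a.live_mask != b.live_mask) {
    return false;                                  // masks must match first
  }
  size_t live = a.live_mask.count();               // NumSetBits() analogue
  auto a_begin = locations.begin() + a.start_index;
  auto b_begin = locations.begin() + b.start_index;
  return std::equal(a_begin, a_begin + live, b_begin);
}
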
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 703b6f7..4783e28 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -24,7 +24,6 @@
 #include "memory_region.h"
 #include "nodes.h"
 #include "stack_map.h"
-#include "utils/growable_array.h"
 
 namespace art {
 
@@ -62,15 +61,16 @@
  public:
   explicit StackMapStream(ArenaAllocator* allocator)
       : allocator_(allocator),
-        stack_maps_(allocator, 10),
-        location_catalog_entries_(allocator, 4),
-        dex_register_locations_(allocator, 10 * 4),
-        inline_infos_(allocator, 2),
+        stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)),
+        location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
+        dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
+        inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_mask_max_(-1),
         dex_pc_max_(0),
         register_mask_max_(0),
         number_of_stack_maps_with_inline_info_(0),
-        dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()),
+        dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(),
+                                           allocator->Adapter(kArenaAllocStackMapStream)),
         current_entry_(),
         current_inline_info_(),
         stack_mask_size_(0),
@@ -84,7 +84,12 @@
         inline_infos_start_(0),
         needed_size_(0),
         current_dex_register_(0),
-        in_inline_frame_(false) {}
+        in_inline_frame_(false) {
+    stack_maps_.reserve(10);
+    location_catalog_entries_.reserve(4);
+    dex_register_locations_.reserve(10 * 4);
+    inline_infos_.reserve(2);
+  }
 
   // See runtime/stack_map.h to know what these fields contain.
   struct StackMapEntry {
@@ -127,17 +132,17 @@
   void EndInlineInfoEntry();
 
   size_t GetNumberOfStackMaps() const {
-    return stack_maps_.Size();
+    return stack_maps_.size();
   }
 
   const StackMapEntry& GetStackMap(size_t i) const {
-    DCHECK_LT(i, stack_maps_.Size());
-    return stack_maps_.GetRawStorage()[i];
+    DCHECK_LT(i, stack_maps_.size());
+    return stack_maps_[i];
   }
 
   void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
-    DCHECK_LT(i, stack_maps_.Size());
-    stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset;
+    DCHECK_LT(i, stack_maps_.size());
+    stack_maps_[i].native_pc_offset = native_pc_offset;
   }
 
   uint32_t ComputeMaxNativePcOffset() const;
@@ -150,7 +155,7 @@
  private:
   size_t ComputeDexRegisterLocationCatalogSize() const;
   size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers,
-                                   const BitVector& live_dex_registers_mask) const;
+                                   const BitVector* live_dex_registers_mask) const;
   size_t ComputeDexRegisterMapsSize() const;
   size_t ComputeInlineInfoSize() const;
 
@@ -164,10 +169,10 @@
                             uint32_t start_index_in_dex_register_locations) const;
 
   ArenaAllocator* allocator_;
-  GrowableArray<StackMapEntry> stack_maps_;
+  ArenaVector<StackMapEntry> stack_maps_;
 
   // A catalog of unique [location_kind, register_value] pairs (per method).
-  GrowableArray<DexRegisterLocation> location_catalog_entries_;
+  ArenaVector<DexRegisterLocation> location_catalog_entries_;
   // Map from Dex register location catalog entries to their indices in the
   // location catalog.
   typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn,
@@ -175,14 +180,14 @@
   LocationCatalogEntriesIndices location_catalog_entries_indices_;
 
   // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
-  GrowableArray<size_t> dex_register_locations_;
-  GrowableArray<InlineInfoEntry> inline_infos_;
+  ArenaVector<size_t> dex_register_locations_;
+  ArenaVector<InlineInfoEntry> inline_infos_;
   int stack_mask_max_;
   uint32_t dex_pc_max_;
   uint32_t register_mask_max_;
   size_t number_of_stack_maps_with_inline_info_;
 
-  ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_;
+  ArenaSafeMap<uint32_t, ArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_;
 
   StackMapEntry current_entry_;
   InlineInfoEntry current_inline_info_;
diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h
deleted file mode 100644
index f85e026..0000000
--- a/compiler/utils/growable_array.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_GROWABLE_ARRAY_H_
-#define ART_COMPILER_UTILS_GROWABLE_ARRAY_H_
-
-#include <stdint.h>
-#include <stddef.h>
-
-#include "base/arena_object.h"
-
-namespace art {
-
-// Deprecated
-// TODO: Replace all uses with ArenaVector<T>.
-template<typename T>
-class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> {
-  public:
-    GrowableArray(ArenaAllocator* arena, size_t init_length)
-      : arena_(arena),
-        num_allocated_(init_length),
-        num_used_(0) {
-      elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray);
-    }
-
-    GrowableArray(ArenaAllocator* arena, size_t init_length, T initial_data)
-      : arena_(arena),
-        num_allocated_(init_length),
-        num_used_(init_length) {
-      elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray);
-      for (size_t i = 0; i < init_length; ++i) {
-        elem_list_[i] = initial_data;
-      }
-    }
-
-    bool Contains(T value, size_t start_from = 0) const {
-      for (size_t i = start_from; i < num_used_; ++i) {
-        if (elem_list_[i] == value) {
-          return true;
-        }
-      }
-      return false;
-    }
-
-    // Expand the list size to at least new length.
-    void Resize(size_t new_length) {
-      if (new_length <= num_allocated_) return;
-      // If it's a small list double the size, else grow 1.5x.
-      size_t target_length =
-          (num_allocated_ < 128) ? num_allocated_ << 1 : num_allocated_ + (num_allocated_ >> 1);
-      if (new_length > target_length) {
-         target_length = new_length;
-      }
-      T* new_array = arena_->AllocArray<T>(target_length, kArenaAllocGrowableArray);
-      memcpy(new_array, elem_list_, sizeof(T) * num_allocated_);
-      num_allocated_ = target_length;
-      elem_list_ = new_array;
-    }
-
-    // NOTE: does not return storage, just resets use count.
-    void Reset() {
-      num_used_ = 0;
-    }
-
-    // Insert an element to the end of a list, resizing if necessary.
-    void Insert(T elem) {
-      if (num_used_ == num_allocated_) {
-        Resize(num_used_ + 1);
-      }
-      elem_list_[num_used_++] = elem;
-    }
-
-    void InsertAt(size_t index, T elem) {
-      DCHECK(index <= Size());
-      Insert(elem);
-      for (size_t i = Size() - 1; i > index; --i) {
-        elem_list_[i] = elem_list_[i - 1];
-      }
-      elem_list_[index] = elem;
-    }
-
-    void Add(T elem) {
-      Insert(elem);
-    }
-
-    T Get(size_t index) const {
-      DCHECK_LT(index, num_used_);
-      return elem_list_[index];
-    }
-
-    // Overwrite existing element at position index.  List must be large enough.
-    void Put(size_t index, T elem) {
-      DCHECK_LT(index, num_used_);
-      elem_list_[index] = elem;
-    }
-
-    void Increment(size_t index) {
-      DCHECK_LT(index, num_used_);
-      elem_list_[index]++;
-    }
-
-    /*
-     * Remove an existing element from list.  If there are more than one copy
-     * of the element, only the first one encountered will be deleted.
-     */
-    // TODO: consider renaming this.
-    void Delete(T element) {
-      bool found = false;
-      for (size_t i = 0; i < num_used_ - 1; i++) {
-        if (!found && elem_list_[i] == element) {
-          found = true;
-        }
-        if (found) {
-          elem_list_[i] = elem_list_[i+1];
-        }
-      }
-      // We should either have found the element, or it was the last (unscanned) element.
-      DCHECK(found || (element == elem_list_[num_used_ - 1]));
-      num_used_--;
-    }
-
-    void DeleteAt(size_t index) {
-      for (size_t i = index; i < num_used_ - 1; i++) {
-        elem_list_[i] = elem_list_[i + 1];
-      }
-      num_used_--;
-    }
-
-    size_t GetNumAllocated() const { return num_allocated_; }
-
-    size_t Size() const { return num_used_; }
-
-    bool IsEmpty() const { return num_used_ == 0; }
-
-    T Pop() {
-      DCHECK_GE(num_used_, (size_t)0);
-      return elem_list_[--num_used_];
-    }
-
-    T Peek() const {
-      DCHECK_GE(num_used_, (size_t)0);
-      return elem_list_[num_used_ - 1];
-    }
-
-    void SetSize(size_t new_size) {
-      Resize(new_size);
-      num_used_ = new_size;
-    }
-
-    T* GetRawStorage() const { return elem_list_; }
-
-  private:
-    ArenaAllocator* const arena_;
-    size_t num_allocated_;
-    size_t num_used_;
-    T* elem_list_;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_UTILS_GROWABLE_ARRAY_H_
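
With growable_array.h removed, every call site in this change is rewritten against the vector-style API (plus RemoveElement from base/stl_util.h). A rough before/after sketch of the migration, using std::vector and a simplified RemoveElement stand-in; the "Sketch" names are illustrative, not ART code.

#include <algorithm>
#include <cassert>
#include <vector>

// Simplified stand-in for art::RemoveElement: erase the first matching element.
template <typename Vector, typename T>
void RemoveElementSketch(Vector& v, const T& value) {
  auto it = std::find(v.begin(), v.end(), value);
  assert(it != v.end());
  v.erase(it);
}

void MigrationSketch() {
  std::vector<int> v;          // was: GrowableArray<int> v(arena, 32);
  v.reserve(32);
  v.push_back(7);              // was: v.Add(7);
  assert(!v.empty());          // was: !v.IsEmpty()
  assert(v[0] == 7);           // was: v.Get(0), now with an explicit DCHECK_LT on the index
  assert(v.size() == 1u);      // was: v.Size()
  RemoveElementSketch(v, 7);   // was: v.Delete(7)
  v.clear();                   // was: v.Reset()
}
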
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 995a1d5..059c4cd 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -104,6 +104,7 @@
   lambda/box_table.cc \
   lambda/closure.cc \
   lambda/closure_builder.cc \
+  lambda/leaking_allocator.cc \
   jni_internal.cc \
   jobject_comparator.cc \
   linear_alloc.cc \
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 403d348..8f6b1ff 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -30,9 +30,11 @@
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
   gprs_[PC] = &pc_;
+  gprs_[R0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = ArmContext::kBadGprBase + SP;
   pc_ = ArmContext::kBadGprBase + PC;
+  arg0_ = 0;
 }
 
 void ArmContext::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 77bb5c8..ea31055 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -45,6 +45,10 @@
     SetGPR(PC, new_pc);
   }
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(R0, new_arg0_value);
+  }
+
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
     return gprs_[reg] != nullptr;
@@ -84,7 +88,7 @@
   uintptr_t* gprs_[kNumberOfCoreRegisters];
   uint32_t* fprs_[kNumberOfSRegisters];
   // Hold values for sp and pc if they are not located within a stack frame.
-  uintptr_t sp_, pc_;
+  uintptr_t sp_, pc_, arg0_;
 };
 
 }  // namespace arm
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index e45d828..dc1cf8a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -437,8 +437,8 @@
     ldr  r14, [r0, #56]   @ (LR from gprs_ 56=4*14)
     add  r0, r0, #12      @ increment r0 to skip gprs_[0..2] 12=4*3
     ldm  r0, {r3-r13}     @ load remaining gprs from argument gprs_
-    mov  r0, #0           @ clear result registers r0 and r1
-    mov  r1, #0
+    ldr  r0, [r0, #-12]   @ load r0 value
+    mov  r1, #0           @ clear result register r1
     bx   r2               @ do long jump
 END art_quick_do_long_jump
 
@@ -1142,7 +1142,7 @@
 
     /*
      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
-     * will long jump to the upcall with a special exception of -1.
+     * will long jump to the interpreter bridge.
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 60becc6..4477631 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -31,10 +31,12 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
-  gprs_[LR] = &pc_;
+  gprs_[kPC] = &pc_;
+  gprs_[X0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = Arm64Context::kBadGprBase + SP;
-  pc_ = Arm64Context::kBadGprBase + LR;
+  pc_ = Arm64Context::kBadGprBase + kPC;
+  arg0_ = 0;
 }
 
 void Arm64Context::FillCalleeSaves(const StackVisitor& fr) {
@@ -58,8 +60,8 @@
 }
 
 void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
-  DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
-  DCHECK_NE(reg, static_cast<uint32_t>(XZR));
+  DCHECK_LT(reg, arraysize(gprs_));
+  // Note: we use kPC == XZR, so do not ensure that reg != XZR.
   DCHECK(IsAccessibleGPR(reg));
   DCHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
   *gprs_[reg] = value;
@@ -124,13 +126,13 @@
 extern "C" NO_RETURN void art_quick_do_long_jump(uint64_t*, uint64_t*);
 
 void Arm64Context::DoLongJump() {
-  uint64_t gprs[kNumberOfXRegisters];
+  uint64_t gprs[arraysize(gprs_)];
   uint64_t fprs[kNumberOfDRegisters];
 
   // The long jump routine called below expects to find the value for SP at index 31.
   DCHECK_EQ(SP, 31);
 
-  for (size_t i = 0; i < kNumberOfXRegisters; ++i) {
+  for (size_t i = 0; i < arraysize(gprs_); ++i) {
     gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i;
   }
   for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index 1c99f3c..11314e0 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -42,20 +42,25 @@
   }
 
   void SetPC(uintptr_t new_lr) OVERRIDE {
-    SetGPR(LR, new_lr);
+    SetGPR(kPC, new_lr);
+  }
+
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(X0, new_arg0_value);
   }
 
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
-    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+    DCHECK_LT(reg, arraysize(gprs_));
     return gprs_[reg] != nullptr;
   }
 
   uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
-    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+    DCHECK_LT(reg, arraysize(gprs_));
     return gprs_[reg];
   }
 
   uintptr_t GetGPR(uint32_t reg) OVERRIDE {
+    // Note: PC isn't an available GPR (outside of internals), so don't allow retrieving the value.
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
     DCHECK(IsAccessibleGPR(reg));
     return *gprs_[reg];
@@ -79,12 +84,15 @@
   void SmashCallerSaves() OVERRIDE;
   NO_RETURN void DoLongJump() OVERRIDE;
 
+  static constexpr size_t kPC = kNumberOfXRegisters;
+
  private:
-  // Pointers to register locations, initialized to null or the specific registers below.
-  uintptr_t* gprs_[kNumberOfXRegisters];
+  // Pointers to register locations, initialized to null or the specific registers below. We need
+  // an additional one for the PC.
+  uintptr_t* gprs_[kNumberOfXRegisters + 1];
   uint64_t * fprs_[kNumberOfDRegisters];
-  // Hold values for sp and pc if they are not located within a stack frame.
-  uintptr_t sp_, pc_;
+  // Hold values for sp, pc and arg0 if they are not located within a stack frame.
+  uintptr_t sp_, pc_, arg0_;
 };
 
 }  // namespace arm64
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 169bc38..6812178 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -941,7 +941,7 @@
     // Load GPRs
     // TODO: lots of those are smashed, could optimize.
     add x0, x0, #30*8
-    ldp x30, x1, [x0], #-16
+    ldp x30, x1, [x0], #-16          // LR & SP
     ldp x28, x29, [x0], #-16
     ldp x26, x27, [x0], #-16
     ldp x24, x25, [x0], #-16
@@ -958,10 +958,12 @@
     ldp x2, x3, [x0], #-16
     mov sp, x1
 
-    // TODO: Is it really OK to use LR for the target PC?
-    mov x0, #0
-    mov x1, #0
-    br  xLR
+    // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
+    ldr x1, [x0, #33*8]
+    // And the value of x0.
+    ldr x0, [x0]
+
+    br  x1
 END art_quick_do_long_jump
 
     /*
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index 9ef761e..9af7c04 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -50,6 +50,9 @@
   // Sets the program counter value.
   virtual void SetPC(uintptr_t new_pc) = 0;
 
+  // Sets the first argument register.
+  virtual void SetArg0(uintptr_t new_arg0_value) = 0;
+
   // Returns whether the given GPR is accessible (read or write).
   virtual bool IsAccessibleGPR(uint32_t reg) = 0;
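
The new virtual hook exists so architecture-independent code can seed the first argument register before a long jump. A rough, self-contained sketch of the intended calling shape (the types and names below are stand-ins, and the call sequence is an assumption modeled on the deoptimization path later in this change):

    #include <cstdint>
    #include <cstdio>

    // Simplified stand-in for the runtime context interface; only the calls relevant here.
    class Context {
     public:
      virtual ~Context() {}
      virtual void SetPC(uintptr_t new_pc) = 0;
      virtual void SetArg0(uintptr_t new_arg0_value) = 0;
      virtual void DoLongJump() = 0;
    };

    // Toy implementation that only records what the long jump would use.
    class RecordingContext : public Context {
     public:
      void SetPC(uintptr_t new_pc) override { pc_ = new_pc; }
      void SetArg0(uintptr_t new_arg0_value) override { arg0_ = new_arg0_value; }
      void DoLongJump() override {
        std::printf("would jump to %#lx with arg0 = %#lx\n",
                    static_cast<unsigned long>(pc_), static_cast<unsigned long>(arg0_));
      }

     private:
      uintptr_t pc_ = 0;
      uintptr_t arg0_ = 0;
    };

    // Hypothetical caller shape: seed the argument register and the target PC, then jump.
    void JumpToBridge(Context* context, uintptr_t bridge_pc, uintptr_t method_as_arg0) {
      context->SetArg0(method_as_arg0);  // E.g. the ArtMethod* the bridge expects first.
      context->SetPC(bridge_pc);
      context->DoLongJump();
    }

    int main() {
      RecordingContext context;
      JumpToBridge(&context, 0x1000, 0x2000);
      return 0;
    }
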
 
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index bc2bf68..08ab356 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -30,9 +30,11 @@
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
   gprs_[RA] = &ra_;
+  gprs_[A0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = MipsContext::kBadGprBase + SP;
   ra_ = MipsContext::kBadGprBase + RA;
+  arg0_ = 0;
 }
 
 void MipsContext::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index 38cf29a..0affe53 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -78,12 +78,17 @@
   void SmashCallerSaves() OVERRIDE;
   NO_RETURN void DoLongJump() OVERRIDE;
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(A0, new_arg0_value);
+  }
+
  private:
   // Pointers to registers in the stack, initialized to null except for the special cases below.
   uintptr_t* gprs_[kNumberOfCoreRegisters];
   uint32_t* fprs_[kNumberOfFRegisters];
-  // Hold values for sp and ra (return address) if they are not located within a stack frame.
-  uintptr_t sp_, ra_;
+  // Hold values for sp and ra (return address) if they are not located within a stack frame, as
+  // well as the first argument.
+  uintptr_t sp_, ra_, arg0_;
 };
 }  // namespace mips
 }  // namespace art
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index 6637c37..2c17f1c 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -29,10 +29,12 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
-  gprs_[RA] = &ra_;
+  gprs_[T9] = &t9_;
+  gprs_[A0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = Mips64Context::kBadGprBase + SP;
-  ra_ = Mips64Context::kBadGprBase + RA;
+  t9_ = Mips64Context::kBadGprBase + T9;
+  arg0_ = 0;
 }
 
 void Mips64Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h
index e4a144f..84b1c9b 100644
--- a/runtime/arch/mips64/context_mips64.h
+++ b/runtime/arch/mips64/context_mips64.h
@@ -41,7 +41,7 @@
   }
 
   void SetPC(uintptr_t new_pc) OVERRIDE {
-    SetGPR(RA, new_pc);
+    SetGPR(T9, new_pc);
   }
 
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
@@ -78,13 +78,20 @@
   void SmashCallerSaves() OVERRIDE;
   NO_RETURN void DoLongJump() OVERRIDE;
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(A0, new_arg0_value);
+  }
+
  private:
   // Pointers to registers in the stack, initialized to null except for the special cases below.
   uintptr_t* gprs_[kNumberOfGpuRegisters];
   uint64_t* fprs_[kNumberOfFpuRegisters];
-  // Hold values for sp and ra (return address) if they are not located within a stack frame.
-  uintptr_t sp_, ra_;
+  // Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the
+  // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We
+  // also need the first argument for single-frame deopt.
+  uintptr_t sp_, t9_, arg0_;
 };
+
 }  // namespace mips64
 }  // namespace art
 
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 1b50b2e..ce1b2f3 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -431,7 +431,7 @@
     ld      $ra, 248($a0)
     ld      $a0, 32($a0)
     move    $v0, $zero          # clear result registers v0 and v1
-    jalr    $zero, $ra          # do long jump
+    jalr    $zero, $t9          # do long jump (do not use ra, it must not be clobbered)
     move    $v1, $zero
 END art_quick_do_long_jump
 
diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index 38bc8f2..cd94d5e 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h
@@ -52,6 +52,7 @@
   S6   = 22,
   S7   = 23,
   T8   = 24,  // More temporaries.
+  TMP  = T8,  // Scratch register (in addition to AT).
   T9   = 25,
   K0   = 26,  // Reserved for trap handler.
   K1   = 27,
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 7096c82..987ad60 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -29,9 +29,11 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[ESP] = &esp_;
+  gprs_[EAX] = &arg0_;
   // Initialize registers with easy to spot debug values.
   esp_ = X86Context::kBadGprBase + ESP;
   eip_ = X86Context::kBadGprBase + kNumberOfCpuRegisters;
+  arg0_ = 0;
 }
 
 void X86Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index c4a11d8..59beb12 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -44,6 +44,10 @@
     eip_ = new_pc;
   }
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(EAX, new_arg0_value);
+  }
+
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg] != nullptr;
@@ -95,10 +99,10 @@
   // Pointers to register locations. Values are initialized to null or the special registers below.
   uintptr_t* gprs_[kNumberOfCpuRegisters];
   uint32_t* fprs_[kNumberOfFloatRegisters];
-  // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat
+  // Hold values for esp, eip and arg0 if they are not located within a stack frame. EIP is somewhat
   // special in that it cannot be encoded normally as a register operand to an instruction (except
   // in 64bit addressing modes).
-  uintptr_t esp_, eip_;
+  uintptr_t esp_, eip_, arg0_;
 };
 }  // namespace x86
 }  // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 029a296..f3b15c9 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1695,7 +1695,7 @@
 
     /*
      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
-     * will long jump to the upcall with a special exception of -1.
+     * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 1fe2ef8..3dc7d71 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -29,9 +29,11 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[RSP] = &rsp_;
+  gprs_[RDI] = &arg0_;
   // Initialize registers with easy to spot debug values.
   rsp_ = X86_64Context::kBadGprBase + RSP;
   rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters;
+  arg0_ = 0;
 }
 
 void X86_64Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
index 30bb9ec..f05b7f0 100644
--- a/runtime/arch/x86_64/context_x86_64.h
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -44,6 +44,10 @@
     rip_ = new_pc;
   }
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(RDI, new_arg0_value);
+  }
+
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg] != nullptr;
@@ -82,10 +86,10 @@
   // Pointers to register locations. Values are initialized to null or the special registers below.
   uintptr_t* gprs_[kNumberOfCpuRegisters];
   uint64_t* fprs_[kNumberOfFloatRegisters];
-  // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat
+  // Hold values for rsp, rip and arg0 if they are not located within a stack frame. RIP is somewhat
   // special in that it cannot be encoded normally as a register operand to an instruction (except
   // in 64bit addressing modes).
-  uintptr_t rsp_, rip_;
+  uintptr_t rsp_, rip_, arg0_;
 };
 }  // namespace x86_64
 }  // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 861f802..2f438a3 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1724,18 +1724,18 @@
      * will long jump to the upcall with a special exception of -1.
      */
 DEFINE_FUNCTION art_quick_deoptimize
-    pushq %rsi                     // Entry point for a jump. Fake that we were called.
-                                   // Use hidden arg.
+    pushq %rsi                         // Entry point for a jump. Fake that we were called.
+                                       // Use hidden arg.
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-                                   // Stack should be aligned now.
-    movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
-    call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
+                                       // Stack should be aligned now.
+    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
+    call SYMBOL(artDeoptimize)         // artDeoptimize(Thread*)
     UNREACHABLE
 END_FUNCTION art_quick_deoptimize
 
     /*
      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
-     * will long jump to the upcall with a special exception of -1.
+     * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index c1a1088..691b57f 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -89,6 +89,9 @@
   "PrimTypeProp ",
   "SideEffects  ",
   "RegAllocator ",
+  "StackMapStm  ",
+  "CodeGen      ",
+  "ParallelMove ",
 };
 
 template <bool kCount>
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index be96862..17045c6 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -99,6 +99,9 @@
   kArenaAllocPrimitiveTypePropagation,
   kArenaAllocSideEffectsAnalysis,
   kArenaAllocRegisterAllocator,
+  kArenaAllocStackMapStream,
+  kArenaAllocCodeGenerator,
+  kArenaAllocParallelMoveResolver,
   kNumArenaAllocKinds
 };
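
These kind entries pair up positionally with the display-name table added to arena_allocator.cc above, so the two lists must grow together. A small self-contained sketch of a compile-time guard for that kind of pairing (illustrative only; whether the real file carries an equivalent check is not shown here):

    #include <cstddef>

    enum ArenaAllocKind {
      kArenaAllocStackMapStream,
      kArenaAllocCodeGenerator,
      kArenaAllocParallelMoveResolver,
      kNumArenaAllocKinds
    };

    static const char* const kAllocNames[] = {
      "StackMapStm  ",
      "CodeGen      ",
      "ParallelMove ",
    };

    static_assert(sizeof(kAllocNames) / sizeof(kAllocNames[0]) ==
                      static_cast<size_t>(kNumArenaAllocKinds),
                  "One display name per allocation kind");

    int main() { return 0; }
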
 
diff --git a/runtime/base/arena_object.h b/runtime/base/arena_object.h
index ab97d0c..56e35d8 100644
--- a/runtime/base/arena_object.h
+++ b/runtime/base/arena_object.h
@@ -40,6 +40,10 @@
     LOG(FATAL) << "UNREACHABLE";
     UNREACHABLE();
   }
+
+  // NOTE: Providing placement new (and matching delete) for constructing container elements.
+  ALWAYS_INLINE void* operator new(size_t, void* ptr) noexcept { return ptr; }
+  ALWAYS_INLINE void operator delete(void*, void*) noexcept { }
 };
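
The placement operator new is needed because a class that declares its own (fatal or deleted) operator new hides the global placement form, which arena-backed containers rely on to construct elements in place. A self-contained sketch of the pattern, with simplified stand-in types:

    #include <cstddef>
    #include <new>

    class ArenaObjectSketch {
     public:
      // The regular allocation function is off limits (the real class aborts; deleted here).
      void* operator new(size_t) = delete;

      // Placement new and the matching delete, so containers can construct elements in place.
      void* operator new(size_t, void* ptr) noexcept { return ptr; }
      void operator delete(void*, void*) noexcept {}
    };

    class Node : public ArenaObjectSketch {
     public:
      explicit Node(int value) : value_(value) {}
      int value() const { return value_; }

     private:
      int value_;
    };

    int main() {
      alignas(Node) unsigned char storage[sizeof(Node)];
      Node* node = new (storage) Node(42);  // Resolves to the class-provided placement new.
      int result = node->value();
      node->~Node();                        // Manual destruction; no deallocation needed.
      return result == 42 ? 0 : 1;
    }
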
 
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 6b9c8aa..8e42040 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -128,7 +128,11 @@
   // the previous error.
   Runtime* const runtime = Runtime::Current();
   if (!runtime->IsAotCompiler()) {  // Give info if this occurs at runtime.
-    LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c);
+    std::string extra;
+    if (c->GetVerifyErrorClass() != nullptr) {
+      extra = PrettyDescriptor(c->GetVerifyErrorClass());
+    }
+    LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c) << ": " << extra;
   }
 
   CHECK(c->IsErroneous()) << PrettyClass(c) << " " << c->GetStatus();
@@ -1316,13 +1320,6 @@
     // Need to make sure to not copy ArtMethods without doing read barriers since the roots are
     // marked concurrently and we don't hold the classlinker_classes_lock_ when we do the copy.
     boot_class_table_.VisitRoots(buffered_visitor);
-    // TODO: Avoid marking these to enable class unloading.
-    JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
-    for (const ClassLoaderData& data : class_loaders_) {
-      mirror::Object* class_loader = vm->DecodeWeakGlobal(self, data.weak_root);
-      // Don't need to update anything since the class loaders will be updated by SweepSystemWeaks.
-      visitor->VisitRootIfNonNull(&class_loader, RootInfo(kRootVMInternal));
-    }
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_class_roots_) {
       mirror::Class* old_ref = root.Read<kWithoutReadBarrier>();
@@ -4266,6 +4263,11 @@
       ClassTable* const table = InsertClassTableForClassLoader(class_loader);
       mirror::Class* existing = table->UpdateClass(descriptor, h_new_class.Get(),
                                                    ComputeModifiedUtf8Hash(descriptor));
+      if (class_loader != nullptr) {
+        // We updated the class in the class table, perform the write barrier so that the GC knows
+        // about the change.
+        Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
+      }
       CHECK_EQ(existing, klass.Get());
       if (kIsDebugBuild && class_loader == nullptr && dex_cache_image_class_lookup_required_) {
         // Check a class loaded with the system class loader matches one in the image if the class
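
The explicit write barrier is needed because UpdateClass mutates data the GC only discovers through the class loader object, so the loader has to be re-marked by hand. A toy, self-contained illustration of that dirty-card pattern (all types and names below are stand-ins, not ART code):

    #include <cstddef>
    #include <vector>

    // Toy model: the collector only rescans objects whose "card" is dirty, so a mutation it
    // cannot observe directly must dirty the owning object's card by hand.
    struct Heap {
      std::vector<bool> cards;  // One dirty bit per object in this sketch.
      void WriteBarrierEveryFieldOf(size_t obj) { cards[obj] = true; }
    };

    struct ClassTable {
      std::vector<int> classes;
      void UpdateClass(size_t slot, int new_class) { classes[slot] = new_class; }
    };

    int main() {
      Heap heap{std::vector<bool>(4, false)};
      ClassTable table{std::vector<int>(4, 0)};
      const size_t class_loader = 2;                // Object that owns the native-side table.

      table.UpdateClass(1, 42);                     // The GC cannot see this mutation on its own.
      heap.WriteBarrierEveryFieldOf(class_loader);  // So tell it to re-scan the owner.

      return heap.cards[class_loader] ? 0 : 1;
    }
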
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index 7344d13..e160a10 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -454,8 +454,8 @@
   return FormatOf(Opcode()) == k25x;
 }
 
-// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+1.
-inline void Instruction::GetAllArgs25x(uint32_t arg[kMaxVarArgRegs]) const {
+// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+2.
+inline void Instruction::GetAllArgs25x(uint32_t (&arg)[kMaxVarArgRegs25x]) const {
   DCHECK_EQ(FormatOf(Opcode()), k25x);
 
   /*
@@ -500,19 +500,21 @@
    */
   switch (count) {
     case 4:
-      arg[4] = (Fetch16(0) >> 8) & 0x0f;  // vG
+      arg[5] = (Fetch16(0) >> 8) & 0x0f;  // vG
       FALLTHROUGH_INTENDED;
     case 3:
-      arg[3] = (reg_list >> 12) & 0x0f;  // vF
+      arg[4] = (reg_list >> 12) & 0x0f;  // vF
       FALLTHROUGH_INTENDED;
     case 2:
-      arg[2] = (reg_list >> 8) & 0x0f;  // vE
+      arg[3] = (reg_list >> 8) & 0x0f;  // vE
       FALLTHROUGH_INTENDED;
     case 1:
-      arg[1] = (reg_list >> 4) & 0x0f;  // vD
+      arg[2] = (reg_list >> 4) & 0x0f;  // vD
       FALLTHROUGH_INTENDED;
     default:  // case 0
+      // The required lambda 'this' is actually a pair, but the pair is implicit.
       arg[0] = VRegC_25x();  // vC
+      arg[1] = arg[0] + 1;   // vC + 1
       break;
   }
 }
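
After this change the helper always materializes the implicit closure pair first, then the explicit registers: for an invoke-lambda with two extra registers vD and vE, the array should come out as { vC, vC+1, vD, vE, unset, unset }. A self-contained sketch of just that index-shift rule (the decoding details are omitted; only the layout mirrors the hunk above):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    constexpr size_t kMaxVarArgRegs25x = 6;  // Closure pair + up to four extra registers.

    // Simplified stand-in for GetAllArgs25x: 'count' extra registers beyond the closure pair.
    void FillArgs25x(uint32_t vC, const uint32_t extra[4], size_t count,
                     uint32_t (&arg)[kMaxVarArgRegs25x]) {
      arg[0] = vC;      // Low half of the lambda closure.
      arg[1] = vC + 1;  // High half; the pair is implicit in the encoding.
      for (size_t i = 0; i < count; ++i) {
        arg[i + 2] = extra[i];  // vD, vE, vF, vG all shift up by the width of the pair.
      }
    }

    int main() {
      uint32_t arg[kMaxVarArgRegs25x] = {};
      const uint32_t extra[4] = {7, 8, 0, 0};  // vD = v7, vE = v8.
      FillArgs25x(/*vC=*/4, extra, /*count=*/2, arg);
      std::printf("closure pair: v%u/v%u, extras: v%u v%u\n", arg[0], arg[1], arg[2], arg[3]);
      return 0;
    }
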
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index fc4df14..5250b0d 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -322,10 +322,10 @@
     }
     case k25x: {
       if (Opcode() == INVOKE_LAMBDA) {
-        uint32_t arg[kMaxVarArgRegs];
+        uint32_t arg[kMaxVarArgRegs25x];
         GetAllArgs25x(arg);
         const size_t num_extra_var_args = VRegB_25x();
-        DCHECK_LE(num_extra_var_args + 1, kMaxVarArgRegs);
+        DCHECK_LE(num_extra_var_args + 2, arraysize(arg));
 
         // invoke-lambda vC, {vD, vE, vF, vG}
         os << opcode << " v" << arg[0] << ", {";
@@ -333,7 +333,7 @@
           if (i != 0) {
             os << ", ";
           }
-          os << "v" << arg[i+1];
+          os << "v" << arg[i+2];  // Don't print the pair of vC registers. Pair is implicit.
         }
         os << "}";
         break;
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index df2d379..48a12e5 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -180,9 +180,11 @@
     kVerifyVarArgRangeNonZero = 0x100000,
     kVerifyRuntimeOnly        = 0x200000,
     kVerifyError              = 0x400000,
+    kVerifyRegCString         = 0x800000,
   };
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
+  static constexpr uint32_t kMaxVarArgRegs25x = 6;  // Lambda closures take up 2 registers.
 
   // Returns the size (in 2 byte code units) of this instruction.
   size_t SizeInCodeUnits() const {
@@ -408,7 +410,7 @@
   void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const {
     return GetVarArgs(args, Fetch16(0));
   }
-  void GetAllArgs25x(uint32_t args[kMaxVarArgRegs]) const;
+  void GetAllArgs25x(uint32_t (&args)[kMaxVarArgRegs25x]) const;
 
   // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first
   // 16 bits of instruction.
@@ -536,7 +538,7 @@
 
   int GetVerifyTypeArgumentC() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegC | kVerifyRegCField |
-        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
+        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide | kVerifyRegCString));
   }
 
   int GetVerifyExtraFlags() const {
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index a176772..9d7e0c4 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -263,10 +263,10 @@
   V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
   V(0xF3, INVOKE_LAMBDA, "invoke-lambda", k25x, false, kIndexNone, kContinue | kThrow | kInvoke | kExperimental, kVerifyRegC /*TODO: | kVerifyVarArg*/) \
   V(0xF4, UNUSED_F4, "unused-f4", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xF5, UNUSED_F5, "unused-f5", k10x, false, kIndexUnknown, 0, kVerifyError) \
+  V(0xF5, CAPTURE_VARIABLE, "capture-variable", k21c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegBString) \
   /* TODO(iam): get rid of the unused 'false' column */ \
   V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kIndexMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \
-  V(0xF7, UNUSED_F7, "unused-f7", k10x, false, kIndexUnknown, 0, kVerifyError) \
+  V(0xF7, LIBERATE_VARIABLE, "liberate-variable", k22c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCString) \
   V(0xF8, BOX_LAMBDA, "box-lambda", k22x, true, kIndexNone, kContinue | kExperimental, kVerifyRegA | kVerifyRegB) \
   V(0xF9, UNBOX_LAMBDA, "unbox-lambda", k22c, true, kIndexTypeRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
   V(0xFA, UNUSED_FA, "unused-fa", k10x, false, kIndexUnknown, 0, kVerifyError) \
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index d749664..dfd9fcd 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -22,13 +22,16 @@
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
+#include "quick_exception_handler.h"
 #include "stack.h"
 #include "thread.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
 
-NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+
   if (VLOG_IS_ON(deopt)) {
     LOG(INFO) << "Deopting:";
     self->Dump(LOG(INFO));
@@ -39,19 +42,26 @@
   self->QuickDeliverException();
 }
 
-extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-  artDeoptimizeImpl(self);
-}
-
 extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
+
+  // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
+  // specialized visitor that will show whether a method is Quick or Shadow.
+
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
   self->PushDeoptimizationContext(return_value, false, self->GetException());
-  artDeoptimizeImpl(self);
+
+  QuickExceptionHandler exception_handler(self, true);
+  exception_handler.DeoptimizeSingleFrame();
+  exception_handler.UpdateInstrumentationStack();
+  exception_handler.DeoptimizeSingleFrameArchDependentFixup();
+  // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
+  // be caller-saved. This has the downside that we cannot track incorrect register usage down the
+  // line.
+  exception_handler.DoLongJump(false);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1302c5f..1e9e4fb 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -29,8 +29,10 @@
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "quick_exception_handler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
+#include "stack.h"
 #include "debugger.h"
 
 namespace art {
@@ -646,27 +648,86 @@
   if (method->IsAbstract()) {
     ThrowAbstractMethodError(method);
     return 0;
+  }
+
+  JValue tmp_value;
+  ShadowFrame* deopt_frame = self->PopStackedShadowFrame(
+      StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false);
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
+  DCHECK(code_item != nullptr) << PrettyMethod(method);
+  ManagedStack fragment;
+
+  DCHECK(!method->IsNative()) << PrettyMethod(method);
+  uint32_t shorty_len = 0;
+  auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*));
+  const char* shorty = non_proxy_method->GetShorty(&shorty_len);
+
+  JValue result;
+
+  if (deopt_frame != nullptr) {
+    // Coming from single-frame deopt.
+
+    if (kIsDebugBuild) {
+      // Sanity-check: are the methods as expected? We check that the last shadow frame (the bottom
+      // of the call-stack) corresponds to the called method.
+      ShadowFrame* linked = deopt_frame;
+      while (linked->GetLink() != nullptr) {
+        linked = linked->GetLink();
+      }
+      CHECK_EQ(method, linked->GetMethod()) << PrettyMethod(method) << " "
+          << PrettyMethod(linked->GetMethod());
+    }
+
+    if (VLOG_IS_ON(deopt)) {
+      // Print out the stack to verify that it was a single-frame deopt.
+      LOG(INFO) << "Continue-ing from deopt. Stack is:";
+      QuickExceptionHandler::DumpFramesWithType(self, true);
+    }
+
+    mirror::Throwable* pending_exception = nullptr;
+    self->PopDeoptimizationContext(&result, &pending_exception);
+
+    // Push a transition back into managed code onto the linked list in thread.
+    self->PushManagedStackFragment(&fragment);
+
+    // Ensure that the stack is still in order.
+    if (kIsDebugBuild) {
+      class DummyStackVisitor : public StackVisitor {
+       public:
+        explicit DummyStackVisitor(Thread* self_in) SHARED_REQUIRES(Locks::mutator_lock_)
+            : StackVisitor(self_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
+
+        bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+          // Nothing to do here. In a debug build, SanityCheckFrame will do the work in the walking
+          // logic. Just always say we want to continue.
+          return true;
+        }
+      };
+      DummyStackVisitor dsv(self);
+      dsv.WalkStack();
+    }
+
+    // Restore the exception that was pending before deoptimization then interpret the
+    // deoptimized frames.
+    if (pending_exception != nullptr) {
+      self->SetException(pending_exception);
+    }
+    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result);
   } else {
-    DCHECK(!method->IsNative()) << PrettyMethod(method);
     const char* old_cause = self->StartAssertNoThreadSuspension(
         "Building interpreter shadow frame");
-    const DexFile::CodeItem* code_item = method->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(method);
     uint16_t num_regs = code_item->registers_size_;
-    void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
     // No last shadow coming from quick.
-    ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, nullptr, method, 0, memory));
+    ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+        CREATE_SHADOW_FRAME(num_regs, nullptr, method, 0);
+    ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
     size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_;
-    uint32_t shorty_len = 0;
-    auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*));
-    const char* shorty = non_proxy_method->GetShorty(&shorty_len);
     BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len,
                                                       shadow_frame, first_arg_reg);
     shadow_frame_builder.VisitArguments();
     const bool needs_initialization =
         method->IsStatic() && !method->GetDeclaringClass()->IsInitialized();
     // Push a transition back into managed code onto the linked list in thread.
-    ManagedStack fragment;
     self->PushManagedStackFragment(&fragment);
     self->PushShadowFrame(shadow_frame);
     self->EndAssertNoThreadSuspension(old_cause);
@@ -681,24 +742,26 @@
         return 0;
       }
     }
-    JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
-    // Pop transition.
-    self->PopManagedStackFragment(fragment);
 
-    // Request a stack deoptimization if needed
-    ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
-    if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
-      // Push the context of the deoptimization stack so we can restore the return value and the
-      // exception before executing the deoptimized frames.
-      self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
-
-      // Set special exception to cause deoptimization.
-      self->SetException(Thread::GetDeoptimizationException());
-    }
-
-    // No need to restore the args since the method has already been run by the interpreter.
-    return result.GetJ();
+    result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
   }
+
+  // Pop transition.
+  self->PopManagedStackFragment(fragment);
+
+  // Request a stack deoptimization if needed
+  ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
+  if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+    // Push the context of the deoptimization stack so we can restore the return value and the
+    // exception before executing the deoptimized frames.
+    self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+
+    // Set special exception to cause deoptimization.
+    self->SetException(Thread::GetDeoptimizationException());
+  }
+
+  // No need to restore the args since the method has already been run by the interpreter.
+  return result.GetJ();
 }
 
 // Visits arguments on the stack placing them into the args vector, Object* arguments are converted
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index cfe7713..7d664fa 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1963,6 +1963,10 @@
   GrowForUtilization(semi_space_collector_);
   LogGC(kGcCauseHomogeneousSpaceCompact, collector);
   FinishGC(self, collector::kGcTypeFull);
+  {
+    ScopedObjectAccess soa(self);
+    soa.Vm()->UnloadNativeLibraries();
+  }
   return HomogeneousSpaceCompactResult::kSuccess;
 }
 
@@ -2104,6 +2108,10 @@
   DCHECK(collector != nullptr);
   LogGC(kGcCauseCollectorTransition, collector);
   FinishGC(self, collector::kGcTypeFull);
+  {
+    ScopedObjectAccess soa(self);
+    soa.Vm()->UnloadNativeLibraries();
+  }
   int32_t after_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
   int32_t delta_allocated = before_allocated - after_allocated;
   std::string saved_str;
@@ -2588,6 +2596,12 @@
   FinishGC(self, gc_type);
   // Inform DDMS that a GC completed.
   Dbg::GcDidFinish();
+  // Unload native libraries for class unloading. We do this after calling FinishGC to prevent
+  // deadlocks in case the JNI_OnUnload function does allocations.
+  {
+    ScopedObjectAccess soa(self);
+    soa.Vm()->UnloadNativeLibraries();
+  }
   return gc_type;
 }
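
Unloading is deliberately placed after FinishGC because JNI_OnUnload handlers may allocate, and allocation can need locks the collector still holds. A toy, self-contained model of that lock-ordering argument (names illustrative):

    #include <mutex>

    // Toy model of the ordering constraint: allocation takes the collector's lock, and
    // JNI_OnUnload may allocate, so unloading must only run after FinishGC() has dropped it.
    std::mutex gc_lock;

    void JniOnUnloadHandler() {
      std::lock_guard<std::mutex> allocation(gc_lock);  // The allocation path needs the lock.
    }

    void CollectGarbageThenUnload() {
      {
        std::lock_guard<std::mutex> gc(gc_lock);  // Held for the collection itself...
      }                                           // ...and released, as FinishGC() does.
      JniOnUnloadHandler();                       // Unloading here cannot deadlock on gc_lock.
    }

    int main() {
      CollectGarbageThenUnload();
      return 0;
    }
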
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 3ac80c6..f783b04 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -21,6 +21,7 @@
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
+#include "stack.h"
 #include "unstarted_runtime.h"
 
 namespace art {
@@ -330,8 +331,9 @@
   }
   // Set up shadow frame with matching number of reference slots to vregs.
   ShadowFrame* last_shadow_frame = self->GetManagedStack()->GetTopShadowFrame();
-  void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
-  ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, last_shadow_frame, method, 0, memory));
+  ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+      CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, 0);
+  ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
   self->PushShadowFrame(shadow_frame);
 
   size_t cur_reg = num_regs - num_ins;
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 5fbd687..ad34c9a 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -21,12 +21,16 @@
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "mirror/array-inl.h"
+#include "stack.h"
 #include "unstarted_runtime.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
 namespace interpreter {
 
+// All lambda closures have to be a consecutive pair of virtual registers.
+static constexpr size_t kLambdaVirtualRegisterWidth = 2;
+
 void ThrowNullPointerExceptionFromInterpreter() {
   ThrowNullPointerExceptionFromDexPC();
 }
@@ -483,13 +487,16 @@
 }
 
 // Separate declaration is required solely for the attributes.
-template<bool is_range, bool do_assignability_check> SHARED_REQUIRES(Locks::mutator_lock_)
+template <bool is_range,
+          bool do_assignability_check,
+          size_t kVarArgMax>
+    SHARED_REQUIRES(Locks::mutator_lock_)
 static inline bool DoCallCommon(ArtMethod* called_method,
                                 Thread* self,
                                 ShadowFrame& shadow_frame,
                                 JValue* result,
                                 uint16_t number_of_inputs,
-                                uint32_t arg[Instruction::kMaxVarArgRegs],
+                                uint32_t (&arg)[kVarArgMax],
                                 uint32_t vregC) ALWAYS_INLINE;
 
 SHARED_REQUIRES(Locks::mutator_lock_)
@@ -509,13 +516,15 @@
         Dbg::IsForcedInterpreterNeededForCalling(self, target);
 }
 
-template<bool is_range, bool do_assignability_check>
+template <bool is_range,
+          bool do_assignability_check,
+          size_t kVarArgMax>
 static inline bool DoCallCommon(ArtMethod* called_method,
                                 Thread* self,
                                 ShadowFrame& shadow_frame,
                                 JValue* result,
                                 uint16_t number_of_inputs,
-                                uint32_t arg[Instruction::kMaxVarArgRegs],
+                                uint32_t (&arg)[kVarArgMax],
                                 uint32_t vregC) {
   bool string_init = false;
   // Replace calls to String.<init> with equivalent StringFactory call.
@@ -560,10 +569,10 @@
     number_of_inputs--;
 
     // Rewrite the var-args, dropping the 0th argument ("this")
-    for (uint32_t i = 1; i < Instruction::kMaxVarArgRegs; ++i) {
+    for (uint32_t i = 1; i < arraysize(arg); ++i) {
       arg[i - 1] = arg[i];
     }
-    arg[Instruction::kMaxVarArgRegs - 1] = 0;
+    arg[arraysize(arg) - 1] = 0;
 
     // Rewrite the non-var-arg case
     vregC++;  // Skips the 0th vreg in the range ("this").
@@ -576,9 +585,9 @@
 
   // Allocate shadow frame on the stack.
   const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon");
-  void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
-  ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, called_method, 0,
-                                                    memory));
+  ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, 0);
+  ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get();
 
   // Initialize new shadow frame by copying the registers from the callee shadow frame.
   if (do_assignability_check) {
@@ -669,7 +678,7 @@
         AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg);
       }
     } else {
-      DCHECK_LE(number_of_inputs, Instruction::kMaxVarArgRegs);
+      DCHECK_LE(number_of_inputs, arraysize(arg));
 
       for (; arg_index < number_of_inputs; ++arg_index) {
         AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, arg[arg_index]);
@@ -736,12 +745,13 @@
                   const Instruction* inst, uint16_t inst_data, JValue* result) {
   const uint4_t num_additional_registers = inst->VRegB_25x();
   // Argument word count.
-  const uint16_t number_of_inputs = num_additional_registers + 1;
-  // The first input register is always present and is not encoded in the count.
+  const uint16_t number_of_inputs = num_additional_registers + kLambdaVirtualRegisterWidth;
+  // The lambda closure register is always present and is not encoded in the count.
+  // Furthermore, the lambda closure register is always wide, so it counts as 2 inputs.
 
   // TODO: find a cleaner way to separate non-range and range information without duplicating
   //       code.
-  uint32_t arg[Instruction::kMaxVarArgRegs];  // only used in invoke-XXX.
+  uint32_t arg[Instruction::kMaxVarArgRegs25x];  // only used in invoke-XXX.
   uint32_t vregC = 0;   // only used in invoke-XXX-range.
   if (is_range) {
     vregC = inst->VRegC_3rc();
@@ -767,7 +777,7 @@
 
   // TODO: find a cleaner way to separate non-range and range information without duplicating
   //       code.
-  uint32_t arg[Instruction::kMaxVarArgRegs];  // only used in invoke-XXX.
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};  // only used in invoke-XXX.
   uint32_t vregC = 0;
   if (is_range) {
     vregC = inst->VRegC_3rc();
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 7398778..f57bddb 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -34,7 +34,12 @@
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "handle_scope-inl.h"
+#include "lambda/art_lambda_method.h"
 #include "lambda/box_table.h"
+#include "lambda/closure.h"
+#include "lambda/closure_builder-inl.h"
+#include "lambda/leaking_allocator.h"
+#include "lambda/shorty_field_type.h"
 #include "mirror/class-inl.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
@@ -133,32 +138,44 @@
   return success;
 }
 
-// Write out the 'ArtMethod*' into vreg and vreg+1
+// Write out the 'Closure*' into vreg and vreg+1, as if it was a jlong.
 static inline void WriteLambdaClosureIntoVRegs(ShadowFrame& shadow_frame,
-                                               const ArtMethod& called_method,
+                                               const lambda::Closure* lambda_closure,
                                                uint32_t vreg) {
   // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers.
-  uint32_t called_method_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&called_method));
-  uint32_t called_method_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(&called_method)
+  uint32_t closure_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(lambda_closure));
+  uint32_t closure_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(lambda_closure)
                                                     >> BitSizeOf<uint32_t>());
   // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit.
   static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
 
-  DCHECK_NE(called_method_lo | called_method_hi, 0u);
+  DCHECK_NE(closure_lo | closure_hi, 0u);
 
-  shadow_frame.SetVReg(vreg, called_method_lo);
-  shadow_frame.SetVReg(vreg + 1, called_method_hi);
+  shadow_frame.SetVReg(vreg, closure_lo);
+  shadow_frame.SetVReg(vreg + 1, closure_hi);
 }
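
The lo/hi dance above is simply a 64-bit-safe way of parking a native pointer in two 32-bit virtual registers; ReadLambdaClosureFromVRegsOrThrow below reverses it. A self-contained round-trip sketch of the arithmetic (no ART types involved):

    #include <cassert>
    #include <cstdint>

    // Split a pointer-sized value into two 32-bit halves, as WriteLambdaClosureIntoVRegs does.
    void Split(uint64_t value, uint32_t* lo, uint32_t* hi) {
      *lo = static_cast<uint32_t>(value);
      *hi = static_cast<uint32_t>(value >> 32);  // Always 0 when pointers are 32 bits wide.
    }

    // Recombine the halves, as ReadLambdaClosureFromVRegsOrThrow does further below.
    uint64_t Join(uint32_t lo, uint32_t hi) {
      return static_cast<uint64_t>(lo) | (static_cast<uint64_t>(hi) << 32);
    }

    int main() {
      const uint64_t closure_bits = 0x0000007f12345678ULL;  // Stand-in for a Closure* value.
      uint32_t lo = 0;
      uint32_t hi = 0;
      Split(closure_bits, &lo, &hi);
      assert(Join(lo, hi) == closure_bits);
      return 0;
    }
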
 
 // Handles create-lambda instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 // (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
 //
+// The closure must be allocated big enough to hold the data, and should not be
+// pre-initialized. It is initialized with the actual captured variables as a side-effect,
+// although this should be unimportant to the caller since this function also handles storing it to
+// the ShadowFrame.
+//
 // As a work-in-progress implementation, this shoves a pointer to the initialized closure
 // (which references the target ArtMethod) into the target registers vA and vA + 1.
 template<bool do_access_check>
-static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame,
-                                  const Instruction* inst) {
+static inline bool DoCreateLambda(Thread* self,
+                                  const Instruction* inst,
+                                  /*inout*/ShadowFrame& shadow_frame,
+                                  /*inout*/lambda::ClosureBuilder* closure_builder,
+                                  /*inout*/lambda::Closure* uninitialized_closure) {
+  DCHECK(closure_builder != nullptr);
+  DCHECK(uninitialized_closure != nullptr);
+  DCHECK_ALIGNED(uninitialized_closure, alignof(lambda::Closure));
+
   /*
    * create-lambda is opcode 0x21c
    * - vA is the target register where the closure will be stored into
@@ -171,16 +188,69 @@
   ArtMethod* const called_method = FindMethodFromCode<kStatic, do_access_check>(
       method_idx, &receiver, sf_method, self);
 
-  uint32_t vregA = inst->VRegA_21c();
+  uint32_t vreg_dest_closure = inst->VRegA_21c();
 
   if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
     CHECK(self->IsExceptionPending());
-    shadow_frame.SetVReg(vregA, 0u);
-    shadow_frame.SetVReg(vregA + 1, 0u);
+    shadow_frame.SetVReg(vreg_dest_closure, 0u);
+    shadow_frame.SetVReg(vreg_dest_closure + 1, 0u);
     return false;
   }
 
-  WriteLambdaClosureIntoVRegs(shadow_frame, *called_method, vregA);
+  lambda::ArtLambdaMethod* initialized_lambda_method;
+  // Initialize the ArtLambdaMethod with the right data.
+  {
+    lambda::ArtLambdaMethod* uninitialized_lambda_method =
+        reinterpret_cast<lambda::ArtLambdaMethod*>(
+            lambda::LeakingAllocator::AllocateMemory(self, sizeof(lambda::ArtLambdaMethod)));
+
+    std::string captured_variables_shorty = closure_builder->GetCapturedVariableShortyTypes();
+    std::string captured_variables_long_type_desc;
+
+    // Synthesize a long type descriptor from the short one.
+    for (char shorty : captured_variables_shorty) {
+      lambda::ShortyFieldType shorty_field_type(shorty);
+      if (shorty_field_type.IsObject()) {
+        // Not the true type, but good enough until we implement verifier support.
+        captured_variables_long_type_desc += "Ljava/lang/Object;";
+        UNIMPLEMENTED(FATAL) << "create-lambda with an object captured variable";
+      } else if (shorty_field_type.IsLambda()) {
+        // Not the true type, but good enough until we implement verifier support.
+        captured_variables_long_type_desc += "Ljava/lang/Runnable;";
+        UNIMPLEMENTED(FATAL) << "create-lambda with a lambda captured variable";
+      } else {
+        // The primitive types have the same length shorty or not, so this is always correct.
+        DCHECK(shorty_field_type.IsPrimitive());
+        captured_variables_long_type_desc += shorty_field_type;
+      }
+    }
+
+    // Copy strings to dynamically allocated storage. This leaks, but that's ok. Fix it later.
+    // TODO: Strings need to come from the DexFile, so they won't need their own allocations.
+    char* captured_variables_type_desc = lambda::LeakingAllocator::MakeFlexibleInstance<char>(
+        self,
+        captured_variables_long_type_desc.size() + 1);
+    strcpy(captured_variables_type_desc, captured_variables_long_type_desc.c_str());
+    char* captured_variables_shorty_copy = lambda::LeakingAllocator::MakeFlexibleInstance<char>(
+        self,
+        captured_variables_shorty.size() + 1);
+    strcpy(captured_variables_shorty_copy, captured_variables_shorty.c_str());
+
+    new (uninitialized_lambda_method) lambda::ArtLambdaMethod(called_method,
+                                                              captured_variables_type_desc,
+                                                              captured_variables_shorty_copy,
+                                                              true);  // innate lambda
+    initialized_lambda_method = uninitialized_lambda_method;
+  }
+
+  // Write all the closure captured variables and the closure header into the closure.
+  lambda::Closure* initialized_closure;
+  {
+    initialized_closure =
+        closure_builder->CreateInPlace(uninitialized_closure, initialized_lambda_method);
+  }
+
+  WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, initialized_closure, vreg_dest_closure);
   return true;
 }
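
The descriptor-synthesis loop above widens each captured-variable shorty character into a full field descriptor: primitives map to themselves, while object and lambda captures get placeholder descriptors until verifier support exists. A self-contained sketch of that mapping (the placeholder strings mirror the code above; the backslash as the lambda shorty character is an assumption of this sketch):

    #include <iostream>
    #include <string>

    // Expand a captured-variable shorty string such as "IJL" into a long field-type descriptor.
    std::string SynthesizeLongDescriptor(const std::string& shorty) {
      std::string out;
      for (char c : shorty) {
        switch (c) {
          case 'L':   // Object capture: placeholder until the verifier knows the exact type.
            out += "Ljava/lang/Object;";
            break;
          case '\\':  // Lambda capture (backslash shorty assumed): placeholder interface type.
            out += "Ljava/lang/Runnable;";
            break;
          default:    // Primitive shorties ('Z', 'B', 'C', 'S', 'I', 'J', 'F', 'D') pass through.
            out += c;
            break;
        }
      }
      return out;
    }

    int main() {
      std::cout << SynthesizeLongDescriptor("IJL") << "\n";  // Prints "IJLjava/lang/Object;".
      return 0;
    }
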
 
@@ -189,13 +259,11 @@
 // Validates that the art method points to a valid lambda function, otherwise throws
 // an exception and returns null.
 // (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-static inline ArtMethod* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame,
-                                                           uint32_t vreg)
+static inline lambda::Closure* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame,
+                                                                 uint32_t vreg)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  // TODO(iam): Introduce a closure abstraction that will contain the captured variables
-  // instead of just an ArtMethod.
-  // This is temporarily using 2 vregs because a native ArtMethod can be up to 64-bit,
-  // but once proper variable capture is implemented it will only use 1 vreg.
+  // Lambda closures take up a consecutive pair of 2 virtual registers.
+  // On 32-bit the high bits are always 0.
   uint32_t vc_value_lo = shadow_frame.GetVReg(vreg);
   uint32_t vc_value_hi = shadow_frame.GetVReg(vreg + 1);
 
@@ -204,17 +272,285 @@
 
   // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit.
   static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
-  ArtMethod* const called_method = reinterpret_cast<ArtMethod* const>(vc_value_ptr);
+  lambda::Closure* const lambda_closure = reinterpret_cast<lambda::Closure*>(vc_value_ptr);
+  DCHECK_ALIGNED(lambda_closure, alignof(lambda::Closure));
 
   // Guard against the user passing a null closure, which is odd but (sadly) semantically valid.
-  if (UNLIKELY(called_method == nullptr)) {
+  if (UNLIKELY(lambda_closure == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
     return nullptr;
-  } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
+  } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(lambda_closure->GetTargetMethod()))) {
+    // Sanity check against data corruption.
     return nullptr;
   }
 
-  return called_method;
+  return lambda_closure;
+}
+
+// Forward declaration for lock annotations. See below for documentation.
+template <bool do_access_check>
+static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame,
+                                                               uint32_t string_idx)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+// Find the c-string data corresponding to a dex file's string index.
+// Otherwise, returns null if not found and throws a VerifyError.
+//
+// Note that with do_access_check=false, we never return null because the verifier
+// must guard against invalid string indices.
+// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
+template <bool do_access_check>
+static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame,
+                                                               uint32_t string_idx) {
+  ArtMethod* method = shadow_frame.GetMethod();
+  const DexFile* dex_file = method->GetDexFile();
+
+  mirror::Class* declaring_class = method->GetDeclaringClass();
+  if (!do_access_check) {
+    // MethodVerifier refuses methods with string_idx out of bounds.
+    DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings());
+  } else {
+    // Access checks enabled: perform the string index bounds check ourselves.
+    if (string_idx >= dex_file->GetHeader().string_ids_size_) {
+      ThrowVerifyError(declaring_class, "String index '%" PRIu32 "' out of bounds",
+                       string_idx);
+      return nullptr;
+    }
+  }
+
+  const char* type_string = dex_file->StringDataByIdx(string_idx);
+
+  if (UNLIKELY(type_string == nullptr)) {
+    CHECK_EQ(false, do_access_check)
+        << " verifier should've caught invalid string index " << string_idx;
+    CHECK_EQ(true, do_access_check)
+        << " string idx size check should've caught invalid string index " << string_idx;
+  }
+
+  return type_string;
+}
+
+// Handles capture-variable instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
+template<bool do_access_check>
+static inline bool DoCaptureVariable(Thread* self,
+                                     const Instruction* inst,
+                                     /*inout*/ShadowFrame& shadow_frame,
+                                     /*inout*/lambda::ClosureBuilder* closure_builder) {
+  DCHECK(closure_builder != nullptr);
+  using lambda::ShortyFieldType;
+  /*
+   * capture-variable is opcode 0xf6, fmt 0x21c
+   * - vA is the source register of the variable that will be captured
+   * - vB is the string ID of the variable's type that will be captured
+   */
+  const uint32_t source_vreg = inst->VRegA_21c();
+  const uint32_t string_idx = inst->VRegB_21c();
+  // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type.
+
+  const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame,
+                                                                                  string_idx);
+  if (UNLIKELY(type_string == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+
+  char type_first_letter = type_string[0];
+  ShortyFieldType shorty_type;
+  if (do_access_check &&
+      UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) {  // NOLINT: [whitespace/comma] [3]
+    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
+                     "capture-variable vB must be a valid type");
+    return false;
+  } else {
+    // Already verified that the type is valid.
+    shorty_type = ShortyFieldType(type_first_letter);
+  }
+
+  const size_t captured_variable_count = closure_builder->GetCaptureCount();
+
+  // Note: types are specified explicitly so that the closure is packed tightly.
+  switch (shorty_type) {
+    case ShortyFieldType::kBoolean: {
+      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
+      closure_builder->CaptureVariablePrimitive<bool>(primitive_narrow_value);
+      break;
+    }
+    case ShortyFieldType::kByte: {
+      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
+      closure_builder->CaptureVariablePrimitive<int8_t>(primitive_narrow_value);
+      break;
+    }
+    case ShortyFieldType::kChar: {
+      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
+      closure_builder->CaptureVariablePrimitive<uint16_t>(primitive_narrow_value);
+      break;
+    }
+    case ShortyFieldType::kShort: {
+      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
+      closure_builder->CaptureVariablePrimitive<int16_t>(primitive_narrow_value);
+      break;
+    }
+    case ShortyFieldType::kInt: {
+      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
+      closure_builder->CaptureVariablePrimitive<int32_t>(primitive_narrow_value);
+      break;
+    }
+    case ShortyFieldType::kDouble: {
+      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegDouble(source_vreg));
+      break;
+    }
+    case ShortyFieldType::kFloat: {
+      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegFloat(source_vreg));
+      break;
+    }
+    case ShortyFieldType::kLambda: {
+      UNIMPLEMENTED(FATAL) << " capture-variable with type kLambda";
+      // TODO: Capturing lambdas recursively will be done at a later time.
+      UNREACHABLE();
+    }
+    case ShortyFieldType::kLong: {
+      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegLong(source_vreg));
+      break;
+    }
+    case ShortyFieldType::kObject: {
+      closure_builder->CaptureVariableObject(shadow_frame.GetVRegReference(source_vreg));
+      UNIMPLEMENTED(FATAL) << " capture-variable with type kObject";
+      // TODO: finish implementing this. disabled for now since we can't track lambda refs for GC.
+      UNREACHABLE();
+    }
+
+    default:
+      LOG(FATAL) << "Invalid shorty type value " << shorty_type;
+      UNREACHABLE();
+  }
+
+  DCHECK_EQ(captured_variable_count + 1, closure_builder->GetCaptureCount());
+
+  return true;
+}
+
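
Taken together, the expected flow is one capture-variable per captured value, each appending to the ClosureBuilder, followed by a create-lambda that freezes the accumulated state into a Closure. A minimal self-contained sketch of that accumulation step (toy builder; the real lambda::ClosureBuilder infers the shorty from the captured type and is only partially visible in this change):

    #include <cstddef>
    #include <cstdint>
    #include <string>
    #include <vector>

    // Toy stand-in for lambda::ClosureBuilder: each capture appends a value and a shorty char.
    class ClosureBuilderSketch {
     public:
      template <typename T>
      void CaptureVariablePrimitive(T value, char shorty) {
        values_.push_back(static_cast<uint64_t>(value));
        shorty_ += shorty;
      }
      size_t GetCaptureCount() const { return values_.size(); }
      const std::string& GetCapturedVariableShortyTypes() const { return shorty_; }

     private:
      std::vector<uint64_t> values_;
      std::string shorty_;
    };

    int main() {
      ClosureBuilderSketch builder;
      builder.CaptureVariablePrimitive<int32_t>(42, 'I');       // capture-variable v1, "I"
      builder.CaptureVariablePrimitive<int64_t>(1000000, 'J');  // capture-variable v2, "J"
      // create-lambda would now freeze this state into a Closure next to its ArtLambdaMethod.
      const bool ok = builder.GetCaptureCount() == 2 &&
                      builder.GetCapturedVariableShortyTypes() == "IJ";
      return ok ? 0 : 1;
    }
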
+// Handles liberate-variable instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
+template<bool do_access_check>
+static inline bool DoLiberateVariable(Thread* self,
+                                      const Instruction* inst,
+                                      size_t captured_variable_index,
+                                      /*inout*/ShadowFrame& shadow_frame) {
+  using lambda::ShortyFieldType;
+  /*
+   * liberate-variable is opcode 0xf7, fmt 0x22c
+   * - vA is the destination register
+   * - vB is the register with the lambda closure in it
+   * - vC is the string ID which needs to be a valid field type descriptor
+   */
+
+  const uint32_t dest_vreg = inst->VRegA_22c();
+  const uint32_t closure_vreg = inst->VRegB_22c();
+  const uint32_t string_idx = inst->VRegC_22c();
+  // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type.
+
+  // Synthesize a long type descriptor from a shorty type descriptor list.
+  // TODO: Fix the dex encoding to contain the long and short type descriptors.
+  const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame,
+                                                                                  string_idx);
+  if (UNLIKELY(do_access_check && type_string == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    shadow_frame.SetVReg(dest_vreg, 0);
+    return false;
+  }
+
+  char type_first_letter = type_string[0];
+  ShortyFieldType shorty_type;
+  if (do_access_check &&
+      UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) {  // NOLINT: [whitespace/comma] [3]
+    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
+                     "liberate-variable vC must be a valid type");
+    shadow_frame.SetVReg(dest_vreg, 0);
+    return false;
+  } else {
+    // Already verified that the type is valid.
+    shorty_type = ShortyFieldType(type_first_letter);
+  }
+
+  // Check for closure being null *after* the type check.
+  // This way we can access the type info in case we fail later, to know how many vregs to clear.
+  const lambda::Closure* lambda_closure =
+      ReadLambdaClosureFromVRegsOrThrow(/*inout*/shadow_frame, closure_vreg);
+
+  // Failed lambda target runtime check, an exception was raised.
+  if (UNLIKELY(lambda_closure == nullptr)) {
+    CHECK(self->IsExceptionPending());
+
+    // Clear the destination vreg(s) to be safe.
+    shadow_frame.SetVReg(dest_vreg, 0);
+    if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
+      shadow_frame.SetVReg(dest_vreg + 1, 0);
+    }
+    return false;
+  }
+
+  if (do_access_check &&
+      UNLIKELY(captured_variable_index >= lambda_closure->GetNumberOfCapturedVariables())) {
+    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
+                     "liberate-variable captured variable index %zu out of bounds",
+                     captured_variable_index);
+    // Clear the destination vreg(s) to be safe.
+    shadow_frame.SetVReg(dest_vreg, 0);
+    if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
+      shadow_frame.SetVReg(dest_vreg + 1, 0);
+    }
+    return false;
+  }
+
+  // Verify that the runtime type of the captured-variable matches the requested dex type.
+  if (do_access_check) {
+    ShortyFieldType actual_type = lambda_closure->GetCapturedShortyType(captured_variable_index);
+    if (actual_type != shorty_type) {
+      ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
+                       "cannot liberate-variable of runtime type '%c' to dex type '%c'",
+                       static_cast<char>(actual_type),
+                       static_cast<char>(shorty_type));
+
+      shadow_frame.SetVReg(dest_vreg, 0);
+      if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
+        shadow_frame.SetVReg(dest_vreg + 1, 0);
+      }
+      return false;
+    }
+
+    if (actual_type.IsLambda() || actual_type.IsObject()) {
+      UNIMPLEMENTED(FATAL) << "liberate-variable type checks needs to "
+                           << "parse full type descriptor for objects and lambdas";
+    }
+  }
+
+  // Unpack the captured variable from the closure into the correct type, then save it to the vreg.
+  if (shorty_type.IsPrimitiveNarrow()) {
+    uint32_t primitive_narrow_value =
+        lambda_closure->GetCapturedPrimitiveNarrow(captured_variable_index);
+    shadow_frame.SetVReg(dest_vreg, primitive_narrow_value);
+  } else if (shorty_type.IsPrimitiveWide()) {
+    uint64_t primitive_wide_value =
+        lambda_closure->GetCapturedPrimitiveWide(captured_variable_index);
+    shadow_frame.SetVRegLong(dest_vreg, static_cast<int64_t>(primitive_wide_value));
+  } else if (shorty_type.IsObject()) {
+    mirror::Object* unpacked_object =
+        lambda_closure->GetCapturedObject(captured_variable_index);
+    shadow_frame.SetVRegReference(dest_vreg, unpacked_object);
+
+    UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack objects yet";
+  } else if (shorty_type.IsLambda()) {
+    UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack lambdas yet";
+  } else {
+    LOG(FATAL) << "unreachable";
+    UNREACHABLE();
+  }
+
+  return true;
 }
 
 template<bool do_access_check>
@@ -229,22 +565,24 @@
    *
    * - reading var-args for 0x25 gets us vD,vE,vF,vG (but not vB)
    */
-  uint32_t vC = inst->VRegC_25x();
-  ArtMethod* const called_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vC);
+  uint32_t vreg_closure = inst->VRegC_25x();
+  const lambda::Closure* lambda_closure =
+      ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vreg_closure);
 
   // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(called_method == nullptr)) {
+  if (UNLIKELY(lambda_closure == nullptr)) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
     return false;
   }
 
+  ArtMethod* const called_method = lambda_closure->GetTargetMethod();
   // Invoke a non-range lambda
   return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
                                               result);
 }
 
-// Handles invoke-XXX/range instructions.
+// Handles invoke-XXX/range instructions (other than invoke-lambda[-range]).
 // Returns true on success, otherwise throws an exception and returns false.
 template<InvokeType type, bool is_range, bool do_access_check>
 static inline bool DoInvoke(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
@@ -521,17 +859,17 @@
   uint32_t vreg_target_object = inst->VRegA_22x(inst_data);
   uint32_t vreg_source_closure = inst->VRegB_22x();
 
-  ArtMethod* closure_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame,
-                                                                vreg_source_closure);
+  lambda::Closure* lambda_closure = ReadLambdaClosureFromVRegsOrThrow(shadow_frame,
+                                                                      vreg_source_closure);
 
   // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(closure_method == nullptr)) {
+  if (UNLIKELY(lambda_closure == nullptr)) {
     CHECK(self->IsExceptionPending());
     return false;
   }
 
   mirror::Object* closure_as_object =
-      Runtime::Current()->GetLambdaBoxTable()->BoxLambda(closure_method);
+      Runtime::Current()->GetLambdaBoxTable()->BoxLambda(lambda_closure);
 
   // Failed to box the lambda, an exception was raised.
   if (UNLIKELY(closure_as_object == nullptr)) {
@@ -564,16 +902,16 @@
     return false;
   }
 
-  ArtMethod* unboxed_closure = nullptr;
+  lambda::Closure* unboxed_closure = nullptr;
   // Raise an exception if unboxing fails.
   if (!Runtime::Current()->GetLambdaBoxTable()->UnboxLambda(boxed_closure_object,
-                                                            &unboxed_closure)) {
+                                                            /*out*/&unboxed_closure)) {
     CHECK(self->IsExceptionPending());
     return false;
   }
 
   DCHECK(unboxed_closure != nullptr);
-  WriteLambdaClosureIntoVRegs(shadow_frame, *unboxed_closure, vreg_target_closure);
+  WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, unboxed_closure, vreg_target_closure);
   return true;
 }
 
@@ -650,10 +988,13 @@
 #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK
 
 // Explicitly instantiate all DoCreateLambda functions.
-#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check)                                    \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                 \
-bool DoCreateLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame,              \
-                        const Instruction* inst)
+#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check)                                                 \
+template SHARED_REQUIRES(Locks::mutator_lock_)                                                    \
+bool DoCreateLambda<_do_check>(Thread* self,                                                      \
+                               const Instruction* inst,                                           \
+                               /*inout*/ShadowFrame& shadow_frame,                                \
+                               /*inout*/lambda::ClosureBuilder* closure_builder,                  \
+                               /*inout*/lambda::Closure* uninitialized_closure);
 
 EXPLICIT_DO_CREATE_LAMBDA_DECL(false);  // create-lambda
 EXPLICIT_DO_CREATE_LAMBDA_DECL(true);   // create-lambda
@@ -689,7 +1030,29 @@
 EXPLICIT_DO_UNBOX_LAMBDA_DECL(true);   // unbox-lambda
 #undef EXPLICIT_DO_BOX_LAMBDA_DECL
 
+// Explicitly instantiate all DoCaptureVariable functions.
+#define EXPLICIT_DO_CAPTURE_VARIABLE_DECL(_do_check)                                    \
+template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
+bool DoCaptureVariable<_do_check>(Thread* self,                                         \
+                                  const Instruction* inst,                              \
+                                  ShadowFrame& shadow_frame,                            \
+                                  lambda::ClosureBuilder* closure_builder);
 
+EXPLICIT_DO_CAPTURE_VARIABLE_DECL(false);  // capture-variable
+EXPLICIT_DO_CAPTURE_VARIABLE_DECL(true);   // capture-variable
+#undef EXPLICIT_DO_CAPTURE_VARIABLE_DECL
+
+// Explicitly instantiate all DoLiberateVariable functions.
+#define EXPLICIT_DO_LIBERATE_VARIABLE_DECL(_do_check)                                   \
+template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
+bool DoLiberateVariable<_do_check>(Thread* self,                                        \
+                                   const Instruction* inst,                             \
+                                   size_t captured_variable_index,                      \
+                                   ShadowFrame& shadow_frame);
+
+EXPLICIT_DO_LIBERATE_VARIABLE_DECL(false);  // liberate-variable
+EXPLICIT_DO_LIBERATE_VARIABLE_DECL(true);   // liberate-variable
+#undef EXPLICIT_DO_LIBERATE_VARIABLE_DECL
 }  // namespace interpreter
 }  // namespace art
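
A compilable sketch of the explicit-instantiation pattern that the EXPLICIT_DO_*_DECL macros above expand to; all names below are illustrative stand-ins, not ART code:

#include <iostream>

template <bool kDoAccessCheck>
bool DoSomething(int value) {
  // The access-checking flavour rejects negative values; the unchecked one does not.
  if (kDoAccessCheck && value < 0) {
    return false;
  }
  std::cout << "value = " << value << '\n';
  return true;
}

// Explicit instantiations: both boolean variants are emitted in this
// translation unit so other files can link against them, which is what the
// macros above do for the Do*Lambda/Do*Variable templates.
template bool DoSomething<false>(int value);
template bool DoSomething<true>(int value);

int main() {
  DoSomething<true>(-1);   // checked variant, returns false
  DoSomething<false>(42);  // unchecked variant, prints the value
  return 0;
}
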
 
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 72e2ba0..9677d79 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -17,9 +17,13 @@
 #if !defined(__clang__)
 // Clang 3.4 fails to build the goto interpreter implementation.
 
+
+#include "base/stl_util.h"  // MakeUnique
 #include "interpreter_common.h"
 #include "safe_math.h"
 
+#include <memory>  // std::unique_ptr
+
 namespace art {
 namespace interpreter {
 
@@ -179,6 +183,9 @@
     }
   }
 
+  std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
+  size_t lambda_captured_variable_index = 0;
+
   // Jump to first instruction.
   ADVANCE(0);
   UNREACHABLE_CODE_CHECK();
@@ -2412,7 +2419,20 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_EXPERIMENTAL_INSTRUCTION_START(CREATE_LAMBDA) {
-    bool success = DoCreateLambda<true>(self, shadow_frame, inst);
+    if (lambda_closure_builder == nullptr) {
+      // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables.
+      lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
+    }
+
+    // TODO: these allocations should not leak, and the lambda method should not be local.
+    lambda::Closure* lambda_closure =
+        reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize()));
+    bool success = DoCreateLambda<do_access_check>(self,
+                                                   inst,
+                                                   /*inout*/shadow_frame,
+                                                   /*inout*/lambda_closure_builder.get(),
+                                                   /*inout*/lambda_closure);
+    lambda_closure_builder.reset(nullptr);  // reset state of variables captured
     POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
   }
   HANDLE_EXPERIMENTAL_INSTRUCTION_END();
@@ -2429,6 +2449,31 @@
   }
   HANDLE_EXPERIMENTAL_INSTRUCTION_END();
 
+  HANDLE_EXPERIMENTAL_INSTRUCTION_START(CAPTURE_VARIABLE) {
+    if (lambda_closure_builder == nullptr) {
+      lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
+    }
+
+    bool success = DoCaptureVariable<do_access_check>(self,
+                                                      inst,
+                                                      /*inout*/shadow_frame,
+                                                      /*inout*/lambda_closure_builder.get());
+
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
+
+  HANDLE_EXPERIMENTAL_INSTRUCTION_START(LIBERATE_VARIABLE) {
+    bool success = DoLiberateVariable<do_access_check>(self,
+                                                       inst,
+                                                       lambda_captured_variable_index,
+                                                       /*inout*/shadow_frame);
+    // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...'
+    lambda_captured_variable_index++;
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_3E)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2465,14 +2510,6 @@
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_INSTRUCTION_START(UNUSED_F5)
-    UnexpectedOpcode(inst, shadow_frame);
-  HANDLE_INSTRUCTION_END();
-
-  HANDLE_INSTRUCTION_START(UNUSED_F7)
-    UnexpectedOpcode(inst, shadow_frame);
-  HANDLE_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(UNUSED_FA)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
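
The capture-variable and create-lambda handlers above share one flow: a ClosureBuilder is created lazily on the first capture, accumulates values together with their shorty types, and is consumed and reset when create-lambda materializes the closure. A self-contained toy sketch of that flow; ToyClosureBuilder is a hypothetical stand-in, not the ART class:

#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

// Records captured primitives and their shorty characters, like the real builder.
class ToyClosureBuilder {
 public:
  void CapturePrimitive(uint32_t value, char shorty) {
    values_.push_back(value);
    shorty_types_ += shorty;
  }
  size_t GetCaptureCount() const { return values_.size(); }
  const std::string& GetShortyTypes() const { return shorty_types_; }

 private:
  std::vector<uint32_t> values_;
  std::string shorty_types_;
};

int main() {
  std::unique_ptr<ToyClosureBuilder> builder;

  // capture-variable: lazily create the builder on first use, then accumulate.
  if (builder == nullptr) {
    builder = std::make_unique<ToyClosureBuilder>();
  }
  builder->CapturePrimitive(42, 'I');
  builder->CapturePrimitive(7, 'I');

  // create-lambda: consume the accumulated captures, then reset the builder,
  // mirroring lambda_closure_builder.reset(nullptr) in the interpreter loops.
  assert(builder->GetCaptureCount() == 2);
  assert(builder->GetShortyTypes() == "II");
  builder.reset();
  return 0;
}
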
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index b5cc11e..083dfb5 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -14,9 +14,12 @@
  * limitations under the License.
  */
 
+#include "base/stl_util.h"  // MakeUnique
 #include "interpreter_common.h"
 #include "safe_math.h"
 
+#include <memory>  // std::unique_ptr
+
 namespace art {
 namespace interpreter {
 
@@ -82,6 +85,11 @@
   const uint16_t* const insns = code_item->insns_;
   const Instruction* inst = Instruction::At(insns + dex_pc);
   uint16_t inst_data;
+
+  // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
+  // to keep this live for the scope of the entire function call.
+  std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
+  size_t lambda_captured_variable_index = 0;
   while (true) {
     dex_pc = inst->GetDexPc(insns);
     shadow_frame.SetDexPC(dex_pc);
@@ -2235,19 +2243,63 @@
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
+      case Instruction::CAPTURE_VARIABLE: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        if (lambda_closure_builder == nullptr) {
+          lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
+        }
+
+        PREAMBLE();
+        bool success = DoCaptureVariable<do_access_check>(self,
+                                                          inst,
+                                                          /*inout*/shadow_frame,
+                                                          /*inout*/lambda_closure_builder.get());
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
       case Instruction::CREATE_LAMBDA: {
         if (!IsExperimentalInstructionEnabled(inst)) {
           UnexpectedOpcode(inst, shadow_frame);
         }
 
         PREAMBLE();
-        bool success = DoCreateLambda<do_access_check>(self, shadow_frame, inst);
+
+        if (lambda_closure_builder == nullptr) {
+          // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables.
+          lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
+        }
+
+        // TODO: these allocations should not leak, and the lambda method should not be local.
+        lambda::Closure* lambda_closure =
+            reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize()));
+        bool success = DoCreateLambda<do_access_check>(self,
+                                                       inst,
+                                                       /*inout*/shadow_frame,
+                                                       /*inout*/lambda_closure_builder.get(),
+                                                       /*inout*/lambda_closure);
+        lambda_closure_builder.reset(nullptr);  // reset state of variables captured
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
-      case Instruction::UNUSED_F4:
-      case Instruction::UNUSED_F5:
-      case Instruction::UNUSED_F7: {
+      case Instruction::LIBERATE_VARIABLE: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        PREAMBLE();
+        bool success = DoLiberateVariable<do_access_check>(self,
+                                                           inst,
+                                                           lambda_captured_variable_index,
+                                                           /*inout*/shadow_frame);
+        // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...'
+        lambda_captured_variable_index++;
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::UNUSED_F4: {
         if (!IsExperimentalInstructionEnabled(inst)) {
           UnexpectedOpcode(inst, shadow_frame);
         }
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 531e039..b5e28e9 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -60,7 +60,7 @@
       : path_(path),
         handle_(handle),
         needs_native_bridge_(false),
-        class_loader_(env->NewGlobalRef(class_loader)),
+        class_loader_(env->NewWeakGlobalRef(class_loader)),
         jni_on_load_lock_("JNI_OnLoad lock"),
         jni_on_load_cond_("JNI_OnLoad condition variable", jni_on_load_lock_),
         jni_on_load_thread_id_(self->GetThreadId()),
@@ -70,11 +70,11 @@
   ~SharedLibrary() {
     Thread* self = Thread::Current();
     if (self != nullptr) {
-      self->GetJniEnv()->DeleteGlobalRef(class_loader_);
+      self->GetJniEnv()->DeleteWeakGlobalRef(class_loader_);
     }
   }
 
-  jobject GetClassLoader() const {
+  jweak GetClassLoader() const {
     return class_loader_;
   }
 
@@ -131,7 +131,13 @@
     return needs_native_bridge_;
   }
 
-  void* FindSymbol(const std::string& symbol_name) {
+  void* FindSymbol(const std::string& symbol_name, const char* shorty = nullptr) {
+    return NeedsNativeBridge()
+        ? FindSymbolWithNativeBridge(symbol_name.c_str(), shorty)
+        : FindSymbolWithoutNativeBridge(symbol_name.c_str());
+  }
+
+  void* FindSymbolWithoutNativeBridge(const std::string& symbol_name) {
     CHECK(!NeedsNativeBridge());
 
     return dlsym(handle_, symbol_name.c_str());
@@ -160,9 +166,9 @@
   // True if a native bridge is required.
   bool needs_native_bridge_;
 
-  // The ClassLoader this library is associated with, a global JNI reference that is
+  // The ClassLoader this library is associated with, a weak global JNI reference that is
   // created/deleted with the scope of the library.
-  const jobject class_loader_;
+  const jweak class_loader_;
 
   // Guards remaining items.
   Mutex jni_on_load_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -184,7 +190,10 @@
     STLDeleteValues(&libraries_);
   }
 
-  void Dump(std::ostream& os) const {
+  // NO_THREAD_SAFETY_ANALYSIS since this may be called from Dumpable. Dumpable can't be annotated
+  // properly due to the template. The caller should be holding the jni_libraries_lock_.
+  void Dump(std::ostream& os) const NO_THREAD_SAFETY_ANALYSIS {
+    Locks::jni_libraries_lock_->AssertHeld(Thread::Current());
     bool first = true;
     for (const auto& library : libraries_) {
       if (!first) {
@@ -195,16 +204,17 @@
     }
   }
 
-  size_t size() const {
+  size_t size() const REQUIRES(Locks::jni_libraries_lock_) {
     return libraries_.size();
   }
 
-  SharedLibrary* Get(const std::string& path) {
+  SharedLibrary* Get(const std::string& path) REQUIRES(Locks::jni_libraries_lock_) {
     auto it = libraries_.find(path);
     return (it == libraries_.end()) ? nullptr : it->second;
   }
 
-  void Put(const std::string& path, SharedLibrary* library) {
+  void Put(const std::string& path, SharedLibrary* library)
+      REQUIRES(Locks::jni_libraries_lock_) {
     libraries_.Put(path, library);
   }
 
@@ -217,24 +227,18 @@
     const mirror::ClassLoader* declaring_class_loader = m->GetDeclaringClass()->GetClassLoader();
     ScopedObjectAccessUnchecked soa(Thread::Current());
     for (const auto& lib : libraries_) {
-      SharedLibrary* library = lib.second;
+      SharedLibrary* const library = lib.second;
       if (soa.Decode<mirror::ClassLoader*>(library->GetClassLoader()) != declaring_class_loader) {
         // We only search libraries loaded by the appropriate ClassLoader.
         continue;
       }
       // Try the short name then the long name...
-      void* fn;
-      if (library->NeedsNativeBridge()) {
-        const char* shorty = m->GetShorty();
-        fn = library->FindSymbolWithNativeBridge(jni_short_name, shorty);
-        if (fn == nullptr) {
-          fn = library->FindSymbolWithNativeBridge(jni_long_name, shorty);
-        }
-      } else {
-        fn = library->FindSymbol(jni_short_name);
-        if (fn == nullptr) {
-          fn = library->FindSymbol(jni_long_name);
-        }
+      const char* shorty = library->NeedsNativeBridge()
+          ? m->GetShorty()
+          : nullptr;
+      void* fn = library->FindSymbol(jni_short_name, shorty);
+      if (fn == nullptr) {
+        fn = library->FindSymbol(jni_long_name, shorty);
       }
       if (fn != nullptr) {
         VLOG(jni) << "[Found native code for " << PrettyMethod(m)
@@ -249,10 +253,49 @@
     return nullptr;
   }
 
- private:
-  AllocationTrackingSafeMap<std::string, SharedLibrary*, kAllocatorTagJNILibraries> libraries_;
-};
+  // Unload native libraries with cleared class loaders.
+  void UnloadNativeLibraries()
+      REQUIRES(!Locks::jni_libraries_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    typedef void (*JNI_OnUnloadFn)(JavaVM*, void*);
+    std::vector<JNI_OnUnloadFn> unload_functions;
+    {
+      MutexLock mu(soa.Self(), *Locks::jni_libraries_lock_);
+      for (auto it = libraries_.begin(); it != libraries_.end(); ) {
+        SharedLibrary* const library = it->second;
+        // If the class loader is null then it was unloaded, so call JNI_OnUnload.
+        const jweak class_loader = library->GetClassLoader();
+        // If class_loader is a null jobject then it is the boot class loader. We should not unload
+        // the native libraries of the boot class loader.
+        if (class_loader != nullptr &&
+            soa.Decode<mirror::ClassLoader*>(class_loader) == nullptr) {
+          void* const sym = library->FindSymbol("JNI_OnUnload", nullptr);
+          if (sym == nullptr) {
+            VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]";
+          } else {
+            VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]";
+            JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym);
+            unload_functions.push_back(jni_on_unload);
+          }
+          delete library;
+          it = libraries_.erase(it);
+        } else {
+          ++it;
+        }
+      }
+    }
+    // Do this without holding the jni libraries lock to prevent possible deadlocks.
+    for (JNI_OnUnloadFn fn : unload_functions) {
+      VLOG(jni) << "Calling JNI_OnUnload";
+      (*fn)(soa.Vm(), nullptr);
+    }
+  }
 
+ private:
+  AllocationTrackingSafeMap<std::string, SharedLibrary*, kAllocatorTagJNILibraries> libraries_
+      GUARDED_BY(Locks::jni_libraries_lock_);
+};
 
 class JII {
  public:
@@ -641,6 +684,10 @@
   }
 }
 
+void JavaVMExt::UnloadNativeLibraries() {
+  libraries_.get()->UnloadNativeLibraries();
+}
+
 bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader,
                                   std::string* error_msg) {
   error_msg->clear();
@@ -738,10 +785,8 @@
   void* sym;
   if (needs_native_bridge) {
     library->SetNeedsNativeBridge();
-    sym = library->FindSymbolWithNativeBridge("JNI_OnLoad", nullptr);
-  } else {
-    sym = dlsym(handle, "JNI_OnLoad");
   }
+  sym = library->FindSymbol("JNI_OnLoad", nullptr);
   if (sym == nullptr) {
     VLOG(jni) << "[No JNI_OnLoad found in \"" << path << "\"]";
     was_successful = true;
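
For context on the unload path above: the runtime now looks up an optional per-library JNI_OnUnload symbol and calls it after the owning class loader has been cleared. A minimal sketch of the two standard hooks a native library could export (standard JNI signatures; the comments about timing describe this change's behaviour and are an assumption, not part of the JNI spec):

#include <jni.h>

extern "C" JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* /*reserved*/) {
  // Cache whatever global state the library needs, then report the JNI version.
  (void)vm;
  return JNI_VERSION_1_6;
}

extern "C" JNIEXPORT void JNICALL JNI_OnUnload(JavaVM* vm, void* /*reserved*/) {
  // Release cached jclass/jmethodID state here; after this returns the
  // library is no longer considered registered for its class loader.
  (void)vm;
}
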
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index b539bbd..c1fbdc0 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -88,6 +88,11 @@
   bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject javaLoader,
                          std::string* error_msg);
 
+  // Unload native libraries with cleared class loaders.
+  void UnloadNativeLibraries()
+      REQUIRES(!Locks::jni_libraries_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   /**
    * Returns a pointer to the code for the native method 'm', found
    * using dlsym(3) on every native library that's been loaded so far.
@@ -184,7 +189,9 @@
   // Not guarded by globals_lock since we sometimes use SynchronizedGet in Thread::DecodeJObject.
   IndirectReferenceTable globals_;
 
-  std::unique_ptr<Libraries> libraries_ GUARDED_BY(Locks::jni_libraries_lock_);
+  // No lock annotation since UnloadNativeLibraries is called on libraries_ but locks the
+  // jni_libraries_lock_ internally.
+  std::unique_ptr<Libraries> libraries_;
 
   // Used by -Xcheck:jni.
   const JNIInvokeInterface* const unchecked_functions_;
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 643bc23..e73ba82 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -67,6 +67,9 @@
   void DumpInfo(std::ostream& os);
   // Add a timing logger to cumulative_timings_.
   void AddTimingLogger(const TimingLogger& logger);
+  JitInstrumentationCache* GetInstrumentationCache() const {
+    return instrumentation_cache_.get();
+  }
 
  private:
   Jit();
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index d437dd5..4f4a97f 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -24,11 +24,21 @@
 namespace art {
 namespace jit {
 
-class JitCompileTask : public Task {
+class JitCompileTask FINAL : public Task {
  public:
-  explicit JitCompileTask(ArtMethod* method) : method_(method) {}
+  explicit JitCompileTask(ArtMethod* method) : method_(method) {
+    ScopedObjectAccess soa(Thread::Current());
+    // Add a global ref to the class to prevent class unloading until compilation is done.
+    klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
+    CHECK(klass_ != nullptr);
+  }
 
-  virtual void Run(Thread* self) OVERRIDE {
+  ~JitCompileTask() {
+    ScopedObjectAccess soa(Thread::Current());
+    soa.Vm()->DeleteGlobalRef(soa.Self(), klass_);
+  }
+
+  void Run(Thread* self) OVERRIDE {
     ScopedObjectAccess soa(self);
     VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
     if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
@@ -36,12 +46,13 @@
     }
   }
 
-  virtual void Finalize() OVERRIDE {
+  void Finalize() OVERRIDE {
     delete this;
   }
 
  private:
   ArtMethod* const method_;
+  jobject klass_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
 };
@@ -104,5 +115,31 @@
   }
 }
 
+class WaitForCompilationToFinishTask FINAL : public Task {
+ public:
+  WaitForCompilationToFinishTask() : barrier_(0) {}
+
+  void Wait(Thread* self) {
+    barrier_.Increment(self, 1);
+  }
+
+  void Run(Thread* self ATTRIBUTE_UNUSED) OVERRIDE {}
+
+  void Finalize() OVERRIDE {
+    // Do this in Finalize since Finalize is called after Run by the thread pool.
+    barrier_.Pass(Thread::Current());
+  }
+
+ private:
+  Barrier barrier_;
+  DISALLOW_COPY_AND_ASSIGN(WaitForCompilationToFinishTask);
+};
+
+void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
+  std::unique_ptr<WaitForCompilationToFinishTask> task(new WaitForCompilationToFinishTask);
+  thread_pool_->AddTask(self, task.get());
+  task->Wait(self);
+}
+
 }  // namespace jit
 }  // namespace art
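
WaitForCompilationToFinish above relies on the pool executing tasks in order and on Finalize running after Run, so releasing the barrier from the marker task implies all previously queued compilations are done. A rough sketch of that handshake using standard C++ primitives (ToyBarrier is illustrative, not art::Barrier):

#include <condition_variable>
#include <mutex>
#include <thread>

class ToyBarrier {
 public:
  void Pass() {  // corresponds to Finalize() on the pool thread
    std::lock_guard<std::mutex> lock(mu_);
    done_ = true;
    cv_.notify_all();
  }
  void Wait() {  // corresponds to barrier_.Increment() on the requesting thread
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return done_; });
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  bool done_ = false;
};

int main() {
  ToyBarrier barrier;
  std::thread pool([&] {
    // ... earlier queued compile tasks would run here ...
    barrier.Pass();
  });
  barrier.Wait();  // returns only once the marker task has executed
  pool.join();
  return 0;
}
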
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 6fdef65..9eb464b 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -50,6 +50,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
   void DeleteThreadPool();
+  // Wait until there are no more pending compilation tasks.
+  void WaitForCompilationToFinish(Thread* self);
 
  private:
   size_t hot_method_threshold_;
diff --git a/runtime/lambda/art_lambda_method.h b/runtime/lambda/art_lambda_method.h
index 892d8c6..ea13eb7 100644
--- a/runtime/lambda/art_lambda_method.h
+++ b/runtime/lambda/art_lambda_method.h
@@ -35,7 +35,7 @@
   // (Ownership of strings is retained by the caller and the lifetime should exceed this class).
   ArtLambdaMethod(ArtMethod* target_method,
                   const char* captured_variables_type_descriptor,
-                  const char* captured_variables_shorty_,
+                  const char* captured_variables_shorty,
                   bool innate_lambda = true);
 
   // Get the target method for this lambda that would be used by the invoke-lambda dex instruction.
diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc
index 26575fd..8eef10b 100644
--- a/runtime/lambda/box_table.cc
+++ b/runtime/lambda/box_table.cc
@@ -18,6 +18,8 @@
 #include "base/mutex.h"
 #include "common_throws.h"
 #include "gc_root-inl.h"
+#include "lambda/closure.h"
+#include "lambda/leaking_allocator.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "thread.h"
@@ -26,11 +28,53 @@
 
 namespace art {
 namespace lambda {
+// Temporarily represent the lambda Closure as its raw bytes in an array.
+// TODO: Generate a proxy class for the closure when boxing the first time.
+using BoxedClosurePointerType = mirror::ByteArray*;
+
+static mirror::Class* GetBoxedClosureClass() SHARED_REQUIRES(Locks::mutator_lock_) {
+  return mirror::ByteArray::GetArrayClass();
+}
+
+namespace {
+  // Convenience functions for allocating/deleting box table copies of the closures.
+  struct ClosureAllocator {
+    // Deletes a Closure that was allocated through ::Allocate.
+    static void Delete(Closure* ptr) {
+      delete[] reinterpret_cast<char*>(ptr);
+    }
+
+    // Returns a well-aligned pointer to a newly allocated Closure on the 'new' heap.
+    static Closure* Allocate(size_t size) {
+      DCHECK_GE(size, sizeof(Closure));
+
+      // TODO: Maybe point to the interior of the boxed closure object after we add proxy support?
+      Closure* closure = reinterpret_cast<Closure*>(new char[size]);
+      DCHECK_ALIGNED(closure, alignof(Closure));
+      return closure;
+    }
+  };
+}  // namespace
 
 BoxTable::BoxTable()
   : allow_new_weaks_(true),
     new_weaks_condition_("lambda box table allowed weaks", *Locks::lambda_table_lock_) {}
 
+BoxTable::~BoxTable() {
+  // Free all the copies of our closures.
+  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
+    std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
+
+    Closure* closure = key_value_pair.first;
+
+    // Remove from the map first, so that it doesn't try to access a dangling pointer.
+    map_iterator = map_.Erase(map_iterator);
+
+    // Safe to delete, no dangling pointers.
+    ClosureAllocator::Delete(closure);
+  }
+}
+
 mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) {
   Thread* self = Thread::Current();
 
@@ -58,22 +102,29 @@
 
   // Release the lambda table lock here, so that thread suspension is allowed.
 
-  // Convert the ArtMethod into a java.lang.reflect.Method which will serve
+  // Convert the Closure into a managed byte[] which will serve
   // as the temporary 'boxed' version of the lambda. This is good enough
   // to check all the basic object identities that a boxed lambda must retain.
+  // It's also good enough to contain all the captured primitive variables.
 
   // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class
   // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object
-  mirror::Method* method_as_object =
-      mirror::Method::CreateFromArtMethod(self, closure);
+  BoxedClosurePointerType closure_as_array_object =
+      mirror::ByteArray::Alloc(self, closure->GetSize());
+
   // There are no thread suspension points after this, so we don't need to put it into a handle.
 
-  if (UNLIKELY(method_as_object == nullptr)) {
+  if (UNLIKELY(closure_as_array_object == nullptr)) {
     // Most likely an OOM has occurred.
     CHECK(self->IsExceptionPending());
     return nullptr;
   }
 
+  // Write the raw closure data into the byte[].
+  closure->CopyTo(closure_as_array_object->GetRawData(sizeof(uint8_t),  // component size
+                                                      0 /*index*/),     // index
+                  closure_as_array_object->GetLength());
+
   // The method has been successfully boxed into an object, now insert it into the hash map.
   {
     MutexLock mu(self, *Locks::lambda_table_lock_);
@@ -87,38 +138,56 @@
       return value.Read();
     }
 
-    // Otherwise we should insert it into the hash map in this thread.
-    map_.Insert(std::make_pair(closure, ValueType(method_as_object)));
+    // Otherwise we need to insert it into the hash map in this thread.
+
+    // Make a copy for the box table to keep, in case the closure gets collected from the stack.
+    // TODO: GC may need to sweep for roots in the box table's copy of the closure.
+    Closure* closure_table_copy = ClosureAllocator::Allocate(closure->GetSize());
+    closure->CopyTo(closure_table_copy, closure->GetSize());
+
+    // The closure_table_copy needs to be deleted by us manually when we erase it from the map.
+
+    // Actually insert into the table.
+    map_.Insert({closure_table_copy, ValueType(closure_as_array_object)});
   }
 
-  return method_as_object;
+  return closure_as_array_object;
 }
 
 bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) {
   DCHECK(object != nullptr);
   *out_closure = nullptr;
 
+  Thread* self = Thread::Current();
+
   // Note that we do not need to access lambda_table_lock_ here
   // since we don't need to look at the map.
 
   mirror::Object* boxed_closure_object = object;
 
-  // Raise ClassCastException if object is not instanceof java.lang.reflect.Method
-  if (UNLIKELY(!boxed_closure_object->InstanceOf(mirror::Method::StaticClass()))) {
-    ThrowClassCastException(mirror::Method::StaticClass(), boxed_closure_object->GetClass());
+  // Raise ClassCastException if object is not instanceof byte[]
+  if (UNLIKELY(!boxed_closure_object->InstanceOf(GetBoxedClosureClass()))) {
+    ThrowClassCastException(GetBoxedClosureClass(), boxed_closure_object->GetClass());
     return false;
   }
 
   // TODO(iam): We must check that the closure object extends/implements the type
-  // specified in [type id]. This is not currently implemented since it's always a Method.
+  // specified in [type id]. This is not currently implemented since it's always a byte[].
 
   // If we got this far, the inputs are valid.
-  // Write out the java.lang.reflect.Method's embedded ArtMethod* into the vreg target.
-  mirror::AbstractMethod* boxed_closure_as_method =
-      down_cast<mirror::AbstractMethod*>(boxed_closure_object);
+  // Shuffle the byte[] back into a raw closure, then allocate it, copy, and return it.
+  BoxedClosurePointerType boxed_closure_as_array =
+      down_cast<BoxedClosurePointerType>(boxed_closure_object);
 
-  ArtMethod* unboxed_closure = boxed_closure_as_method->GetArtMethod();
-  DCHECK(unboxed_closure != nullptr);
+  const int8_t* unaligned_interior_closure = boxed_closure_as_array->GetData();
+
+  // Allocate a copy that can "escape" and copy the closure data into that.
+  Closure* unboxed_closure =
+      LeakingAllocator::MakeFlexibleInstance<Closure>(self, boxed_closure_as_array->GetLength());
+  // TODO: don't just memcpy the closure, it's unsafe when we add references to the mix.
+  memcpy(unboxed_closure, unaligned_interior_closure, boxed_closure_as_array->GetLength());
+
+  DCHECK_EQ(unboxed_closure->GetSize(), static_cast<size_t>(boxed_closure_as_array->GetLength()));
 
   *out_closure = unboxed_closure;
   return true;
@@ -127,7 +196,7 @@
 BoxTable::ValueType BoxTable::FindBoxedLambda(const ClosureType& closure) const {
   auto map_iterator = map_.Find(closure);
   if (map_iterator != map_.end()) {
-    const std::pair<ClosureType, ValueType>& key_value_pair = *map_iterator;
+    const std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
     const ValueType& value = key_value_pair.second;
 
     DCHECK(!value.IsNull());  // Never store null boxes.
@@ -157,7 +226,7 @@
    */
   std::vector<ClosureType> remove_list;
   for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
-    std::pair<ClosureType, ValueType>& key_value_pair = *map_iterator;
+    std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
 
     const ValueType& old_value = key_value_pair.second;
 
@@ -166,10 +235,15 @@
     mirror::Object* new_value = visitor->IsMarked(old_value_raw);
 
     if (new_value == nullptr) {
-      const ClosureType& closure = key_value_pair.first;
       // The object has been swept away.
+      const ClosureType& closure = key_value_pair.first;
+
       // Delete the entry from the map.
-      map_iterator = map_.Erase(map_.Find(closure));
+      map_iterator = map_.Erase(map_iterator);
+
+      // Clean up the memory by deleting the closure.
+      ClosureAllocator::Delete(closure);
+
     } else {
       // The object has been moved.
       // Update the map.
@@ -208,16 +282,33 @@
   new_weaks_condition_.Broadcast(self);
 }
 
-bool BoxTable::EqualsFn::operator()(const ClosureType& lhs, const ClosureType& rhs) const {
+void BoxTable::EmptyFn::MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const {
+  item.first = nullptr;
+
+  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  item.second = ValueType();  // Also clear the GC root.
+}
+
+bool BoxTable::EmptyFn::IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const {
+  return item.first == nullptr;
+}
+
+bool BoxTable::EqualsFn::operator()(const UnorderedMapKeyType& lhs,
+                                    const UnorderedMapKeyType& rhs) const {
   // Nothing needs this right now, but leave this assertion for later when
   // we need to look at the references inside of the closure.
-  if (kIsDebugBuild) {
-    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-  }
+  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
 
-  // TODO: Need rework to use read barriers once closures have references inside of them that can
-  // move. Until then, it's safe to just compare the data inside of it directly.
-  return lhs == rhs;
+  return lhs->ReferenceEquals(rhs);
+}
+
+size_t BoxTable::HashFn::operator()(const UnorderedMapKeyType& key) const {
+  const lambda::Closure* closure = key;
+  DCHECK_ALIGNED(closure, alignof(lambda::Closure));
+
+  // Need to hold mutator_lock_ before calling into Closure::GetHashCode.
+  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  return closure->GetHashCode();
 }
 
 }  // namespace lambda
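
The BoxTable above now keys its map on closure contents (HashFn hashing the captured bytes, EqualsFn using ReferenceEquals) rather than on pointer identity. A small sketch of the same custom-functor pattern with std::unordered_map, assuming a toy closure keyed by its raw bytes (all names are illustrative):

#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>

// A stand-in "closure": equality and hashing look at the contents, not the pointer.
struct ToyClosure {
  std::string bytes;
};

struct ToyHash {
  size_t operator()(const ToyClosure* key) const {
    return std::hash<std::string>()(key->bytes);
  }
};

struct ToyEquals {
  bool operator()(const ToyClosure* lhs, const ToyClosure* rhs) const {
    return lhs->bytes == rhs->bytes;  // content equality, like ReferenceEquals
  }
};

int main() {
  std::unordered_map<const ToyClosure*, int, ToyHash, ToyEquals> map;
  ToyClosure original{"\x2A\x07"};
  ToyClosure copy{"\x2A\x07"};  // a different object with the same contents
  map[&original] = 1;
  // Found via content-based hash/equality even though the pointer differs.
  return map.count(&copy) == 1 ? 0 : 1;
}
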
diff --git a/runtime/lambda/box_table.h b/runtime/lambda/box_table.h
index 9ffda66..adb7332 100644
--- a/runtime/lambda/box_table.h
+++ b/runtime/lambda/box_table.h
@@ -34,6 +34,7 @@
 }  // namespace mirror
 
 namespace lambda {
+struct Closure;  // forward declaration
 
 /*
  * Store a table of boxed lambdas. This is required to maintain object referential equality
@@ -44,7 +45,7 @@
  */
 class BoxTable FINAL {
  public:
-  using ClosureType = art::ArtMethod*;
+  using ClosureType = art::lambda::Closure*;
 
   // Boxes a closure into an object. Returns null and throws an exception on failure.
   mirror::Object* BoxLambda(const ClosureType& closure)
@@ -72,10 +73,9 @@
       REQUIRES(!Locks::lambda_table_lock_);
 
   BoxTable();
-  ~BoxTable() = default;
+  ~BoxTable();
 
  private:
-  // FIXME: This needs to be a GcRoot.
   // Explanation:
   // - After all threads are suspended (exclusive mutator lock),
   //   the concurrent-copying GC can move objects from the "from" space to the "to" space.
@@ -97,30 +97,30 @@
   void BlockUntilWeaksAllowed()
       SHARED_REQUIRES(Locks::lambda_table_lock_);
 
+  // The map keys are raw Closure pointers; the box table allocates these copies and deletes them manually.
+  using UnorderedMapKeyType = ClosureType;
+
   // EmptyFn implementation for art::HashMap
   struct EmptyFn {
-    void MakeEmpty(std::pair<ClosureType, ValueType>& item) const {
-      item.first = nullptr;
-    }
-    bool IsEmpty(const std::pair<ClosureType, ValueType>& item) const {
-      return item.first == nullptr;
-    }
+    void MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const
+        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
+
+    bool IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const;
   };
 
   // HashFn implementation for art::HashMap
   struct HashFn {
-    size_t operator()(const ClosureType& key) const {
-      // TODO(iam): Rewrite hash function when ClosureType is no longer an ArtMethod*
-      return static_cast<size_t>(reinterpret_cast<uintptr_t>(key));
-    }
+    size_t operator()(const UnorderedMapKeyType& key) const
+        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
   };
 
   // EqualsFn implementation for art::HashMap
   struct EqualsFn {
-    bool operator()(const ClosureType& lhs, const ClosureType& rhs) const;
+    bool operator()(const UnorderedMapKeyType& lhs, const UnorderedMapKeyType& rhs) const
+        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
   };
 
-  using UnorderedMap = art::HashMap<ClosureType,
+  using UnorderedMap = art::HashMap<UnorderedMapKeyType,
                                     ValueType,
                                     EmptyFn,
                                     HashFn,
diff --git a/runtime/lambda/closure.cc b/runtime/lambda/closure.cc
index 95a17c6..179e4ee 100644
--- a/runtime/lambda/closure.cc
+++ b/runtime/lambda/closure.cc
@@ -124,6 +124,55 @@
   memcpy(target, this, GetSize());
 }
 
+ArtMethod* Closure::GetTargetMethod() const {
+  return const_cast<ArtMethod*>(lambda_info_->GetArtMethod());
+}
+
+uint32_t Closure::GetHashCode() const {
+  // Start with a non-zero constant, a prime number.
+  uint32_t result = 17;
+
+  // Include the hash of the target ArtMethod pointer.
+  {
+    uintptr_t method = reinterpret_cast<uintptr_t>(GetTargetMethod());
+    result = 31 * result + Low32Bits(method);
+    if (sizeof(method) == sizeof(uint64_t)) {
+      result = 31 * result + High32Bits(method);
+    }
+  }
+
+  // Include a hash for each captured variable.
+  for (size_t i = 0; i < GetCapturedVariablesSize(); ++i) {
+    // TODO: not safe for GC-able values since the address can move and the hash code would change.
+    uint8_t captured_variable_raw_value;
+    CopyUnsafeAtOffset<uint8_t>(i, /*out*/&captured_variable_raw_value);  // NOLINT: [whitespace/comma] [3]
+
+    result = 31 * result + captured_variable_raw_value;
+  }
+
+  // TODO: Fix above loop to work for objects and lambdas.
+  static_assert(kClosureSupportsGarbageCollection == false,
+                "Need to update above loop to read the hash code from the "
+                "objects and lambdas recursively");
+
+  return result;
+}
+
+bool Closure::ReferenceEquals(const Closure* other) const {
+  DCHECK(other != nullptr);
+
+  // TODO: Need rework to use read barriers once closures have references inside of them that can
+  // move. Until then, it's safe to just compare the data inside of it directly.
+  static_assert(kClosureSupportsReferences == false,
+                "Unsafe to use memcmp in read barrier collector");
+
+  if (GetSize() != other->GetSize()) {
+    return false;
+  }
+
+  return memcmp(this, other, GetSize()) == 0;
+}
+
 size_t Closure::GetNumberOfCapturedVariables() const {
   // TODO: refactor into art_lambda_method.h. Parsing should only be required here as a DCHECK.
   VariableInfo variable_info =
diff --git a/runtime/lambda/closure.h b/runtime/lambda/closure.h
index 60d117e..31ff194 100644
--- a/runtime/lambda/closure.h
+++ b/runtime/lambda/closure.h
@@ -49,6 +49,19 @@
   // The target_size must be at least as large as GetSize().
   void CopyTo(void* target, size_t target_size) const;
 
+  // Get the target method, i.e. the method that will be dispatched into with invoke-lambda.
+  ArtMethod* GetTargetMethod() const;
+
+  // Calculates the hash code. Value is recomputed each time.
+  uint32_t GetHashCode() const SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Is this the same closure as other? e.g. same target method, same variables captured.
+  //
+  // Determines whether the two Closures are interchangeable instances.
+  // Does *not* call Object#equals recursively. If two Closures compare ReferenceEquals true that
+  // means that they are interchangeable values (usually for the purpose of boxing/unboxing).
+  bool ReferenceEquals(const Closure* other) const SHARED_REQUIRES(Locks::mutator_lock_);
+
   // How many variables were captured?
   size_t GetNumberOfCapturedVariables() const;
 
diff --git a/runtime/lambda/closure_builder-inl.h b/runtime/lambda/closure_builder-inl.h
index 41a803b..3cec21f 100644
--- a/runtime/lambda/closure_builder-inl.h
+++ b/runtime/lambda/closure_builder-inl.h
@@ -35,6 +35,8 @@
 
   values_.push_back(value_storage);
   size_ += sizeof(T);
+
+  shorty_types_ += kShortyType;
 }
 
 }  // namespace lambda
diff --git a/runtime/lambda/closure_builder.cc b/runtime/lambda/closure_builder.cc
index 9c37db8..739e965 100644
--- a/runtime/lambda/closure_builder.cc
+++ b/runtime/lambda/closure_builder.cc
@@ -64,6 +64,8 @@
       UNIMPLEMENTED(FATAL) << "can't yet safely capture objects with read barrier";
     }
   }
+
+  shorty_types_ += ShortyFieldType::kObject;
 }
 
 void ClosureBuilder::CaptureVariableLambda(Closure* closure) {
@@ -78,6 +80,8 @@
 
   // A closure may be sized dynamically, so always query it for the true size.
   size_ += closure->GetSize();
+
+  shorty_types_ += ShortyFieldType::kLambda;
 }
 
 size_t ClosureBuilder::GetSize() const {
@@ -85,9 +89,15 @@
 }
 
 size_t ClosureBuilder::GetCaptureCount() const {
+  DCHECK_EQ(values_.size(), shorty_types_.size());
   return values_.size();
 }
 
+const std::string& ClosureBuilder::GetCapturedVariableShortyTypes() const {
+  DCHECK_EQ(values_.size(), shorty_types_.size());
+  return shorty_types_;
+}
+
 Closure* ClosureBuilder::CreateInPlace(void* memory, ArtLambdaMethod* target_method) const {
   DCHECK(memory != nullptr);
   DCHECK(target_method != nullptr);
@@ -138,11 +148,14 @@
                                    size_t variables_size) const {
   size_t total_size = header_size;
   const char* shorty_types = target_method->GetCapturedVariablesShortyTypeDescriptor();
+  DCHECK_STREQ(shorty_types, shorty_types_.c_str());
 
   size_t variables_offset = 0;
   size_t remaining_size = variables_size;
 
   const size_t shorty_count = target_method->GetNumberOfCapturedVariables();
+  DCHECK_EQ(shorty_count, GetCaptureCount());
+
   for (size_t i = 0; i < shorty_count; ++i) {
     ShortyFieldType shorty{shorty_types[i]};  // NOLINT [readability/braces] [4]
 
diff --git a/runtime/lambda/closure_builder.h b/runtime/lambda/closure_builder.h
index 542e12a..23eb484 100644
--- a/runtime/lambda/closure_builder.h
+++ b/runtime/lambda/closure_builder.h
@@ -40,13 +40,12 @@
 //
 // The mutator lock must be held for the duration of the lifetime of this object,
 // since it needs to temporarily store heap references into an internal list.
-class ClosureBuilder : ValueObject {
+class ClosureBuilder {
  public:
   using ShortyTypeEnum = decltype(ShortyFieldType::kByte);
 
-
   // Mark this primitive value to be captured as the specified type.
-  template <typename T, ShortyTypeEnum kShortyType>
+  template <typename T, ShortyTypeEnum kShortyType = ShortyFieldTypeSelectEnum<T>::value>
   void CaptureVariablePrimitive(T value);
 
   // Mark this object reference to be captured.
@@ -63,6 +62,9 @@
   // Returns how many variables have been captured so far.
   size_t GetCaptureCount() const;
 
+  // Get the list of captured variables' shorty field types.
+  const std::string& GetCapturedVariableShortyTypes() const;
+
   // Creates a closure in-place and writes out the data into 'memory'.
   // Memory must be at least 'GetSize' bytes large.
   // All previously marked data to be captured is now written out.
@@ -93,6 +95,7 @@
   size_t size_ = kInitialSize;
   bool is_dynamic_size_ = false;
   std::vector<ShortyFieldTypeTraits::MaxType> values_;
+  std::string shorty_types_;
 };
 
 }  // namespace lambda
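
The defaulted template argument added to CaptureVariablePrimitive (kShortyType = ShortyFieldTypeSelectEnum<T>::value) lets callers omit the shorty when the C++ type already implies it. A compilable sketch of that trait-selection trick with made-up names (ToyShorty/ToySelect are not the ART types):

#include <cstdint>
#include <iostream>

enum class ToyShorty : char { kInt = 'I', kLong = 'J', kFloat = 'F' };

// Trait mapping a C++ type to its shorty constant, so the second template
// argument below can be defaulted from the first.
template <typename T> struct ToySelect;
template <> struct ToySelect<int32_t> { static constexpr ToyShorty value = ToyShorty::kInt; };
template <> struct ToySelect<int64_t> { static constexpr ToyShorty value = ToyShorty::kLong; };
template <> struct ToySelect<float>   { static constexpr ToyShorty value = ToyShorty::kFloat; };

template <typename T, ToyShorty kShorty = ToySelect<T>::value>
void Capture(T value) {
  std::cout << static_cast<char>(kShorty) << " captured: " << value << '\n';
}

int main() {
  Capture(int32_t{42});  // kShorty deduced as kInt
  Capture(3.5f);         // kShorty deduced as kFloat
  return 0;
}
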
diff --git a/runtime/lambda/leaking_allocator.cc b/runtime/lambda/leaking_allocator.cc
new file mode 100644
index 0000000..4910732
--- /dev/null
+++ b/runtime/lambda/leaking_allocator.cc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lambda/leaking_allocator.h"
+#include "linear_alloc.h"
+#include "runtime.h"
+
+namespace art {
+namespace lambda {
+
+void* LeakingAllocator::AllocateMemory(Thread* self, size_t byte_size) {
+  // TODO: use GetAllocatorForClassLoader to allocate lambda ArtMethod data.
+  return Runtime::Current()->GetLinearAlloc()->Alloc(self, byte_size);
+}
+
+}  // namespace lambda
+}  // namespace art
diff --git a/runtime/lambda/leaking_allocator.h b/runtime/lambda/leaking_allocator.h
new file mode 100644
index 0000000..c3222d0
--- /dev/null
+++ b/runtime/lambda/leaking_allocator.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
+#define ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
+
+#include <utility>  // std::forward
+
+namespace art {
+class Thread;  // forward declaration
+
+namespace lambda {
+
+// Temporary class to centralize all the leaking allocations.
+// Allocations made through this class are never freed; it is a placeholder
+// indicating that the calling code needs to be rewritten to properly:
+//
+// (a) Have a lifetime scoped to some other entity.
+// (b) Not be allocated over and over again if it was already allocated once (immutable data).
+//
+// TODO: do all of the above a/b for each callsite, and delete this class.
+class LeakingAllocator {
+ public:
+  // Allocate byte_size bytes worth of memory. Never freed.
+  static void* AllocateMemory(Thread* self, size_t byte_size);
+
+  // Make a new instance of T, flexibly sized, in-place at newly allocated memory. Never freed.
+  template <typename T, typename... Args>
+  static T* MakeFlexibleInstance(Thread* self, size_t byte_size, Args&&... args) {
+    return new (AllocateMemory(self, byte_size)) T(std::forward<Args>(args)...);
+  }
+
+  // Make a new instance of T in-place at newly allocated memory. Never freed.
+  template <typename T, typename... Args>
+  static T* MakeInstance(Thread* self, Args&&... args) {
+    return new (AllocateMemory(self, sizeof(T))) T(std::forward<Args>(args)...);
+  }
+};
+
+}  // namespace lambda
+}  // namespace art
+
+#endif  // ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
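
MakeFlexibleInstance above is placement-new over a buffer larger than sizeof(T), so variable-sized data can live directly behind the object, and the memory is deliberately never freed. A minimal illustration of the idiom using plain operator new (Blob is a made-up type; the leak is intentional, mirroring the allocator's contract):

#include <cstddef>
#include <cstring>
#include <new>

struct Blob {
  size_t size;
  // Trailing payload lives directly after the object, like a Closure's
  // captured-variable storage.
  unsigned char* payload() { return reinterpret_cast<unsigned char*>(this + 1); }
};

int main() {
  const size_t payload_bytes = 16;
  // Allocate room for the header plus the flexible payload, then construct in place.
  void* memory = ::operator new(sizeof(Blob) + payload_bytes);
  Blob* blob = new (memory) Blob{payload_bytes};
  std::memset(blob->payload(), 0, blob->size);
  return blob->size == payload_bytes ? 0 : 1;  // never freed, by design
}
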
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 5c13e13..63f43cf 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -20,6 +20,7 @@
 #include "art_method-inl.h"
 #include "dex_instruction.h"
 #include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "handle_scope-inl.h"
 #include "mirror/class-inl.h"
@@ -36,8 +37,9 @@
   : self_(self), context_(self->GetLongJumpContext()), is_deoptimization_(is_deoptimization),
     method_tracing_active_(is_deoptimization ||
                            Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()),
-    handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_method_(nullptr),
-    handler_dex_pc_(0), clear_exception_(false), handler_frame_depth_(kInvalidFrameDepth) {
+    handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_quick_arg0_(0),
+    handler_method_(nullptr), handler_dex_pc_(0), clear_exception_(false),
+    handler_frame_depth_(kInvalidFrameDepth) {
 }
 
 // Finds catch handler.
@@ -260,19 +262,25 @@
 // Prepares deoptimization.
 class DeoptimizeStackVisitor FINAL : public StackVisitor {
  public:
-  DeoptimizeStackVisitor(Thread* self, Context* context, QuickExceptionHandler* exception_handler)
+  DeoptimizeStackVisitor(Thread* self,
+                         Context* context,
+                         QuickExceptionHandler* exception_handler,
+                         bool single_frame)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         exception_handler_(exception_handler),
         prev_shadow_frame_(nullptr),
-        stacked_shadow_frame_pushed_(false) {
+        stacked_shadow_frame_pushed_(false),
+        single_frame_deopt_(single_frame),
+        single_frame_done_(false) {
   }
 
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
     ArtMethod* method = GetMethod();
-    if (method == nullptr) {
-      // This is the upcall, we remember the frame and last pc so that we may long jump to them.
+    if (method == nullptr || single_frame_done_) {
+      // This is the upcall (or the next full frame in single-frame deopt), we remember the frame
+      // and last pc so that we may long jump to them.
       exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
       exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
       if (!stacked_shadow_frame_pushed_) {
@@ -295,7 +303,13 @@
       CHECK_EQ(GetFrameDepth(), 1U);
       return true;
     } else {
-      return HandleDeoptimization(method);
+      HandleDeoptimization(method);
+      if (single_frame_deopt_ && !IsInInlinedFrame()) {
+        // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
+        exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
+        single_frame_done_ = true;
+      }
+      return true;
     }
   }
 
@@ -304,7 +318,7 @@
     return static_cast<VRegKind>(kinds.at(reg * 2));
   }
 
-  bool HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) {
+  void HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) {
     const DexFile::CodeItem* code_item = m->GetCodeItem();
     CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m);
     uint16_t num_regs = code_item->registers_size_;
@@ -448,16 +462,20 @@
       // Will be popped after the long jump after DeoptimizeStack(),
       // right before interpreter::EnterInterpreterFromDeoptimize().
       stacked_shadow_frame_pushed_ = true;
-      GetThread()->PushStackedShadowFrame(new_frame,
-                                          StackedShadowFrameType::kDeoptimizationShadowFrame);
+      GetThread()->PushStackedShadowFrame(
+          new_frame,
+          single_frame_deopt_
+              ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame
+              : StackedShadowFrameType::kDeoptimizationShadowFrame);
     }
     prev_shadow_frame_ = new_frame;
-    return true;
   }
 
   QuickExceptionHandler* const exception_handler_;
   ShadowFrame* prev_shadow_frame_;
   bool stacked_shadow_frame_pushed_;
+  const bool single_frame_deopt_;
+  bool single_frame_done_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
@@ -468,13 +486,46 @@
     self_->DumpStack(LOG(INFO) << "Deoptimizing: ");
   }
 
-  DeoptimizeStackVisitor visitor(self_, context_, this);
+  DeoptimizeStackVisitor visitor(self_, context_, this, false);
   visitor.WalkStack(true);
 
   // Restore deoptimization exception
   self_->SetException(Thread::GetDeoptimizationException());
 }
 
+void QuickExceptionHandler::DeoptimizeSingleFrame() {
+  DCHECK(is_deoptimization_);
+
+  if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) {
+    LOG(INFO) << "Single-frame deopting:";
+    DumpFramesWithType(self_, true);
+  }
+
+  DeoptimizeStackVisitor visitor(self_, context_, this, true);
+  visitor.WalkStack(true);
+
+  // PC needs to be of the quick-to-interpreter bridge.
+  int32_t offset;
+#ifdef __LP64__
+  offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value();
+#else
+  offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value();
+#endif
+  handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(self_) + offset);
+}
+
+void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() {
+  // Architecture-dependent work. This is to get the return address right for x86 and x86-64.
+
+  if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) {
+    // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to
+    // change how longjump works.
+    handler_quick_frame_ = reinterpret_cast<ArtMethod**>(
+        reinterpret_cast<uintptr_t>(handler_quick_frame_) - sizeof(void*));
+  }
+}
+
 // Unwinds all instrumentation stack frame prior to catch handler or upcall.
 class InstrumentationStackVisitor : public StackVisitor {
  public:
@@ -529,15 +580,67 @@
   }
 }
 
-void QuickExceptionHandler::DoLongJump() {
+void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) {
   // Place context back on thread so it will be available when we continue.
   self_->ReleaseLongJumpContext(context_);
   context_->SetSP(reinterpret_cast<uintptr_t>(handler_quick_frame_));
   CHECK_NE(handler_quick_frame_pc_, 0u);
   context_->SetPC(handler_quick_frame_pc_);
-  context_->SmashCallerSaves();
+  context_->SetArg0(handler_quick_arg0_);
+  if (smash_caller_saves) {
+    context_->SmashCallerSaves();
+  }
   context_->DoLongJump();
   UNREACHABLE();
 }
 
+// Prints out methods with their type of frame.
+class DumpFramesWithTypeStackVisitor FINAL : public StackVisitor {
+ public:
+  DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        show_details_(show_details) {}
+
+  bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* method = GetMethod();
+    if (show_details_) {
+      LOG(INFO) << "|> pc   = " << std::hex << GetCurrentQuickFramePc();
+      LOG(INFO) << "|> addr = " << std::hex << reinterpret_cast<uintptr_t>(GetCurrentQuickFrame());
+      if (GetCurrentQuickFrame() != nullptr && method != nullptr) {
+        LOG(INFO) << "|> ret  = " << std::hex << GetReturnPc();
+      }
+    }
+    if (method == nullptr) {
+      // Transition, do go on, we want to unwind over bridges, all the way.
+      if (show_details_) {
+        LOG(INFO) << "N  <transition>";
+      }
+      return true;
+    } else if (method->IsRuntimeMethod()) {
+      if (show_details_) {
+        LOG(INFO) << "R  " << PrettyMethod(method, true);
+      }
+      return true;
+    } else {
+      bool is_shadow = GetCurrentShadowFrame() != nullptr;
+      LOG(INFO) << (is_shadow ? "S" : "Q")
+                << ((!is_shadow && IsInInlinedFrame()) ? "i" : " ")
+                << " "
+                << PrettyMethod(method, true);
+      return true;  // Go on.
+    }
+  }
+
+ private:
+  bool show_details_;
+
+  DISALLOW_COPY_AND_ASSIGN(DumpFramesWithTypeStackVisitor);
+};
+
+void QuickExceptionHandler::DumpFramesWithType(Thread* self, bool details) {
+  DumpFramesWithTypeStackVisitor visitor(self, details);
+  visitor.WalkStack(true);
+}
+
 }  // namespace art
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index 2e05c7e..89d6a25 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -49,6 +49,9 @@
   // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy"
   // shadow frame that will be executed with the interpreter.
   void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_);
+  void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Update the instrumentation stack by removing all methods that will be unwound
   // by the exception being thrown.
   void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -58,7 +61,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Long jump either to a catch handler or to the upcall.
-  NO_RETURN void DoLongJump() SHARED_REQUIRES(Locks::mutator_lock_);
+  NO_RETURN void DoLongJump(bool smash_caller_saves = true) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetHandlerQuickFrame(ArtMethod** handler_quick_frame) {
     handler_quick_frame_ = handler_quick_frame;
@@ -68,6 +71,10 @@
     handler_quick_frame_pc_ = handler_quick_frame_pc;
   }
 
+  void SetHandlerQuickArg0(uintptr_t handler_quick_arg0) {
+    handler_quick_arg0_ = handler_quick_arg0;
+  }
+
   ArtMethod* GetHandlerMethod() const {
     return handler_method_;
   }
@@ -92,6 +99,11 @@
     handler_frame_depth_ = frame_depth;
   }
 
+  // Walk the stack frames of the given thread, printing out non-runtime methods with their types
+  // of frames. Helps to verify that single-frame deopt really only deopted one frame.
+  static void DumpFramesWithType(Thread* self, bool details = false)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   Thread* const self_;
   Context* const context_;
@@ -103,6 +115,8 @@
   ArtMethod** handler_quick_frame_;
   // PC to branch to for the handler.
   uintptr_t handler_quick_frame_pc_;
+  // The value for argument 0.
+  uintptr_t handler_quick_arg0_;
   // The handler method to report to the debugger.
   ArtMethod* handler_method_;
   // The handler's dex PC, zero implies an uncaught exception.
diff --git a/runtime/stack.h b/runtime/stack.h
index b805239..292c745 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -62,6 +62,10 @@
 class MANAGED StackReference : public mirror::CompressedReference<MirrorType> {
 };
 
+// Forward declaration of the deleter; its operator() just calls the ShadowFrame destructor.
+struct ShadowFrameDeleter;
+using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>;
+
 // ShadowFrame has 2 possible layouts:
 //  - interpreter - separate VRegs and reference arrays. References are in the reference array.
 //  - JNI - just VRegs, but where every VReg holds a reference.
@@ -77,21 +81,26 @@
   static ShadowFrame* CreateDeoptimizedFrame(uint32_t num_vregs, ShadowFrame* link,
                                              ArtMethod* method, uint32_t dex_pc) {
     uint8_t* memory = new uint8_t[ComputeSize(num_vregs)];
-    return Create(num_vregs, link, method, dex_pc, memory);
+    return CreateShadowFrameImpl(num_vregs, link, method, dex_pc, memory);
   }
 
   // Delete a ShadowFrame allocated on the heap for deoptimization.
   static void DeleteDeoptimizedFrame(ShadowFrame* sf) {
+    sf->~ShadowFrame();  // Explicitly destruct.
     uint8_t* memory = reinterpret_cast<uint8_t*>(sf);
     delete[] memory;
   }
 
-  // Create ShadowFrame for interpreter using provided memory.
-  static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link,
-                             ArtMethod* method, uint32_t dex_pc, void* memory) {
-    ShadowFrame* sf = new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
-    return sf;
-  }
+  // Create a shadow frame in a fresh alloca. This needs to be in the context of the caller.
+  // Inlining doesn't work: the compiler would still undo the alloca, so this needs to be a macro.
+#define CREATE_SHADOW_FRAME(num_vregs, link, method, dex_pc) ({                              \
+    size_t frame_size = ShadowFrame::ComputeSize(num_vregs);                                 \
+    void* alloca_mem = alloca(frame_size);                                                   \
+    ShadowFrameAllocaUniquePtr(                                                              \
+        ShadowFrame::CreateShadowFrameImpl((num_vregs), (link), (method), (dex_pc),          \
+                                           (alloca_mem)));                                   \
+    })
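+
+  // Illustrative use only (names below are hypothetical): the alloca lives in the caller's
+  // frame, and the unique_ptr's deleter runs the ShadowFrame destructor on scope exit:
+  //   ShadowFrameAllocaUniquePtr frame = CREATE_SHADOW_FRAME(num_regs, nullptr, method, dex_pc);
+  //   DoSomethingWith(frame.get());  // Hypothetical consumer of the frame.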
+
   ~ShadowFrame() {}
 
   // TODO(iam): Clean references array up since they're always there,
@@ -283,6 +292,15 @@
     return OFFSETOF_MEMBER(ShadowFrame, vregs_);
   }
 
+  // Create ShadowFrame for interpreter using provided memory.
+  static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs,
+                                            ShadowFrame* link,
+                                            ArtMethod* method,
+                                            uint32_t dex_pc,
+                                            void* memory) {
+    return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
+  }
+
  private:
   ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method,
               uint32_t dex_pc, bool has_reference_array)
@@ -326,6 +344,14 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(ShadowFrame);
 };
 
+struct ShadowFrameDeleter {
+  inline void operator()(ShadowFrame* frame) {
+    if (frame != nullptr) {
+      frame->~ShadowFrame();
+    }
+  }
+};
+
 class JavaFrameRootInfo : public RootInfo {
  public:
   JavaFrameRootInfo(uint32_t thread_id, const StackVisitor* stack_visitor, size_t vreg)
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 5bf895e..82e6fb0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -250,10 +250,16 @@
   tlsPtr_.stacked_shadow_frame_record = record;
 }
 
-ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type) {
+ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present) {
   StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record;
-  DCHECK(record != nullptr);
-  DCHECK_EQ(record->GetType(), type);
+  if (must_be_present) {
+    DCHECK(record != nullptr);
+    DCHECK_EQ(record->GetType(), type);
+  } else {
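+    // Absence is tolerated here; presumably for callers (such as single-frame
+    // deoptimization) that cannot know whether a matching frame was pushed.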
+    if (record == nullptr || record->GetType() != type) {
+      return nullptr;
+    }
+  }
   tlsPtr_.stacked_shadow_frame_record = record->GetLink();
   ShadowFrame* shadow_frame = record->GetShadowFrame();
   delete record;
diff --git a/runtime/thread.h b/runtime/thread.h
index 11f2e28..d21644d 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -108,7 +108,8 @@
 
 enum class StackedShadowFrameType {
   kShadowFrameUnderConstruction,
-  kDeoptimizationShadowFrame
+  kDeoptimizationShadowFrame,
+  kSingleFrameDeoptimizationShadowFrame
 };
 
 static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34;
@@ -843,7 +844,7 @@
   void AssertHasDeoptimizationContext()
       SHARED_REQUIRES(Locks::mutator_lock_);
   void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type);
-  ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type);
+  ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present = true);
 
   // For debugger, find the shadow frame that corresponds to a frame id.
   // Or return null if there is none.
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 9938e90..eed3e22 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1008,6 +1008,9 @@
     case Instruction::kVerifyRegCWide:
       result = result && CheckWideRegisterIndex(inst->VRegC());
       break;
+    case Instruction::kVerifyRegCString:
+      result = result && CheckStringIndex(inst->VRegC());
+      break;
   }
   switch (inst->GetVerifyExtraFlags()) {
     case Instruction::kVerifyArrayData:
@@ -1300,17 +1303,17 @@
     return false;
   }
 
+  bool is_packed_switch = (*insns & 0xff) == Instruction::PACKED_SWITCH;
+
   uint32_t switch_count = switch_insns[1];
-  int32_t keys_offset, targets_offset;
+  int32_t targets_offset;
   uint16_t expected_signature;
-  if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
+  if (is_packed_switch) {
     /* 0=sig, 1=count, 2/3=firstKey */
     targets_offset = 4;
-    keys_offset = -1;
     expected_signature = Instruction::kPackedSwitchSignature;
   } else {
     /* 0=sig, 1=count, 2..count*2 = keys */
-    keys_offset = 2;
     targets_offset = 2 + 2 * switch_count;
     expected_signature = Instruction::kSparseSwitchSignature;
   }
@@ -1329,19 +1332,33 @@
                                       << ", count " << insn_count;
     return false;
   }
-  /* for a sparse switch, verify the keys are in ascending order */
-  if (keys_offset > 0 && switch_count > 1) {
-    int32_t last_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16);
-    for (uint32_t targ = 1; targ < switch_count; targ++) {
-      int32_t key =
-          static_cast<int32_t>(switch_insns[keys_offset + targ * 2]) |
-          static_cast<int32_t>(switch_insns[keys_offset + targ * 2 + 1] << 16);
-      if (key <= last_key) {
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid packed switch: last key=" << last_key
-                                          << ", this=" << key;
+
+  constexpr int32_t keys_offset = 2;
+  if (switch_count > 1) {
+    if (is_packed_switch) {
+      /* for a packed switch, verify that keys do not overflow int32 */
+      int32_t first_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16);
+      int32_t max_first_key =
+          std::numeric_limits<int32_t>::max() - (static_cast<int32_t>(switch_count) - 1);
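+      // The largest key is first_key + (switch_count - 1); e.g. first_key = INT_MAX - 1 with
+      // switch_count = 3 would make the last key wrap around to INT_MIN.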
+      if (first_key > max_first_key) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid packed switch: first_key=" << first_key
+                                          << ", switch_count=" << switch_count;
         return false;
       }
-      last_key = key;
+    } else {
+      /* for a sparse switch, verify the keys are in ascending order */
+      int32_t last_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16);
+      for (uint32_t targ = 1; targ < switch_count; targ++) {
+        int32_t key =
+            static_cast<int32_t>(switch_insns[keys_offset + targ * 2]) |
+            static_cast<int32_t>(switch_insns[keys_offset + targ * 2 + 1] << 16);
+        if (key <= last_key) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid sparse switch: last key=" << last_key
+                                            << ", this=" << key;
+          return false;
+        }
+        last_key = key;
+      }
     }
   }
   /* verify each switch target */
@@ -3149,6 +3166,13 @@
       Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement invoke-lambda verification
       break;
     }
+    case Instruction::CAPTURE_VARIABLE: {
+      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
+      // If the code would've normally hard-failed, then the interpreter will throw the
+      // appropriate verification errors at runtime.
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement capture-variable verification
+      break;
+    }
     case Instruction::CREATE_LAMBDA: {
       // Don't bother verifying, instead the interpreter will take the slow path with access checks.
       // If the code would've normally hard-failed, then the interpreter will throw the
@@ -3156,10 +3180,15 @@
       Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement create-lambda verification
       break;
     }
+    case Instruction::LIBERATE_VARIABLE: {
+      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
+      // If the code would've normally hard-failed, then the interpreter will throw the
+      // appropriate verification errors at runtime.
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement liberate-variable verification
+      break;
+    }
 
-    case Instruction::UNUSED_F4:
-    case Instruction::UNUSED_F5:
-    case Instruction::UNUSED_F7: {
+    case Instruction::UNUSED_F4: {
       DCHECK(false);  // TODO(iam): Implement opcodes for lambdas
       // Conservatively fail verification on release builds.
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
diff --git a/test/004-JniTest/expected.txt b/test/004-JniTest/expected.txt
index 49d9cc0..86ab37e 100644
--- a/test/004-JniTest/expected.txt
+++ b/test/004-JniTest/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 Super.<init>
 Super.<init>
 Subclass.<init>
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index db0dd32..be7888b 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -15,8 +15,9 @@
  */
 
 #include <assert.h>
-#include <stdio.h>
+#include <iostream>
 #include <pthread.h>
+#include <stdio.h>
 #include <vector>
 
 #include "jni.h"
@@ -27,13 +28,21 @@
 
 static JavaVM* jvm = nullptr;
 
-extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void *) {
+extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void*) {
   assert(vm != nullptr);
   assert(jvm == nullptr);
   jvm = vm;
+  std::cout << "JNI_OnLoad called" << std::endl;
   return JNI_VERSION_1_6;
 }
 
+extern "C" JNIEXPORT void JNI_OnUnload(JavaVM*, void*) {
+  // Use std::cout because LOG(INFO) adds extra output such as the pid.
+  std::cout << "JNI_OnUnload called" << std::endl;
+  // Clear jvm so the assert in JNI_OnLoad (above) still holds if the library gets loaded again.
+  jvm = nullptr;
+}
+
 static void* AttachHelper(void* arg) {
   assert(jvm != nullptr);
 
diff --git a/test/004-ReferenceMap/expected.txt b/test/004-ReferenceMap/expected.txt
index e69de29..6a5618e 100644
--- a/test/004-ReferenceMap/expected.txt
+++ b/test/004-ReferenceMap/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/004-SignalTest/expected.txt b/test/004-SignalTest/expected.txt
index fd5ec00..b3a0e1c 100644
--- a/test/004-SignalTest/expected.txt
+++ b/test/004-SignalTest/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 init signal test
 Caught NullPointerException
 Caught StackOverflowError
diff --git a/test/004-StackWalk/expected.txt b/test/004-StackWalk/expected.txt
index bde0024..5af68cd 100644
--- a/test/004-StackWalk/expected.txt
+++ b/test/004-StackWalk/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 1st call
 172001234567891011121314151617181920652310201919
 2nd call
diff --git a/test/004-UnsafeTest/expected.txt b/test/004-UnsafeTest/expected.txt
index e69de29..6a5618e 100644
--- a/test/004-UnsafeTest/expected.txt
+++ b/test/004-UnsafeTest/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/044-proxy/expected.txt b/test/044-proxy/expected.txt
index f86948a..052c8fa 100644
--- a/test/044-proxy/expected.txt
+++ b/test/044-proxy/expected.txt
@@ -93,4 +93,5 @@
 Got expected exception
 Proxy narrowed invocation return type passed
 5.8
+JNI_OnLoad called
 callback
diff --git a/test/051-thread/expected.txt b/test/051-thread/expected.txt
index 54e34af..c6cd4f8 100644
--- a/test/051-thread/expected.txt
+++ b/test/051-thread/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 thread test starting
 testThreadCapacity thread count: 512
 testThreadDaemons starting thread 'TestDaemonThread'
diff --git a/test/088-monitor-verification/expected.txt b/test/088-monitor-verification/expected.txt
index 13b8c73..f252f6f 100644
--- a/test/088-monitor-verification/expected.txt
+++ b/test/088-monitor-verification/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 recursiveSync ok
 nestedMayThrow ok
 constantLock ok
diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt
index 372ecd0..b003307 100644
--- a/test/115-native-bridge/expected.txt
+++ b/test/115-native-bridge/expected.txt
@@ -17,6 +17,7 @@
     name:testSignal, signature:()I, shorty:I.
     name:testZeroLengthByteBuffers, signature:()V, shorty:V.
 trampoline_JNI_OnLoad called!
+JNI_OnLoad called
 Getting trampoline for Java_Main_testFindClassOnAttachedNativeThread with shorty V.
 trampoline_Java_Main_testFindClassOnAttachedNativeThread called!
 Getting trampoline for Java_Main_testFindFieldOnAttachedNativeThreadNative with shorty V.
diff --git a/test/116-nodex2oat/expected.txt b/test/116-nodex2oat/expected.txt
index 05b1c2f..157dfc4 100644
--- a/test/116-nodex2oat/expected.txt
+++ b/test/116-nodex2oat/expected.txt
@@ -1,6 +1,9 @@
 Run -Xnodex2oat
+JNI_OnLoad called
 Has oat is false, is dex2oat enabled is false.
 Run -Xdex2oat
+JNI_OnLoad called
 Has oat is true, is dex2oat enabled is true.
 Run default
+JNI_OnLoad called
 Has oat is true, is dex2oat enabled is true.
diff --git a/test/117-nopatchoat/expected.txt b/test/117-nopatchoat/expected.txt
index 5cc02d1..0cd4715 100644
--- a/test/117-nopatchoat/expected.txt
+++ b/test/117-nopatchoat/expected.txt
@@ -1,9 +1,12 @@
 Run without dex2oat/patchoat
+JNI_OnLoad called
 dex2oat & patchoat are disabled, has oat is true, has executable oat is expected.
 This is a function call
 Run with dexoat/patchoat
+JNI_OnLoad called
 dex2oat & patchoat are enabled, has oat is true, has executable oat is expected.
 This is a function call
 Run default
+JNI_OnLoad called
 dex2oat & patchoat are enabled, has oat is true, has executable oat is expected.
 This is a function call
diff --git a/test/118-noimage-dex2oat/expected.txt b/test/118-noimage-dex2oat/expected.txt
index 0103e89..166481e 100644
--- a/test/118-noimage-dex2oat/expected.txt
+++ b/test/118-noimage-dex2oat/expected.txt
@@ -1,11 +1,14 @@
 Run -Xnoimage-dex2oat
+JNI_OnLoad called
 Has image is false, is image dex2oat enabled is false, is BOOTCLASSPATH on disk is false.
 testB18485243 PASS
 Run -Xnoimage-dex2oat -Xno-dex-file-fallback
 Failed to initialize runtime (check log for details)
 Run -Ximage-dex2oat
+JNI_OnLoad called
 Has image is true, is image dex2oat enabled is true, is BOOTCLASSPATH on disk is true.
 testB18485243 PASS
 Run default
+JNI_OnLoad called
 Has image is true, is image dex2oat enabled is true, is BOOTCLASSPATH on disk is true.
 testB18485243 PASS
diff --git a/test/119-noimage-patchoat/expected.txt b/test/119-noimage-patchoat/expected.txt
index ed13662..9b9db58 100644
--- a/test/119-noimage-patchoat/expected.txt
+++ b/test/119-noimage-patchoat/expected.txt
@@ -1,8 +1,11 @@
 Run -Xnoimage-dex2oat -Xpatchoat:/system/bin/false
+JNI_OnLoad called
 Has image is false, is image dex2oat enabled is false.
 Run -Xnoimage-dex2oat -Xpatchoat:/system/bin/false -Xno-dex-file-fallback
 Failed to initialize runtime (check log for details)
 Run -Ximage-dex2oat
+JNI_OnLoad called
 Has image is true, is image dex2oat enabled is true.
 Run default
+JNI_OnLoad called
 Has image is true, is image dex2oat enabled is true.
diff --git a/test/137-cfi/expected.txt b/test/137-cfi/expected.txt
index e69de29..6a5618e 100644
--- a/test/137-cfi/expected.txt
+++ b/test/137-cfi/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/139-register-natives/expected.txt b/test/139-register-natives/expected.txt
index e69de29..6a5618e 100644
--- a/test/139-register-natives/expected.txt
+++ b/test/139-register-natives/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt
new file mode 100644
index 0000000..ff65a70
--- /dev/null
+++ b/test/141-class-unload/expected.txt
@@ -0,0 +1,18 @@
+1
+2
+JNI_OnLoad called
+JNI_OnUnload called
+1
+2
+JNI_OnLoad called
+JNI_OnUnload called
+null
+null
+JNI_OnLoad called
+JNI_OnUnload called
+null
+loader null false
+loader null false
+JNI_OnLoad called
+JNI_OnUnload called
+null
diff --git a/test/141-class-unload/info.txt b/test/141-class-unload/info.txt
new file mode 100644
index 0000000..d8dd381
--- /dev/null
+++ b/test/141-class-unload/info.txt
@@ -0,0 +1 @@
+Test that classes get freed after they are no longer reachable.
diff --git a/test/141-class-unload/jni_unload.cc b/test/141-class-unload/jni_unload.cc
new file mode 100644
index 0000000..d913efe
--- /dev/null
+++ b/test/141-class-unload/jni_unload.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+
+#include <iostream>
+
+#include "jit/jit.h"
+#include "jit/jit_instrumentation.h"
+#include "runtime.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT void JNICALL Java_IntHolder_waitForCompilation(JNIEnv*, jclass) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    jit->GetInstrumentationCache()->WaitForCompilationToFinish(Thread::Current());
+  }
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/141-class-unload/src-ex/IntHolder.java b/test/141-class-unload/src-ex/IntHolder.java
new file mode 100644
index 0000000..e4aa6b8
--- /dev/null
+++ b/test/141-class-unload/src-ex/IntHolder.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Simple class that holds a static int for testing that class unloading works
+// and re-runs the class initializer.
+public class IntHolder {
+    private static int value = 1;
+
+    public static void setValue(int newValue) {
+        value = newValue;
+    }
+
+    public static int getValue() {
+        return value;
+    }
+
+    public static void runGC() {
+        Runtime.getRuntime().gc();
+    }
+
+    public static void loadLibrary(String name) {
+        System.loadLibrary(name);
+    }
+
+    public static native void waitForCompilation();
+}
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
new file mode 100644
index 0000000..105a2b9
--- /dev/null
+++ b/test/141-class-unload/src/Main.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.ref.WeakReference;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+
+public class Main {
+    static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/141-class-unload-ex.jar";
+    static String nativeLibraryName;
+
+    public static void main(String[] args) throws Exception {
+        nativeLibraryName = args[0];
+        Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+        if (pathClassLoader == null) {
+            throw new AssertionError("Couldn't find path class loader class");
+        }
+        Constructor constructor =
+            pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
+        try {
+            testUnloadClass(constructor);
+            testUnloadLoader(constructor);
+            // Test that we don't unload if we have a Method keeping the class live.
+            testNoUnloadInvoke(constructor);
+            // Test that we don't unload if we have an instance.
+            testNoUnloadInstance(constructor);
+            // Test JNI_OnLoad and JNI_OnUnload.
+            testLoadAndUnloadLibrary(constructor);
+            // Stress test to make sure we don't leak memory.
+            stressTest(constructor);
+        } catch (Exception e) {
+            System.out.println(e);
+        }
+    }
+
+    private static void stressTest(Constructor constructor) throws Exception {
+        for (int i = 0; i <= 100; ++i) {
+            setUpUnloadLoader(constructor, false);
+            if (i % 10 == 0) {
+                Runtime.getRuntime().gc();
+            }
+        }
+    }
+
+    private static void testUnloadClass(Constructor constructor) throws Exception {
+        WeakReference<Class> klass = setUpUnloadClass(constructor);
+        // No strong references to the class loader, so it should get unloaded.
+        Runtime.getRuntime().gc();
+        WeakReference<Class> klass2 = setUpUnloadClass(constructor);
+        Runtime.getRuntime().gc();
+        // If the weak reference is cleared, then it was unloaded.
+        System.out.println(klass.get());
+        System.out.println(klass2.get());
+    }
+
+    private static void testUnloadLoader(Constructor constructor)
+        throws Exception {
+      WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true);
+      // No strong references to the class loader, so it should get unloaded.
+      Runtime.getRuntime().gc();
+      // If the weak reference is cleared, then it was unloaded.
+      System.out.println(loader.get());
+    }
+
+    private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception {
+        WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor);
+        // No strong references to the class loader, so it should get unloaded.
+        Runtime.getRuntime().gc();
+        // If the weak reference is cleared, then it was unloaded.
+        System.out.println(loader.get());
+    }
+
+    private static void testNoUnloadInvoke(Constructor constructor) throws Exception {
+        WeakReference<ClassLoader> loader =
+            new WeakReference((ClassLoader) constructor.newInstance(
+                DEX_FILE, ClassLoader.getSystemClassLoader()));
+        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
+        intHolder.get().getDeclaredMethod("runGC").invoke(intHolder.get());
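+        // While invoke() is on the stack, the Method object keeps IntHolder (and thus its
+        // loader) reachable across the GC triggered inside runGC.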
+        boolean isNull = loader.get() == null;
+        System.out.println("loader null " + isNull);
+    }
+
+    private static void testNoUnloadInstance(Constructor constructor) throws Exception {
+        WeakReference<ClassLoader> loader =
+            new WeakReference((ClassLoader) constructor.newInstance(
+                DEX_FILE, ClassLoader.getSystemClassLoader()));
+        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
+        Object o = intHolder.get().newInstance();
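+        // Holding this instance keeps IntHolder (and thus the loader) reachable across the GC below.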
+        Runtime.getRuntime().gc();
+        boolean isNull = loader.get() == null;
+        System.out.println("loader null " + isNull);
+    }
+
+    private static WeakReference<Class> setUpUnloadClass(Constructor constructor) throws Exception {
+        ClassLoader loader = (ClassLoader) constructor.newInstance(
+            DEX_FILE, ClassLoader.getSystemClassLoader());
+        Class intHolder = loader.loadClass("IntHolder");
+        Method getValue = intHolder.getDeclaredMethod("getValue");
+        Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
+        // Make sure we don't accidentally preserve the value in the int holder; the class
+        // initializer should be re-run.
+        System.out.println((int) getValue.invoke(intHolder));
+        setValue.invoke(intHolder, 2);
+        System.out.println((int) getValue.invoke(intHolder));
+        waitForCompilation(intHolder);
+        return new WeakReference(intHolder);
+    }
+
+    private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor constructor,
+                                                                boolean waitForCompilation)
+        throws Exception {
+        ClassLoader loader = (ClassLoader) constructor.newInstance(
+            DEX_FILE, ClassLoader.getSystemClassLoader());
+        Class intHolder = loader.loadClass("IntHolder");
+        Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
+        setValue.invoke(intHolder, 2);
+        if (waitForCompilation) {
+            waitForCompilation(intHolder);
+        }
+        return new WeakReference(loader);
+    }
+
+    private static void waitForCompilation(Class intHolder) throws Exception {
+      // Load the native library so that we can call waitForCompilation.
+      Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
+      loadLibrary.invoke(intHolder, nativeLibraryName);
+      // Wait for JIT compilation to finish since the async threads may prevent unloading.
+      Method waitForCompilation = intHolder.getDeclaredMethod("waitForCompilation");
+      waitForCompilation.invoke(intHolder);
+    }
+
+    private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor constructor)
+        throws Exception {
+        ClassLoader loader = (ClassLoader) constructor.newInstance(
+            DEX_FILE, ClassLoader.getSystemClassLoader());
+        Class intHolder = loader.loadClass("IntHolder");
+        Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
+        loadLibrary.invoke(intHolder, nativeLibraryName);
+        return new WeakReference(loader);
+    }
+}
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index a746664..f06c250 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -249,6 +249,25 @@
     array[Integer.MAX_VALUE - 998] = 1;
   }
 
+  /// CHECK-START: void Main.constantIndexing6(int[]) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing6(int[]) BCE (after)
+  /// CHECK: Deoptimize
+
+  static void constantIndexing6(int[] array) {
+    array[3] = 1;
+    array[4] = 1;
+  }
+
+  // A helper into which the actual throwing function should be inlined.
+  static void constantIndexingForward6(int[] array) {
+    constantIndexing6(array);
+  }
+
   /// CHECK-START: void Main.loopPattern1(int[]) BCE (before)
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
@@ -602,7 +621,12 @@
       // This will cause AIOOBE.
       constantIndexing2(new int[3]);
     } catch (ArrayIndexOutOfBoundsException e) {
-      return 99;
+      try {
+        // This will cause AIOOBE.
+        constantIndexingForward6(new int[3]);
+      } catch (ArrayIndexOutOfBoundsException e2) {
+        return 99;
+      }
     }
     return 0;
   }
diff --git a/test/454-get-vreg/expected.txt b/test/454-get-vreg/expected.txt
index e69de29..6a5618e 100644
--- a/test/454-get-vreg/expected.txt
+++ b/test/454-get-vreg/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/455-set-vreg/expected.txt b/test/455-set-vreg/expected.txt
index e69de29..6a5618e 100644
--- a/test/455-set-vreg/expected.txt
+++ b/test/455-set-vreg/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/457-regs/expected.txt b/test/457-regs/expected.txt
index e69de29..6a5618e 100644
--- a/test/457-regs/expected.txt
+++ b/test/457-regs/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/461-get-reference-vreg/expected.txt b/test/461-get-reference-vreg/expected.txt
index e69de29..6a5618e 100644
--- a/test/461-get-reference-vreg/expected.txt
+++ b/test/461-get-reference-vreg/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/466-get-live-vreg/expected.txt b/test/466-get-live-vreg/expected.txt
index e69de29..6a5618e 100644
--- a/test/466-get-live-vreg/expected.txt
+++ b/test/466-get-live-vreg/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/485-checker-dce-switch/expected.txt b/test/485-checker-dce-switch/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/485-checker-dce-switch/expected.txt
diff --git a/test/485-checker-dce-switch/info.txt b/test/485-checker-dce-switch/info.txt
new file mode 100644
index 0000000..6653526
--- /dev/null
+++ b/test/485-checker-dce-switch/info.txt
@@ -0,0 +1 @@
+Tests that DCE can remove a packed switch.
diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java
new file mode 100644
index 0000000..019d876
--- /dev/null
+++ b/test/485-checker-dce-switch/src/Main.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static int $inline$method() {
+    return 5;
+  }
+
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (before)
+  /// CHECK-DAG:                      PackedSwitch
+
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after)
+  /// CHECK-DAG:    <<Const100:i\d+>> IntConstant 100
+  /// CHECK-DAG:                      Return [<<Const100>>]
+
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after)
+  /// CHECK-NOT:                      PackedSwitch
+
+  public static int wholeSwitchDead(int j) {
+    int i = $inline$method();
+    int l = 100;
+    if (i > 100) {
+      switch(j) {
+        case 1:
+          i++;
+          break;
+        case 2:
+          i = 99;
+          break;
+        case 3:
+          i = 100;
+          break;
+        case 4:
+          i = -100;
+          break;
+        case 5:
+          i = 7;
+          break;
+        case 6:
+          i = -9;
+          break;
+      }
+      l += i;
+    }
+
+    return l;
+  }
+
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (before)
+  /// CHECK-DAG:                      PackedSwitch
+
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after)
+  /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
+  /// CHECK-DAG:                      Return [<<Const7>>]
+
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after)
+  /// CHECK-NOT:                      PackedSwitch
+
+  public static int constantSwitch_InRange() {
+    int i = $inline$method();
+    switch(i) {
+      case 1:
+        i++;
+        break;
+      case 2:
+        i = 99;
+        break;
+      case 3:
+        i = 100;
+        break;
+      case 4:
+        i = -100;
+        break;
+      case 5:
+        i = 7;
+        break;
+      case 6:
+        i = -9;
+        break;
+    }
+
+    return i;
+  }
+
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (before)
+  /// CHECK-DAG:                      PackedSwitch
+
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after)
+  /// CHECK-DAG:     <<Const15:i\d+>> IntConstant 15
+  /// CHECK-DAG:                      Return [<<Const15>>]
+
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after)
+  /// CHECK-NOT:                      PackedSwitch
+
+  public static int constantSwitch_AboveRange() {
+    int i = $inline$method() + 10;
+    switch(i) {
+      case 1:
+        i++;
+        break;
+      case 2:
+        i = 99;
+        break;
+      case 3:
+        i = 100;
+        break;
+      case 4:
+        i = -100;
+        break;
+      case 5:
+        i = 7;
+        break;
+      case 6:
+        i = -9;
+        break;
+    }
+
+    return i;
+  }
+
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (before)
+  /// CHECK-DAG:                      PackedSwitch
+
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after)
+  /// CHECK-DAG:     <<ConstM5:i\d+>> IntConstant -5
+  /// CHECK-DAG:                      Return [<<ConstM5>>]
+
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after)
+  /// CHECK-NOT:                      PackedSwitch
+
+  public static int constantSwitch_BelowRange() {
+    int i = $inline$method() - 10;
+    switch(i) {
+      case 1:
+        i++;
+        break;
+      case 2:
+        i = 99;
+        break;
+      case 3:
+        i = 100;
+        break;
+      case 4:
+        i = -100;
+        break;
+      case 5:
+        i = 7;
+        break;
+      case 6:
+        i = -9;
+        break;
+    }
+
+    return i;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int ret_val = wholeSwitchDead(10);
+    if (ret_val != 100) {
+      throw new Error("Incorrect return value from wholeSwitchDead:" + ret_val);
+    }
+
+    ret_val = constantSwitch_InRange();
+    if (ret_val != 7) {
+      throw new Error("Incorrect return value from constantSwitch_InRange:" + ret_val);
+    }
+
+    ret_val = constantSwitch_AboveRange();
+    if (ret_val != 15) {
+      throw new Error("Incorrect return value from constantSwitch_AboveRange:" + ret_val);
+    }
+
+    ret_val = constantSwitch_BelowRange();
+    if (ret_val != -5) {
+      throw new Error("Incorrect return value from constantSwitch_BelowRange:" + ret_val);
+    }
+  }
+}
diff --git a/test/497-inlining-and-class-loader/expected.txt b/test/497-inlining-and-class-loader/expected.txt
index f5b9fe0..905dbfd 100644
--- a/test/497-inlining-and-class-loader/expected.txt
+++ b/test/497-inlining-and-class-loader/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 java.lang.Exception
 	at Main.$noinline$bar(Main.java:124)
 	at Level2.$inline$bar(Level1.java:25)
diff --git a/test/526-checker-caller-callee-regs/src/Main.java b/test/526-checker-caller-callee-regs/src/Main.java
index a1f3301..f402c2c 100644
--- a/test/526-checker-caller-callee-regs/src/Main.java
+++ b/test/526-checker-caller-callee-regs/src/Main.java
@@ -36,6 +36,8 @@
   // ------------------------------|------------------------|-----------------
   // ARM64 callee-saved registers  | [x20-x29]              | x2[0-9]
   // ARM callee-saved registers    | [r5-r8,r10,r11]        | r([5-8]|10|11)
+  // X86 callee-saved registers    | [ebp,esi,edi]          | e(bp|si|di)
+  // X86_64 callee-saved registers | [rbx,rbp,r12-15]       | r(bx|bp|1[2-5])
 
   /**
    * Check that a value live across a function call is allocated in a callee
@@ -58,7 +60,21 @@
   /// CHECK:                        Sub [<<t1>>,<<t2>>]
   /// CHECK:                        Return
 
-  // TODO: Add tests for other architectures.
+  /// CHECK-START-X86: int Main.$opt$LiveInCall(int) register (after)
+  /// CHECK-DAG:   <<Arg:i\d+>>     ParameterValue
+  /// CHECK-DAG:   <<Const1:i\d+>>  IntConstant 1
+  /// CHECK:       <<t1:i\d+>>      Add [<<Arg>>,<<Const1>>] {{.*->e(bp|si|di)}}
+  /// CHECK:       <<t2:i\d+>>      InvokeStaticOrDirect
+  /// CHECK:                        Sub [<<t1>>,<<t2>>]
+  /// CHECK:                        Return
+
+  /// CHECK-START-X86_64: int Main.$opt$LiveInCall(int) register (after)
+  /// CHECK-DAG:   <<Arg:i\d+>>     ParameterValue
+  /// CHECK-DAG:   <<Const1:i\d+>>  IntConstant 1
+  /// CHECK:       <<t1:i\d+>>      Add [<<Arg>>,<<Const1>>] {{.*->r(bx|bp|1[2-5])}}
+  /// CHECK:       <<t2:i\d+>>      InvokeStaticOrDirect
+  /// CHECK:                        Sub [<<t1>>,<<t2>>]
+  /// CHECK:                        Return
 
   public static int $opt$LiveInCall(int arg) {
     int t1 = arg + 1;
diff --git a/test/532-checker-nonnull-arrayset/expected.txt b/test/532-checker-nonnull-arrayset/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/532-checker-nonnull-arrayset/expected.txt
diff --git a/test/532-checker-nonnull-arrayset/info.txt b/test/532-checker-nonnull-arrayset/info.txt
new file mode 100644
index 0000000..e1578c8
--- /dev/null
+++ b/test/532-checker-nonnull-arrayset/info.txt
@@ -0,0 +1 @@
+Test that we optimize ArraySet when the value is not null.
diff --git a/test/532-checker-nonnull-arrayset/src/Main.java b/test/532-checker-nonnull-arrayset/src/Main.java
new file mode 100644
index 0000000..7d8fff4
--- /dev/null
+++ b/test/532-checker-nonnull-arrayset/src/Main.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Check that we don't put a null check in the card marking code.
+
+  /// CHECK-START: void Main.test() instruction_simplifier (before)
+  /// CHECK:          ArraySet value_can_be_null:true
+
+  /// CHECK-START: void Main.test() instruction_simplifier (after)
+  /// CHECK:          ArraySet value_can_be_null:false
+
+  /// CHECK-START-X86: void Main.test() disassembly (after)
+  /// CHECK:          ArraySet value_can_be_null:false
+  /// CHECK-NOT:      test
+  /// CHECK:          ReturnVoid
+  public static void test() {
+    Object[] array = new Object[1];
+    Object nonNull = array[0];
+    nonNull.getClass(); // Ensure nonNull has an implicit null check.
+    array[0] = nonNull;
+  }
+
+  public static void main(String[] args) {}
+}
diff --git a/test/533-regression-debugphi/expected.txt b/test/533-regression-debugphi/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/533-regression-debugphi/expected.txt
diff --git a/test/533-regression-debugphi/info.txt b/test/533-regression-debugphi/info.txt
new file mode 100644
index 0000000..a4d4857
--- /dev/null
+++ b/test/533-regression-debugphi/info.txt
@@ -0,0 +1,2 @@
+Test a regression where DeadPhiHandling would infinitely loop over
+complicated phi dependencies.
diff --git a/test/533-regression-debugphi/smali/TestCase.smali b/test/533-regression-debugphi/smali/TestCase.smali
new file mode 100644
index 0000000..1908e72
--- /dev/null
+++ b/test/533-regression-debugphi/smali/TestCase.smali
@@ -0,0 +1,72 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# This is a reduced test case that used to trigger an infinite loop
+# in the DeadPhiHandling phase of the optimizing compiler (only used
+# with debuggable flag).
+.method public static testCase(IILjava/lang/Object;)V
+  .registers 5
+  const/4 v0, 0x0
+
+  :B4
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  goto :B7
+
+  :B7
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  if-nez p2, :Btmp
+  goto :B111
+
+  :Btmp
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  if-nez p2, :B9
+  goto :B110
+
+  :B13
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  add-int v0, p0, p1
+  goto :B7
+
+  :B110
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  add-int v0, p0, p1
+  goto :B111
+
+  :B111
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  goto :B4
+
+  :B9
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  if-nez p2, :B10
+
+  :B11
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move v1, v0
+  goto :B12
+
+  :B10
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move-object v1, p2
+  goto :B12
+
+  :B12
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  goto :B13
+
+  return-void
+.end method
diff --git a/test/533-regression-debugphi/src/Main.java b/test/533-regression-debugphi/src/Main.java
new file mode 100644
index 0000000..858770f
--- /dev/null
+++ b/test/533-regression-debugphi/src/Main.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 6568eac..17c1f00 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -1,4 +1,6 @@
 PackedSwitch
+PackedSwitch key INT_MAX
+PackedSwitch key overflow
 b/17790197
 FloatBadArgReg
 negLong
diff --git a/test/800-smali/smali/PackedSwitch.smali b/test/800-smali/smali/PackedSwitch.smali
index 6a3e5f0..95659fb 100644
--- a/test/800-smali/smali/PackedSwitch.smali
+++ b/test/800-smali/smali/PackedSwitch.smali
@@ -24,3 +24,29 @@
     goto :return
 
 .end method
+
+.method public static packedSwitch_INT_MAX(I)I
+    .registers 2
+
+    const/4 v0, 0
+    packed-switch v0, :switch_data
+    goto :default
+
+    :switch_data
+    .packed-switch 0x7FFFFFFE
+        :case1  # key = INT_MAX - 1
+        :case2  # key = INT_MAX
+    .end packed-switch
+
+    :return
+    return v1
+
+    :default
+    goto :return
+
+    :case1
+    goto :return
+    :case2
+    goto :return
+
+.end method
diff --git a/test/800-smali/smali/b_24399945.smali b/test/800-smali/smali/b_24399945.smali
new file mode 100644
index 0000000..68f59d0
--- /dev/null
+++ b/test/800-smali/smali/b_24399945.smali
@@ -0,0 +1,32 @@
+.class public Lb_24399945;
+
+.super Ljava/lang/Object;
+
+.method public static packedSwitch_overflow(I)I
+    .registers 2
+
+    const/4 v0, 0
+    packed-switch v0, :switch_data
+    goto :default
+
+    :switch_data
+    .packed-switch 0x7FFFFFFE
+        :case1 # key = INT_MAX - 1
+        :case2 # key = INT_MAX
+        :case3 # key = INT_MIN (overflow!)
+    .end packed-switch
+
+    :return
+    return v1
+
+    :default
+    goto :return
+
+    :case1
+    goto :return
+    :case2
+    goto :return
+    :case3
+    goto :return
+
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index ba4990a..f75747d 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -51,6 +51,10 @@
         testCases = new LinkedList<TestCase>();
         testCases.add(new TestCase("PackedSwitch", "PackedSwitch", "packedSwitch",
                 new Object[]{123}, null, 123));
+        testCases.add(new TestCase("PackedSwitch key INT_MAX", "PackedSwitch",
+                "packedSwitch_INT_MAX", new Object[]{123}, null, 123));
+        testCases.add(new TestCase("PackedSwitch key overflow", "b_24399945",
+                "packedSwitch_overflow", new Object[]{123}, new VerifyError(), null));
 
         testCases.add(new TestCase("b/17790197", "B17790197", "getInt", null, null, 100));
         testCases.add(new TestCase("FloatBadArgReg", "FloatBadArgReg", "getInt",
diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt
index 3637099..16381e4 100644
--- a/test/955-lambda-smali/expected.txt
+++ b/test/955-lambda-smali/expected.txt
@@ -16,3 +16,13 @@
 (MoveResult) testF success
 (MoveResult) testD success
 (MoveResult) testL success
+(CaptureVariables) (0-args, 1 captured variable 'Z'): value is true
+(CaptureVariables) (0-args, 1 captured variable 'B'): value is R
+(CaptureVariables) (0-args, 1 captured variable 'C'): value is ∂
+(CaptureVariables) (0-args, 1 captured variable 'S'): value is 1000
+(CaptureVariables) (0-args, 1 captured variable 'I'): value is 12345678
+(CaptureVariables) (0-args, 1 captured variable 'J'): value is 3287471278325742
+(CaptureVariables) (0-args, 1 captured variable 'F'): value is Infinity
+(CaptureVariables) (0-args, 1 captured variable 'D'): value is -Infinity
+(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is true,R,∂,1000,12345678,3287471278325742,Infinity,-Infinity
+(CaptureVariables) Caught NPE
diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali
index 108b5fa..915de2d 100644
--- a/test/955-lambda-smali/smali/BoxUnbox.smali
+++ b/test/955-lambda-smali/smali/BoxUnbox.smali
@@ -1,4 +1,3 @@
-#
 #  Copyright (C) 2015 The Android Open Source Project
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
@@ -36,8 +35,8 @@
 .end method
 
 #TODO: should use a closure type instead of ArtMethod.
-.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
-    .registers 3 # 1 parameters, 2 locals
+.method public static doHelloWorld(J)V
+    .registers 4 # 1 wide parameters, 2 locals
 
     const-string v0, "(BoxUnbox) Hello boxing world! (0-args, no closure)"
 
@@ -51,9 +50,9 @@
 .method private static testBox()V
     .registers 3
 
-    create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
     box-lambda v2, v0 # v2 = box(v0)
-    unbox-lambda v0, v2, Ljava/lang/reflect/ArtMethod; # v0 = unbox(v2)
+    unbox-lambda v0, v2, J # v0 = unbox(v2)
     invoke-lambda v0, {}
 
     return-void
@@ -63,7 +62,7 @@
 .method private static testBoxEquality()V
    .registers 6 # 0 parameters, 6 locals
 
-    create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
     box-lambda v2, v0 # v2 = box(v0)
     box-lambda v3, v0 # v3 = box(v0)
 
@@ -95,7 +94,7 @@
     const v0, 0  # v0 = null
     const v1, 0  # v1 = null
 :start
-    unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod;
+    unbox-lambda v2, v0, J
     # attempting to unbox a null lambda will throw NPE
 :end
     return-void
@@ -140,7 +139,7 @@
     const-string v0, "This is not a boxed lambda"
 :start
     # TODO: use \FunctionalType; here instead
-    unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod;
+    unbox-lambda v2, v0, J
     # can't use a string, expects a lambda object here. throws ClassCastException.
 :end
     return-void
diff --git a/test/955-lambda-smali/smali/CaptureVariables.smali b/test/955-lambda-smali/smali/CaptureVariables.smali
new file mode 100644
index 0000000..f18b7ff
--- /dev/null
+++ b/test/955-lambda-smali/smali/CaptureVariables.smali
@@ -0,0 +1,311 @@
+#
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LCaptureVariables;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static run()V
+.registers 8
+    # Test boolean capture
+    const v2, 1           # v2 = true
+    capture-variable v2, "Z"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_Z(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test byte capture
+    const v2, 82       # v2 = 82, 'R'
+    capture-variable v2, "B"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_B(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test char capture
+    const v2, 0x2202       # v2 = 0x2202, '∂'
+    capture-variable v2, "C"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_C(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test short capture
+    const v2, 1000 # v2 = 1000
+    capture-variable v2, "S"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_S(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test int capture
+    const v2, 12345678
+    capture-variable v2, "I"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_I(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test long capture
+    const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742
+    capture-variable v2, "J"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_J(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test float capture
+    const v2, infinityf
+    capture-variable v2, "F"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_F(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test double capture
+    const-wide v2, -infinity
+    capture-variable v2, "D"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_D(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    #TODO: capture objects and lambdas once we have support for it
+
+    # Test capturing multiple variables
+    invoke-static {}, LCaptureVariables;->testMultipleCaptures()V
+
+    # Test failures
+    invoke-static {}, LCaptureVariables;->testFailures()V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_Z(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'Z'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "Z"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(Z)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_B(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'B'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "B"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V  # no println(B), use char instead.
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_C(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'C'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "C"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_S(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'S'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "S"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V  # no println(S), use int instead
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_I(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'I'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "I"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_J(J)V
+    .registers 6 # 1 wide parameter, 4 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'J'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
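+    # A captured long is liberated into the register pair (v2, v3), so println(J) below is
+    # passed both halves.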
+    liberate-variable v2, p0, "J"
+    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(J)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_F(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'F'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "F"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(F)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_D(J)V
+    .registers 6 # 1 wide parameter, 4 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'D'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "D"
+    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V
+
+    return-void
+.end method
+
+# Test capturing more than one variable.
+.method private static testMultipleCaptures()V
+    .registers 4 # 0 parameters, 4 locals
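+    # The captures below accumulate in order (Z, B, C, S, I, J, F, D);
+    # printCapturedVariable_ZBCSIJFD liberates them by the same descriptors in that order.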
+
+    const v2, 1           # v2 = true
+    capture-variable v2, "Z"
+
+    const v2, 82       # v2 = 82, 'R'
+    capture-variable v2, "B"
+
+    const v2, 0x2202       # v2 = 0x2202, '∂'
+    capture-variable v2, "C"
+
+    const v2, 1000 # v2 = 1000
+    capture-variable v2, "S"
+
+    const v2, 12345678
+    capture-variable v2, "I"
+
+    const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742
+    capture-variable v2, "J"
+
+    const v2, infinityf
+    capture-variable v2, "F"
+
+    const-wide v2, -infinity
+    capture-variable v2, "D"
+
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_ZBCSIJFD(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_ZBCSIJFD(J)V
+    .registers 7 # 1 wide parameter, 5 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is "
+    const-string v4, ","
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "Z"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(Z)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "B"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "C"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "S"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "I"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "J"
+    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->print(J)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "F"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(F)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "D"
+    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V
+
+    return-void
+.end method
+
+# Test that exceptions are thrown as expected when opcodes are used incorrectly.
+.method private static testFailures()V
+    .registers 4 # 0 parameters, 4 locals
+
+    const v0, 0  # v0 = null
+    const v1, 0  # v1 = null
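+    # v0/v1 hold a null closure (see above); the .catch directive at the end of this method
+    # routes the expected NullPointerException from the :start..:end range to :handler.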
+:start
+    liberate-variable v0, v2, "Z" # liberating a variable from a null lambda closure shall raise an NPE
+:end
+    return-void
+
+:handler
+    const-string v2, "(CaptureVariables) Caught NPE"
+    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+
+    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
+.end method
diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali
index 5d2aabb..9892d61 100644
--- a/test/955-lambda-smali/smali/Main.smali
+++ b/test/955-lambda-smali/smali/Main.smali
@@ -24,6 +24,7 @@
     invoke-static {}, LTrivialHelloWorld;->run()V
     invoke-static {}, LBoxUnbox;->run()V
     invoke-static {}, LMoveResult;->run()V
+    invoke-static {}, LCaptureVariables;->run()V
 
 # TODO: add tests when verification fails
 
diff --git a/test/955-lambda-smali/smali/MoveResult.smali b/test/955-lambda-smali/smali/MoveResult.smali
index 1725da3..52f7ba3 100644
--- a/test/955-lambda-smali/smali/MoveResult.smali
+++ b/test/955-lambda-smali/smali/MoveResult.smali
@@ -41,7 +41,7 @@
 .method public static testZ()V
     .registers 6
 
-    create-lambda v0, LMoveResult;->lambdaZ(Ljava/lang/reflect/ArtMethod;)Z
+    create-lambda v0, LMoveResult;->lambdaZ(J)Z
     invoke-lambda v0, {}
     move-result v2
     const v3, 1
@@ -61,7 +61,7 @@
 .end method
 
 # Lambda target for testZ. Always returns "true".
-.method public static lambdaZ(Ljava/lang/reflect/ArtMethod;)Z
+.method public static lambdaZ(J)Z
     .registers 3
 
     const v0, 1
@@ -73,7 +73,7 @@
 .method public static testB()V
     .registers 6
 
-    create-lambda v0, LMoveResult;->lambdaB(Ljava/lang/reflect/ArtMethod;)B
+    create-lambda v0, LMoveResult;->lambdaB(J)B
     invoke-lambda v0, {}
     move-result v2
     const v3, 15
@@ -93,7 +93,7 @@
 .end method
 
 # Lambda target for testB. Always returns "15".
-.method public static lambdaB(Ljava/lang/reflect/ArtMethod;)B
+.method public static lambdaB(J)B
     .registers 3 # 1 parameters, 2 locals
 
     const v0, 15
@@ -105,7 +105,7 @@
 .method public static testS()V
     .registers 6
 
-    create-lambda v0, LMoveResult;->lambdaS(Ljava/lang/reflect/ArtMethod;)S
+    create-lambda v0, LMoveResult;->lambdaS(J)S
     invoke-lambda v0, {}
     move-result v2
     const/16 v3, 31000
@@ -125,7 +125,7 @@
 .end method
 
 # Lambda target for testS. Always returns "31000".
-.method public static lambdaS(Ljava/lang/reflect/ArtMethod;)S
+.method public static lambdaS(J)S
     .registers 3
 
     const/16 v0, 31000
@@ -137,7 +137,7 @@
 .method public static testI()V
     .registers 6
 
-    create-lambda v0, LMoveResult;->lambdaI(Ljava/lang/reflect/ArtMethod;)I
+    create-lambda v0, LMoveResult;->lambdaI(J)I
     invoke-lambda v0, {}
     move-result v2
     const v3, 128000
@@ -157,7 +157,7 @@
 .end method
 
 # Lambda target for testI. Always returns "128000".
-.method public static lambdaI(Ljava/lang/reflect/ArtMethod;)I
+.method public static lambdaI(J)I
     .registers 3
 
     const v0, 128000
@@ -167,9 +167,9 @@
 
 # Test that chars are returned correctly via move-result.
 .method public static testC()V
-    .registers 6
+    .registers 7
 
-    create-lambda v0, LMoveResult;->lambdaC(Ljava/lang/reflect/ArtMethod;)C
+    create-lambda v0, LMoveResult;->lambdaC(J)C
     invoke-lambda v0, {}
     move-result v2
     const v3, 65535
@@ -189,7 +189,7 @@
 .end method
 
 # Lambda target for testC. Always returns "65535".
-.method public static lambdaC(Ljava/lang/reflect/ArtMethod;)C
+.method public static lambdaC(J)C
     .registers 3
 
     const v0, 65535
@@ -199,12 +199,12 @@
 
 # Test that longs are returned correctly via move-result.
 .method public static testJ()V
-    .registers 8
+    .registers 9
 
-    create-lambda v0, LMoveResult;->lambdaJ(Ljava/lang/reflect/ArtMethod;)J
+    create-lambda v0, LMoveResult;->lambdaJ(J)J
     invoke-lambda v0, {}
     move-result v2
-    const-wide v4, 0xdeadf00dc0ffee
+    const-wide v4, 0xdeadf00dc0ffeeL
 
     if-ne v4, v2, :is_not_equal
     const-string v6, "(MoveResult) testJ success"
@@ -220,11 +220,11 @@
 
 .end method
 
-# Lambda target for testC. Always returns "0xdeadf00dc0ffee".
-.method public static lambdaJ(Ljava/lang/reflect/ArtMethod;)J
-    .registers 4
+# Lambda target for testJ. Always returns "0xdeadf00dc0ffeeL".
+.method public static lambdaJ(J)J
+    .registers 5
 
-    const-wide v0, 0xdeadf00dc0ffee
+    const-wide v0, 0xdeadf00dc0ffeeL
     return-wide v0
 
 .end method
@@ -233,7 +233,7 @@
 .method public static testF()V
     .registers 6
 
-    create-lambda v0, LMoveResult;->lambdaF(Ljava/lang/reflect/ArtMethod;)F
+    create-lambda v0, LMoveResult;->lambdaF(J)F
     invoke-lambda v0, {}
     move-result v2
     const v3, infinityf
@@ -253,8 +253,8 @@
 .end method
 
 # Lambda target for testF. Always returns "infinityf".
-.method public static lambdaF(Ljava/lang/reflect/ArtMethod;)F
-    .registers 3
+.method public static lambdaF(J)F
+    .registers 4
 
     const v0, infinityf
     return v0
@@ -265,10 +265,10 @@
 .method public static testD()V
     .registers 8
 
-    create-lambda v0, LMoveResult;->lambdaD(Ljava/lang/reflect/ArtMethod;)D
+    create-lambda v0, LMoveResult;->lambdaD(J)D
     invoke-lambda v0, {}
     move-result-wide v2
-    const-wide v4, infinity
+    const-wide v4, -infinity
 
     if-ne v4, v2, :is_not_equal
     const-string v6, "(MoveResult) testD success"
@@ -285,10 +285,10 @@
 .end method
 
 # Lambda target for testD. Always returns "infinity".
-.method public static lambdaD(Ljava/lang/reflect/ArtMethod;)D
-    .registers 4
+.method public static lambdaD(J)D
+    .registers 5
 
-    const-wide v0, infinity # 123.456789
+    const-wide v0, -infinity
     return-wide v0
 
 .end method
@@ -298,7 +298,7 @@
 .method public static testL()V
     .registers 8
 
-    create-lambda v0, LMoveResult;->lambdaL(Ljava/lang/reflect/ArtMethod;)Ljava/lang/String;
+    create-lambda v0, LMoveResult;->lambdaL(J)Ljava/lang/String;
     invoke-lambda v0, {}
     move-result-object v2
     const-string v4, "Interned string"
@@ -319,8 +319,8 @@
 .end method
 
 # Lambda target for testL. Always returns "Interned string" (string).
-.method public static lambdaL(Ljava/lang/reflect/ArtMethod;)Ljava/lang/String;
-    .registers 4
+.method public static lambdaL(J)Ljava/lang/String;
+    .registers 5
 
     const-string v0, "Interned string"
     return-object v0
diff --git a/test/955-lambda-smali/smali/TrivialHelloWorld.smali b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
index 38ee95a..3444b13 100644
--- a/test/955-lambda-smali/smali/TrivialHelloWorld.smali
+++ b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
@@ -25,12 +25,12 @@
 .method public static run()V
 .registers 8
     # Trivial 0-arg hello world
-    create-lambda v0, LTrivialHelloWorld;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    create-lambda v0, LTrivialHelloWorld;->doHelloWorld(J)V
     # TODO: create-lambda should not write to both v0 and v1
     invoke-lambda v0, {}
 
     # Slightly more interesting 4-arg hello world
-    create-lambda v2, doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
+    create-lambda v2, doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
     # TODO: create-lambda should not write to both v2 and v3
     const-string v4, "A"
     const-string v5, "B"
@@ -43,9 +43,9 @@
     return-void
 .end method
 
-#TODO: should use a closure type instead of ArtMethod.
-.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
-    .registers 3 # 1 parameters, 2 locals
+#TODO: should use a closure type instead of jlong.
+.method public static doHelloWorld(J)V
+    .registers 5 # 1 wide parameter, 3 locals
 
     const-string v0, "Hello world! (0-args, no closure)"
 
@@ -55,17 +55,17 @@
     return-void
 .end method
 
-#TODO: should use a closure type instead of ArtMethod.
-.method public static doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
-    .registers 7 # 5 parameters, 2 locals
+#TODO: should use a closure type instead of jlong.
+.method public static doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
+    .registers 9 # 1 wide parameter, 4 narrow parameters, 3 locals
 
     const-string v0, " Hello world! (4-args, no closure)"
     sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
 
-    invoke-virtual {v1, p1}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
     invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
     invoke-virtual {v1, p3}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
     invoke-virtual {v1, p4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+    invoke-virtual {v1, p5}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
 
     invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
 
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 7f05a04..e43ea90 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -33,6 +33,7 @@
   1337-gc-coverage/gc_coverage.cc \
   137-cfi/cfi.cc \
   139-register-natives/regnative.cc \
+  141-class-unload/jni_unload.cc \
   454-get-vreg/get_vreg_jni.cc \
   455-set-vreg/set_vreg_jni.cc \
   457-regs/regs_jni.cc \
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 29e015f..4397ea4 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -370,6 +370,7 @@
 # when already tracing, and writes an error message that we do not want to check for.
 TEST_ART_BROKEN_TRACING_RUN_TESTS := \
   137-cfi \
+  141-class-unload \
   802-deoptimization
 
 ifneq (,$(filter trace stream,$(TRACE_TYPES)))
diff --git a/test/run-test b/test/run-test
index 828939d..a5b6e92 100755
--- a/test/run-test
+++ b/test/run-test
@@ -392,7 +392,7 @@
 
 # Most interesting target architecture variables are Makefile variables, not environment variables.
 # Try to map the suffix64 flag and what we find in ${ANDROID_PRODUCT_OUT}/data/art-test to an architecture name.
-function guess_arch_name() {
+function guess_target_arch_name() {
     grep32bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm|x86|mips)$'`
     grep64bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm64|x86_64|mips64)$'`
     if [ "x${suffix64}" = "x64" ]; then
@@ -402,6 +402,14 @@
     fi
 }
 
+function guess_host_arch_name() {
+    if [ "x${suffix64}" = "x64" ]; then
+        host_arch_name="x86_64"
+    else
+        host_arch_name="x86"
+    fi
+}
+
 if [ "$target_mode" = "no" ]; then
     if [ "$runtime" = "jvm" ]; then
         if [ "$prebuild_mode" = "yes" ]; then
@@ -437,10 +445,11 @@
         if [ -z "$ANDROID_HOST_OUT" ]; then
             export ANDROID_HOST_OUT=$ANDROID_BUILD_TOP/out/host/linux-x86
         fi
+        guess_host_arch_name
         run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}.art"
         run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}"
     else
-        guess_arch_name
+        guess_target_arch_name
         run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}"
         run_args="${run_args} --boot /data/art-test/core${image_suffix}${pic_image_suffix}.art"
     fi
@@ -635,7 +644,7 @@
       run_checker="yes"
       if [ "$target_mode" = "no" ]; then
         cfg_output_dir="$tmp_dir"
-        checker_arch_option=
+        checker_arch_option="--arch=${host_arch_name^^}"
       else
         cfg_output_dir="$DEX_LOCATION"
         checker_arch_option="--arch=${target_arch_name^^}"
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index a670fc7..972e827 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -68,20 +68,14 @@
   echo "Executing $make_command"
   $make_command
 elif [[ $mode == "target" ]]; then
+  # Disable NINJA for building on target, as it does not support setting environment variables
+  # within the make command.
+  env="$env USE_NINJA=false"
+  # Build extra tools that will be used by tests, so that
+  # they are compiled with our own linker.
   # We need to provide our own linker in case the linker on the device
   # is out of date.
-  env="TARGET_GLOBAL_LDFLAGS=-Wl,-dynamic-linker=$android_root/bin/$linker"
-  # gcc gives a linker error, so compile with clang.
-  # TODO: investigate and fix?
-  if [[ $TARGET_PRODUCT == "mips32r2_fp" ]]; then
-    env="$env USE_CLANG_PLATFORM_BUILD=true"
-  fi
-  # Disable NINJA for building on target, it does not support the -e option to Makefile.
-  env="$env USE_NINJA=false"
-  # Use '-e' to force the override of TARGET_GLOBAL_LDFLAGS.
-  # Also, we build extra tools that will be used by tests, so that
-  # they are compiled with our own linker.
-  make_command="make -e $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb"
+  make_command="make TARGET_LINKER=$android_root/bin/$linker $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb"
   echo "Executing env $env $make_command"
   env $env $make_command
 fi