Merge "Explicitly add HLoadClass/HClinitCheck for HNewInstance."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index dcde5ab..717403f 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -29,6 +29,7 @@
   GetMethodSignature \
   Instrumentation \
   Interfaces \
+  LambdaInterfaces \
   Lookup \
   Main \
   MultiDex \
@@ -77,6 +78,7 @@
 ART_GTEST_oat_test_DEX_DEPS := Main
 ART_GTEST_object_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
 ART_GTEST_proxy_test_DEX_DEPS := Interfaces
+ART_GTEST_lambda_proxy_test_DEX_DEPS := LambdaInterfaces
 ART_GTEST_reflection_test_DEX_DEPS := Main NonStaticLeafMethods StaticLeafMethods
 ART_GTEST_stub_test_DEX_DEPS := AllFields
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
@@ -97,6 +99,7 @@
 
 # TODO: document why this is needed.
 ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32)
+ART_GTEST_lambda_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32)
 
 # The dexdump test requires an image and the dexdump utility.
 # TODO: rename into dexdump when migration completes
@@ -233,6 +236,7 @@
 
 COMPILER_GTEST_COMMON_SRC_FILES := \
   runtime/jni_internal_test.cc \
+  runtime/lambda_proxy_test.cc \
   runtime/proxy_test.cc \
   runtime/reflection_test.cc \
   compiler/compiled_method_test.cc \
@@ -741,6 +745,7 @@
 ART_GTEST_oat_file_assistant_test_TARGET_DEPS :=
 ART_GTEST_object_test_DEX_DEPS :=
 ART_GTEST_proxy_test_DEX_DEPS :=
+ART_GTEST_lambda_proxy_test_DEX_DEPS :=
 ART_GTEST_reflection_test_DEX_DEPS :=
 ART_GTEST_stub_test_DEX_DEPS :=
 ART_GTEST_transaction_test_DEX_DEPS :=
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 34fb790..529143d 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -458,9 +458,9 @@
   }
   {
     EXPECT_SINGLE_PARSE_VALUE(
-        MemoryKiB(16 * KB), "-Xjitcodecacheinitialcapacity:16K", M::JITCodeCacheInitialCapacity);
+        MemoryKiB(16 * KB), "-Xjitinitialsize:16K", M::JITCodeCacheInitialCapacity);
     EXPECT_SINGLE_PARSE_VALUE(
-        MemoryKiB(16 * MB), "-Xjitcodecacheinitialcapacity:16M", M::JITCodeCacheInitialCapacity);
+        MemoryKiB(16 * MB), "-Xjitmaxsize:16M", M::JITCodeCacheMaxCapacity);
   }
   {
     EXPECT_SINGLE_PARSE_VALUE(12345u, "-Xjitthreshold:12345", M::JITCompileThreshold);
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index a121f8b..7b0e5af 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -128,6 +128,7 @@
 #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
   if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
     switch (GetInstructionSet()) {                                                        \
+      case kThumb2:                                                                       \
       case kX86:                                                                          \
       case kX86_64:                                                                       \
         /* Instruction set has read barrier support. */                                   \
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
index d9b367b..aa31036 100644
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -20,6 +20,7 @@
 #include <cstdint>
 #include <unordered_map>
 
+#include "base/casts.h"
 #include "dwarf/dwarf_constants.h"
 #include "dwarf/writer.h"
 #include "leb128.h"
@@ -47,9 +48,9 @@
  * It also handles generation of abbreviations.
  *
  * Usage:
- *   StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
+ *   StartTag(DW_TAG_compile_unit);
  *     WriteStrp(DW_AT_producer, "Compiler name", debug_str);
- *     StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
+ *     StartTag(DW_TAG_subprogram);
  *       WriteStrp(DW_AT_name, "Foo", debug_str);
  *     EndTag();
  *   EndTag();
@@ -59,36 +60,40 @@
   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 
  public:
+  static constexpr size_t kCompilationUnitHeaderSize = 11;
+
   // Start debugging information entry.
-  void StartTag(Tag tag, Children children) {
-    DCHECK(has_children) << "This tag can not have nested tags";
+  // Returns the offset of the entry within the compilation unit.
+  size_t StartTag(Tag tag) {
     if (inside_entry_) {
       // Write abbrev code for the previous entry.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+      // Parent entry is finalized before any children are written.
+      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_yes));
       inside_entry_ = false;
     }
-    StartAbbrev(tag, children);
+    StartAbbrev(tag);
     // Abbrev code placeholder of sufficient size.
     abbrev_code_offset_ = this->data()->size();
     this->PushUleb128(NextAbbrevCode());
     depth_++;
     inside_entry_ = true;
-    has_children = (children == DW_CHILDREN_yes);
+    return abbrev_code_offset_ + kCompilationUnitHeaderSize;
   }
 
   // End debugging information entry.
   void EndTag() {
     DCHECK_GT(depth_, 0);
     if (inside_entry_) {
-      // Write abbrev code for this tag.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+      // Write abbrev code for this entry.
+      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_no));
       inside_entry_ = false;
-    }
-    if (has_children) {
-      this->PushUint8(0);  // End of children.
+      // This entry has no children, so no end-of-children terminator is needed.
+    } else {
+      // The entry has already been finalized, so it must be a parent entry
+      // and we need to write the terminator required by DW_CHILDREN_yes.
+      this->PushUint8(0);
     }
     depth_--;
-    has_children = true;  // Parent tag obviously has children.
   }
 
   void WriteAddr(Attribute attrib, uint64_t value) {
@@ -101,10 +106,10 @@
     }
   }
 
-  void WriteBlock(Attribute attrib, const void* ptr, int size) {
+  void WriteBlock(Attribute attrib, const void* ptr, size_t num_bytes) {
     AddAbbrevAttribute(attrib, DW_FORM_block);
-    this->PushUleb128(size);
-    this->PushData(ptr, size);
+    this->PushUleb128(num_bytes);
+    this->PushData(ptr, num_bytes);
   }
 
   void WriteData1(Attribute attrib, uint8_t value) {
@@ -147,12 +152,12 @@
     this->PushUint8(value ? 1 : 0);
   }
 
-  void WriteRef4(Attribute attrib, int cu_offset) {
+  void WriteRef4(Attribute attrib, uint32_t cu_offset) {
     AddAbbrevAttribute(attrib, DW_FORM_ref4);
     this->PushUint32(cu_offset);
   }
 
-  void WriteRef(Attribute attrib, int cu_offset) {
+  void WriteRef(Attribute attrib, uint32_t cu_offset) {
     AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
     this->PushUleb128(cu_offset);
   }
@@ -162,16 +167,21 @@
     this->PushString(value);
   }
 
-  void WriteStrp(Attribute attrib, int address) {
+  void WriteStrp(Attribute attrib, size_t debug_str_offset) {
     AddAbbrevAttribute(attrib, DW_FORM_strp);
-    this->PushUint32(address);
+    this->PushUint32(dchecked_integral_cast<uint32_t>(debug_str_offset));
   }
 
-  void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) {
+  void WriteStrp(Attribute attrib, const char* str, size_t len,
+                 std::vector<uint8_t>* debug_str) {
     AddAbbrevAttribute(attrib, DW_FORM_strp);
-    int address = debug_str->size();
-    debug_str->insert(debug_str->end(), value, value + strlen(value) + 1);
-    this->PushUint32(address);
+    this->PushUint32(debug_str->size());
+    debug_str->insert(debug_str->end(), str, str + len);
+    debug_str->push_back(0);
+  }
+
+  void WriteStrp(Attribute attrib, const char* str, std::vector<uint8_t>* debug_str) {
+    WriteStrp(attrib, str, strlen(str), debug_str);
   }
 
   bool Is64bit() const { return is64bit_; }
@@ -180,7 +190,11 @@
     return patch_locations_;
   }
 
+  int Depth() const { return depth_; }
+
   using Writer<Vector>::data;
+  using Writer<Vector>::size;
+  using Writer<Vector>::UpdateUint32;
 
   DebugInfoEntryWriter(bool is64bitArch,
                        Vector* debug_abbrev,
@@ -196,16 +210,17 @@
   }
 
   ~DebugInfoEntryWriter() {
+    DCHECK(!inside_entry_);
     DCHECK_EQ(depth_, 0);
   }
 
  private:
   // Start abbreviation declaration.
-  void StartAbbrev(Tag tag, Children children) {
-    DCHECK(!inside_entry_);
+  void StartAbbrev(Tag tag) {
     current_abbrev_.clear();
     EncodeUnsignedLeb128(&current_abbrev_, tag);
-    current_abbrev_.push_back(children);
+    has_children_offset_ = current_abbrev_.size();
+    current_abbrev_.push_back(0);  // Place-holder for DW_CHILDREN.
   }
 
   // Add attribute specification.
@@ -220,8 +235,9 @@
   }
 
   // End abbreviation declaration and return its code.
-  int EndAbbrev() {
-    DCHECK(inside_entry_);
+  int EndAbbrev(Children has_children) {
+    DCHECK(!current_abbrev_.empty());
+    current_abbrev_[has_children_offset_] = has_children;
     auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_),
                                                   NextAbbrevCode()));
     int abbrev_code = it.first->second;
@@ -241,6 +257,7 @@
   // Fields for writing and deduplication of abbrevs.
   Writer<Vector> debug_abbrev_;
   Vector current_abbrev_;
+  size_t has_children_offset_ = 0;
   std::unordered_map<Vector, int,
                      FNVHash<Vector> > abbrev_codes_;
 
@@ -250,7 +267,6 @@
   int depth_ = 0;
   size_t abbrev_code_offset_ = 0;  // Location to patch once we know the code.
   bool inside_entry_ = false;  // Entry ends at first child (if any).
-  bool has_children = true;
   std::vector<uintptr_t> patch_locations_;
 };
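The rewritten StartTag()/EndTag() contract above (children inferred from nesting, entry offsets returned) is what lets callers emit a forward type reference and patch it later. A minimal sketch of that pattern, not part of the patch, built only from the methods declared in this class (the local buffer names are illustrative):

    std::vector<uint8_t> debug_abbrev;
    art::dwarf::DebugInfoEntryWriter<> info(/* is64bit */ false, &debug_abbrev);
    info.StartTag(art::dwarf::DW_TAG_compile_unit);             // No DW_CHILDREN argument anymore.
    info.StartTag(art::dwarf::DW_TAG_subprogram);
    size_t patch_location = info.size();                        // Buffer offset of the 4-byte reference.
    info.WriteRef4(art::dwarf::DW_AT_type, 0);                  // Placeholder, resolved below.
    info.EndTag();                                              // Leaf entry => DW_CHILDREN_no.
    size_t type_offset = info.StartTag(art::dwarf::DW_TAG_base_type);  // CU-relative offset.
    info.EndTag();
    info.UpdateUint32(patch_location, type_offset);             // Back-patch the DW_AT_type value.
    info.EndTag();                                              // Parent entry => terminator byte.

This is the same mechanism the new WriteLazyType()/FinishLazyTypes() code in elf_writer_debug.cc uses to resolve type references after all methods of a compilation unit have been written.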
 
diff --git a/compiler/dwarf/dedup_vector.h b/compiler/dwarf/dedup_vector.h
new file mode 100644
index 0000000..7fb21b7
--- /dev/null
+++ b/compiler/dwarf/dedup_vector.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DEDUP_VECTOR_H_
+#define ART_COMPILER_DWARF_DEDUP_VECTOR_H_
+
+#include <vector>
+#include <unordered_map>
+
+namespace art {
+namespace dwarf {
+  class DedupVector {
+   public:
+    // Returns the offset of a previously inserted identical block of data,
+    // or appends the data at the end of the vector and returns its offset.
+    size_t Insert(const uint8_t* ptr, size_t num_bytes) {
+      // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+      uint32_t hash = 2166136261u;
+      for (size_t i = 0; i < num_bytes; i++) {
+        hash = (hash ^ ptr[i]) * 16777619u;
+      }
+      // Try to find existing copy of the data.
+      const auto& range = hash_to_offset_.equal_range(hash);
+      for (auto it = range.first; it != range.second; ++it) {
+        const size_t offset = it->second;
+        if (offset + num_bytes <= vector_.size() &&
+            memcmp(vector_.data() + offset, ptr, num_bytes) == 0) {
+          return offset;
+        }
+      }
+      // Append the data at the end of the vector.
+      const size_t new_offset = vector_.size();
+      hash_to_offset_.emplace(hash, new_offset);
+      vector_.insert(vector_.end(), ptr, ptr + num_bytes);
+      return new_offset;
+    }
+
+    const std::vector<uint8_t>& Data() const { return vector_; }
+
+   private:
+    struct IdentityHash {
+      size_t operator()(uint32_t v) const { return v; }
+    };
+
+    // We store the full hash as the key to simplify growing the table.
+    // It avoids storing or referencing the actual data in the hash table.
+    std::unordered_multimap<uint32_t, size_t, IdentityHash> hash_to_offset_;
+
+    std::vector<uint8_t> vector_;
+  };
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_DEDUP_VECTOR_H_
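A quick illustration of the deduplication contract (a standalone sketch, not part of the patch; the example function and assertions are only illustrative): inserting an identical byte sequence twice returns the same offset, which is how repeated strings and abbreviation tables end up stored only once.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    #include "dwarf/dedup_vector.h"

    void DedupVectorExample() {
      art::dwarf::DedupVector debug_str;
      const uint8_t* bytes = reinterpret_cast<const uint8_t*>("Foo");
      size_t first = debug_str.Insert(bytes, 4);   // "Foo" plus its trailing NUL, as WriteString() does.
      size_t second = debug_str.Insert(bytes, 4);  // Identical data, so the existing offset is reused.
      assert(first == 0 && second == 0);
      assert(debug_str.Data().size() == 4u);       // The four bytes are stored exactly once.
    }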
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index 6bb22ed..e9cd421 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc
@@ -285,7 +285,7 @@
   constexpr bool is64bit = false;
   DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_);
   DW_CHECK("Contents of the .debug_info section:");
-  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+  info.StartTag(dwarf::DW_TAG_compile_unit);
   DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)");
   info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_);
   DW_CHECK_NEXT("DW_AT_producer    : (indirect string, offset: 0x0): Compiler name");
@@ -293,7 +293,7 @@
   DW_CHECK_NEXT("DW_AT_low_pc      : 0x1000000");
   info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000);
   DW_CHECK_NEXT("DW_AT_high_pc     : 0x2000000");
-  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+  info.StartTag(dwarf::DW_TAG_subprogram);
   DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
   info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_);
   DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0xe): Foo");
@@ -302,7 +302,7 @@
   info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000);
   DW_CHECK_NEXT("DW_AT_high_pc     : 0x1020000");
   info.EndTag();  // DW_TAG_subprogram
-  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+  info.StartTag(dwarf::DW_TAG_subprogram);
   DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
   info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_);
   DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0x12): Bar");
@@ -313,7 +313,7 @@
   info.EndTag();  // DW_TAG_subprogram
   info.EndTag();  // DW_TAG_compile_unit
   // Test that previous list was properly terminated and empty children.
-  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+  info.StartTag(dwarf::DW_TAG_compile_unit);
   info.EndTag();  // DW_TAG_compile_unit
 
   // The abbrev table is just side product, but check it as well.
@@ -327,7 +327,7 @@
   DW_CHECK_NEXT("DW_AT_name         DW_FORM_strp");
   DW_CHECK_NEXT("DW_AT_low_pc       DW_FORM_addr");
   DW_CHECK_NEXT("DW_AT_high_pc      DW_FORM_addr");
-  DW_CHECK("3      DW_TAG_compile_unit    [has children]");
+  DW_CHECK("3      DW_TAG_compile_unit    [no children]");
 
   std::vector<uintptr_t> debug_info_patches;
   std::vector<uintptr_t> expected_patches { 16, 20, 29, 33, 42, 46 };  // NOLINT
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
index 633e2f7..c75aeac 100644
--- a/compiler/dwarf/headers.h
+++ b/compiler/dwarf/headers.h
@@ -138,6 +138,7 @@
   writer.PushUint32(debug_abbrev_offset);
   writer.PushUint8(entries.Is64bit() ? 8 : 4);
   size_t entries_offset = writer.data()->size();
+  DCHECK_EQ(entries_offset, DebugInfoEntryWriter<Vector>::kCompilationUnitHeaderSize);
   writer.PushData(*entries.data());
   writer.UpdateUint32(start, writer.data()->size() - start - 4);
   // Copy patch locations and make them relative to .debug_info section.
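For reference, the 11 bytes checked here (and captured by the new kCompilationUnitHeaderSize constant) are exactly the 32-bit DWARF compilation-unit header written just above: unit_length (4) + version (2) + debug_abbrev_offset (4) + address_size (1) = 11. This is also the constant StartTag() adds to its buffer offset, so that the values it returns are relative to the start of the compilation unit, as DW_FORM_ref4/DW_FORM_ref_udata references require.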
diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h
index 00b9dfa..d2add7f 100644
--- a/compiler/dwarf/writer.h
+++ b/compiler/dwarf/writer.h
@@ -114,9 +114,9 @@
     data_->insert(data_->end(), value, value + strlen(value) + 1);
   }
 
-  void PushData(const void* ptr, size_t size) {
+  void PushData(const void* ptr, size_t num_bytes) {
     const char* p = reinterpret_cast<const char*>(ptr);
-    data_->insert(data_->end(), p, p + size);
+    data_->insert(data_->end(), p, p + num_bytes);
   }
 
   template<typename Vector2>
@@ -164,6 +164,10 @@
     return data_;
   }
 
+  size_t size() const {
+    return data_->size();
+  }
+
   explicit Writer(Vector* buffer) : data_(buffer) { }
 
  private:
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index e1ab340..5e2a8bf 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -19,9 +19,11 @@
 #include <unordered_set>
 
 #include "base/casts.h"
+#include "base/stl_util.h"
 #include "compiled_method.h"
 #include "driver/compiler_driver.h"
 #include "dex_file-inl.h"
+#include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
 #include "dwarf/register.h"
 #include "elf_builder.h"
@@ -249,10 +251,217 @@
   }
 }
 
+struct CompilationUnit {
+  std::vector<const OatWriter::DebugInfo*> methods_;
+  size_t debug_line_offset_ = 0;
+  uint32_t low_pc_ = 0xFFFFFFFFU;
+  uint32_t high_pc_ = 0;
+};
+
+// Helper class to write .debug_info and its supporting sections.
 template<typename ElfTypes>
 class DebugInfoWriter {
   typedef typename ElfTypes::Addr Elf_Addr;
 
+  // Helper class to write one compilation unit.
+  // It holds helper methods and temporary state.
+  class CompilationUnitWriter {
+   public:
+    explicit CompilationUnitWriter(DebugInfoWriter* owner)
+      : owner_(owner),
+        info_(Is64BitInstructionSet(owner_->builder_->GetIsa()), &debug_abbrev_) {
+    }
+
+    void Write(const CompilationUnit& compilation_unit) {
+      CHECK(!compilation_unit.methods_.empty());
+      const Elf_Addr text_address = owner_->builder_->GetText()->GetAddress();
+
+      info_.StartTag(DW_TAG_compile_unit);
+      info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+      info_.WriteData1(DW_AT_language, DW_LANG_Java);
+      info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
+      info_.WriteAddr(DW_AT_high_pc, text_address + compilation_unit.high_pc_);
+      info_.WriteData4(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
+
+      const char* last_dex_class_desc = nullptr;
+      for (auto mi : compilation_unit.methods_) {
+        const DexFile* dex = mi->dex_file_;
+        const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index_);
+        const DexFile::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method);
+        const DexFile::TypeList* dex_params = dex->GetProtoParameters(dex_proto);
+        const char* dex_class_desc = dex->GetMethodDeclaringClassDescriptor(dex_method);
+
+        // Enclose the method in the correct class definition.
+        if (last_dex_class_desc != dex_class_desc) {
+          if (last_dex_class_desc != nullptr) {
+            EndClassTag(last_dex_class_desc);
+          }
+          size_t offset = StartClassTag(dex_class_desc);
+          type_cache_.emplace(dex_class_desc, offset);
+          // Check that each class is defined only once.
+          bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
+          CHECK(unique) << "Redefinition of " << dex_class_desc;
+          last_dex_class_desc = dex_class_desc;
+        }
+
+        std::vector<const char*> param_names;
+        if (mi->code_item_ != nullptr) {
+          const uint8_t* stream = dex->GetDebugInfoStream(mi->code_item_);
+          if (stream != nullptr) {
+            DecodeUnsignedLeb128(&stream);  // line.
+            uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+            for (uint32_t i = 0; i < parameters_size; ++i) {
+              uint32_t id = DecodeUnsignedLeb128P1(&stream);
+              param_names.push_back(mi->dex_file_->StringDataByIdx(id));
+            }
+          }
+        }
+
+        int start_depth = info_.Depth();
+        info_.StartTag(DW_TAG_subprogram);
+        WriteName(dex->GetMethodName(dex_method));
+        info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
+        info_.WriteAddr(DW_AT_high_pc, text_address + mi->high_pc_);
+        WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
+        if (dex_params != nullptr) {
+          for (uint32_t i = 0; i < dex_params->Size(); ++i) {
+            info_.StartTag(DW_TAG_formal_parameter);
+            // Parameter names may not always be available.
+            if (i < param_names.size() && param_names[i] != nullptr) {
+              WriteName(param_names[i]);
+            }
+            WriteLazyType(dex->StringByTypeIdx(dex_params->GetTypeItem(i).type_idx_));
+            info_.EndTag();
+          }
+        }
+        info_.EndTag();
+        CHECK_EQ(info_.Depth(), start_depth);  // Balanced start/end.
+      }
+      if (last_dex_class_desc != nullptr) {
+        EndClassTag(last_dex_class_desc);
+      }
+      CHECK_EQ(info_.Depth(), 1);
+      FinishLazyTypes();
+      info_.EndTag();  // DW_TAG_compile_unit
+      std::vector<uint8_t> buffer;
+      buffer.reserve(info_.data()->size() + KB);
+      const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+      const size_t debug_abbrev_offset =
+          owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+      WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+      owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+    }
+
+    // Some types are difficult to define as we go since they need
+    // to be enclosed in the right set of namespaces. Therefore we
+    // just define all types lazily at the end of the compilation unit.
+    void WriteLazyType(const char* type_descriptor) {
+      DCHECK(type_descriptor != nullptr);
+      if (type_descriptor[0] != 'V') {
+        lazy_types_.emplace(type_descriptor, info_.size());
+        info_.WriteRef4(DW_AT_type, 0);
+      }
+    }
+
+    void FinishLazyTypes() {
+      for (const auto& lazy_type : lazy_types_) {
+        info_.UpdateUint32(lazy_type.second, WriteType(lazy_type.first));
+      }
+      lazy_types_.clear();
+    }
+
+   private:
+    void WriteName(const char* name) {
+      info_.WriteStrp(DW_AT_name, owner_->WriteString(name));
+    }
+
+    // Convert a dex type descriptor to a DWARF type entry.
+    // Returns the offset of the entry in the compilation unit.
+    size_t WriteType(const char* desc) {
+      const auto& it = type_cache_.find(desc);
+      if (it != type_cache_.end()) {
+        return it->second;
+      }
+
+      size_t offset;
+      if (*desc == 'L') {
+        // Class type. For example: Lpackage/name;
+        offset = StartClassTag(desc);
+        info_.WriteFlag(DW_AT_declaration, true);
+        EndClassTag(desc);
+      } else if (*desc == '[') {
+        // Array type.
+        size_t element_type = WriteType(desc + 1);
+        offset = info_.StartTag(DW_TAG_array_type);
+        info_.WriteRef(DW_AT_type, element_type);
+        info_.EndTag();
+      } else {
+        // Primitive types.
+        const char* name;
+        switch (*desc) {
+        case 'B': name = "byte"; break;
+        case 'C': name = "char"; break;
+        case 'D': name = "double"; break;
+        case 'F': name = "float"; break;
+        case 'I': name = "int"; break;
+        case 'J': name = "long"; break;
+        case 'S': name = "short"; break;
+        case 'Z': name = "boolean"; break;
+        case 'V': name = "void"; break;
+        default:
+          LOG(FATAL) << "Unknown dex type descriptor: " << desc;
+          UNREACHABLE();
+        }
+        offset = info_.StartTag(DW_TAG_base_type);
+        WriteName(name);
+        info_.EndTag();
+      }
+
+      type_cache_.emplace(desc, offset);
+      return offset;
+    }
+
+    // Start DW_TAG_class_type tag nested in DW_TAG_namespace tags.
+    // Returns offset of the class tag in the compilation unit.
+    size_t StartClassTag(const char* desc) {
+      DCHECK(desc != nullptr && desc[0] == 'L');
+      // Enclose the type in namespace tags.
+      const char* end;
+      for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
+        info_.StartTag(DW_TAG_namespace);
+        WriteName(std::string(desc, end - desc).c_str());
+      }
+      // Start the class tag.
+      size_t offset = info_.StartTag(DW_TAG_class_type);
+      end = strchr(desc, ';');
+      CHECK(end != nullptr);
+      WriteName(std::string(desc, end - desc).c_str());
+      return offset;
+    }
+
+    void EndClassTag(const char* desc) {
+      DCHECK(desc != nullptr && desc[0] == 'L');
+      // End the class tag.
+      info_.EndTag();
+      // Close namespace tags.
+      const char* end;
+      for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
+        info_.EndTag();
+      }
+    }
+
+    // For access to the ELF sections.
+    DebugInfoWriter<ElfTypes>* owner_;
+    // Debug abbrevs for this compilation unit only.
+    std::vector<uint8_t> debug_abbrev_;
+    // Temporary buffer to create and store the entries.
+    DebugInfoEntryWriter<> info_;
+    // Cache of already translated type descriptors.
+    std::map<const char*, size_t, CStringLess> type_cache_;  // type_desc -> definition_offset.
+    // 32-bit references which need to be resolved to a type later.
+    std::multimap<const char*, size_t, CStringLess> lazy_types_;  // type_desc -> patch_offset.
+  };
+
  public:
   explicit DebugInfoWriter(ElfBuilder<ElfTypes>* builder) : builder_(builder) {
   }
@@ -261,54 +470,29 @@
     builder_->GetDebugInfo()->Start();
   }
 
-  void Write(const std::vector<const OatWriter::DebugInfo*>& method_infos,
-             size_t debug_line_offset) {
-    const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
-    const Elf_Addr text_address = builder_->GetText()->GetAddress();
-    uint32_t cunit_low_pc = 0xFFFFFFFFU;
-    uint32_t cunit_high_pc = 0;
-    for (auto method_info : method_infos) {
-      cunit_low_pc = std::min(cunit_low_pc, method_info->low_pc_);
-      cunit_high_pc = std::max(cunit_high_pc, method_info->high_pc_);
-    }
-
-    size_t debug_abbrev_offset = debug_abbrev_.size();
-    DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_);
-    info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
-    info.WriteStrp(DW_AT_producer, "Android dex2oat", &debug_str_);
-    info.WriteData1(DW_AT_language, DW_LANG_Java);
-    info.WriteAddr(DW_AT_low_pc, text_address + cunit_low_pc);
-    info.WriteAddr(DW_AT_high_pc, text_address + cunit_high_pc);
-    info.WriteData4(DW_AT_stmt_list, debug_line_offset);
-    for (auto method_info : method_infos) {
-      std::string method_name = PrettyMethod(method_info->dex_method_index_,
-                                             *method_info->dex_file_, true);
-      info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
-      info.WriteStrp(DW_AT_name, method_name.data(), &debug_str_);
-      info.WriteAddr(DW_AT_low_pc, text_address + method_info->low_pc_);
-      info.WriteAddr(DW_AT_high_pc, text_address + method_info->high_pc_);
-      info.EndTag();  // DW_TAG_subprogram
-    }
-    info.EndTag();  // DW_TAG_compile_unit
-    std::vector<uint8_t> buffer;
-    buffer.reserve(info.data()->size() + KB);
-    size_t offset = builder_->GetDebugInfo()->GetSize();
-    WriteDebugInfoCU(debug_abbrev_offset, info, offset, &buffer, &debug_info_patches_);
-    builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+  void WriteCompilationUnit(const CompilationUnit& compilation_unit) {
+    CompilationUnitWriter writer(this);
+    writer.Write(compilation_unit);
   }
 
   void End() {
     builder_->GetDebugInfo()->End();
     builder_->WritePatches(".debug_info.oat_patches", &debug_info_patches_);
-    builder_->WriteSection(".debug_abbrev", &debug_abbrev_);
-    builder_->WriteSection(".debug_str", &debug_str_);
+    builder_->WriteSection(".debug_abbrev", &debug_abbrev_.Data());
+    builder_->WriteSection(".debug_str", &debug_str_.Data());
   }
 
  private:
+  size_t WriteString(const char* str) {
+    return debug_str_.Insert(reinterpret_cast<const uint8_t*>(str), strlen(str) + 1);
+  }
+
   ElfBuilder<ElfTypes>* builder_;
   std::vector<uintptr_t> debug_info_patches_;
-  std::vector<uint8_t> debug_abbrev_;
-  std::vector<uint8_t> debug_str_;
+  DedupVector debug_abbrev_;
+  DedupVector debug_str_;
+
+  std::unordered_set<const char*> defined_dex_classes_;  // For CHECKs only.
 };
 
 template<typename ElfTypes>
@@ -325,15 +509,11 @@
 
   // Write line table for given set of methods.
   // Returns the number of bytes written.
-  size_t Write(const std::vector<const OatWriter::DebugInfo*>& method_infos) {
+  size_t WriteCompilationUnit(CompilationUnit& compilation_unit) {
     const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
     const Elf_Addr text_address = builder_->GetText()->GetAddress();
-    uint32_t cunit_low_pc = 0xFFFFFFFFU;
-    uint32_t cunit_high_pc = 0;
-    for (auto method_info : method_infos) {
-      cunit_low_pc = std::min(cunit_low_pc, method_info->low_pc_);
-      cunit_high_pc = std::max(cunit_high_pc, method_info->high_pc_);
-    }
+
+    compilation_unit.debug_line_offset_ = builder_->GetDebugLine()->GetSize();
 
     std::vector<FileEntry> files;
     std::unordered_map<std::string, size_t> files_map;
@@ -358,11 +538,17 @@
         break;
     }
     DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits_);
-    opcodes.SetAddress(text_address + cunit_low_pc);
+    opcodes.SetAddress(text_address + compilation_unit.low_pc_);
     if (dwarf_isa != -1) {
       opcodes.SetISA(dwarf_isa);
     }
-    for (const OatWriter::DebugInfo* mi : method_infos) {
+    for (const OatWriter::DebugInfo* mi : compilation_unit.methods_) {
+      // Ignore the function if we have already generated a line table for the same address.
+      // It would confuse the debugger, and the DWARF specification forbids it.
+      if (mi->deduped_) {
+        continue;
+      }
+
       struct DebugInfoCallbacks {
         static bool NewPosition(void* ctx, uint32_t address, uint32_t line) {
           auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx);
@@ -461,7 +647,7 @@
         opcodes.AddRow(method_address, 0);
       }
     }
-    opcodes.AdvancePC(text_address + cunit_high_pc);
+    opcodes.AdvancePC(text_address + compilation_unit.high_pc_);
     opcodes.EndSequence();
     std::vector<uint8_t> buffer;
     buffer.reserve(opcodes.data()->size() + KB);
@@ -484,36 +670,28 @@
 template<typename ElfTypes>
 void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
                         const std::vector<OatWriter::DebugInfo>& method_infos) {
-  struct CompilationUnit {
-    std::vector<const OatWriter::DebugInfo*> methods_;
-    size_t debug_line_offset_ = 0;
-  };
-
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
   for (const OatWriter::DebugInfo& mi : method_infos) {
-    // Attribute given instruction range only to single method.
-    // Otherwise the debugger might get really confused.
-    if (!mi.deduped_) {
-      auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_);
-      const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def);
-      if (compilation_units.empty() || source_file != last_source_file) {
-        compilation_units.push_back(CompilationUnit());
-      }
-      compilation_units.back().methods_.push_back(&mi);
-      last_source_file = source_file;
+    auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_);
+    const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def);
+    if (compilation_units.empty() || source_file != last_source_file) {
+      compilation_units.push_back(CompilationUnit());
     }
+    CompilationUnit& cu = compilation_units.back();
+    cu.methods_.push_back(&mi);
+    cu.low_pc_ = std::min(cu.low_pc_, mi.low_pc_);
+    cu.high_pc_ = std::max(cu.high_pc_, mi.high_pc_);
+    last_source_file = source_file;
   }
 
   // Write .debug_line section.
   {
     DebugLineWriter<ElfTypes> line_writer(builder);
     line_writer.Start();
-    size_t offset = 0;
     for (auto& compilation_unit : compilation_units) {
-      compilation_unit.debug_line_offset_ = offset;
-      offset += line_writer.Write(compilation_unit.methods_);
+      line_writer.WriteCompilationUnit(compilation_unit);
     }
     line_writer.End();
   }
@@ -523,7 +701,7 @@
     DebugInfoWriter<ElfTypes> info_writer(builder);
     info_writer.Start();
     for (const auto& compilation_unit : compilation_units) {
-      info_writer.Write(compilation_unit.methods_, compilation_unit.debug_line_offset_);
+      info_writer.WriteCompilationUnit(compilation_unit);
     }
     info_writer.End();
   }
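A worked example of the descriptor-to-DWARF mapping implemented by WriteType()/StartClassTag() above (illustrative readelf-style layout, not output from the patch): the class descriptor "Ljava/lang/String;" becomes a DW_TAG_class_type named "String" nested in DW_TAG_namespace entries for "java" and "lang" (declaration-only when reached through WriteLazyType()), while "[I" becomes a DW_TAG_array_type whose DW_AT_type points at the DW_TAG_base_type "int":

    DW_TAG_namespace        DW_AT_name : java
      DW_TAG_namespace      DW_AT_name : lang
        DW_TAG_class_type   DW_AT_name : String    DW_AT_declaration : 1
    DW_TAG_array_type       DW_AT_type : <ref to DW_TAG_base_type "int">    (descriptor "[I")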
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index f985745..c3e88f3 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -36,6 +36,8 @@
   if (!boolean_not->HasUses()) {
     boolean_not->GetBlock()->RemoveInstruction(boolean_not);
   }
+
+  MaybeRecordStat(MethodCompilationStat::kBooleanSimplifier);
 }
 
 // Returns true if 'block1' and 'block2' are empty, merge into the same single
@@ -146,6 +148,8 @@
   block->MergeWith(false_block);
   block->MergeWith(merge_block);
 
+  MaybeRecordStat(MethodCompilationStat::kBooleanSimplifier);
+
   // No need to update any dominance information, as we are simplifying
   // a simple diamond shape, where the join block is merged with the
   // entry block. Any following blocks would have had the join block
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
index e12a12c..0eb6c71 100644
--- a/compiler/optimizing/boolean_simplifier.h
+++ b/compiler/optimizing/boolean_simplifier.h
@@ -62,8 +62,8 @@
 
 class HBooleanSimplifier : public HOptimization {
  public:
-  explicit HBooleanSimplifier(HGraph* graph)
-    : HOptimization(graph, kBooleanSimplifierPassName) {}
+  HBooleanSimplifier(HGraph* graph, OptimizingCompilerStats* stats)
+    : HOptimization(graph, kBooleanSimplifierPassName, stats) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 77d53fc..0baa0e3 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -383,11 +383,11 @@
     HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect();
     switch (call->GetMethodLoadKind()) {
       case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-        locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation());
+        locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation());
         break;
       case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
         locations->AddTemp(visitor->GetMethodLocation());
-        locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister());
+        locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister());
         break;
       default:
         locations->AddTemp(visitor->GetMethodLocation());
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index af2e228..1c3bd6c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -34,6 +34,9 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace arm {
 
 static bool ExpectedPairLayout(Location location) {
@@ -286,15 +289,6 @@
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      Register obj = locations->InAt(0).AsRegister<Register>();
-      Register temp = locations->GetTemp(0).AsRegister<Register>();
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -315,6 +309,8 @@
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
@@ -322,6 +318,7 @@
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -408,6 +405,221 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
 };
 
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
+ public:
+  ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction,
+                                         Location out,
+                                         Location ref,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location index)
+      : instruction_(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ LoadFromOffset(kLoadWord, out, out, offset);
+    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!instruction_->IsInvoke() ||
+           (instruction_->IsInvokeStaticOrDirect() &&
+            instruction_->GetLocations()->Intrinsified()));
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path),
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = index_.AsRegister<Register>();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::arm::Thumb2Assembler::Lsl and
+          // art::arm::Thumb2Assembler::AddConstant below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Mov(free_reg, index_reg);
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ Lsl(index_reg, index_reg, TIMES_4);
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ AddConstant(index_reg, index_reg, offset_);
+      } else {
+        DCHECK(instruction_->IsInvoke());
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair; the low
+        // part contains the correct offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ LoadImmediate(calling_convention.GetRegisterAt(2), offset_);
+    }
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return static_cast<Register>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on ARM
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM : public SlowPathCode {
+ public:
+  ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root)
+      : instruction_(instruction), out_(out), root_(root) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM);
+};
+
 #undef __
 #define __ down_cast<ArmAssembler*>(GetAssembler())->
 
@@ -581,7 +793,7 @@
       LOG(FATAL) << "Unreachable type " << type;
   }
 
-  return Location();
+  return Location::NoLocation();
 }
 
 void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
@@ -820,7 +1032,7 @@
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
   }
-  return Location();
+  return Location::NoLocation();
 }
 
 Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) const {
@@ -847,7 +1059,7 @@
     }
 
     case Primitive::kPrimVoid:
-      return Location();
+      return Location::NoLocation();
   }
 
   UNREACHABLE();
@@ -1762,29 +1974,39 @@
 
 void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  LocationSummary* locations = invoke->GetLocations();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  Register hidden_reg = locations->GetTemp(1).AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
       invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
-  // Set the hidden argument.
-  __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(),
-                   invoke->GetDexMethodIndex());
+  // Set the hidden argument. It is safe to do this here, as R12
+  // won't be modified thereafter, before the `blx` (call) instruction.
+  DCHECK_EQ(R12, hidden_reg);
+  __ LoadImmediate(hidden_reg, invoke->GetDexMethodIndex());
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not do so in the future).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetImtEntryAt(method_offset);
-  uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kArmWordSize).Int32Value();
+  uint32_t entry_point =
+      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value();
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
@@ -3405,6 +3627,9 @@
                                                          Register out_lo,
                                                          Register out_hi) {
   if (offset != 0) {
+    // Ensure `out_lo` is different from `addr`, so that loading
+    // `offset` into `out_lo` does not clobber `addr`.
+    DCHECK_NE(out_lo, addr);
     __ LoadImmediate(out_lo, offset);
     __ add(IP, addr, ShifterOperand(out_lo));
     addr = IP;
@@ -3592,14 +3817,26 @@
 
 void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
 
   bool volatile_for_double = field_info.IsVolatile()
       && (field_info.GetFieldType() == Primitive::kPrimDouble)
       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
-  bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong);
+  // The output overlaps in the case of a volatile long: we don't want the
+  // code generated by GenerateWideAtomicLoad to overwrite the
+  // object's location.  Likewise, in the case of an object field get
+  // with read barriers enabled, we do not want the load to overwrite
+  // the object's location, as we need it to emit the read barrier.
+  bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) ||
+      object_field_get_with_read_barrier;
 
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
@@ -3665,7 +3902,8 @@
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
 
   LocationSummary* locations = instruction->GetLocations();
-  Register base = locations->InAt(0).AsRegister<Register>();
+  Location base_loc = locations->InAt(0);
+  Register base = base_loc.AsRegister<Register>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
@@ -3745,7 +3983,7 @@
   }
 
   if (field_type == Primitive::kPrimNot) {
-    __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
+    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
   }
 }
 
@@ -3889,20 +4127,31 @@
 }
 
 void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Primitive::Type type = instruction->GetType();
 
@@ -3965,8 +4214,9 @@
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
@@ -4029,8 +4279,17 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    Register out = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(out);
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+    Location out = locations->Out();
+    if (index.IsConstant()) {
+      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+    } else {
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
+    }
   }
 }
 
@@ -4039,11 +4298,16 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
+
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(value_type)) {
@@ -4051,20 +4315,20 @@
   } else {
     locations->SetInAt(2, Location::RequiresRegister());
   }
-
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
-    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for read barrier too.
   }
 }
 
 void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register array = locations->InAt(0).AsRegister<Register>();
+  Location array_loc = locations->InAt(0);
+  Register array = array_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -4101,7 +4365,8 @@
 
     case Primitive::kPrimNot: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
+      Location value_loc = locations->InAt(2);
+      Register value = value_loc.AsRegister<Register>();
       Register source = value;
 
       if (instruction->InputAt(2)->IsNullConstant()) {
@@ -4115,6 +4380,8 @@
           __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
           __ StoreToOffset(kStoreWord, source, IP, data_offset);
         }
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4127,7 +4394,7 @@
       Label done;
       SlowPathCode* slow_path = nullptr;
 
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4147,23 +4414,63 @@
           __ Bind(&non_zero);
         }
 
-        __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ MaybeUnpoisonHeapReference(temp1);
-        __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
-        __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
-        // No need to poison/unpoison, we're comparing two poisoined references.
-        __ cmp(temp1, ShifterOperand(temp2));
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          Label do_put;
-          __ b(&do_put, EQ);
-          __ MaybeUnpoisonHeapReference(temp1);
-          __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
-          // No need to poison/unpoison, we're comparing against null.
-          __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ Mov(temp2, temp1);
+          //   // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          //   __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = value->klass_
+          //   __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
+          //
+          //   __ cmp(temp1, ShifterOperand(temp2));
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ b(slow_path->GetEntryLabel());
         } else {
-          __ b(slow_path->GetEntryLabel(), NE);
+          // /* HeapReference<Class> */ temp1 = array->klass_
+          __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+          // /* HeapReference<Class> */ temp2 = value->klass_
+          __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+          // If heap poisoning is enabled, no need to unpoison `temp1`
+          // nor `temp2`, as we are comparing two poisoned references.
+          __ cmp(temp1, ShifterOperand(temp2));
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            Label do_put;
+            __ b(&do_put, EQ);
+            // If heap poisoning is enabled, the `temp1` reference has
+            // not been unpoisoned yet; unpoison it now.
+            __ MaybeUnpoisonHeapReference(temp1);
+
+            // /* HeapReference<Class> */ temp1 = temp1->super_class_
+            __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp1`, as we are comparing against null below.
+            __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ b(slow_path->GetEntryLabel(), NE);
+          }
         }
       }
 
@@ -4187,7 +4494,7 @@
         __ StoreToOffset(kStoreWord, source, IP, data_offset);
       }
 
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -4616,7 +4923,8 @@
   CodeGenerator::CreateLoadClassLocationSummary(
       cls,
       Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(R0));
+      Location::RegisterLocation(R0),
+      /* code_generator_supports_read_barrier */ true);
 }
 
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
@@ -4630,21 +4938,42 @@
     return;
   }
 
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
+
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    __ LoadFromOffset(
-        kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+      __ AddConstant(out, current_method, declaring_class_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
+    }
   } else {
     DCHECK(cls->CanCallRuntime());
+    // /* GcRoot<mirror::Class>[] */ out =
+    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ LoadFromOffset(kLoadWord,
                       out,
                       current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-    __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
-    // TODO: We will need a read barrier here.
+
+    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+      __ AddConstant(out, out, cache_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      __ LoadFromOffset(kLoadWord, out, out, cache_offset);
+    }
 
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
@@ -4699,13 +5028,35 @@
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = load->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
-  __ LoadFromOffset(
-      kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+
+  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+    __ AddConstant(out, current_method, declaring_class_offset);
+    // /* mirror::Class* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
+  }
+
+  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-  // TODO: We will need a read barrier here.
+
+  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::String>* */ out = &out[string_index]
+    __ AddConstant(out, out, cache_offset);
+    // /* mirror::String* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::String> */ out = out[string_index]
+    __ LoadFromOffset(kLoadWord, out, out, cache_offset);
+  }
+
   __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
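The GC-root handling in VisitLoadClass/VisitLoadString above follows one pattern: with read barriers, materialize the root's address and let the root slow path read through it; without them, load the root value directly. A small illustrative sketch (names assumed, GcRoot<> packing elided):

struct Object;

inline Object* MarkRoot(Object* root) { return root; }  // stub for artReadBarrierForRootSlow

inline Object* ReadGcRoot(Object** root_address, bool emit_read_barrier) {
  if (emit_read_barrier) {
    // out = &root (AddConstant), then out = out->Read() via the root slow path.
    return MarkRoot(*root_address);
  }
  // out = root (plain LoadFromOffset).
  return *root_address;
}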
@@ -4748,41 +5099,45 @@
 
 void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // The out register is used as a temporary, so it overlaps with the inputs.
-    // Note that TypeCheckSlowPathARM uses this register too.
-    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(Location::RegisterLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The "out" register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -4796,15 +5151,9 @@
     __ CompareAndBranchIfZero(obj, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ LoadFromOffset(kLoadWord, target, obj, class_offset);
-  __ MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  __ LoadFromOffset(kLoadWord, out, obj, class_offset);
+  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
 
   switch (instruction->GetTypeCheckKind()) {
     case TypeCheckKind::kExactCheck: {
@@ -4815,13 +5164,23 @@
       __ b(&done);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       Label loop;
       __ Bind(&loop);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ cmp(out, ShifterOperand(cls));
@@ -4832,14 +5191,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       Label loop, success;
       __ Bind(&loop);
       __ cmp(out, ShifterOperand(cls));
       __ b(&success, EQ);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ b(&done);
@@ -4850,14 +5219,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       Label exact_check;
       __ cmp(out, ShifterOperand(cls));
       __ b(&exact_check, EQ);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->component_type_
       __ LoadFromOffset(kLoadWord, out, out, component_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -4868,11 +5247,12 @@
       __ b(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       __ cmp(out, ShifterOperand(cls));
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                                    /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ b(slow_path->GetEntryLabel(), NE);
       __ LoadImmediate(out, 1);
@@ -4881,13 +5261,25 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on slow path, but we always go
+      // into the slow path for the unresolved & interface check
+      // cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require to assign fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                                    /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ b(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ b(&done);
       }
@@ -4913,57 +5305,61 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
 
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // Note that TypeCheckSlowPathARM uses this register too.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
     locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   }
 }
 
 void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register temp = locations->WillCall()
-      ? Register(kNoRegister)
-      : locations->GetTemp(0).AsRegister<Register>();
-
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = temp_loc.AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCode* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCode* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                        is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
   Label done;
   // Avoid null check if we know obj is not null.
@@ -4971,76 +5367,159 @@
     __ CompareAndBranchIfZero(obj, &done);
   }
 
-  if (locations->WillCall()) {
-    __ LoadFromOffset(kLoadWord, obj, obj, class_offset);
-    __ MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-    __ MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       __ cmp(temp, ShifterOperand(cls));
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ b(slow_path->GetEntryLabel(), NE);
+      __ b(type_check_slow_path->GetEntryLabel(), NE);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      Label loop;
+      Label loop, compare_classes;
       __ Bind(&loop);
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-      // Jump to the slow path to throw the exception.
-      __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
+      __ CompareAndBranchIfNonZero(temp, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       __ cmp(temp, ShifterOperand(cls));
       __ b(&loop, NE);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       Label loop;
       __ Bind(&loop);
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
+
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back at the beginning of the loop.
       __ CompareAndBranchIfNonZero(temp, &loop);
-      // Jump to the slow path to throw the exception.
-      __ b(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      Label check_non_primitive_component_type;
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->component_type_
       __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-      __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(
+          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
+      __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
-      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
+      __ CompareAndBranchIfZero(temp, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved &
+      // interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require
+      // assigning fixed registers for the inputs of this HCheckCast
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
 
 void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
@@ -5214,6 +5693,82 @@
   }
 }
 
+void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction,
+                                           Location out,
+                                           Location ref,
+                                           Location obj,
+                                           uint32_t offset,
+                                           Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  // TODO: When read barrier has a fast path, add it here.
+  /* Currently the read barrier call is inserted after the original load.
+   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+   * original load. This load-load ordering is required by the read barrier.
+   * The fast path/slow path (for Baker's algorithm) should look like:
+   *
+   * bool isGray = obj.LockWord & kReadBarrierMask;
+   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
+   * ref = obj.field;    // this is the original load
+   * if (isGray) {
+   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
+   * }
+   */
+
+  __ b(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
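A compilable sketch of the Baker-style fast path outlined in the TODO above; the gray-bit position and the Mark() stub are assumptions for illustration, not ART definitions:

#include <atomic>
#include <cstdint>

struct Obj {
  std::atomic<uint32_t> lock_word;  // placeholder for mirror::Object's lock word
};

constexpr uint32_t kReadBarrierGrayBit = 1u << 28;  // assumed gray-state bit

inline Obj* Mark(Obj* ref) { return ref; }  // stub; the runtime would return the to-space copy

inline Obj* BakerFieldRead(Obj* holder, Obj** field_addr) {
  // The acquire load supplies the load-load ordering ("lfence" / artificial data
  // dependence) required between reading the lock word and reading the field.
  bool is_gray =
      (holder->lock_word.load(std::memory_order_acquire) & kReadBarrierGrayBit) != 0;
  Obj* ref = *field_addr;  // original load
  return is_gray ? Mark(ref) : ref;  // only gray holders take the slow path
}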
+
+void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction,
+                                                Location out,
+                                                Location ref,
+                                                Location obj,
+                                                uint32_t offset,
+                                                Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<Register>());
+  }
+}
+
+void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction,
+                                                  Location out,
+                                                  Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  // TODO: Implement a fast path for ReadBarrierForRoot, performing
+  // the following operation (for Baker's algorithm):
+  //
+  //   if (thread.tls32_.is_gc_marking) {
+  //     root = Mark(root);
+  //   }
+
+  __ b(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
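The root variant from the TODO above reduces to a single thread-flag check; a standalone sketch with placeholder names:

struct Root;
inline Root* Mark(Root* ref) { return ref; }  // stub; runtime would return the marked reference

inline Root* BakerRootRead(bool is_gc_marking, Root* root) {
  // if (thread.tls32_.is_gc_marking) root = Mark(root);
  return is_gc_marking ? Mark(root) : root;
}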
+
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) {
@@ -5271,7 +5826,7 @@
       __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress());
@@ -5286,7 +5841,7 @@
       LOG(FATAL) << "Unsupported";
       UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       Register reg = temp.AsRegister<Register>();
       if (current_method.IsRegister()) {
@@ -5297,10 +5852,11 @@
         method_reg = reg;
         __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
       }
-      // temp = current_method->dex_cache_resolved_methods_;
-      __ LoadFromOffset(
-          kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset(
-              kArmPointerSize).Int32Value());
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
+      __ LoadFromOffset(kLoadWord,
+                        reg,
+                        method_reg,
+                        ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
       // temp = temp[index_in_cache]
       uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
       __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
@@ -5344,10 +5900,17 @@
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  // temp = object->GetClass();
   DCHECK(receiver.IsRegister());
+  // /* HeapReference<Class> */ temp = receiver->klass_
   __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not do so in the future).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 32bfe0f..89de4f8 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -373,6 +373,51 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // Generate a read barrier for a heap reference within `instruction`.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrier(HInstruction* instruction,
+                           Location out,
+                           Location ref,
+                           Location obj,
+                           uint32_t offset,
+                           Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap reference.
+  // If heap poisoning is enabled, also unpoison the reference in `out`.
+  void MaybeGenerateReadBarrier(HInstruction* instruction,
+                                Location out,
+                                Location ref,
+                                Location obj,
+                                uint32_t offset,
+                                Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction`.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
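For the array case mentioned in the GenerateReadBarrier comment above, the adjustment simply folds the scaled index into the offset; an illustrative helper assuming 32-bit heap references:

// Sketch only: element offset passed to artReadBarrierSlow for an array access.
constexpr uint32_t AdjustedArrayOffset(uint32_t data_offset, uint32_t index) {
  return data_offset + (index << 2);  // index * sizeof(mirror::HeapReference<mirror::Object>)
}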
+
  private:
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 573e542..04955dd 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2926,7 +2926,7 @@
       __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       // Load method address from literal pool.
@@ -2960,7 +2960,7 @@
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = XRegisterFrom(temp);
       Register method_reg;
       if (current_method.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5282884..f3178bd 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3031,7 +3031,7 @@
                         invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
@@ -3043,7 +3043,7 @@
       LOG(FATAL) << "Unsupported";
       UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = temp.AsRegister<Register>();
       Register method_reg;
       if (current_method.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 04be533..6100859 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2822,9 +2822,9 @@
   // sorted out.
   if (invoke->HasCurrentMethodInput()) {
     LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
+    Location location = locations->InAt(invoke->GetSpecialInputIndex());
     if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
     }
   }
 }
@@ -2882,7 +2882,7 @@
                         invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress());
@@ -2894,7 +2894,7 @@
       LOG(FATAL) << "Unsupported";
       UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       GpuRegister reg = temp.AsRegister<GpuRegister>();
       GpuRegister method_reg;
       if (current_method.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index fb70185..53e33bf 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1908,7 +1908,7 @@
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
-      invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::Any());
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
     }
     return;
   }
@@ -1917,7 +1917,7 @@
 
   // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
   if (invoke->HasPcRelativeDexCache()) {
-    invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(),
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(),
                                     Location::RequiresRegister());
   }
 
@@ -1926,9 +1926,9 @@
     // needs a register. We therefore do not require a register for it, and let
     // the code generation of the invoke handle it.
     LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
+    Location location = locations->InAt(invoke->GetSpecialInputIndex());
     if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
     }
   }
 }
@@ -4030,7 +4030,7 @@
 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                  Register temp) {
   DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
-  Location location = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
   if (!invoke->GetLocations()->Intrinsified()) {
     return location.AsRegister<Register>();
   }
@@ -4061,7 +4061,7 @@
       __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
@@ -4082,7 +4082,7 @@
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       Register reg = temp.AsRegister<Register>();
       if (current_method.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e114229..0e0b869 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -718,7 +718,7 @@
                     Address::Absolute(invoke->GetStringInitOffset(), true));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
@@ -737,7 +737,7 @@
       __ Bind(&pc_relative_dex_cache_patches_.back().label);
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       CpuRegister reg = temp.AsRegister<CpuRegister>();
       if (current_method.IsRegister()) {
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 57452cc..1db775e 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -68,6 +68,7 @@
         } else {
           inst->Accept(&simplifier);
         }
+        MaybeRecordStat(MethodCompilationStat::kConstantFolding);
       } else if (inst->IsUnaryOperation()) {
         // Constant folding: replace `op(a)' with a constant at compile
         // time if `a' is a constant.
@@ -76,6 +77,7 @@
           inst->ReplaceWith(constant);
           inst->GetBlock()->RemoveInstruction(inst);
         }
+        MaybeRecordStat(MethodCompilationStat::kConstantFolding);
       } else if (inst->IsTypeConversion()) {
         // Constant folding: replace `TypeConversion(a)' with a constant at
         // compile time if `a' is a constant.
@@ -84,6 +86,7 @@
           inst->ReplaceWith(constant);
           inst->GetBlock()->RemoveInstruction(inst);
         }
+        MaybeRecordStat(MethodCompilationStat::kConstantFolding);
       } else if (inst->IsDivZeroCheck()) {
         // We can safely remove the check if the input is a non-null constant.
         HDivZeroCheck* check = inst->AsDivZeroCheck();
@@ -92,6 +95,7 @@
           check->ReplaceWith(check_input);
           check->GetBlock()->RemoveInstruction(check);
         }
+        MaybeRecordStat(MethodCompilationStat::kConstantFolding);
       }
     }
   }
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index 2698b2d..df89371 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -32,8 +32,10 @@
  */
 class HConstantFolding : public HOptimization {
  public:
-  explicit HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
-      : HOptimization(graph, name) {}
+  HConstantFolding(HGraph* graph,
+                   OptimizingCompilerStats* stats,
+                   const char* name = kConstantFoldingPassName)
+      : HOptimization(graph, name, stats) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index af3ecb1..d166d00 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -503,6 +503,18 @@
         StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
       } else if (instruction->IsLoadClass()) {
         StartAttributeStream("klass") << "unresolved";
+      } else if (instruction->IsNullConstant()) {
+        // The NullConstant may be added to the graph during other passes that happen between
+        // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
+        // doesn't run or doesn't inline anything, the NullConstant remains untyped.
+        // So we should check NullConstants for validity only after reference type propagation.
+        //
+        // Note: The infrastructure to properly type NullConstants everywhere is too complex to add
+        // for the benefits.
+        StartAttributeStream("klass") << "not_set";
+        DCHECK(!is_after_pass_
+            || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName))
+            << " Expected a valid rti after reference type propagation";
       } else {
         DCHECK(!is_after_pass_)
             << "Expected a valid rti after reference type propagation";
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0363f20..b33f6f2 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -395,11 +395,11 @@
 
   // Run simple optimizations on the graph.
   HDeadCodeElimination dce(callee_graph, stats_);
-  HConstantFolding fold(callee_graph);
+  HConstantFolding fold(callee_graph, stats_);
   ReferenceTypePropagation type_propagation(callee_graph, handles_);
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, stats_);
-  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
+  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_);
 
   HOptimization* optimizations[] = {
     &intrinsics,
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index b97dc1a..9ad2dd1 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -796,6 +796,34 @@
       HShl* shl = new(allocator) HShl(type, input_other, shift);
       block->ReplaceAndRemoveInstructionWith(instruction, shl);
       RecordSimplification();
+    } else if (IsPowerOfTwo(factor - 1)) {
+      // Transform code looking like
+      //    MUL dst, src, (2^n + 1)
+      // into
+      //    SHL tmp, src, n
+      //    ADD dst, src, tmp
+      HShl* shl = new (allocator) HShl(type,
+                                       input_other,
+                                       GetGraph()->GetIntConstant(WhichPowerOf2(factor - 1)));
+      HAdd* add = new (allocator) HAdd(type, input_other, shl);
+
+      block->InsertInstructionBefore(shl, instruction);
+      block->ReplaceAndRemoveInstructionWith(instruction, add);
+      RecordSimplification();
+    } else if (IsPowerOfTwo(factor + 1)) {
+      // Transform code looking like
+      //    MUL dst, src, (2^n - 1)
+      // into
+      //    SHL tmp, src, n
+      //    SUB dst, tmp, src
+      HShl* shl = new (allocator) HShl(type,
+                                       input_other,
+                                       GetGraph()->GetIntConstant(WhichPowerOf2(factor + 1)));
+      HSub* sub = new (allocator) HSub(type, shl, input_other);
+
+      block->InsertInstructionBefore(shl, instruction);
+      block->ReplaceAndRemoveInstructionWith(instruction, sub);
+      RecordSimplification();
     }
   }
 }
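
The two new cases lean on the shift identities x * (2^n + 1) == (x << n) + x and x * (2^n - 1) == (x << n) - x. The following standalone C++ sketch (illustration only, hypothetical helper names) checks them for factors 9 and 7:

    #include <cassert>
    #include <cstdint>

    // x * (2^n + 1) rewritten as a shift plus an add.
    int32_t MulPow2PlusOne(int32_t x, int n) {
      return static_cast<int32_t>(static_cast<uint32_t>(x) << n) + x;
    }

    // x * (2^n - 1) rewritten as a shift minus the original value.
    int32_t MulPow2MinusOne(int32_t x, int n) {
      return static_cast<int32_t>(static_cast<uint32_t>(x) << n) - x;
    }

    int main() {
      for (int32_t x : {-7, 0, 1, 42, 1000}) {
        assert(MulPow2PlusOne(x, 3) == x * 9);   // factor 9 -> SHL 3, ADD
        assert(MulPow2MinusOne(x, 3) == x * 7);  // factor 7 -> SHL 3, SUB
      }
      return 0;
    }
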
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index b01324e..9516f01 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -438,6 +438,7 @@
                   << invoke->DebugName();
             } else {
               invoke->SetIntrinsic(intrinsic, NeedsEnvironmentOrCache(intrinsic));
+              MaybeRecordStat(MethodCompilationStat::kIntrinsicsRecognizer);
             }
           }
         }
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index e459516..cc4f708 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -30,8 +30,8 @@
 // Recognize intrinsics from HInvoke nodes.
 class IntrinsicsRecognizer : public HOptimization {
  public:
-  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver)
-      : HOptimization(graph, kIntrinsicsRecognizerPassName),
+  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kIntrinsicsRecognizerPassName, stats),
         driver_(driver) {}
 
   void Run() OVERRIDE;
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 0a5acc3..d2017da 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -44,7 +44,23 @@
 bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  if (res == nullptr) {
+    return false;
+  }
+  if (kEmitCompilerReadBarrier && res->CanCall()) {
+    // Generating an intrinsic for this HInvoke may produce an
+    // IntrinsicSlowPathARM slow path.  Currently this approach
+    // does not work when using read barriers, as the emitted
+    // calling sequence will make use of another slow path
+    // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect,
+    // ReadBarrierSlowPathARM for HInvokeVirtual).  So we bail
+    // out in this case.
+    //
+    // TODO: Find a way to have intrinsics work with read barriers.
+    invoke->SetLocations(nullptr);
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 #define __ assembler->
@@ -662,20 +678,23 @@
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
   ArmAssembler* assembler = codegen->GetAssembler();
-  Register base = locations->InAt(1).AsRegister<Register>();           // Object pointer.
-  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();  // Long offset, lo part only.
+  Location base_loc = locations->InAt(1);
+  Register base = base_loc.AsRegister<Register>();             // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  Register offset = offset_loc.AsRegisterPairLow<Register>();  // Long offset, lo part only.
+  Location trg_loc = locations->Out();
 
   if (type == Primitive::kPrimLong) {
-    Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
     __ add(IP, base, ShifterOperand(offset));
     if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
-      Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
+      Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
       __ ldrexd(trg_lo, trg_hi, IP);
     } else {
       __ ldrd(trg_lo, Address(IP));
     }
   } else {
-    Register trg = locations->Out().AsRegister<Register>();
+    Register trg = trg_loc.AsRegister<Register>();
     __ ldr(trg, Address(base, offset));
   }
 
@@ -684,14 +703,18 @@
   }
 
   if (type == Primitive::kPrimNot) {
-    Register trg = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(trg);
+    codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
   }
 }
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -936,6 +959,7 @@
   __ Bind(&loop_head);
 
   __ ldrex(tmp_lo, tmp_ptr);
+  // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
 
   __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
 
@@ -964,7 +988,11 @@
   // The UnsafeCASObject intrinsic does not always work when heap
   // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
   // off temporarily as a quick fix.
+  //
   // TODO(rpl): Fix it and turn it back on.
+  //
+  // TODO(rpl): Also, we should investigate whether we need a read
+  // barrier in the generated code.
   if (kPoisonHeapReferences) {
     return;
   }
@@ -1400,6 +1428,10 @@
   }
 }
 
+// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
+// Note that this code path is not used (yet) because we do not
+// intrinsify methods that can go into the IntrinsicSlowPathARM
+// slow path.
 void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index a94e3a8..3268445 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -138,6 +138,221 @@
 
 #define __ assembler->
 
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+
+  if (is64bit) {
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    __ Mfc1(out_lo, in);
+    __ Mfhc1(out_hi, in);
+  } else {
+    Register out = locations->Out().AsRegister<Register>();
+
+    __ Mfc1(out, in);
+  }
+}
+
+// long java.lang.Double.doubleToRawLongBits(double)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+
+// int java.lang.Float.floatToRawIntBits(float)
+void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+  FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+  if (is64bit) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+    __ Mtc1(in_lo, out);
+    __ Mthc1(in_hi, out);
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+
+    __ Mtc1(in, out);
+  }
+}
+
+// double java.lang.Double.longBitsToDouble(long)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+// float java.lang.Float.intBitsToFloat(int)
+void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenReverseBytes(LocationSummary* locations,
+                            Primitive::Type type,
+                            MipsAssembler* assembler,
+                            bool isR2OrNewer) {
+  DCHECK(type == Primitive::kPrimShort ||
+         type == Primitive::kPrimInt ||
+         type == Primitive::kPrimLong);
+
+  if (type == Primitive::kPrimShort) {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    if (isR2OrNewer) {
+      __ Wsbh(out, in);
+      __ Seh(out, out);
+    } else {
+      __ Sll(TMP, in, 24);
+      __ Sra(TMP, TMP, 16);
+      __ Sll(out, in, 16);
+      __ Srl(out, out, 24);
+      __ Or(out, out, TMP);
+    }
+  } else if (type == Primitive::kPrimInt) {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    if (isR2OrNewer) {
+      __ Rotr(out, in, 16);
+      __ Wsbh(out, out);
+    } else {
+      // MIPS32r1
+      // __ Rotr(out, in, 16);
+      __ Sll(TMP, in, 16);
+      __ Srl(out, in, 16);
+      __ Or(out, out, TMP);
+      // __ Wsbh(out, out);
+      __ LoadConst32(AT, 0x00FF00FF);
+      __ And(TMP, out, AT);
+      __ Sll(TMP, TMP, 8);
+      __ Srl(out, out, 8);
+      __ And(out, out, AT);
+      __ Or(out, out, TMP);
+    }
+  } else if (type == Primitive::kPrimLong) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    if (isR2OrNewer) {
+      __ Rotr(AT, in_hi, 16);
+      __ Rotr(TMP, in_lo, 16);
+      __ Wsbh(out_lo, AT);
+      __ Wsbh(out_hi, TMP);
+    } else {
+      // When calling CreateIntToIntLocations() we promised that the
+      // use of the out_lo/out_hi wouldn't overlap with the use of
+      // in_lo/in_hi. Be very careful not to write to out_lo/out_hi
+      // until we're completely done reading from in_lo/in_hi.
+      // __ Rotr(TMP, in_lo, 16);
+      __ Sll(TMP, in_lo, 16);
+      __ Srl(AT, in_lo, 16);
+      __ Or(TMP, TMP, AT);             // Hold in TMP until it's safe
+                                       // to write to out_hi.
+      // __ Rotr(out_lo, in_hi, 16);
+      __ Sll(AT, in_hi, 16);
+      __ Srl(out_lo, in_hi, 16);        // Here we are finally done reading
+                                        // from in_lo/in_hi so it's okay to
+                                        // write to out_lo/out_hi.
+      __ Or(out_lo, out_lo, AT);
+      // __ Wsbh(out_hi, out_hi);
+      __ LoadConst32(AT, 0x00FF00FF);
+      __ And(out_hi, TMP, AT);
+      __ Sll(out_hi, out_hi, 8);
+      __ Srl(TMP, TMP, 8);
+      __ And(TMP, TMP, AT);
+      __ Or(out_hi, out_hi, TMP);
+      // __ Wsbh(out_lo, out_lo);
+      __ And(TMP, out_lo, AT);  // AT already holds the correct mask value
+      __ Sll(TMP, TMP, 8);
+      __ Srl(out_lo, out_lo, 8);
+      __ And(out_lo, out_lo, AT);
+      __ Or(out_lo, out_lo, TMP);
+    }
+  }
+}
+
+// int java.lang.Integer.reverseBytes(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(),
+                  Primitive::kPrimInt,
+                  GetAssembler(),
+                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+}
+
+// long java.lang.Long.reverseBytes(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(),
+                  Primitive::kPrimLong,
+                  GetAssembler(),
+                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+}
+
+// short java.lang.Short.reverseBytes(short)
+void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(),
+                  Primitive::kPrimShort,
+                  GetAssembler(),
+                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+}
+
 // boolean java.lang.String.equals(Object anObject)
 void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -250,15 +465,8 @@
 
 UNIMPLEMENTED_INTRINSIC(IntegerReverse)
 UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
-UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
 UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
 UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(FloatIntBitsToFloat)
-UNIMPLEMENTED_INTRINSIC(DoubleLongBitsToDouble)
-UNIMPLEMENTED_INTRINSIC(FloatFloatToRawIntBits)
-UNIMPLEMENTED_INTRINSIC(DoubleDoubleToRawLongBits)
 UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
 UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
 UNIMPLEMENTED_INTRINSIC(MathAbsInt)
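
The MIPS32r1 fallback in GenReverseBytes emulates Rotr/Wsbh with plain shifts and the 0x00FF00FF mask. A host-side C++ model of the 32-bit case (illustration only; __builtin_bswap32 assumes a GCC/Clang host compiler):

    #include <cassert>
    #include <cstdint>

    uint32_t ReverseBytesR1(uint32_t in) {
      uint32_t tmp = in << 16;        // Sll(TMP, in, 16)
      uint32_t out = in >> 16;        // Srl(out, in, 16)
      out |= tmp;                     // Or(out, out, TMP)  -- emulated Rotr(out, in, 16)
      uint32_t mask = 0x00FF00FFu;    // LoadConst32(AT, 0x00FF00FF)
      tmp = (out & mask) << 8;        // And + Sll
      out = (out >> 8) & mask;        // Srl + And
      return out | tmp;               // Or                 -- emulated Wsbh(out, out)
    }

    int main() {
      for (uint32_t v : {0u, 0x12345678u, 0xDEADBEEFu, 0xFF000001u}) {
        assert(ReverseBytesR1(v) == __builtin_bswap32(v));
      }
      return 0;
    }
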
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index ff843eb..3654159 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1391,6 +1391,108 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+// boolean java.lang.String.equals(Object anObject)
+void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+
+  // Temporary registers to store lengths of strings and for calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  GpuRegister str = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister arg = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>();
+  GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>();
+  GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>();
+
+  Label loop;
+  Label end;
+  Label return_true;
+  Label return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // If the register containing the pointer to "this" and the register
+  // containing the pointer to "anObject" are the same register, then
+  // "this" and "anObject" are the same object and we can
+  // short-circuit the logic to a true result.
+  if (str == arg) {
+    __ LoadConst64(out, 1);
+    return;
+  }
+
+  // Check if input is null, return false if it is.
+  __ Beqzc(arg, &return_false);
+
+  // Reference equality check, return true if same reference.
+  __ Beqc(str, arg, &return_true);
+
+  // Instanceof check for the argument by comparing class fields.
+  // All string objects must have the same type since String cannot be subclassed.
+  // Receiver must be a string object, so its class field is equal to all strings' class fields.
+  // If the argument is a string object, its class field must be equal to receiver's class field.
+  __ Lw(temp1, str, class_offset);
+  __ Lw(temp2, arg, class_offset);
+  __ Bnec(temp1, temp2, &return_false);
+
+  // Load lengths of this and argument strings.
+  __ Lw(temp1, str, count_offset);
+  __ Lw(temp2, arg, count_offset);
+  // Check if lengths are equal, return false if they're not.
+  __ Bnec(temp1, temp2, &return_false);
+  // Return true if both strings are empty.
+  __ Beqzc(temp1, &return_true);
+
+  // Don't overwrite input registers
+  __ Move(TMP, str);
+  __ Move(temp3, arg);
+
+  // Assertions that must hold in order to compare strings 4 characters at a time.
+  DCHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+  // Loop to compare strings 4 characters at a time starting at the beginning of the string.
+  // Ok to do this because strings are zero-padded to be 8-byte aligned.
+  __ Bind(&loop);
+  __ Ld(out, TMP, value_offset);
+  __ Ld(temp2, temp3, value_offset);
+  __ Bnec(out, temp2, &return_false);
+  __ Daddiu(TMP, TMP, 8);
+  __ Daddiu(temp3, temp3, 8);
+  __ Addiu(temp1, temp1, -4);
+  __ Bgtzc(temp1, &loop);
+
+  // Return true and exit the function.
+  // If loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ LoadConst64(out, 1);
+  __ B(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ LoadConst64(out, 0);
+  __ Bind(&end);
+}
+
 static void GenerateStringIndexOf(HInvoke* invoke,
                                   Mips64Assembler* assembler,
                                   CodeGeneratorMIPS64* codegen,
@@ -1586,8 +1688,6 @@
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
 
-UNIMPLEMENTED_INTRINSIC(StringEquals)
-
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
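
VisitStringEquals compares the character arrays 8 bytes (4 UTF-16 chars) at a time, which is only sound because string data is zero-padded out to 8-byte alignment. A simplified host-side C++ sketch of that loop (illustration only; the padding guarantee is modelled explicitly here):

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    bool EqualsWordwise(const std::vector<uint16_t>& a, const std::vector<uint16_t>& b) {
      if (a.size() != b.size()) return false;        // Bnec(temp1, temp2, &return_false)
      // Pad both up to a multiple of 4 chars with zeros, as the runtime guarantees.
      std::vector<uint16_t> pa = a, pb = b;
      while (pa.size() % 4 != 0) { pa.push_back(0); pb.push_back(0); }
      for (size_t i = 0; i < pa.size(); i += 4) {
        uint64_t wa, wb;
        std::memcpy(&wa, &pa[i], sizeof(wa));        // Ld(out, TMP, value_offset)
        std::memcpy(&wb, &pb[i], sizeof(wb));        // Ld(temp2, temp3, value_offset)
        if (wa != wb) return false;                  // Bnec(out, temp2, &return_false)
      }
      return true;
    }

    int main() {
      assert(EqualsWordwise({'a', 'b', 'c'}, {'a', 'b', 'c'}));
      assert(!EqualsWordwise({'a', 'b', 'c'}, {'a', 'b', 'd'}));
      assert(!EqualsWordwise({'a'}, {'a', 'b'}));
      return 0;
    }
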
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index c38bbe3..6d0a616 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -123,6 +123,7 @@
             UpdateLoopPhisIn(instruction->GetEnvironment(), loop_info);
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
+          MaybeRecordStat(MethodCompilationStat::kLICM);
         } else if (instruction->CanThrow()) {
           // If `instruction` can throw, we cannot move further instructions
           // that can throw as well.
diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h
index 0b5a0f1..bf56f53 100644
--- a/compiler/optimizing/licm.h
+++ b/compiler/optimizing/licm.h
@@ -26,8 +26,9 @@
 
 class LICM : public HOptimization {
  public:
-  LICM(HGraph* graph, const SideEffectsAnalysis& side_effects)
-      : HOptimization(graph, kLoopInvariantCodeMotionPassName), side_effects_(side_effects) {}
+  LICM(HGraph* graph, const SideEffectsAnalysis& side_effects, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kLoopInvariantCodeMotionPassName, stats),
+        side_effects_(side_effects) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 54ca522..263795d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -3434,14 +3434,19 @@
     DCHECK(had_current_method_input || !needs_current_method_input);
 
     if (had_current_method_input && !needs_current_method_input) {
-      DCHECK_EQ(InputAt(GetCurrentMethodInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
-      RemoveInputAt(GetCurrentMethodInputIndex());
+      DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
+      RemoveInputAt(GetSpecialInputIndex());
     }
     dispatch_info_ = dispatch_info;
   }
 
-  void InsertInputAt(size_t index, HInstruction* input);
-  void RemoveInputAt(size_t index);
+  void AddSpecialInput(HInstruction* input) {
+    // We allow only one special input.
+    DCHECK(!IsStringInit() && !HasCurrentMethodInput());
+    DCHECK(InputCount() == GetSpecialInputIndex() ||
+           (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck()));
+    InsertInputAt(GetSpecialInputIndex(), input);
+  }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // We access the method via the dex cache so we can't do an implicit null check.
@@ -3453,13 +3458,20 @@
     return return_type_ == Primitive::kPrimNot && !IsStringInit();
   }
 
+  // Get the index of the special input, if any.
+  //
+  // If the invoke IsStringInit(), it initially has a HFakeString special argument
+  // which is removed by the instruction simplifier; if the invoke HasCurrentMethodInput(),
+  // the "special input" is the current method pointer; otherwise there may be one
+  // platform-specific special input, such as PC-relative addressing base.
+  uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
+
   InvokeType GetInvokeType() const { return invoke_type_; }
   MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
   CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
   bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE;
   bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
-  uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); }
   bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
   bool HasPcRelativeDexCache() const {
     return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
@@ -3467,11 +3479,11 @@
   bool HasCurrentMethodInput() const {
     // This function can be called only after the invoke has been fully initialized by the builder.
     if (NeedsCurrentMethodInput(GetMethodLoadKind())) {
-      DCHECK(InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod());
+      DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod());
       return true;
     } else {
-      DCHECK(InputCount() == GetCurrentMethodInputIndex() ||
-             !InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod());
+      DCHECK(InputCount() == GetSpecialInputIndex() ||
+             !InputAt(GetSpecialInputIndex())->IsCurrentMethod());
       return false;
     }
   }
@@ -3571,6 +3583,9 @@
     return input_record;
   }
 
+  void InsertInputAt(size_t index, HInstruction* input);
+  void RemoveInputAt(size_t index);
+
  private:
   const InvokeType invoke_type_;
   ClinitCheckRequirement clinit_check_requirement_;
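
The invariant behind GetSpecialInputIndex()/AddSpecialInput() is that the single optional special input always sits directly after the last argument. A toy C++ sketch of that invariant (FakeInvoke is a hypothetical stand-in, not an ART class):

    #include <cassert>
    #include <vector>

    // Arguments occupy inputs [0, num_args); the one optional "special input"
    // always lives at index num_args.
    struct FakeInvoke {
      std::vector<int> inputs;
      size_t num_args;

      size_t GetSpecialInputIndex() const { return num_args; }
      bool HasSpecialInput() const { return inputs.size() > num_args; }

      void AddSpecialInput(int value) {
        assert(!HasSpecialInput());  // only one special input is allowed
        inputs.insert(inputs.begin() + GetSpecialInputIndex(), value);
      }
    };

    int main() {
      FakeInvoke invoke{{10, 20, 30}, 3};   // three regular arguments
      invoke.AddSpecialInput(99);           // e.g. a PC-relative addressing base
      assert(invoke.GetSpecialInputIndex() == 3);
      assert(invoke.inputs[invoke.GetSpecialInputIndex()] == 99);
      return 0;
    }
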
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 27ee472..24f0d77 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -383,10 +383,11 @@
       || instruction_set == kX86_64;
 }
 
-// Read barrier are supported only on x86 and x86-64 at the moment.
+// Read barriers are supported only on ARM, x86 and x86-64 at the moment.
 // TODO: Add support for other architectures and remove this function
 static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
-  return instruction_set == kX86
+  return instruction_set == kThumb2
+      || instruction_set == kX86
       || instruction_set == kX86_64;
 }
 
@@ -486,10 +487,10 @@
       graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
   HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
       graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
-  HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
+  HConstantFolding* fold1 = new (arena) HConstantFolding(graph, stats);
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
-  HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
-  HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
+  HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph, stats);
+  HConstantFolding* fold2 = new (arena) HConstantFolding(graph, stats, "constant_folding_after_inlining");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
-  LICM* licm = new (arena) LICM(graph, *side_effects);
+  LICM* licm = new (arena) LICM(graph, *side_effects, stats);
@@ -506,7 +507,7 @@
   InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_before_codegen");
 
-  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
+  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats);
 
   HOptimization* optimizations1[] = {
     intrinsics,
@@ -668,8 +669,8 @@
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
 
-  // Always use the thumb2 assembler: some runtime functionality (like implicit stack
-  // overflow checks) assume thumb2.
+  // Always use the Thumb-2 assembler: some runtime functionality
+  // (like implicit stack overflow checks) assumes Thumb-2.
   if (instruction_set == kArm) {
     instruction_set = kThumb2;
   }
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6375cf1..21af170 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -54,6 +54,10 @@
   kRemovedCheckedCast,
   kRemovedDeadInstruction,
   kRemovedNullCheck,
+  kConstantFolding,
+  kBooleanSimplifier,
+  kIntrinsicsRecognizer,
+  kLICM,
   kLastStat
 };
 
@@ -121,6 +125,10 @@
       case kRemovedCheckedCast: return "kRemovedCheckedCast";
       case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
       case kRemovedNullCheck: return "kRemovedNullCheck";
+      case kConstantFolding: return "kConstantFolding";
+      case kBooleanSimplifier: return "kBooleanSimplifier";
+      case kIntrinsicsRecognizer: return "kIntrinsicsRecognizer";
+      case kLICM: return "kLICM";
 
       case kLastStat: break;  // Invalid to print out.
     }
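
These counters feed the usual MaybeRecordStat() pattern: an optimization holds an optional stats object and recording is a no-op when none was supplied. A minimal C++ sketch of that pattern (names illustrative only, not the real OptimizingCompilerStats/HOptimization classes):

    #include <cassert>
    #include <cstddef>

    enum class Stat : size_t { kConstantFolding, kBooleanSimplifier, kIntrinsicsRecognizer, kLICM, kLast };

    struct Stats {
      size_t counters[static_cast<size_t>(Stat::kLast)] = {};
      void Record(Stat s) { ++counters[static_cast<size_t>(s)]; }
    };

    struct Optimization {
      explicit Optimization(Stats* stats) : stats_(stats) {}
      // No-op when the caller did not ask for statistics.
      void MaybeRecordStat(Stat s) { if (stats_ != nullptr) stats_->Record(s); }
      Stats* stats_;
    };

    int main() {
      Stats stats;
      Optimization licm(&stats);
      licm.MaybeRecordStat(Stat::kLICM);   // counted
      Optimization quiet(nullptr);
      quiet.MaybeRecordStat(Stat::kLICM);  // silently ignored
      assert(stats.counters[static_cast<size_t>(Stat::kLICM)] == 1);
      return 0;
    }
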
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index c2894c7..808a1dc 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -113,9 +113,8 @@
     if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
       InitializePCRelativeBasePointer(invoke);
       // Add the extra parameter base_.
-      uint32_t index = invoke_static_or_direct->GetCurrentMethodInputIndex();
       DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
-      invoke_static_or_direct->InsertInputAt(index, base_);
+      invoke_static_or_direct->AddSpecialInput(base_);
     }
     // Ensure that we can load FP arguments from the constant area.
     for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index aee6412..fc7ac70 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -310,15 +310,27 @@
   EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20);
 }
 
+void MipsAssembler::Wsbh(Register rd, Register rt) {
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20);
+}
+
 void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00);
 }
 
 void MipsAssembler::Srl(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02);
 }
 
+void MipsAssembler::Rotr(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
+  EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02);
+}
+
 void MipsAssembler::Sra(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03);
 }
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 4038c1f..1ef0992 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -135,9 +135,11 @@
 
   void Seb(Register rd, Register rt);  // R2+
   void Seh(Register rd, Register rt);  // R2+
+  void Wsbh(Register rd, Register rt);  // R2+
 
   void Sll(Register rd, Register rt, int shamt);
   void Srl(Register rd, Register rt, int shamt);
+  void Rotr(Register rd, Register rt, int shamt);  // R2+
   void Sra(Register rd, Register rt, int shamt);
   void Sllv(Register rd, Register rt, Register rs);
   void Srlv(Register rd, Register rt, Register rs);
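
The new CHECKs in the assembler guard the 5-bit shamt field of the R-type encoding. A small C++ sketch of that range check (IsUint here is a simplified stand-in for the runtime's bit utility of the same name):

    #include <cassert>
    #include <cstdint>

    // True if value fits in kBits unsigned bits, e.g. the 5-bit shamt field.
    template <size_t kBits>
    constexpr bool IsUint(uint32_t value) {
      static_assert(kBits > 0 && kBits < 32, "invalid bit width");
      return value < (1u << kBits);
    }

    int main() {
      static_assert(IsUint<5>(0), "0 is a valid shamt");
      static_assert(IsUint<5>(31), "31 is the largest valid shamt");
      static_assert(!IsUint<5>(32), "32 does not fit in 5 bits");
      assert(IsUint<5>(16));
      return 0;
    }
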
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 68cf6d9..89c2a7c 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1231,6 +1231,7 @@
 
     // Handle and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = nullptr;
+    jobject class_path_class_loader = nullptr;
     Thread* self = Thread::Current();
 
     if (!boot_image_option_.empty()) {
@@ -1248,10 +1249,12 @@
       key_value_store_->Put(OatHeader::kClassPathKey,
                             OatFile::EncodeDexFileDependencies(class_path_files));
 
-      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
-      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
+      class_path_class_loader = class_linker->CreatePathClassLoader(self,
+                                                                    class_path_files,
+                                                                    nullptr);
 
-      class_loader = class_linker->CreatePathClassLoader(self, class_path_files);
+      // Class path loader as parent so that we'll resolve there first.
+      class_loader = class_linker->CreatePathClassLoader(self, dex_files_, class_path_class_loader);
     }
 
     driver_.reset(new CompilerDriver(compiler_options_.get(),
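
With the class-path loader installed as the parent, resolution consults the class path before the dex files being compiled. A toy C++ sketch of that parent-first delegation (types and names are illustrative only):

    #include <cassert>
    #include <map>
    #include <string>

    struct Loader {
      const Loader* parent = nullptr;
      std::map<std::string, std::string> classes;  // descriptor -> origin label

      const std::string* Find(const std::string& descriptor) const {
        if (parent != nullptr) {                   // delegate to the parent first
          if (const std::string* c = parent->Find(descriptor)) return c;
        }
        auto it = classes.find(descriptor);
        return it == classes.end() ? nullptr : &it->second;
      }
    };

    int main() {
      Loader class_path_loader;                    // built from the class-path dex files
      class_path_loader.classes["LFoo;"] = "classpath";
      Loader compile_loader;                       // holds the dex files being compiled
      compile_loader.parent = &class_path_loader;
      compile_loader.classes["LFoo;"] = "compiled";
      compile_loader.classes["LBar;"] = "compiled";
      assert(*compile_loader.Find("LFoo;") == "classpath");  // class path wins for duplicates
      assert(*compile_loader.Find("LBar;") == "compiled");
      return 0;
    }
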
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index cd83de6..94eb82b 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -2412,7 +2412,7 @@
 
   // Need a class loader.
   // Fake that we're a compiler.
-  jobject class_loader = class_linker->CreatePathClassLoader(self, class_path);
+  jobject class_loader = class_linker->CreatePathClassLoader(self, class_path, /*parent*/nullptr);
 
   // Use the class loader while dumping.
   StackHandleScope<1> scope(self);
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 0b0f094..4f4792a 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -106,6 +106,7 @@
   jit/profiling_info.cc \
   lambda/art_lambda_method.cc \
   lambda/box_table.cc \
+  lambda/box_class_table.cc \
   lambda/closure.cc \
   lambda/closure_builder.cc \
   lambda/leaking_allocator.cc \
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index d6ba304..771c8b7 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -46,9 +46,15 @@
   }
 };
 
+}  // namespace art
+
 // Common tests are declared next to the constants.
 #define ADD_TEST_EQ(x, y) EXPECT_EQ(x, y);
 #include "asm_support.h"
+// Important: Do not include this inside of another namespace, since asm_support.h
+// defines its own namespace which must not be nested.
+
+namespace art {
 
 TEST_F(ArchTest, CheckCommonOffsetsAndSizes) {
   CheckAsmSupportOffsetsAndSizes();
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 631b784..588268d 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1045,6 +1045,26 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
+// Forward call from boxed innate lambda to the underlying lambda closure's target method.
+     .extern artQuickLambdaProxyInvokeHandler
+ENTRY art_quick_lambda_proxy_invoke_handler
+// TODO: have a faster handler that doesn't need to set up a frame
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
+    mov     r2, r9                 @ pass Thread::Current
+    mov     r3, sp                 @ pass SP
+    blx     artQuickLambdaProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
+    ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
+    // Tear down the callee-save frame. Skip arg registers.
+    add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    cbnz    r2, 1f                 @ success if no exception is pending
+    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
+    bx      lr                     @ return on success
+1:
+    DELIVER_PENDING_EXCEPTION
+END art_quick_lambda_proxy_invoke_handler
+
     /*
      * Called to resolve an imt conflict. r12 is a hidden argument that holds the target method's
      * dex method index.
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 9ccabad..177873d 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1582,6 +1582,28 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
+     /*
+     * Called by managed code that is attempting to call a method on a lambda proxy class. On entry
+     * x0 holds the lambda proxy method and x1 holds the receiver. The frame size of the invoked
+     * lambda proxy method agrees with a ref and args callee save frame.
+     */
+     .extern artQuickLambdaProxyInvokeHandler
+ENTRY art_quick_lambda_proxy_invoke_handler
+// TODO: have a faster way to invoke lambda proxies without setting up the whole frame.
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
+    mov     x2, xSELF                   // pass Thread::Current
+    mov     x3, sp                      // pass SP
+    bl      artQuickLambdaProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
+    ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
+    cbnz    x2, .Lexception_in_lambda_proxy    // success if no exception is pending
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame
+    fmov    d0, x0                      // Store result in d0 in case it was float or double
+    ret                                 // return on success
+.Lexception_in_lambda_proxy:
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    DELIVER_PENDING_EXCEPTION
+END art_quick_lambda_proxy_invoke_handler
+
     /*
      * Called to resolve an imt conflict. xIP1 is a hidden argument that holds the target method's
      * dex method index.
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 0691f2a..af79f5e 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1377,6 +1377,10 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
+// Forward call from boxed innate lambda to the underlying lambda closure's target method.
+    .extern artQuickLambdaProxyInvokeHandler
+UNIMPLEMENTED art_quick_lambda_proxy_invoke_handler
+
     /*
      * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's
      * dex method index.
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 66c8aad..5e70a95 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1431,6 +1431,10 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
+// Forward call from boxed innate lambda to the underlying lambda closure's target method.
+    .extern artQuickLambdaProxyInvokeHandler
+UNIMPLEMENTED art_quick_lambda_proxy_invoke_handler
+
     /*
      * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's
      * dex method index.
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 463c9cf..4fb6119 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1391,6 +1391,149 @@
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_proxy_invoke_handler
 
+#if LAMBDA_INVOKE_USES_LONG
+#undef LAMBDA_PROXY_SETUP_FRAME
+// We need to always do a 'pop' to readjust the stack, so we have to use the slower call instruction.
+#define LAMBDA_PROXY_SETUP_FRAME 1
+#define LAMBDA_INVOKE_REALIGN_STACK_FRAME 1
+#else
+#define LAMBDA_INVOKE_REALIGN_STACK_FRAME 0
+#endif
+
+#define LAMBDA_INVOKE_CALLS_INTO_RUNTIME LAMBDA_INVOKE_REALIGN_STACK_FRAME
+
+// Forward call from boxed innate lambda to the underlying lambda closure's target method.
+DEFINE_FUNCTION art_quick_lambda_proxy_invoke_handler
+    // This function is always called when the lambda is innate.
+    // Therefore we can assume the box is to an innate lambda.
+    // TODO: perhaps there should be a DCHECK to make sure it's innate?
+
+#if LAMBDA_PROXY_SETUP_FRAME
+    // Set up a quick frame when debugging so we can see that it's going through a stub.
+    // An invoke-virtual + a stub invocation is enough of a hint that we *could* be
+    // going through a lambda proxy.
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX
+#endif
+
+#if !LAMBDA_INVOKE_CALLS_INTO_RUNTIME
+    // Rewrite the following 2 arguments, stored on stack frame:
+    //
+    //   |--------|
+    //   |receiver|   <- esp-4
+    //   |--------|
+    //   | method |   <- esp
+    //   |--------|
+
+    // Set up the new correct method receiver (swap object with closure).
+    // -- The original object is no longer available after this.
+    //
+    // (Before)
+    // ecx == mirror::Object* boxed_lambda;  // lambda proxy object.
+    movl MIRROR_OBJECT_BOXED_INNATE_LAMBDA_CLOSURE_POINTER_OFFSET(%ecx), %ecx
+    // (After)
+    // lambda::Closure* closure = boxed_lambda->closure_;
+    // boxed_lambda = closure; // Overwrite lambda proxy object
+    // ecx == closure
+
+    // Look up the new correct method target.
+    // -- The original method target is no longer available after this.
+    //
+    // (Before)
+    // eax == ArtMethod* old_receiver_method;
+    movl LAMBDA_CLOSURE_METHOD_OFFSET(%ecx), %eax
+    // (After)
+    // ArtLambdaMethod* lambda_method_target = closure->lambda_info_;
+    // eax = lambda_method_target
+    //
+    // Set up the correct method target from the lambda info.
+    movl ART_LAMBDA_METHOD_ART_METHOD_OFFSET(%eax), %eax  // Load new receiver method
+    // (After)
+    // ArtMethod* target_method = lambda_method_target->target_
+    // eax = target_method
+#endif
+
+#if LAMBDA_INVOKE_CALLS_INTO_RUNTIME
+    PUSH esp                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ecx                      // pass receiver
+    PUSH eax                      // pass proxy method
+    call SYMBOL(artQuickLambdaProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
+    movd %eax, %xmm0              // place return value also into floating point return value
+    movd %edx, %xmm1
+    punpckldq %xmm1, %xmm0
+    addl LITERAL(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %esp
+    CFI_ADJUST_CFA_OFFSET(-(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE))
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
+#endif
+
+#if LAMBDA_INVOKE_USES_LONG && !LAMBDA_INVOKE_REALIGN_STACK_FRAME
+    // As a temporary workaround, lambda functions look like
+    //  (J[Arg2][Arg3][Arg4]...)
+    // This means that we can't just pass in the lambda as a 32-bit pointer;
+    // instead we pad the arguments with an extra 32-bit "0" where Arg2 used to be.
+
+    // Required arguments for a lambda method:
+    //
+    // Arg0 = eax = method
+    // Arg1 = ecx = closure (hi)
+    // Arg2 = edx = closure (lo)
+    // Arg3 = ebx = <?> (first user-defined argument)
+
+    // Transformation diagram:
+    //
+    // Arg0  Arg1  Arg2  Arg3 ... ArgN
+    //   |    |        \     \        \
+    //   |    |         \     \        \
+    // Arg0  Arg1  0x00  Arg2  Arg3 ... ArgN
+    //              /\
+    //           (inserted)
+    PUSH ebx          // Move out Arg3 into Arg4, and also for all K>3 ArgK into ArgK+1
+    mov %edx, %ebx    // Move out Arg2 into Arg3
+    xor %edx, %edx    // Clear closure 32-bit low register
+
+    // XX: Does this work at all? This probably breaks the visitors (*and* it's unaligned).
+
+    // FIXME: call into the runtime and do a proxy-like-invoke
+    // using a ShadowFrame quick visitor, and then use ArtMethod::Invoke
+    // to call into the actual method (which will take care of fixing up alignment).
+    // Trying to realign in the assembly itself won't actually work
+    // since then the visitor will unwind incorrectly (unless we also fixed up the ManagedStack).
+#endif
+
+    // TODO: avoid extra indirect load by subclass ArtLambdaMethod from ArtMethod.
+
+    // Forward the call to the overwritten receiver method.
+    // -- Arguments [2,N] are left completely untouched since the signature is otherwise identical.
+#if LAMBDA_PROXY_SETUP_FRAME
+  #if LAMBDA_INVOKE_CALLS_INTO_RUNTIME
+    // Have to call into runtime in order to re-align the stack frame to 16 bytes.
+    int3
+  #else
+    // Just call into the method directly. Don't worry about realigning.
+    call *ART_METHOD_QUICK_CODE_OFFSET_32(%eax)  // (new method, new receiver, old args...)
+
+    // The stack frame was manually adjusted, so make sure we have a pop here to fix it back.
+    #if LAMBDA_INVOKE_USES_LONG && !LAMBDA_INVOKE_REALIGN_STACK_FRAME
+
+    POP ecx   // OK: ecx is scratch register after the call.
+    // XX: use 'add esp, 4' instead if we need to keep the register? This way we get cleaner CFI.
+    #endif
+  #endif
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+
+#else
+    // Do not use 'call' here since the stack visitors wouldn't know how to visit this frame.
+    jmp *ART_METHOD_QUICK_CODE_OFFSET_32(%eax)   // tailcall (new method, new receiver, old args...)
+#endif
+
+#if LAMBDA_PROXY_SETUP_FRAME
+    ret
+#endif
+
+END_FUNCTION art_quick_lambda_proxy_invoke_handler
+
     /*
      * Called to resolve an imt conflict. xmm7 is a hidden argument that holds the target method's
      * dex method index.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 17d277e..0a54aa3 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1297,7 +1297,6 @@
     RETURN_IF_EAX_ZERO                   // return or deliver exception
 END_FUNCTION art_quick_set64_static
 
-
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
 
@@ -1309,6 +1308,60 @@
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_proxy_invoke_handler
 
+// Forward call from boxed innate lambda to the underlying lambda closure's target method.
+DEFINE_FUNCTION art_quick_lambda_proxy_invoke_handler
+    // This function is always called when the lambda is innate.
+    // Therefore we can assume the box is to an innate lambda.
+    // TODO: perhaps there should be a DCHECK to make sure it's innate?
+
+#if LAMBDA_PROXY_SETUP_FRAME
+    // Set up a quick frame when debugging so we can see that it's going through a stub.
+    // Our stack traces will contain the quick lambda proxy handler.
+    // Note that we *must* go through the handler (when spilling); otherwise we won't know how
+    // to move the spilled GC references from the caller to this stub.
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
+
+    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
+    movq %rsp, %rcx                         // Pass SP.
+    call SYMBOL(artQuickLambdaProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    movq %rax, %xmm0                        // Copy return value in case of float returns.
+    RETURN_OR_DELIVER_PENDING_EXCEPTION
+#else
+    // Set up the new correct method receiver (swap object with closure).
+    // -- The original object is no longer available after this.
+    //
+    // (Before)
+    // rsi == mirror::Object* boxed_lambda;  // lambda proxy object.
+    movq MIRROR_OBJECT_BOXED_INNATE_LAMBDA_CLOSURE_POINTER_OFFSET(%rsi), %rsi
+    // (After)
+    // lambda::Closure* closure = boxed_lambda->closure_;  // Overwrite receiver object.
+    // rsi == closure
+
+    // Look up the new correct method target.
+    // -- The original method target is no longer available after this.
+    movq LAMBDA_CLOSURE_METHOD_OFFSET(%rsi), %rdi          // Overwrite old receiver method.
+    // (After)
+    // ArtLambdaMethod* lambda_method_target = closure->lambda_info_;
+    // rdi == lambda_method_target
+
+    // TODO: avoid extra indirect load by subclass ArtLambdaMethod from ArtMethod.
+
+    // Set up the correct method target from the lambda info.
+    movq ART_LAMBDA_METHOD_ART_METHOD_OFFSET(%rdi), %rdi  // Write new receiver method.
+    // (After)
+    // ArtMethod* method_target = lambda_method_target->target_;
+    // rdi == method_target
+
+    // Forward the call to the overwritten receiver method.
+    // -- Arguments [2,N] are left completely untouched since the signature is otherwise identical.
+    // Do not use 'call' here since the stack would be misaligned (8b instead of 16b).
+    // Also the stack visitors wouldn't know how to visit this frame if we used a call.
+    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)   // tailcall (new method, new receiver, old args...)
+#endif
+
+END_FUNCTION art_quick_lambda_proxy_invoke_handler
+
     /*
      * Called to resolve an imt conflict.
      * rax is a hidden argument that holds the target method's dex method index.
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 4166e22..ab42d0e 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -255,7 +255,7 @@
 
 inline const char* ArtField::GetName() SHARED_REQUIRES(Locks::mutator_lock_) {
   uint32_t field_index = GetDexFieldIndex();
-  if (UNLIKELY(GetDeclaringClass()->IsProxyClass())) {
+  if (UNLIKELY(GetDeclaringClass()->IsAnyProxyClass())) {
     DCHECK(IsStatic());
     DCHECK_LT(field_index, 2U);
     return field_index == 0 ? "interfaces" : "throws";
@@ -266,7 +266,7 @@
 
 inline const char* ArtField::GetTypeDescriptor() SHARED_REQUIRES(Locks::mutator_lock_) {
   uint32_t field_index = GetDexFieldIndex();
-  if (UNLIKELY(GetDeclaringClass()->IsProxyClass())) {
+  if (UNLIKELY(GetDeclaringClass()->IsAnyProxyClass())) {
     DCHECK(IsStatic());
     DCHECK_LT(field_index, 2U);
     // 0 == Class[] interfaces; 1 == Class[][] throws;
@@ -290,8 +290,8 @@
 inline mirror::Class* ArtField::GetType() {
   const uint32_t field_index = GetDexFieldIndex();
   auto* declaring_class = GetDeclaringClass();
-  if (UNLIKELY(declaring_class->IsProxyClass())) {
-    return ProxyFindSystemClass(GetTypeDescriptor());
+  if (UNLIKELY(declaring_class->IsAnyProxyClass())) {
+    return AnyProxyFindSystemClass(GetTypeDescriptor());
   }
   auto* dex_cache = declaring_class->GetDexCache();
   const DexFile* const dex_file = dex_cache->GetDexFile();
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
index 3737e0d..3ac563a 100644
--- a/runtime/art_field.cc
+++ b/runtime/art_field.cc
@@ -69,8 +69,8 @@
   return nullptr;
 }
 
-mirror::Class* ArtField::ProxyFindSystemClass(const char* descriptor) {
-  DCHECK(GetDeclaringClass()->IsProxyClass());
+mirror::Class* ArtField::AnyProxyFindSystemClass(const char* descriptor) {
+  DCHECK(GetDeclaringClass()->IsAnyProxyClass());
   return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(), descriptor);
 }
 
diff --git a/runtime/art_field.h b/runtime/art_field.h
index a943a34..4ebe6fb 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -191,7 +191,9 @@
   }
 
  private:
-  mirror::Class* ProxyFindSystemClass(const char* descriptor)
+  mirror::Class* AnyProxyFindSystemClass(const char* descriptor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::Class* LambdaProxyFindSystemClass(const char* descriptor)
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Class* ResolveGetType(uint32_t type_idx) SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::String* ResolveGetStringName(Thread* self, const DexFile& dex_file, uint32_t string_idx,
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index cf548ad..b6e811f 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -292,7 +292,7 @@
 }
 
 inline const char* ArtMethod::GetShorty(uint32_t* out_length) {
-  DCHECK(!IsProxyMethod());
+  DCHECK(!IsProxyMethod() || IsLambdaProxyMethod());  // OK: lambda proxies use parent dex cache.
   const DexFile* dex_file = GetDexFile();
   return dex_file->GetMethodShorty(dex_file->GetMethodId(GetDexMethodIndex()), out_length);
 }
@@ -354,10 +354,31 @@
 }
 
 inline const DexFile::TypeList* ArtMethod::GetParameterTypeList() {
-  DCHECK(!IsProxyMethod());
+  // XX: Do proxy methods have a dex file? Not sure.
   const DexFile* dex_file = GetDexFile();
-  const DexFile::ProtoId& proto = dex_file->GetMethodPrototype(
-      dex_file->GetMethodId(GetDexMethodIndex()));
+  const DexFile::MethodId* method_id = nullptr;
+
+  if (kIsDebugBuild) {
+    if (UNLIKELY(IsProxyMethod())) {
+      // Proxy method case.
+      CHECK(IsLambdaProxyMethod()) << "Cannot GetParameterTypeList for java.lang.reflect.Proxy";
+
+      //
+      // We do not have a method ID, so look up one of the supers we overrode;
+      // it will have exactly the same parameter type list as we do.
+
+      // Lambda proxy classes have the dex cache from their single interface parent.
+      // Proxy classes have multiple interface parents, so they use the root dexcache instead.
+      //
+      // For lambda proxy classes only, get the type list data from the parent.
+      // (code happens to look the same as the usual non-proxy path).
+    }
+  }
+
+  method_id = &dex_file->GetMethodId(GetDexMethodIndex());
+  DCHECK(method_id != nullptr);
+
+  const DexFile::ProtoId& proto = dex_file->GetMethodPrototype(*method_id);
   return dex_file->GetProtoParameters(proto);
 }
 
@@ -397,12 +418,20 @@
 }
 
 inline mirror::DexCache* ArtMethod::GetDexCache() {
-  DCHECK(!IsProxyMethod());
+  DCHECK(!IsProxyMethod() || IsLambdaProxyMethod());  // OK: lambda proxies use parent dex cache.
   return GetDeclaringClass()->GetDexCache();
 }
 
 inline bool ArtMethod::IsProxyMethod() {
-  return GetDeclaringClass()->IsProxyClass();
+  return GetDeclaringClass()->IsAnyProxyClass();
+}
+
+inline bool ArtMethod::IsReflectProxyMethod() {
+  return GetDeclaringClass()->IsReflectProxyClass();
+}
+
+inline bool ArtMethod::IsLambdaProxyMethod() {
+  return GetDeclaringClass()->IsLambdaProxyClass();
 }
 
 inline ArtMethod* ArtMethod::GetInterfaceMethodIfProxy(size_t pointer_size) {
@@ -448,9 +477,9 @@
 void ArtMethod::VisitRoots(RootVisitorType& visitor, size_t pointer_size) {
   ArtMethod* interface_method = nullptr;
   mirror::Class* klass = declaring_class_.Read();
-  if (UNLIKELY(klass != nullptr && klass->IsProxyClass())) {
+  if (UNLIKELY(klass != nullptr && klass->IsAnyProxyClass())) {
     // For normal methods, dex cache shortcuts will be visited through the declaring class.
-    // However, for proxies we need to keep the interface method alive, so we visit its roots.
+    // However, for any kind of proxy we need to keep the interface method alive, so we visit its roots.
     interface_method = mirror::DexCache::GetElementPtrSize(
         GetDexCacheResolvedMethods(pointer_size),
         GetDexMethodIndex(),
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 5a2d6c3..98f5aee 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -171,8 +171,16 @@
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
+  // Does this method live in a declaring class that is itself a proxy class (of any kind)?
+  // -- Returns true for both java.lang.reflect.Proxy and java.lang.LambdaProxy subclasses.
   bool IsProxyMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Does this method live in a java.lang.reflect.Proxy subclass?
+  bool IsReflectProxyMethod() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Does this method live in a java.lang.LambdaProxy subclass?
+  bool IsLambdaProxyMethod() SHARED_REQUIRES(Locks::mutator_lock_);
+
   bool IsPreverified() {
     return (GetAccessFlags() & kAccPreverified) != 0;
   }
@@ -274,7 +282,15 @@
                                             uint32_t name_and_signature_idx)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result, const char* shorty)
+  // Invoke this method, passing all the virtual registers in args.
+  // -- args_size must be the size in bytes (not size in words)!
+  // -- shorty must be the method shorty (i.e. it includes the return type).
+  // The result is set when the method finishes execution successfully.
+  void Invoke(Thread* self,
+              uint32_t* args,
+              uint32_t args_size,  // NOTE: size in bytes
+              /*out*/JValue* result,
+              const char* shorty)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   const void* GetEntryPointFromQuickCompiledCode() {
@@ -428,6 +444,9 @@
 
   mirror::DexCache* GetDexCache() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns the current method ('this') if this is a regular, non-proxy method.
+  // Otherwise, when the declaring class is a proxy (IsProxyMethod), looks up the original
+  // interface method (that the proxy is "overriding") and returns that instead.
   ALWAYS_INLINE ArtMethod* GetInterfaceMethodIfProxy(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index b548dfb..785a9be 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -19,9 +19,12 @@
 
 #if defined(__cplusplus)
 #include "art_method.h"
+#include "lambda/art_lambda_method.h"
+#include "lambda/closure.h"
 #include "gc/allocator/rosalloc.h"
 #include "lock_word.h"
 #include "mirror/class.h"
+#include "mirror/lambda_proxy.h"
 #include "mirror/string.h"
 #include "runtime.h"
 #include "thread.h"
@@ -49,6 +52,8 @@
 #define ADD_TEST_EQ(x, y) CHECK_EQ(x, y);
 #endif
 
+namespace art {
+
 static inline void CheckAsmSupportOffsetsAndSizes() {
 #else
 #define ADD_TEST_EQ(x, y)
@@ -298,9 +303,80 @@
             static_cast<int32_t>(art::gc::allocator::RosAlloc::RunSlotNextOffset()))
 // Assert this so that we can avoid zeroing the next field by installing the class pointer.
 ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, MIRROR_OBJECT_CLASS_OFFSET)
+// Working with raw lambdas (lambda::Closure) in raw memory:
+//
+//     |---------------------|
+//     | ArtLambdaMethod*    |  <-- pointer to lambda art method, has the info like the size.
+//     |---------------------|  <-- 'data offset'
+//     | [ Dynamic Size ]    |  <-- OPTIONAL: only if the ArtLambdaMethod::dynamic_size_ is true.
+//     |---------------------|
+//     | Captured Variables  |
+//     |        ...          |
+//     |---------------------|  <-- total length determined by "dynamic size" if it is present,
+//                                  otherwise by the ArtLambdaMethod::static_size_
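+//
+// Rough sketch of how this layout would be read from C++ (illustrative only, not part of this
+// change; the names below follow the diagram rather than any exact runtime API):
+//
+//   auto* lambda_info = *reinterpret_cast<art::lambda::ArtLambdaMethod* const*>(closure);
+//   const uint8_t* data = closure + LAMBDA_CLOSURE_DATA_OFFSET;  // defined just below
+//   // If lambda_info says the closure is dynamically sized, the total length is stored at
+//   // 'data'; otherwise it is ArtLambdaMethod::static_size_.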
+
+// Offset from start of lambda::Closure to the ArtLambdaMethod*.
+#define LAMBDA_CLOSURE_METHOD_OFFSET 0
+ADD_TEST_EQ(static_cast<size_t>(LAMBDA_CLOSURE_METHOD_OFFSET),
+            offsetof(art::lambda::ClosureStorage, lambda_info_))
+// Offset from the start of lambda::Closure to the data (captured vars or dynamic size).
+#define LAMBDA_CLOSURE_DATA_OFFSET __SIZEOF_POINTER__
+ADD_TEST_EQ(static_cast<size_t>(LAMBDA_CLOSURE_DATA_OFFSET),
+            offsetof(art::lambda::ClosureStorage, captured_))
+// Offsets to captured variables intentionally omitted as it needs a runtime branch.
+
+// The size of a lambda closure after it's been compressed down for storage.
+// -- Although a lambda closure is a virtual register pair (64-bit), we only need 32-bit
+//    to track the pointer when we are on 32-bit architectures.
+//    Both the compiler and the runtime therefore compress the closure down for 32-bit archs.
+#define LAMBDA_CLOSURE_COMPRESSED_POINTER_SIZE __SIZEOF_POINTER__
+ADD_TEST_EQ(static_cast<size_t>(LAMBDA_CLOSURE_COMPRESSED_POINTER_SIZE),
+            sizeof(art::lambda::Closure*))
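+// (On a 32-bit target this is 4 bytes; the unused upper half of the 64-bit virtual register pair
+//  is zero-filled -- see LAMBDA_INVOKE_USES_LONG below.)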
+
+// Working with boxed innate lambdas (as a mirror::Object) in raw memory:
+// --- Note that this layout only applies to lambdas originally made with create-lambda.
+// --- Boxing a lambda created from a new-instance instruction is simply the original object.
+//
+//     |---------------------|
+//     |   object header     |
+//     |---------------------|
+//     | lambda::Closure*    | <-- long on 64-bit, int on 32-bit
+//     |---------------------|
+#define MIRROR_OBJECT_BOXED_INNATE_LAMBDA_CLOSURE_POINTER_OFFSET (MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(static_cast<size_t>(MIRROR_OBJECT_BOXED_INNATE_LAMBDA_CLOSURE_POINTER_OFFSET),
+            art::mirror::LambdaProxy::GetInstanceFieldOffsetClosure().SizeValue())
+            // Equivalent to (private) offsetof(art::mirror::LambdaProxy, closure_))
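+// (From C++ this pointer is read via mirror::LambdaProxy::GetClosure(); the raw offset above is
+//  what the assembly stubs and compiled code use instead.)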
+
+// Working with lambda method metadata (lambda::ArtLambdaMethod) in raw memory:
+// --- The ArtMethod* that backs the lambda lives at offset 0; the remaining fields
+//     (e.g. the closure size information referenced above) follow it.
+//
+//     |---------------------|
+//     | ArtMethod*          |  <-- target method backing this lambda
+//     |---------------------|
+//     |        ...          |  <-- remaining metadata (e.g. static/dynamic closure size)
+//     |---------------------|
+#define ART_LAMBDA_METHOD_ART_METHOD_OFFSET (0)
+ADD_TEST_EQ(static_cast<size_t>(ART_LAMBDA_METHOD_ART_METHOD_OFFSET),
+            art::lambda::ArtLambdaMethod::GetArtMethodOffset())
+
+#if defined(NDEBUG)
+// Release should be faaast. So just jump directly to the lambda method.
+#define LAMBDA_PROXY_SETUP_FRAME 0
+#else
+// Debug can be slower, and we want to get better stack traces. Set up a frame.
+#define LAMBDA_PROXY_SETUP_FRAME 1
+#endif
+
+// For WIP implementation, lambda types are all "longs"
+// which means on a 32-bit implementation we need to fill the argument with 32-bit 0s
+// whenever we invoke a method with a lambda in it.
+// TODO: remove all usages of this once we go to a proper lambda type system.
+#define LAMBDA_INVOKE_USES_LONG 1
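+
+// Illustrative sketch only (not part of this change): with LAMBDA_INVOKE_USES_LONG, a 32-bit
+// closure pointer is zero-extended into the 64-bit 'long' argument, e.g. in C++:
+//   uint64_t closure_as_long = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(closure));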
 
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
+}  // namespace art
 #endif
 
 #endif  // ART_RUNTIME_ASM_SUPPORT_H_
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index 969f5b9..e2ade07 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -53,6 +53,7 @@
   kAllocatorTagClassTable,
   kAllocatorTagInternTable,
   kAllocatorTagLambdaBoxTable,
+  kAllocatorTagLambdaProxyClassBoxTable,
   kAllocatorTagMaps,
   kAllocatorTagLOS,
   kAllocatorTagSafeMap,
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 70bd398..6ca56f5 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -65,6 +65,7 @@
 Mutex* Locks::trace_lock_ = nullptr;
 Mutex* Locks::unexpected_signal_lock_ = nullptr;
 Mutex* Locks::lambda_table_lock_ = nullptr;
+Mutex* Locks::lambda_class_table_lock_ = nullptr;
 Uninterruptible Roles::uninterruptible_;
 
 struct AllMutexData {
@@ -954,6 +955,7 @@
     DCHECK(trace_lock_ != nullptr);
     DCHECK(unexpected_signal_lock_ != nullptr);
     DCHECK(lambda_table_lock_ != nullptr);
+    DCHECK(lambda_class_table_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
     LockLevel current_lock_level = kInstrumentEntrypointsLock;
@@ -1072,6 +1074,10 @@
     DCHECK(lambda_table_lock_ == nullptr);
     lambda_table_lock_ = new Mutex("lambda table lock", current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kLambdaClassTableLock);
+    DCHECK(lambda_class_table_lock_ == nullptr);
+    lambda_class_table_lock_ = new Mutex("lambda class table lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kAbortLock);
     DCHECK(abort_lock_ == nullptr);
     abort_lock_ = new Mutex("abort lock", current_lock_level, true);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index d4c9057..e2d7062 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -60,6 +60,7 @@
   kUnexpectedSignalLock,
   kThreadSuspendCountLock,
   kAbortLock,
+  kLambdaClassTableLock,
   kLambdaTableLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
@@ -692,6 +693,10 @@
   // Allow reader-writer mutual exclusion on the boxed table of lambda objects.
   // TODO: this should be a RW mutex lock, except that ConditionVariables don't work with it.
   static Mutex* lambda_table_lock_ ACQUIRED_AFTER(mutator_lock_);
+
+  // Allow reader-writer mutual exclusion on the boxed table of lambda proxy classes.
+  // TODO: this should be a RW mutex lock, except that ConditionVariables don't work with it.
+  static Mutex* lambda_class_table_lock_ ACQUIRED_AFTER(lambda_table_lock_);
 };
 
 class Roles {
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index 0949619..324ab21 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -149,6 +149,13 @@
   return it != container.end();
 }
 
+// const char* compare function suitable for std::map or std::set.
+struct CStringLess {
+  bool operator()(const char* lhs, const char* rhs) const {
+    return strcmp(lhs, rhs) < 0;
+  }
+};
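+// Example usage (illustrative): std::set<const char*, CStringLess> seen_descriptors;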
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_STL_UTIL_H_
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d5a5ea6..8a0d8d4 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -56,6 +56,7 @@
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
+#include "lambda/box_class_table.h"
 #include "leb128.h"
 #include "linear_alloc.h"
 #include "mirror/class.h"
@@ -64,6 +65,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/field.h"
 #include "mirror/iftable-inl.h"
+#include "mirror/lambda_proxy.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
@@ -581,6 +583,9 @@
   // Create java.lang.reflect.Proxy root.
   SetClassRoot(kJavaLangReflectProxy, FindSystemClass(self, "Ljava/lang/reflect/Proxy;"));
 
+  // Create java.lang.LambdaProxy root.
+  SetClassRoot(kJavaLangLambdaProxy, FindSystemClass(self, "Ljava/lang/LambdaProxy;"));
+
   // Create java.lang.reflect.Field.class root.
   auto* class_root = FindSystemClass(self, "Ljava/lang/reflect/Field;");
   CHECK(class_root != nullptr);
@@ -1257,6 +1262,7 @@
   }
   delete data.allocator;
   delete data.class_table;
+  delete data.lambda_box_class_table;
 }
 
 mirror::PointerArray* ClassLinker::AllocPointerArray(Thread* self, size_t length) {
@@ -1898,8 +1904,10 @@
 // Special case to get oat code without overwriting a trampoline.
 const void* ClassLinker::GetQuickOatCodeFor(ArtMethod* method) {
   CHECK(method->IsInvokable()) << PrettyMethod(method);
-  if (method->IsProxyMethod()) {
+  if (method->IsReflectProxyMethod()) {
     return GetQuickProxyInvokeHandler();
+  } else if (method->IsLambdaProxyMethod()) {
+    return GetQuickLambdaProxyInvokeHandler();
   }
   bool found;
   OatFile::OatMethod oat_method = FindOatMethodFor(method, &found);
@@ -3257,7 +3265,7 @@
   klass->SetName(soa.Decode<mirror::String*>(name));
   klass->SetDexCache(GetClassRoot(kJavaLangReflectProxy)->GetDexCache());
   mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, self);
-  std::string descriptor(GetDescriptorForProxy(klass.Get()));
+  std::string descriptor(GetDescriptorForAnyProxy(klass.Get()));
   const size_t hash = ComputeModifiedUtf8Hash(descriptor.c_str());
 
   // Needs to be before we insert the class so that the allocator field is set.
@@ -3377,23 +3385,228 @@
                                                decoded_name->ToModifiedUtf8().c_str()));
     CHECK_EQ(PrettyField(klass->GetStaticField(1)), throws_field_name);
 
-    CHECK_EQ(klass.Get()->GetInterfaces(),
+    CHECK_EQ(klass.Get()->GetInterfacesForAnyProxy(),
              soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
-    CHECK_EQ(klass.Get()->GetThrows(),
+    CHECK_EQ(klass.Get()->GetThrowsForAnyProxy(),
              soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>*>(throws));
   }
   return klass.Get();
 }
 
-std::string ClassLinker::GetDescriptorForProxy(mirror::Class* proxy_class) {
-  DCHECK(proxy_class->IsProxyClass());
+mirror::Class* ClassLinker::CreateLambdaProxyClass(ScopedObjectAccessAlreadyRunnable& soa,
+                                                   jstring name,
+                                                   jobjectArray interfaces,
+                                                   jobject loader,
+                                                   jobjectArray methods,
+                                                   jobjectArray throws,
+                                                   bool* already_exists) {
+  DCHECK(already_exists != nullptr);
+  *already_exists = false;
+
+  Thread* self = soa.Self();
+  StackHandleScope<10> hs(self);
+
+  // Allocate a new java.lang.Class object for a mirror::LambdaProxy.
+  MutableHandle<mirror::Class> klass =
+      hs.NewHandle(AllocClass(self, GetClassRoot(kJavaLangClass), sizeof(mirror::Class)));
+  if (klass.Get() == nullptr) {
+    CHECK(self->IsExceptionPending());  // OOME.
+    return nullptr;
+  }
+  DCHECK(klass->GetClass() != nullptr);
+  klass->SetObjectSize(sizeof(mirror::LambdaProxy));
+
+  // Set the class access flags incl. preverified, so we do not try to set the flag on the methods.
+  klass->SetAccessFlags(kAccClassIsLambdaProxy | kAccPublic | kAccFinal | kAccPreverified);
+  klass->SetClassLoader(soa.Decode<mirror::ClassLoader*>(loader));
+  DCHECK_EQ(klass->GetPrimitiveType(), Primitive::kPrimNot);
+  klass->SetName(soa.Decode<mirror::String*>(name));
+  klass->SetDexCache(GetClassRoot(kJavaLangLambdaProxy)->GetDexCache());
+  // Set the status to just after loading, but before anything is resolved.
+  mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, self);
+  // Convert "foo.bar.baz" string to "Lfoo/bar/baz;"
+  std::string type_descriptor(GetDescriptorForAnyProxy(klass.Get()));
+
+  mirror::Class* existing;
+  {
+    const size_t hash = ComputeModifiedUtf8Hash(type_descriptor.c_str());
+
+    // Insert the class before loading the fields as the field roots
+    // (ArtField::declaring_class_) are only visited from the class
+    // table. There can't be any suspend points between inserting the
+    // class and setting the field arrays below.
+    existing = InsertClass(type_descriptor.c_str(), klass.Get(), hash);
+  }
+  if (UNLIKELY(existing != nullptr)) {
+    // We had already made this lambda proxy class previously; return the existing one.
+    // The GC will clean up the unused class we allocated above.
+    *already_exists = true;
+    return existing;
+  }
+
+  // Needs to be after we insert the class so that the allocator field is set.
+  LinearAlloc* const allocator = GetOrCreateAllocatorForClassLoader(klass->GetClassLoader());
+
+  // Instance fields are inherited, but we add a couple of static fields...
+  LengthPrefixedArray<ArtField>* sfields =
+      AllocArtFieldArray(self, allocator, mirror::LambdaProxy::kStaticFieldCount);
+  klass->SetSFieldsPtr(sfields);
+
+  // 1. Create a static field 'interfaces' that holds the _declared_ interfaces implemented by
+  // our proxy, so Class.getInterfaces doesn't return the flattened set.
+  // -- private static java.lang.Class[] interfaces;  // list of declared interfaces
+  ArtField& interfaces_sfield = sfields->At(mirror::LambdaProxy::kStaticFieldIndexInterfaces);
+  interfaces_sfield.SetDexFieldIndex(mirror::LambdaProxy::kStaticFieldIndexInterfaces);
+  interfaces_sfield.SetDeclaringClass(klass.Get());
+  interfaces_sfield.SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
+
+  // 2. Create a static field 'throws' that holds the classes of exceptions thrown by our methods.
+  // This is returned by java.lang.reflect.Method#getExceptionTypes()
+  // --- private static java.lang.Class[][] throws;  // maps vtable id to list of classes.
+  ArtField& throws_sfield = sfields->At(mirror::LambdaProxy::kStaticFieldIndexThrows);
+  throws_sfield.SetDexFieldIndex(mirror::LambdaProxy::kStaticFieldIndexThrows);
+  throws_sfield.SetDeclaringClass(klass.Get());
+  throws_sfield.SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
+
+  // Set up the Constructor method.
+  {
+    // Lambda proxies have 1 direct method, the constructor.
+    static constexpr size_t kNumDirectMethods = 1;
+    LengthPrefixedArray<ArtMethod>* directs = AllocArtMethodArray(self,
+                                                                  allocator,
+                                                                  kNumDirectMethods);
+    // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we
+    // want to throw OOM in the future.
+    if (UNLIKELY(directs == nullptr)) {
+      self->AssertPendingOOMException();
+      return nullptr;
+    }
+    klass->SetDirectMethodsPtr(directs);
+    CreateLambdaProxyConstructor(klass, klass->GetDirectMethodUnchecked(0, image_pointer_size_));
+  }
+
+  // Create virtual method using specified prototypes.
+  auto h_methods = hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Method>*>(methods));
+  DCHECK_EQ(h_methods->GetClass(), mirror::Method::ArrayClass())
+      << PrettyClass(h_methods->GetClass());
+  const size_t num_virtual_methods = h_methods->GetLength();
+  auto* virtuals = AllocArtMethodArray(self, allocator, num_virtual_methods);
+  // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we
+  // want to throw OOM in the future.
+  if (UNLIKELY(virtuals == nullptr)) {
+    self->AssertPendingOOMException();
+    return nullptr;
+  }
+  klass->SetVirtualMethodsPtr(virtuals);
+  size_t abstract_methods = 0;
+  for (size_t i = 0; i < num_virtual_methods; ++i) {
+    ArtMethod* virtual_method = klass->GetVirtualMethodUnchecked(i, image_pointer_size_);
+    ArtMethod* prototype = h_methods->Get(i)->GetArtMethod();
+    if (UNLIKELY((prototype->GetAccessFlags() & kAccDefault) != 0)) {
+      UNIMPLEMENTED(FATAL) << "Lambda proxies don't support default methods yet";
+    }
+    if (prototype->IsAbstract()) {
+      abstract_methods++;
+    }
+    VLOG(class_linker) << "Creating lambda proxy method for " << PrettyMethod(prototype);
+
+    CreateLambdaProxyMethod(klass, prototype, virtual_method);
+    DCHECK(virtual_method->GetDeclaringClass() != nullptr);
+    DCHECK(prototype->GetDeclaringClass() != nullptr);
+  }
+  // Methods inherited from Object and default methods are ignored here; only the abstract count matters.
+  // Sanity check that the prototype interface is indeed compatible with lambdas.
+  DCHECK_EQ(abstract_methods, 1u)
+      << "Interface must be a single-abstract-method type" << PrettyClass(klass.Get());
+
+  // The super class is java.lang.LambdaProxy
+  klass->SetSuperClass(GetClassRoot(kJavaLangLambdaProxy));
+  // Now effectively in the loaded state.
+  mirror::Class::SetStatus(klass, mirror::Class::kStatusLoaded, self);
+  self->AssertNoPendingException();
+
+  MutableHandle<mirror::Class> new_class = hs.NewHandle<mirror::Class>(nullptr);
+  {
+    // Must hold lock on object when resolved.
+    ObjectLock<mirror::Class> resolution_lock(self, klass);
+    // Link the fields and virtual methods, creating vtable and iftables.
+    // The new class will replace the old one in the class table.
+    Handle<mirror::ObjectArray<mirror::Class>> h_interfaces(
+        hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces)));
+
+    {
+      DCHECK_EQ(1, h_interfaces->GetLength()) << "Lambda proxies must implement 1 interface only";
+      mirror::Class* single_abstract_interface = h_interfaces->Get(0);
+      DCHECK(single_abstract_interface != nullptr);
+
+      // Use the dex cache from the interface; this enables most of the dex-based
+      // mechanisms on the class and lets its methods resolve correctly.
+      klass->SetDexCache(single_abstract_interface->GetDexCache());
+    }
+
+    if (!LinkClass(self, type_descriptor.c_str(), klass, h_interfaces, &new_class)) {
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
+      return nullptr;
+    }
+  }
+  CHECK(klass->IsRetired());
+  CHECK_NE(klass.Get(), new_class.Get());
+  klass.Assign(new_class.Get());
+
+  CHECK_EQ(interfaces_sfield.GetDeclaringClass(), klass.Get());
+  interfaces_sfield.SetObject<false>(klass.Get(),
+                                     soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+
+  CHECK_EQ(throws_sfield.GetDeclaringClass(), klass.Get());
+  throws_sfield.SetObject<false>(
+      klass.Get(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
+
+  {
+    // Lock on klass is released. Lock new class object.
+    ObjectLock<mirror::Class> initialization_lock(self, klass);
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusInitialized, self);
+  }
+
+  // Sanity checks
+  if (kIsDebugBuild) {
+    CHECK(klass->GetIFieldsPtr() == nullptr);
+    CheckLambdaProxyConstructor(klass->GetDirectMethod(0, image_pointer_size_));
+
+    for (size_t i = 0; i < num_virtual_methods; ++i) {
+      ArtMethod* virtual_method = klass->GetVirtualMethodUnchecked(i, image_pointer_size_);
+      ArtMethod* prototype = h_methods->Get(i)->GetArtMethod();
+      CheckLambdaProxyMethod(virtual_method, prototype);
+    }
+
+    StackHandleScope<1> hs2(self);
+    Handle<mirror::String> decoded_name = hs2.NewHandle(soa.Decode<mirror::String*>(name));
+    std::string interfaces_field_name(StringPrintf("java.lang.Class[] %s.interfaces",
+                                                   decoded_name->ToModifiedUtf8().c_str()));
+    CHECK_EQ(PrettyField(klass->GetStaticField(0)), interfaces_field_name);
+
+    std::string throws_field_name(StringPrintf("java.lang.Class[][] %s.throws",
+                                               decoded_name->ToModifiedUtf8().c_str()));
+    CHECK_EQ(PrettyField(klass->GetStaticField(1)), throws_field_name);
+
+    CHECK_EQ(klass.Get()->GetInterfacesForAnyProxy(),
+             soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    CHECK_EQ(klass.Get()->GetThrowsForAnyProxy(),
+             soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>*>(throws));
+  }
+  return klass.Get();
+}
+
+std::string ClassLinker::GetDescriptorForAnyProxy(mirror::Class* proxy_class) {
+  DCHECK(proxy_class != nullptr);
+  DCHECK(proxy_class->IsAnyProxyClass());
   mirror::String* name = proxy_class->GetName();
   DCHECK(name != nullptr);
   return DotToDescriptor(name->ToModifiedUtf8().c_str());
 }
 
 ArtMethod* ClassLinker::FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method) {
-  DCHECK(proxy_class->IsProxyClass());
+  DCHECK(proxy_class->IsAnyProxyClass());
   DCHECK(proxy_method->IsProxyMethod());
   {
     Thread* const self = Thread::Current();
@@ -3421,7 +3634,7 @@
 
 void ClassLinker::CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out) {
   // Create constructor for Proxy that must initialize the method.
-  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 16u);
+  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 18u);
   ArtMethod* proxy_constructor = GetClassRoot(kJavaLangReflectProxy)->GetDirectMethodUnchecked(
       2, image_pointer_size_);
   // Ensure constructor is in dex cache so that we can use the dex cache to look up the overridden
@@ -3437,6 +3650,38 @@
   out->SetDeclaringClass(klass.Get());
 }
 
+void ClassLinker::CreateLambdaProxyConstructor(Handle<mirror::Class> klass,
+                                               /*out*/ArtMethod* method_constructor) {
+  DCHECK(klass.Get() != nullptr);
+  DCHECK(method_constructor != nullptr);
+
+  // Create the constructor for the lambda proxy class by cloning it from java.lang.LambdaProxy.
+  // Lambda proxy superclass only has 1 direct method, the constructor (<init>()V)
+  CHECK_EQ(GetClassRoot(kJavaLangLambdaProxy)->NumDirectMethods(),
+           mirror::LambdaProxy::kDirectMethodCount);
+  // Get the constructor method.
+  ArtMethod* proxy_constructor = GetClassRoot(kJavaLangLambdaProxy)->GetDirectMethodUnchecked(
+      mirror::LambdaProxy::kDirectMethodIndexConstructor,
+      image_pointer_size_);
+
+  // Verify constructor method is indeed a constructor.
+  CHECK(proxy_constructor != nullptr);
+
+  // Ensure constructor is in dex cache so that we can use the dex cache to look up the overridden
+  // constructor method.
+  GetClassRoot(kJavaLangLambdaProxy)->GetDexCache()->SetResolvedMethod(
+      proxy_constructor->GetDexMethodIndex(),
+      proxy_constructor,
+      image_pointer_size_);
+
+  // Clone the existing constructor of LambdaProxy
+  // (our constructor would just invoke it so steal its code_ too).
+  method_constructor->CopyFrom(proxy_constructor, image_pointer_size_);
+  // Make this constructor public and fix the class to be our LambdaProxy version
+  method_constructor->SetAccessFlags((method_constructor->GetAccessFlags() & ~kAccProtected) | kAccPublic);
+  method_constructor->SetDeclaringClass(klass.Get());
+}
+
 void ClassLinker::CheckProxyConstructor(ArtMethod* constructor) const {
   CHECK(constructor->IsConstructor());
   auto* np = constructor->GetInterfaceMethodIfProxy(image_pointer_size_);
@@ -3445,6 +3690,14 @@
   DCHECK(constructor->IsPublic());
 }
 
+void ClassLinker::CheckLambdaProxyConstructor(ArtMethod* constructor) const {
+  CHECK(constructor->IsConstructor());
+  auto* np = constructor->GetInterfaceMethodIfProxy(image_pointer_size_);
+  CHECK_STREQ(np->GetName(), "<init>");
+  CHECK_STREQ(np->GetSignature().ToString().c_str(), "()V");
+  DCHECK(constructor->IsPublic());
+}
+
 void ClassLinker::CreateProxyMethod(Handle<mirror::Class> klass, ArtMethod* prototype,
                                     ArtMethod* out) {
   // Ensure prototype is in dex cache so that we can use the dex cache to look up the overridden
@@ -3456,6 +3709,7 @@
     dex_cache->SetResolvedMethod(
         prototype->GetDexMethodIndex(), prototype, image_pointer_size_);
   }
+
   // We steal everything from the prototype (such as DexCache, invoke stub, etc.) then specialize
   // as necessary
   DCHECK(out != nullptr);
@@ -3471,6 +3725,42 @@
   out->SetEntryPointFromQuickCompiledCode(GetQuickProxyInvokeHandler());
 }
 
+void ClassLinker::CreateLambdaProxyMethod(Handle<mirror::Class> klass,
+                                          ArtMethod* prototype,
+                                          ArtMethod* out) {
+  DCHECK(prototype != nullptr);
+  DCHECK(out != nullptr);
+
+  // Default methods must NOT go through the lambda proxy invoke handler: they have no idea
+  // how to handle the raw closure, so they must receive the regular object when invoked.
+  CHECK_EQ(prototype->GetAccessFlags() & kAccDefault, 0u) << "Default methods must not be proxied";
+
+  // Ensure prototype is in dex cache so that we can use the dex cache to look up the overridden
+  // prototype method
+  auto* dex_cache = prototype->GetDeclaringClass()->GetDexCache();
+  // Avoid dirtying the dex cache unless we need to.
+  if (dex_cache->GetResolvedMethod(prototype->GetDexMethodIndex(), image_pointer_size_) !=
+      prototype) {
+    dex_cache->SetResolvedMethod(
+        prototype->GetDexMethodIndex(), prototype, image_pointer_size_);
+  }
+  // We steal everything from the prototype (such as DexCache, invoke stub, etc.) then specialize
+  // as necessary
+  out->CopyFrom(prototype, image_pointer_size_);
+
+  // Set class to be the concrete proxy class and clear the abstract flag, modify exceptions to
+  // the intersection of throw exceptions as defined in Proxy
+  out->SetDeclaringClass(klass.Get());
+  out->SetAccessFlags((out->GetAccessFlags() & ~kAccAbstract) | kAccFinal);
+
+  // Setting the entry point isn't safe for AOT since ASLR loads it anywhere at runtime.
+  CHECK(!Runtime::Current()->IsAotCompiler());
+
+  // At runtime the method looks like a reference and argument saving method, clone the code
+  // related parameters from this method.
+  out->SetEntryPointFromQuickCompiledCode(GetQuickLambdaProxyInvokeHandler());
+}
+
 void ClassLinker::CheckProxyMethod(ArtMethod* method, ArtMethod* prototype) const {
   // Basic sanity
   CHECK(!prototype->IsFinal());
@@ -3492,6 +3782,11 @@
            prototype->GetReturnType(true /* resolve */, image_pointer_size_));
 }
 
+void ClassLinker::CheckLambdaProxyMethod(ArtMethod* method, ArtMethod* prototype) const {
+  // same as above.
+  return CheckProxyMethod(method, prototype);
+}
+
 bool ClassLinker::CanWeInitializeClass(mirror::Class* klass, bool can_init_statics,
                                        bool can_init_parents) {
   if (can_init_statics && can_init_parents) {
@@ -4123,7 +4418,9 @@
     class_loader->SetClassTable(data.class_table);
     // Should have been set when we registered the dex file.
     data.allocator = class_loader->GetAllocator();
-    CHECK(data.allocator != nullptr);
+    CHECK(class_loader->GetLambdaProxyCache() == nullptr);
+    data.lambda_box_class_table = new lambda::BoxClassTable();
+    class_loader->SetLambdaProxyCache(data.lambda_box_class_table);
     class_loaders_.push_back(data);
   }
   return class_table;
@@ -6566,6 +6863,7 @@
     "Ljava/lang/reflect/Field;",
     "Ljava/lang/reflect/Method;",
     "Ljava/lang/reflect/Proxy;",
+    "Ljava/lang/LambdaProxy;",
     "[Ljava/lang/String;",
     "[Ljava/lang/reflect/Constructor;",
     "[Ljava/lang/reflect/Field;",
@@ -6629,7 +6927,9 @@
   }
 }
 
-jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) {
+jobject ClassLinker::CreatePathClassLoader(Thread* self,
+                                           std::vector<const DexFile*>& dex_files,
+                                           jobject parent_loader) {
   // SOAAlreadyRunnable is protected, and we need something to add a global reference.
   // We could move the jobject to the callers, but all call-sites do this...
   ScopedObjectAccessUnchecked soa(self);
@@ -6660,8 +6960,8 @@
   for (const DexFile* dex_file : dex_files) {
     StackHandleScope<3> hs2(self);
 
-    // CreatePathClassLoader is only used by gtests. Index 0 of h_long_array is supposed to be the
-    // oat file but we can leave it null.
+    // CreatePathClassLoader is only used by gtests and dex2oat. Index 0 of h_long_array is
+    // supposed to be the oat file but we can leave it null.
     Handle<mirror::LongArray> h_long_array = hs2.NewHandle(mirror::LongArray::Alloc(
         self,
         kDexFileIndexStart + 1));
@@ -6707,9 +7007,10 @@
       mirror::Class::FindField(self, hs.NewHandle(h_path_class_loader->GetClass()), "parent",
                                "Ljava/lang/ClassLoader;");
   DCHECK(parent_field != nullptr);
-  mirror::Object* boot_cl =
-      soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
-  parent_field->SetObject<false>(h_path_class_loader.Get(), boot_cl);
+  mirror::Object* parent = (parent_loader != nullptr)
+      ? soa.Decode<mirror::ClassLoader*>(parent_loader)
+      : soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
+  parent_field->SetObject<false>(h_path_class_loader.Get(), parent);
 
   // Make it a global ref and return.
   ScopedLocalRef<jobject> local_ref(
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 5ba9652..f073cd8 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -40,6 +40,11 @@
   class ImageSpace;
 }  // namespace space
 }  // namespace gc
+
+namespace lambda {
+  class BoxClassTable;
+}  // namespace lambda
+
 namespace mirror {
   class ClassLoader;
   class DexCache;
@@ -82,6 +87,7 @@
     kJavaLangReflectField,
     kJavaLangReflectMethod,
     kJavaLangReflectProxy,
+    kJavaLangLambdaProxy,
     kJavaLangStringArrayClass,
     kJavaLangReflectConstructorArrayClass,
     kJavaLangReflectFieldArrayClass,
@@ -424,12 +430,46 @@
                                   jobjectArray methods,
                                   jobjectArray throws)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  std::string GetDescriptorForProxy(mirror::Class* proxy_class)
+
+  // Get the long-form type descriptor, e.g. "LProxyName$1234;", for the requested proxy class.
+  static std::string GetDescriptorForAnyProxy(mirror::Class* proxy_class)
       SHARED_REQUIRES(Locks::mutator_lock_);
   ArtMethod* FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Create a lambda proxy class.
+  // -- Nominally used when boxing an innate lambda, since that has no corresponding class.
+  //
+  // * name must be a fully-qualified class name (and dotted), e.g. "java.lang.Runnable"
+  // * interfaces is an array of java.lang.Class for interfaces that will be the supertype
+  //   (note that there must be exactly 1 element here for a lambda interface since lambda
+  //   types can only target 1 interface).
+  // * loader must be a java.lang.ClassLoader where the proxy class will be created
+  // * methods must be an array of java.lang.reflect.Method that consists of the
+  //   deduplicated methods from all of the interfaces specified.
+  // * throws must be an array of java.lang.Class[] where each index corresponds to that of
+  //   methods, and it signifies the "throws" keyword of each method
+  //   (this is not directly used by the runtime itself, but it is available via reflection).
+  //
+  // Returns a non-null pointer to a class upon success, otherwise null and throws an exception.
+  //
+  // If the class was already created previously (with the same name but potentially different
+  // parameters), already_exists is set to true; otherwise already_exists is set to false.
+  // The already_exists value is undefined when an exception was thrown.
+  //
+  // Sidenote: interfaces is an array to simplify the libcore code which creates a Java
+  // array in an attempt to reduce code duplication.
+  // TODO: this should probably also take the target single-abstract-method as well.
+  mirror::Class* CreateLambdaProxyClass(ScopedObjectAccessAlreadyRunnable& soa,
+                                        jstring name,
+                                        jobjectArray interfaces,
+                                        jobject loader,
+                                        jobjectArray methods,
+                                        jobjectArray throws,
+                                        /*out*/bool* already_exists)
+      SHARED_REQUIRES(Locks::mutator_lock_);
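+
+  // Illustrative call shape only (not part of this change); assumes the caller already holds
+  // JNI references for the name, the single interface, the SAM methods array and the throws
+  // array:
+  //   bool already_exists;
+  //   mirror::Class* proxy = class_linker->CreateLambdaProxyClass(
+  //       soa, name, interfaces, loader, methods, throws, /*out*/&already_exists);
+  //   if (proxy == nullptr) { /* an exception is pending */ }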
+
   // Get the oat code for a method when its class isn't yet initialized
   const void* GetQuickOatCodeFor(ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -514,7 +554,10 @@
 
   // Creates a GlobalRef PathClassLoader that can be used to load classes from the given dex files.
   // Note: the objects are not completely set up. Do not use this outside of tests and the compiler.
-  jobject CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files)
+  // If parent_loader is null then we use the boot class loader.
+  jobject CreatePathClassLoader(Thread* self,
+                                std::vector<const DexFile*>& dex_files,
+                                jobject parent_loader)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
@@ -570,6 +613,7 @@
     jweak weak_root;  // Weak root to enable class unloading.
     ClassTable* class_table;
     LinearAlloc* allocator;
+    lambda::BoxClassTable* lambda_box_class_table;
   };
 
   // Ensures that the supertype of 'klass' ('supertype') is verified. Returns false and throws
@@ -904,8 +948,12 @@
 
   void CheckProxyConstructor(ArtMethod* constructor) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  void CheckLambdaProxyConstructor(ArtMethod* constructor) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void CheckProxyMethod(ArtMethod* method, ArtMethod* prototype) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  void CheckLambdaProxyMethod(ArtMethod* method, ArtMethod* prototype) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // For use by ImageWriter to find DexCaches for its roots
   ReaderWriterMutex* DexLock()
@@ -923,9 +971,19 @@
 
   void CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Copy the constructor from java.lang.LambdaProxy into the 'klass'.
+  // The copy is written into 'method_constructor'.
+  void CreateLambdaProxyConstructor(Handle<mirror::Class> klass,
+                                    /*out*/ArtMethod* method_constructor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void CreateProxyMethod(Handle<mirror::Class> klass, ArtMethod* prototype, ArtMethod* out)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void CreateLambdaProxyMethod(Handle<mirror::Class> klass, ArtMethod* prototype, ArtMethod* out)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Ensures that methods have the kAccPreverified bit set. We use the kAccPreverfied bit on the
   // class access flags to determine whether this has been done before.
   void EnsurePreverifiedMethods(Handle<mirror::Class> c)
@@ -937,7 +995,10 @@
   // Returns null if not found.
   ClassTable* ClassTableForClassLoader(mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::mutator_lock_, Locks::classlinker_classes_lock_);
-  // Insert a new class table if not found.
+
+  // Insert a new class table if not found. Uses bootclasspath if class_loader is null.
+  // Returns either the existing table, or the new one if there wasn't one previously
+  // (the return value is always non-null).
   ClassTable* InsertClassTableForClassLoader(mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(Locks::classlinker_classes_lock_);
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 2c086c5..4a9db1d 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -31,6 +31,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
 #include "mirror/field.h"
+#include "mirror/lambda_proxy.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/proxy.h"
@@ -552,6 +553,7 @@
   ClassLoaderOffsets() : CheckOffsets<mirror::ClassLoader>(false, "Ljava/lang/ClassLoader;") {
     addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, allocator_), "allocator");
     addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, class_table_), "classTable");
+    addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, lambda_proxy_cache_), "lambdaProxyCache");
     addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, packages_), "packages");
     addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, parent_), "parent");
     addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, proxyCache_), "proxyCache");
@@ -564,6 +566,13 @@
   };
 };
 
+struct LambdaProxyOffsets : public CheckOffsets<mirror::LambdaProxy> {
+  LambdaProxyOffsets() : CheckOffsets<mirror::LambdaProxy>(false, "Ljava/lang/LambdaProxy;") {
+    addOffset(OFFSETOF_MEMBER(mirror::LambdaProxy, closure_), "closure");
+  };
+};
+
 struct DexCacheOffsets : public CheckOffsets<mirror::DexCache> {
   DexCacheOffsets() : CheckOffsets<mirror::DexCache>(false, "Ljava/lang/DexCache;") {
     addOffset(OFFSETOF_MEMBER(mirror::DexCache, dex_), "dex");
@@ -639,6 +648,7 @@
   EXPECT_TRUE(StackTraceElementOffsets().Check());
   EXPECT_TRUE(ClassLoaderOffsets().Check());
   EXPECT_TRUE(ProxyOffsets().Check());
+  EXPECT_TRUE(LambdaProxyOffsets().Check());
   EXPECT_TRUE(DexCacheOffsets().Check());
   EXPECT_TRUE(ReferenceOffsets().Check());
   EXPECT_TRUE(FinalizerReferenceOffsets().Check());
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index b6b5141..e84313c 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -55,7 +55,6 @@
   // Gtests can be very noisy. For example, an executable with multiple tests will trigger native
   // bridge warnings. The following line reduces the minimum log severity to ERROR and suppresses
   // everything else. In case you want to see all messages, comment out the line.
-  setenv("ANDROID_LOG_TAGS", "*:e", 1);
 
   art::InitLogging(argv);
   LOG(::art::INFO) << "Running main() from common_runtime_test.cc...";
@@ -553,7 +552,8 @@
 
   Thread* self = Thread::Current();
   jobject class_loader = Runtime::Current()->GetClassLinker()->CreatePathClassLoader(self,
-                                                                                     class_path);
+                                                                                     class_path,
+                                                                                     nullptr);
   self->SetClassLoaderOverride(class_loader);
   return class_loader;
 }
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 70096f5..4163e2e 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -1870,10 +1870,10 @@
         Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
         ArtField* enum_field = Runtime::Current()->GetClassLinker()->ResolveField(
             klass->GetDexFile(), index, dex_cache, class_loader, true);
-        Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass()));
         if (enum_field == nullptr) {
           return false;
         } else {
+          Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass()));
           Runtime::Current()->GetClassLinker()->EnsureInitialized(self, field_class, true, true);
           element_object = enum_field->GetObject(field_class.Get());
           set_object = true;
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 87e29ae..2a92226 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -313,7 +313,7 @@
           reinterpret_cast<uintptr_t>(virtual_methods)) / method_size;
       CHECK_LT(throws_index, static_cast<int>(num_virtuals));
       mirror::ObjectArray<mirror::Class>* declared_exceptions =
-          proxy_class->GetThrows()->Get(throws_index);
+          proxy_class->GetThrowsForAnyProxy()->Get(throws_index);
       mirror::Class* exception_class = exception->GetClass();
       bool declares_exception = false;
       for (int32_t i = 0; i < declared_exceptions->GetLength() && !declares_exception; i++) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index abf9ac4..8c2dc3e 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -23,9 +23,12 @@
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/card_table-inl.h"
 #include "interpreter/interpreter.h"
+#include "lambda/closure.h"
+#include "lambda/art_lambda_method.h"
 #include "method_reference.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
+#include "mirror/lambda_proxy.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
@@ -294,7 +297,8 @@
   // 1st GPR.
   static mirror::Object* GetProxyThisObject(ArtMethod** sp)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    CHECK((*sp)->IsProxyMethod());
+    // TODO: Lambda proxies only set up a frame when debugging
+    CHECK((*sp)->IsReflectProxyMethod() || ((*sp)->IsLambdaProxyMethod() /*&& kIsDebugBuild*/));
     CHECK_GT(kNumQuickGprArgs, 0u);
     constexpr uint32_t kThisGprIndex = 0u;  // 'this' is in the 1st GPR.
     size_t this_arg_offset = kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset +
@@ -834,8 +838,9 @@
 extern "C" uint64_t artQuickProxyInvokeHandler(
     ArtMethod* proxy_method, mirror::Object* receiver, Thread* self, ArtMethod** sp)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  DCHECK(proxy_method->IsProxyMethod()) << PrettyMethod(proxy_method);
-  DCHECK(receiver->GetClass()->IsProxyClass()) << PrettyMethod(proxy_method);
+  DCHECK(proxy_method->GetDeclaringClass()->IsReflectProxyClass()) << PrettyMethod(proxy_method);
+  DCHECK(proxy_method->IsReflectProxyMethod()) << PrettyMethod(proxy_method);
+  DCHECK(receiver->GetClass()->IsReflectProxyClass()) << PrettyMethod(proxy_method);
   // Ensure we don't get thread suspension until the object arguments are safely in jobjects.
   const char* old_cause =
       self->StartAssertNoThreadSuspension("Adding to IRT proxy object arguments");
@@ -878,6 +883,175 @@
   return result.GetJ();
 }
 
+extern "C" uint64_t artQuickLambdaProxyInvokeHandler(
+    ArtMethod* proxy_method, mirror::LambdaProxy* receiver, Thread* self, ArtMethod** sp)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  using lambda::ShortyFieldType;
+
+  DCHECK(proxy_method->GetDeclaringClass()->IsLambdaProxyClass()) << PrettyMethod(proxy_method);
+  DCHECK(proxy_method->IsLambdaProxyMethod()) << PrettyMethod(proxy_method);
+  DCHECK(receiver->GetClass()->IsLambdaProxyClass()) << PrettyMethod(proxy_method);
+
+  lambda::Closure* lambda_closure = receiver->GetClosure();
+  DCHECK(lambda_closure != nullptr);  // Should've NPEd during the invoke-interface.
+  // Learned lambdas have their own implementation of the SAM; they must not go through here.
+  DCHECK(lambda_closure->GetLambdaInfo()->IsInnateLambda());
+  ArtMethod* target_method = lambda_closure->GetTargetMethod();
+
+  // Lambda targets are always static.
+  // TODO: This should really be a target_method->IsLambda(), once we add the access flag.
+  CHECK(target_method->IsStatic()) << PrettyMethod(proxy_method) << " "
+                                   << PrettyMethod(target_method);
+
+  // Ensure we don't get thread suspension until the object arguments are safely in jobjects.
+  const char* old_cause =
+      self->StartAssertNoThreadSuspension("Adding to IRT/SF lambda proxy object arguments");
+  // Register the top of the managed stack, making stack crawlable.
+  DCHECK_EQ((*sp), proxy_method) << PrettyMethod(proxy_method);
+  self->VerifyStack();
+  // Start new JNI local reference state.
+  JNIEnvExt* env = self->GetJniEnv();
+  ScopedObjectAccessUnchecked soa(env);
+
+  // Place the arguments into the args vector and remove the receiver.
+  ArtMethod* non_proxy_method = proxy_method->GetInterfaceMethodIfProxy(sizeof(void*));
+  CHECK(!non_proxy_method->IsStatic()) << PrettyMethod(proxy_method) << " "
+                                       << PrettyMethod(non_proxy_method);
+  uint32_t shorty_len = 0;
+  const char* shorty = non_proxy_method->GetShorty(/*out*/&shorty_len);
+
+  std::vector<jvalue> args;
+  // Make a quick visitor so we can restore the refs in case they move during a GC.
+  BuildQuickArgumentVisitor local_ref_visitor(sp,
+                                              false /*is_static*/,
+                                              shorty,
+                                              shorty_len,
+                                              &soa,
+                                              /*out*/&args);
+  local_ref_visitor.VisitArguments();
+
+  static_assert(lambda::kClosureIsStoredAsLong,
+                "Need to update this code once closures are no "
+                "longer treated as a 'long' in quick abi");
+
+  // Allocate one vreg more than usual because we need to convert our
+  // receiver Object (1 vreg) into a long (2 vregs).
+  // TODO: Ugly... move to traits instead?
+  const uint32_t first_arg_reg = ShortyFieldType(ShortyFieldType::kLambda).GetVirtualRegisterCount()
+        - ShortyFieldType(ShortyFieldType::kObject).GetVirtualRegisterCount();
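+  // (Illustrative arithmetic: a lambda closure occupies 2 vregs and an object reference 1 vreg,
+  //  so first_arg_reg == 2 - 1 == 1.)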
+  const uint32_t num_vregs = lambda_closure->GetLambdaInfo()->GetArgumentVRegCount();
+  DCHECK_GE(num_vregs, first_arg_reg);
+  if (kIsDebugBuild) {
+    const char* method_shorty = non_proxy_method->GetShorty();
+    DCHECK_NE(*method_shorty, '\0') << method_shorty;
+    const char* arg_shorty = method_shorty + 1;  // Skip return type.
+
+    // Proxy method should have an object (1 vreg) receiver,
+    // Lambda method should have a lambda (2 vregs) receiver.
+    // -- All other args are the same as before.
+    // -- Make sure vreg count is what we thought it was.
+    uint32_t non_proxy_num_vregs =
+        ShortyFieldType::CountVirtualRegistersRequired(arg_shorty)  // doesn't count receiver
+        + ShortyFieldType(ShortyFieldType::kObject).GetVirtualRegisterCount();  // implicit receiver
+
+    CHECK_EQ(non_proxy_num_vregs + first_arg_reg, num_vregs)
+        << PrettyMethod(non_proxy_method) << " " << PrettyMethod(lambda_closure->GetTargetMethod());
+  }
+
+  ShadowFrameAllocaUniquePtr shadow_frame = CREATE_SHADOW_FRAME(num_vregs,
+                                                                /*link*/nullptr,
+                                                                target_method,
+                                                                /*dex_pc*/0);
+
+  // Copy our proxy method caller's arguments into this ShadowFrame.
+  BuildQuickShadowFrameVisitor local_sf_visitor(sp,
+                                                /*is_static*/false,
+                                                shorty,
+                                                shorty_len,
+                                                shadow_frame.get(),
+                                                first_arg_reg);
+
+  local_sf_visitor.VisitArguments();
+  // Now fix up the arguments, with each ArgK being a vreg:
+
+  // (Before):
+  // Arg0 = proxy receiver (LambdaProxy)
+  // Arg1 = first user-defined argument
+  // Arg2 = second user-defined argument
+  // ....
+  // ArgN = ...
+
+  // (After)
+  // Arg0 = closure (hi)
+  // Arg1 = closure (lo) = 0x00 on 32-bit
+  // Arg2 = <?> (first user-defined argument)
+  // Arg3 = <?> (second user-defined argument)
+  // ...
+  // ArgN+1 = ...
+
+  // Transformation diagram:
+  /*
+     Arg0  Arg1  Arg2 ... ArgN          <-- (before) proxy receiver, then user arguments
+       |      \     \        \
+       |       \     \        \
+     ClHi  ClLo  Arg1  Arg2 ... ArgN    <-- (after) closure pair, then user arguments shifted up by one
+   */
+
+  // 1) memmove vregs 1-N into 2-N+1
+  uint32_t* shadow_frame_vregs = shadow_frame->GetVRegArgs(/*i*/0);
+  if (lambda::kClosureIsStoredAsLong ||
+      sizeof(void*) != sizeof(mirror::CompressedReference<mirror::LambdaProxy>)) {
+    // Suspending here would be very bad since we are doing a raw memmove
+
+    // Move the primitive vregs over.
+    {
+      size_t shadow_frame_vregs_size = num_vregs;
+      memmove(shadow_frame_vregs + first_arg_reg,
+              shadow_frame_vregs,
+              (shadow_frame_vregs_size - first_arg_reg) * sizeof(uint32_t));  // memmove counts bytes.
+    }
+
+    // Move the reference vregs over.
+    if (LIKELY(shadow_frame->HasReferenceArray())) {
+      uint32_t* shadow_frame_references = shadow_frame_vregs + num_vregs;
+      size_t shadow_frame_references_size = num_vregs;
+      memmove(shadow_frame_references + first_arg_reg,
+              shadow_frame_references,
+              (shadow_frame_references_size - first_arg_reg) * sizeof(uint32_t));  // bytes, not vregs.
+    }
+
+    static_assert(lambda::kClosureSupportsReadBarrier == false,
+                  "Using this memmove code with a read barrier GC seems like it could be unsafe.");
+
+    static_assert(sizeof(mirror::CompressedReference<mirror::LambdaProxy>) == sizeof(uint32_t),
+                  "This block of code assumes a compressed reference fits into exactly 1 vreg");
+  }
+  // 2) replace proxy receiver with lambda
+  shadow_frame->SetVRegLong(0, static_cast<int64_t>(reinterpret_cast<uintptr_t>(lambda_closure)));
+
+  // OK: After we do the invoke, the target method takes over managing the arguments
+  //     and we won't ever access the shadow frame again (if any references moved).
+  self->EndAssertNoThreadSuspension(old_cause);
+
+  // The shadow frame vreg contents are now 'owned' by the Invoke method, and
+  // will be managed by it during a GC despite being a raw uint32_t array.
+  // We however have no guarantee that it is updated on the way out, so do not read out of the
+  // shadow frame after this call.
+  JValue result;
+  target_method->Invoke(self,
+                        shadow_frame_vregs,
+                        num_vregs * sizeof(uint32_t),
+                        /*out*/&result,
+                        target_method->GetShorty());
+
+  // Restore references on the proxy caller stack frame which might have moved.
+  // -- This is necessary because the QuickFrameInfo is just the generic runtime "RefsAndArgs"
+  //    which means that the regular stack visitor wouldn't know how to GC-move any references
+  //    that we spilled ourselves in the proxy stub.
+  local_ref_visitor.FixupReferences();
+  return result.GetJ();
+}
+
 // Read object references held in arguments from quick frames and place in a JNI local references,
 // so they don't get garbage collected.
 class RememberForGcArgumentVisitor FINAL : public QuickArgumentVisitor {
diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h
index 2842c5a..1ef7585 100644
--- a/runtime/entrypoints/runtime_asm_entrypoints.h
+++ b/runtime/entrypoints/runtime_asm_entrypoints.h
@@ -17,6 +17,10 @@
 #ifndef ART_RUNTIME_ENTRYPOINTS_RUNTIME_ASM_ENTRYPOINTS_H_
 #define ART_RUNTIME_ENTRYPOINTS_RUNTIME_ASM_ENTRYPOINTS_H_
 
+// Define entry points to assembly routines.
+// All extern "C" functions here are defined in a corresponding assembly-only file.
+// The exact file paths are runtime/arch/$ISA/quick_entrypoints_$ISA.S
+
 namespace art {
 
 #ifndef BUILDING_LIBART
@@ -52,6 +56,13 @@
   return reinterpret_cast<const void*>(art_quick_proxy_invoke_handler);
 }
 
+// Return the address of quick stub code for handling transitions into the lambda proxy
+// invoke handler.
+extern "C" void art_quick_lambda_proxy_invoke_handler();
+static inline const void* GetQuickLambdaProxyInvokeHandler() {
+  return reinterpret_cast<const void*>(art_quick_lambda_proxy_invoke_handler);
+}
+
 // Return the address of quick stub code for resolving a method at first call.
 extern "C" void art_quick_resolution_trampoline(ArtMethod*);
 static inline const void* GetQuickResolutionStub() {
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 1cd7983..bcfcb89 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1080,7 +1080,7 @@
                 !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
     // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
     // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
-    CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
+    DCHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
   } else {
     // We may occasionally leave a Reference black or white in the queue if its referent happens to
     // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
@@ -1089,9 +1089,10 @@
     if (kUseBakerReadBarrier) {
       if (region_space_->IsInToSpace(to_ref)) {
         // If to-space, change from gray to white.
-        bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                           ReadBarrier::WhitePtr());
-        CHECK(success) << "Must succeed as we won the race.";
+        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+            ReadBarrier::GrayPtr(),
+            ReadBarrier::WhitePtr());
+        DCHECK(success) << "Must succeed as we won the race.";
         DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
       } else {
         // If non-moving space/unevac from space, change from gray
@@ -1101,9 +1102,10 @@
         // indicate non-moving objects that have been marked
         // through. Note we'd need to change from black to white
         // later (concurrently).
-        bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                           ReadBarrier::BlackPtr());
-        CHECK(success) << "Must succeed as we won the race.";
+        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+            ReadBarrier::GrayPtr(),
+            ReadBarrier::BlackPtr());
+        DCHECK(success) << "Must succeed as we won the race.";
         DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
       }
     }
@@ -1227,9 +1229,6 @@
  public:
   explicit ConcurrentCopyingClearBlackPtrsVisitor(ConcurrentCopying* cc)
       : collector_(cc) {}
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-  NO_RETURN
-#endif
   void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
       SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
     DCHECK(obj != nullptr);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index da9a79e..07f0628 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -672,8 +672,8 @@
     return result;
   } else if (UNLIKELY(klass->IsPrimitive<kVerifyNone>())) {
     return Primitive::Descriptor(klass->GetPrimitiveType<kVerifyNone>());
-  } else if (UNLIKELY(klass->IsProxyClass<kVerifyNone>())) {
-    return Runtime::Current()->GetClassLinker()->GetDescriptorForProxy(klass);
+  } else if (UNLIKELY(klass->IsAnyProxyClass<kVerifyNone>())) {
+    return Runtime::Current()->GetClassLinker()->GetDescriptorForAnyProxy(klass);
   } else {
     mirror::DexCache* dex_cache = klass->GetDexCache<kVerifyNone>();
     if (!IsValidContinuousSpaceObjectAddress(dex_cache)) {
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 1fe9a03..e2b2431 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -58,10 +58,7 @@
   CHECK_ALIGNED(max_delta, kPageSize);
   CHECK_LT(min_delta, max_delta);
 
-  std::default_random_engine generator;
-  generator.seed(NanoTime() * getpid());
-  std::uniform_int_distribution<int32_t> distribution(min_delta, max_delta);
-  int32_t r = distribution(generator);
+  int32_t r = GetRandomNumber<int32_t>(min_delta, max_delta);
   if (r % 2 == 0) {
     r = RoundUp(r, kPageSize);
   } else {
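The hunk above replaces the inline RNG with a shared GetRandomNumber<int32_t>() helper whose implementation is not part of this diff. A plausible sketch of such a helper, mirroring the removed lines (std::chrono stands in for ART's NanoTime(); this is only an assumption about the helper's shape):

#include <chrono>
#include <cstdint>
#include <random>
#include <unistd.h>  // getpid

// Sketch of a GetRandomNumber-style helper: uniform over [min, max], seeded
// from the current time and the pid, as the removed code did.
template <typename T>
T GetRandomNumberSketch(T min, T max) {
  std::default_random_engine generator;
  const uint64_t nanos = static_cast<uint64_t>(
      std::chrono::steady_clock::now().time_since_epoch().count());
  generator.seed(nanos * static_cast<uint64_t>(getpid()));
  std::uniform_int_distribution<T> distribution(min, max);
  return distribution(generator);
}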
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 9f6699f..2de8e7e 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -888,12 +888,56 @@
     return false;
   }
 
+  StackHandleScope<1> hs{self};  // NOLINT: [readability/braces] [4]
+
+  // Use the lambda method's class loader since it's close enough.
+  // TODO: create-lambda should capture the current method's class loader and use that instead.
+  // TODO: Do we want create-lambda to work for static methods outside of the declaring class?
+  // --> then we need to store a classloader in the lambda method. otherwise we don't
+  //     because it would always use the declaring class's class loader.
+  // TODO: add a GetClassLoader to the lambda closure which knows how to do this,
+  //       don't hardcode this here.
+  Handle<ClassLoader> current_class_loader = hs.NewHandle(
+      lambda_closure->GetTargetMethod()->GetDeclaringClass()->GetClassLoader());
+
+  // TODO: get the type ID from the instruction
+  std::string class_name;
+  {
+    // Temporary hack to read the interface corresponding to a box-lambda.
+    // TODO: The box-lambda should encode the type ID instead, so we don't need to do this.
+    {
+      // Do a hack where we read the interface name from a const-string.
+      mirror::Object* string_reference = shadow_frame.GetVRegReference(vreg_target_object);
+
+      CHECK(string_reference != nullptr)
+          << "box-lambda needs the type name stored in string vA (target), but it was null";
+
+      CHECK(string_reference->IsString())
+          << "box-lambda needs the type name stored in string vA (target)";
+
+      mirror::String* as_string = string_reference->AsString();
+      class_name = as_string->ToModifiedUtf8();
+    }
+
+    // Trigger class loading of the functional interface.
+    // TODO: This should actually be done by the create-lambda...
+    if (Runtime::Current()->GetClassLinker()
+            ->FindClass(self, class_name.c_str(), current_class_loader) == nullptr) {
+      CHECK(self->IsExceptionPending());
+      self->AssertPendingException();
+      return false;
+    }
+  }
+
   mirror::Object* closure_as_object =
-      Runtime::Current()->GetLambdaBoxTable()->BoxLambda(lambda_closure);
+      Runtime::Current()->GetLambdaBoxTable()->BoxLambda(lambda_closure,
+                                                         class_name.c_str(),
+                                                         current_class_loader.Get());
 
   // Failed to box the lambda, an exception was raised.
   if (UNLIKELY(closure_as_object == nullptr)) {
     CHECK(self->IsExceptionPending());
+    shadow_frame.SetVRegReference(vreg_target_object, nullptr);
     return false;
   }
 
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bf95a0e..11a8c2e 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -102,6 +102,8 @@
   size_t lambda_captured_variable_index = 0;
   while (true) {
     dex_pc = inst->GetDexPc(insns);
+    DCHECK_LE(dex_pc, code_item->insns_size_in_code_units_)
+        << "Dex PC overflowed code item size; missing return instruction?";
     shadow_frame.SetDexPC(dex_pc);
     TraceExecution(shadow_frame, inst, dex_pc);
     inst_data = inst->Fetch16(0);
diff --git a/runtime/lambda/art_lambda_method.cc b/runtime/lambda/art_lambda_method.cc
index 6f9f8bb..0690cd1 100644
--- a/runtime/lambda/art_lambda_method.cc
+++ b/runtime/lambda/art_lambda_method.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "art_method-inl.h"
 #include "lambda/art_lambda_method.h"
 
 #include "base/logging.h"
@@ -73,5 +74,12 @@
   }
 }
 
+size_t ArtLambdaMethod::GetArgumentVRegCount() const {
+  DCHECK(GetArtMethod()->IsStatic());  // Instance methods don't have a receiver in the shorty.
+  const char* method_shorty = GetArtMethod()->GetShorty();
+  DCHECK_NE(*method_shorty, '\0') << method_shorty;
+  return ShortyFieldType::CountVirtualRegistersRequired(method_shorty + 1);  // skip return type
+}
+
 }  // namespace lambda
 }  // namespace art
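GetArgumentVRegCount() skips the return type (the first shorty character) and sums the vreg cost of each argument: narrow primitives and references take one vreg, long/double and lambdas take two. A standalone sketch of that counting rule (the real logic is ShortyFieldType::CountVirtualRegistersRequired(), added later in this change):

#include <cstddef>

// Standalone sketch of the vreg-counting rule described above; '\\' is the shorty
// character this patch set uses for lambdas.
inline size_t CountVRegsForShortyArgs(const char* shorty_with_return) {
  size_t vregs = 0;
  for (const char* p = shorty_with_return + 1; *p != '\0'; ++p) {  // skip the return type
    switch (*p) {
      case 'J': case 'D': case '\\': vregs += 2; break;  // wide primitives and lambdas
      default:                       vregs += 1; break;  // narrow primitives and references
    }
  }
  return vregs;
}

// Example: a static method with shorty "VJLI" (void return; long, reference, int args)
// needs 2 + 1 + 1 = 4 argument vregs.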
diff --git a/runtime/lambda/art_lambda_method.h b/runtime/lambda/art_lambda_method.h
index ea13eb7..a858bf9 100644
--- a/runtime/lambda/art_lambda_method.h
+++ b/runtime/lambda/art_lambda_method.h
@@ -90,6 +90,17 @@
     return strlen(captured_variables_shorty_);
   }
 
+  // Return the offset in bytes from the start of ArtLambdaMethod to the method_.
+  // -- Only should be used by assembly (stubs) support code and compiled code.
+  static constexpr size_t GetArtMethodOffset() {
+    return offsetof(ArtLambdaMethod, method_);
+  }
+
+  // Calculate how many vregs all the arguments will use when doing an invoke.
+  // (Most primitives are 1 vreg, double/long are 2, reference is 1, lambda is 2).
+  // -- This is used to know how big to set up shadow frame when invoking into the target method.
+  size_t GetArgumentVRegCount() const SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // TODO: ArtMethod, or at least the entry points should be inlined into this struct
   // to avoid an extra indirect load when doing invokes.
diff --git a/runtime/lambda/box_class_table-inl.h b/runtime/lambda/box_class_table-inl.h
new file mode 100644
index 0000000..2fc34a7
--- /dev/null
+++ b/runtime/lambda/box_class_table-inl.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_LAMBDA_BOX_CLASS_TABLE_INL_H_
+#define ART_RUNTIME_LAMBDA_BOX_CLASS_TABLE_INL_H_
+
+#include "lambda/box_class_table.h"
+#include "thread.h"
+
+namespace art {
+namespace lambda {
+
+template <typename Visitor>
+inline void BoxClassTable::VisitRoots(const Visitor& visitor) {
+  MutexLock mu(Thread::Current(), *Locks::lambda_class_table_lock_);
+  for (std::pair<UnorderedMapKeyType, ValueType>& key_value : map_) {
+    ValueType& gc_root = key_value.second;
+    visitor.VisitRoot(gc_root.AddressWithoutBarrier());
+  }
+}
+
+}  // namespace lambda
+}  // namespace art
+
+#endif  // ART_RUNTIME_LAMBDA_BOX_CLASS_TABLE_INL_H_
diff --git a/runtime/lambda/box_class_table.cc b/runtime/lambda/box_class_table.cc
new file mode 100644
index 0000000..1e49886
--- /dev/null
+++ b/runtime/lambda/box_class_table.cc
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "lambda/box_class_table.h"
+
+#include "base/mutex.h"
+#include "common_throws.h"
+#include "gc_root-inl.h"
+#include "lambda/closure.h"
+#include "lambda/leaking_allocator.h"
+#include "mirror/method.h"
+#include "mirror/object-inl.h"
+#include "thread.h"
+
+#include <string>
+#include <vector>
+
+namespace art {
+namespace lambda {
+
+// Create the lambda proxy class given the name of the lambda interface (e.g. Ljava/lang/Runnable;)
+// Also needs a proper class loader (or null for bootclasspath) where the proxy will be created
+// into.
+//
+// The class must **not** have already been created.
+// Returns a non-null ptr on success, otherwise returns null and has an exception set.
+static mirror::Class* CreateClass(Thread* self,
+                                  const std::string& class_name,
+                                  const Handle<mirror::ClassLoader>& class_loader)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedObjectAccessUnchecked soa(self);
+  StackHandleScope<2> hs(self);
+
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+
+  // Find the java.lang.Class for our class name (from the class loader).
+  Handle<mirror::Class> lambda_interface =
+      hs.NewHandle(class_linker->FindClass(self, class_name.c_str(), class_loader));
+  // TODO: use LookupClass in a loop
+  // TODO: DCHECK That this doesn't actually cause the class to be loaded,
+  //       since the create-lambda should've loaded it already
+  DCHECK(lambda_interface.Get() != nullptr) << "CreateClass with class_name=" << class_name;
+  DCHECK(lambda_interface->IsInterface()) << "CreateClass with class_name=" << class_name;
+  jobject lambda_interface_class = soa.AddLocalReference<jobject>(lambda_interface.Get());
+
+  // Look up java.lang.reflect.Proxy#getLambdaProxyClass method.
+  Handle<mirror::Class> java_lang_reflect_proxy =
+      hs.NewHandle(class_linker->FindSystemClass(soa.Self(), "Ljava/lang/reflect/Proxy;"));
+  jclass java_lang_reflect_proxy_class =
+      soa.AddLocalReference<jclass>(java_lang_reflect_proxy.Get());
+  DCHECK(java_lang_reflect_proxy.Get() != nullptr);
+
+  jmethodID proxy_factory_method_id =
+      soa.Env()->GetStaticMethodID(java_lang_reflect_proxy_class,
+                                  "getLambdaProxyClass",
+                                  "(Ljava/lang/ClassLoader;Ljava/lang/Class;)Ljava/lang/Class;");
+  DCHECK(!soa.Env()->ExceptionCheck());
+
+  // Call into the java code to do the hard work of figuring out which methods and throws
+  // our lambda interface proxy needs to implement. It then calls back into the class linker
+  // on our behalf to make the proxy itself.
+  jobject generated_lambda_proxy_class =
+      soa.Env()->CallStaticObjectMethod(java_lang_reflect_proxy_class,
+                                        proxy_factory_method_id,
+                                        class_loader.ToJObject(),
+                                        lambda_interface_class);
+
+  // This can throw in which case we return null. Caller must handle.
+  return soa.Decode<mirror::Class*>(generated_lambda_proxy_class);
+}
+
+BoxClassTable::BoxClassTable() {
+}
+
+BoxClassTable::~BoxClassTable() {
+  // Don't need to do anything, classes are deleted automatically by GC
+  // when the classloader is deleted.
+  //
+  // Our table will not outlive the classloader since the classloader owns it.
+}
+
+mirror::Class* BoxClassTable::GetOrCreateBoxClass(const char* class_name,
+                                                  const Handle<mirror::ClassLoader>& class_loader) {
+  DCHECK(class_name != nullptr);
+
+  Thread* self = Thread::Current();
+
+  std::string class_name_str = class_name;
+
+  {
+    MutexLock mu(self, *Locks::lambda_class_table_lock_);
+
+    // Attempt to look up this class, it's possible it was already created previously.
+    // If this is the case we *must* return the same class as before to maintain
+    // referential equality between box instances.
+    //
+    // In managed code:
+    //   Functional f = () -> 5;  // vF = create-lambda
+    //   Object a = f;            // vA = box-lambda vA
+    //   Object b = f;            // vB = box-lambda vB
+    //   assert(a.getClass() == b.getClass())
+    //   assert(a == b)
+    ValueType value = FindBoxedClass(class_name_str);
+    if (!value.IsNull()) {
+      return value.Read();
+    }
+  }
+
+  // Otherwise we need to generate a class ourselves and insert it into the hash map
+
+  // Release the table lock here, which implicitly allows other threads to suspend
+  // (since the GC callbacks will not block on trying to acquire our lock).
+  // We also don't want to call into the class linker with the lock held because
+  // our lock level is lower.
+  self->AllowThreadSuspension();
+
+  // Create a lambda proxy class, within the specified class loader.
+  mirror::Class* lambda_proxy_class = CreateClass(self, class_name_str, class_loader);
+
+  // There are no thread suspension points after this, so we don't need to put it into a handle.
+  ScopedAssertNoThreadSuspension soants{self, "BoxClassTable::GetOrCreateBoxClass"};  // NOLINT:  [readability/braces] [4]
+
+  if (UNLIKELY(lambda_proxy_class == nullptr)) {
+    // Most likely an OOM has occurred.
+    CHECK(self->IsExceptionPending());
+    return nullptr;
+  }
+
+  {
+    MutexLock mu(self, *Locks::lambda_class_table_lock_);
+
+    // Possible, but unlikely, that someone already came in and made a proxy class
+    // on another thread.
+    ValueType value = FindBoxedClass(class_name_str);
+    if (UNLIKELY(!value.IsNull())) {
+      DCHECK_EQ(lambda_proxy_class, value.Read());
+      return value.Read();
+    }
+
+    // Otherwise we made a brand new proxy class.
+    // The class itself is cleaned up by the GC (e.g. class unloading) later.
+
+    // Actually insert into the table.
+    map_.Insert({std::move(class_name_str), ValueType(lambda_proxy_class)});
+  }
+
+  return lambda_proxy_class;
+}
+
+BoxClassTable::ValueType BoxClassTable::FindBoxedClass(const std::string& class_name) const {
+  auto map_iterator = map_.Find(class_name);
+  if (map_iterator != map_.end()) {
+    const std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
+    const ValueType& value = key_value_pair.second;
+
+    DCHECK(!value.IsNull());  // Never store null boxes.
+    return value;
+  }
+
+  return ValueType(nullptr);
+}
+
+void BoxClassTable::EmptyFn::MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const {
+  item.first.clear();
+
+  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  item.second = ValueType();  // Also clear the GC root.
+}
+
+bool BoxClassTable::EmptyFn::IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const {
+  bool is_empty = item.first.empty();
+  DCHECK_EQ(item.second.IsNull(), is_empty);
+
+  return is_empty;
+}
+
+bool BoxClassTable::EqualsFn::operator()(const UnorderedMapKeyType& lhs,
+                                         const UnorderedMapKeyType& rhs) const {
+  // Be damn sure the classes don't just move around from under us.
+  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+
+  // Being the same class name isn't enough, must also have the same class loader.
+  // When we are in the same class loader, classes are equal via the pointer.
+  return lhs == rhs;
+}
+
+size_t BoxClassTable::HashFn::operator()(const UnorderedMapKeyType& key) const {
+  return std::hash<std::string>()(key);
+}
+
+}  // namespace lambda
+}  // namespace art
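GetOrCreateBoxClass() above follows a check / unlock / create / re-check-and-insert pattern so that proxy class generation, which may suspend and call back into the class linker, never happens while lambda_class_table_lock_ is held. A generic, self-contained sketch of that pattern, with std::mutex and std::unordered_map standing in for ART's lock and HashMap (all names below are illustrative):

#include <mutex>
#include <string>
#include <unordered_map>

// Generic double-checked get-or-create. CreateFn may block or fail (returns nullptr).
template <typename ValueT, typename CreateFn>
ValueT* GetOrCreate(std::mutex& lock,
                    std::unordered_map<std::string, ValueT*>& table,
                    const std::string& key,
                    CreateFn create) {
  {
    std::lock_guard<std::mutex> guard(lock);
    auto it = table.find(key);
    if (it != table.end()) {
      return it->second;               // Fast path: already created.
    }
  }
  ValueT* fresh = create(key);         // Slow path: create without the lock held.
  if (fresh == nullptr) {
    return nullptr;                    // Creation failed; caller handles the error.
  }
  std::lock_guard<std::mutex> guard(lock);
  auto it = table.find(key);
  if (it != table.end()) {
    return it->second;                 // Another thread won the race; reuse its value.
  }
  table.emplace(key, fresh);
  return fresh;
}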
diff --git a/runtime/lambda/box_class_table.h b/runtime/lambda/box_class_table.h
new file mode 100644
index 0000000..17e1026
--- /dev/null
+++ b/runtime/lambda/box_class_table.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ART_RUNTIME_LAMBDA_BOX_CLASS_TABLE_H_
+#define ART_RUNTIME_LAMBDA_BOX_CLASS_TABLE_H_
+
+#include "base/allocator.h"
+#include "base/hash_map.h"
+#include "gc_root.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "object_callbacks.h"
+
+#include <stdint.h>
+
+namespace art {
+
+class ArtMethod;  // forward declaration
+template<class T> class Handle;  // forward declaration
+
+namespace mirror {
+class Class;  // forward declaration
+class ClassLoader;  // forward declaration
+class LambdaProxy;  // forward declaration
+class Object;  // forward declaration
+}  // namespace mirror
+
+namespace lambda {
+struct Closure;  // forward declaration
+
+/*
+ * Store a table of lambda proxy classes, keyed by the name of the lambda's interface. This is
+ * required so that re-boxing a lambda yields an instance of the same proxy class as before,
+ * maintaining referential equality between box instances.
+ *
+ * Conceptually, we store a mapping of Class Name -> Strong Reference<Class>; the classes are
+ * unloaded together with the class loader that owns this table. If enough entries are erased,
+ * we shrink the underlying table to use less space.
+ */
+class BoxClassTable FINAL {
+ public:
+  // TODO: This should take a LambdaArtMethod instead, read class name from that.
+  // Note: null class_loader means bootclasspath.
+  mirror::Class* GetOrCreateBoxClass(const char* class_name,
+                                     const Handle<mirror::ClassLoader>& class_loader)
+      REQUIRES(!Locks::lambda_class_table_lock_, !Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Sweep strong references to lambda class boxes. Update the addresses if the objects
+  // have been moved, and delete them from the table if the objects have been cleaned up.
+  template <typename Visitor>
+  void VisitRoots(const Visitor& visitor)
+      NO_THREAD_SAFETY_ANALYSIS  // for object marking requiring heap bitmap lock
+      REQUIRES(!Locks::lambda_class_table_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  BoxClassTable();
+  ~BoxClassTable();
+
+ private:
+  // We only store strong GC roots in our table.
+  using ValueType = GcRoot<mirror::Class>;
+
+  // Attempt to look up the class in the map, or return null if it's not there yet.
+  ValueType FindBoxedClass(const std::string& class_name) const
+      SHARED_REQUIRES(Locks::lambda_class_table_lock_);
+
+  // Store the key as a string so that we can have our own copy of the class name.
+  using UnorderedMapKeyType = std::string;
+
+  // EmptyFn implementation for art::HashMap
+  struct EmptyFn {
+    void MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const
+        NO_THREAD_SAFETY_ANALYSIS;
+        // SHARED_REQUIRES(Locks::mutator_lock_);
+
+    bool IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const;
+  };
+
+  // HashFn implementation for art::HashMap
+  struct HashFn {
+    size_t operator()(const UnorderedMapKeyType& key) const
+        NO_THREAD_SAFETY_ANALYSIS;
+        // SHARED_REQUIRES(Locks::mutator_lock_);
+  };
+
+  // EqualsFn implementation for art::HashMap
+  struct EqualsFn {
+    bool operator()(const UnorderedMapKeyType& lhs, const UnorderedMapKeyType& rhs) const
+        NO_THREAD_SAFETY_ANALYSIS;
+        // SHARED_REQUIRES(Locks::mutator_lock_);
+  };
+
+  using UnorderedMap = art::HashMap<UnorderedMapKeyType,
+                                    ValueType,
+                                    EmptyFn,
+                                    HashFn,
+                                    EqualsFn,
+                                    TrackingAllocator<std::pair<UnorderedMapKeyType, ValueType>,
+                                                      kAllocatorTagLambdaProxyClassBoxTable>>;
+
+  // Map of strong GC roots (lambda interface name -> lambda proxy class)
+  UnorderedMap map_ GUARDED_BY(Locks::lambda_class_table_lock_);
+
+  // Shrink the map when we get below this load factor.
+  // (This is an arbitrary value that should be large enough to prevent aggressive map erases
+  // from shrinking the table too often.)
+  static constexpr double kMinimumLoadFactor = UnorderedMap::kDefaultMinLoadFactor / 2;
+
+  DISALLOW_COPY_AND_ASSIGN(BoxClassTable);
+};
+
+}  // namespace lambda
+}  // namespace art
+
+#endif  // ART_RUNTIME_LAMBDA_BOX_CLASS_TABLE_H_
diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc
index 9918bb7..0032d08 100644
--- a/runtime/lambda/box_table.cc
+++ b/runtime/lambda/box_table.cc
@@ -18,8 +18,10 @@
 #include "base/mutex.h"
 #include "common_throws.h"
 #include "gc_root-inl.h"
+#include "lambda/box_class_table.h"
 #include "lambda/closure.h"
 #include "lambda/leaking_allocator.h"
+#include "mirror/lambda_proxy.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "thread.h"
@@ -28,12 +30,13 @@
 
 namespace art {
 namespace lambda {
-// Temporarily represent the lambda Closure as its raw bytes in an array.
-// TODO: Generate a proxy class for the closure when boxing the first time.
-using BoxedClosurePointerType = mirror::ByteArray*;
+// All closures are boxed into a subtype of LambdaProxy which implements the lambda's interface.
+using BoxedClosurePointerType = mirror::LambdaProxy*;
 
-static mirror::Class* GetBoxedClosureClass() SHARED_REQUIRES(Locks::mutator_lock_) {
-  return mirror::ByteArray::GetArrayClass();
+// Returns the base class for all boxed closures.
+// Note that concrete closure boxes are actually a subtype of mirror::LambdaProxy.
+static mirror::Class* GetBoxedClosureBaseClass() SHARED_REQUIRES(Locks::mutator_lock_) {
+  return Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangLambdaProxy);
 }
 
 namespace {
@@ -54,6 +57,14 @@
       return closure;
     }
   };
+
+  struct DeleterForClosure {
+    void operator()(Closure* closure) const {
+      ClosureAllocator::Delete(closure);
+    }
+  };
+
+  using UniqueClosurePtr = std::unique_ptr<Closure, DeleterForClosure>;
 }  // namespace
 
 BoxTable::BoxTable()
@@ -75,7 +86,9 @@
   }
 }
 
-mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) {
+mirror::Object* BoxTable::BoxLambda(const ClosureType& closure,
+                                    const char* class_name,
+                                    mirror::ClassLoader* class_loader) {
   Thread* self = Thread::Current();
 
   {
@@ -91,7 +104,7 @@
     //   Functional f = () -> 5;  // vF = create-lambda
     //   Object a = f;            // vA = box-lambda vA
     //   Object b = f;            // vB = box-lambda vB
-    //   assert(a == f)
+    //   assert(a == b)
     ValueType value = FindBoxedLambda(closure);
     if (!value.IsNull()) {
       return value.Read();
@@ -100,30 +113,62 @@
     // Otherwise we need to box ourselves and insert it into the hash map
   }
 
-  // Release the lambda table lock here, so that thread suspension is allowed.
+  // Convert the Closure into a managed object instance whose supertype is java.lang.LambdaProxy.
 
-  // Convert the Closure into a managed byte[] which will serve
-  // as the temporary 'boxed' version of the lambda. This is good enough
-  // to check all the basic object identities that a boxed lambda must retain.
-  // It's also good enough to contain all the captured primitive variables.
-
-  // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class
   // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object
-  BoxedClosurePointerType closure_as_array_object =
-      mirror::ByteArray::Alloc(self, closure->GetSize());
+  StackHandleScope<2> hs{self};  // NOLINT: [readability/braces] [4]
 
-  // There are no thread suspension points after this, so we don't need to put it into a handle.
+  Handle<mirror::ClassLoader> class_loader_handle = hs.NewHandle(class_loader);
 
-  if (UNLIKELY(closure_as_array_object == nullptr)) {
+  // Release the lambda table lock here, so that thread suspension is allowed.
+  self->AllowThreadSuspension();
+
+  lambda::BoxClassTable* lambda_box_class_table;
+
+  // Find the lambda box class table; it is in the system class loader if class_loader is null.
+  if (class_loader == nullptr) {
+    ScopedObjectAccessUnchecked soa(self);
+    mirror::ClassLoader* system_class_loader =
+        soa.Decode<mirror::ClassLoader*>(Runtime::Current()->GetSystemClassLoader());
+    lambda_box_class_table = system_class_loader->GetLambdaProxyCache();
+  } else {
+    lambda_box_class_table = class_loader_handle->GetLambdaProxyCache();
+    // OK: can't be deleted while we hold a handle to the class loader.
+  }
+  DCHECK(lambda_box_class_table != nullptr);
+
+  Handle<mirror::Class> closure_class(hs.NewHandle(
+      lambda_box_class_table->GetOrCreateBoxClass(class_name, class_loader_handle)));
+  if (UNLIKELY(closure_class.Get() == nullptr)) {
     // Most likely an OOM has occurred.
-    CHECK(self->IsExceptionPending());
+    self->AssertPendingException();
     return nullptr;
   }
 
-  // Write the raw closure data into the byte[].
-  closure->CopyTo(closure_as_array_object->GetRawData(sizeof(uint8_t),  // component size
-                                                      0 /*index*/),     // index
-                  closure_as_array_object->GetLength());
+  BoxedClosurePointerType closure_as_object = nullptr;
+  UniqueClosurePtr closure_table_copy;
+  // Create an instance of the class, and assign the pointer to the closure into it.
+  {
+    closure_as_object = down_cast<BoxedClosurePointerType>(closure_class->AllocObject(self));
+    if (UNLIKELY(closure_as_object == nullptr)) {
+      self->AssertPendingOOMException();
+      return nullptr;
+    }
+
+    // Make a copy of the closure that we will store in the hash map.
+    // The proxy instance will also point to this same hash map.
+    // Note that the closure pointer is cleaned up only after the proxy is GCd.
+    closure_table_copy.reset(ClosureAllocator::Allocate(closure->GetSize()));
+    closure_as_object->SetClosure(closure_table_copy.get());
+  }
+
+  // There are no thread suspension points after this, so we don't need to put it into a handle.
+  ScopedAssertNoThreadSuspension soants{self,                                                    // NOLINT: [whitespace/braces] [5]
+                                        "box lambda table - box lambda - no more suspensions"};  // NOLINT: [whitespace/braces] [5]
+
+  // Write the raw closure data into the proxy instance's copy of the closure.
+  closure->CopyTo(closure_table_copy.get(),
+                  closure->GetSize());
 
   // The method has been successfully boxed into an object, now insert it into the hash map.
   {
@@ -134,24 +179,21 @@
     // we were allocating the object before.
     ValueType value = FindBoxedLambda(closure);
     if (UNLIKELY(!value.IsNull())) {
-      // Let the GC clean up method_as_object at a later time.
+      // Let the GC clean up closure_as_object at a later time.
+      // (We will not see this object when sweeping, it wasn't inserted yet.)
+      closure_as_object->SetClosure(nullptr);
       return value.Read();
     }
 
     // Otherwise we need to insert it into the hash map in this thread.
 
-    // Make a copy for the box table to keep, in case the closure gets collected from the stack.
-    // TODO: GC may need to sweep for roots in the box table's copy of the closure.
-    Closure* closure_table_copy = ClosureAllocator::Allocate(closure->GetSize());
-    closure->CopyTo(closure_table_copy, closure->GetSize());
-
-    // The closure_table_copy needs to be deleted by us manually when we erase it from the map.
+    // The closure_table_copy is deleted by us manually when we erase it from the map.
 
     // Actually insert into the table.
-    map_.Insert({closure_table_copy, ValueType(closure_as_array_object)});
+    map_.Insert({closure_table_copy.release(), ValueType(closure_as_object)});
   }
 
-  return closure_as_array_object;
+  return closure_as_object;
 }
 
 bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) {
@@ -165,29 +207,35 @@
 
   mirror::Object* boxed_closure_object = object;
 
-  // Raise ClassCastException if object is not instanceof byte[]
-  if (UNLIKELY(!boxed_closure_object->InstanceOf(GetBoxedClosureClass()))) {
-    ThrowClassCastException(GetBoxedClosureClass(), boxed_closure_object->GetClass());
+  // Raise ClassCastException if object is not instanceof LambdaProxy
+  if (UNLIKELY(!boxed_closure_object->InstanceOf(GetBoxedClosureBaseClass()))) {
+    ThrowClassCastException(GetBoxedClosureBaseClass(), boxed_closure_object->GetClass());
     return false;
   }
 
   // TODO(iam): We must check that the closure object extends/implements the type
-  // specified in [type id]. This is not currently implemented since it's always a byte[].
+  // specified in [type id]. This is not currently implemented since the type id is unavailable.
 
   // If we got this far, the inputs are valid.
-  // Shuffle the byte[] back into a raw closure, then allocate it, copy, and return it.
-  BoxedClosurePointerType boxed_closure_as_array =
+  // Shuffle the java.lang.LambdaProxy back into a raw closure, then allocate it, copy,
+  // and return it.
+  BoxedClosurePointerType boxed_closure =
       down_cast<BoxedClosurePointerType>(boxed_closure_object);
 
-  const int8_t* unaligned_interior_closure = boxed_closure_as_array->GetData();
+  DCHECK_ALIGNED(boxed_closure->GetClosure(), alignof(Closure));
+  const Closure* aligned_interior_closure = boxed_closure->GetClosure();
+  DCHECK(aligned_interior_closure != nullptr);
+
+  // TODO: we probably don't need to make a copy here later on, once there's GC support.
 
   // Allocate a copy that can "escape" and copy the closure data into that.
   Closure* unboxed_closure =
-      LeakingAllocator::MakeFlexibleInstance<Closure>(self, boxed_closure_as_array->GetLength());
+      LeakingAllocator::MakeFlexibleInstance<Closure>(self, aligned_interior_closure->GetSize());
+  DCHECK_ALIGNED(unboxed_closure, alignof(Closure));
   // TODO: don't just memcpy the closure, it's unsafe when we add references to the mix.
-  memcpy(unboxed_closure, unaligned_interior_closure, boxed_closure_as_array->GetLength());
+  memcpy(unboxed_closure, aligned_interior_closure, aligned_interior_closure->GetSize());
 
-  DCHECK_EQ(unboxed_closure->GetSize(), static_cast<size_t>(boxed_closure_as_array->GetLength()));
+  DCHECK_EQ(unboxed_closure->GetSize(), aligned_interior_closure->GetSize());
 
   *out_closure = unboxed_closure;
   return true;
@@ -236,9 +284,10 @@
 
     if (new_value == nullptr) {
       // The object has been swept away.
-      const ClosureType& closure = key_value_pair.first;
+      Closure* closure = key_value_pair.first;
 
       // Delete the entry from the map.
+      // (Remove from the map first to avoid accessing a dangling pointer).
       map_iterator = map_.Erase(map_iterator);
 
       // Clean up the memory by deleting the closure.
@@ -290,7 +339,10 @@
 }
 
 bool BoxTable::EmptyFn::IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const {
-  return item.first == nullptr;
+  bool is_empty = item.first == nullptr;
+  DCHECK_EQ(item.second.IsNull(), is_empty);
+
+  return is_empty;
 }
 
 bool BoxTable::EqualsFn::operator()(const UnorderedMapKeyType& lhs,
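The revised BoxLambda() hands ownership of the closure copy to the box table: the proxy instance merely points at the copy, and the copy is freed only when the sweep erases its map entry (erase first, then delete, so no reader can observe a dangling key). A minimal ownership sketch under those assumptions, using placeholder types rather than ART's Closure/LambdaProxy:

#include <cstddef>
#include <memory>
#include <unordered_map>

struct FakeClosure { size_t size; unsigned char data[64]; };
struct FakeProxy { FakeClosure* closure; };

std::unordered_map<FakeClosure*, FakeProxy*> table;

FakeProxy* BoxSketch(const FakeClosure& closure, FakeProxy* proxy) {
  auto copy = std::make_unique<FakeClosure>(closure);  // table-owned copy of the closure
  proxy->closure = copy.get();                         // the proxy instance points at the copy
  // ...re-check the table here in case another thread boxed the same closure first...
  table.emplace(copy.release(), proxy);                // ownership moves into the table
  return proxy;
}

void SweepEntrySketch(FakeClosure* key) {
  table.erase(key);  // remove from the map first so nothing can observe a dangling key
  delete key;        // then free the table's copy of the closure
}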
diff --git a/runtime/lambda/box_table.h b/runtime/lambda/box_table.h
index adb7332..9dca6ab 100644
--- a/runtime/lambda/box_table.h
+++ b/runtime/lambda/box_table.h
@@ -30,6 +30,9 @@
 class ArtMethod;  // forward declaration
 
 namespace mirror {
+class Class;   // forward declaration
+class ClassLoader;  // forward declaration
+class LambdaProxy;  // forward declaration
 class Object;  // forward declaration
 }  // namespace mirror
 
@@ -48,8 +51,11 @@
   using ClosureType = art::lambda::Closure*;
 
   // Boxes a closure into an object. Returns null and throws an exception on failure.
-  mirror::Object* BoxLambda(const ClosureType& closure)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::lambda_table_lock_);
+  mirror::Object* BoxLambda(const ClosureType& closure,
+                            const char* class_name,
+                            mirror::ClassLoader* class_loader)
+      REQUIRES(!Locks::lambda_table_lock_, !Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Unboxes an object back into the lambda. Returns false and throws an exception on failure.
   bool UnboxLambda(mirror::Object* object, ClosureType* out_closure)
@@ -128,7 +134,16 @@
                                     TrackingAllocator<std::pair<ClosureType, ValueType>,
                                                       kAllocatorTagLambdaBoxTable>>;
 
+  using ClassMap = art::HashMap<std::string,
+                                GcRoot<mirror::Class>,
+                                EmptyFn,
+                                HashFn,
+                                EqualsFn,
+                                TrackingAllocator<std::pair<std::string, GcRoot<mirror::Class>>,
+                                                  kAllocatorTagLambdaProxyClassBoxTable>>;
+
   UnorderedMap map_                                          GUARDED_BY(Locks::lambda_table_lock_);
+  ClassMap classes_map_                                      GUARDED_BY(Locks::lambda_table_lock_);
   bool allow_new_weaks_                                      GUARDED_BY(Locks::lambda_table_lock_);
   ConditionVariable new_weaks_condition_                     GUARDED_BY(Locks::lambda_table_lock_);
 
diff --git a/runtime/lambda/closure.cc b/runtime/lambda/closure.cc
index 179e4ee..f935e04 100644
--- a/runtime/lambda/closure.cc
+++ b/runtime/lambda/closure.cc
@@ -20,9 +20,6 @@
 #include "lambda/art_lambda_method.h"
 #include "runtime/mirror/object_reference.h"
 
-static constexpr const bool kClosureSupportsReferences = false;
-static constexpr const bool kClosureSupportsGarbageCollection = false;
-
 namespace art {
 namespace lambda {
 
@@ -128,6 +125,10 @@
   return const_cast<ArtMethod*>(lambda_info_->GetArtMethod());
 }
 
+ArtLambdaMethod* Closure::GetLambdaInfo() const {
+  return const_cast<ArtLambdaMethod*>(lambda_info_);
+}
+
 uint32_t Closure::GetHashCode() const {
   // Start with a non-zero constant, a prime number.
   uint32_t result = 17;
diff --git a/runtime/lambda/closure.h b/runtime/lambda/closure.h
index 31ff194..38ec063 100644
--- a/runtime/lambda/closure.h
+++ b/runtime/lambda/closure.h
@@ -33,12 +33,52 @@
 class ArtLambdaMethod;  // forward declaration
 class ClosureBuilder;   // forward declaration
 
+// TODO: Remove these constants once closures are supported properly.
+
+// Does the lambda closure support containing references? If so, all the users of lambdas
+// must be updated to also support references.
+static constexpr const bool kClosureSupportsReferences = false;
+// Does the lambda closure support being garbage collected? If so, all the users of lambdas
+// must be updated to also support garbage collection.
+static constexpr const bool kClosureSupportsGarbageCollection = false;
+// Does the lambda closure support being garbage collected with a read barrier? If so,
+// all the users of the lambdas msut also be updated to support read barrier GC.
+static constexpr const bool kClosureSupportsReadBarrier = false;
+
+// Is this closure being stored as a 'long' in shadow frames and the quick ABI?
+static constexpr const bool kClosureIsStoredAsLong = true;
+
+
+// Raw memory layout for the lambda closure.
+//
+// WARNING:
+// * Only the compiler and tests should use this; they need to take offsetof of the raw fields.
+// * Runtime/interpreter should always access closures through a Closure pointer.
+struct ClosureStorage {
+  // Compile-time known lambda information such as the type descriptor and size.
+  ArtLambdaMethod* lambda_info_;
+
+  // A contiguous list of captured variables, and possibly the closure size.
+  // The runtime size can always be determined through GetSize().
+  union {
+    // Read from here if the closure size is static (ArtLambdaMethod::IsStatic)
+    uint8_t static_variables_[0];
+    struct {
+      // Read from here if the closure size is dynamic (ArtLambdaMethod::IsDynamic)
+      size_t size_;  // The size_ includes lambda_info_ and the size_ field itself.
+      uint8_t variables_[0];
+    } dynamic_;
+  } captured_[0];
+  // captured_ will always consist of one array element at runtime.
+  // Set to [0] so that 'size_' is not counted in sizeof(Closure).
+};
+
 // Inline representation of a lambda closure.
 // Contains the target method and the set of packed captured variables as a copy.
 //
 // The closure itself is logically immutable, although in practice any object references
 // it (recursively) contains can be moved and updated by the GC.
-struct PACKED(sizeof(ArtLambdaMethod*)) Closure {
+struct Closure : private ClosureStorage {
   // Get the size of the Closure in bytes.
   // This is necessary in order to allocate a large enough area to copy the Closure into.
   // Do *not* copy the closure with memcpy, since references also need to get moved.
@@ -52,6 +92,9 @@
   // Get the target method, i.e. the method that will be dispatched into with invoke-lambda.
   ArtMethod* GetTargetMethod() const;
 
+  // Get the static lambda info that never changes.
+  ArtLambdaMethod* GetLambdaInfo() const;
+
   // Calculates the hash code. Value is recomputed each time.
   uint32_t GetHashCode() const SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -156,28 +199,15 @@
   static size_t GetClosureSize(const uint8_t* closure);
 
   ///////////////////////////////////////////////////////////////////////////////////
-
-  // Compile-time known lambda information such as the type descriptor and size.
-  ArtLambdaMethod* lambda_info_;
-
-  // A contiguous list of captured variables, and possibly the closure size.
-  // The runtime size can always be determined through GetSize().
-  union {
-    // Read from here if the closure size is static (ArtLambdaMethod::IsStatic)
-    uint8_t static_variables_[0];
-    struct {
-      // Read from here if the closure size is dynamic (ArtLambdaMethod::IsDynamic)
-      size_t size_;  // The lambda_info_ and the size_ itself is also included as part of the size.
-      uint8_t variables_[0];
-    } dynamic_;
-  } captured_[0];
-  // captured_ will always consist of one array element at runtime.
-  // Set to [0] so that 'size_' is not counted in sizeof(Closure).
-
-  friend class ClosureBuilder;
+  // NOTE: Actual fields are declared in ClosureStorage.
   friend class ClosureTest;
 };
 
+// ABI guarantees:
+// * Closure same size as a ClosureStorage
+// * ClosureStorage begins at the same point a Closure would begin.
+static_assert(sizeof(Closure) == sizeof(ClosureStorage), "Closure size must match ClosureStorage");
+
 }  // namespace lambda
 }  // namespace art
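The ClosureStorage/Closure split above keeps the raw fields in a plain struct so that stubs and the compiler can take offsetof of them, while Closure privately inherits the storage and adds only behavior; the static_assert pins the two layouts to the same size. A tiny self-contained analogue of the pattern (the names and field choices below are illustrative):

#include <cstddef>
#include <cstdint>

// Raw layout stays a standard-layout struct so offsetof() is legal on it.
struct RawStorage {
  void*   info;          // like ClosureStorage::lambda_info_
  uint8_t variables[8];  // like the captured variable area
};

// The wrapper adds behavior but no fields, so its size must match the storage.
struct View : private RawStorage {
  void* GetInfo() const { return info; }
};

static_assert(sizeof(View) == sizeof(RawStorage), "wrapper must add no fields");
static_assert(offsetof(RawStorage, variables) == sizeof(void*), "offset usable by stubs");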
 
diff --git a/runtime/lambda/closure_builder.cc b/runtime/lambda/closure_builder.cc
index 739e965..7b36042 100644
--- a/runtime/lambda/closure_builder.cc
+++ b/runtime/lambda/closure_builder.cc
@@ -75,7 +75,7 @@
   if (LIKELY(is_dynamic_size_ == false)) {
     // Write in the extra bytes to store the dynamic size the first time.
     is_dynamic_size_ = true;
-    size_ += sizeof(Closure::captured_[0].dynamic_.size_);
+    size_ += sizeof(ClosureStorage::captured_[0].dynamic_.size_);
   }
 
   // A closure may be sized dynamically, so always query it for the true size.
@@ -107,38 +107,40 @@
     << "number of variables captured at runtime does not match "
     << "number of variables captured at compile time";
 
-  Closure* closure = new (memory) Closure;
-  closure->lambda_info_ = target_method;
+  ClosureStorage* closure_storage = new (memory) ClosureStorage;
+  closure_storage->lambda_info_ = target_method;
 
-  static_assert(offsetof(Closure, captured_) == kInitialSize, "wrong initial size");
+  static_assert(offsetof(ClosureStorage, captured_) == kInitialSize, "wrong initial size");
 
   size_t written_size;
   if (UNLIKELY(is_dynamic_size_)) {
     // The closure size must be set dynamically (i.e. nested lambdas).
-    closure->captured_[0].dynamic_.size_ = GetSize();
-    size_t header_size = offsetof(Closure, captured_[0].dynamic_.variables_);
+    closure_storage->captured_[0].dynamic_.size_ = GetSize();
+    size_t header_size = offsetof(ClosureStorage, captured_[0].dynamic_.variables_);
     DCHECK_LE(header_size, GetSize());
     size_t variables_size = GetSize() - header_size;
     written_size =
         WriteValues(target_method,
-                    closure->captured_[0].dynamic_.variables_,
+                    closure_storage->captured_[0].dynamic_.variables_,
                     header_size,
                     variables_size);
   } else {
     // The closure size is known statically (i.e. no nested lambdas).
     DCHECK(GetSize() == target_method->GetStaticClosureSize());
-    size_t header_size = offsetof(Closure, captured_[0].static_variables_);
+    size_t header_size = offsetof(ClosureStorage, captured_[0].static_variables_);
     DCHECK_LE(header_size, GetSize());
     size_t variables_size = GetSize() - header_size;
     written_size =
         WriteValues(target_method,
-                    closure->captured_[0].static_variables_,
+                    closure_storage->captured_[0].static_variables_,
                     header_size,
                     variables_size);
   }
 
-  DCHECK_EQ(written_size, closure->GetSize());
+  // OK: The closure storage is guaranteed to have the same layout as a Closure.
+  Closure* closure = reinterpret_cast<Closure*>(closure_storage);
 
+  DCHECK_EQ(written_size, closure->GetSize());
   return closure;
 }
 
diff --git a/runtime/lambda/shorty_field_type.h b/runtime/lambda/shorty_field_type.h
index 46ddaa9..54bb4d4 100644
--- a/runtime/lambda/shorty_field_type.h
+++ b/runtime/lambda/shorty_field_type.h
@@ -285,6 +285,39 @@
     }
   }
 
+  // Get the number of virtual registers necessary to represent this type as a stack local.
+  inline size_t GetVirtualRegisterCount() const {
+    if (IsPrimitiveNarrow()) {
+      return 1;
+    } else if (IsPrimitiveWide()) {
+      return 2;
+    } else if (IsObject()) {
+      return kObjectReferenceSize / sizeof(uint32_t);
+    } else if (IsLambda()) {
+      return 2;
+    } else {
+      DCHECK(false) << "unknown shorty field type '" << static_cast<char>(value_) << "'";
+      UNREACHABLE();
+    }
+  }
+
+  // Count how many virtual registers would be necessary in order to store this list of shorty
+  // field types.
+  static inline size_t CountVirtualRegistersRequired(const char* shorty) {
+    size_t size = 0;
+
+    while (shorty != nullptr && *shorty != '\0') {
+      // Each argument appends to the size.
+      ShortyFieldType shorty_field{*shorty};  // NOLINT [readability/braces] [4]
+
+      size += shorty_field.GetVirtualRegisterCount();
+
+      ++shorty;
+    }
+
+    return size;
+  }
+
   // Implicitly convert to the anonymous nested inner type. Used for exhaustive switch detection.
   inline operator decltype(kByte)() const {
     return value_;
diff --git a/runtime/lambda/shorty_field_type_test.cc b/runtime/lambda/shorty_field_type_test.cc
index 32bade9..430e39e 100644
--- a/runtime/lambda/shorty_field_type_test.cc
+++ b/runtime/lambda/shorty_field_type_test.cc
@@ -218,6 +218,56 @@
   }
 }  // TEST_F
 
+TEST_F(ShortyFieldTypeTest, TestCalculateVRegSize) {
+  // Make sure the single calculation for each value is correct.
+  std::pair<size_t, char> expected_actual_single[] = {
+      // Primitives
+      { 1u, 'Z' },
+      { 1u, 'B' },
+      { 1u, 'C' },
+      { 1u, 'S' },
+      { 1u, 'I' },
+      { 1u, 'F' },
+      { 2u, 'J' },
+      { 2u, 'D' },
+      // Non-primitives
+      { 1u, 'L' },
+      { 2u, '\\' },
+  };
+
+  for (auto pair : expected_actual_single) {
+    SCOPED_TRACE(pair.second);
+    EXPECT_EQ(pair.first, ShortyFieldType(pair.second).GetVirtualRegisterCount());
+  }
+
+  // Make sure we are correctly calculating how many virtual registers a shorty descriptor takes.
+  std::pair<size_t, const char*> expected_actual[] = {
+      // Empty list
+      { 0u, "" },
+      // Primitives
+      { 1u, "Z" },
+      { 1u, "B" },
+      { 1u, "C" },
+      { 1u, "S" },
+      { 1u, "I" },
+      { 1u, "F" },
+      { 2u, "J" },
+      { 2u, "D" },
+      // Non-primitives
+      { 1u, "L" },
+      { 2u, "\\" },
+      // Multiple things at once:
+      { 10u, "ZBCSIFJD" },
+      { 5u, "LLSSI" },
+      { 6u, "LLL\\L" }
+  };
+
+  for (auto pair : expected_actual) {
+    SCOPED_TRACE(pair.second);
+    EXPECT_EQ(pair.first, ShortyFieldType::CountVirtualRegistersRequired(pair.second));
+  }
+}  // TEST_F
+
 // Helper class to probe a shorty's characteristics by minimizing copy-and-paste tests.
 template <typename T, decltype(ShortyFieldType::kByte) kShortyEnum>
 struct ShortyTypeCharacteristics {
diff --git a/runtime/lambda_proxy_test.cc b/runtime/lambda_proxy_test.cc
new file mode 100644
index 0000000..63d6ccc
--- /dev/null
+++ b/runtime/lambda_proxy_test.cc
@@ -0,0 +1,367 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include <vector>
+
+#include "art_field-inl.h"
+#include "class_linker-inl.h"
+#include "compiler_callbacks.h"
+#include "common_compiler_test.h"
+#include "mirror/field-inl.h"
+#include "mirror/lambda_proxy.h"
+#include "mirror/method.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+// The enclosing class of all the interfaces used by this test.
+// -- Defined as a macro to allow for string concatenation.
+#define TEST_INTERFACE_ENCLOSING_CLASS_NAME "LambdaInterfaces"
+// Generate out "LLambdaInterfaces$<<iface>>;" , replacing <<iface>> with the interface name.
+#define MAKE_TEST_INTERFACE_NAME(iface) ("L" TEST_INTERFACE_ENCLOSING_CLASS_NAME "$" iface ";")
+
+#define ASSERT_NOT_NULL(x) ASSERT_TRUE((x) != nullptr)
+#define ASSERT_NULL(x) ASSERT_TRUE((x) == nullptr)
+#define EXPECT_NULL(x) EXPECT_TRUE((x) == nullptr)
+
+class LambdaProxyTest  // : public CommonCompilerTest {
+    : public CommonRuntimeTest {
+ public:
+  // Generate a lambda proxy class with the given name and interfaces. This is a simplification of
+  // what libcore does, adapted to our test needs. We do not check for duplicated interfaces or
+  // methods, and we do not declare exceptions.
+  mirror::Class* GenerateProxyClass(ScopedObjectAccess& soa,
+                                    jobject jclass_loader,
+                                    const char* class_name,
+                                    const std::vector<mirror::Class*>& interfaces)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    CHECK(class_name != nullptr);
+    CHECK(jclass_loader != nullptr);
+
+    mirror::Class* java_lang_object =
+        class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;");
+    CHECK(java_lang_object != nullptr);
+
+    jclass java_lang_class = soa.AddLocalReference<jclass>(mirror::Class::GetJavaLangClass());
+
+    // Builds the interfaces array.
+    jobjectArray proxy_class_interfaces = soa.Env()->NewObjectArray(interfaces.size(),
+                                                                    java_lang_class,
+                                                                    nullptr);  // No initial element.
+    soa.Self()->AssertNoPendingException();
+    for (size_t i = 0; i < interfaces.size(); ++i) {
+      soa.Env()->SetObjectArrayElement(proxy_class_interfaces,
+                                       i,
+                                       soa.AddLocalReference<jclass>(interfaces[i]));
+    }
+
+    // Builds the method array.
+    jsize methods_count = 3;  // Object.equals, Object.hashCode and Object.toString.
+    for (mirror::Class* interface : interfaces) {
+      methods_count += interface->NumVirtualMethods();
+    }
+    jobjectArray proxy_class_methods =
+        soa.Env()->NewObjectArray(methods_count,
+                                  soa.AddLocalReference<jclass>(mirror::Method::StaticClass()),
+                                  nullptr);  // No initial element.
+    soa.Self()->AssertNoPendingException();
+
+    jsize array_index = 0;
+
+    //
+    // Fill the method array with the Object and all the interface's virtual methods.
+    //
+
+    // Add a method to 'proxy_class_methods'
+    auto add_method_to_array = [&](ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
+      CHECK(method != nullptr);
+      soa.Env()->SetObjectArrayElement(proxy_class_methods,
+                                       array_index++,
+                                       soa.AddLocalReference<jobject>(
+                                           mirror::Method::CreateFromArtMethod(soa.Self(),
+                                                                               method))
+                                      );  // NOLINT: [whitespace/parens] [2]
+
+      LOG(DEBUG) << "Add " << PrettyMethod(method) << " to list of methods to generate proxy";
+    };
+    // Add a method to 'proxy_class_methods' by looking it up from java.lang.Object
+    auto add_method_to_array_by_lookup = [&](const char* name, const char* method_descriptor)
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+      ArtMethod* method = java_lang_object->FindDeclaredVirtualMethod(name,
+                                                                      method_descriptor,
+                                                                      sizeof(void*));
+      add_method_to_array(method);
+    };
+
+    // Add all methods from Object.
+    add_method_to_array_by_lookup("equals",   "(Ljava/lang/Object;)Z");
+    add_method_to_array_by_lookup("hashCode", "()I");
+    add_method_to_array_by_lookup("toString", "()Ljava/lang/String;");
+
+    // Now add all of the interfaces' virtual methods.
+    for (mirror::Class* interface : interfaces) {
+      mirror::Class* next_class = interface;
+      do {
+        for (ArtMethod& method : next_class->GetVirtualMethods(sizeof(void*))) {
+          add_method_to_array(&method);
+        }
+        next_class = next_class->GetSuperClass();
+      } while (!next_class->IsObjectClass());
+      // Skip adding any methods from "Object".
+    }
+    CHECK_EQ(array_index, methods_count);
+
+    // Builds an empty exception array.
+    jobjectArray proxy_class_throws = soa.Env()->NewObjectArray(0 /* length */,
+                                                                java_lang_class,
+                                                                nullptr /* initial element*/);
+    soa.Self()->AssertNoPendingException();
+
+    bool already_exists;
+    mirror::Class* proxy_class =
+        class_linker_->CreateLambdaProxyClass(soa,
+                                              soa.Env()->NewStringUTF(class_name),
+                                              proxy_class_interfaces,
+                                              jclass_loader,
+                                              proxy_class_methods,
+                                              proxy_class_throws,
+                                              /*out*/&already_exists);
+
+    CHECK(!already_exists);
+
+    soa.Self()->AssertNoPendingException();
+    return proxy_class;
+  }
+
+  LambdaProxyTest() {
+  }
+
+  virtual void SetUp() {
+    CommonRuntimeTest::SetUp();
+  }
+
+  virtual void SetUpRuntimeOptions(RuntimeOptions* options ATTRIBUTE_UNUSED) {
+    // Do not have any compiler options because we don't want to run as an AOT
+    // (In particular the lambda proxy class generation isn't currently supported for AOT).
+    this->callbacks_.reset();
+  }
+
+  template <typename THandleScope>
+  Handle<mirror::Class> GenerateProxyClass(THandleScope& hs,
+                                           const char* name,
+                                           const std::vector<mirror::Class*>& interfaces)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    return hs.NewHandle(GenerateProxyClass(*soa_, jclass_loader_, name, interfaces));
+  }
+
+ protected:
+  ScopedObjectAccess* soa_ = nullptr;
+  jobject jclass_loader_ = nullptr;
+};
+
+// Creates a lambda proxy class and check ClassHelper works correctly.
+TEST_F(LambdaProxyTest, ProxyClassHelper) {
+  // gLogVerbosity.class_linker = true;  // Uncomment to enable class linker logging.
+
+  ASSERT_NOT_NULL(Thread::Current());
+
+  ScopedObjectAccess soa(Thread::Current());
+  soa_ = &soa;
+
+  // Must happen after CommonRuntimeTest finishes constructing the runtime.
+  jclass_loader_ = LoadDex(TEST_INTERFACE_ENCLOSING_CLASS_NAME);
+  jobject jclass_loader = jclass_loader_;
+
+  StackHandleScope<4> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
+
+  Handle<mirror::Class> J(hs.NewHandle(
+      class_linker_->FindClass(soa.Self(), MAKE_TEST_INTERFACE_NAME("J"), class_loader)));
+  ASSERT_TRUE(J.Get() != nullptr);
+
+  std::vector<mirror::Class*> interfaces;
+  interfaces.push_back(J.Get());
+  Handle<mirror::Class> proxy_class(hs.NewHandle(
+      GenerateProxyClass(soa, jclass_loader, "$Proxy1234", interfaces)));
+  interfaces.clear();  // Don't leave possibly stale objects in the array, as good practice.
+  ASSERT_TRUE(proxy_class.Get() != nullptr);
+  ASSERT_TRUE(proxy_class->IsLambdaProxyClass());
+  ASSERT_TRUE(proxy_class->IsInitialized());
+
+  EXPECT_EQ(1U, proxy_class->NumDirectInterfaces());  // LambdaInterfaces$J.
+  EXPECT_EQ(J.Get(), mirror::Class::GetDirectInterface(soa.Self(), proxy_class, 0));
+  std::string temp;
+  const char* proxy_class_descriptor = proxy_class->GetDescriptor(&temp);
+  EXPECT_STREQ("L$Proxy1234;", proxy_class_descriptor);
+  EXPECT_EQ(nullptr, proxy_class->GetSourceFile());
+
+  // Make sure all the virtual methods are marked as a proxy
+  for (ArtMethod& method : proxy_class->GetVirtualMethods(sizeof(void*))) {
+    SCOPED_TRACE(PrettyMethod(&method, /* with_signature */true));
+    EXPECT_TRUE(method.IsProxyMethod());
+    EXPECT_TRUE(method.IsLambdaProxyMethod());
+    EXPECT_FALSE(method.IsReflectProxyMethod());
+  }
+}
+
+// Creates a proxy class and check FieldHelper works correctly.
+TEST_F(LambdaProxyTest, ProxyFieldHelper) {
+  // gLogVerbosity.class_linker = true;  // Uncomment to enable class linker logging.
+
+  ASSERT_NOT_NULL(Thread::Current());
+
+  ScopedObjectAccess soa(Thread::Current());
+  soa_ = &soa;
+
+  // Must happen after CommonRuntimeTest finishes constructing the runtime.
+  jclass_loader_ = LoadDex(TEST_INTERFACE_ENCLOSING_CLASS_NAME);
+  jobject jclass_loader = jclass_loader_;
+
+  StackHandleScope<9> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
+
+  Handle<mirror::Class> I(hs.NewHandle(
+      class_linker_->FindClass(soa.Self(), MAKE_TEST_INTERFACE_NAME("I"), class_loader)));
+  ASSERT_NOT_NULL(I.Get());
+
+  // Create the lambda proxy which implements interfaces "I".
+  Handle<mirror::Class> proxy_class = GenerateProxyClass(hs,
+                                                         "$Proxy1234",
+                                                         { I.Get() });  // Interfaces.
+
+  ASSERT_NOT_NULL(proxy_class.Get());
+  EXPECT_TRUE(proxy_class->IsLambdaProxyClass());
+  EXPECT_TRUE(proxy_class->IsInitialized());
+  EXPECT_NULL(proxy_class->GetIFieldsPtr());
+
+  LengthPrefixedArray<ArtField>* static_fields = proxy_class->GetSFieldsPtr();
+  ASSERT_NOT_NULL(static_fields);
+
+  // Must have "throws" and "interfaces" static fields.
+  ASSERT_EQ(+mirror::LambdaProxy::kStaticFieldCount, proxy_class->NumStaticFields());
+
+  static constexpr const char* kInterfacesClassName = "[Ljava/lang/Class;";
+  static constexpr const char* kThrowsClassName     = "[[Ljava/lang/Class;";
+
+  // Class for "interfaces" field.
+  Handle<mirror::Class> interfaces_field_class =
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), kInterfacesClassName));
+  ASSERT_NOT_NULL(interfaces_field_class.Get());
+
+  // Class for "throws" field.
+  Handle<mirror::Class> throws_field_class =
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), kThrowsClassName));
+  ASSERT_NOT_NULL(throws_field_class.Get());
+
+  // Helper to test the static fields for correctness.
+  auto test_static_field = [&](size_t index,
+                               const char* field_name,
+                               Handle<mirror::Class>& handle_class,
+                               const char* class_name)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtField* field = &static_fields->At(index);
+    EXPECT_STREQ(field_name, field->GetName());
+    EXPECT_STREQ(class_name, field->GetTypeDescriptor());
+    EXPECT_EQ(handle_class.Get(), field->GetType</*kResolve*/true>())
+        << "Expected: " << PrettyClass(interfaces_field_class.Get()) << ", "
+        << "Actual: " << PrettyClass(field->GetType</*kResolve*/true>()) << ", "
+        << "field_name: " << field_name;
+    std::string temp;
+    EXPECT_STREQ("L$Proxy1234;", field->GetDeclaringClass()->GetDescriptor(&temp));
+    EXPECT_FALSE(field->IsPrimitiveType());
+  };
+
+  // Test "Class[] interfaces" field.
+  test_static_field(mirror::LambdaProxy::kStaticFieldIndexInterfaces,
+                    "interfaces",
+                    interfaces_field_class,
+                    kInterfacesClassName);
+
+  // Test "Class[][] throws" field.
+  test_static_field(mirror::LambdaProxy::kStaticFieldIndexThrows,
+                    "throws",
+                    throws_field_class,
+                    kThrowsClassName);
+}
+
+// Creates two proxy classes and checks the art/mirror fields of their static fields.
+TEST_F(LambdaProxyTest, CheckArtMirrorFieldsOfProxyStaticFields) {
+  // gLogVerbosity.class_linker = true;  // Uncomment to enable class linker logging.
+
+  ASSERT_NOT_NULL(Thread::Current());
+
+  ScopedObjectAccess soa(Thread::Current());
+  soa_ = &soa;
+
+  // Must happen after CommonRuntimeTest finishes constructing the runtime.
+  jclass_loader_ = LoadDex(TEST_INTERFACE_ENCLOSING_CLASS_NAME);
+  jobject jclass_loader = jclass_loader_;
+
+  StackHandleScope<8> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
+
+  Handle<mirror::Class> proxyClass0;
+  Handle<mirror::Class> proxyClass1;
+  {
+    Handle<mirror::Class> L(hs.NewHandle(
+        class_linker_->FindClass(soa.Self(), MAKE_TEST_INTERFACE_NAME("L"), class_loader)));
+    ASSERT_TRUE(L.Get() != nullptr);
+
+    std::vector<mirror::Class*> interfaces = { L.Get() };
+    proxyClass0 = hs.NewHandle(GenerateProxyClass(soa, jclass_loader, "$Proxy0", interfaces));
+    proxyClass1 = hs.NewHandle(GenerateProxyClass(soa, jclass_loader, "$Proxy1", interfaces));
+  }
+
+  ASSERT_TRUE(proxyClass0.Get() != nullptr);
+  ASSERT_TRUE(proxyClass0->IsLambdaProxyClass());
+  ASSERT_TRUE(proxyClass0->IsInitialized());
+  ASSERT_TRUE(proxyClass1.Get() != nullptr);
+  ASSERT_TRUE(proxyClass1->IsLambdaProxyClass());
+  ASSERT_TRUE(proxyClass1->IsInitialized());
+
+  LengthPrefixedArray<ArtField>* static_fields0 = proxyClass0->GetSFieldsPtr();
+  ASSERT_TRUE(static_fields0 != nullptr);
+  ASSERT_EQ(2u, static_fields0->size());
+  LengthPrefixedArray<ArtField>* static_fields1 = proxyClass1->GetSFieldsPtr();
+  ASSERT_TRUE(static_fields1 != nullptr);
+  ASSERT_EQ(2u, static_fields1->size());
+
+  EXPECT_EQ(static_fields0->At(0).GetDeclaringClass(), proxyClass0.Get());
+  EXPECT_EQ(static_fields0->At(1).GetDeclaringClass(), proxyClass0.Get());
+  EXPECT_EQ(static_fields1->At(0).GetDeclaringClass(), proxyClass1.Get());
+  EXPECT_EQ(static_fields1->At(1).GetDeclaringClass(), proxyClass1.Get());
+
+  Handle<mirror::Field> field00 =
+      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields0->At(0), true));
+  Handle<mirror::Field> field01 =
+      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields0->At(1), true));
+  Handle<mirror::Field> field10 =
+      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields1->At(0), true));
+  Handle<mirror::Field> field11 =
+      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields1->At(1), true));
+  EXPECT_EQ(field00->GetArtField(), &static_fields0->At(0));
+  EXPECT_EQ(field01->GetArtField(), &static_fields0->At(1));
+  EXPECT_EQ(field10->GetArtField(), &static_fields1->At(0));
+  EXPECT_EQ(field11->GetArtField(), &static_fields1->At(1));
+}
+
+// TODO: make sure there's a non-abstract implementation of the single-abstract-method on the class.
+
+}  // namespace art
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 9e416dc..a8685b8 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -695,7 +695,11 @@
 }
 
 inline const DexFile& Class::GetDexFile() {
-  return *GetDexCache()->GetDexFile();
+  DexCache* dex_cache = GetDexCache();
+  DCHECK(dex_cache != nullptr);
+  const DexFile* dex_file = dex_cache->GetDexFile();
+  DCHECK(dex_file != nullptr);
+  return *dex_file;
 }
 
 inline bool Class::DescriptorEquals(const char* match) {
@@ -703,8 +707,8 @@
     return match[0] == '[' && GetComponentType()->DescriptorEquals(match + 1);
   } else if (IsPrimitive()) {
     return strcmp(Primitive::Descriptor(GetPrimitiveType()), match) == 0;
-  } else if (IsProxyClass()) {
-    return ProxyDescriptorEquals(match);
+  } else if (IsAnyProxyClass()) {
+    return AnyProxyDescriptorEquals(match);
   } else {
     const DexFile& dex_file = GetDexFile();
     const DexFile::TypeId& type_id = dex_file.GetTypeId(GetClassDef()->class_idx_);
@@ -720,22 +724,32 @@
   }
 }
 
-inline ObjectArray<Class>* Class::GetInterfaces() {
-  CHECK(IsProxyClass());
+inline ObjectArray<Class>* Class::GetInterfacesForAnyProxy() {
+  CHECK(IsAnyProxyClass());
   // First static field.
   auto* field = GetStaticField(0);
   DCHECK_STREQ(field->GetName(), "interfaces");
   MemberOffset field_offset = field->GetOffset();
-  return GetFieldObject<ObjectArray<Class>>(field_offset);
+  ObjectArray<Class>* interfaces_array = GetFieldObject<ObjectArray<Class>>(field_offset);
+
+  CHECK(interfaces_array != nullptr);
+  if (UNLIKELY(IsLambdaProxyClass())) {
+    DCHECK_EQ(1, interfaces_array->GetLength())
+        << "Lambda proxies cannot have multiple direct interfaces implemented";
+  }
+  return interfaces_array;
 }
 
-inline ObjectArray<ObjectArray<Class>>* Class::GetThrows() {
-  CHECK(IsProxyClass());
+inline ObjectArray<ObjectArray<Class>>* Class::GetThrowsForAnyProxy() {
+  CHECK(IsAnyProxyClass());
   // Second static field.
   auto* field = GetStaticField(1);
   DCHECK_STREQ(field->GetName(), "throws");
+
   MemberOffset field_offset = field->GetOffset();
-  return GetFieldObject<ObjectArray<ObjectArray<Class>>>(field_offset);
+  auto* throws_array = GetFieldObject<ObjectArray<ObjectArray<Class>>>(field_offset);
+  CHECK(throws_array != nullptr);
+  return throws_array;
 }
 
 inline MemberOffset Class::GetDisableIntrinsicFlagOffset() {
@@ -796,8 +810,8 @@
     return 0;
   } else if (IsArrayClass()) {
     return 2;
-  } else if (IsProxyClass()) {
-    mirror::ObjectArray<mirror::Class>* interfaces = GetInterfaces();
+  } else if (IsAnyProxyClass()) {
+    mirror::ObjectArray<mirror::Class>* interfaces = GetInterfacesForAnyProxy();
     return interfaces != nullptr ? interfaces->GetLength() : 0;
   } else {
     const DexFile::TypeList* interfaces = GetInterfaceTypeList();
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 05a9039..b201293 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -538,6 +538,7 @@
 
 ArtMethod* Class::FindClassInitializer(size_t pointer_size) {
   for (ArtMethod& method : GetDirectMethods(pointer_size)) {
+    DCHECK(reinterpret_cast<volatile void*>(&method) != nullptr);
     if (method.IsClassInitializer()) {
       DCHECK_STREQ(method.GetName(), "<clinit>");
       DCHECK_STREQ(method.GetSignature().ToString().c_str(), "()V");
@@ -742,8 +743,8 @@
     return Primitive::Descriptor(GetPrimitiveType());
   } else if (IsArrayClass()) {
     return GetArrayDescriptor(storage);
-  } else if (IsProxyClass()) {
-    *storage = Runtime::Current()->GetClassLinker()->GetDescriptorForProxy(this);
+  } else if (IsAnyProxyClass()) {
+    *storage = Runtime::Current()->GetClassLinker()->GetDescriptorForAnyProxy(this);
     return storage->c_str();
   } else {
     const DexFile& dex_file = GetDexFile();
@@ -786,8 +787,10 @@
       DCHECK_EQ(1U, idx);
       return class_linker->FindSystemClass(self, "Ljava/io/Serializable;");
     }
-  } else if (klass->IsProxyClass()) {
-    mirror::ObjectArray<mirror::Class>* interfaces = klass.Get()->GetInterfaces();
+  } else if (klass->IsAnyProxyClass()) {
+    // Proxies don't have a dex cache, so look at the
+    // interfaces through the magic static field "interfaces" from the proxy class itself.
+    mirror::ObjectArray<mirror::Class>* interfaces = klass.Get()->GetInterfacesForAnyProxy();
     DCHECK(interfaces != nullptr);
     return interfaces->Get(idx);
   } else {
@@ -826,7 +829,7 @@
 
 std::string Class::GetLocation() {
   mirror::DexCache* dex_cache = GetDexCache();
-  if (dex_cache != nullptr && !IsProxyClass()) {
+  if (dex_cache != nullptr && !IsAnyProxyClass()) {
     return dex_cache->GetLocation()->ToModifiedUtf8();
   }
   // Arrays and proxies are generated and have no corresponding dex file location.
@@ -944,9 +947,9 @@
   return new_class->AsClass();
 }
 
-bool Class::ProxyDescriptorEquals(const char* match) {
-  DCHECK(IsProxyClass());
-  return Runtime::Current()->GetClassLinker()->GetDescriptorForProxy(this) == match;
+bool Class::AnyProxyDescriptorEquals(const char* match) {
+  DCHECK(IsAnyProxyClass());
+  return Runtime::Current()->GetClassLinker()->GetDescriptorForAnyProxy(this) == match;
 }
 
 // TODO: Move this to java_lang_Class.cc?
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 0ab5b97..fcfb4b9 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -352,8 +352,16 @@
   static String* ComputeName(Handle<Class> h_this) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
+  // Is this either a java.lang.reflect.Proxy or a boxed lambda (java.lang.LambdaProxy)?
+  // -- Most code doesn't need to make the distinction, and this is the preferred thing to check.
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool IsProxyClass() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsAnyProxyClass() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return IsReflectProxyClass() || IsLambdaProxyClass();
+  }
+
+  // Is this a java.lang.reflect.Proxy ?
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool IsReflectProxyClass() SHARED_REQUIRES(Locks::mutator_lock_) {
     // Read access flags without using getter as whether something is a proxy can be check in
     // any loaded state
     // TODO: switch to a check if the super class is java.lang.reflect.Proxy?
@@ -361,6 +369,17 @@
     return (access_flags & kAccClassIsProxy) != 0;
   }
 
+  // Is this a boxed lambda (java.lang.LambdaProxy)?
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool IsLambdaProxyClass() SHARED_REQUIRES(Locks::mutator_lock_) {
+    // Read access flags without using the getter, as whether something is a proxy can be checked
+    // in any loaded state.
+    // TODO: switch to a check of whether the super class is java.lang.LambdaProxy?
+    uint32_t access_flags = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
+    return (access_flags & kAccClassIsLambdaProxy) != 0;
+  }
+
+
   static MemberOffset PrimitiveTypeOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_);
   }
@@ -677,6 +696,8 @@
     return MemberOffset(OFFSETOF_MEMBER(Class, super_class_));
   }
 
+  // Returns the class's ClassLoader.
+  // A null value is returned if and only if this is a boot classpath class.
   ClassLoader* GetClassLoader() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetClassLoader(ClassLoader* new_cl) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -1076,6 +1097,8 @@
 
   bool DescriptorEquals(const char* match) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns the backing DexFile's class definition for this class.
+  // This returns null if and only if the class has no backing DexFile.
   const DexFile::ClassDef* GetClassDef() SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE uint32_t NumDirectInterfaces() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -1102,11 +1125,15 @@
                 size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
-  // For proxy class only.
-  ObjectArray<Class>* GetInterfaces() SHARED_REQUIRES(Locks::mutator_lock_);
+  // For any proxy class only. Returns list of directly implemented interfaces.
+  // The value returned is always non-null.
+  ObjectArray<Class>* GetInterfacesForAnyProxy() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // For proxy class only.
-  ObjectArray<ObjectArray<Class>>* GetThrows() SHARED_REQUIRES(Locks::mutator_lock_);
+  // For any proxy class only. Returns a 2d array of classes.
+  // -- The 0th dimension corresponds to the vtable index.
+  // -- The 1st dimension is a list of checked exception classes.
+  // The value returned is always non-null.
+  ObjectArray<ObjectArray<Class>>* GetThrowsForAnyProxy() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // For reference class only.
   MemberOffset GetDisableIntrinsicFlagOffset() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -1194,7 +1221,7 @@
   IterationRange<StrideIterator<ArtField>> GetIFieldsUnchecked()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool ProxyDescriptorEquals(const char* match) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool AnyProxyDescriptorEquals(const char* match) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Check that the pointer size matches the one in the class linker.
   ALWAYS_INLINE static void CheckPointerSize(size_t pointer_size);
diff --git a/runtime/mirror/class_loader-inl.h b/runtime/mirror/class_loader-inl.h
index e22ddd7..3139117 100644
--- a/runtime/mirror/class_loader-inl.h
+++ b/runtime/mirror/class_loader-inl.h
@@ -21,6 +21,7 @@
 
 #include "base/mutex-inl.h"
 #include "class_table-inl.h"
+#include "lambda/box_class_table-inl.h"
 
 namespace art {
 namespace mirror {
@@ -35,6 +36,10 @@
   if (class_table != nullptr) {
     class_table->VisitRoots(visitor);
   }
+  lambda::BoxClassTable* const lambda_box_class_table = GetLambdaProxyCache();
+  if (lambda_box_class_table != nullptr) {
+    lambda_box_class_table->VisitRoots(visitor);
+  }
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index c2a65d6..9d4fe96 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -24,6 +24,12 @@
 struct ClassLoaderOffsets;
 class ClassTable;
 
+namespace lambda {
+
+class BoxClassTable;
+
+}  // namespace lambda
+
 namespace mirror {
 
 class Class;
@@ -60,6 +66,16 @@
                       reinterpret_cast<uint64_t>(allocator));
   }
 
+  lambda::BoxClassTable* GetLambdaProxyCache() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return reinterpret_cast<lambda::BoxClassTable*>(
+        GetField64(OFFSET_OF_OBJECT_MEMBER(ClassLoader, lambda_proxy_cache_)));
+  }
+
+  void SetLambdaProxyCache(lambda::BoxClassTable* cache) SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetField64<false>(OFFSET_OF_OBJECT_MEMBER(ClassLoader, lambda_proxy_cache_),
+                      reinterpret_cast<uint64_t>(cache));
+  }
+
  private:
   // Visit instance fields of the class loader as well as its associated classes.
   // Null class loader is handled by ClassLinker::VisitClassRoots.
@@ -76,6 +92,7 @@
   uint32_t padding_ ATTRIBUTE_UNUSED;
   uint64_t allocator_;
   uint64_t class_table_;
+  uint64_t lambda_proxy_cache_;
 
   friend struct art::ClassLoaderOffsets;  // for verifying offset information
   friend class Object;  // For VisitReferences
diff --git a/runtime/mirror/field-inl.h b/runtime/mirror/field-inl.h
index 8a0daec..49c443e 100644
--- a/runtime/mirror/field-inl.h
+++ b/runtime/mirror/field-inl.h
@@ -57,14 +57,15 @@
   const auto pointer_size = kTransactionActive ?
       Runtime::Current()->GetClassLinker()->GetImagePointerSize() : sizeof(void*);
   auto dex_field_index = field->GetDexFieldIndex();
-  auto* resolved_field = field->GetDexCache()->GetResolvedField(dex_field_index, pointer_size);
-  if (field->GetDeclaringClass()->IsProxyClass()) {
+  if (field->GetDeclaringClass()->IsAnyProxyClass()) {
     DCHECK(field->IsStatic());
     DCHECK_LT(dex_field_index, 2U);
     // The two static fields (interfaces, throws) of all proxy classes
     // share the same dex file indices 0 and 1. So, we can't resolve
     // them in the dex cache.
   } else {
+    ArtField* resolved_field =
+        field->GetDexCache()->GetResolvedField(dex_field_index, pointer_size);
     if (resolved_field != nullptr) {
       DCHECK_EQ(resolved_field, field);
     } else {
diff --git a/runtime/mirror/field.cc b/runtime/mirror/field.cc
index ff6847c..b02e5b5 100644
--- a/runtime/mirror/field.cc
+++ b/runtime/mirror/field.cc
@@ -56,7 +56,7 @@
 
 ArtField* Field::GetArtField() {
   mirror::Class* declaring_class = GetDeclaringClass();
-  if (UNLIKELY(declaring_class->IsProxyClass())) {
+  if (UNLIKELY(declaring_class->IsAnyProxyClass())) {
     DCHECK(IsStatic());
     DCHECK_EQ(declaring_class->NumStaticFields(), 2U);
     // 0 == Class[] interfaces; 1 == Class[][] throws;
diff --git a/runtime/mirror/lambda_proxy.h b/runtime/mirror/lambda_proxy.h
new file mode 100644
index 0000000..cff3a12
--- /dev/null
+++ b/runtime/mirror/lambda_proxy.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_LAMBDA_PROXY_H_
+#define ART_RUNTIME_MIRROR_LAMBDA_PROXY_H_
+
+#include "lambda/closure.h"
+#include "object.h"
+
+namespace art {
+
+struct LambdaProxyOffsets;
+
+namespace mirror {
+
+// C++ mirror of a lambda proxy. Does not yet have a Java-equivalent source file.
+class MANAGED LambdaProxy FINAL : public Object {
+ public:
+  // Note that the runtime subclasses generate the following static fields:
+
+  // private static java.lang.Class[] interfaces;  // Declared interfaces for the lambda interface.
+  static constexpr size_t kStaticFieldIndexInterfaces = 0;
+  // private static java.lang.Class[][] throws;    // Maps vtable id to list of classes.
+  static constexpr size_t kStaticFieldIndexThrows = 1;
+  static constexpr size_t kStaticFieldCount = 2;   // Number of fields total.
+
+  // The offset from the start of 'LambdaProxy' object, to the closure_ field, in bytes.
+  // -- This is exposed publicly in order to avoid exposing 'closure_' publicly.
+  // -- Only meant to be used in stubs and other compiled code, not in runtime.
+  static inline MemberOffset GetInstanceFieldOffsetClosure() {
+    return OFFSET_OF_OBJECT_MEMBER(LambdaProxy, closure_);
+  }
+
+  // Direct methods available on the class:
+  static constexpr size_t kDirectMethodIndexConstructor = 0;  // <init>()V
+  static constexpr size_t kDirectMethodCount = 1;             // Only the constructor.
+
+  // Accessors to the fields:
+
+  // Get the native closure pointer. Usually non-null outside of lambda proxy contexts.
+  lambda::Closure* GetClosure() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return reinterpret_cast<lambda::Closure*>(
+        GetField64(GetInstanceFieldOffsetClosure()));
+  }
+
+  // Set the native closure pointer. Usually should be non-null outside of lambda proxy contexts.
+  void SetClosure(lambda::Closure* closure) SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetField64<false>(GetInstanceFieldOffsetClosure(),
+                      reinterpret_cast<uint64_t>(closure));
+  }
+
+ private:
+  // Instance fields, present in the base class and every generated subclass:
+
+  // private long closure;
+  union {
+    lambda::Closure* actual;
+    uint64_t padding;         // Don't trip up GetObjectSize checks, since the Java code has a long.
+  } closure_;
+
+  // Friends for generating offset tests:
+  friend struct art::LambdaProxyOffsets;              // for verifying offset information
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(LambdaProxy);
+};
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_LAMBDA_PROXY_H_
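Note (not part of the patch): a minimal sketch of how runtime code could use the accessors declared above, assuming mirror/lambda_proxy.h is included and a mirror::LambdaProxy* for a boxed lambda is already in hand. The hypothetical InstallClosure helper is for illustration only:

    // Hypothetical helper: stores a native closure pointer into the boxed lambda and reads it back.
    void InstallClosure(art::mirror::LambdaProxy* proxy, art::lambda::Closure* closure)
        SHARED_REQUIRES(art::Locks::mutator_lock_) {
      proxy->SetClosure(closure);               // Writes the pointer into the 64-bit closure field.
      DCHECK_EQ(closure, proxy->GetClosure());  // Reinterprets the field back as lambda::Closure*.
    }

The union with a uint64_t member keeps the C++ mirror the same size as the Java-side long field, per the comment above.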
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 5c12091..4603428 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -163,6 +163,7 @@
 #endif
 }
 
+template<bool kCasRelease>
 inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
@@ -181,10 +182,13 @@
         static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
     new_lw = lw;
     new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
-    // This CAS is a CAS release so that when GC updates all the fields of an object and then
-    // changes the object from gray to black, the field updates (stores) will be visible (won't be
-    // reordered after this CAS.)
-  } while (!CasLockWordWeakRelease(expected_lw, new_lw));
+    // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
+    // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
+    // an object and then changes the object from gray to black, the field updates (stores) will be
+    // visible (won't be reordered after this CAS.)
+  } while (!(kCasRelease ?
+             CasLockWordWeakRelease(expected_lw, new_lw) :
+             CasLockWordWeakRelaxed(expected_lw, new_lw)));
   return true;
 #elif USE_BROOKS_READ_BARRIER
   DCHECK(kUseBrooksReadBarrier);
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 5c6520f..71e704e 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -92,13 +92,13 @@
   void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_);
+
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
   NO_RETURN
 #endif
   void SetReadBarrierPointer(Object* rb_ptr) SHARED_REQUIRES(Locks::mutator_lock_);
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-  NO_RETURN
-#endif
+
+  template<bool kCasRelease = false>
   ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);
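Note (not part of the patch): with kCasRelease defaulting to false, call sites now pick the CAS memory ordering explicitly. A hedged sketch of the two forms, assuming obj is a mirror::Object* and Baker read barriers are enabled:

    // Relaxed CAS (the default template argument).
    bool ok = obj->AtomicSetReadBarrierPointer(expected_rb_ptr, new_rb_ptr);
    // Release CAS, as described in object-inl.h for the collector path that publishes
    // field updates before flipping an object from gray to black.
    bool ok_release = obj->AtomicSetReadBarrierPointer</*kCasRelease*/ true>(expected_rb_ptr, new_rb_ptr);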
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index 9946eab..36aa57f 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -54,6 +54,8 @@
 // if any particular method needs to be a default conflict. Used to figure out at runtime if
 // invoking this method will throw an exception.
 static constexpr uint32_t kAccDefaultConflict =      0x00800000;  // method (runtime)
+// Set by the class linker when creating a class that's a subtype of LambdaProxy.
+static constexpr uint32_t kAccClassIsLambdaProxy =   0x01000000;  // class  (dex only)
 
 // Special runtime-only flags.
 // Interface and all its super-interfaces with default methods have been recursively initialized.
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 4cd3c3d..da6cf1f 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -155,7 +155,9 @@
                                          jstring javaOutputName,
                                          jint flags ATTRIBUTE_UNUSED,
                                          // class_loader will be used for app images.
-                                         jobject class_loader ATTRIBUTE_UNUSED) {
+                                         jobject class_loader ATTRIBUTE_UNUSED,
+                                         // dex_elements will be used for app images.
+                                         jobject dex_elements ATTRIBUTE_UNUSED) {
   ScopedUtfChars sourceName(env, javaSourceName);
   if (sourceName.c_str() == nullptr) {
     return 0;
@@ -445,7 +447,12 @@
   NATIVE_METHOD(DexFile, getDexOptNeeded,
                 "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"),
   NATIVE_METHOD(DexFile, openDexFileNative,
-                "(Ljava/lang/String;Ljava/lang/String;ILjava/lang/ClassLoader;)Ljava/lang/Object;"),
+                "(Ljava/lang/String;"
+                "Ljava/lang/String;"
+                "I"
+                "Ljava/lang/ClassLoader;"
+                "[Ldalvik/system/DexPathList$Element;"
+                ")Ljava/lang/Object;"),
 };
 
 void register_dalvik_system_DexFile(JNIEnv* env) {
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 5e42392..6cebd4d 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -103,7 +103,7 @@
 static jobjectArray Class_getProxyInterfaces(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Class* c = DecodeClass(soa, javaThis);
-  return soa.AddLocalReference<jobjectArray>(c->GetInterfaces()->Clone(soa.Self()));
+  return soa.AddLocalReference<jobjectArray>(c->GetInterfacesForAnyProxy()->Clone(soa.Self()));
 }
 
 static mirror::ObjectArray<mirror::Field>* GetDeclaredFields(
@@ -489,7 +489,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+  if (klass->IsAnyProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
   Handle<mirror::Class> annotation_class(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
@@ -501,7 +501,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+  if (klass->IsAnyProxyClass() || klass->GetDexCache() == nullptr) {
     // Return an empty array instead of a null pointer.
     mirror::Class* annotation_array_class =
         soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
@@ -517,7 +517,7 @@
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
   mirror::ObjectArray<mirror::Class>* classes = nullptr;
-  if (!klass->IsProxyClass() && klass->GetDexCache() != nullptr) {
+  if (!klass->IsAnyProxyClass() && klass->GetDexCache() != nullptr) {
     classes = klass->GetDexFile().GetDeclaredClasses(klass);
   }
   if (classes == nullptr) {
@@ -543,7 +543,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+  if (klass->IsAnyProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
   return soa.AddLocalReference<jclass>(klass->GetDexFile().GetEnclosingClass(klass));
@@ -553,7 +553,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+  if (klass->IsAnyProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
   mirror::Object* method = klass->GetDexFile().GetEnclosingMethod(klass);
@@ -570,7 +570,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+  if (klass->IsAnyProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
   mirror::Object* method = klass->GetDexFile().GetEnclosingMethod(klass);
@@ -587,7 +587,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+  if (klass->IsAnyProxyClass() || klass->GetDexCache() == nullptr) {
     return defaultValue;
   }
   uint32_t flags;
@@ -601,7 +601,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+  if (klass->IsAnyProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
   mirror::String* class_name = nullptr;
@@ -615,7 +615,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+  if (klass->IsAnyProxyClass() || klass->GetDexCache() == nullptr) {
     return false;
   }
   mirror::String* class_name = nullptr;
@@ -630,7 +630,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+  if (klass->IsAnyProxyClass() || klass->GetDexCache() == nullptr) {
     return false;
   }
   Handle<mirror::Class> annotation_class(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
@@ -641,7 +641,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+  if (klass->IsAnyProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
   // Return null for anonymous classes.
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index aac800a..9166ecc 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -419,7 +419,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   ArtField* field = soa.Decode<mirror::Field*>(javaField)->GetArtField();
-  if (field->GetDeclaringClass()->IsProxyClass()) {
+  if (field->GetDeclaringClass()->IsAnyProxyClass()) {
     return nullptr;
   }
   Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
@@ -429,7 +429,7 @@
 static jobjectArray Field_getDeclaredAnnotations(JNIEnv* env, jobject javaField) {
   ScopedFastNativeObjectAccess soa(env);
   ArtField* field = soa.Decode<mirror::Field*>(javaField)->GetArtField();
-  if (field->GetDeclaringClass()->IsProxyClass()) {
+  if (field->GetDeclaringClass()->IsAnyProxyClass()) {
     // Return an empty array instead of a null pointer.
     mirror::Class* annotation_array_class =
         soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
@@ -443,7 +443,7 @@
 static jobjectArray Field_getSignatureAnnotation(JNIEnv* env, jobject javaField) {
   ScopedFastNativeObjectAccess soa(env);
   ArtField* field = soa.Decode<mirror::Field*>(javaField)->GetArtField();
-  if (field->GetDeclaringClass()->IsProxyClass()) {
+  if (field->GetDeclaringClass()->IsAnyProxyClass()) {
     return nullptr;
   }
   return soa.AddLocalReference<jobjectArray>(
@@ -455,7 +455,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   ArtField* field = soa.Decode<mirror::Field*>(javaField)->GetArtField();
-  if (field->GetDeclaringClass()->IsProxyClass()) {
+  if (field->GetDeclaringClass()->IsAnyProxyClass()) {
     return false;
   }
   Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index caacba6..7894c9b 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -32,7 +32,7 @@
 static jobject Method_getAnnotationNative(JNIEnv* env, jobject javaMethod, jclass annotationType) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
+  if (method->GetDeclaringClass()->IsAnyProxyClass()) {
     return nullptr;
   }
   StackHandleScope<1> hs(soa.Self());
@@ -44,7 +44,7 @@
 static jobjectArray Method_getDeclaredAnnotations(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
+  if (method->GetDeclaringClass()->IsAnyProxyClass()) {
     // Return an empty array instead of a null pointer.
     mirror::Class* annotation_array_class =
         soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
@@ -67,7 +67,7 @@
 static jobjectArray Method_getExceptionTypes(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
+  if (method->GetDeclaringClass()->IsAnyProxyClass()) {
     mirror::Class* klass = method->GetDeclaringClass();
     int throws_index = -1;
     size_t i = 0;
@@ -79,7 +79,8 @@
       ++i;
     }
     CHECK_NE(throws_index, -1);
-    mirror::ObjectArray<mirror::Class>* declared_exceptions = klass->GetThrows()->Get(throws_index);
+    mirror::ObjectArray<mirror::Class>* declared_exceptions =
+        klass->GetThrowsForAnyProxy()->Get(throws_index);
     return soa.AddLocalReference<jobjectArray>(declared_exceptions->Clone(soa.Self()));
   } else {
     mirror::ObjectArray<mirror::Class>* result_array =
@@ -104,7 +105,7 @@
 static jobjectArray Method_getParameterAnnotationsNative(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
+  if (method->GetDeclaringClass()->IsAnyProxyClass()) {
     return nullptr;
   }
   return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetParameterAnnotations(method));
@@ -120,7 +121,7 @@
                                                  jclass annotationType) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
+  if (method->GetDeclaringClass()->IsAnyProxyClass()) {
     return false;
   }
   StackHandleScope<1> hs(soa.Self());
diff --git a/runtime/native/java_lang_reflect_Proxy.cc b/runtime/native/java_lang_reflect_Proxy.cc
index 4a6ab40..647cec0 100644
--- a/runtime/native/java_lang_reflect_Proxy.cc
+++ b/runtime/native/java_lang_reflect_Proxy.cc
@@ -27,15 +27,31 @@
 namespace art {
 
 static jclass Proxy_generateProxy(JNIEnv* env, jclass, jstring name, jobjectArray interfaces,
-                                  jobject loader, jobjectArray methods, jobjectArray throws) {
+                                  jobject loader, jobjectArray methods, jobjectArray throws,
+                                  jboolean is_lambda_proxy) {
   ScopedFastNativeObjectAccess soa(env);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  return soa.AddLocalReference<jclass>(class_linker->CreateProxyClass(
-      soa, name, interfaces, loader, methods, throws));
+
+  mirror::Class* proxy_class = nullptr;
+
+  if (UNLIKELY(is_lambda_proxy)) {
+    bool already_exists;  // XX: Perhaps add lambdaProxyCache to java.lang.ClassLoader ?
+    proxy_class = class_linker->CreateLambdaProxyClass(soa,
+                                                       name,
+                                                       interfaces,
+                                                       loader,
+                                                       methods,
+                                                       throws,
+                                                       /*out*/&already_exists);
+  } else {
+    proxy_class = class_linker->CreateProxyClass(soa, name, interfaces, loader, methods, throws);
+  }
+
+  return soa.AddLocalReference<jclass>(proxy_class);
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Proxy, generateProxy, "!(Ljava/lang/String;[Ljava/lang/Class;Ljava/lang/ClassLoader;[Ljava/lang/reflect/Method;[[Ljava/lang/Class;)Ljava/lang/Class;"),
+  NATIVE_METHOD(Proxy, generateProxy, "!(Ljava/lang/String;[Ljava/lang/Class;Ljava/lang/ClassLoader;[Ljava/lang/reflect/Method;[[Ljava/lang/Class;Z)Ljava/lang/Class;"),
 };
 
 void register_java_lang_reflect_Proxy(JNIEnv* env) {
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 2e1fc95..dfd783b 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -152,10 +152,10 @@
           .WithType<bool>()
           .WithValueMap({{"false", false}, {"true", true}})
           .IntoKey(M::UseJIT)
-      .Define("-Xjitcodecacheinitialcapacity:_")
+      .Define("-Xjitinitialsize:_")
           .WithType<MemoryKiB>()
           .IntoKey(M::JITCodeCacheInitialCapacity)
-      .Define("-Xjitcodecachemaxcapacity:_")
+      .Define("-Xjitmaxsize:_")
           .WithType<MemoryKiB>()
           .IntoKey(M::JITCodeCacheMaxCapacity)
       .Define("-Xjitthreshold:_")
@@ -643,7 +643,6 @@
   UsageMessage(stream, "  -XX:ForegroundHeapGrowthMultiplier=doublevalue\n");
   UsageMessage(stream, "  -XX:LowMemoryMode\n");
   UsageMessage(stream, "  -Xprofile:{threadcpuclock,wallclock,dualclock}\n");
-  UsageMessage(stream, "  -Xjitcodecachesize:N\n");
   UsageMessage(stream, "  -Xjitthreshold:integervalue\n");
   UsageMessage(stream, "\n");
 
@@ -687,6 +686,8 @@
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Xpatchoat:filename\n");
   UsageMessage(stream, "  -Xusejit:booleanvalue\n");
+  UsageMessage(stream, "  -Xjitinitialsize:N\n");
+  UsageMessage(stream, "  -Xjitmaxsize:N\n");
   UsageMessage(stream, "  -X[no]relocate\n");
   UsageMessage(stream, "  -X[no]dex2oat (Whether to invoke dex2oat on the application)\n");
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
@@ -721,6 +722,7 @@
   UsageMessage(stream, "  -Xjitblocking\n");
   UsageMessage(stream, "  -Xjitmethod:signature[,signature]* (eg Ljava/lang/String\\;replace)\n");
   UsageMessage(stream, "  -Xjitclass:classname[,classname]*\n");
+  UsageMessage(stream, "  -Xjitcodecachesize:N\n");
   UsageMessage(stream, "  -Xjitoffset:offset[,offset]\n");
   UsageMessage(stream, "  -Xjitconfig:filename\n");
   UsageMessage(stream, "  -Xjitcheckcg\n");
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 57472ad..57aafcc 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -121,7 +121,7 @@
       GenerateProxyClass(soa, jclass_loader, "$Proxy1234", interfaces)));
   interfaces.clear();  // Don't least possibly stale objects in the array as good practice.
   ASSERT_TRUE(proxy_class.Get() != nullptr);
-  ASSERT_TRUE(proxy_class->IsProxyClass());
+  ASSERT_TRUE(proxy_class->IsReflectProxyClass());
   ASSERT_TRUE(proxy_class->IsInitialized());
 
   EXPECT_EQ(2U, proxy_class->NumDirectInterfaces());  // Interfaces$I and Interfaces$J.
@@ -157,7 +157,7 @@
   }
 
   ASSERT_TRUE(proxyClass.Get() != nullptr);
-  ASSERT_TRUE(proxyClass->IsProxyClass());
+  ASSERT_TRUE(proxyClass->IsReflectProxyClass());
   ASSERT_TRUE(proxyClass->IsInitialized());
 
   EXPECT_TRUE(proxyClass->GetIFieldsPtr() == nullptr);
@@ -208,10 +208,10 @@
   }
 
   ASSERT_TRUE(proxyClass0.Get() != nullptr);
-  ASSERT_TRUE(proxyClass0->IsProxyClass());
+  ASSERT_TRUE(proxyClass0->IsReflectProxyClass());
   ASSERT_TRUE(proxyClass0->IsInitialized());
   ASSERT_TRUE(proxyClass1.Get() != nullptr);
-  ASSERT_TRUE(proxyClass1->IsProxyClass());
+  ASSERT_TRUE(proxyClass1->IsReflectProxyClass());
   ASSERT_TRUE(proxyClass1->IsInitialized());
 
   LengthPrefixedArray<ArtField>* static_fields0 = proxyClass0->GetSFieldsPtr();
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 9098d38..2ff9fd2 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -172,12 +172,23 @@
     } else {
       return cur_shadow_frame_->GetVRegReference(0);
     }
-  } else if (m->IsProxyMethod()) {
+  } else if (m->IsReflectProxyMethod()) {
     if (cur_quick_frame_ != nullptr) {
       return artQuickGetProxyThisObject(cur_quick_frame_);
     } else {
       return cur_shadow_frame_->GetVRegReference(0);
     }
+  } else if (m->IsLambdaProxyMethod()) {
+    if (cur_quick_frame_ != nullptr) {
+      // XX: Should be safe to return null here, the lambda proxies
+      // don't set up their own quick frame because they don't need to spill any registers.
+      // By the time we are executing inside of the final target of the proxy invoke,
+      // the original 'this' reference is no longer live.
+      LOG(WARNING) << "Lambda proxies don't have a quick frame, do they?!";
+      return nullptr;
+    } else {
+      return cur_shadow_frame_->GetVRegReference(0);
+    }
   } else {
     const DexFile::CodeItem* code_item = m->GetCodeItem();
     if (code_item == nullptr) {
@@ -814,7 +825,27 @@
     // compiled method without any stubs. Therefore the method must have a OatQuickMethodHeader.
     DCHECK(!method->IsDirect() && !method->IsConstructor())
         << "Constructors of proxy classes must have a OatQuickMethodHeader";
-    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+
+    if (method->IsReflectProxyMethod()) {
+      return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+    } else if (method->IsLambdaProxyMethod()) {
+      // Set this to true later once every stub works without a frame.
+      // This is currently 'false' because using a closure as a "long"
+      // requires a quick frame to be set up on 32-bit architectures.
+      constexpr bool kLambdaProxyStubsSupportFrameless = false;
+      if (kIsDebugBuild || !kLambdaProxyStubsSupportFrameless) {
+        // When debugging we always use the 'RefAndArgs' quick frame to allow us
+        // to see a runtime stub when unwinding.
+        return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+      } else {
+        // Lambda proxies don't bother setting up a quick frame for release builds.
+        LOG(FATAL) << "Requested QuickMethodFrameInfo for a lambda proxy,"
+                   << "but it doesn't have one, for method: " << PrettyMethod(method);
+        UNREACHABLE();
+      }
+    } else {
+      LOG(FATAL) << "Unknown type of proxy method " << PrettyMethod(method);
+    }
   }
 
   // The only remaining case is if the method is native and uses the generic JNI stub.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index b09b87f..a390908 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -948,7 +948,12 @@
   Locks::mutator_lock_->ExclusiveLock(self);
   Locks::mutator_lock_->ExclusiveUnlock(self);
 #endif
-  AssertThreadsAreSuspended(self, self, debug_thread);
+  // Disabled for the following race condition:
+  // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock.
+  // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected).
+  // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable
+  // state (from SetStateUnsafe).
+  // AssertThreadsAreSuspended(self, self, debug_thread);
 
   VLOG(threads) << *self << " SuspendAllForDebugger complete";
 }
diff --git a/runtime/utils.h b/runtime/utils.h
index 3690f86..8b7941a 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -18,9 +18,11 @@
 #define ART_RUNTIME_UTILS_H_
 
 #include <pthread.h>
+#include <stdlib.h>
 
 #include <limits>
 #include <memory>
+#include <random>
 #include <string>
 #include <type_traits>
 #include <vector>
@@ -350,6 +352,26 @@
                  double* parsed_value,
                  UsageFn Usage);
 
+#if defined(__BIONIC__)
+struct Arc4RandomGenerator {
+  typedef uint32_t result_type;
+  static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); }
+  static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }
+  uint32_t operator() () { return arc4random(); }
+};
+using RNG = Arc4RandomGenerator;
+#else
+using RNG = std::random_device;
+#endif
+
+template <typename T>
+T GetRandomNumber(T min, T max) {
+  CHECK_LT(min, max);
+  std::uniform_int_distribution<T> dist(min, max);
+  RNG rng;
+  return dist(rng);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
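Note (not part of the patch): GetRandomNumber draws from arc4random() on Bionic and std::random_device elsewhere, constructing a fresh generator per call; the bounds go straight to std::uniform_int_distribution, so both ends are inclusive, and CHECK_LT(min, max) requires max to be strictly greater than min. A hedged usage sketch:

    // Hypothetical call site: pick a value in [1, 100], inclusive.
    uint32_t jitter = art::GetRandomNumber<uint32_t>(1u, 100u);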
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index c32d34a..d5fed2a 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -1226,6 +1226,46 @@
     return arg / -0.25f;
   }
 
+  /**
+   * Test strength reduction of factors of the form (2^n + 1).
+   */
+
+  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (before)
+  /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const9:i\d+>>      IntConstant 9
+  /// CHECK:                            Mul [<<Arg>>,<<Const9>>]
+
+  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (after)
+  /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const3:i\d+>>      IntConstant 3
+  /// CHECK:       <<Shift:i\d+>>       Shl [<<Arg>>,<<Const3>>]
+  /// CHECK-NEXT:                       Add [<<Arg>>,<<Shift>>]
+
+  public static int mulPow2Plus1(int arg) {
+    return arg * 9;
+  }
+
+
+  /**
+   * Test strength reduction of factors of the form (2^n - 1).
+   */
+
+  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (before)
+  /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const31:j\d+>>     LongConstant 31
+  /// CHECK:                            Mul [<<Arg>>,<<Const31>>]
+
+  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (after)
+  /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const5:i\d+>>      IntConstant 5
+  /// CHECK:       <<Shift:j\d+>>       Shl [<<Arg>>,<<Const5>>]
+  /// CHECK-NEXT:                       Sub [<<Shift>>,<<Arg>>]
+
+  public static long mulPow2Minus1(long arg) {
+    return arg * 31;
+  }
+
+
   public static void main(String[] args) {
     int arg = 123456;
 
@@ -1283,5 +1323,15 @@
     assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L);
     assertIntEquals(Shr24And127(0xc1234567), 0x41);
     assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L);
+    assertIntEquals(0, mulPow2Plus1(0));
+    assertIntEquals(9, mulPow2Plus1(1));
+    assertIntEquals(18, mulPow2Plus1(2));
+    assertIntEquals(900, mulPow2Plus1(100));
+    assertIntEquals(111105, mulPow2Plus1(12345));
+    assertLongEquals(0, mulPow2Minus1(0));
+    assertLongEquals(31, mulPow2Minus1(1));
+    assertLongEquals(62, mulPow2Minus1(2));
+    assertLongEquals(3100, mulPow2Minus1(100));
+    assertLongEquals(382695, mulPow2Minus1(12345));
   }
 }
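Note (not part of the patch): the checker expectations above encode the usual shift-and-add strength reduction. Expressed directly as scalar code (values only, ignoring ART's IR), the rewrites being tested are:

    // x * 9  == x * (8 + 1)  ->  (x << 3) + x
    int  mulPow2Plus1(int x)   { return (x << 3) + x; }
    // x * 31 == x * (32 - 1)  ->  (x << 5) - x
    long mulPow2Minus1(long x) { return (x << 5) - x; }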
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index ab4afdb..1de0bae 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -136,11 +136,11 @@
 ## CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
 ## CHECK-DAG:     <<Cst5:i\d+>>  IntConstant 5
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
-## CHECK-DAG:     <<Cst9:i\d+>>  IntConstant 9
+## CHECK-DAG:     <<Cst11:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<ArgZ>>]                              loop:<<HeaderY>>
-## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:<<HeaderY>>
+## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst11>>]                  loop:<<HeaderY>>
 ## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<Cst1>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:     <<Add5>>       Add [<<PhiX2>>,<<Cst5>>]                   loop:<<HeaderY>>
@@ -152,12 +152,12 @@
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
-## CHECK-DAG:     <<Cst9:i\d+>>  IntConstant 9
+## CHECK-DAG:     <<Cst11:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add7:i\d+>>]               loop:<<HeaderY:B\d+>>
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<ArgZ>>]                              loop:none
-## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:none
+## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst11>>]                  loop:none
 ## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:none
 ## CHECK-DAG:                    Return [<<PhiX2>>]                         loop:none
 
@@ -177,7 +177,7 @@
 
   # Additional logic which will end up outside the loop
   if-eqz p2, :skip_if
-  mul-int/lit8 p0, p0, 9
+  mul-int/lit8 p0, p0, 11
   :skip_if
 
   if-nez v0, :loop_end    # will always take the branch
diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt
index 16381e4..8afe4bc 100644
--- a/test/955-lambda-smali/expected.txt
+++ b/test/955-lambda-smali/expected.txt
@@ -26,3 +26,5 @@
 (CaptureVariables) (0-args, 1 captured variable 'D'): value is -Infinity
 (CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is true,R,∂,1000,12345678,3287471278325742,Infinity,-Infinity
 (CaptureVariables) Caught NPE
+(BoxInvoke) Hello boxing world! (0-args, no closure) void
+(BoxInvoke) Hello boxing world!(1-args, no closure) returned: 12345678
diff --git a/test/955-lambda-smali/smali/BoxInvoke.smali b/test/955-lambda-smali/smali/BoxInvoke.smali
new file mode 100644
index 0000000..8b53333
--- /dev/null
+++ b/test/955-lambda-smali/smali/BoxInvoke.smali
@@ -0,0 +1,103 @@
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LBoxInvoke;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static run()V
+    .registers 0
+
+    invoke-static {}, LBoxInvoke;->testBoxInvoke()V
+    invoke-static {}, LBoxInvoke;->forceGC()V
+
+    return-void
+.end method
+
+# Test that invoke-virtual works on boxed innate lambdas.
+.method public static testBoxInvoke()V
+    .registers 100
+
+    # Try invoking 0-arg void return lambda
+    create-lambda v0, LBoxInvoke;->doHelloWorld0(J)V
+    const-string v2, "Ljava/lang/Runnable;"
+    box-lambda v2, v0 # Ljava/lang/Runnable;
+    invoke-interface {v2}, Ljava/lang/Runnable;->run()V
+
+    # Try invoking 1-arg int return lambda
+    create-lambda v3, LBoxInvoke;->doHelloWorld1(JLjava/lang/Object;)I
+    const-string v5, "Ljava/lang/Comparable;"
+    box-lambda v5, v3 # Ljava/lang/Comparable;
+    const-string v6, "Hello boxing world!"
+    invoke-interface {v5, v6}, Ljava/lang/Comparable;->compareTo(Ljava/lang/Object;)I
+    move-result v7
+    sget-object v8, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v8, v7}, Ljava/io/PrintStream;->println(I)V
+
+    return-void
+
+    # TODO: more tests once box-lambda can take a type descriptor.
+
+.end method
+
+#TODO: should use a closure type instead of a long.
+.method public static doHelloWorld0(J)V
+    .registers 4 # 1 wide parameters, 2 locals
+
+    const-string v0, "(BoxInvoke) Hello boxing world! (0-args, no closure) void"
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long.
+.method public static doHelloWorld1(JLjava/lang/Object;)I
+    # J = closure, L = obj, I = return type
+    .registers 6 # 1 wide parameters, 1 narrow parameter, 3 locals
+
+    # Prints "<before> $parameter1(Object) <after>:" without the line terminator. 
+
+    const-string v0, "(BoxInvoke) "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    # System.out.print("<before>");
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    # System.out.print(obj);
+    invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/Object;)V
+
+    # System.out.print("<after>: ");
+    const-string v0, "(1-args, no closure) returned: "
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    const v2, 12345678
+    return v2
+.end method
+
+# Force a GC. Used to ensure our weak reference table of boxed lambdas is getting swept.
+.method private static forceGC()V
+    .registers 1
+    invoke-static {}, Ljava/lang/Runtime;->getRuntime()Ljava/lang/Runtime;
+    move-result-object v0
+    invoke-virtual {v0}, Ljava/lang/Runtime;->gc()V
+
+    return-void
+.end method
diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali
index 915de2d..157adb3 100644
--- a/test/955-lambda-smali/smali/BoxUnbox.smali
+++ b/test/955-lambda-smali/smali/BoxUnbox.smali
@@ -51,6 +51,7 @@
     .registers 3
 
     create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
+    const-string v2, "Ljava/lang/Runnable;"
     box-lambda v2, v0 # v2 = box(v0)
     unbox-lambda v0, v2, J # v0 = unbox(v2)
     invoke-lambda v0, {}
@@ -63,7 +64,9 @@
    .registers 6 # 0 parameters, 6 locals
 
     create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
+    const-string v2, "Ljava/lang/Runnable;"
     box-lambda v2, v0 # v2 = box(v0)
+    const-string v3, "Ljava/lang/Runnable;"
     box-lambda v3, v0 # v3 = box(v0)
 
     # The objects should be not-null, and they should have the same reference
@@ -116,6 +119,7 @@
     const v0, 0  # v0 = null
     const v1, 0  # v1 = null
 :start
+    const-string v2, "Ljava/lang/Runnable;"
     box-lambda v2, v0  # attempting to box a null lambda will throw NPE
 :end
     return-void
diff --git a/test/955-lambda-smali/smali/CaptureVariables.smali b/test/955-lambda-smali/smali/CaptureVariables.smali
index f18b7ff..531c259 100644
--- a/test/955-lambda-smali/smali/CaptureVariables.smali
+++ b/test/955-lambda-smali/smali/CaptureVariables.smali
@@ -243,6 +243,8 @@
     # TODO: create-lambda should not write to both v0 and v1
     invoke-lambda v0, {}
 
+    return-void
+
 .end method
 
 #TODO: should use a closure type instead of a long
diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali
index 9892d61..e8ab84c 100644
--- a/test/955-lambda-smali/smali/Main.smali
+++ b/test/955-lambda-smali/smali/Main.smali
@@ -25,6 +25,7 @@
     invoke-static {}, LBoxUnbox;->run()V
     invoke-static {}, LMoveResult;->run()V
     invoke-static {}, LCaptureVariables;->run()V
+    invoke-static {}, LBoxInvoke;->run()V
 
 # TODO: add tests when verification fails
 
diff --git a/test/960-default-smali/build b/test/960-default-smali/build
index 4dc848c..b72afcd 100755
--- a/test/960-default-smali/build
+++ b/test/960-default-smali/build
@@ -22,7 +22,7 @@
 
 # Should we compile with Java source code. By default we will use Smali.
 USES_JAVA_SOURCE="false"
-if [[ $ARGS == *"--jvm"* ]]; then
+if [[ $@ == *"--jvm"* ]]; then
   USES_JAVA_SOURCE="true"
 elif [[ "$USE_JACK" == "true" ]]; then
   if $JACK -D jack.java.source.version=1.8 >& /dev/null; then
diff --git a/test/961-default-iface-resolution-generated/build b/test/961-default-iface-resolution-generated/build
index b4ced3e..005f76c 100755
--- a/test/961-default-iface-resolution-generated/build
+++ b/test/961-default-iface-resolution-generated/build
@@ -33,7 +33,7 @@
 
 # Should we compile with Java source code. By default we will use Smali.
 USES_JAVA_SOURCE="false"
-if [[ $ARGS == *"--jvm"* ]]; then
+if [[ $@ == *"--jvm"* ]]; then
   USES_JAVA_SOURCE="true"
 elif [[ $USE_JACK == "true" ]]; then
   if "$JACK" -D jack.java.source.version=1.8 >& /dev/null; then
diff --git a/test/LambdaInterfaces/LambdaInterfaces.java b/test/LambdaInterfaces/LambdaInterfaces.java
new file mode 100644
index 0000000..261163d
--- /dev/null
+++ b/test/LambdaInterfaces/LambdaInterfaces.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class LambdaInterfaces {
+    interface I {
+        public int i();
+    }
+    interface J {
+        public String foo = "foo";
+        public void j1();
+    }
+    interface K extends J {
+    }
+    interface L {
+        public int sum(int a, int b);
+    }
+    interface C {
+        public String concat(String a, String b);
+    }
+}
diff --git a/test/run-test b/test/run-test
index 10ec310..d0da34e 100755
--- a/test/run-test
+++ b/test/run-test
@@ -669,9 +669,9 @@
 # -------------------------------
 # Return whether the Optimizing compiler has read barrier support for ARCH.
 function arch_supports_read_barrier() {
-  # Optimizing has read barrier support for x86 and x86-64 at the
+  # Optimizing has read barrier support for ARM, x86 and x86-64 at the
   # moment.
-  [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
+  [ "x$1" = xarm ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
 }
 
 # Tests named '<number>-checker-*' will also have their CFGs verified with
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index de27a6f..0747712 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -43,9 +43,11 @@
 vm_args=""
 # By default, we run the whole JDWP test suite.
 test="org.apache.harmony.jpda.tests.share.AllTests"
+host="no"
 
 while true; do
   if [[ "$1" == "--mode=host" ]]; then
+    host="yes"
     # Specify bash explicitly since the art script cannot, since it has to run on the device
     # with mksh.
     art="bash ${OUT_DIR-out}/host/linux-x86/bin/art"
@@ -118,3 +120,15 @@
       --classpath $test_jar \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       $test
+
+vogar_exit_status=$?
+
+echo "Killing stalled dalvikvm processes..."
+if [[ $host == "yes" ]]; then
+  pkill -9 -f /bin/dalvikvm
+else
+  adb shell pkill -9 -f /bin/dalvikvm
+fi
+echo "Done."
+
+exit $vogar_exit_status