Merge "Rename vdex-fd to output-vdex-fd, add input-vdex-fd."
diff --git a/benchmark/string-indexof/info.txt b/benchmark/string-indexof/info.txt
new file mode 100644
index 0000000..cc04217
--- /dev/null
+++ b/benchmark/string-indexof/info.txt
@@ -0,0 +1 @@
+Benchmarks for repeating String.indexOf() instructions in a loop.
diff --git a/benchmark/string-indexof/src/StringIndexOfBenchmark.java b/benchmark/string-indexof/src/StringIndexOfBenchmark.java
new file mode 100644
index 0000000..481a27a
--- /dev/null
+++ b/benchmark/string-indexof/src/StringIndexOfBenchmark.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class StringIndexOfBenchmark {
+    public static final String string36 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";  // length = 36
+
+    public void timeIndexOf0(int count) {
+        final char c = '0';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf1(int count) {
+        final char c = '1';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf2(int count) {
+        final char c = '2';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf3(int count) {
+        final char c = '3';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf4(int count) {
+        final char c = '4';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf7(int count) {
+        final char c = '7';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf8(int count) {
+        final char c = '8';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfF(int count) {
+        final char c = 'F';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfG(int count) {
+        final char c = 'G';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfV(int count) {
+        final char c = 'V';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfW(int count) {
+        final char c = 'W';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf_(int count) {
+        final char c = '_';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    static int $noinline$indexOf(String s, char c) {
+        if (doThrow) { throw new Error(); }
+        return s.indexOf(c);
+    }
+
+    public static boolean doThrow = false;
+}
diff --git a/build/Android.bp b/build/Android.bp
index 9156027..cd9d74a 100644
--- a/build/Android.bp
+++ b/build/Android.bp
@@ -145,6 +145,10 @@
         "external/vixl/src",
         "external/zlib",
     ],
+
+    tidy_checks: [
+        "-google-default-arguments",
+    ],
 }
 
 art_debug_defaults {
diff --git a/cmdline/cmdline.h b/cmdline/cmdline.h
index dec9c83..6e042c3 100644
--- a/cmdline/cmdline.h
+++ b/cmdline/cmdline.h
@@ -234,7 +234,7 @@
     // Checks for --boot-image location.
     {
       std::string boot_image_location = boot_image_location_;
-      size_t file_name_idx = boot_image_location.rfind("/");
+      size_t file_name_idx = boot_image_location.rfind('/');
       if (file_name_idx == std::string::npos) {  // Prevent a InsertIsaDirectory check failure.
         *error_msg = "Boot image location must have a / in it";
         return false;
@@ -244,7 +244,7 @@
       // This prevents a common error "Could not create an image space..." when initing the Runtime.
       if (file_name_idx != std::string::npos) {
         std::string no_file_name = boot_image_location.substr(0, file_name_idx);
-        size_t ancestor_dirs_idx = no_file_name.rfind("/");
+        size_t ancestor_dirs_idx = no_file_name.rfind('/');
 
         std::string parent_dir_name;
         if (ancestor_dirs_idx != std::string::npos) {
diff --git a/cmdline/cmdline_parser.h b/cmdline/cmdline_parser.h
index cfc0967..d82fd48 100644
--- a/cmdline/cmdline_parser.h
+++ b/cmdline/cmdline_parser.h
@@ -390,7 +390,7 @@
         // Unlike regular argument definitions, when a value gets parsed into its
         // stronger type, we just throw it away.
 
-        if (ign.find("_") != std::string::npos) {  // Does the arg-def have a wildcard?
+        if (ign.find('_') != std::string::npos) {  // Does the arg-def have a wildcard?
           // pretend this is a string, e.g. -Xjitconfig:<anythinggoeshere>
           auto&& builder = Define(ignore_name).template WithType<std::string>().IntoIgnore();
           assert(&builder == this);
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index cad5104..550e8c4 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -78,7 +78,7 @@
     return memcmp(std::addressof(expected), std::addressof(actual), sizeof(expected)) == 0;
   }
 
-  bool UsuallyEquals(const char* expected, std::string actual) {
+  bool UsuallyEquals(const char* expected, const std::string& actual) {
     return std::string(expected) == actual;
   }
 
@@ -129,7 +129,7 @@
     parser_ = ParsedOptions::MakeParser(false);  // do not ignore unrecognized options
   }
 
-  static ::testing::AssertionResult IsResultSuccessful(CmdlineResult result) {
+  static ::testing::AssertionResult IsResultSuccessful(const CmdlineResult& result) {
     if (result.IsSuccess()) {
       return ::testing::AssertionSuccess();
     } else {
@@ -138,7 +138,7 @@
     }
   }
 
-  static ::testing::AssertionResult IsResultFailure(CmdlineResult result,
+  static ::testing::AssertionResult IsResultFailure(const CmdlineResult& result,
                                                     CmdlineResult::Status failure_status) {
     if (result.IsSuccess()) {
       return ::testing::AssertionFailure() << " got success but expected failure: "
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 13a3235..3f55eef 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -182,7 +182,7 @@
 struct CmdlineType<Memory<Divisor>> : CmdlineTypeParser<Memory<Divisor>> {
   using typename CmdlineTypeParser<Memory<Divisor>>::Result;
 
-  Result Parse(const std::string arg) {
+  Result Parse(const std::string& arg) {
     CMDLINE_DEBUG_LOG << "Parsing memory: " << arg << std::endl;
     size_t val = ParseMemoryOption(arg.c_str(), Divisor);
     CMDLINE_DEBUG_LOG << "Memory parsed to size_t value: " << val << std::endl;
@@ -696,7 +696,7 @@
   }
 
   static std::string RemovePrefix(const std::string& source) {
-    size_t prefix_idx = source.find(":");
+    size_t prefix_idx = source.find(':');
 
     if (prefix_idx == std::string::npos) {
       return "";
diff --git a/cmdline/detail/cmdline_parse_argument_detail.h b/cmdline/detail/cmdline_parse_argument_detail.h
index 84beff5..14eac30 100644
--- a/cmdline/detail/cmdline_parse_argument_detail.h
+++ b/cmdline/detail/cmdline_parse_argument_detail.h
@@ -108,7 +108,7 @@
       // If this is true, then the wildcard matching later on can still fail, so this is not
       // a guarantee that the argument is correct, it's more of a strong hint that the
       // user-provided input *probably* was trying to match this argument.
-      size_t MaybeMatches(TokenRange token_list) const {
+      size_t MaybeMatches(const TokenRange& token_list) const {
         auto best_match = FindClosestMatch(token_list);
 
         return best_match.second;
@@ -118,7 +118,7 @@
       //
       // Returns the token range that was the closest match and the # of tokens that
       // this range was matched up until.
-      std::pair<const TokenRange*, size_t> FindClosestMatch(TokenRange token_list) const {
+      std::pair<const TokenRange*, size_t> FindClosestMatch(const TokenRange& token_list) const {
         const TokenRange* best_match_ptr = nullptr;
 
         size_t best_match = 0;
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 0b14859..f1bf27e 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -105,6 +105,7 @@
                 "optimizing/instruction_simplifier_arm.cc",
                 "optimizing/instruction_simplifier_shared.cc",
                 "optimizing/intrinsics_arm.cc",
+                "optimizing/intrinsics_arm_vixl.cc",
                 "utils/arm/assembler_arm.cc",
                 "utils/arm/assembler_arm_vixl.cc",
                 "utils/arm/assembler_thumb2.cc",
@@ -203,7 +204,8 @@
 
 gensrcs {
     name: "art_compiler_operator_srcs",
-    cmd: "art/tools/generate-operator-out.py art/compiler $in > $out",
+    cmd: "$(location generate-operator-out.py) art/compiler $(in) > $(out)",
+    tool_files: ["generate-operator-out.py"],
     srcs: [
         "compiled_method.h",
         "dex/dex_to_dex_compiler.h",
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index 34fd88b..db0fdaa 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_DEX_QUICK_COMPILER_CALLBACKS_H_
 
 #include "compiler_callbacks.h"
+#include "verifier/verifier_deps.h"
 
 namespace art {
 
@@ -46,16 +47,16 @@
     }
 
     verifier::VerifierDeps* GetVerifierDeps() const OVERRIDE {
-      return verifier_deps_;
+      return verifier_deps_.get();
     }
 
-    void SetVerifierDeps(verifier::VerifierDeps* deps) {
-      verifier_deps_ = deps;
+    void SetVerifierDeps(verifier::VerifierDeps* deps) OVERRIDE {
+      verifier_deps_.reset(deps);
     }
 
   private:
     VerificationResults* const verification_results_;
-    verifier::VerifierDeps* verifier_deps_;
+    std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
 };
 
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 1b87725..223be88 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -39,6 +39,7 @@
 #include "compiled_class.h"
 #include "compiled_method.h"
 #include "compiler.h"
+#include "compiler_callbacks.h"
 #include "compiler_driver-inl.h"
 #include "dex_compilation_unit.h"
 #include "dex_file-inl.h"
@@ -393,6 +394,7 @@
 
 void CompilerDriver::CompileAll(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
+                                verifier::VerifierDeps* verifier_deps,
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
 
@@ -404,7 +406,7 @@
   // 2) Resolve all classes
   // 3) Attempt to verify all classes
   // 4) Attempt to initialize image classes, and trivially initialized classes
-  PreCompile(class_loader, dex_files, timings);
+  PreCompile(class_loader, dex_files, verifier_deps, timings);
   if (GetCompilerOptions().IsBootImage()) {
     // We don't need to set up the intrinsics for non-boot-image compilation, as
     // those compilations will pick up a boot image that has the ArtMethod already
@@ -676,7 +678,7 @@
 
   InitializeThreadPools();
 
-  PreCompile(jclass_loader, dex_files, timings);
+  PreCompile(jclass_loader, dex_files, /* verifier_deps */ nullptr, timings);
 
   // Can we run DEX-to-DEX compiler on this class ?
   optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
@@ -873,6 +875,7 @@
 
 void CompilerDriver::PreCompile(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
+                                verifier::VerifierDeps* verifier_deps,
                                 TimingLogger* timings) {
   CheckThreadPools();
 
@@ -906,7 +909,7 @@
     VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false);
   }
 
-  Verify(class_loader, dex_files, timings);
+  Verify(class_loader, dex_files, verifier_deps, timings);
   VLOG(compiler) << "Verify: " << GetMemoryUsageString(false);
 
   if (had_hard_verifier_failure_ && GetCompilerOptions().AbortOnHardVerifierFailure()) {
@@ -1522,7 +1525,7 @@
 
   if (!use_dex_cache) {
     bool method_in_image = false;
-    const std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
+    const std::vector<gc::space::ImageSpace*>& image_spaces = heap->GetBootImageSpaces();
     for (gc::space::ImageSpace* image_space : image_spaces) {
       const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
       if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) {
@@ -1932,15 +1935,61 @@
   }
 }
 
-void CompilerDriver::Verify(jobject class_loader,
+void CompilerDriver::Verify(jobject jclass_loader,
                             const std::vector<const DexFile*>& dex_files,
+                            verifier::VerifierDeps* verifier_deps,
                             TimingLogger* timings) {
+  if (verifier_deps != nullptr) {
+    TimingLogger::ScopedTiming t("Fast Verify", timings);
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader)));
+    MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr));
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    if (verifier_deps->ValidateDependencies(class_loader, soa.Self())) {
+      // We successfully validated the dependencies; now update the class status
+      // of verified classes. Note that the dependencies also record which classes
+      // could not be fully verified; we could try again, but that would hurt verification
+      // time. So instead we assume these classes still need to be verified at
+      // runtime.
+      for (const DexFile* dex_file : dex_files) {
+        // Fetch the list of unverified classes and turn it into a set for faster
+        // lookups.
+        const std::vector<uint16_t>& unverified_classes =
+            verifier_deps->GetUnverifiedClasses(*dex_file);
+        std::set<uint16_t> set(unverified_classes.begin(), unverified_classes.end());
+        for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+          const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+          const char* descriptor = dex_file->GetClassDescriptor(class_def);
+          cls.Assign(class_linker->FindClass(soa.Self(), descriptor, class_loader));
+          if (cls.Get() == nullptr) {
+            CHECK(soa.Self()->IsExceptionPending());
+            soa.Self()->ClearException();
+          } else if (set.find(class_def.class_idx_) == set.end()) {
+            ObjectLock<mirror::Class> lock(soa.Self(), cls);
+            mirror::Class::SetStatus(cls, mirror::Class::kStatusVerified, soa.Self());
+          }
+        }
+      }
+      return;
+    }
+  }
+
+  // If no `verifier_deps` was passed in (because there is no existing vdex file), or
+  // the passed `verifier_deps` is no longer valid, create a new one for
+  // non-boot-image compilation. The verifier will need it to record the new dependencies.
+  // Then dex2oat can update the vdex file with these new dependencies.
+  if (!GetCompilerOptions().IsBootImage()) {
+    Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps(
+        new verifier::VerifierDeps(dex_files));
+  }
   // Note: verification should not be pulling in classes anymore when compiling the boot image,
   //       as all should have been resolved before. As such, doing this in parallel should still
   //       be deterministic.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    VerifyDexFile(class_loader,
+    VerifyDexFile(jclass_loader,
                   *dex_file,
                   dex_files,
                   parallel_thread_pool_.get(),
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 4a48f9c..c8d6cb0 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -50,6 +50,7 @@
 
 namespace verifier {
 class MethodVerifier;
+class VerifierDeps;
 class VerifierDepsTest;
 }  // namespace verifier
 
@@ -117,6 +118,7 @@
 
   void CompileAll(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
+                  verifier::VerifierDeps* verifier_deps,
                   TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
 
@@ -415,6 +417,7 @@
  private:
   void PreCompile(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
+                  verifier::VerifierDeps* verifier_deps,
                   TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
@@ -437,7 +440,9 @@
 
   void Verify(jobject class_loader,
               const std::vector<const DexFile*>& dex_files,
+              verifier::VerifierDeps* verifier_deps,
               TimingLogger* timings);
+
   void VerifyDexFile(jobject class_loader,
                      const DexFile& dex_file,
                      const std::vector<const DexFile*>& dex_files,
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 845028d..9679a79 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -43,6 +43,7 @@
     TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
     compiler_driver_->CompileAll(class_loader,
                                  GetDexFiles(class_loader),
+                                 /* verifier_deps */ nullptr,
                                  &timings);
     t.NewTiming("MakeAllExecutable");
     MakeAllExecutable(class_loader);
diff --git a/compiler/generate-operator-out.py b/compiler/generate-operator-out.py
new file mode 120000
index 0000000..cc291d2
--- /dev/null
+++ b/compiler/generate-operator-out.py
@@ -0,0 +1 @@
+../tools/generate-operator-out.py
\ No newline at end of file
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 8fdf6fc..fcb8979 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -189,7 +189,7 @@
       TimingLogger timings("ImageTest::WriteRead", false, false);
       TimingLogger::ScopedTiming t("CompileAll", &timings);
       driver->SetDexFilesForOatFile(class_path);
-      driver->CompileAll(class_loader, class_path, &timings);
+      driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings);
 
       t.NewTiming("WriteElf");
       SafeMap<std::string, std::string> key_value_store;
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 59f339a..51ef440 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -51,6 +51,7 @@
 #include "lock_word.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
@@ -757,7 +758,8 @@
   if (klass->GetStatus() == mirror::Class::kStatusError) {
     result = true;
   } else {
-    CHECK(klass->GetVerifyError() == nullptr) << klass->PrettyClass();
+    ObjPtr<mirror::ClassExt> ext(klass->GetExtData());
+    CHECK(ext.IsNull() || ext->GetVerifyError() == nullptr) << klass->PrettyClass();
   }
   if (!result) {
     // Check interfaces since these won't be visited through VisitReferences.
@@ -2068,13 +2070,8 @@
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
   DCHECK(orig != nullptr);
   DCHECK(copy != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    orig->AssertReadBarrierPointer();
-    if (kUseBrooksReadBarrier) {
-      // Note the address 'copy' isn't the same as the image address of 'orig'.
-      copy->SetReadBarrierPointer(GetImageAddress(orig));
-      DCHECK_EQ(copy->GetReadBarrierPointer(), GetImageAddress(orig));
-    }
+  if (kUseBakerReadBarrier) {
+    orig->AssertReadBarrierState();
   }
   auto* klass = orig->GetClass();
   if (klass->IsIntArrayClass() || klass->IsLongArrayClass()) {
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 4960a73..a9044a2 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -2202,8 +2202,7 @@
                "()V",
                CURRENT_JNI_WRAPPER(Java_MyClassNatives_normalNative));
 
-  ScopedObjectAccess soa(Thread::Current());
-  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ArtMethod* method = jni::DecodeArtMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
   EXPECT_FALSE(method->IsAnnotatedWithCriticalNative());
@@ -2225,8 +2224,7 @@
                "()V",
                CURRENT_JNI_WRAPPER(Java_MyClassNatives_fastNative));
 
-  ScopedObjectAccess soa(Thread::Current());
-  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ArtMethod* method = jni::DecodeArtMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
   EXPECT_FALSE(method->IsAnnotatedWithCriticalNative());
@@ -2255,8 +2253,7 @@
   UpdateCurrentJni(JniKind::kCritical);
   ASSERT_TRUE(IsCurrentJniCritical());
 
-  ScopedObjectAccess soa(Thread::Current());
-  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ArtMethod* method = jni::DecodeArtMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
   EXPECT_TRUE(method->IsAnnotatedWithCriticalNative());
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 64ee574..102637f 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -377,7 +377,8 @@
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
     compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
-    compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings2);
+    compiler_driver_->CompileAll(
+        class_loader, class_linker->GetBootClassPath(), /* verifier_deps */ nullptr, &timings2);
   }
 
   ScratchFile tmp_oat, tmp_vdex(tmp_oat, ".vdex");
@@ -391,7 +392,8 @@
   ASSERT_TRUE(success);
 
   if (kCompile) {  // OatWriter strips the code, regenerate to compare
-    compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
+    compiler_driver_->CompileAll(
+        class_loader, class_linker->GetBootClassPath(), /* verifier_deps */ nullptr, &timings);
   }
   std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp_oat.GetFilename(),
                                                   tmp_oat.GetFilename(),
@@ -515,7 +517,7 @@
                                   soa.Decode<mirror::ClassLoader>(class_loader).Ptr());
   }
   compiler_driver_->SetDexFilesForOatFile(dex_files);
-  compiler_driver_->CompileAll(class_loader, dex_files, &timings);
+  compiler_driver_->CompileAll(class_loader, dex_files, /* verifier_deps */ nullptr, &timings);
 
   ScratchFile tmp_oat, tmp_vdex(tmp_oat, ".vdex");
   SafeMap<std::string, std::string> key_value_store;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index d2357a5..7dc094b 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -548,7 +548,21 @@
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
     DCHECK(!IsAddedBlock(block));
     first_index_bounds_check_map_.clear();
-    HGraphVisitor::VisitBasicBlock(block);
+    // Visit phis and instructions using a safe iteration scheme: next_ is recorded
+    // before each instruction is visited, so deleting the current instruction during
+    // its visit is safe. However, next_ must be advanced if the instruction it points
+    // to is deleted during iteration.
+    for (HInstruction* instruction = block->GetFirstPhi(); instruction != nullptr;) {
+      DCHECK(instruction->IsInBlock());
+      next_ = instruction->GetNext();
+      instruction->Accept(this);
+      instruction = next_;
+    }
+    for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) {
+      DCHECK(instruction->IsInBlock());
+      next_ = instruction->GetNext();
+      instruction->Accept(this);
+      instruction = next_;
+    }
     // We should never deoptimize from an osr method, otherwise we might wrongly optimize
     // code dominated by the deoptimization.
     if (!GetGraph()->IsCompilingOsr()) {
@@ -1798,7 +1812,12 @@
   }
 
   /** Helper method to replace an instruction with another instruction. */
-  static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+  void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+    // Safe iteration.
+    if (instruction == next_) {
+      next_ = next_->GetNext();
+    }
+    // Replace and remove.
     instruction->ReplaceWith(replacement);
     instruction->GetBlock()->RemoveInstruction(instruction);
   }
@@ -1831,6 +1850,9 @@
   // Range analysis based on induction variables.
   InductionVarRange induction_range_;
 
+  // Safe iteration.
+  HInstruction* next_;
+
   DISALLOW_COPY_AND_ASSIGN(BCEVisitor);
 };
 
diff --git a/compiler/optimizing/bytecode_utils.h b/compiler/optimizing/bytecode_utils.h
index 6dfffce..133afa4 100644
--- a/compiler/optimizing/bytecode_utils.h
+++ b/compiler/optimizing/bytecode_utils.h
@@ -26,7 +26,8 @@
 
 class CodeItemIterator : public ValueObject {
  public:
-  CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc = 0u)
+  explicit CodeItemIterator(const DexFile::CodeItem& code_item) : CodeItemIterator(code_item, 0u) {}
+  CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc)
       : code_ptr_(code_item.insns_ + start_dex_pc),
         code_end_(code_item.insns_ + code_item.insns_size_in_code_units_),
         dex_pc_(start_dex_pc) {}
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8b450e1..a5f248d 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1375,4 +1375,30 @@
   return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
 }
 
+void CodeGenerator::EmitJitRoots(uint8_t* code,
+                                 Handle<mirror::ObjectArray<mirror::Object>> roots,
+                                 const uint8_t* roots_data,
+                                 Handle<mirror::DexCache> outer_dex_cache) {
+  DCHECK_EQ(static_cast<size_t>(roots->GetLength()), GetNumberOfJitRoots());
+  StackHandleScope<1> hs(Thread::Current());
+  MutableHandle<mirror::DexCache> h_dex_cache(hs.NewHandle<mirror::DexCache>(nullptr));
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  size_t index = 0;
+  for (auto& entry : jit_string_roots_) {
+    const DexFile& entry_dex_file = *entry.first.dex_file;
+    // Avoid the expensive FindDexCache call by checking if the string is
+    // in the compiled method's dex file.
+    h_dex_cache.Assign(IsSameDexFile(*outer_dex_cache->GetDexFile(), entry_dex_file)
+        ? outer_dex_cache.Get()
+        : class_linker->FindDexCache(hs.Self(), entry_dex_file));
+    mirror::String* string = class_linker->LookupString(
+        entry_dex_file, entry.first.string_index, h_dex_cache);
+    DCHECK(string != nullptr) << "JIT roots require strings to have been loaded";
+    roots->Set(index, string);
+    entry.second = index;
+    ++index;
+  }
+  EmitJitRootPatches(code, roots_data);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a81f24e..212d571 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -31,6 +31,7 @@
 #include "nodes.h"
 #include "optimizing_compiler_stats.h"
 #include "stack_map_stream.h"
+#include "string_reference.h"
 #include "utils/label.h"
 
 namespace art {
@@ -331,6 +332,17 @@
 
   void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
   size_t ComputeStackMapsSize();
+  size_t GetNumberOfJitRoots() const {
+    return jit_string_roots_.size();
+  }
+
+  // Fills the `roots` array with JIT roots (e.g. string literals) collected during
+  // code generation. Also emits the corresponding literal patches.
+  void EmitJitRoots(uint8_t* code,
+                    Handle<mirror::ObjectArray<mirror::Object>> roots,
+                    const uint8_t* roots_data,
+                    Handle<mirror::DexCache> outer_dex_cache)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool IsLeafMethod() const {
     return is_leaf_;
@@ -567,6 +579,8 @@
         fpu_callee_save_mask_(fpu_callee_save_mask),
         stack_map_stream_(graph->GetArena()),
         block_order_(nullptr),
+        jit_string_roots_(StringReferenceValueComparator(),
+                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         disasm_info_(nullptr),
         stats_(stats),
         graph_(graph),
@@ -633,6 +647,12 @@
     return current_slow_path_;
   }
 
+  // Emit the patches associated with JIT roots. Only applies to JIT-compiled code.
+  virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
+                                  const uint8_t* roots_data ATTRIBUTE_UNUSED) {
+    DCHECK_EQ(jit_string_roots_.size(), 0u);
+  }
+
   // Frame size required for this method.
   uint32_t frame_size_;
   uint32_t core_spill_mask_;
@@ -658,6 +678,11 @@
   // The order to use for code generation.
   const ArenaVector<HBasicBlock*>* block_order_;
 
+  // Maps a StringReference (dex_file, string_index) to the index in the literal table.
+  // Entries are initially added with a 0 index, and `EmitJitRoots` will compute all the
+  // indices.
+  ArenaSafeMap<StringReference, size_t, StringReferenceValueComparator> jit_string_roots_;
+
   DisassemblyInformation* disasm_info_;
 
  private:
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index be65f89..f9ef96c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -489,8 +489,14 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
+    Location arg0, arg1;
+    if (instruction_->IsInstanceOf()) {
+      arg0 = locations->InAt(1);
+      arg1 = locations->Out();
+    } else {
+      arg0 = locations->InAt(0);
+      arg1 = locations->InAt(1);
+    }
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -504,26 +510,26 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(
-        locations->InAt(1),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Primitive::kPrimNot,
-        object_class,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        Primitive::kPrimNot);
-
+    codegen->EmitParallelMoves(arg0,
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               arg1,
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>();
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
-      arm_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -638,6 +644,11 @@
            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
@@ -894,6 +905,11 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -4841,8 +4857,6 @@
                                         instruction->IsStringCharAt();
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -4915,6 +4929,11 @@
     }
 
     case Primitive::kPrimNot: {
+      // The read barrier instrumentation of object ArrayGet
+      // instructions does not support the HIntermediateAddress
+      // instruction.
+      DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
@@ -5055,8 +5074,6 @@
   Location value_loc = locations->InAt(2);
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
@@ -5306,8 +5323,6 @@
 }
 
 void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
 
@@ -5322,9 +5337,6 @@
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
 
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
-
   if (second.IsRegister()) {
     __ add(out.AsRegister<Register>(),
            first.AsRegister<Register>(),
@@ -5877,6 +5889,9 @@
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      return HLoadString::LoadKind::kDexCacheViaMethod;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -6291,26 +6306,16 @@
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      Label loop, compare_classes;
+      Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
-      __ CompareAndBranchIfNonZero(temp, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
 
-      __ Bind(&compare_classes);
+      // Otherwise, compare the classes.
       __ cmp(temp, ShifterOperand(cls));
       __ b(&loop, NE);
       break;
@@ -6326,55 +6331,29 @@
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
-      // If the class reference currently in `temp` is not null, jump
-      // back at the beginning of the loop.
-      __ CompareAndBranchIfNonZero(temp, &loop);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, jump to the beginning of the loop.
+      __ b(&loop);
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
-      Label check_non_primitive_component_type;
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
-
-      // If the component type is not null (i.e. the object is indeed
-      // an array), jump to label `check_non_primitive_component_type`
-      // to further check that this component type is not a primitive
-      // type.
-      __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&check_non_primitive_component_type);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array; further check that this
+      // component type is not a primitive type.
       __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
-      __ CompareAndBranchIfZero(temp, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
+      __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
       break;
     }
 
@@ -6390,13 +6369,6 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
       __ b(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -6843,7 +6815,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -6919,14 +6891,13 @@
   }
   AddSlowPath(slow_path);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // Given the numeric representation, it's enough to check the low bit of the
   // rb_state. We do that by shifting the bit out of the lock word with LSRS
   // which can be a 16-bit instruction unlike the TST immediate.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
   __ b(slow_path->GetEntryLabel(), CS);  // Carry flag is the last bit shifted out by LSRS.
   __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b537509..4f7cc61 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -459,9 +459,15 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location class_to_check = locations->InAt(1);
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
+    Location arg0, arg1;
+    if (instruction_->IsInstanceOf()) {
+      arg0 = locations->InAt(1);
+      arg1 = locations->Out();
+    } else {
+      arg0 = locations->InAt(0);
+      arg1 = locations->InAt(1);
+    }
+
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
@@ -476,21 +482,22 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(
-        class_to_check, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot,
-        object_class, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);
-
+    codegen->EmitParallelMoves(arg0,
+                               LocationFrom(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               arg1,
+                               LocationFrom(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t,
-                           const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
     } else {
       DCHECK(instruction_->IsCheckCast());
-      arm64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -626,6 +633,11 @@
            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
@@ -876,7 +888,9 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
-    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
     DCHECK(!(instruction_->IsArrayGet() &&
              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
@@ -2192,8 +2206,6 @@
 }
 
 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -2201,10 +2213,7 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorARM64::VisitIntermediateAddress(
-    HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
          Operand(InputOperandAt(instruction, 1)));
@@ -2304,11 +2313,15 @@
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
   BlockPoolsScope block_pools(masm);
 
+  // The read barrier instrumentation of object ArrayGet instructions
+  // does not support the HIntermediateAddress instruction.
+  DCHECK(!((type == Primitive::kPrimNot) &&
+           instruction->GetArray()->IsIntermediateAddress() &&
+           kEmitCompilerReadBarrier));
+
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object ArrayGet with Baker's read barrier case.
     Register temp = temps.AcquireW();
-    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
     codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -2341,9 +2354,6 @@
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
       if (instruction->GetArray()->IsIntermediateAddress()) {
-        // The read barrier instrumentation does not support the
-        // HIntermediateAddress instruction yet.
-        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `TryExtractArrayAccessAddress()`.
@@ -2451,9 +2461,6 @@
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
       if (instruction->GetArray()->IsIntermediateAddress()) {
-        // The read barrier instrumentation does not support the
-        // HIntermediateAddress instruction yet.
-        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `TryExtractArrayAccessAddress()`.
@@ -3594,26 +3601,15 @@
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::aarch64::Label loop, compare_classes;
+      vixl::aarch64::Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
-      __ Cbnz(temp, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ B(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&compare_classes);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, compare classes.
       __ Cmp(temp, cls);
       __ B(ne, &loop);
       break;
@@ -3633,20 +3629,12 @@
       // back at the beginning of the loop.
       __ Cbnz(temp, &loop);
       // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
-      vixl::aarch64::Label check_non_primitive_component_type;
       __ Cmp(temp, cls);
       __ B(eq, &done);
 
@@ -3654,30 +3642,13 @@
       // /* HeapReference<Class> */ temp = temp->component_type_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
 
-      // If the component type is not null (i.e. the object is indeed
-      // an array), jump to label `check_non_primitive_component_type`
-      // to further check that this component type is not a primitive
-      // type.
-      __ Cbnz(temp, &check_non_primitive_component_type);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ B(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&check_non_primitive_component_type);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array. Further check that this component type is not a
+      // primitive type.
       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ Cbz(temp, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ B(type_check_slow_path->GetEntryLabel());
+      __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
       break;
     }
 
@@ -3693,13 +3664,6 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
       __ B(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -4422,6 +4386,9 @@
       break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      return HLoadString::LoadKind::kDexCacheViaMethod;
   }
   return desired_string_load_kind;
 }
@@ -5426,7 +5393,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -5517,12 +5484,11 @@
   }
   AddSlowPath(slow_path);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index cc40522..b9814b6 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -23,6 +23,7 @@
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
+#include "intrinsics_arm_vixl.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "thread.h"
@@ -62,6 +63,7 @@
   return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
 }
 
+static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr size_t kArmInstrMaxSizeInBytes = 4u;
 
 #ifdef __
@@ -434,6 +436,67 @@
   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
 };
 
+class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
+      : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
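+    // Argument setup sketch: for InstanceOf, pass the checked class and the object's class
+    // (held in the output location at this point); for CheckCast, pass the object and the
+    // checked class, matching the entrypoint signatures checked below.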
+    Location arg0, arg1;
+    if (instruction_->IsInstanceOf()) {
+      arg0 = locations->InAt(1);
+      arg1 = locations->Out();
+    } else {
+      arg0 = locations->InAt(0);
+      arg1 = locations->InAt(1);
+    }
+    DCHECK(instruction_->IsCheckCast()
+           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    __ Bind(GetEntryLabel());
+
+    if (!is_fatal_) {
+      TODO_VIXL32(FATAL);
+    }
+
+    // We're moving two locations to locations that could overlap, so we need a parallel
+    // move resolver.
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+
+    codegen->EmitParallelMoves(arg0,
+                               LocationFrom(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               arg1,
+                               LocationFrom(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
+    if (instruction_->IsInstanceOf()) {
+      TODO_VIXL32(FATAL);
+    } else {
+      DCHECK(instruction_->IsCheckCast());
+      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
+    }
+
+    if (!is_fatal_) {
+      TODO_VIXL32(FATAL);
+    }
+  }
+
+  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARMVIXL"; }
+
+  bool IsFatal() const OVERRIDE { return is_fatal_; }
+
+ private:
+  const bool is_fatal_;
+
+  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
+};
+
 class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
  public:
   explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
@@ -567,6 +630,11 @@
   return mask;
 }
 
+size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
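+  // Reload a single spilled S register from `stack_index`; each S register occupies one ARM word.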
+  GetAssembler()->LoadSFromOffset(vixl32::SRegister(reg_id), sp, stack_index);
+  return kArmWordSize;
+}
+
 #undef __
 
 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
@@ -600,7 +668,7 @@
   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
 }
 
-#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->
+#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->  // NOLINT
 
 void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
   GetAssembler()->FinalizeCode();
@@ -1415,9 +1483,26 @@
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  // TODO(VIXL): TryDispatch
+  IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
+    return;
+  }
 
   HandleInvoke(invoke);
+
+  // TODO(VIXL): invoke->HasPcRelativeDexCache()
+}
+
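+// If the invoke was marked as intrinsified while building its locations, emit the intrinsic
+// expansion and skip the regular call sequence.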
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
+  if (invoke->GetLocations()->Intrinsified()) {
+    IntrinsicCodeGeneratorARMVIXL intrinsic(codegen);
+    intrinsic.Dispatch(invoke);
+    return true;
+  }
+  return false;
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -1425,7 +1510,9 @@
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  // TODO(VIXL): TryGenerateIntrinsicCode
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
 
   LocationSummary* locations = invoke->GetLocations();
   DCHECK(locations->HasTemps());
@@ -1441,13 +1528,18 @@
 }
 
 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  // TODO(VIXL): TryDispatch
+  IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
 
   HandleInvoke(invoke);
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  // TODO(VIXL): TryGenerateIntrinsicCode
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
@@ -1507,6 +1599,8 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
+      // TODO(VIXL): Consider introducing an InputVRegister()
+      // helper function (equivalent to InputRegister()).
       __ Vneg(OutputVRegister(neg), InputVRegisterAt(neg, 0));
       break;
 
@@ -2325,7 +2419,12 @@
       break;
     }
     case Primitive::kPrimLong: {
-      TODO_VIXL32(FATAL);
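+      // 64-bit division is performed by a runtime call: operands are passed in the calling
+      // convention register pairs and the result comes back in r0/r1 (see the kQuickLdiv call).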
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      locations->SetInAt(0, LocationFrom(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, LocationFrom(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      locations->SetOut(LocationFrom(r0, r1));
       break;
     }
     case Primitive::kPrimFloat:
@@ -2342,6 +2441,7 @@
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
+  Location lhs = div->GetLocations()->InAt(0);
   Location rhs = div->GetLocations()->InAt(1);
 
   switch (div->GetResultType()) {
@@ -2357,7 +2457,16 @@
     }
 
     case Primitive::kPrimLong: {
-      TODO_VIXL32(FATAL);
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
+      DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
+      DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
+      DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
+      DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
+      DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
+
+      codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc());
+      CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       break;
     }
 
@@ -3663,7 +3772,7 @@
   // Also need for String compression feature.
   if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
       || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
-    TODO_VIXL32(FATAL);
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -3692,7 +3801,24 @@
       if (index.IsConstant()) {
         int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
         if (maybe_compressed_char_at) {
-          TODO_VIXL32(FATAL);
+          vixl32::Register length = temps.Acquire();
+          vixl32::Label uncompressed_load, done;
+          uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+          GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
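+          // The sign bit of the count field acts as the compression flag here: a negative count
+          // means compressed (8-bit) characters, so fall through to the byte load.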
+          __ Cmp(length, 0);
+          __ B(ge, &uncompressed_load);
+          GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
+                                         RegisterFrom(out_loc),
+                                         obj,
+                                         data_offset + const_index);
+          __ B(&done);
+          __ Bind(&uncompressed_load);
+          GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
+                                         RegisterFrom(out_loc),
+                                         obj,
+                                         data_offset + (const_index << 1));
+          __ Bind(&done);
         } else {
           uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
 
@@ -3708,7 +3834,18 @@
           __ Add(temp, obj, data_offset);
         }
         if (maybe_compressed_char_at) {
-          TODO_VIXL32(FATAL);
+          vixl32::Label uncompressed_load, done;
+          uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+          vixl32::Register length = RegisterFrom(locations->GetTemp(0));
+          GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
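+          // Same compression check as above: a negative count selects the byte load.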
+          __ Cmp(length, 0);
+          __ B(ge, &uncompressed_load);
+          __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
+          __ B(&done);
+          __ Bind(&uncompressed_load);
+          __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
+          __ Bind(&done);
         } else {
           codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
         }
@@ -4080,7 +4217,10 @@
   vixl32::Register out = OutputRegister(instruction);
   GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/272625/
+  // Mask out compression flag from String's array length.
+  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+    __ Bic(out, out, 1u << 31);
+  }
 }
 
 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -4376,7 +4516,12 @@
     GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
     GetAssembler()->StoreDToOffset(temp, sp, mem);
   } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
-    TODO_VIXL32(FATAL);
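+    // Swap the two D registers through a scratch D register.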
+    vixl32::DRegister first = DRegisterFrom(source);
+    vixl32::DRegister second = DRegisterFrom(destination);
+    vixl32::DRegister temp = temps.AcquireD();
+    __ Vmov(temp, first);
+    __ Vmov(first, second);
+    __ Vmov(second, temp);
   } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
     TODO_VIXL32(FATAL);
   } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
@@ -4609,6 +4754,115 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
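+  // A second temporary register is only needed when read barriers are emitted for these kinds.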
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
+void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARMVIXL uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
+  vixl32::Register obj = InputRegisterAt(instruction, 0);
+  vixl32::Register cls = InputRegisterAt(instruction, 1);
+  Location temp_loc = locations->GetTemp(0);
+  vixl32::Register temp = RegisterFrom(temp_loc);
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCodeARMVIXL* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
+                                                            is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
+
+  vixl32::Label done;
+  // Avoid null check if we know obj is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Cbz(obj, &done);
+  }
+
+  // /* HeapReference<Class> */ temp = obj->klass_
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kArrayCheck: {
+      __ Cmp(temp, cls);
+      // Jump to slow path for throwing the exception or doing a
+      // more involved array check.
+      __ B(ne, type_check_slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      TODO_VIXL32(FATAL);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      TODO_VIXL32(FATAL);
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      TODO_VIXL32(FATAL);
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      TODO_VIXL32(FATAL);
+      break;
+  }
+  __ Bind(&done);
+
+  __ Bind(type_check_slow_path->GetExitLabel());
+}
+
 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
   HandleBitwiseOperation(instruction, AND);
 }
@@ -4780,6 +5034,24 @@
   }
 }
 
+void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp ATTRIBUTE_UNUSED) {
+  vixl32::Register out_reg = RegisterFrom(out);
+  vixl32::Register obj_reg = RegisterFrom(obj);
+  if (kEmitCompilerReadBarrier) {
+    TODO_VIXL32(FATAL);
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
 void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
     HInstruction* instruction ATTRIBUTE_UNUSED,
     Location root,
@@ -4798,6 +5070,39 @@
   }
 }
 
+void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location ref ATTRIBUTE_UNUSED,
+    vixl::aarch32::Register obj ATTRIBUTE_UNUSED,
+    uint32_t offset ATTRIBUTE_UNUSED,
+    Location temp ATTRIBUTE_UNUSED,
+    bool needs_null_check ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
+void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location ref ATTRIBUTE_UNUSED,
+    vixl::aarch32::Register obj ATTRIBUTE_UNUSED,
+    uint32_t offset ATTRIBUTE_UNUSED,
+    Location index ATTRIBUTE_UNUSED,
+    ScaleFactor scale_factor ATTRIBUTE_UNUSED,
+    Location temp ATTRIBUTE_UNUSED,
+    bool needs_null_check ATTRIBUTE_UNUSED,
+    bool always_update_field ATTRIBUTE_UNUSED,
+    vixl::aarch32::Register* temp2 ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
+void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED,
+                                                   Location out ATTRIBUTE_UNUSED,
+                                                   Location ref ATTRIBUTE_UNUSED,
+                                                   Location obj ATTRIBUTE_UNUSED,
+                                                   uint32_t offset ATTRIBUTE_UNUSED,
+                                                   Location index ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
 void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED,
                                                         Location out,
                                                         Location ref ATTRIBUTE_UNUSED,
@@ -4871,7 +5176,10 @@
       if (current_method.IsRegister()) {
         method_reg = RegisterFrom(current_method);
       } else {
-        TODO_VIXL32(FATAL);
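+        // Intrinsified invokes do not carry the current method in a register;
+        // reload it from its slot in the current frame.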
+        DCHECK(invoke->GetLocations()->Intrinsified());
+        DCHECK(!current_method.IsValid());
+        method_reg = temp_reg;
+        GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, sp, kCurrentMethodStackOffset);
       }
       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       GetAssembler()->LoadFromOffset(
@@ -4942,9 +5250,31 @@
 }
 
 // Copy the result of a call into the given target.
-void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
-                                                  Primitive::Type type ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) {
+  if (!trg.IsValid()) {
+    DCHECK_EQ(type, Primitive::kPrimVoid);
+    return;
+  }
+
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  Location return_loc = InvokeDexCallingConventionVisitorARM().GetReturnLocation(type);
+  if (return_loc.Equals(trg)) {
+    return;
+  }
+
+  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
+  //       with the last branch.
+  if (type == Primitive::kPrimLong) {
+    TODO_VIXL32(FATAL);
+  } else if (type == Primitive::kPrimDouble) {
+    TODO_VIXL32(FATAL);
+  } else {
+    // Let the parallel move resolver take care of all of this.
+    HParallelMove parallel_move(GetGraph()->GetArena());
+    parallel_move.AddMove(return_loc, trg, type, nullptr);
+    GetMoveResolver()->EmitNativeCode(&parallel_move);
+  }
 }
 
 #undef __
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index df7d467..c583a44 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -113,6 +113,7 @@
   M(BelowOrEqual)                               \
   M(BooleanNot)                                 \
   M(BoundsCheck)                                \
+  M(CheckCast)                                  \
   M(ClearException)                             \
   M(ClinitCheck)                                \
   M(Compare)                                    \
@@ -171,7 +172,6 @@
 // TODO: Remove once the VIXL32 backend is implemented completely.
 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)   \
   M(BoundType)                                  \
-  M(CheckCast)                                  \
   M(ClassTableGet)                              \
   M(InstanceOf)                                 \
   M(InvokeInterface)                            \
@@ -344,6 +344,22 @@
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp);
+
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
@@ -473,11 +489,7 @@
     return 0;
   }
 
-  size_t RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
-                                      uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(INFO) << "TODO: RestoreFloatingPointRegister";
-    return 0;
-  }
+  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
 
   bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
     return type == Primitive::kPrimDouble || type == Primitive::kPrimLong;
@@ -513,6 +525,62 @@
                   vixl::aarch32::Register value,
                   bool can_be_null);
 
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::aarch32::Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at the address
+  // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+  // `ref`, and mark it if needed.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).  This operation
+  // requires an extra temporary register, which must be provided as a
+  // non-null pointer (`temp2`).
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 vixl::aarch32::Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false,
+                                                 vixl::aarch32::Register* temp2 = nullptr);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
   // If read barriers are enabled, generate a read barrier for a heap
   // reference using a slow path. If heap poisoning is enabled, also
   // unpoison the reference in `out`.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 12b1ab9..0960c54 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -378,7 +378,14 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out();
+    Location arg0, arg1;
+    if (instruction_->IsInstanceOf()) {
+      arg0 = locations->InAt(1);
+      arg1 = locations->Out();
+    } else {
+      arg0 = locations->InAt(0);
+      arg1 = locations->InAt(1);
+    }
     uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -390,24 +397,22 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(locations->InAt(1),
+    codegen->EmitParallelMoves(arg0,
                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                Primitive::kPrimNot,
-                               object_class,
+                               arg1,
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimNot);
-
     if (instruction_->IsInstanceOf()) {
       mips_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
     } else {
       DCHECK(instruction_->IsCheckCast());
-      mips_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      mips_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     RestoreLiveRegisters(codegen, locations);
@@ -5204,6 +5209,11 @@
     case HLoadString::LoadKind::kDexCacheViaMethod:
       fallback_load = false;
       break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      // TODO: implement.
+      fallback_load = true;
+      break;
   }
   if (fallback_load) {
     desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 010bf24..7598740 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -322,7 +322,15 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out();
+    Location arg0, arg1;
+    if (instruction_->IsInstanceOf()) {
+      arg0 = locations->InAt(1);
+      arg1 = locations->Out();
+    } else {
+      arg0 = locations->InAt(0);
+      arg1 = locations->InAt(1);
+    }
+
     uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -334,24 +342,23 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(locations->InAt(1),
+    codegen->EmitParallelMoves(arg0,
                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                Primitive::kPrimNot,
-                               object_class,
+                               arg1,
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimNot);
-
     if (instruction_->IsInstanceOf()) {
       mips64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
     } else {
       DCHECK(instruction_->IsCheckCast());
-      mips64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      mips64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     RestoreLiveRegisters(codegen, locations);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index efd33c7..a259637 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -312,8 +312,14 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
+    Location arg0, arg1;
+    if (instruction_->IsInstanceOf()) {
+      arg0 = locations->InAt(1);
+      arg1 = locations->Out();
+    } else {
+      arg0 = locations->InAt(0);
+      arg1 = locations->InAt(1);
+    }
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -327,25 +333,25 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    x86_codegen->EmitParallelMoves(
-        locations->InAt(1),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Primitive::kPrimNot,
-        object_class,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        Primitive::kPrimNot);
-
+    x86_codegen->EmitParallelMoves(arg0,
+                                   Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                                   Primitive::kPrimNot,
+                                   arg1,
+                                   Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                                   Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      x86_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -6217,6 +6223,9 @@
       break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      return HLoadString::LoadKind::kDexCacheViaMethod;
   }
   return desired_string_load_kind;
 }
@@ -6645,26 +6654,17 @@
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      NearLabel loop, compare_classes;
+      NearLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
       __ testl(temp, temp);
-      __ j(kNotEqual, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
 
-      __ Bind(&compare_classes);
+      // Otherwise, compare the classes.
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -6693,21 +6693,14 @@
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
       __ testl(temp, temp);
-      __ j(kNotEqual, &loop);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      __ j(kNotZero, &loop);
+      // Otherwise, jump to the slow path to throw the exception.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
-      NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -6720,28 +6713,13 @@
       // /* HeapReference<Class> */ temp = temp->component_type_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
 
-      // If the component type is not null (i.e. the object is indeed
-      // an array), jump to label `check_non_primitive_component_type`
-      // to further check that this component type is not a primitive
-      // type.
+      // If the component type is null (i.e. the object is not an array), jump to the slow path to
+      // throw the exception. Otherwise proceed with the check.
       __ testl(temp, temp);
-      __ j(kNotEqual, &check_non_primitive_component_type);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
 
-      __ Bind(&check_non_primitive_component_type);
       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kEqual, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
       break;
     }
 
@@ -7093,7 +7071,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -7111,14 +7089,13 @@
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // At this point, just do the "if" and make sure that flags are preserved until the branch.
   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index fcabeea..a1d22f8 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -332,8 +332,14 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
+    Location arg0, arg1;
+    if (instruction_->IsInstanceOf()) {
+      arg0 = locations->InAt(1);
+      arg1 = locations->Out();
+    } else {
+      arg0 = locations->InAt(0);
+      arg1 = locations->InAt(1);
+    }
     uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -348,22 +354,19 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(
-        locations->InAt(1),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Primitive::kPrimNot,
-        object_class,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        Primitive::kPrimNot);
-
+    codegen->EmitParallelMoves(arg0,
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               arg1,
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      x86_64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -1263,7 +1266,8 @@
         simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -5632,6 +5636,9 @@
       break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
   }
   return desired_string_load_kind;
 }
@@ -5661,6 +5668,14 @@
   }
 }
 
+Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file, uint32_t dex_index) {
+  jit_string_roots_.Overwrite(StringReference(&dex_file, dex_index), /* placeholder */ 0u);
+  // Add a patch entry and return the label.
+  jit_string_patches_.emplace_back(dex_file, dex_index);
+  PatchInfo<Label>* info = &jit_string_patches_.back();
+  return &info->label;
+}
+
 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
@@ -5692,6 +5707,15 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
+    case HLoadString::LoadKind::kJitTableAddress: {
+      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
+                                          /* no_rip */ true);
+      Label* fixup_label =
+          codegen_->NewJitRootStringPatch(load->GetDexFile(), load->GetStringIndex());
+      // /* GcRoot<mirror::String> */ out = *address
+      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier);
+      return;
+    }
     default:
       break;
   }
@@ -5740,7 +5764,19 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
+    // We need a temporary for holding the iftable length.
+    return true;
+  }
+  return kEmitCompilerReadBarrier &&
+      !kUseBakerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
+static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   return kEmitCompilerReadBarrier &&
       !kUseBakerReadBarrier &&
       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5778,7 +5814,7 @@
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
+  if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -5791,7 +5827,7 @@
   Location cls = locations->InAt(1);
   Location out_loc =  locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(0) :
       Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -5809,7 +5845,11 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction,
+                                    out_loc,
+                                    obj_loc,
+                                    class_offset,
+                                    kEmitCompilerReadBarrier);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
@@ -5970,33 +6010,45 @@
   }
 }
 
-void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
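+// A "fatal" type check slow path only throws and never returns to the compiled code.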
+bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
-          LocationSummary::kCallOnSlowPath :
-          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
-      break;
+      return !throws_into_catch && !kEmitCompilerReadBarrier;
+    case TypeCheckKind::kInterfaceCheck:
+      return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
     case TypeCheckKind::kArrayCheck:
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCallOnSlowPath;
-      break;
+      return false;
   }
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
+  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_fatal_slow_path = IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch);
+  LocationSummary::CallKind call_kind = is_fatal_slow_path
+                                            ? LocationSummary::kNoCall
+                                            : LocationSummary::kCallOnSlowPath;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    // Require a register for the interface check since there is a loop that compares the class to
+    // a memory address.
+    locations->SetInAt(1, Location::RequiresRegister());
+  } else {
+    locations->SetInAt(1, Location::Any());
+  }
+
   // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
+  if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -6009,20 +6061,19 @@
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(1) :
       Location::NoLocation();
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const int object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
   bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+      IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
   SlowPathCode* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
                                                            is_type_check_slow_path_fatal);
@@ -6039,8 +6090,11 @@
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kEmitCompilerReadBarrier);
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -6063,30 +6117,23 @@
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kEmitCompilerReadBarrier);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      NearLabel loop, compare_classes;
+      NearLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
       __ testl(temp, temp);
-      __ j(kNotEqual, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&compare_classes);
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
+      // Otherwise, compare the classes.
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -6107,8 +6154,11 @@
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kEmitCompilerReadBarrier);
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
       __ Bind(&loop);
@@ -6126,14 +6176,8 @@
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
       __ testl(temp, temp);
-      __ j(kNotEqual, &loop);
+      __ j(kNotZero, &loop);
       // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
       __ Bind(&done);
       break;
@@ -6152,8 +6196,11 @@
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kEmitCompilerReadBarrier);
       // Do an exact check.
       NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
@@ -6173,23 +6220,10 @@
       // to further check that this component type is not a primitive
       // type.
       __ testl(temp, temp);
-      __ j(kNotEqual, &check_non_primitive_component_type);
       // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&check_non_primitive_component_type);
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kEqual, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
       __ Bind(&done);
       break;
     }
@@ -6197,17 +6231,15 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
       NearLabel done;
+
       // Avoid null check if we know obj is not null.
       if (instruction->MustDoNullCheck()) {
         __ testl(obj, obj);
         __ j(kEqual, &done);
       }
 
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
+      // We always go into the type check slow path for the unresolved case.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6223,6 +6255,46 @@
       // case of the `switch` code as it was previously (with a direct
       // call to the runtime not using a type checking slow path).
       // This should also be beneficial for the other cases above.
+
+      // Fast path for the interface check. Since we compare against a memory location in the
+      // inner loop, we would need cls to be poisoned; however, unpoisoning cls would clobber
+      // the condition flags and make the conditional jump incorrect.
+      if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
+        // Try to avoid read barriers to improve the fast path. We cannot get false positives by
+        // doing this.
+        // /* HeapReference<Class> */ temp = obj->klass_
+        GenerateReferenceLoadTwoRegisters(instruction,
+                                          temp_loc,
+                                          obj_loc,
+                                          class_offset,
+                                          /*emit_read_barrier*/ false);
+
+        // /* HeapReference<Class> */ temp = temp->iftable_
+        GenerateReferenceLoadTwoRegisters(instruction,
+                                          temp_loc,
+                                          temp_loc,
+                                          iftable_offset,
+                                          /*emit_read_barrier*/ false);
+        NearLabel is_null;
+        // Null iftable means it is empty.
+        __ testl(temp_loc.AsRegister<CpuRegister>(), temp_loc.AsRegister<CpuRegister>());
+        __ j(kZero, &is_null);
+
+        // Loop through the iftable and check if any class matches.
+        __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(),
+                Address(temp_loc.AsRegister<CpuRegister>(), array_length_offset));
+
+        NearLabel start_loop;
+        __ Bind(&start_loop);
+        __ cmpl(cls.AsRegister<CpuRegister>(),
+                Address(temp_loc.AsRegister<CpuRegister>(), object_array_data_offset));
+        __ j(kEqual, &done);  // Return if same class.
+        // Go to next interface.
+        __ addq(temp_loc.AsRegister<CpuRegister>(), Immediate(2 * kHeapReferenceSize));
+        __ subq(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
+        __ j(kNotZero, &start_loop);
+        __ Bind(&is_null);
+      }
       __ jmp(type_check_slow_path->GetEntryLabel());
       __ Bind(&done);
       break;
@@ -6397,10 +6469,11 @@
 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                                                        Location out,
                                                                        Location obj,
-                                                                       uint32_t offset) {
+                                                                       uint32_t offset,
+                                                                       bool emit_read_barrier) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
-  if (kEmitCompilerReadBarrier) {
+  if (emit_read_barrier) {
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6535,7 +6608,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -6553,14 +6626,13 @@
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // At this point, just do the "if" and make sure that flags are preserved until the branch.
   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
@@ -7029,6 +7101,20 @@
   }
 }
 
+void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+  for (const PatchInfo<Label>& info : jit_string_patches_) {
+    const auto& it = jit_string_roots_.find(StringReference(&info.dex_file, info.index));
+    DCHECK(it != jit_string_roots_.end());
+    size_t index_in_table = it->second;
+    uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    uintptr_t address =
+        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+    typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
+    reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
+       dchecked_integral_cast<uint32_t>(address);
+  }
+}
+
 #undef __
 
 }  // namespace x86_64
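
A minimal Java sketch of the kind of code the interface-check fast path above targets; the class and method names are illustrative only, not taken from the change. A cast whose target is an interface type becomes a CheckCast with TypeCheckKind::kInterfaceCheck, and the new fast path scans the object's iftable for a matching interface class before falling back to the type check slow path.

    interface Named {
        String name();
    }

    class User implements Named {
        public String name() { return "user"; }
    }

    public class CheckCastExample {
        // The cast below is a checkcast against an interface type; with the
        // fast path above, the common case resolves by walking the iftable
        // instead of always taking the runtime slow path.
        static Named asNamed(Object o) {
            return (Named) o;
        }

        public static void main(String[] args) {
            System.out.println(asNamed(new User()).name());
        }
    }
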
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 8b19dad..bc78b8c 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -248,7 +248,8 @@
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset);
+                                         uint32_t offset,
+                                         bool emit_read_barrier);
   // Generate a GC root reference load:
   //
   //   root <- *address
@@ -410,11 +411,14 @@
   void RecordTypePatch(HLoadClass* load_class);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
+  Label* NewJitRootStringPatch(const DexFile& dex_file, uint32_t dex_index);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+
   const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const {
     return isa_features_;
   }
@@ -600,6 +604,9 @@
   // Fixups for jump tables need to be handled specially.
   ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
 
+  // Patches for string literals in JIT compiled code.
+  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
 
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 9ec32df..ac83bd9 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -259,7 +259,7 @@
   GraphChecker graph_checker(graph);
   graph_checker.Run();
   if (!graph_checker.IsValid()) {
-    for (auto error : graph_checker.GetErrors()) {
+    for (const auto& error : graph_checker.GetErrors()) {
       std::cout << error << std::endl;
     }
   }
@@ -269,7 +269,7 @@
 template <typename Expected>
 static void RunCodeNoCheck(CodeGenerator* codegen,
                            HGraph* graph,
-                           std::function<void(HGraph*)> hook_before_codegen,
+                           const std::function<void(HGraph*)>& hook_before_codegen,
                            bool has_result,
                            Expected expected) {
   SsaLivenessAnalysis liveness(graph, codegen);
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index 8c08a9c..13824ad 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -135,6 +135,16 @@
   return InputRegisterAt(instr, 0);
 }
 
+inline int32_t Int32ConstantFrom(Location location) {
+  HConstant* instr = location.GetConstant();
+  if (instr->IsIntConstant()) {
+    return instr->AsIntConstant()->GetValue();
+  } else {
+    DCHECK(instr->IsNullConstant()) << instr->DebugName();
+    return 0;
+  }
+}
+
 inline int64_t Int64ConstantFrom(Location location) {
   HConstant* instr = location.GetConstant();
   if (instr->IsIntConstant()) {
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index e10b1d6..05c6df4 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -39,8 +39,7 @@
  */
 class HConstantFolding : public HOptimization {
  public:
-  HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
-      : HOptimization(graph, name) {}
+  HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index d1a2a26..5fac3ac 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -42,7 +42,7 @@
                 const std::string& expected_before,
                 const std::string& expected_after_cf,
                 const std::string& expected_after_dce,
-                std::function<void(HGraph*)> check_after_cf,
+                const std::function<void(HGraph*)>& check_after_cf,
                 Primitive::Type return_type = Primitive::kPrimInt) {
     graph_ = CreateCFG(&allocator_, data, return_type);
     TestCodeOnReadyGraph(expected_before,
@@ -54,7 +54,7 @@
   void TestCodeOnReadyGraph(const std::string& expected_before,
                             const std::string& expected_after_cf,
                             const std::string& expected_after_dce,
-                            std::function<void(HGraph*)> check_after_cf) {
+                            const std::function<void(HGraph*)>& check_after_cf) {
     ASSERT_NE(graph_, nullptr);
 
     StringPrettyPrinter printer_before(graph_);
@@ -65,7 +65,7 @@
     std::unique_ptr<const X86InstructionSetFeatures> features_x86(
         X86InstructionSetFeatures::FromCppDefines());
     x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions());
-    HConstantFolding(graph_).Run();
+    HConstantFolding(graph_, "constant_folding").Run();
     GraphChecker graph_checker_cf(graph_);
     graph_checker_cf.Run();
     ASSERT_TRUE(graph_checker_cf.IsValid());
@@ -77,7 +77,7 @@
 
     check_after_cf(graph_);
 
-    HDeadCodeElimination(graph_).Run();
+    HDeadCodeElimination(graph_, nullptr /* stats */, "dead_code_elimination").Run();
     GraphChecker graph_checker_dce(graph_);
     graph_checker_dce.Run();
     ASSERT_TRUE(graph_checker_dce.IsValid());
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 9de521a..c31c66a 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -161,8 +161,21 @@
 //        |      |      |
 //       B4      B5    B?
 //
-// This simplification cannot be applied for loop headers, as they
-// contain a suspend check.
+// Note that individual edges can be redirected (for example, B2->B3
+// can be redirected to B2->B5) without applying this optimization
+// to other incoming edges.
+//
+// This simplification cannot be applied to catch blocks, because
+// exception handler edges do not represent normal control flow.
+// Though in theory this could still apply to normal control flow
+// going directly to a catch block, we cannot support it at the
+// moment because the catch Phi's inputs do not correspond to the
+// catch block's predecessors, so we cannot identify which
+// predecessor corresponds to a given statically evaluated input.
+//
+// We do not apply this optimization to loop headers, as doing so could
+// create irreducible loops. We rely on the suspend check in the
+// loop header to prevent the pattern from matching.
 //
 // Note that we rely on the dead code elimination to get rid of B3.
 bool HDeadCodeElimination::SimplifyIfs() {
@@ -172,7 +185,8 @@
   for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     HInstruction* last = block->GetLastInstruction();
     HInstruction* first = block->GetFirstInstruction();
-    if (last->IsIf() &&
+    if (!block->IsCatchBlock() &&
+        last->IsIf() &&
         block->HasSinglePhi() &&
         block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) {
       bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi());
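
For reference, a small Java sketch of the control-flow shape that SimplifyIfs targets; the names are illustrative only. The value merged at the end of the first if/else becomes a Phi, and when a block contains only that Phi and an If consuming it, predecessors with a statically known Phi input can be redirected straight to the matching successor of the second If, as described in the comment above.

    public class SimplifyIfsExample {
        static int classify(boolean flag) {
            int x;
            if (flag) {
                x = 1;
            } else {
                x = 0;
            }
            // x is a Phi here; it only feeds the condition of the next if,
            // so each predecessor above can jump directly to the branch
            // that its constant input selects.
            if (x == 1) {
                return 100;
            }
            return 200;
        }

        public static void main(String[] args) {
            System.out.println(classify(true) + " " + classify(false));
        }
    }
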
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 58e700d..84fd890 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -29,9 +29,7 @@
  */
 class HDeadCodeElimination : public HOptimization {
  public:
-  HDeadCodeElimination(HGraph* graph,
-                       OptimizingCompilerStats* stats = nullptr,
-                       const char* name = kDeadCodeEliminationPassName)
+  HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats, const char* name)
       : HOptimization(graph, name, stats) {}
 
   void Run() OVERRIDE;
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index fe52aac..fdd77e7 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -44,7 +44,7 @@
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
-  HDeadCodeElimination(graph).Run();
+  HDeadCodeElimination(graph, nullptr /* stats */, "dead_code_elimination").Run();
   GraphChecker graph_checker(graph);
   graph_checker.Run();
   ASSERT_TRUE(graph_checker.IsValid());
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 7cc8b1e..235793d 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -58,22 +58,90 @@
 }
 
 /**
- * An upper bound a * (length / a) + b, where a >= 1, can be conservatively rewritten as length + b
- * because length >= 0 is true. This makes it more likely the bound is useful to clients.
+ * Detects an instruction that is >= 0. As long as the value is carried by
+ * a single instruction, arithmetic wrap-around cannot occur.
  */
-static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) {
-  int64_t value;
-  if (v.is_known &&
-      v.a_constant >= 1 &&
-      v.instruction->IsDiv() &&
-      v.instruction->InputAt(0)->IsArrayLength() &&
-      IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
-    return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
+static bool IsGEZero(HInstruction* instruction) {
+  DCHECK(instruction != nullptr);
+  if (instruction->IsArrayLength()) {
+    return true;
+  } else if (instruction->IsInvokeStaticOrDirect()) {
+    switch (instruction->AsInvoke()->GetIntrinsic()) {
+      case Intrinsics::kMathMinIntInt:
+      case Intrinsics::kMathMinLongLong:
+        // Instruction MIN(>=0, >=0) is >= 0.
+        return IsGEZero(instruction->InputAt(0)) &&
+               IsGEZero(instruction->InputAt(1));
+      case Intrinsics::kMathAbsInt:
+      case Intrinsics::kMathAbsLong:
+        // Instruction ABS(x) is >= 0.
+        return true;
+      default:
+        break;
+    }
+  }
+  int64_t value = -1;
+  return IsIntAndGet(instruction, &value) && value >= 0;
+}
+
+/** Hunts "under the hood" for a suitable instruction at the hint. */
+static bool IsMaxAtHint(
+    HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) {
+  if (instruction->IsInvokeStaticOrDirect()) {
+    switch (instruction->AsInvoke()->GetIntrinsic()) {
+      case Intrinsics::kMathMinIntInt:
+      case Intrinsics::kMathMinLongLong:
+        // For MIN(x, y), return most suitable x or y as maximum.
+        return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
+               IsMaxAtHint(instruction->InputAt(1), hint, suitable);
+      default:
+        break;
+    }
+  } else {
+    *suitable = instruction;
+    while (instruction->IsArrayLength() ||
+           instruction->IsNullCheck() ||
+           instruction->IsNewArray()) {
+      instruction = instruction->InputAt(0);
+    }
+    return instruction == hint;
+  }
+  return false;
+}
+
+/** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */
+static InductionVarRange::Value SimplifyMin(InductionVarRange::Value v) {
+  if (v.is_known && v.a_constant == 1 && v.b_constant <= 0) {
+    // If a == 1, the instruction is >= 0, and b <= 0, just return the constant b.
+    // No arithmetic wrap-around can occur.
+    if (IsGEZero(v.instruction)) {
+      return InductionVarRange::Value(v.b_constant);
+    }
   }
   return v;
 }
 
-/** Helper method to test for a constant value. */
+/** Post-analysis simplification of a maximum value that makes the bound more useful to clients. */
+static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v, HInstruction* hint) {
+  if (v.is_known && v.a_constant >= 1) {
+    // An upper bound a * (length / a) + b, where a >= 1, can be conservatively rewritten as
+    // length + b because length >= 0 is true.
+    int64_t value;
+    if (v.instruction->IsDiv() &&
+        v.instruction->InputAt(0)->IsArrayLength() &&
+        IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
+      return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
+    }
+    // If a == 1, the most suitable one suffices as maximum value.
+    HInstruction* suitable = nullptr;
+    if (v.a_constant == 1 && IsMaxAtHint(v.instruction, hint, &suitable)) {
+      return InductionVarRange::Value(suitable, 1, v.b_constant);
+    }
+  }
+  return v;
+}
+
+/** Tests for a constant value. */
 static bool IsConstantValue(InductionVarRange::Value v) {
   return v.is_known && v.a_constant == 0;
 }
@@ -97,7 +165,7 @@
   }
 }
 
-/** Helper method to insert an instruction. */
+/** Inserts an instruction. */
 static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
   DCHECK(block != nullptr);
   DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId();
@@ -106,7 +174,7 @@
   return instruction;
 }
 
-/** Helper method to obtain loop's control instruction. */
+/** Obtains loop's control instruction. */
 static HInstruction* GetLoopControl(HLoopInformation* loop) {
   DCHECK(loop != nullptr);
   return loop->GetHeader()->GetLastInstruction();
@@ -150,9 +218,14 @@
   chase_hint_ = chase_hint;
   bool in_body = context->GetBlock() != loop->GetHeader();
   int64_t stride_value = 0;
-  *min_val = GetVal(info, trip, in_body, /* is_min */ true);
-  *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
+  *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true));
+  *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false), chase_hint);
   *needs_finite_test = NeedsTripCount(info, &stride_value) && IsUnsafeTripCount(trip);
+  chase_hint_ = nullptr;
+  // Retry chasing constants for wrap-around (merge sensitive).
+  if (!min_val->is_known && info->induction_class == HInductionVarAnalysis::kWrapAround) {
+    *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true));
+  }
   return true;
 }
 
@@ -175,7 +248,7 @@
                                   needs_taken_test)
       && (stride_value == -1 ||
           stride_value == 0 ||
-          stride_value == 1);  // avoid wrap-around anomalies.
+          stride_value == 1);  // avoid arithmetic wrap-around anomalies.
 }
 
 void InductionVarRange::GenerateRange(HInstruction* context,
@@ -302,7 +375,8 @@
         return true;
       }
     }
-    // Try range analysis on the invariant, but only on proper range to avoid wrap-around anomalies.
+    // Try range analysis on the invariant, but only accept a proper range
+    // to avoid arithmetic wrap-around anomalies.
     Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true);
     Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false);
     if (IsConstantValue(min_val) &&
@@ -450,25 +524,26 @@
                                                      HInductionVarAnalysis::InductionInfo* trip,
                                                      bool in_body,
                                                      bool is_min) const {
-  // Stop chasing the instruction at constant or hint.
-  int64_t value;
-  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
-    return Value(static_cast<int32_t>(value));
-  } else if (instruction == chase_hint_) {
-    return Value(instruction, 1, 0);
-  }
-  // Special cases when encountering a single instruction that denotes trip count in the
-  // loop-body: min is 1 and, when chasing constants, max of safe trip-count is max int
-  if (in_body && trip != nullptr && instruction == trip->op_a->fetch) {
+  // Special case when chasing constants: a single instruction that denotes the trip count in
+  // the loop body is at least 1 and, with a safe trip count, at most max int.
+  if (chase_hint_ == nullptr && in_body && trip != nullptr && instruction == trip->op_a->fetch) {
     if (is_min) {
       return Value(1);
-    } else if (chase_hint_ == nullptr && !IsUnsafeTripCount(trip)) {
+    } else if (!IsUnsafeTripCount(trip)) {
       return Value(std::numeric_limits<int32_t>::max());
     }
   }
-  // Chase the instruction a bit deeper into the HIR tree, so that it becomes more likely
-  // range analysis will compare the same instructions as terminal nodes.
-  if (instruction->IsAdd()) {
+  // Unless at a constant or hint, chase the instruction a bit deeper into the HIR tree, so that
+  // it becomes more likely range analysis will compare the same instructions as terminal nodes.
+  int64_t value;
+  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
+    // Proper constant reveals best information.
+    return Value(static_cast<int32_t>(value));
+  } else if (instruction == chase_hint_) {
+    // At hint, fetch is represented by itself.
+    return Value(instruction, 1, 0);
+  } else if (instruction->IsAdd()) {
+    // Incorporate suitable constants in the chased value.
     if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
       return AddValue(Value(static_cast<int32_t>(value)),
                       GetFetch(instruction->InputAt(1), trip, in_body, is_min));
@@ -477,14 +552,14 @@
                       Value(static_cast<int32_t>(value)));
     }
   } else if (instruction->IsArrayLength()) {
-    // Return extreme values when chasing constants. Otherwise, chase deeper.
+    // Exploit length properties when chasing constants, or chase into a new-array declaration.
     if (chase_hint_ == nullptr) {
       return is_min ? Value(0) : Value(std::numeric_limits<int32_t>::max());
     } else if (instruction->InputAt(0)->IsNewArray()) {
       return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
     }
   } else if (instruction->IsTypeConversion()) {
-    // Since analysis is 32-bit (or narrower) we allow a widening along the path.
+    // Since the analysis is 32-bit (or narrower), chase past a widening conversion along the path.
     if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt &&
         instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) {
       return GetFetch(instruction->InputAt(0), trip, in_body, is_min);
@@ -506,6 +581,7 @@
       !IsUnsafeTripCount(next_trip)) {
     return GetVal(next_info, next_trip, next_in_body, is_min);
   }
+  // Fetch is represented by itself.
   return Value(instruction, 1, 0);
 }
 
@@ -870,10 +946,11 @@
     HInstruction* opb = nullptr;
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
-        // Invariants.
+        // Invariants (note that even though is_min does not impact code generation for
+        // invariants, some effort is made to keep this parameter consistent).
         switch (info->operation) {
           case HInductionVarAnalysis::kAdd:
-          case HInductionVarAnalysis::kXor:
+          case HInductionVarAnalysis::kXor:  // no proper is_min for second arg
           case HInductionVarAnalysis::kLT:
           case HInductionVarAnalysis::kLE:
           case HInductionVarAnalysis::kGT:
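
A hedged Java sketch of the loop shape that benefits from the new MIN handling in IsGEZero and IsMaxAtHint; the method and array names are made up. When the trip count is bounded by Math.min of two array lengths, range analysis chasing with either array as the hint can now pick the matching length out of the MIN as a conservative upper bound.

    public class MinBoundExample {
        // Math.min(a.length, b.length) is >= 0 (MIN of two values that are
        // each >= 0), and with `a` or `b` as the chase hint the matching
        // length is accepted as the maximum, which helps eliminate the
        // bounds checks on a[i] and b[i].
        static int dot(int[] a, int[] b) {
            int sum = 0;
            for (int i = 0; i < Math.min(a.length, b.length); i++) {
                sum += a[i] * b[i];
            }
            return sum;
        }

        public static void main(String[] args) {
            System.out.println(dot(new int[] {1, 2, 3}, new int[] {4, 5, 6, 7}));
        }
    }
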
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index cc420b3..9e81623 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1315,8 +1315,8 @@
                                   const DexCompilationUnit& dex_compilation_unit) {
   // Note: if the outermost_graph_ is being compiled OSR, we should not run any
   // optimization that could lead to a HDeoptimize. The following optimizations do not.
-  HDeadCodeElimination dce(callee_graph, stats_);
-  HConstantFolding fold(callee_graph);
+  HDeadCodeElimination dce(callee_graph, stats_, "dead_code_elimination$inliner");
+  HConstantFolding fold(callee_graph, "constant_folding$inliner");
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, stats_);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index e4d280f..e06fdee 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -111,9 +111,11 @@
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
-  // We ensure we do not loop infinitely. The value is a finger in the air guess
-  // that should allow enough simplification.
-  static constexpr int kMaxSamePositionSimplifications = 10;
+  // We ensure we do not loop infinitely. The value should not be too high, since that
+  // would allow looping around the same basic block too many times. The value should
+  // not be too low either, however, since we want to allow revisiting a basic block
+  // with many statements and simplifications at least once.
+  static constexpr int kMaxSamePositionSimplifications = 50;
 };
 
 void InstructionSimplifier::Run() {
@@ -605,11 +607,23 @@
   return nullptr;
 }
 
+static bool CmpHasBoolType(HInstruction* input, HInstruction* cmp) {
+  if (input->GetType() == Primitive::kPrimBoolean) {
+    return true;  // input has direct boolean type
+  } else if (cmp->GetUses().HasExactlyOneElement()) {
+    // Comparison also has boolean type if both its input and the instruction
+    // itself feed into the same phi node.
+    HInstruction* user = cmp->GetUses().front().GetUser();
+    return user->IsPhi() && user->HasInput(input) && user->HasInput(cmp);
+  }
+  return false;
+}
+
 void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
   HInstruction* input_const = equal->GetConstantRight();
   if (input_const != nullptr) {
     HInstruction* input_value = equal->GetLeastConstantLeft();
-    if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
+    if (CmpHasBoolType(input_value, equal) && input_const->IsIntConstant()) {
       HBasicBlock* block = equal->GetBlock();
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
@@ -619,6 +633,7 @@
         block->RemoveInstruction(equal);
         RecordSimplification();
       } else if (input_const->AsIntConstant()->IsFalse()) {
+        // Replace (bool_value == false) with !bool_value
         equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal));
         block->RemoveInstruction(equal);
         RecordSimplification();
@@ -640,11 +655,12 @@
   HInstruction* input_const = not_equal->GetConstantRight();
   if (input_const != nullptr) {
     HInstruction* input_value = not_equal->GetLeastConstantLeft();
-    if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
+    if (CmpHasBoolType(input_value, not_equal) && input_const->IsIntConstant()) {
       HBasicBlock* block = not_equal->GetBlock();
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
       if (input_const->AsIntConstant()->IsTrue()) {
+        // Replace (bool_value != true) with !bool_value
         not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal));
         block->RemoveInstruction(not_equal);
         RecordSimplification();
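
A small Java sketch of the boolean-comparison pattern handled by VisitEqual and VisitNotEqual; the names are illustrative only. The existing rule rewrites (bool_value == false) as !bool_value, and the new CmpHasBoolType helper additionally lets the simplifier treat a comparison as boolean-typed when that is only visible through a phi merging the comparison with another boolean input.

    public class BoolCompareExample {
        // `same == false` is the shape that gets rewritten to `!same`,
        // i.e. effectively `a != b`.
        static boolean differs(int a, int b) {
            boolean same = (a == b);
            return same == false;
        }

        public static void main(String[] args) {
            System.out.println(differs(1, 2));  // true
            System.out.println(differs(3, 3));  // false
        }
    }
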
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 782110c..9b54511 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -48,7 +48,7 @@
 class InstructionSimplifierArm : public HOptimization {
  public:
   InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
+      : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
 
   static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
 
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index f71684e..d4cb1f1 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -82,9 +82,10 @@
 class InstructionSimplifierArm64 : public HOptimization {
  public:
   InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
-  static constexpr const char* kInstructionSimplifierArm64PassName
-      = "instruction_simplifier_arm64";
+      : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
+
+  static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64";
+
   void Run() OVERRIDE {
     InstructionSimplifierArm64Visitor visitor(graph_, stats_);
     visitor.VisitReversePostOrder();
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 04e063c..c2b1374 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -231,15 +231,6 @@
                                   HInstruction* array,
                                   HInstruction* index,
                                   size_t data_offset) {
-  if (kEmitCompilerReadBarrier) {
-    // The read barrier instrumentation does not support the
-    // HIntermediateAddress instruction yet.
-    //
-    // TODO: Handle this case properly in the ARM64 and ARM code generator and
-    // re-enable this optimization; otherwise, remove this TODO.
-    // b/26601270
-    return false;
-  }
   if (index->IsConstant() ||
       (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
     // When the index is a constant all the addressing can be fitted in the
@@ -251,14 +242,20 @@
     // The access may require a runtime call or the original array pointer.
     return false;
   }
+  if (kEmitCompilerReadBarrier &&
+      access->IsArrayGet() &&
+      access->GetType() == Primitive::kPrimNot) {
+    // For object arrays, the read barrier instrumentation requires
+    // the original array pointer.
+    return false;
+  }
 
   // Proceed to extract the base address computation.
   HGraph* graph = access->GetBlock()->GetGraph();
   ArenaAllocator* arena = graph->GetArena();
 
   HIntConstant* offset = graph->GetIntConstant(data_offset);
-  HIntermediateAddress* address =
-      new (arena) HIntermediateAddress(array, offset, kNoDexPc);
+  HIntermediateAddress* address = new (arena) HIntermediateAddress(array, offset, kNoDexPc);
   // TODO: Is it ok to not have this on the intermediate address?
   // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
   access->GetBlock()->InsertInstructionBefore(address, access);
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 8790c1e..93a2340 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1945,7 +1945,7 @@
     //   if (src_ptr != end_ptr) {
     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
     //     if (is_gray) {
     //       // Slow-path copy.
     //       do {
@@ -1986,9 +1986,8 @@
     // Given the numeric representation, it's enough to check the low bit of the
     // rb_state. We do that by shifting the bit out of the lock word with LSRS
     // which can be a 16-bit instruction unlike the TST immediate.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
     __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
     // Carry flag is the last bit shifted out by LSRS.
     __ b(read_barrier_slow_path->GetEntryLabel(), CS);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index db1c022..47e6d96 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2659,7 +2659,7 @@
       //   if (src_ptr != end_ptr) {
       //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
       //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-      //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+      //     bool is_gray = (rb_state == ReadBarrier::GrayState());
       //     if (is_gray) {
       //       // Slow-path copy.
       //       do {
@@ -2704,9 +2704,8 @@
       codegen_->AddSlowPath(read_barrier_slow_path);
 
       // Given the numeric representation, it's enough to check the low bit of the rb_state.
-      static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-      static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-      static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+      static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+      static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
       __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
 
       // Fast-path copy.
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
new file mode 100644
index 0000000..6ff0ca4
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -0,0 +1,2694 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_arm_vixl.h"
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_arm_vixl.h"
+#include "common_arm.h"
+#include "lock_word.h"
+#include "mirror/array-inl.h"
+
+#include "aarch32/constants-aarch32.h"
+
+namespace art {
+namespace arm {
+
+#define __ assembler->GetVIXLAssembler()->
+
+using helpers::DRegisterFrom;
+using helpers::HighRegisterFrom;
+using helpers::InputDRegisterAt;
+using helpers::InputRegisterAt;
+using helpers::InputSRegisterAt;
+using helpers::InputVRegisterAt;
+using helpers::Int32ConstantFrom;
+using helpers::LocationFrom;
+using helpers::LowRegisterFrom;
+using helpers::LowSRegisterFrom;
+using helpers::OutputDRegister;
+using helpers::OutputRegister;
+using helpers::OutputVRegister;
+using helpers::RegisterFrom;
+using helpers::SRegisterFrom;
+
+using namespace vixl::aarch32;  // NOLINT(build/namespaces)
+
+ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
+  return codegen_->GetAssembler();
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
+  return codegen_->GetGraph()->GetArena();
+}
+
+// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
+// intrinsified call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
+//       restored!
+//
+// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
+//       sub-optimal (compared to a direct pointer call), but this is a slow-path.
+
+class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
+      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}
+
+  Location MoveArguments(CodeGenerator* codegen) {
+    InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
+    return calling_convention_visitor.GetMethodLocation();
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
+    __ Bind(GetEntryLabel());
+
+    SaveLiveRegisters(codegen, invoke_->GetLocations());
+
+    Location method_loc = MoveArguments(codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
+    } else {
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
+    }
+    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      codegen->MoveFromReturnRegister(out, invoke_->GetType());
+    }
+
+    RestoreLiveRegisters(codegen, invoke_->GetLocations());
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }
+
+ private:
+  // The instruction where this slow path is happening.
+  HInvoke* const invoke_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
+};
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
+      : SlowPathCodeARMVIXL(instruction) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+    vixl32::Register dest = InputRegisterAt(instruction_, 2);
+    Location dest_pos = locations->InAt(3);
+    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
+    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
+    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
+    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
+
+    __ Bind(GetEntryLabel());
+    // Compute the base destination address in `dst_curr_addr`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = Int32ConstantFrom(dest_pos);
+      __ Add(dst_curr_addr, dest, element_size * constant + offset);
+    } else {
+      __ Add(dst_curr_addr,
+             dest,
+             Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
+      __ Add(dst_curr_addr, dst_curr_addr, offset);
+    }
+
+    vixl32::Label loop;
+    __ Bind(&loop);
+    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+    assembler->MaybeUnpoisonHeapReference(tmp);
+    // TODO: Inline the mark bit check before calling the runtime?
+    // tmp = ReadBarrier::Mark(tmp);
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
+    // explanations.)
+    DCHECK(!tmp.IsSP());
+    DCHECK(!tmp.IsLR());
+    DCHECK(!tmp.IsPC());
+    // IP is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary (and not preserved).  It thus cannot be used by
+    // any live register in this slow path.
+    DCHECK(!src_curr_addr.Is(ip));
+    DCHECK(!dst_curr_addr.Is(ip));
+    DCHECK(!src_stop_addr.Is(ip));
+    DCHECK(!tmp.Is(ip));
+    DCHECK(tmp.IsRegister()) << tmp;
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
+    // This runtime call does not require a stack map.
+    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    assembler->MaybePoisonHeapReference(tmp);
+    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+    __ Cmp(src_curr_addr, src_stop_addr);
+    __ B(ne, &loop);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
+};
+
+IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
+    : arena_(codegen->GetGraph()->GetArena()),
+      assembler_(codegen->GetAssembler()),
+      features_(codegen->GetInstructionSetFeatures()) {}
+
+bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
+  Dispatch(invoke);
+  LocationSummary* res = invoke->GetLocations();
+  if (res == nullptr) {
+    return false;
+  }
+  return res->Intrinsified();
+}
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  if (is64bit) {
+    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
+  } else {
+    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
+  }
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  if (is64bit) {
+    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
+  } else {
+    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenNumberOfLeadingZeros(LocationSummary* locations,
+                                    Primitive::Type type,
+                                    ArmVIXLAssembler* assembler) {
+  Location in = locations->InAt(0);
+  vixl32::Register out = RegisterFrom(locations->Out());
+
+  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+
+  if (type == Primitive::kPrimLong) {
+    vixl32::Register in_reg_lo = LowRegisterFrom(in);
+    vixl32::Register in_reg_hi = HighRegisterFrom(in);
+    vixl32::Label end;
+    __ Clz(out, in_reg_hi);
+    __ Cbnz(in_reg_hi, &end);
+    __ Clz(out, in_reg_lo);
+    __ Add(out, out, 32);
+    __ Bind(&end);
+  } else {
+    __ Clz(out, RegisterFrom(in));
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void GenNumberOfTrailingZeros(LocationSummary* locations,
+                                     Primitive::Type type,
+                                     ArmVIXLAssembler* assembler) {
+  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+
+  vixl32::Register out = RegisterFrom(locations->Out());
+
+  if (type == Primitive::kPrimLong) {
+    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
+    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
+    vixl32::Label end;
+    __ Rbit(out, in_reg_lo);
+    __ Clz(out, out);
+    __ Cbnz(in_reg_lo, &end);
+    __ Rbit(out, in_reg_hi);
+    __ Clz(out, out);
+    __ Add(out, out, 32);
+    __ Bind(&end);
+  } else {
+    vixl32::Register in = RegisterFrom(locations->InAt(0));
+    __ Rbit(out, in);
+    __ Clz(out, out);
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
+  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
+  MathAbsFP(invoke, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
+  MathAbsFP(invoke, GetAssembler());
+}
+
+static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenAbsInteger(LocationSummary* locations,
+                          bool is64bit,
+                          ArmVIXLAssembler* assembler) {
+  Location in = locations->InAt(0);
+  Location output = locations->Out();
+
+  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
+
+  if (is64bit) {
+    vixl32::Register in_reg_lo = LowRegisterFrom(in);
+    vixl32::Register in_reg_hi = HighRegisterFrom(in);
+    vixl32::Register out_reg_lo = LowRegisterFrom(output);
+    vixl32::Register out_reg_hi = HighRegisterFrom(output);
+
+    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
+
+    __ Asr(mask, in_reg_hi, 31);
+    __ Adds(out_reg_lo, in_reg_lo, mask);
+    __ Adc(out_reg_hi, in_reg_hi, mask);
+    __ Eor(out_reg_lo, mask, out_reg_lo);
+    __ Eor(out_reg_hi, mask, out_reg_hi);
+  } else {
+    vixl32::Register in_reg = RegisterFrom(in);
+    vixl32::Register out_reg = RegisterFrom(output);
+
+    __ Asr(mask, in_reg, 31);
+    __ Add(out_reg, in_reg, mask);
+    __ Eor(out_reg, mask, out_reg);
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+  vixl32::Register op1 = InputRegisterAt(invoke, 0);
+  vixl32::Register op2 = InputRegisterAt(invoke, 1);
+  vixl32::Register out = OutputRegister(invoke);
+
+  __ Cmp(op1, op2);
+
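+  // Select the result without branching: the IT(E) block conditionally moves op1 or op2
+  // into out depending on the comparison above (lt/ge for min, gt/le for max).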
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               3 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ ite(is_min ? lt : gt);
+    __ mov(is_min ? lt : gt, out, op1);
+    __ mov(is_min ? ge : le, out, op2);
+  }
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
+  GenMinMax(invoke, /* is_min */ true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
+  GenMinMax(invoke, /* is_min */ false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  __ Ldrsb(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  __ Ldr(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
+  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
+  // exception. So we can't use ldrd as addr may be unaligned.
+  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
+  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
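+  // If the low output register aliases the address register, load the high word first so
+  // the address is not clobbered before the second load.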
+  if (addr.Is(lo)) {
+    __ Ldr(hi, MemOperand(addr, 4));
+    __ Ldr(lo, addr);
+  } else {
+    __ Ldr(lo, addr);
+    __ Ldr(hi, MemOperand(addr, 4));
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  __ Ldrsh(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Strb(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Str(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
+  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
+  // exception. So we can't use strd as addr may be unaligned.
+  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), addr);
+  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Strh(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Ldr(OutputRegister(invoke),
+         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
+}
+
+static void GenUnsafeGet(HInvoke* invoke,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         CodeGeneratorARMVIXL* codegen) {
+  LocationSummary* locations = invoke->GetLocations();
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+  Location base_loc = locations->InAt(1);
+  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
+  Location trg_loc = locations->Out();
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      vixl32::Register trg = RegisterFrom(trg_loc);
+      __ Ldr(trg, MemOperand(base, offset));
+      if (is_volatile) {
+        __ Dmb(vixl32::ISH);
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      vixl32::Register trg = RegisterFrom(trg_loc);
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
+          if (is_volatile) {
+            __ Dmb(vixl32::ISH);
+          }
+        } else {
+          __ Ldr(trg, MemOperand(base, offset));
+          if (is_volatile) {
+            __ Dmb(vixl32::ISH);
+          }
+          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ Ldr(trg, MemOperand(base, offset));
+        if (is_volatile) {
+          __ Dmb(vixl32::ISH);
+        }
+        assembler->MaybeUnpoisonHeapReference(trg);
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
+      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
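+      // On cores without atomic 64-bit ldrd, a volatile load uses ldrexd, which performs a
+      // single-copy atomic 64-bit load; the exclusive monitor it sets up is left unused.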
+      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+        __ Ldrexd(trg_lo, trg_hi, MemOperand(base, offset));
+      } else {
+        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
+      }
+      if (is_volatile) {
+        __ Dmb(vixl32::ISH);
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type " << type;
+      UNREACHABLE();
+  }
+}
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
+                                                           kIntrinsified);
+  if (can_call && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(),
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
+}
+
+static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
+                                     const ArmInstructionSetFeatures& features,
+                                     Primitive::Type type,
+                                     bool is_volatile,
+                                     HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+
+  if (type == Primitive::kPrimLong) {
+    // Potentially need temps for ldrexd-strexd loop.
+    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
+      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
+      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
+    }
+  } else if (type == Primitive::kPrimNot) {
+    // Temps for card-marking.
+    locations->AddTemp(Location::RequiresRegister());  // Temp.
+    locations->AddTemp(Location::RequiresRegister());  // Card.
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
+}
+
+static void GenUnsafePut(LocationSummary* locations,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         bool is_ordered,
+                         CodeGeneratorARMVIXL* codegen) {
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+
+  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
+  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
+  vixl32::Register value;
+
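+  // Both volatile and ordered stores need a barrier before the store; volatile stores also
+  // get a trailing barrier after the store (see below).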
+  if (is_volatile || is_ordered) {
+    __ Dmb(vixl32::ISH);
+  }
+
+  if (type == Primitive::kPrimLong) {
+    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
+    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
+    value = value_lo;
+    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
+      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
+      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+      const vixl32::Register temp_reg = temps.Acquire();
+
+      __ Add(temp_reg, base, offset);
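+      // Atomic 64-bit store via an ldrexd/strexd retry loop: ldrexd only sets up the
+      // exclusive monitor (its result is discarded), and strexd writes 0 to temp_lo on
+      // success, so we loop until the store goes through.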
+      vixl32::Label loop_head;
+      __ Bind(&loop_head);
+      __ Ldrexd(temp_lo, temp_hi, temp_reg);
+      __ Strexd(temp_lo, value_lo, value_hi, temp_reg);
+      __ Cmp(temp_lo, 0);
+      __ B(ne, &loop_head);
+    } else {
+      __ Strd(value_lo, value_hi, MemOperand(base, offset));
+    }
+  } else {
+    value = RegisterFrom(locations->InAt(3));
+    vixl32::Register source = value;
+    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+      __ Mov(temp, value);
+      assembler->PoisonHeapReference(temp);
+      source = temp;
+    }
+    __ Str(source, MemOperand(base, offset));
+  }
+
+  if (is_volatile) {
+    __ Dmb(vixl32::ISH);
+  }
+
+  if (type == Primitive::kPrimNot) {
+    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
+    bool value_can_be_null = true;  // TODO: Worth finding out this information?
+    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
+  }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
+}
+
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
+                                                HInvoke* invoke,
+                                                Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  // If heap poisoning is enabled, we don't want the unpoisoning
+  // operations to potentially clobber the output. Likewise when
+  // emitting a (Baker) read barrier, which may call.
+  Location::OutputOverlap overlaps =
+      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
+      ? Location::kOutputOverlap
+      : Location::kNoOutputOverlap;
+  locations->SetOut(Location::RequiresRegister(), overlaps);
+
+  // Temporary registers used in CAS. In the object case
+  // (UnsafeCASObject intrinsic), these are also used for
+  // card-marking, and possibly for (Baker) read barrier.
+  locations->AddTemp(Location::RequiresRegister());  // Pointer.
+  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
+}
+
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
+  DCHECK_NE(type, Primitive::kPrimLong);
+
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Location out_loc = locations->Out();
+  vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
+
+  vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  vixl32::Register offset = LowRegisterFrom(offset_loc);              // Offset (discard high 4B).
+  vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
+  vixl32::Register value = InputRegisterAt(invoke, 4);                // Value.
+
+  Location tmp_ptr_loc = locations->GetTemp(0);
+  vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);               // Pointer to actual memory.
+  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Value in memory.
+
+  if (type == Primitive::kPrimNot) {
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barriers.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+    // object and scan the receiver at the next GC for nothing.
+    bool value_can_be_null = true;  // TODO: Worth finding out this information?
+    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused, used only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          ScaleFactor::TIMES_1,
+          tmp_ptr_loc,
+          /* needs_null_check */ false,
+          /* always_update_field */ true,
+          &tmp);
+    }
+  }
+
+  // Prevent reordering with prior memory operations.
+  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
+  // latter allows a preceding load to be delayed past the STXR
+  // instruction below.
+  __ Dmb(vixl32::ISH);
+
+  __ Add(tmp_ptr, base, offset);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    codegen->GetAssembler()->PoisonHeapReference(expected);
+    if (value.Is(expected)) {
+      // Do not poison `value`, as it is the same register as
+      // `expected`, which has just been poisoned.
+    } else {
+      codegen->GetAssembler()->PoisonHeapReference(value);
+    }
+  }
+
+  // do {
+  //   tmp = [r_ptr] - expected;
+  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+  //   result = tmp == 0;
+
+  vixl32::Label loop_head;
+  __ Bind(&loop_head);
+
+  __ Ldrex(tmp, tmp_ptr);
+
+  __ Subs(tmp, tmp, expected);
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               3 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ itt(eq);
+    __ strex(eq, tmp, value, tmp_ptr);
+    __ cmp(eq, tmp, 1);
+  }
+
+  __ B(eq, &loop_head);
+
+  __ Dmb(vixl32::ISH);
+
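+  // out = 1 - tmp; the conditional move below clears out when the subtraction borrows
+  // (tmp >= 2), so out ends up as (tmp == 0), i.e. 1 on success and 0 on failure.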
+  __ Rsbs(out, tmp, 1);
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ it(cc);
+    __ mov(cc, out, 0);
+  }
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    codegen->GetAssembler()->UnpoisonHeapReference(expected);
+    if (value.Is(expected)) {
+      // Do not unpoison `value`, as it is the same register as
+      // `expected`, which has just been unpoisoned.
+    } else {
+      codegen->GetAssembler()->UnpoisonHeapReference(value);
+    }
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
+  GenCas(invoke, Primitive::kPrimInt, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  GenCas(invoke, Primitive::kPrimNot, codegen_);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
+  // The inputs plus one temp.
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            invoke->InputAt(1)->CanBeNull()
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  // Need extra temporary registers for the String compression feature.
+  if (mirror::kUseStringCompression) {
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register str = InputRegisterAt(invoke, 0);
+  vixl32::Register arg = InputRegisterAt(invoke, 1);
+  vixl32::Register out = OutputRegister(invoke);
+
+  vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
+  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
+  vixl32::Register temp3, temp4;
+  if (mirror::kUseStringCompression) {
+    temp3 = RegisterFrom(locations->GetTemp(3));
+    temp4 = RegisterFrom(locations->GetTemp(4));
+  }
+
+  vixl32::Label loop;
+  vixl32::Label find_char_diff;
+  vixl32::Label end;
+  vixl32::Label different_compression;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Take slow path and throw if input can be and is null.
+  SlowPathCodeARMVIXL* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ Cbz(arg, slow_path->GetEntryLabel());
+  }
+
+  // Reference equality check, return 0 if same reference.
+  __ Subs(out, str, arg);
+  __ B(eq, &end);
+
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  vixl32::Register temp_reg = temps.Acquire();
+
+  if (mirror::kUseStringCompression) {
+    // Load lengths of this and argument strings.
+    __ Ldr(temp3, MemOperand(str, count_offset));
+    __ Ldr(temp4, MemOperand(arg, count_offset));
+    // Clean out compression flag from lengths.
+    __ Bic(temp0, temp3, 0x80000000);
+    __ Bic(temp_reg, temp4, 0x80000000);
+  } else {
+    // Load lengths of this and argument strings.
+    __ Ldr(temp0, MemOperand(str, count_offset));
+    __ Ldr(temp_reg, MemOperand(arg, count_offset));
+  }
+  // out = length diff.
+  __ Subs(out, temp0, temp_reg);
+  // temp0 = min(len(str), len(arg)).
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ it(gt);
+    __ mov(gt, temp0, temp_reg);
+  }
+
+  temps.Release(temp_reg);
+  // Shorter string is empty?
+  __ Cbz(temp0, &end);
+
+  if (mirror::kUseStringCompression) {
+    // Check that both strings use the same compression style before using this comparison loop.
+    __ Eors(temp3, temp3, temp4);
+    __ B(mi, &different_compression);
+  }
+  // Store offset of string value in preparation for comparison loop.
+  __ Mov(temp1, value_offset);
+  if (mirror::kUseStringCompression) {
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
+    __ Cmp(temp4, 0);
+
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ it(ge);
+    __ add(ge, temp0, temp0, temp0);
+  }
+
+  // Assertions that must hold in order to compare multiple characters at a time.
+  CHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment),
+                "String data must be 8-byte aligned for unrolled CompareTo loop.");
+
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  vixl32::Label find_char_diff_2nd_cmp;
+  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
+  __ Bind(&loop);
+  temp_reg = temps.Acquire();
+  __ Ldr(temp_reg, MemOperand(str, temp1));
+  __ Ldr(temp2, MemOperand(arg, temp1));
+  __ Cmp(temp_reg, temp2);
+  __ B(ne, &find_char_diff);
+  __ Add(temp1, temp1, char_size * 2);
+
+  __ Ldr(temp_reg, MemOperand(str, temp1));
+  __ Ldr(temp2, MemOperand(arg, temp1));
+  __ Cmp(temp_reg, temp2);
+  __ B(ne, &find_char_diff_2nd_cmp);
+  __ Add(temp1, temp1, char_size * 2);
+  // With string compression, we have compared 8 bytes, otherwise 4 chars.
+  __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
+  __ B(hi, &loop);
+  __ B(&end);
+
+  __ Bind(&find_char_diff_2nd_cmp);
+  if (mirror::kUseStringCompression) {
+    __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
+    __ B(ls, &end);  // Was the second comparison fully beyond the end?
+  } else {
+    // Without string compression, we can start treating temp0 as signed
+    // and rely on the signed comparison below.
+    __ Sub(temp0, temp0, 2);
+  }
+
+  // Find the single character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ Eor(temp1, temp2, temp_reg);
+  __ Rbit(temp1, temp1);
+  __ Clz(temp1, temp1);
+
+  // temp0 = number of characters remaining to compare.
+  // (Without string compression, it could be < 1 if a difference is found by the second CMP
+  // in the comparison loop, and after the end of the shorter string data).
+
+  // Without string compression (temp1 >> 4) = character where difference occurs between the last
+  // two words compared, in the interval [0,1].
+  // (0 for low half-word different, 1 for high half-word different).
+  // With string compression, (temp1 << 3) = byte where the difference occurs,
+  // in the interval [0,3].
+
+  // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
+  // the remaining string data, so just return length diff (out).
+  // The comparison is unsigned for string compression, otherwise signed.
+  __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
+  __ B((mirror::kUseStringCompression ? ls : le), &end);
+  // Extract the characters and calculate the difference.
+  vixl32::Label uncompressed_string, continue_process;
+  if (mirror::kUseStringCompression) {
+    __ Cmp(temp4, 0);
+    __ B(ge, &uncompressed_string);
+    __ Bic(temp1, temp1, 0x7);
+    __ B(&continue_process);
+  }
+  __ Bind(&uncompressed_string);
+  __ Bic(temp1, temp1, 0xf);
+  __ Bind(&continue_process);
+
+  __ Lsr(temp2, temp2, temp1);
+  __ Lsr(temp_reg, temp_reg, temp1);
+  vixl32::Label calculate_difference, uncompressed_string_extract_chars;
+  if (mirror::kUseStringCompression) {
+    __ Cmp(temp4, 0);
+    __ B(ge, &uncompressed_string_extract_chars);
+    __ Ubfx(temp2, temp2, 0, 8);
+    __ Ubfx(temp_reg, temp_reg, 0, 8);
+    __ B(&calculate_difference);
+  }
+  __ Bind(&uncompressed_string_extract_chars);
+  __ Movt(temp2, 0);
+  __ Movt(temp_reg, 0);
+  __ Bind(&calculate_difference);
+  __ Sub(out, temp_reg, temp2);
+  temps.Release(temp_reg);
+  __ B(&end);
+
+  if (mirror::kUseStringCompression) {
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+    vixl32::Label loop_arg_compressed, loop_this_compressed, find_diff;
+    // Comparison for different compression style.
+    // This part is when THIS is compressed and ARG is not.
+    __ Bind(&different_compression);
+    __ Add(temp2, str, value_offset);
+    __ Add(temp3, arg, value_offset);
+    __ Cmp(temp4, 0);
+    __ B(lt, &loop_arg_compressed);
+
+    __ Bind(&loop_this_compressed);
+    temp_reg = temps.Acquire();
+    __ Ldrb(temp_reg, MemOperand(temp2, c_char_size, PostIndex));
+    __ Ldrh(temp4, MemOperand(temp3, char_size, PostIndex));
+    __ Cmp(temp_reg, temp4);
+    __ B(ne, &find_diff);
+    __ Subs(temp0, temp0, 1);
+    __ B(gt, &loop_this_compressed);
+    __ B(&end);
+
+    // This part is when THIS is not compressed and ARG is.
+    __ Bind(&loop_arg_compressed);
+    __ Ldrh(temp_reg, MemOperand(temp2, char_size, PostIndex));
+    __ Ldrb(temp4, MemOperand(temp3, c_char_size, PostIndex));
+    __ Cmp(temp_reg, temp4);
+    __ B(ne, &find_diff);
+    __ Subs(temp0, temp0, 1);
+    __ B(gt, &loop_arg_compressed);
+    __ B(&end);
+
+    // Calculate the difference.
+    __ Bind(&find_diff);
+    __ Sub(out, temp_reg, temp4);
+    temps.Release(temp_reg);
+  }
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Temporary registers to store lengths of strings and for calculations.
+  // Using the cbz instruction requires a low register, so explicitly set a temp to be R0.
+  locations->AddTemp(LocationFrom(r0));
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register str = InputRegisterAt(invoke, 0);
+  vixl32::Register arg = InputRegisterAt(invoke, 1);
+  vixl32::Register out = OutputRegister(invoke);
+
+  vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
+  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
+
+  vixl32::Label loop, preloop;
+  vixl32::Label end;
+  vixl32::Label return_true;
+  vixl32::Label return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ Cbz(arg, &return_false);
+  }
+
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Ldr(temp, MemOperand(str, class_offset));
+    __ Ldr(temp1, MemOperand(arg, class_offset));
+    __ Cmp(temp, temp1);
+    __ B(ne, &return_false);
+  }
+
+  // Load lengths of this and argument strings.
+  __ Ldr(temp, MemOperand(str, count_offset));
+  __ Ldr(temp1, MemOperand(arg, count_offset));
+  // Check if lengths are equal, return false if they're not.
+  // This also compares the compression style; if it differs, return false.
+  __ Cmp(temp, temp1);
+  __ B(ne, &return_false);
+  // Return true if both strings are empty.
+  if (mirror::kUseStringCompression) {
+    // The length needs to have the compression flag masked out first, because 0 is treated as compressed.
+    __ Bic(temp, temp, 0x80000000);
+  }
+  __ Cbz(temp, &return_true);
+  // Reference equality check, return true if same reference.
+  __ Cmp(str, arg);
+  __ B(eq, &return_true);
+
+  // Assertions that must hold in order to compare strings 2 characters at a time.
+  DCHECK_ALIGNED(value_offset, 4);
+  static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
+
+  if (mirror::kUseStringCompression) {
+    // If not compressed, go directly to the fast compare. Otherwise, preprocess the length first.
+    __ Cmp(temp1, 0);
+    __ B(gt, &preloop);
+    // Mask out compression flag and adjust length for compressed string (8-bit)
+    // as if it were 16-bit data: new_length = (length + 1) / 2.
+    __ Add(temp, temp, 1);
+    __ Lsr(temp, temp, 1);
+    __ Bind(&preloop);
+  }
+  // Loop to compare strings 2 characters at a time starting at the front of the string.
+  // Ok to do this because strings with an odd length are zero-padded.
+  __ Mov(temp1, value_offset);
+  __ Bind(&loop);
+  __ Ldr(out, MemOperand(str, temp1));
+  __ Ldr(temp2, MemOperand(arg, temp1));
+  __ Cmp(out, temp2);
+  __ B(ne, &return_false);
+  __ Add(temp1, temp1, sizeof(uint32_t));
+  __ Subs(temp, temp, sizeof(uint32_t) / sizeof(uint16_t));
+  __ B(gt, &loop);
+
+  // Return true and exit the function.
+  // If loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ Mov(out, 1);
+  __ B(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ Mov(out, 0);
+  __ Bind(&end);
+}
+
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+                                       ArmVIXLAssembler* assembler,
+                                       CodeGeneratorARMVIXL* codegen,
+                                       ArenaAllocator* allocator,
+                                       bool start_at_zero) {
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
+  SlowPathCodeARMVIXL* slow_path = nullptr;
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
+        std::numeric_limits<uint16_t>::max()) {
+      // Always needs the slow-path. We could directly dispatch to it, but this case should be
+      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+      slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
+    vixl32::Register char_reg = InputRegisterAt(invoke, 1);
+    // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
+    __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
+    slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
+    codegen->AddSlowPath(slow_path);
+    __ B(hs, slow_path->GetEntryLabel());
+  }
+
+  if (start_at_zero) {
+    vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
+    DCHECK(tmp_reg.Is(r2));
+    // Start-index = 0.
+    __ Mov(tmp_reg, 0);
+  }
+
+  codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
+  CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+  // best to align the inputs accordingly.
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetOut(LocationFrom(r0));
+
+  // Need to send start-index=0.
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
+  GenerateVisitStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+  // best to align the inputs accordingly.
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(LocationFrom(r0));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
+  GenerateVisitStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
+  locations->SetOut(LocationFrom(r0));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  vixl32::Register byte_array = InputRegisterAt(invoke, 0);
+  __ Cmp(byte_array, 0);
+  SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ B(eq, slow_path->GetEntryLabel());
+
+  codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainOnly,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(LocationFrom(r0));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
+  // No need to emit code checking whether `locations->InAt(2)` is a null
+  // pointer, as callers of the native method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // all include a null check on `data` before calling that method.
+  codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(LocationFrom(r0));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
+  __ Cmp(string_to_copy, 0);
+  SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ B(eq, slow_path->GetEntryLabel());
+
+  codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
+  LocationSummary* locations = invoke->GetLocations();
+  if (locations == nullptr) {
+    return;
+  }
+
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+
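+  // Constant positions/length that cannot always be encoded as an ARM modified immediate
+  // (shifter operand) are forced into registers instead.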
+  if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
+    locations->SetInAt(3, Location::RequiresRegister());
+  }
+  if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
+    locations->SetInAt(4, Location::RequiresRegister());
+  }
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Temporary register IP cannot be used in
+    // ReadBarrierSystemArrayCopySlowPathARM (because that register
+    // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
+    // temporary register from the register allocator.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
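+// Checks that the [pos, pos + length) range is within the bounds of `input`'s array length,
+// branching to `slow_path` otherwise. When `length_is_input_length` is true, the length is
+// known to equal the input's length, so only pos == 0 can succeed.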
+static void CheckPosition(ArmVIXLAssembler* assembler,
+                          Location pos,
+                          vixl32::Register input,
+                          Location length,
+                          SlowPathCodeARMVIXL* slow_path,
+                          vixl32::Register temp,
+                          bool length_is_input_length = false) {
+  // Where is the length in the Array?
+  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+  if (pos.IsConstant()) {
+    int32_t pos_const = Int32ConstantFrom(pos);
+    if (pos_const == 0) {
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        __ Ldr(temp, MemOperand(input, length_offset));
+        if (length.IsConstant()) {
+          __ Cmp(temp, Int32ConstantFrom(length));
+        } else {
+          __ Cmp(temp, RegisterFrom(length));
+        }
+        __ B(lt, slow_path->GetEntryLabel());
+      }
+    } else {
+      // Check that length(input) >= pos.
+      __ Ldr(temp, MemOperand(input, length_offset));
+      __ Subs(temp, temp, pos_const);
+      __ B(lt, slow_path->GetEntryLabel());
+
+      // Check that (length(input) - pos) >= length.
+      if (length.IsConstant()) {
+        __ Cmp(temp, Int32ConstantFrom(length));
+      } else {
+        __ Cmp(temp, RegisterFrom(length));
+      }
+      __ B(lt, slow_path->GetEntryLabel());
+    }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    vixl32::Register pos_reg = RegisterFrom(pos);
+    __ Cbnz(pos_reg, slow_path->GetEntryLabel());
+  } else {
+    // Check that pos >= 0.
+    vixl32::Register pos_reg = RegisterFrom(pos);
+    __ Cmp(pos_reg, 0);
+    __ B(lt, slow_path->GetEntryLabel());
+
+    // Check that pos <= length(input).
+    __ Ldr(temp, MemOperand(input, length_offset));
+    __ Subs(temp, temp, pos_reg);
+    __ B(lt, slow_path->GetEntryLabel());
+
+    // Check that (length(input) - pos) >= length.
+    if (length.IsConstant()) {
+      __ Cmp(temp, Int32ConstantFrom(length));
+    } else {
+      __ Cmp(temp, RegisterFrom(length));
+    }
+    __ B(lt, slow_path->GetEntryLabel());
+  }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  vixl32::Register src = InputRegisterAt(invoke, 0);
+  Location src_pos = locations->InAt(1);
+  vixl32::Register dest = InputRegisterAt(invoke, 2);
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  Location temp1_loc = locations->GetTemp(0);
+  vixl32::Register temp1 = RegisterFrom(temp1_loc);
+  Location temp2_loc = locations->GetTemp(1);
+  vixl32::Register temp2 = RegisterFrom(temp2_loc);
+  Location temp3_loc = locations->GetTemp(2);
+  vixl32::Register temp3 = RegisterFrom(temp3_loc);
+
+  SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
+
+  vixl32::Label conditions_on_positions_validated;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  // If source and destination are the same, we go to slow path if we need to do
+  // forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = Int32ConstantFrom(src_pos);
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
+      if (optimizations.GetDestinationIsSource()) {
+        // Checked when building locations.
+        DCHECK_GE(src_pos_constant, dest_pos_constant);
+      } else if (src_pos_constant < dest_pos_constant) {
+        __ Cmp(src, dest);
+        __ B(eq, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ Cmp(src, dest);
+        __ B(ne, &conditions_on_positions_validated);
+      }
+      __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
+      __ B(gt, intrinsic_slow_path->GetEntryLabel());
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ Cmp(src, dest);
+      __ B(ne, &conditions_on_positions_validated);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
+      __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
+    } else {
+      __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
+    }
+    __ B(lt, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  __ Bind(&conditions_on_positions_validated);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ Cmp(RegisterFrom(length), 0);
+    __ B(lt, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                intrinsic_slow_path,
+                temp1,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                intrinsic_slow_path,
+                temp1,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp1` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+        __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        //
+        // Register `temp1` is not trashed by the read barrier emitted
+        // by GenerateFieldLoadWithBakerReadBarrier below, as that
+        // method produces a call to a ReadBarrierMarkRegX entry point,
+        // which saves all potentially live registers, including
+        // temporaries such as `temp1`.
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+        __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // For the same reason given earlier, `temp1` is not trashed by the
+      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+      __ Cmp(temp1, temp2);
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        vixl32::Label do_copy;
+        __ B(eq, &do_copy);
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        // We do not need to emit a read barrier for the following
+        // heap reference load, as `temp1` is only used in a
+        // comparison with null below, and this reference is not
+        // kept afterwards.
+        __ Ldr(temp1, MemOperand(temp1, super_offset));
+        __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ B(ne, intrinsic_slow_path->GetEntryLabel());
+      }
+    } else {
+      // Non read barrier code.
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      __ Ldr(temp1, MemOperand(dest, class_offset));
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      __ Ldr(temp2, MemOperand(src, class_offset));
+      bool did_unpoison = false;
+      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+          !optimizations.GetSourceIsNonPrimitiveArray()) {
+        // One or two of the references need to be unpoisoned. Unpoison them
+        // both to make the identity check valid.
+        assembler->MaybeUnpoisonHeapReference(temp1);
+        assembler->MaybeUnpoisonHeapReference(temp2);
+        did_unpoison = true;
+      }
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp1->component_type_
+        __ Ldr(temp3, MemOperand(temp1, component_offset));
+        __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+        assembler->MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+        __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp2->component_type_
+        __ Ldr(temp3, MemOperand(temp2, component_offset));
+        __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+        assembler->MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+        __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      __ Cmp(temp1, temp2);
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        vixl32::Label do_copy;
+        __ B(eq, &do_copy);
+        if (!did_unpoison) {
+          assembler->MaybeUnpoisonHeapReference(temp1);
+        }
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ Ldr(temp1, MemOperand(temp1, component_offset));
+        assembler->MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        __ Ldr(temp1, MemOperand(temp1, super_offset));
+        // No need to unpoison the result, we're comparing against null.
+        __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ B(ne, intrinsic_slow_path->GetEntryLabel());
+      }
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non primitive array.
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+      __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+      // If heap poisoning is enabled, `temp3` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+    } else {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ Ldr(temp1, MemOperand(src, class_offset));
+      assembler->MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      __ Ldr(temp3, MemOperand(temp1, component_offset));
+      __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+      assembler->MaybeUnpoisonHeapReference(temp3);
+    }
+    // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+    __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
+    static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+    __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+  uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+  // Compute the base source address in `temp1`.
+  if (src_pos.IsConstant()) {
+    int32_t constant = Int32ConstantFrom(src_pos);
+    __ Add(temp1, src, element_size * constant + offset);
+  } else {
+    __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
+    __ Add(temp1, temp1, offset);
+  }
+
+  // Compute the end source address in `temp3`.
+  if (length.IsConstant()) {
+    int32_t constant = Int32ConstantFrom(length);
+    __ Add(temp3, temp1, element_size * constant);
+  } else {
+    __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
+  }
+
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // The base destination address is computed later, as `temp2` is
+    // used for intermediate computations.
+
+    // SystemArrayCopy implementation for Baker read barriers (see
+    // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+    //
+    //   if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //     if (is_gray) {
+    //       // Slow-path copy.
+    //       do {
+    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+    //       } while (src_ptr != end_ptr)
+    //     } else {
+    //       // Fast-path copy.
+    //       do {
+    //         *dest_ptr++ = *src_ptr++;
+    //       } while (src_ptr != end_ptr)
+    //     }
+    //   }
+
+    vixl32::Label loop, done;
+
+    // Don't enter copy loop if `length == 0`.
+    __ Cmp(temp1, temp3);
+    __ B(eq, &done);
+
+    // /* int32_t */ monitor = src->monitor_
+    __ Ldr(temp2, MemOperand(src, monitor_offset));
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including the rb_state,
+    // which shall prevent load-load reordering without using
+    // a memory barrier (which would be more expensive).
+    // `src` is unchanged by this operation, but its value now depends
+    // on `temp2`.
+    __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
+
+    // Slow path used to copy array when `src` is gray.
+    SlowPathCodeARMVIXL* read_barrier_slow_path =
+        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
+    codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with LSRS
+    // which can be a 16-bit instruction unlike the TST immediate.
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+    __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+    // Carry flag is the last bit shifted out by LSRS.
+    __ B(cs, read_barrier_slow_path->GetEntryLabel());
+
+    // Fast-path copy.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = Int32ConstantFrom(dest_pos);
+      __ Add(temp2, dest, element_size * constant + offset);
+    } else {
+      __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
+      __ Add(temp2, temp2, offset);
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    __ Bind(&loop);
+
+    {
+      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+      const vixl32::Register temp_reg = temps.Acquire();
+
+      __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+      __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+    }
+
+    __ Cmp(temp1, temp3);
+    __ B(ne, &loop);
+
+    __ Bind(read_barrier_slow_path->GetExitLabel());
+    __ Bind(&done);
+  } else {
+    // Non read barrier code.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = Int32ConstantFrom(dest_pos);
+      __ Add(temp2, dest, element_size * constant + offset);
+    } else {
+      __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
+      __ Add(temp2, temp2, offset);
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    vixl32::Label loop, done;
+    __ Cmp(temp1, temp3);
+    __ B(eq, &done);
+    __ Bind(&loop);
+
+    {
+      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+      const vixl32::Register temp_reg = temps.Acquire();
+
+      __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+      __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+    }
+
+    __ Cmp(temp1, temp3);
+    __ B(ne, &loop);
+    __ Bind(&done);
+  }
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
+
+  __ Bind(intrinsic_slow_path->GetExitLabel());
+}
+
+static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  // If the graph is debuggable, all callee-saved floating-point registers are blocked by
+  // the code generator. Furthermore, the register allocator creates fixed live intervals
+  // for all caller-saved registers because we are doing a function call. As a result, if
+  // the input and output locations are unallocated, the register allocator runs out of
+  // registers and fails; however, a debuggable graph is not the common case.
+  if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
+    return;
+  }
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCallOnMainOnly,
+                                                                 kIntrinsified);
+  const InvokeRuntimeCallingConventionARMVIXL calling_convention;
+
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+  // Native code uses the soft float ABI.
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  // If the graph is debuggable, all callee-saved floating-point registers are blocked by
+  // the code generator. Furthermore, the register allocator creates fixed live intervals
+  // for all caller-saved registers because we are doing a function call. As a result, if
+  // the input and output locations are unallocated, the register allocator runs out of
+  // registers and fails; however, a debuggable graph is not the common case.
+  if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
+    return;
+  }
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCallOnMainOnly,
+                                                                 kIntrinsified);
+  const InvokeRuntimeCallingConventionARMVIXL calling_convention;
+
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+  // Native code uses the soft float ABI.
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
+}
+
+static void GenFPToFPCall(HInvoke* invoke,
+                          ArmVIXLAssembler* assembler,
+                          CodeGeneratorARMVIXL* codegen,
+                          QuickEntrypointEnum entry) {
+  LocationSummary* const locations = invoke->GetLocations();
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK(locations->WillCall() && locations->Intrinsified());
+
+  // Native code uses the soft float ABI.
+  __ Vmov(RegisterFrom(locations->GetTemp(0)),
+          RegisterFrom(locations->GetTemp(1)),
+          InputDRegisterAt(invoke, 0));
+  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
+  __ Vmov(OutputDRegister(invoke),
+          RegisterFrom(locations->GetTemp(0)),
+          RegisterFrom(locations->GetTemp(1)));
+}
+
+static void GenFPFPToFPCall(HInvoke* invoke,
+                            ArmVIXLAssembler* assembler,
+                            CodeGeneratorARMVIXL* codegen,
+                            QuickEntrypointEnum entry) {
+  LocationSummary* const locations = invoke->GetLocations();
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK(locations->WillCall() && locations->Intrinsified());
+
+  // Native code uses the soft float ABI.
+  __ Vmov(RegisterFrom(locations->GetTemp(0)),
+          RegisterFrom(locations->GetTemp(1)),
+          InputDRegisterAt(invoke, 0));
+  __ Vmov(RegisterFrom(locations->GetTemp(2)),
+          RegisterFrom(locations->GetTemp(3)),
+          InputDRegisterAt(invoke, 1));
+  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
+  __ Vmov(OutputDRegister(invoke),
+          RegisterFrom(locations->GetTemp(0)),
+          RegisterFrom(locations->GetTemp(1)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
+  vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
+  vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
+  vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
+
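+  // Reversing a 64-bit value: bit-reverse each 32-bit half and swap the halves.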
+  __ Rbit(out_reg_lo, in_reg_hi);
+  __ Rbit(out_reg_hi, in_reg_lo);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
+  vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
+  vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
+  vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
+
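+  // Reversing the bytes of a 64-bit value: byte-swap each 32-bit half and swap the halves.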
+  __ Rev(out_reg_lo, in_reg_hi);
+  __ Rev(out_reg_hi, in_reg_lo);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+}
+
+static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
+  DCHECK(Primitive::IsIntOrLongType(type)) << type;
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
+
+  bool is_long = type == Primitive::kPrimLong;
+  LocationSummary* locations = instr->GetLocations();
+  Location in = locations->InAt(0);
+  vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
+  vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
+  vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
+  vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
+  vixl32::Register  out_r = OutputRegister(instr);
+
+  // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
+  // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
+  // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency;
+  // that's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
+  __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
+  __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
+  __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
+  __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
+  if (is_long) {
+    __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
+  }
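+  // The final count is in the low 32 bits of the temp D-register; move it to the core output register.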
+  __ Vmov(out_r, tmp_s);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
+  VisitIntegerBitCount(invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  // Temporary registers to store lengths of strings and for calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Check assumption that sizeof(Char) is 2 (used in scaling below).
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Location of data in char array buffer.
+  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+  // Location of char array data in string.
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+
+  // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
+  // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
+  vixl32::Register srcObj = InputRegisterAt(invoke, 0);
+  vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
+  vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
+  vixl32::Register dstObj = InputRegisterAt(invoke, 3);
+  vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
+
+  vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
+  vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
+
+  vixl32::Label done, compressed_string_loop;
+  // Compute the destination address: dst array data plus dstBegin scaled by the 2-byte char size.
+  __ Add(dst_ptr, dstObj, data_offset);
+  __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
+
+  __ Subs(num_chr, srcEnd, srcBegin);
+  // Early out for valid zero-length retrievals.
+  __ B(eq, &done);
+
+  // src range to copy.
+  __ Add(src_ptr, srcObj, value_offset);
+
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  vixl32::Register temp;
+  vixl32::Label compressed_string_preloop;
+  if (mirror::kUseStringCompression) {
+    // Location of count in string.
+    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+    temp = temps.Acquire();
+    // String's length.
+    __ Ldr(temp, MemOperand(srcObj, count_offset));
+    __ Cmp(temp, 0);
+    temps.Release(temp);
+    __ B(lt, &compressed_string_preloop);
+  }
+  __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
+
+  // Do the copy.
+  vixl32::Label loop, remainder;
+
+  temp = temps.Acquire();
+  // Subtract into a temp so that num_chr does not need repairing on the < 4 character path.
+  __ Subs(temp, num_chr, 4);
+  __ B(lt, &remainder);
+
+  // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
+  __ Mov(num_chr, temp);
+
+  // The main loop, used for longer fetches, loads and stores 4x16-bit characters at a time.
+  // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
+  // to rectify these everywhere this intrinsic applies.)
+  __ Bind(&loop);
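+  // Each iteration copies 4 characters (8 bytes) as two 32-bit words: the upper word at offset 4
+  // first, then the lower word with post-indexed accesses that advance src_ptr and dst_ptr by 8.
+  // The interleaved Subs updates the counter; its flags are consumed by the B(ge) below.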
+  __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
+  __ Subs(num_chr, num_chr, 4);
+  __ Str(temp, MemOperand(dst_ptr, char_size * 2));
+  __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
+  __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
+  temps.Release(temp);
+  __ B(ge, &loop);
+
+  __ Adds(num_chr, num_chr, 4);
+  __ B(eq, &done);
+
+  // Main loop for < 4 character case and remainder handling. Loads and stores one
+  // 16-bit Java character at a time.
+  __ Bind(&remainder);
+  temp = temps.Acquire();
+  __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
+  __ Subs(num_chr, num_chr, 1);
+  __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
+  temps.Release(temp);
+  __ B(gt, &remainder);
+  __ B(&done);
+
+  if (mirror::kUseStringCompression) {
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+    // Copy loop for compressed src, widening one 8-bit character to 16 bits at a time.
+    __ Bind(&compressed_string_preloop);
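+    // Compressed characters are 1 byte each, so srcBegin is added without scaling.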
+    __ Add(src_ptr, src_ptr, srcBegin);
+    __ Bind(&compressed_string_loop);
+    temp = temps.Acquire();
+    __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
+    __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
+    temps.Release(temp);
+    __ Subs(num_chr, num_chr, 1);
+    __ B(gt, &compressed_string_loop);
+  }
+
+  __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
+  ArmVIXLAssembler* const assembler = GetAssembler();
+  const vixl32::Register out = OutputRegister(invoke);
+  // Shifting left by 1 bit makes the value encodable as an immediate operand;
+  // we don't care about the sign bit anyway.
+  constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
+
+  __ Vmov(out, InputSRegisterAt(invoke, 0));
+  // We don't care about the sign bit, so shift left.
+  __ Lsl(out, out, 1);
+  __ Eor(out, out, infinity);
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ Clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
+  ArmVIXLAssembler* const assembler = GetAssembler();
+  const vixl32::Register out = OutputRegister(invoke);
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  const vixl32::Register temp = temps.Acquire();
+  // The highest 32 bits of double precision positive infinity separated into
+  // two constants encodable as immediate operands.
+  constexpr uint32_t infinity_high  = 0x7f000000U;
+  constexpr uint32_t infinity_high2 = 0x00f00000U;
+
+  static_assert((infinity_high | infinity_high2) ==
+                    static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
+                "The constants do not add up to the high 32 bits of double "
+                "precision positive infinity.");
+  __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
+  __ Eor(out, out, infinity_high);
+  __ Eor(out, out, infinity_high2);
+  // We don't care about the sign bit, so shift left.
+  __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ Clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathCeil)          // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFloor)         // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
+
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
+
+UNREACHABLE_INTRINSICS(ARMVIXL)
+
+#undef __
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
new file mode 100644
index 0000000..6e79cb7
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
+
+#include "intrinsics.h"
+#include "utils/arm/assembler_arm_vixl.h"
+
+namespace art {
+
+namespace arm {
+
+class ArmVIXLAssembler;
+class CodeGeneratorARMVIXL;
+
+class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen);
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+  // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+  // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+  // the invoke.
+  bool TryDispatch(HInvoke* invoke);
+
+ private:
+  ArenaAllocator* arena_;
+  ArmVIXLAssembler* assembler_;
+  const ArmInstructionSetFeatures& features_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARMVIXL);
+};
+
+class IntrinsicCodeGeneratorARMVIXL FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicCodeGeneratorARMVIXL(CodeGeneratorARMVIXL* codegen) : codegen_(codegen) {}
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+  ArenaAllocator* GetAllocator();
+  ArmVIXLAssembler* GetAssembler();
+
+  CodeGeneratorARMVIXL* codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARMVIXL);
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index aae3899..43682c5 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3200,7 +3200,7 @@
     //   if (src_ptr != end_ptr) {
     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
     //     if (is_gray) {
     //       // Slow-path copy.
     //       for (size_t i = 0; i != length; ++i) {
@@ -3222,14 +3222,13 @@
     __ j(kEqual, &done);
 
     // Given the numeric representation, it's enough to check the low bit of the rb_state.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-    // if (rb_state == ReadBarrier::gray_ptr_)
+    // if (rb_state == ReadBarrier::GrayState())
     //   goto slow_path;
     // At this point, just do the "if" and make sure that flags are preserved until the branch.
     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index cdef22f..de2606c 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1399,7 +1399,7 @@
     //   if (src_ptr != end_ptr) {
     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
     //     if (is_gray) {
     //       // Slow-path copy.
     //       do {
@@ -1420,14 +1420,13 @@
     __ j(kEqual, &done);
 
     // Given the numeric representation, it's enough to check the low bit of the rb_state.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-    // if (rb_state == ReadBarrier::gray_ptr_)
+    // if (rb_state == ReadBarrier::GrayState())
     //   goto slow_path;
     // At this point, just do the "if" and make sure that flags are preserved until the branch.
     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index b91e9e6..15e6059 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -33,11 +33,11 @@
  public:
   ReferenceInfo(HInstruction* reference, size_t pos) : reference_(reference), position_(pos) {
     is_singleton_ = true;
-    is_singleton_and_not_returned_ = true;
+    is_singleton_and_non_escaping_ = true;
     if (!reference_->IsNewInstance() && !reference_->IsNewArray()) {
       // For references not allocated in the method, don't assume anything.
       is_singleton_ = false;
-      is_singleton_and_not_returned_ = false;
+      is_singleton_and_non_escaping_ = false;
       return;
     }
 
@@ -50,7 +50,7 @@
         // BoundType shouldn't normally be necessary for a NewInstance.
         // Just be conservative for the uncommon cases.
         is_singleton_ = false;
-        is_singleton_and_not_returned_ = false;
+        is_singleton_and_non_escaping_ = false;
         return;
       }
       if (user->IsPhi() || user->IsSelect() || user->IsInvoke() ||
@@ -62,21 +62,37 @@
         // reference_ is merged to HPhi/HSelect, passed to a callee, or stored to heap.
         // reference_ isn't the only name that can refer to its value anymore.
         is_singleton_ = false;
-        is_singleton_and_not_returned_ = false;
+        is_singleton_and_non_escaping_ = false;
         return;
       }
       if ((user->IsUnresolvedInstanceFieldGet() && (reference_ == user->InputAt(0))) ||
           (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(0)))) {
-        // The field is accessed in an unresolved way. We mark the object as a singleton to
-        // disable load/store optimizations on it.
+        // The field is accessed in an unresolved way. We mark the object as a non-singleton
+        // to disable load/store optimizations on it.
         // Note that we could optimize this case and still perform some optimizations until
         // we hit the unresolved access, but disabling is the simplest.
         is_singleton_ = false;
-        is_singleton_and_not_returned_ = false;
+        is_singleton_and_non_escaping_ = false;
         return;
       }
       if (user->IsReturn()) {
-        is_singleton_and_not_returned_ = false;
+        is_singleton_and_non_escaping_ = false;
+      }
+    }
+
+    if (!is_singleton_ || !is_singleton_and_non_escaping_) {
+      return;
+    }
+
+    // Look at Environment uses; a use by HDeoptimize is treated the same as a return,
+    // which escapes at the end of executing the compiled code. We don't do store
+    // elimination for singletons that escape through HDeoptimize. Other Environment
+    // uses are fine since LSE is disabled for debuggable code.
+    for (const HUseListNode<HEnvironment*>& use : reference_->GetEnvUses()) {
+      HEnvironment* user = use.GetUser();
+      if (user->GetHolder()->IsDeoptimize()) {
+        is_singleton_and_non_escaping_ = false;
+        break;
       }
     }
   }
@@ -96,17 +112,22 @@
     return is_singleton_;
   }
 
-  // Returns true if reference_ is a singleton and not returned to the caller.
+  // Returns true if reference_ is a singleton and not returned to the caller or
+  // used as an environment local of an HDeoptimize instruction.
   // The allocation and stores into reference_ may be eliminated for such cases.
-  bool IsSingletonAndNotReturned() const {
-    return is_singleton_and_not_returned_;
+  bool IsSingletonAndNonEscaping() const {
+    return is_singleton_and_non_escaping_;
   }
 
  private:
   HInstruction* const reference_;
   const size_t position_;     // position in HeapLocationCollector's ref_info_array_.
   bool is_singleton_;         // can only be referred to by a single name in the method.
-  bool is_singleton_and_not_returned_;  // reference_ is singleton and not returned to caller.
+
+  // reference_ is a singleton and does not escape in the end, either by being
+  // returned to the caller or by being used as an environment local of an
+  // HDeoptimize instruction.
+  bool is_singleton_and_non_escaping_;
 
   DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
 };
@@ -202,8 +223,7 @@
                          kArenaAllocLSE),
         has_heap_stores_(false),
         has_volatile_(false),
-        has_monitor_operations_(false),
-        may_deoptimize_(false) {}
+        has_monitor_operations_(false) {}
 
   size_t GetNumberOfHeapLocations() const {
     return heap_locations_.size();
@@ -236,13 +256,6 @@
     return has_monitor_operations_;
   }
 
-  // Returns whether this method may be deoptimized.
-  // Currently we don't have meta data support for deoptimizing
-  // a method that eliminates allocations/stores.
-  bool MayDeoptimize() const {
-    return may_deoptimize_;
-  }
-
   // Find and return the heap location index in heap_locations_.
   size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
                                size_t offset,
@@ -493,10 +506,6 @@
     CreateReferenceInfoForReferenceType(instruction);
   }
 
-  void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
-    may_deoptimize_ = true;
-  }
-
   void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
     has_monitor_operations_ = true;
   }
@@ -508,7 +517,6 @@
                             // alias analysis and won't be as effective.
   bool has_volatile_;       // If there are volatile field accesses.
   bool has_monitor_operations_;    // If there are monitor operations.
-  bool may_deoptimize_;     // Only true for HDeoptimize with single-frame deoptimization.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
 };
@@ -663,27 +671,59 @@
     if (predecessors.size() == 0) {
       return;
     }
+
     ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()];
     for (size_t i = 0; i < heap_values.size(); i++) {
-      HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i];
-      heap_values[i] = pred0_value;
-      if (pred0_value != kUnknownHeapValue) {
-        for (size_t j = 1; j < predecessors.size(); j++) {
-          HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i];
-          if (pred_value != pred0_value) {
-            heap_values[i] = kUnknownHeapValue;
-            break;
-          }
+      HInstruction* merged_value = nullptr;
+      // Whether merged_value is a result that's merged from all predecessors.
+      bool from_all_predecessors = true;
+      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
+      HInstruction* singleton_ref = nullptr;
+      if (ref_info->IsSingletonAndNonEscaping()) {
+        // We do more analysis of liveness when merging heap values for such
+        // cases since stores into such references may potentially be eliminated.
+        singleton_ref = ref_info->GetReference();
+      }
+
+      for (HBasicBlock* predecessor : predecessors) {
+        HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i];
+        if ((singleton_ref != nullptr) &&
+            !singleton_ref->GetBlock()->Dominates(predecessor)) {
+          // singleton_ref is not live in this predecessor. Skip this predecessor since
+          // it does not really have the location.
+          DCHECK_EQ(pred_value, kUnknownHeapValue);
+          from_all_predecessors = false;
+          continue;
+        }
+        if (merged_value == nullptr) {
+          // First seen heap value.
+          merged_value = pred_value;
+        } else if (pred_value != merged_value) {
+          // There are conflicting values.
+          merged_value = kUnknownHeapValue;
+          break;
         }
       }
 
-      if (heap_values[i] == kUnknownHeapValue) {
+      if (merged_value == kUnknownHeapValue) {
+        // There are conflicting heap values from different predecessors.
         // Keep the last store in each predecessor since future loads cannot be eliminated.
-        for (size_t j = 0; j < predecessors.size(); j++) {
-          ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()];
+        for (HBasicBlock* predecessor : predecessors) {
+          ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()];
           KeepIfIsStore(pred_values[i]);
         }
       }
+
+      if ((merged_value == nullptr) || !from_all_predecessors) {
+        DCHECK(singleton_ref != nullptr);
+        DCHECK((singleton_ref->GetBlock() == block) ||
+               !singleton_ref->GetBlock()->Dominates(block));
+        // singleton_ref is not defined before block or defined only in some of its
+        // predecessors, so block doesn't really have the location at its entry.
+        heap_values[i] = kUnknownHeapValue;
+      } else {
+        heap_values[i] = merged_value;
+      }
     }
   }
 
@@ -812,8 +852,7 @@
     } else if (index != nullptr) {
       // For array element, don't eliminate stores since it can be easily aliased
       // with non-constant index.
-    } else if (!heap_location_collector_.MayDeoptimize() &&
-               ref_info->IsSingletonAndNotReturned()) {
+    } else if (ref_info->IsSingletonAndNonEscaping()) {
       // Store into a field of a singleton that's not returned. The value cannot be
       // killed due to aliasing/invocation. It can be redundant since future loads can
       // directly get the value set by this instruction. The value can still be killed due to
@@ -987,8 +1026,7 @@
       // new_instance isn't used for field accesses. No need to process it.
       return;
     }
-    if (!heap_location_collector_.MayDeoptimize() &&
-        ref_info->IsSingletonAndNotReturned() &&
+    if (ref_info->IsSingletonAndNonEscaping() &&
         !new_instance->IsFinalizable() &&
         !new_instance->NeedsAccessCheck()) {
       singleton_new_instances_.push_back(new_instance);
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index d157509..a9fe209 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -16,11 +16,16 @@
 
 #include "locations.h"
 
+#include <type_traits>
+
 #include "nodes.h"
 #include "code_generator.h"
 
 namespace art {
 
+// Verify that Location is trivially copyable.
+static_assert(std::is_trivially_copyable<Location>::value, "Location should be trivially copyable");
+
 LocationSummary::LocationSummary(HInstruction* instruction,
                                  CallKind call_kind,
                                  bool intrinsified)
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index da27928..52747c0 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -91,12 +91,9 @@
     DCHECK(!IsValid());
   }
 
-  Location(const Location& other) : value_(other.value_) {}
+  Location(const Location& other) = default;
 
-  Location& operator=(const Location& other) {
-    value_ = other.value_;
-    return *this;
-  }
+  Location& operator=(const Location& other) = default;
 
   bool IsConstant() const {
     return (value_ & kLocationConstantMask) == kConstant;
@@ -328,7 +325,6 @@
         LOG(FATAL) << "Should not use this location kind";
     }
     UNREACHABLE();
-    return "?";
   }
 
   // Unallocated locations.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 51be1d1..f4616e3 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -28,6 +28,30 @@
   instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false);
 }
 
+// Detects a goto block and sets succ to the single successor.
+static bool IsGotoBlock(HBasicBlock* block, /*out*/ HBasicBlock** succ) {
+  if (block->GetPredecessors().size() == 1 &&
+      block->GetSuccessors().size() == 1 &&
+      block->IsSingleGoto()) {
+    *succ = block->GetSingleSuccessor();
+    return true;
+  }
+  return false;
+}
+
+// Detect an early exit loop.
+static bool IsEarlyExit(HLoopInformation* loop_info) {
+  HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
+  for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
+    for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+      if (!loop_info->Contains(*successor)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 //
 // Class methods.
 //
@@ -168,7 +192,9 @@
     int32_t use_count = 0;
     if (IsPhiInduction(phi) &&
         IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) &&
-        TryReplaceWithLastValue(phi, use_count, preheader)) {
+        // No uses, or no early exit and a proper last-value replacement.
+        (use_count == 0 ||
+         (!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) {
       for (HInstruction* i : *iset_) {
         RemoveFromCycle(i);
       }
@@ -178,31 +204,57 @@
 }
 
 void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
-  for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    // Remove instructions that are dead.
-    for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
-      HInstruction* instruction = i.Current();
-      if (instruction->IsDeadAndRemovable()) {
-        block->RemoveInstruction(instruction);
+  // Repeat the block simplifications until no more changes occur. Note that since
+  // each simplification consists of eliminating code (without introducing new code),
+  // this process is always finite.
+  bool changed;
+  do {
+    changed = false;
+    // Iterate over all basic blocks in the loop-body.
+    for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      // Remove dead instructions from the loop-body.
+      for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
+        HInstruction* instruction = i.Current();
+        if (instruction->IsDeadAndRemovable()) {
+          changed = true;
+          block->RemoveInstruction(instruction);
+        }
       }
-    }
-    // Remove trivial control flow blocks from the loop-body.
-    if (block->GetPredecessors().size() == 1 &&
-        block->GetSuccessors().size() == 1 &&
-        block->GetFirstInstruction()->IsGoto()) {
-      HBasicBlock* pred = block->GetSinglePredecessor();
-      HBasicBlock* succ = block->GetSingleSuccessor();
-      if (succ->GetPredecessors().size() == 1) {
+      // Remove trivial control flow blocks from the loop-body.
+      HBasicBlock* succ = nullptr;
+      if (IsGotoBlock(block, &succ) && succ->GetPredecessors().size() == 1) {
+        // Trivial goto block can be removed.
+        HBasicBlock* pred = block->GetSinglePredecessor();
+        changed = true;
         pred->ReplaceSuccessor(block, succ);
-        block->ClearDominanceInformation();
-        block->SetDominator(pred);  // needed by next disconnect.
+        block->RemoveDominatedBlock(succ);
         block->DisconnectAndDelete();
         pred->AddDominatedBlock(succ);
         succ->SetDominator(pred);
+      } else if (block->GetSuccessors().size() == 2) {
+        // Trivial if block can be bypassed to either branch.
+        HBasicBlock* succ0 = block->GetSuccessors()[0];
+        HBasicBlock* succ1 = block->GetSuccessors()[1];
+        HBasicBlock* meet0 = nullptr;
+        HBasicBlock* meet1 = nullptr;
+        if (succ0 != succ1 &&
+            IsGotoBlock(succ0, &meet0) &&
+            IsGotoBlock(succ1, &meet1) &&
+            meet0 == meet1 &&  // meets again
+            meet0 != block &&  // no self-loop
+            meet0->GetPhis().IsEmpty()) {  // not used for merging
+          changed = true;
+          succ0->DisconnectAndDelete();
+          if (block->Dominates(meet0)) {
+            block->RemoveDominatedBlock(meet0);
+            succ1->AddDominatedBlock(meet0);
+            meet0->SetDominator(succ1);
+          }
+        }
       }
     }
-  }
+  } while (changed);
 }
 
 void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) {
@@ -240,12 +292,12 @@
   if (IsEmptyHeader(header) &&
       IsEmptyBody(body) &&
       IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) &&
-      TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) {
+      // No uses, or proper replacement.
+      (use_count == 0 || TryReplaceWithLastValue(header->GetFirstPhi(), preheader))) {
     body->DisconnectAndDelete();
     exit->RemovePredecessor(header);
     header->RemoveSuccessor(exit);
-    header->ClearDominanceInformation();
-    header->SetDominator(preheader);  // needed by next disconnect.
+    header->RemoveDominatedBlock(exit);
     header->DisconnectAndDelete();
     preheader->AddSuccessor(exit);
     preheader->AddInstruction(new (graph_->GetArena()) HGoto());  // global allocator
@@ -259,22 +311,23 @@
 bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
   ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
   if (set != nullptr) {
+    DCHECK(iset_->empty());
     for (HInstruction* i : *set) {
-      // Check that, other than phi, instruction are removable with uses contained in the cycle.
-      // TODO: investigate what cases are no longer in the graph.
-      if (i != phi) {
-        if (!i->IsInBlock() || !i->IsRemovable()) {
-          return false;
-        }
+      // Check that, other than instructions that are no longer in the graph (removed earlier),
+      // each instruction is removable and, other than the phi, its uses are contained in the cycle.
+      if (!i->IsInBlock()) {
+        continue;
+      } else if (!i->IsRemovable()) {
+        return false;
+      } else if (i != phi) {
         for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
           if (set->find(use.GetUser()) == set->end()) {
             return false;
           }
         }
       }
+      iset_->insert(i);  // copy
     }
-    DCHECK(iset_->empty());
-    iset_->insert(set->begin(), set->end());  // copy
     return true;
   }
   return false;
@@ -358,20 +411,16 @@
   }
 }
 
-bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction,
-                                                int32_t use_count,
-                                                HBasicBlock* block) {
-  // If true uses appear after the loop, replace these uses with the last value. Environment
-  // uses can consume this value too, since any first true use is outside the loop (although
-  // this may imply that de-opting may look "ahead" a bit on the phi value). If there are only
-  // environment uses, the value is dropped altogether, since the computations have no effect.
-  if (use_count > 0) {
-    if (!induction_range_.CanGenerateLastValue(instruction)) {
-      return false;
-    }
+bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block) {
+  // Try to replace outside uses with the last value. Environment uses can consume this
+  // value too, since any first true use is outside the loop (although this may imply
+  // that de-opting may look "ahead" a bit on the phi value). If there are only environment
+  // uses, the value is dropped altogether, since the computations have no effect.
+  if (induction_range_.CanGenerateLastValue(instruction)) {
     ReplaceAllUses(instruction, induction_range_.GenerateLastValue(instruction, graph_, block));
+    return true;
   }
-  return true;
+  return false;
 }
 
 }  // namespace art
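IsGotoBlock() above tests for a very specific CFG shape: a block with exactly one predecessor, exactly one successor, and nothing but a goto in it. A standalone sketch of that predicate over a hypothetical minimal Block type (not HBasicBlock):

#include <vector>

// Hypothetical minimal CFG node.
struct Block {
  std::vector<Block*> predecessors;
  std::vector<Block*> successors;
  bool is_single_goto = false;  // true if the block contains only a goto
};

// Mirrors the shape tested by IsGotoBlock(): such a block does nothing and can
// be folded away by wiring its predecessor directly to its successor.
bool IsTrivialGotoBlock(const Block* block, /*out*/ Block** succ) {
  if (block->predecessors.size() == 1 &&
      block->successors.size() == 1 &&
      block->is_single_goto) {
    *succ = block->successors[0];
    return true;
  }
  return false;
}

SimplifyBlocks() then repeats such removals until a pass makes no change; since every simplification only deletes code, the iteration is guaranteed to terminate.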
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index e18d175..3391bef 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -72,9 +72,7 @@
                            HInstruction* instruction,
                            /*out*/ int32_t* use_count);
   void ReplaceAllUses(HInstruction* instruction, HInstruction* replacement);
-  bool TryReplaceWithLastValue(HInstruction* instruction,
-                               int32_t use_count,
-                               HBasicBlock* block);
+  bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
 
   // Range information based on prior induction variable analysis.
   InductionVarRange induction_range_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6a45149..a946e31 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1855,6 +1855,15 @@
   size_t InputCount() const { return GetInputRecords().size(); }
   HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
 
+  bool HasInput(HInstruction* input) const {
+    for (const HInstruction* i : GetInputs()) {
+      if (i == input) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   void SetRawInputAt(size_t index, HInstruction* input) {
     SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
   }
@@ -5683,7 +5692,10 @@
     // all other types are unavailable.
     kDexCacheViaMethod,
 
-    kLast = kDexCacheViaMethod
+    // Load from the root table associated with the JIT compiled method.
+    kJitTableAddress,
+
+    kLast = kJitTableAddress,
   };
 
   HLoadString(HCurrentMethod* current_method,
@@ -5741,7 +5753,8 @@
     LoadKind load_kind = GetLoadKind();
     if (load_kind == LoadKind::kBootImageLinkTimeAddress ||
         load_kind == LoadKind::kBootImageLinkTimePcRelative ||
-        load_kind == LoadKind::kBootImageAddress) {
+        load_kind == LoadKind::kBootImageAddress ||
+        load_kind == LoadKind::kJitTableAddress) {
       return false;
     }
     return !IsInDexCache();
@@ -5794,7 +5807,8 @@
     return load_kind == LoadKind::kBootImageLinkTimeAddress ||
         load_kind == LoadKind::kBootImageLinkTimePcRelative ||
         load_kind == LoadKind::kBssEntry ||
-        load_kind == LoadKind::kDexCacheViaMethod;
+        load_kind == LoadKind::kDexCacheViaMethod ||
+        load_kind == LoadKind::kJitTableAddress;
   }
 
   static bool HasAddress(LoadKind load_kind) {
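The HasInput() helper added above is a plain linear scan over the instruction's input records, O(n) in the number of inputs. A self-contained analogue using a hypothetical Instr type instead of HInstruction:

#include <vector>

// Hypothetical stand-in for HInstruction.
struct Instr {
  std::vector<const Instr*> inputs;

  // Returns true if `input` appears among this instruction's inputs.
  bool HasInput(const Instr* input) const {
    for (const Instr* i : inputs) {
      if (i == input) {
        return true;
      }
    }
    return false;
  }
};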
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 19fd6f9..1add660 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -117,6 +117,7 @@
 
   size_t GetSize() const { return size_; }
   const ArenaVector<uint8_t>& GetMemory() const { return memory_; }
+  uint8_t* GetData() { return memory_.data(); }
 
  private:
   ArenaVector<uint8_t> memory_;
@@ -755,8 +756,10 @@
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$initial");
   HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
+      graph, stats, "dead_code_elimination$after_inlining");
+  HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$final");
-  HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
+  HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(
@@ -795,6 +798,7 @@
     select_generator,
     fold2,  // TODO: if we don't inline we can also skip fold2.
     simplify2,
+    dce2,
     side_effects,
     gvn,
     licm,
@@ -804,7 +808,7 @@
     fold3,  // evaluates code generated by dynamic bce
     simplify3,
     lse,
-    dce2,
+    dce3,
     // The codegen has a few assumptions that only the instruction simplifier
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.
@@ -1123,7 +1127,7 @@
                                     jit::JitCodeCache* code_cache,
                                     ArtMethod* method,
                                     bool osr) {
-  StackHandleScope<2> hs(self);
+  StackHandleScope<3> hs(self);
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       method->GetDeclaringClass()->GetClassLoader()));
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
@@ -1169,22 +1173,43 @@
   }
 
   size_t stack_map_size = codegen->ComputeStackMapsSize();
-  uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size, method);
-  if (stack_map_data == nullptr) {
+  size_t number_of_roots = codegen->GetNumberOfJitRoots();
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  // We allocate an object array to ensure the JIT roots that we will collect in EmitJitRoots
+  // will be visible to the GC between EmitLiterals and CommitCode. Once CommitCode is
+  // executed, this array is not needed.
+  Handle<mirror::ObjectArray<mirror::Object>> roots(
+      hs.NewHandle(mirror::ObjectArray<mirror::Object>::Alloc(
+          self, class_linker->GetClassRoot(ClassLinker::kObjectArrayClass), number_of_roots)));
+  if (roots.Get() == nullptr) {
+    // Out of memory. Just clear the exception to avoid any uncaught Java exception problems.
+    DCHECK(self->IsExceptionPending());
+    self->ClearException();
+    return false;
+  }
+  uint8_t* stack_map_data = nullptr;
+  uint8_t* roots_data = nullptr;
+  code_cache->ReserveData(
+      self, stack_map_size, number_of_roots, method, &stack_map_data, &roots_data);
+  if (stack_map_data == nullptr || roots_data == nullptr) {
     return false;
   }
   MaybeRecordStat(MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
+  codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data, dex_cache);
+
   const void* code = code_cache->CommitCode(
       self,
       method,
       stack_map_data,
+      roots_data,
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
       code_allocator.GetMemory().data(),
       code_allocator.GetSize(),
-      osr);
+      osr,
+      roots);
 
   if (code == nullptr) {
     code_cache->ClearData(self, stack_map_data);
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index caf6647..5991791 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -374,7 +374,9 @@
       if (current->GetType() == Primitive::kPrimNot) {
         DCHECK(interval->GetDefinedBy()->IsActualObject())
             << interval->GetDefinedBy()->DebugName()
-            << "@" << safepoint_position->GetInstruction()->DebugName();
+            << '(' << interval->GetDefinedBy()->GetId() << ')'
+            << "@" << safepoint_position->GetInstruction()->DebugName()
+            << '(' << safepoint_position->GetInstruction()->GetId() << ')';
         LocationSummary* locations = safepoint_position->GetLocations();
         if (current->GetParent()->HasSpillSlot()) {
           locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index fd1db59..13e4494 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -281,7 +281,8 @@
         : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
 
     if (codegen_->GetCompilerOptions().IsBootImage()) {
-      // Compiling boot image. Resolve the string and allocate it if needed.
+      // Compiling boot image. Resolve the string and allocate it if needed, to ensure
+      // the string will be added to the boot image.
       DCHECK(!runtime->UseJitCompilation());
       mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
       CHECK(string != nullptr);
@@ -297,10 +298,14 @@
     } else if (runtime->UseJitCompilation()) {
       // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
       // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
-      mirror::String* string = dex_cache->GetResolvedString(string_index);
-      if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
-        desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
-        address = reinterpret_cast64<uint64_t>(string);
+      mirror::String* string = class_linker->LookupString(dex_file, string_index, dex_cache);
+      if (string != nullptr) {
+        if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
+          desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+          address = reinterpret_cast64<uint64_t>(string);
+        } else {
+          desired_load_kind = HLoadString::LoadKind::kJitTableAddress;
+        }
       }
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
@@ -322,6 +327,7 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
     case HLoadString::LoadKind::kBssEntry:
     case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kJitTableAddress:
       load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index);
       break;
     case HLoadString::LoadKind::kBootImageAddress:
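The JIT branch above now looks the string up without resolving it and picks the load kind from where the object lives: a string already in the boot image can be loaded by fixed address, while any other resolved string goes through the new JIT root table. A hypothetical, simplified model of that decision (the unresolved fallback below is a stand-in, not taken from the original code):

#include <cstdint>

// Hypothetical, simplified model of the JIT string load-kind choice.
enum class LoadKind { kBootImageAddress, kJitTableAddress, kDexCacheViaMethod };

struct Choice {
  LoadKind kind;
  uint64_t address;  // only meaningful for kBootImageAddress
};

// `string_address` is 0 when the string has not been resolved yet.
Choice ChooseJitLoadKind(uint64_t string_address, bool in_boot_image_space) {
  if (string_address != 0 && in_boot_image_space) {
    // Boot image strings have a fixed address; load them directly.
    return {LoadKind::kBootImageAddress, string_address};
  }
  if (string_address != 0) {
    // Resolved, but not in the boot image: reference it via the JIT root table.
    return {LoadKind::kJitTableAddress, 0};
  }
  // Unresolved: fall back to the runtime path (stand-in default, see above).
  return {LoadKind::kDexCacheViaMethod, 0};
}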
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index 8045bd2..e3b9fb6 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -62,6 +62,12 @@
   ___ Rsb(reg, reg, 0);
 }
 
+void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
+  if (kPoisonHeapReferences) {
+    PoisonHeapReference(reg);
+  }
+}
+
 void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
   if (kPoisonHeapReferences) {
     UnpoisonHeapReference(reg);
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 68fd32e..e020628 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -72,6 +72,8 @@
   void PoisonHeapReference(vixl32::Register reg);
   // Unpoison a heap reference contained in `reg`.
   void UnpoisonHeapReference(vixl32::Register reg);
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(vixl32::Register reg);
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(vixl32::Register reg);
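MaybePoisonHeapReference() completes the pair: like the existing MaybeUnpoisonHeapReference(), it compiles to nothing unless heap reference poisoning is enabled, and poisoning itself is just arithmetic negation of the reference bits (the Rsb reg, reg, 0 above). A standalone sketch with a hypothetical register type, assuming that negation encoding:

#include <cstdint>

// Hypothetical compile-time switch and register stand-in.
constexpr bool kPoisonHeapReferences = false;
using Register = uint32_t;

// Poisoning negates the reference bits (mirrors "rsb reg, reg, #0").
inline void PoisonHeapReference(Register* reg) { *reg = 0u - *reg; }

inline void MaybePoisonHeapReference(Register* reg) {
  if (kPoisonHeapReferences) {  // folded away when poisoning is disabled
    PoisonHeapReference(reg);
  }
}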
 
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 9c65280..b34e125 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -51,30 +51,30 @@
 
   typedef std::string (*TestFn)(AssemblerTest* assembler_test, Ass* assembler);
 
-  void DriverFn(TestFn f, std::string test_name) {
+  void DriverFn(TestFn f, const std::string& test_name) {
     DriverWrapper(f(this, assembler_.get()), test_name);
   }
 
   // This driver assumes the assembler has already been called.
-  void DriverStr(std::string assembly_string, std::string test_name) {
+  void DriverStr(const std::string& assembly_string, const std::string& test_name) {
     DriverWrapper(assembly_string, test_name);
   }
 
-  std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) {
+  std::string RepeatR(void (Ass::*f)(Reg), const std::string& fmt) {
     return RepeatTemplatedRegister<Reg>(f,
         GetRegisters(),
         &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
         fmt);
   }
 
-  std::string Repeatr(void (Ass::*f)(Reg), std::string fmt) {
+  std::string Repeatr(void (Ass::*f)(Reg), const std::string& fmt) {
     return RepeatTemplatedRegister<Reg>(f,
         GetRegisters(),
         &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
         fmt);
   }
 
-  std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string RepeatRR(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -83,7 +83,7 @@
         fmt);
   }
 
-  std::string RepeatRRNoDupes(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string RepeatRRNoDupes(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegistersNoDupes<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -92,7 +92,7 @@
         fmt);
   }
 
-  std::string Repeatrr(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string Repeatrr(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -101,7 +101,7 @@
         fmt);
   }
 
-  std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), std::string fmt) {
+  std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -112,7 +112,7 @@
         fmt);
   }
 
-  std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string Repeatrb(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -121,7 +121,7 @@
         fmt);
   }
 
-  std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string RepeatRr(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -130,11 +130,11 @@
         fmt);
   }
 
-  std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
+  std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) {
     return RepeatRegisterImm<RegisterView::kUsePrimaryName>(f, imm_bytes, fmt);
   }
 
-  std::string Repeatri(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
+  std::string Repeatri(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) {
     return RepeatRegisterImm<RegisterView::kUseSecondaryName>(f, imm_bytes, fmt);
   }
 
@@ -145,7 +145,7 @@
                                               const std::vector<Reg2*> reg2_registers,
                                               std::string (AssemblerTest::*GetName1)(const Reg1&),
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
-                                              std::string fmt) {
+                                              const std::string& fmt) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
@@ -195,7 +195,7 @@
                                               std::string (AssemblerTest::*GetName1)(const Reg1&),
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
                                               int imm_bits,
-                                              std::string fmt) {
+                                              const std::string& fmt) {
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
@@ -245,7 +245,7 @@
                                              int imm_bits,
                                              const std::vector<Reg*> registers,
                                              std::string (AssemblerTest::*GetName)(const RegType&),
-                                             std::string fmt) {
+                                             const std::string& fmt) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
@@ -281,7 +281,7 @@
   }
 
   template <typename ImmType>
-  std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, const std::string& fmt) {
     return RepeatTemplatedRegistersImmBits<Reg, Reg, ImmType>(f,
         imm_bits,
         GetRegisters(),
@@ -292,7 +292,7 @@
   }
 
   template <typename ImmType>
-  std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, const std::string& fmt) {
     return RepeatTemplatedRegisterImmBits<Reg, ImmType>(f,
         imm_bits,
         GetRegisters(),
@@ -301,7 +301,9 @@
   }
 
   template <typename ImmType>
-  std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType),
+                         int imm_bits,
+                         const std::string& fmt) {
     return RepeatTemplatedRegistersImmBits<FPReg, Reg, ImmType>(f,
         imm_bits,
         GetFPRegisters(),
@@ -311,7 +313,7 @@
         fmt);
   }
 
-  std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), std::string fmt) {
+  std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, FPReg>(f,
                                                   GetFPRegisters(),
                                                   GetFPRegisters(),
@@ -320,7 +322,7 @@
                                                   fmt);
   }
 
-  std::string RepeatFFF(void (Ass::*f)(FPReg, FPReg, FPReg), std::string fmt) {
+  std::string RepeatFFF(void (Ass::*f)(FPReg, FPReg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, FPReg, FPReg>(f,
                                                          GetFPRegisters(),
                                                          GetFPRegisters(),
@@ -331,7 +333,7 @@
                                                          fmt);
   }
 
-  std::string RepeatFFR(void (Ass::*f)(FPReg, FPReg, Reg), std::string fmt) {
+  std::string RepeatFFR(void (Ass::*f)(FPReg, FPReg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, FPReg, Reg>(
         f,
         GetFPRegisters(),
@@ -345,7 +347,7 @@
 
   std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&),
                         size_t imm_bytes,
-                        std::string fmt) {
+                        const std::string& fmt) {
     return RepeatTemplatedRegistersImm<FPReg, FPReg>(f,
                                                      GetFPRegisters(),
                                                      GetFPRegisters(),
@@ -356,7 +358,9 @@
   }
 
   template <typename ImmType>
-  std::string RepeatFFIb(void (Ass::*f)(FPReg, FPReg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatFFIb(void (Ass::*f)(FPReg, FPReg, ImmType),
+                         int imm_bits,
+                         const std::string& fmt) {
     return RepeatTemplatedRegistersImmBits<FPReg, FPReg, ImmType>(f,
                                                                   imm_bits,
                                                                   GetFPRegisters(),
@@ -367,7 +371,9 @@
   }
 
   template <typename ImmType>
-  std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), int imm_bits, std::string fmt) {
+  std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg),
+                         int imm_bits,
+                         const std::string& fmt) {
     return RepeatTemplatedImmBitsRegisters<ImmType, FPReg, FPReg>(f,
                                                                   GetFPRegisters(),
                                                                   GetFPRegisters(),
@@ -377,7 +383,7 @@
                                                                   fmt);
   }
 
-  std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) {
+  std::string RepeatFR(void (Ass::*f)(FPReg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, Reg>(f,
         GetFPRegisters(),
         GetRegisters(),
@@ -386,7 +392,7 @@
         fmt);
   }
 
-  std::string RepeatFr(void (Ass::*f)(FPReg, Reg), std::string fmt) {
+  std::string RepeatFr(void (Ass::*f)(FPReg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, Reg>(f,
         GetFPRegisters(),
         GetRegisters(),
@@ -395,7 +401,7 @@
         fmt);
   }
 
-  std::string RepeatRF(void (Ass::*f)(Reg, FPReg), std::string fmt) {
+  std::string RepeatRF(void (Ass::*f)(Reg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, FPReg>(f,
         GetRegisters(),
         GetFPRegisters(),
@@ -404,7 +410,7 @@
         fmt);
   }
 
-  std::string RepeatrF(void (Ass::*f)(Reg, FPReg), std::string fmt) {
+  std::string RepeatrF(void (Ass::*f)(Reg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, FPReg>(f,
         GetRegisters(),
         GetFPRegisters(),
@@ -413,7 +419,9 @@
         fmt);
   }
 
-  std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt,
+  std::string RepeatI(void (Ass::*f)(const Imm&),
+                      size_t imm_bytes,
+                      const std::string& fmt,
                       bool as_uint = false) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes, as_uint);
@@ -651,7 +659,7 @@
   std::string RepeatTemplatedRegister(void (Ass::*f)(RegType),
                                       const std::vector<RegType*> registers,
                                       std::string (AssemblerTest::*GetName)(const RegType&),
-                                      std::string fmt) {
+                                      const std::string& fmt) {
     std::string str;
     for (auto reg : registers) {
       (assembler_.get()->*f)(*reg);
@@ -679,7 +687,7 @@
                                        const std::vector<Reg2*> reg2_registers,
                                        std::string (AssemblerTest::*GetName1)(const Reg1&),
                                        std::string (AssemblerTest::*GetName2)(const Reg2&),
-                                       std::string fmt) {
+                                       const std::string& fmt) {
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size());
 
     std::string str;
@@ -717,7 +725,7 @@
                                               const std::vector<Reg2*> reg2_registers,
                                               std::string (AssemblerTest::*GetName1)(const Reg1&),
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
-                                              std::string fmt) {
+                                              const std::string& fmt) {
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size());
 
     std::string str;
@@ -758,7 +766,7 @@
                                        std::string (AssemblerTest::*GetName1)(const Reg1&),
                                        std::string (AssemblerTest::*GetName2)(const Reg2&),
                                        std::string (AssemblerTest::*GetName3)(const Reg3&),
-                                       std::string fmt) {
+                                       const std::string& fmt) {
     std::string str;
     for (auto reg1 : reg1_registers) {
       for (auto reg2 : reg2_registers) {
@@ -803,7 +811,7 @@
                                           std::string (AssemblerTest::*GetName1)(const Reg1&),
                                           std::string (AssemblerTest::*GetName2)(const Reg2&),
                                           size_t imm_bytes,
-                                          std::string fmt) {
+                                          const std::string& fmt) {
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
 
@@ -895,8 +903,9 @@
 
  private:
   template <RegisterView kRegView>
-  std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes,
-                                  std::string fmt) {
+  std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&),
+                                size_t imm_bytes,
+                                const std::string& fmt) {
     const std::vector<Reg*> registers = GetRegisters();
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
@@ -938,7 +947,7 @@
   virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
   }
 
-  void DriverWrapper(std::string assembly_text, std::string test_name) {
+  void DriverWrapper(const std::string& assembly_text, const std::string& test_name) {
     assembler_->FinalizeCode();
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
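The changes in this file are mechanical: std::string parameters previously taken by value are now taken by const reference, so each Repeat*/Driver* call binds to the caller's string instead of copying it. A minimal illustration of the difference, with hypothetical function names:

#include <string>

// Taking the format by value copies the caller's string on every call.
std::string FormatByValue(std::string fmt) { return fmt + "\n"; }

// Taking it by const reference binds directly to the caller's string;
// only the concatenation below allocates.
std::string FormatByRef(const std::string& fmt) { return fmt + "\n"; }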
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index 8c71292..ac24ee9 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -106,7 +106,9 @@
   // Driver() assembles and compares the results. If the results are not equal and we have a
   // disassembler, disassemble both and check whether they have the same mnemonics (in which case
   // we just warn).
-  void Driver(const std::vector<uint8_t>& data, std::string assembly_text, std::string test_name) {
+  void Driver(const std::vector<uint8_t>& data,
+              const std::string& assembly_text,
+              const std::string& test_name) {
     EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly";
 
     NativeAssemblerResult res;
@@ -229,7 +231,7 @@
     bool success = Exec(args, error_msg);
     if (!success) {
       LOG(ERROR) << "Assembler command line:";
-      for (std::string arg : args) {
+      for (const std::string& arg : args) {
         LOG(ERROR) << arg;
       }
     }
@@ -238,7 +240,7 @@
 
   // Runs objdump -h on the binary file and extracts the first line with .text.
   // Returns "" on failure.
-  std::string Objdump(std::string file) {
+  std::string Objdump(const std::string& file) {
     bool have_objdump = FileExists(FindTool(objdump_cmd_name_));
     EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand();
     if (!have_objdump) {
@@ -287,8 +289,9 @@
   }
 
   // Disassemble both binaries and compare the text.
-  bool DisassembleBinaries(const std::vector<uint8_t>& data, const std::vector<uint8_t>& as,
-                           std::string test_name) {
+  bool DisassembleBinaries(const std::vector<uint8_t>& data,
+                           const std::vector<uint8_t>& as,
+                           const std::string& test_name) {
     std::string disassembler = GetDisassembleCommand();
     if (disassembler.length() == 0) {
       LOG(WARNING) << "No dissassembler command.";
@@ -324,7 +327,7 @@
     return result;
   }
 
-  bool DisassembleBinary(std::string file, std::string* error_msg) {
+  bool DisassembleBinary(const std::string& file, std::string* error_msg) {
     std::vector<std::string> args;
 
     // Encapsulate the whole command line in a single string passed to
@@ -345,7 +348,7 @@
     return Exec(args, error_msg);
   }
 
-  std::string WriteToFile(const std::vector<uint8_t>& buffer, std::string test_name) {
+  std::string WriteToFile(const std::vector<uint8_t>& buffer, const std::string& test_name) {
     std::string file_name = GetTmpnam() + std::string("---") + test_name;
     const char* data = reinterpret_cast<const char*>(buffer.data());
     std::ofstream s_out(file_name + ".o");
@@ -354,7 +357,7 @@
     return file_name + ".o";
   }
 
-  bool CompareFiles(std::string f1, std::string f2) {
+  bool CompareFiles(const std::string& f1, const std::string& f2) {
     std::ifstream f1_in(f1);
     std::ifstream f2_in(f2);
 
@@ -369,7 +372,9 @@
   }
 
   // Compile the given assembly code and extract the binary, if possible. Put result into res.
-  bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) {
+  bool Compile(const std::string& assembly_code,
+               NativeAssemblerResult* res,
+               const std::string& test_name) {
     res->ok = false;
     res->code.reset(nullptr);
 
@@ -438,7 +443,7 @@
   // Check whether a file exists. This is used for commands, so any parameters (anything after
   // the first space) are stripped off. We skip to the last slash for this, so it should work
   // with directories with spaces.
-  static bool FileExists(std::string file) {
+  static bool FileExists(const std::string& file) {
     if (file.length() == 0) {
       return false;
     }
@@ -478,7 +483,7 @@
     return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string("");
   }
 
-  std::string FindTool(std::string tool_name) {
+  std::string FindTool(const std::string& tool_name) {
     // Find the current tool. Wild-card pattern is "arch-string*tool-name".
     std::string gcc_path = GetRootPath() + GetGCCRootPath();
     std::vector<std::string> args;
@@ -522,7 +527,8 @@
 
   // Helper for below. If name_predicate is empty, search for all files, otherwise use it for the
   // "-name" option.
-  static void FindToolDumpPrintout(std::string name_predicate, std::string tmp_file) {
+  static void FindToolDumpPrintout(const std::string& name_predicate,
+                                   const std::string& tmp_file) {
     std::string gcc_path = GetRootPath() + GetGCCRootPath();
     std::vector<std::string> args;
     args.push_back("find");
@@ -562,7 +568,7 @@
   }
 
   // For debug purposes.
-  void FindToolDump(std::string tool_name) {
+  void FindToolDump(const std::string& tool_name) {
     // Check with the tool name.
     FindToolDumpPrintout(architecture_string_ + "*" + tool_name, GetTmpnam());
     FindToolDumpPrintout("", GetTmpnam());
diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h
index 829f34b..293f4cd 100644
--- a/compiler/utils/jni_macro_assembler_test.h
+++ b/compiler/utils/jni_macro_assembler_test.h
@@ -39,12 +39,12 @@
 
   typedef std::string (*TestFn)(JNIMacroAssemblerTest* assembler_test, Ass* assembler);
 
-  void DriverFn(TestFn f, std::string test_name) {
+  void DriverFn(TestFn f, const std::string& test_name) {
     DriverWrapper(f(this, assembler_.get()), test_name);
   }
 
   // This driver assumes the assembler has already been called.
-  void DriverStr(std::string assembly_string, std::string test_name) {
+  void DriverStr(const std::string& assembly_string, const std::string& test_name) {
     DriverWrapper(assembly_string, test_name);
   }
 
@@ -128,7 +128,7 @@
   virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
   }
 
-  void DriverWrapper(std::string assembly_text, std::string test_name) {
+  void DriverWrapper(const std::string& assembly_text, const std::string& test_name) {
     assembler_->FinalizeCode();
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 46adb3f..184cdf5 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -17,8 +17,11 @@
 #ifndef ART_COMPILER_UTILS_MANAGED_REGISTER_H_
 #define ART_COMPILER_UTILS_MANAGED_REGISTER_H_
 
+#include <type_traits>
 #include <vector>
 
+#include "base/value_object.h"
+
 namespace art {
 
 namespace arm {
@@ -42,17 +45,14 @@
 class X86_64ManagedRegister;
 }
 
-class ManagedRegister {
+class ManagedRegister : public ValueObject {
  public:
   // ManagedRegister is a value class. There exists no method to change the
   // internal state. We therefore allow a copy constructor and an
   // assignment-operator.
-  constexpr ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
+  constexpr ManagedRegister(const ManagedRegister& other) = default;
 
-  ManagedRegister& operator=(const ManagedRegister& other) {
-    id_ = other.id_;
-    return *this;
-  }
+  ManagedRegister& operator=(const ManagedRegister& other) = default;
 
   constexpr arm::ArmManagedRegister AsArm() const;
   constexpr arm64::Arm64ManagedRegister AsArm64() const;
@@ -85,6 +85,9 @@
   int id_;
 };
 
+static_assert(std::is_trivially_copyable<ManagedRegister>::value,
+              "ManagedRegister should be trivially copyable");
+
 class ManagedRegisterSpill : public ManagedRegister {
  public:
   // ManagedRegisterSpill contains information about data type size and location in caller frame
@@ -115,18 +118,18 @@
  public:
   // The ManagedRegister does not have information about size and offset.
   // In this case its size and offset are determined by BuildFrame (assembler)
-  void push_back(ManagedRegister __x) {
-    ManagedRegisterSpill spill(__x);
+  void push_back(ManagedRegister x) {
+    ManagedRegisterSpill spill(x);
     std::vector<ManagedRegisterSpill>::push_back(spill);
   }
 
-  void push_back(ManagedRegister __x, int32_t __size) {
-    ManagedRegisterSpill spill(__x, __size);
+  void push_back(ManagedRegister x, int32_t size) {
+    ManagedRegisterSpill spill(x, size);
     std::vector<ManagedRegisterSpill>::push_back(spill);
   }
 
-  void push_back(ManagedRegisterSpill __x) {
-    std::vector<ManagedRegisterSpill>::push_back(__x);
+  void push_back(ManagedRegisterSpill x) {
+    std::vector<ManagedRegisterSpill>::push_back(x);
   }
  private:
 };
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index 3ef2f94..a52f519 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -219,7 +219,7 @@
   void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
                                                               mips::Register,
                                                               mips::MipsLabel*),
-                               std::string instr_name) {
+                               const std::string& instr_name) {
     mips::MipsLabel label;
     (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
     constexpr size_t kAdduCount1 = 63;
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 75149cf..c24e1b1 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -188,7 +188,7 @@
 
   void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register,
                                                              mips::MipsLabel*),
-                              std::string instr_name) {
+                              const std::string& instr_name) {
     mips::MipsLabel label;
     (Base::GetAssembler()->*f)(mips::A0, &label);
     constexpr size_t kAdduCount1 = 63;
@@ -217,7 +217,7 @@
   void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
                                                               mips::Register,
                                                               mips::MipsLabel*),
-                               std::string instr_name) {
+                               const std::string& instr_name) {
     mips::MipsLabel label;
     (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
     constexpr size_t kAdduCount1 = 63;
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 1fdef96..ba8f25e 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -212,7 +212,7 @@
 
   void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
                                                                  mips64::Mips64Label*),
-                              std::string instr_name) {
+                              const std::string& instr_name) {
     mips64::Mips64Label label;
     (Base::GetAssembler()->*f)(mips64::A0, &label);
     constexpr size_t kAdduCount1 = 63;
@@ -241,7 +241,7 @@
   void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
                                                                   mips64::GpuRegister,
                                                                   mips64::Mips64Label*),
-                               std::string instr_name) {
+                               const std::string& instr_name) {
     mips64::Mips64Label label;
     (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label);
     constexpr size_t kAdduCount1 = 63;
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 8d2a0e7..6b690aa 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -72,6 +72,22 @@
     return klass;
   }
 
+  void SetupCompilerDriver() {
+    compiler_options_->boot_image_ = false;
+    compiler_driver_->InitializeThreadPools();
+  }
+
+  void VerifyWithCompilerDriver(verifier::VerifierDeps* deps) {
+    TimingLogger timings("Verify", false, false);
+    // The compiler driver handles the verifier deps in the callbacks, so
+    // remove what this class did for unit testing.
+    verifier_deps_.reset(nullptr);
+    callbacks_->SetVerifierDeps(nullptr);
+    compiler_driver_->Verify(class_loader_, dex_files_, deps, &timings);
+    // The compiler driver may have updated the VerifierDeps in the callback object.
+    verifier_deps_.reset(callbacks_->GetVerifierDeps());
+  }
+
   void SetVerifierDeps(const std::vector<const DexFile*>& dex_files) {
     verifier_deps_.reset(new verifier::VerifierDeps(dex_files));
     VerifierDepsCompilerCallbacks* callbacks =
@@ -156,15 +172,12 @@
   }
 
   void VerifyDexFile(const char* multidex = nullptr) {
-    std::string error_msg;
     {
       ScopedObjectAccess soa(Thread::Current());
       LoadDexFile(&soa, "VerifierDeps", multidex);
     }
-    TimingLogger timings("Verify", false, false);
-    compiler_options_->boot_image_ = false;
-    compiler_driver_->InitializeThreadPools();
-    compiler_driver_->Verify(class_loader_, dex_files_, &timings);
+    SetupCompilerDriver();
+    VerifyWithCompilerDriver(/* verifier_deps */ nullptr);
   }
 
   bool TestAssignabilityRecording(const std::string& dst,
@@ -185,6 +198,33 @@
     return true;
   }
 
+  // Check that the status of classes in `class_loader_` matches the
+  // expected status in `deps`.
+  void VerifyClassStatus(const verifier::VerifierDeps& deps) {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader_handle(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader_)));
+    MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr));
+    for (const DexFile* dex_file : dex_files_) {
+      const std::vector<uint16_t>& unverified_classes = deps.GetUnverifiedClasses(*dex_file);
+      std::set<uint16_t> set(unverified_classes.begin(), unverified_classes.end());
+      for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+        const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+        const char* descriptor = dex_file->GetClassDescriptor(class_def);
+        cls.Assign(class_linker_->FindClass(soa.Self(), descriptor, class_loader_handle));
+        if (cls.Get() == nullptr) {
+          CHECK(soa.Self()->IsExceptionPending());
+          soa.Self()->ClearException();
+        } else if (set.find(class_def.class_idx_) == set.end()) {
+          ASSERT_EQ(cls->GetStatus(), mirror::Class::kStatusVerified);
+        } else {
+          ASSERT_LT(cls->GetStatus(), mirror::Class::kStatusVerified);
+        }
+      }
+    }
+  }
+
   bool HasUnverifiedClass(const std::string& cls) {
     const DexFile::TypeId* type_id = primary_dex_file_->FindTypeId(cls.c_str());
     DCHECK(type_id != nullptr);
@@ -1160,7 +1200,7 @@
   MutableHandle<mirror::ClassLoader> new_class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
   {
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_TRUE(verifier_deps_->Verify(new_class_loader, soa.Self()));
+    ASSERT_TRUE(verifier_deps_->ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   std::vector<uint8_t> buffer;
@@ -1170,7 +1210,7 @@
   {
     VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_TRUE(decoded_deps.Verify(new_class_loader, soa.Self()));
+    ASSERT_TRUE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   // Fiddle with the dependencies to make sure we catch any change and fail to verify.
@@ -1181,7 +1221,7 @@
     VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
     deps->assignable_types_.insert(*deps->unassignable_types_.begin());
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   {
@@ -1190,7 +1230,7 @@
     VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
     deps->unassignable_types_.insert(*deps->assignable_types_.begin());
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   // Mess up with classes.
@@ -1208,7 +1248,7 @@
     }
     ASSERT_TRUE(found);
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   {
@@ -1225,7 +1265,7 @@
     }
     ASSERT_TRUE(found);
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   {
@@ -1242,7 +1282,7 @@
     }
     ASSERT_TRUE(found);
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   // Mess up with fields.
@@ -1261,7 +1301,7 @@
     }
     ASSERT_TRUE(found);
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   {
@@ -1279,7 +1319,7 @@
     }
     ASSERT_TRUE(found);
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   {
@@ -1297,7 +1337,7 @@
     }
     ASSERT_TRUE(found);
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   {
@@ -1316,7 +1356,7 @@
     }
     ASSERT_TRUE(found);
     new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
   }
 
   // Mess up with methods.
@@ -1338,7 +1378,7 @@
       }
       ASSERT_TRUE(found);
       new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-      ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
     }
 
     {
@@ -1357,7 +1397,7 @@
       }
       ASSERT_TRUE(found);
       new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-      ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
     }
 
     {
@@ -1376,7 +1416,7 @@
       }
       ASSERT_TRUE(found);
       new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-      ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
     }
 
     {
@@ -1396,7 +1436,7 @@
       }
       ASSERT_TRUE(found);
       new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-      ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
     }
 
     {
@@ -1415,7 +1455,7 @@
       }
       ASSERT_TRUE(found);
       new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-      ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
     }
 
     {
@@ -1434,7 +1474,56 @@
       }
       ASSERT_TRUE(found);
       new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-      ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self()));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+  }
+}
+
+TEST_F(VerifierDepsTest, CompilerDriver) {
+  SetupCompilerDriver();
+
+  // Test both multi-dex and single-dex configuration.
+  for (const char* multi : { "MultiDex", static_cast<const char*>(nullptr) }) {
+    // Test that the compiler driver behaves as expected when the dependencies
+    // verify and when they don't verify.
+    for (bool verify_failure : { false, true }) {
+      {
+        ScopedObjectAccess soa(Thread::Current());
+        LoadDexFile(&soa, "VerifierDeps", multi);
+      }
+      VerifyWithCompilerDriver(/* verifier_deps */ nullptr);
+
+      std::vector<uint8_t> buffer;
+      verifier_deps_->Encode(dex_files_, &buffer);
+
+      {
+        ScopedObjectAccess soa(Thread::Current());
+        LoadDexFile(&soa, "VerifierDeps", multi);
+      }
+      verifier::VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      if (verify_failure) {
+        // Just taint the decoded VerifierDeps with one invalid entry.
+        VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+        bool found = false;
+        for (const auto& entry : deps->classes_) {
+          if (entry.IsResolved()) {
+            deps->classes_.insert(VerifierDeps::ClassResolution(
+                entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker));
+            found = true;
+            break;
+          }
+        }
+        ASSERT_TRUE(found);
+      }
+      VerifyWithCompilerDriver(&decoded_deps);
+
+      if (verify_failure) {
+        ASSERT_FALSE(verifier_deps_ == nullptr);
+        ASSERT_FALSE(verifier_deps_->Equals(decoded_deps));
+      } else {
+        ASSERT_TRUE(verifier_deps_ == nullptr);
+        VerifyClassStatus(decoded_deps);
+      }
     }
   }
 }
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 4923054..6418c50 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1504,12 +1504,6 @@
 
     dex_files_ = MakeNonOwningPointerVector(opened_dex_files_);
 
-    if (!IsBootImage()) {
-      // Collect verification dependencies when compiling an app.
-      verifier_deps_.reset(new verifier::VerifierDeps(dex_files_));
-      callbacks_->SetVerifierDeps(verifier_deps_.get());
-    }
-
     // We had to postpone the swap decision till now, as this is the point when we actually
     // know about the dex files we're going to use.
 
@@ -1667,7 +1661,7 @@
                                      swap_fd_,
                                      profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
-    driver_->CompileAll(class_loader_, dex_files_, timings_);
+    driver_->CompileAll(class_loader_, dex_files_, /* verifier_deps */ nullptr, timings_);
   }
 
   // Notes on the interleaving of creating the images and oat files to
@@ -1792,13 +1786,13 @@
     {
       TimingLogger::ScopedTiming t2("dex2oat Write VDEX", timings_);
       DCHECK(IsBootImage() || oat_files_.size() == 1u);
-      DCHECK_EQ(IsBootImage(), verifier_deps_ == nullptr);
+      verifier::VerifierDeps* verifier_deps = callbacks_->GetVerifierDeps();
       for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
         File* vdex_file = vdex_files_[i].get();
         std::unique_ptr<BufferedOutputStream> vdex_out(
             MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file)));
 
-        if (!oat_writers_[i]->WriteVerifierDeps(vdex_out.get(), verifier_deps_.get())) {
+        if (!oat_writers_[i]->WriteVerifierDeps(vdex_out.get(), verifier_deps)) {
           LOG(ERROR) << "Failed to write verifier dependencies into VDEX " << vdex_file->GetPath();
           return false;
         }
@@ -2653,9 +2647,6 @@
   std::vector<std::vector<const DexFile*>> dex_files_per_oat_file_;
   std::unordered_map<const DexFile*, size_t> dex_file_oat_index_map_;
 
-  // Collector of verifier dependencies.
-  std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
-
   // Backing storage.
   std::vector<std::string> char_backing_storage_;
 
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 58dd047..fa32178 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -438,9 +438,7 @@
 
     Copy(GetDexSrc1(), dex_location);
 
-    std::vector<std::string> copy(extra_args);
-
-    GenerateOdexForTest(dex_location, odex_location, filter, copy);
+    GenerateOdexForTest(dex_location, odex_location, filter, extra_args);
 
     CheckValidity();
     ASSERT_TRUE(success_);
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 2b30a1b..aa80655 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -1527,7 +1527,7 @@
   // Output dex file.
   if (options_.output_dex_directory_ != nullptr) {
     std::string output_location(options_.output_dex_directory_);
-    size_t last_slash = dex_file->GetLocation().rfind("/");
+    size_t last_slash = dex_file->GetLocation().rfind('/');
     output_location.append(dex_file->GetLocation().substr(last_slash));
     DexWriter::OutputDexFile(*header, output_location.c_str());
   }
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index 89544d7..c7f36be 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -37,12 +37,12 @@
   bool FullPlainOutputExec(std::string* error_msg) {
     // TODO: dexdump2 -> dexdump ?
     ScratchFile dexdump_output;
-    std::string dexdump_filename = dexdump_output.GetFilename();
+    const std::string& dexdump_filename = dexdump_output.GetFilename();
     std::string dexdump = GetTestAndroidRoot() + "/bin/dexdump2";
     EXPECT_TRUE(OS::FileExists(dexdump.c_str())) << dexdump << " should be a valid file path";
 
     ScratchFile dexlayout_output;
-    std::string dexlayout_filename = dexlayout_output.GetFilename();
+    const std::string& dexlayout_filename = dexlayout_output.GetFilename();
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
     EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
 
@@ -70,8 +70,8 @@
   // Runs DexFileOutput test.
   bool DexFileOutputExec(std::string* error_msg) {
     ScratchFile tmp_file;
-    std::string tmp_name = tmp_file.GetFilename();
-    size_t tmp_last_slash = tmp_name.rfind("/");
+    const std::string& tmp_name = tmp_file.GetFilename();
+    size_t tmp_last_slash = tmp_name.rfind('/');
     std::string tmp_dir = tmp_name.substr(0, tmp_last_slash + 1);
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
     EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
@@ -84,7 +84,7 @@
         return false;
       }
 
-      size_t dex_file_last_slash = dex_file.rfind("/");
+      size_t dex_file_last_slash = dex_file.rfind('/');
       std::string dex_file_name = dex_file.substr(dex_file_last_slash + 1);
       std::vector<std::string> unzip_exec_argv =
           { "/usr/bin/unzip", dex_file, "classes.dex", "-d", tmp_dir};
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index f197fc1..a374686 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -89,7 +89,7 @@
 
   // Return suffix of the file path after the last /. (e.g. /foo/bar -> bar, bar -> bar)
   static std::string BaseName(const std::string& str) {
-    size_t idx = str.rfind("/");
+    size_t idx = str.rfind('/');
     if (idx == std::string::npos) {
       return str;
     }
@@ -516,8 +516,8 @@
 
       // Sanity check that we are reading a real object
       CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
-      if (kUseBakerOrBrooksReadBarrier) {
-        obj->AssertReadBarrierPointer();
+      if (kUseBakerReadBarrier) {
+        obj->AssertReadBarrierState();
       }
 
       // Iterate every page this object belongs to
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 986f265..3c8c1a3 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -747,13 +747,8 @@
 void PatchOat::VisitObject(mirror::Object* object) {
   mirror::Object* copy = RelocatedCopyOf(object);
   CHECK(copy != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    object->AssertReadBarrierPointer();
-    if (kUseBrooksReadBarrier) {
-      mirror::Object* moved_to = RelocatedAddressOfPointer(object);
-      copy->SetReadBarrierPointer(moved_to);
-      DCHECK_EQ(copy->GetReadBarrierPointer(), moved_to);
-    }
+  if (kUseBakerReadBarrier) {
+    object->AssertReadBarrierState();
   }
   PatchOat::PatchVisitor visitor(this, copy);
   object->VisitReferences<kVerifyNone>(visitor, visitor);
@@ -1073,7 +1068,7 @@
   TimingLogger::ScopedTiming pt("patch image and oat", &timings);
 
   std::string output_directory =
-      output_image_filename.substr(0, output_image_filename.find_last_of("/"));
+      output_image_filename.substr(0, output_image_filename.find_last_of('/'));
   bool ret = PatchOat::Patch(input_image_location, base_delta, output_directory, isa, &timings);
 
   if (kIsDebugBuild) {
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 8f961af..c6f479f 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -120,8 +120,10 @@
         "linear_alloc.cc",
         "mem_map.cc",
         "memory_region.cc",
+        "method_handles.cc",
         "mirror/array.cc",
         "mirror/class.cc",
+        "mirror/class_ext.cc",
         "mirror/dex_cache.cc",
         "mirror/emulated_stack_frame.cc",
         "mirror/executable.cc",
@@ -404,7 +406,8 @@
 
 gensrcs {
     name: "art_operator_srcs",
-    cmd: "art/tools/generate-operator-out.py art/runtime $in > $out",
+    cmd: "$(location generate-operator-out.py) art/runtime $(in) > $(out)",
+    tool_files: ["generate-operator-out.py"],
     srcs: [
         "arch/instruction_set.h",
         "base/allocator.h",
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index cb8edff..01b3f34 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
 // art_quick_read_barrier_mark_regX uses a non-standard calling
@@ -73,7 +72,7 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // Math
   qpoints->pIdivmod = __aeabi_idivmod;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bf70c55..550f8c7 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -764,11 +764,12 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
-     * artThrowClassCastException.
+     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
+     * artThrowClassCastExceptionForObject.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     push {r0-r1, lr}                    @ save arguments, link register and pad
     .cfi_adjust_cfa_offset 12
     .cfi_rel_offset r0, 0
@@ -776,7 +777,7 @@
     .cfi_rel_offset lr, 8
     sub sp, #4
     .cfi_adjust_cfa_offset 4
-    bl artIsAssignableFromCode
+    bl artInstanceOfFromCode
     cbz    r0, .Lthrow_class_cast_exception
     add sp, #4
     .cfi_adjust_cfa_offset -4
@@ -792,9 +793,9 @@
     .cfi_restore lr
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
-    bl  artThrowClassCastException  @ (Class*, Class*, Thread*)
+    bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
     bkpt
-END art_quick_check_cast
+END art_quick_check_instance_of
 
 // Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
 .macro POP_REG_NE rReg, offset, rExclude
@@ -1999,11 +2000,17 @@
     // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
     ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
     tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
-    beq .Lslow_rb_\name
+    beq .Lnot_marked_rb_\name
     // Already marked, return right away.
 .Lret_rb_\name:
     bx lr
 
+.Lnot_marked_rb_\name:
+    // Test that both of the forwarding address state bits are 1.
+    mvn ip, ip
+    tst ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
+    beq .Lret_forwarding_address\name
+
 .Lslow_rb_\name:
     // Save IP: the kSaveEverything entrypoint art_quick_resolve_string makes a tail call here.
     push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
@@ -2064,6 +2071,12 @@
     .cfi_restore ip
     .cfi_restore lr
     bx lr
+.Lret_forwarding_address\name:
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    mvn ip, ip
+    lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    bx lr
 END \name
 .endm
 
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index c2078f0..3c77672 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
 // art_quick_read_barrier_mark_regX uses a non-standard calling
@@ -76,7 +75,7 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // Math
   // TODO null entrypoints not needed for ARM64 - generate inline.
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 483cee3..d8ebe26 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1294,18 +1294,19 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
-     * artThrowClassCastException.
+     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
+     * artThrowClassCastExceptionForObject.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     // Store arguments and link register
     // Stack needs to be 16B aligned on calls.
     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
     SAVE_REG xLR, 24
 
     // Call runtime code
-    bl artIsAssignableFromCode
+    bl artInstanceOfFromCode
 
     // Check for exception
     cbz x0, .Lthrow_class_cast_exception
@@ -1324,9 +1325,9 @@
 
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
-    bl artThrowClassCastException     // (Class*, Class*, Thread*)
+    bl artThrowClassCastExceptionForObject     // (Object*, Class*, Thread*)
     brk 0                             // We should not return here...
-END art_quick_check_cast
+END art_quick_check_instance_of
 
 // Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
 .macro POP_REG_NE xReg, offset, xExclude
@@ -2539,10 +2540,17 @@
      */
     // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
     ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_rb_\name
+    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
 .Lret_rb_\name:
     ret
+.Lnot_marked_rb_\name:
+    // Check if the top two bits are one; if so, this is a forwarding address.
+    mvn wIP0, wIP0
+    cmp wzr, wIP0, lsr #30
+    beq .Lret_forwarding_address\name
 .Lslow_rb_\name:
+    // We must not clobber IP0 since art_quick_resolve_string makes a tail call here and relies on
+    // IP0 being restored.
     // Save all potentially live caller-save core registers.
     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 368
     SAVE_TWO_REGS  x2,  x3, 16
@@ -2608,6 +2616,12 @@
     RESTORE_REG xLR, 360
     DECREASE_FRAME 368
     ret
+.Lret_forwarding_address\name:
+    mvn wIP0, wIP0
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    lsl \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    ret
 END \name
 .endm
 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index e10d4e6..e3230f6 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class);
 
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
@@ -73,8 +72,8 @@
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
   static_assert(IsDirectEntrypoint(kQuickInstanceofNonTrivial), "Direct C stub not marked direct.");
-  qpoints->pCheckCast = art_quick_check_cast;
-  static_assert(!IsDirectEntrypoint(kQuickCheckCast), "Non-direct C stub marked direct.");
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
+  static_assert(!IsDirectEntrypoint(kQuickCheckInstanceOf), "Non-direct C stub marked direct.");
 
   // DexCache
   qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index c3c1882..34e34b4 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1171,10 +1171,11 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
+     * Entry from managed code that calls artInstanceOfFromCode and delivers exception on failure.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     addiu  $sp, $sp, -32
     .cfi_adjust_cfa_offset 32
     sw     $gp, 16($sp)
@@ -1183,7 +1184,7 @@
     sw     $t9, 8($sp)
     sw     $a1, 4($sp)
     sw     $a0, 0($sp)
-    la     $t9, artIsAssignableFromCode
+    la     $t9, artInstanceOfFromCode
     jalr   $t9
     addiu  $sp, $sp, -16             # reserve argument slots on the stack
     addiu  $sp, $sp, 16
@@ -1200,10 +1201,10 @@
     addiu  $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
-    la   $t9, artThrowClassCastException
-    jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
+    la   $t9, artThrowClassCastExceptionForObject
+    jalr $zero, $t9                 # artThrowClassCastExceptionForObject (Object*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
-END art_quick_check_cast
+END art_quick_check_instance_of
 
     /*
      * Restore rReg's value from offset($sp) if rReg is not the same as rExclude.
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index a037905..43b73f1 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -30,8 +30,8 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class);
+
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
 extern int32_t CmplDouble(double a, double b);
@@ -64,7 +64,7 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // Math
   qpoints->pCmpgDouble = CmpgDouble;
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index cb2d1c8..0861d2d 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1256,10 +1256,11 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
+     * Entry from managed code that calls artInstanceOfFromCode and delivers exception on failure.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     daddiu $sp, $sp, -32
     .cfi_adjust_cfa_offset 32
     sd     $ra, 24($sp)
@@ -1267,7 +1268,7 @@
     sd     $t9, 16($sp)
     sd     $a1, 8($sp)
     sd     $a0, 0($sp)
-    jal    artIsAssignableFromCode
+    jal    artInstanceOfFromCode
     .cpreturn                       # Restore gp from t8 in branch delay slot.
                                     # t8 may be clobbered in artIsAssignableFromCode.
     beq    $v0, $zero, .Lthrow_class_cast_exception
@@ -1283,10 +1284,10 @@
     .cfi_adjust_cfa_offset -32
     SETUP_GP
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
-    dla  $t9, artThrowClassCastException
-    jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
+    dla  $t9, artThrowClassCastExceptionForObject
+    jalr $zero, $t9                 # artThrowClassCastExceptionForObject (Object*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
-END art_quick_check_cast
+END art_quick_check_instance_of
 
 
     /*
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index c151f00..bbf9a8b 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -23,6 +23,7 @@
 #include "common_runtime_test.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "imt_conflict_table.h"
+#include "jni_internal.h"
 #include "linear_alloc.h"
 #include "mirror/class-inl.h"
 #include "mirror/string-inl.h"
@@ -805,7 +806,7 @@
 
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
-extern "C" void art_quick_check_cast(void);
+extern "C" void art_quick_check_instance_of(void);
 #endif
 
 TEST_F(StubTest, CheckCast) {
@@ -813,65 +814,89 @@
     (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
 
-  const uintptr_t art_quick_check_cast = StubTest::GetEntrypoint(self, kQuickCheckCast);
+  const uintptr_t art_quick_check_instance_of =
+      StubTest::GetEntrypoint(self, kQuickCheckInstanceOf);
 
   // Find some classes.
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
 
-  StackHandleScope<4> hs(soa.Self());
-  Handle<mirror::Class> c(
-      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;")));
-  Handle<mirror::Class> c2(
-      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/String;")));
-  Handle<mirror::Class> list(
-      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/util/List;")));
-  Handle<mirror::Class> array_list(
-      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/util/ArrayList;")));
+  VariableSizedHandleScope hs(soa.Self());
+  Handle<mirror::Class> klass_obj(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+  Handle<mirror::Class> klass_str(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/String;")));
+  Handle<mirror::Class> klass_list(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/util/List;")));
+  Handle<mirror::Class> klass_cloneable(
+        hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Cloneable;")));
+  Handle<mirror::Class> klass_array_list(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/util/ArrayList;")));
+  Handle<mirror::Object> obj(hs.NewHandle(klass_obj->AllocObject(soa.Self())));
+  Handle<mirror::String> string(hs.NewHandle(
+      mirror::String::AllocFromModifiedUtf8(soa.Self(), "ABCD")));
+  Handle<mirror::Object> array_list(hs.NewHandle(klass_array_list->AllocObject(soa.Self())));
 
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c.Get()),
-          reinterpret_cast<size_t>(c.Get()),
+  Invoke3(reinterpret_cast<size_t>(obj.Get()),
+          reinterpret_cast<size_t>(klass_obj.Get()),
           0U,
-          art_quick_check_cast,
+          art_quick_check_instance_of,
           self);
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c2.Get()),
-          reinterpret_cast<size_t>(c2.Get()),
+  // Expected true: Test string instance of java.lang.String.
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_str.Get()),
           0U,
-          art_quick_check_cast,
+          art_quick_check_instance_of,
           self);
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c.Get()),
-          reinterpret_cast<size_t>(c2.Get()),
+  // Expected true: Test string instance of java.lang.Object.
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_obj.Get()),
           0U,
-          art_quick_check_cast,
+          art_quick_check_instance_of,
           self);
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(list.Get()),
-          reinterpret_cast<size_t>(array_list.Get()),
+  // Expected false: Test object instance of java.lang.String.
+  Invoke3(reinterpret_cast<size_t>(obj.Get()),
+          reinterpret_cast<size_t>(klass_str.Get()),
           0U,
-          art_quick_check_cast,
-          self);
-  EXPECT_FALSE(self->IsExceptionPending());
-
-  Invoke3(reinterpret_cast<size_t>(list.Get()),
-          reinterpret_cast<size_t>(c2.Get()),
-          0U,
-          art_quick_check_cast,
+          art_quick_check_instance_of,
           self);
   EXPECT_TRUE(self->IsExceptionPending());
   self->ClearException();
 
-  // TODO: Make the following work. But that would require correct managed frames.
-  Invoke3(reinterpret_cast<size_t>(c2.Get()),
-          reinterpret_cast<size_t>(c.Get()),
+  Invoke3(reinterpret_cast<size_t>(array_list.Get()),
+          reinterpret_cast<size_t>(klass_list.Get()),
           0U,
-          art_quick_check_cast,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_FALSE(self->IsExceptionPending());
+
+  Invoke3(reinterpret_cast<size_t>(array_list.Get()),
+          reinterpret_cast<size_t>(klass_cloneable.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_FALSE(self->IsExceptionPending());
+
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_array_list.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_TRUE(self->IsExceptionPending());
+  self->ClearException();
+
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_cloneable.Get()),
+          0U,
+          art_quick_check_instance_of,
           self);
   EXPECT_TRUE(self->IsExceptionPending());
   self->ClearException();
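The renamed stub performs an instance-of style check on an object rather than a class-to-class assignability check. A rough sketch of the slow-path contract, with signatures assumed from the comments in the entrypoint assembly above rather than taken from a header:

    // Assumed to return non-zero when obj can be cast to dest_class
    // (this is what the cbz/jz checks in the stubs above rely on).
    extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* dest_class);

    // Throws using the failing object, so the message can name its actual type; does not return.
    extern "C" void artThrowClassCastExceptionForObject(mirror::Object* src,
                                                        mirror::Class* dest_class,
                                                        Thread* self);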
@@ -1990,7 +2015,7 @@
   ASSERT_NE(nullptr, add_jmethod);
 
   // Get representation.
-  ArtMethod* contains_amethod = soa.DecodeMethod(contains_jmethod);
+  ArtMethod* contains_amethod = jni::DecodeArtMethod(contains_jmethod);
 
   // Patch up ArrayList.contains.
   if (contains_amethod->GetEntryPointFromQuickCompiledCode() == nullptr) {
@@ -2008,7 +2033,7 @@
   ASSERT_NE(nullptr, inf_contains_jmethod);
 
   // Get mirror representation.
-  ArtMethod* inf_contains = soa.DecodeMethod(inf_contains_jmethod);
+  ArtMethod* inf_contains = jni::DecodeArtMethod(inf_contains_jmethod);
 
   // Object
 
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 0a10a3c..877df8f 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -27,8 +27,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t art_quick_is_assignable(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t art_quick_is_assignable(mirror::Class* klass, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
 // art_quick_read_barrier_mark_regX uses a non-standard calling
@@ -50,7 +49,7 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // More math.
   qpoints->pCos = cos;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index f4f9a68..635bfa3 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1361,11 +1361,11 @@
     ret
 END_FUNCTION art_quick_is_assignable
 
-DEFINE_FUNCTION art_quick_check_cast
+DEFINE_FUNCTION art_quick_check_instance_of
     PUSH eax                              // alignment padding
-    PUSH ecx                              // pass arg2 - obj->klass
-    PUSH eax                              // pass arg1 - checked class
-    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    PUSH ecx                              // pass arg2 - checked class
+    PUSH eax                              // pass arg1 - obj
+    call SYMBOL(artInstanceOfFromCode)    // (Object* obj, Class* ref_klass)
     testl %eax, %eax
     jz 1f                                 // jump forward if not assignable
     addl LITERAL(12), %esp                // pop arguments
@@ -1385,9 +1385,9 @@
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                              // pass arg2
     PUSH eax                              // pass arg1
-    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
+    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
     UNREACHABLE
-END_FUNCTION art_quick_check_cast
+END_FUNCTION art_quick_check_instance_of
 
 // Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
 MACRO2(POP_REG_NE, reg, exclude_reg)
@@ -2155,8 +2155,15 @@
     jz .Lslow_rb_\name
     ret
 .Lslow_rb_\name:
-    // Save all potentially live caller-save core registers.
     PUSH eax
+    mov MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
+    add LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
+    // Jump if overflow; the only case where it overflows should be the forwarding address one.
+    // Taken ~25% of the time.
+    jnae .Lret_forwarding_address\name
+
+    // Save all potentially live caller-save core registers.
+    mov 0(%esp), %eax
     PUSH ecx
     PUSH edx
     PUSH ebx
@@ -2204,6 +2211,12 @@
     POP_REG_NE eax, RAW_VAR(reg)
 .Lret_rb_\name:
     ret
+.Lret_forwarding_address\name:
+    // The overflow cleared the top bits.
+    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
+    mov %eax, REG_VAR(reg)
+    POP_REG_NE eax, RAW_VAR(reg)
+    ret
     END_FUNCTION VAR(name)
 END_MACRO
 
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 8c425d5..59c9dfe 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t art_quick_assignable_from_code(const mirror::Class* klass,
-                                                 const mirror::Class* ref_class);
+extern "C" size_t art_quick_assignable_from_code(mirror::Class* klass, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
 // art_quick_read_barrier_mark_regX uses a non-standard calling
@@ -65,7 +64,7 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // More math.
   qpoints->pCos = cos;
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 2856766..72a03eb 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1480,39 +1480,14 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
-DEFINE_FUNCTION art_quick_check_cast
-    testl LITERAL(ACCESS_FLAGS_CLASS_IS_INTERFACE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdi)
-    jz .Lnot_interface
-
-    // There are no read barriers since the iftable is immutable. There can be false negatives for
-    // the read barrier case if classes in the IfTable are in the from-space. In the case where
-    // we do not find a matching interface we call into artIsAssignableFromCode which will have
-    // read barriers.
-    movl MIRROR_CLASS_IF_TABLE_OFFSET(%rsi), %ecx
-    UNPOISON_HEAP_REF ecx
-    testl %ecx, %ecx
-    jz .Lnot_interface
-    movl MIRROR_ARRAY_LENGTH_OFFSET(%rcx), %r8d
-.Lstart_loop:
-    // Re-poison before comparing to prevent rare possible false positives. This is done inside
-    // the loop since heap poisoning is only for testing builds.
-    POISON_HEAP_REF edi
-    cmpl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rcx), %edi
-    je .Lreturn  // Return if same class.
-    UNPOISON_HEAP_REF edi
-    // Go to next interface.
-    add LITERAL(COMPRESSED_REFERENCE_SIZE * 2), %rcx
-    sub LITERAL(2), %r8
-    jnz .Lstart_loop
-
-.Lnot_interface:
+DEFINE_FUNCTION art_quick_check_instance_of
     // We could check the super classes here but that is usually already checked in the caller.
     PUSH rdi                          // Save args for exc
     PUSH rsi
     subq LITERAL(8), %rsp             // Alignment padding.
     CFI_ADJUST_CFA_OFFSET(8)
     SETUP_FP_CALLEE_SAVE_FRAME
-    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
     testq %rax, %rax
     jz 1f                             // jump forward if not assignable
     RESTORE_FP_CALLEE_SAVE_FRAME
@@ -1531,9 +1506,9 @@
     POP rdi
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
+    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
     UNREACHABLE
-END_FUNCTION art_quick_check_cast
+END_FUNCTION art_quick_check_instance_of
 
 
 // Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
@@ -2301,8 +2276,16 @@
     jz .Lslow_rb_\name
     ret
 .Lslow_rb_\name:
-    // Save all potentially live caller-save core registers.
     PUSH rax
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
+    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
+    // Jump if the addl caused eax to overflow (unsigned). The only case where it overflows is
+    // the forwarding address one.
+    // Taken ~25% of the time.
+    jnae .Lret_forwarding_address\name
+
+    // Save all potentially live caller-save core registers.
+    movq 0(%rsp), %rax
     PUSH rcx
     PUSH rdx
     PUSH rsi
@@ -2367,6 +2350,12 @@
     POP_REG_NE rax, RAW_VAR(reg)
 .Lret_rb_\name:
     ret
+.Lret_forwarding_address\name:
+    // The overflow cleared the top bits.
+    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
+    movq %rax, REG_VAR(reg)
+    POP_REG_NE rax, RAW_VAR(reg)
+    ret
     END_FUNCTION VAR(name)
 END_MACRO
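Each of the mark stubs above now short-circuits when the lock word already holds a forwarding address. A self-contained sketch of that bit manipulation, with illustrative constants standing in for the LOCK_WORD_* values used in the assembly (the real values live in the lock word definition, not here):

    #include <cstdint>

    // Illustrative layout: the top two bits of the 32-bit lock word are the state,
    // and state 0b11 means the rest of the word holds a forwarding address that was
    // stored pre-shifted right, so object alignment keeps the low bits implicit.
    constexpr uint32_t kStateShift = 30;              // assumed position of the state bits
    constexpr uint32_t kStateForwardingAddress = 3u;  // assumed "forwarding address" state
    constexpr uint32_t kForwardingAddressShift = 2;   // assumed shift applied on store/load

    inline bool IsForwardingAddress(uint32_t lock_word) {
      return (lock_word >> kStateShift) == kStateForwardingAddress;
    }

    inline uint32_t ForwardingAddress(uint32_t lock_word) {
      // Shifting left drops the state bits out of the top of the word, mirroring the
      // lsl/sall instructions in the stubs above.
      return lock_word << kForwardingAddressShift;
    }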
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 1e5e127..5ef1f06 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -172,7 +172,7 @@
 #define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
-#define MIRROR_CLASS_IF_TABLE_OFFSET (12 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_IF_TABLE_OFFSET (16 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_IF_TABLE_OFFSET,
             art::mirror::Class::IfTableOffset().Int32Value())
 #define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (64 + MIRROR_OBJECT_HEADER_SIZE)
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index bde0327..5d92298 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -42,7 +42,6 @@
 ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr;
 Mutex* Locks::instrument_entrypoints_lock_ = nullptr;
 Mutex* Locks::intern_table_lock_ = nullptr;
-Mutex* Locks::interpreter_string_init_map_lock_ = nullptr;
 Mutex* Locks::jni_libraries_lock_ = nullptr;
 Mutex* Locks::logging_lock_ = nullptr;
 Mutex* Locks::mem_maps_lock_ = nullptr;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 3f2c5a9..74b786c 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -79,7 +79,6 @@
   kAllocSpaceLock,
   kBumpPointerSpaceBlockLock,
   kArenaPoolLock,
-  kDexFileToMethodInlinerMapLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
   kHostDlOpenHandlesLock,
@@ -92,12 +91,10 @@
   kDefaultMutexLevel,
   kDexLock,
   kMarkSweepLargeObjectLock,
-  kPinTableLock,
   kJdwpObjectRegistryLock,
   kModifyLdtLock,
   kAllocatedThreadIdsLock,
   kMonitorPoolLock,
-  kMethodVerifiersLock,
   kClassLinkerClassesLock,  // TODO rename.
   kJitCodeCacheLock,
   kBreakpointLock,
@@ -630,12 +627,9 @@
   // TODO: improve name, perhaps instrumentation_update_lock_.
   static Mutex* deoptimization_lock_ ACQUIRED_AFTER(alloc_tracker_lock_);
 
-  // Guards String initializer register map in interpreter.
-  static Mutex* interpreter_string_init_map_lock_ ACQUIRED_AFTER(deoptimization_lock_);
-
   // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
   // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(interpreter_string_init_map_lock_);
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(deoptimization_lock_);
 
   // Signaled when threads terminate. Used to determine when all non-daemons have terminated.
   static ConditionVariable* thread_exit_cond_ GUARDED_BY(Locks::thread_list_lock_);
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 4498198..ff2dd1b 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -339,22 +339,59 @@
   return true;
 }
 
-void FdFile::Erase() {
+bool FdFile::Unlink() {
+  if (file_path_.empty()) {
+    return false;
+  }
+
+  // Try to figure out whether this file is still referring to the one on disk.
+  bool is_current = false;
+  {
+    struct stat this_stat, current_stat;
+    int cur_fd = TEMP_FAILURE_RETRY(open(file_path_.c_str(), O_RDONLY));
+    if (cur_fd > 0) {
+      // File still exists.
+      if (fstat(fd_, &this_stat) == 0 && fstat(cur_fd, &current_stat) == 0) {
+        is_current = (this_stat.st_dev == current_stat.st_dev) &&
+                     (this_stat.st_ino == current_stat.st_ino);
+      }
+      close(cur_fd);
+    }
+  }
+
+  if (is_current) {
+    unlink(file_path_.c_str());
+  }
+
+  return is_current;
+}
+
+bool FdFile::Erase(bool unlink) {
   DCHECK(!read_only_mode_);
-  TEMP_FAILURE_RETRY(SetLength(0));
-  TEMP_FAILURE_RETRY(Flush());
-  TEMP_FAILURE_RETRY(Close());
+
+  bool ret_result = true;
+  if (unlink) {
+    ret_result = Unlink();
+  }
+
+  int result;
+  result = SetLength(0);
+  result = Flush();
+  result = Close();
+  // Ignore the errors.
+
+  return ret_result;
 }
 
 int FdFile::FlushCloseOrErase() {
   DCHECK(!read_only_mode_);
-  int flush_result = TEMP_FAILURE_RETRY(Flush());
+  int flush_result = Flush();
   if (flush_result != 0) {
     LOG(ERROR) << "CloseOrErase failed while flushing a file.";
     Erase();
     return flush_result;
   }
-  int close_result = TEMP_FAILURE_RETRY(Close());
+  int close_result = Close();
   if (close_result != 0) {
     LOG(ERROR) << "CloseOrErase failed while closing a file.";
     Erase();
@@ -365,11 +402,11 @@
 
 int FdFile::FlushClose() {
   DCHECK(!read_only_mode_);
-  int flush_result = TEMP_FAILURE_RETRY(Flush());
+  int flush_result = Flush();
   if (flush_result != 0) {
     LOG(ERROR) << "FlushClose failed while flushing a file.";
   }
-  int close_result = TEMP_FAILURE_RETRY(Close());
+  int close_result = Close();
   if (close_result != 0) {
     LOG(ERROR) << "FlushClose failed while closing a file.";
   }
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index d896ee9..eb85c4f 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -97,7 +97,14 @@
   int Flush() OVERRIDE WARN_UNUSED;
 
   // Short for SetLength(0); Flush(); Close();
-  void Erase();
+  // If the file was opened with a path name and unlink = true, also calls Unlink() on the path.
+  // Note that it is the caller's responsibility to avoid races.
+  bool Erase(bool unlink = false);
+
+  // Call unlink() if the file was opened with a path, and if open() with the name shows that
+  // the file descriptor of this file is still up-to-date. This is still racy, though, and it
+  // is up to the caller to ensure correctness in a multi-process setup.
+  bool Unlink();
 
   // Try to Flush(), then try to Close(); If either fails, call Erase().
   int FlushCloseOrErase() WARN_UNUSED;
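A short usage sketch of the new Erase(bool unlink) / Unlink() pair, mirroring the test added below (the path is illustrative and the file is assumed to already exist):

    unix_file::FdFile file("/tmp/scratch.tmp", O_RDWR, /* check_usage */ false);
    if (file.IsOpened()) {
      // ... write to the file ...
      // On an error path: truncate, flush, and close the descriptor, and also unlink
      // the path if this descriptor still matches the file currently on disk.
      bool unlinked = file.Erase(/* unlink */ true);
      // unlinked is false if another process already replaced or removed the file.
    }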
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 99ef6f7..7657a38 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -186,4 +186,24 @@
   ASSERT_EQ(file2.Close(), 0);
 }
 
+TEST_F(FdFileTest, EraseWithPathUnlinks) {
+  // New scratch file, zero-length.
+  art::ScratchFile tmp;
+  std::string filename = tmp.GetFilename();
+  tmp.Close();  // This is required because of the unlink race between the scratch file and the
+                // FdFile, which leads to close-guard breakage.
+  FdFile file(filename, O_RDWR, false);
+  ASSERT_TRUE(file.IsOpened());
+  EXPECT_GE(file.Fd(), 0);
+  uint8_t buffer[16] = { 0 };
+  EXPECT_TRUE(file.WriteFully(&buffer, sizeof(buffer)));
+  EXPECT_EQ(file.Flush(), 0);
+
+  EXPECT_TRUE(file.Erase(true));
+
+  EXPECT_FALSE(file.IsOpened());
+
+  EXPECT_FALSE(art::OS::FileExists(filename.c_str())) << filename;
+}
+
 }  // namespace unix_file
diff --git a/runtime/base/variant_map_test.cc b/runtime/base/variant_map_test.cc
index ccb22eb..93336e0 100644
--- a/runtime/base/variant_map_test.cc
+++ b/runtime/base/variant_map_test.cc
@@ -107,8 +107,8 @@
   fmFilled.Set(FruitMap::Orange, 555.0);
   EXPECT_EQ(size_t(2), fmFilled.Size());
 
-  // Test copy constructor
-  FruitMap fmEmptyCopy(fmEmpty);
+  // Test copy constructor (NOLINT because the linter suggests passing by reference instead)
+  FruitMap fmEmptyCopy(fmEmpty);  // NOLINT
   EXPECT_EQ(size_t(0), fmEmptyCopy.Size());
 
   // Test copy constructor
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 5399dc5..6c27bc6 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -282,7 +282,7 @@
       return false;
     }
 
-    ArtField* f = CheckFieldID(soa, fid);
+    ArtField* f = CheckFieldID(fid);
     if (f == nullptr) {
       return false;
     }
@@ -313,7 +313,7 @@
   bool CheckMethodAndSig(ScopedObjectAccess& soa, jobject jobj, jclass jc,
                          jmethodID mid, Primitive::Type type, InvokeType invoke)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* m = CheckMethodID(soa, mid);
+    ArtMethod* m = CheckMethodID(mid);
     if (m == nullptr) {
       return false;
     }
@@ -362,7 +362,7 @@
   bool CheckStaticFieldID(ScopedObjectAccess& soa, jclass java_class, jfieldID fid)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(java_class);
-    ArtField* f = CheckFieldID(soa, fid);
+    ArtField* f = CheckFieldID(fid);
     if (f == nullptr) {
       return false;
     }
@@ -385,7 +385,7 @@
    */
   bool CheckStaticMethod(ScopedObjectAccess& soa, jclass java_class, jmethodID mid)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* m = CheckMethodID(soa, mid);
+    ArtMethod* m = CheckMethodID(mid);
     if (m == nullptr) {
       return false;
     }
@@ -407,7 +407,7 @@
    */
   bool CheckVirtualMethod(ScopedObjectAccess& soa, jobject java_object, jmethodID mid)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* m = CheckMethodID(soa, mid);
+    ArtMethod* m = CheckMethodID(mid);
     if (m == nullptr) {
       return false;
     }
@@ -577,9 +577,8 @@
     return true;
   }
 
-  bool CheckConstructor(ScopedObjectAccess& soa, jmethodID mid)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* method = soa.DecodeMethod(mid);
+  bool CheckConstructor(jmethodID mid) REQUIRES_SHARED(Locks::mutator_lock_) {
+    ArtMethod* method = jni::DecodeArtMethod(mid);
     if (method == nullptr) {
       AbortF("expected non-null constructor");
       return false;
@@ -682,7 +681,7 @@
     if (!is_static && !CheckInstanceFieldID(soa, obj, fid)) {
       return false;
     }
-    ArtField* field = soa.DecodeField(fid);
+    ArtField* field = jni::DecodeArtField(fid);
     DCHECK(field != nullptr);  // Already checked by Check.
     if (is_static != field->IsStatic()) {
       AbortF("attempt to access %s field %s: %p",
@@ -844,9 +843,9 @@
       case 'c':  // jclass
         return CheckInstance(soa, kClass, arg.c, false);
       case 'f':  // jfieldID
-        return CheckFieldID(soa, arg.f) != nullptr;
+        return CheckFieldID(arg.f) != nullptr;
       case 'm':  // jmethodID
-        return CheckMethodID(soa, arg.m) != nullptr;
+        return CheckMethodID(arg.m) != nullptr;
       case 'r':  // release int
         return CheckReleaseMode(arg.r);
       case 's':  // jstring
@@ -868,7 +867,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_) {
     CHECK(args_p != nullptr);
     VarArgs args(args_p->Clone());
-    ArtMethod* m = CheckMethodID(soa, args.GetMethodID());
+    ArtMethod* m = CheckMethodID(args.GetMethodID());
     if (m == nullptr) {
       return false;
     }
@@ -962,7 +961,7 @@
       }
       case 'f': {  // jfieldID
         jfieldID fid = arg.f;
-        ArtField* f = soa.DecodeField(fid);
+        ArtField* f = jni::DecodeArtField(fid);
         *msg += ArtField::PrettyField(f);
         if (!entry) {
           StringAppendF(msg, " (%p)", fid);
@@ -971,7 +970,7 @@
       }
       case 'm': {  // jmethodID
         jmethodID mid = arg.m;
-        ArtMethod* m = soa.DecodeMethod(mid);
+        ArtMethod* m = jni::DecodeArtMethod(mid);
         *msg += ArtMethod::PrettyMethod(m);
         if (!entry) {
           StringAppendF(msg, " (%p)", mid);
@@ -981,7 +980,7 @@
       case '.': {
         const VarArgs* va = arg.va;
         VarArgs args(va->Clone());
-        ArtMethod* m = soa.DecodeMethod(args.GetMethodID());
+        ArtMethod* m = jni::DecodeArtMethod(args.GetMethodID());
         uint32_t len;
         const char* shorty = m->GetShorty(&len);
         CHECK_GE(len, 1u);
@@ -1147,13 +1146,12 @@
     return true;
   }
 
-  ArtField* CheckFieldID(ScopedObjectAccess& soa, jfieldID fid)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
+  ArtField* CheckFieldID(jfieldID fid) REQUIRES_SHARED(Locks::mutator_lock_) {
     if (fid == nullptr) {
       AbortF("jfieldID was NULL");
       return nullptr;
     }
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     // TODO: Better check here.
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f->GetDeclaringClass().Ptr())) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG_STREAM(ERROR));
@@ -1163,13 +1161,12 @@
     return f;
   }
 
-  ArtMethod* CheckMethodID(ScopedObjectAccess& soa, jmethodID mid)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
+  ArtMethod* CheckMethodID(jmethodID mid) REQUIRES_SHARED(Locks::mutator_lock_) {
     if (mid == nullptr) {
       AbortF("jmethodID was NULL");
       return nullptr;
     }
-    ArtMethod* m = soa.DecodeMethod(mid);
+    ArtMethod* m = jni::DecodeArtMethod(mid);
     // TODO: Better check here.
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(m->GetDeclaringClass())) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG_STREAM(ERROR));
@@ -2005,7 +2002,7 @@
     VarArgs rest(mid, vargs);
     JniValueType args[4] = {{.E = env}, {.c = c}, {.m = mid}, {.va = &rest}};
     if (sc.Check(soa, true, "Ecm.", args) && sc.CheckInstantiableNonArray(soa, c) &&
-        sc.CheckConstructor(soa, mid)) {
+        sc.CheckConstructor(mid)) {
       JniValueType result;
       result.L = baseEnv(env)->NewObjectV(env, c, mid, vargs);
       if (sc.Check(soa, false, "L", &result)) {
@@ -2029,7 +2026,7 @@
     VarArgs rest(mid, vargs);
     JniValueType args[4] = {{.E = env}, {.c = c}, {.m = mid}, {.va = &rest}};
     if (sc.Check(soa, true, "Ecm.", args) && sc.CheckInstantiableNonArray(soa, c) &&
-        sc.CheckConstructor(soa, mid)) {
+        sc.CheckConstructor(mid)) {
       JniValueType result;
       result.L = baseEnv(env)->NewObjectA(env, c, mid, vargs);
       if (sc.Check(soa, false, "L", &result)) {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d3d30d4..6d45dad 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -63,10 +63,12 @@
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "jit/offline_profiling_info.h"
+#include "jni_internal.h"
 #include "leb128.h"
 #include "linear_alloc.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
@@ -136,10 +138,22 @@
   return exception_init_method != nullptr;
 }
 
-// Helper for ThrowEarlierClassFailure. Throws the stored error.
-static void HandleEarlierVerifyError(Thread* self, ClassLinker* class_linker, ObjPtr<mirror::Class> c)
+static mirror::Object* GetVerifyError(ObjPtr<mirror::Class> c)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  ObjPtr<mirror::Object> obj = c->GetVerifyError();
+  ObjPtr<mirror::ClassExt> ext(c->GetExtData());
+  if (ext == nullptr) {
+    return nullptr;
+  } else {
+    return ext->GetVerifyError();
+  }
+}
+
+// Helper for ThrowEarlierClassFailure. Throws the stored error.
+static void HandleEarlierVerifyError(Thread* self,
+                                     ClassLinker* class_linker,
+                                     ObjPtr<mirror::Class> c)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Object> obj = GetVerifyError(c);
   DCHECK(obj != nullptr);
   self->AssertNoPendingException();
   if (obj->IsClass()) {
@@ -173,8 +187,8 @@
   Runtime* const runtime = Runtime::Current();
   if (!runtime->IsAotCompiler()) {  // Give info if this occurs at runtime.
     std::string extra;
-    if (c->GetVerifyError() != nullptr) {
-      ObjPtr<mirror::Object> verify_error = c->GetVerifyError();
+    if (GetVerifyError(c) != nullptr) {
+      ObjPtr<mirror::Object> verify_error = GetVerifyError(c);
       if (verify_error->IsClass()) {
         extra = mirror::Class::PrettyDescriptor(verify_error->AsClass());
       } else {
@@ -192,11 +206,14 @@
     ObjPtr<mirror::Throwable> pre_allocated = runtime->GetPreAllocatedNoClassDefFoundError();
     self->SetException(pre_allocated);
   } else {
-    if (c->GetVerifyError() != nullptr) {
+    if (GetVerifyError(c) != nullptr) {
       // Rethrow stored error.
       HandleEarlierVerifyError(self, this, c);
     }
-    if (c->GetVerifyError() == nullptr || wrap_in_no_class_def) {
+    // TODO: This might be wrong if we hit an OOME while allocating the ClassExt. In that case we
+    // might have meant to take the earlier branch with the original error, but it was
+    // swallowed by the OOM and we end up here.
+    if (GetVerifyError(c) == nullptr || wrap_in_no_class_def) {
       // If there isn't a recorded earlier error, or this is a repeat throw from initialization,
       // the top-level exception must be a NoClassDefFoundError. The potentially already pending
       // exception will be a cause.
@@ -378,8 +395,8 @@
   CHECK(java_lang_Class.Get() != nullptr);
   mirror::Class::SetClassClass(java_lang_Class.Get());
   java_lang_Class->SetClass(java_lang_Class.Get());
-  if (kUseBakerOrBrooksReadBarrier) {
-    java_lang_Class->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    java_lang_Class->AssertReadBarrierState();
   }
   java_lang_Class->SetClassSize(class_class_size);
   java_lang_Class->SetPrimitiveType(Primitive::kPrimNot);
@@ -495,6 +512,14 @@
   java_lang_DexCache->SetObjectSize(mirror::DexCache::InstanceSize());
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusResolved, self);
 
+
+  // Setup dalvik.system.ClassExt
+  Handle<mirror::Class> dalvik_system_ClassExt(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::ClassExt::ClassSize(image_pointer_size_))));
+  SetClassRoot(kDalvikSystemClassExt, dalvik_system_ClassExt.Get());
+  mirror::ClassExt::SetClass(dalvik_system_ClassExt.Get());
+  mirror::Class::SetStatus(dalvik_system_ClassExt, mirror::Class::kStatusResolved, self);
+
   // Set up array classes for string, field, method
   Handle<mirror::Class> object_array_string(hs.NewHandle(
       AllocClass(self, java_lang_Class.Get(),
@@ -540,7 +565,7 @@
     quick_to_interpreter_bridge_trampoline_ = GetQuickToInterpreterBridge();
   }
 
-  // Object, String and DexCache need to be rerun through FindSystemClass to finish init
+  // Object, String, ClassExt and DexCache need to be rerun through FindSystemClass to finish init
   mirror::Class::SetStatus(java_lang_Object, mirror::Class::kStatusNotReady, self);
   CheckSystemClass(self, java_lang_Object, "Ljava/lang/Object;");
   CHECK_EQ(java_lang_Object->GetObjectSize(), mirror::Object::InstanceSize());
@@ -549,6 +574,9 @@
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusNotReady, self);
   CheckSystemClass(self, java_lang_DexCache, "Ljava/lang/DexCache;");
   CHECK_EQ(java_lang_DexCache->GetObjectSize(), mirror::DexCache::InstanceSize());
+  mirror::Class::SetStatus(dalvik_system_ClassExt, mirror::Class::kStatusNotReady, self);
+  CheckSystemClass(self, dalvik_system_ClassExt, "Ldalvik/system/ClassExt;");
+  CHECK_EQ(dalvik_system_ClassExt->GetObjectSize(), mirror::ClassExt::InstanceSize());
 
   // Setup the primitive array type classes - can't be done until Object has a vtable.
   SetClassRoot(kBooleanArrayClass, FindSystemClass(self, "[Z"));
@@ -1066,6 +1094,7 @@
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
   mirror::EmulatedStackFrame::SetClass(GetClassRoot(kDalvikSystemEmulatedStackFrame));
+  mirror::ClassExt::SetClass(GetClassRoot(kDalvikSystemClassExt));
 
   for (gc::space::ImageSpace* image_space : spaces) {
     // Boot class loader, use a null handle.
@@ -1096,13 +1125,12 @@
            class_loader->GetClass();
 }
 
-static mirror::String* GetDexPathListElementName(ScopedObjectAccessUnchecked& soa,
-                                                 ObjPtr<mirror::Object> element)
+static mirror::String* GetDexPathListElementName(ObjPtr<mirror::Object> element)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ArtField* const dex_file_name_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_fileName);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_fileName);
   DCHECK(dex_file_field != nullptr);
   DCHECK(dex_file_name_field != nullptr);
   DCHECK(element != nullptr);
@@ -1126,9 +1154,9 @@
   DCHECK(error_msg != nullptr);
   ScopedObjectAccessUnchecked soa(Thread::Current());
   ArtField* const dex_path_list_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList);
   ArtField* const dex_elements_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
   CHECK(dex_path_list_field != nullptr);
   CHECK(dex_elements_field != nullptr);
   while (!ClassLinker::IsBootClassLoader(soa, class_loader)) {
@@ -1155,7 +1183,7 @@
             *error_msg = StringPrintf("Null dex element at index %d", i);
             return false;
           }
-          ObjPtr<mirror::String> const name = GetDexPathListElementName(soa, element);
+          ObjPtr<mirror::String> const name = GetDexPathListElementName(element);
           if (name == nullptr) {
             *error_msg = StringPrintf("Null name for dex element at index %d", i);
             return false;
@@ -1705,7 +1733,7 @@
         ObjPtr<mirror::Object> element = elements->GetWithoutChecks(i);
         if (element != nullptr) {
           // If we are somewhere in the middle of the array, there may be nulls at the end.
-          loader_dex_file_names.push_back(GetDexPathListElementName(soa, element));
+          loader_dex_file_names.push_back(GetDexPathListElementName(element));
         }
       }
       // Ignore the number of image dex files since we are adding those to the class loader anyways.
@@ -2397,16 +2425,17 @@
   // Handle as if this is the child PathClassLoader.
   // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
   // We need to get the DexPathList and loop through it.
-  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const cookie_field =
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Object> dex_path_list =
-      soa.DecodeField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
-      GetObject(class_loader.Get());
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
+          GetObject(class_loader.Get());
   if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
     // DexPathList has an array dexElements of Elements[] which each contain a dex file.
     ObjPtr<mirror::Object> dex_elements_obj =
-        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+        jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
         GetObject(dex_path_list);
     // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
     // at the mCookie which is a DexFile vector.
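
For orientation, the fields decoded in the hunk above are used to walk the class loader's dex files roughly as follows. This is an illustrative sketch assembled from calls that appear elsewhere in this change (GetObject, GetWithoutChecks); the AsObjectArray cast and the null handling are assumptions, not a verbatim excerpt of the function.

    // Illustrative only: walking BaseDexClassLoader.pathList.dexElements[i].dexFile.
    ObjPtr<mirror::Object> path_list =
        jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
            GetObject(class_loader.Get());
    ObjPtr<mirror::ObjectArray<mirror::Object>> elements =
        jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
            GetObject(path_list)->AsObjectArray<mirror::Object>();  // assumed cast helper
    for (int32_t i = 0; i < elements->GetLength(); ++i) {
      ObjPtr<mirror::Object> element = elements->GetWithoutChecks(i);
      if (element == nullptr) {
        continue;  // a partially filled array may contain nulls
      }
      // element.dexFile is the dalvik.system.DexFile whose mCookie wraps the native DexFile objects.
      ObjPtr<mirror::Object> dex_file = dex_file_field->GetObject(element);
    }
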
@@ -2578,6 +2607,8 @@
       klass.Assign(GetClassRoot(kJavaLangRefReference));
     } else if (strcmp(descriptor, "Ljava/lang/DexCache;") == 0) {
       klass.Assign(GetClassRoot(kJavaLangDexCache));
+    } else if (strcmp(descriptor, "Ldalvik/system/ClassExt;") == 0) {
+      klass.Assign(GetClassRoot(kDalvikSystemClassExt));
     }
   }
 
@@ -8087,6 +8118,7 @@
     "[J",
     "[S",
     "[Ljava/lang/StackTraceElement;",
+    "Ldalvik/system/ClassExt;",
   };
   static_assert(arraysize(class_roots_descriptors) == size_t(kClassRootsMax),
                 "Mismatch between class descriptors and class-root enum");
@@ -8106,7 +8138,7 @@
   StackHandleScope<11> hs(self);
 
   ArtField* dex_elements_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
 
   Handle<mirror::Class> dex_elements_class(hs.NewHandle(dex_elements_field->GetType<true>()));
   DCHECK(dex_elements_class.Get() != nullptr);
@@ -8119,13 +8151,13 @@
       hs.NewHandle(dex_elements_class->GetComponentType());
 
   ArtField* element_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   DCHECK_EQ(h_dex_element_class.Get(), element_file_field->GetDeclaringClass());
 
-  ArtField* cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* cookie_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   DCHECK_EQ(cookie_field->GetDeclaringClass(), element_file_field->GetType<false>());
 
-  ArtField* file_name_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_fileName);
+  ArtField* file_name_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_fileName);
   DCHECK_EQ(file_name_field->GetDeclaringClass(), element_file_field->GetType<false>());
 
   // Fill the elements array.
@@ -8175,7 +8207,7 @@
   DCHECK(h_path_class_loader.Get() != nullptr);
   // Set DexPathList.
   ArtField* path_list_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList);
   DCHECK(path_list_field != nullptr);
   path_list_field->SetObject<false>(h_path_class_loader.Get(), h_dex_path_list.Get());
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 4426056..1d29e31 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -128,6 +128,7 @@
     kLongArrayClass,
     kShortArrayClass,
     kJavaLangStackTraceElementArrayClass,
+    kDalvikSystemClassExt,
     kClassRootsMax,
   };
 
@@ -417,10 +418,10 @@
       REQUIRES(!dex_lock_);
 
   void VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags)
-      REQUIRES(!Locks::classlinker_classes_lock_)
+      REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!dex_lock_, !Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   mirror::DexCache* FindDexCache(Thread* self,
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 5878bf3..ab2d9d0 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -30,6 +30,7 @@
 #include "gc/heap.h"
 #include "mirror/accessible_object.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/dex_cache.h"
 #include "mirror/emulated_stack_frame.h"
 #include "mirror/executable.h"
@@ -586,6 +587,7 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_strings_), "dexCacheStrings");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_class_def_idx_), "dexClassDefIndex");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_type_idx_), "dexTypeIndex");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, ext_data_), "extData");
     addOffset(OFFSETOF_MEMBER(mirror::Class, ifields_), "iFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, iftable_), "ifTable");
     addOffset(OFFSETOF_MEMBER(mirror::Class, methods_), "methods");
@@ -603,12 +605,17 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, sfields_), "sFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, status_), "status");
     addOffset(OFFSETOF_MEMBER(mirror::Class, super_class_), "superClass");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_), "verifyError");
     addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_offset_), "virtualMethodsOffset");
     addOffset(OFFSETOF_MEMBER(mirror::Class, vtable_), "vtable");
   };
 };
 
+struct ClassExtOffsets : public CheckOffsets<mirror::ClassExt> {
+  ClassExtOffsets() : CheckOffsets<mirror::ClassExt>(false, "Ldalvik/system/ClassExt;") {
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, verify_error_), "verifyError");
+  }
+};
+
 struct StringOffsets : public CheckOffsets<mirror::String> {
   StringOffsets() : CheckOffsets<mirror::String>(false, "Ljava/lang/String;") {
     addOffset(OFFSETOF_MEMBER(mirror::String, count_), "count");
@@ -735,8 +742,8 @@
   MethodHandleImplOffsets() : CheckOffsets<mirror::MethodHandleImpl>(
       false, "Ljava/lang/invoke/MethodHandle;") {
     addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, art_field_or_method_), "artFieldOrMethod");
-    addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, as_type_cache_), "asTypeCache");
     addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, handle_kind_), "handleKind");
+    addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, nominal_type_), "nominalType");
     addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, method_type_), "type");
   }
 };
@@ -757,6 +764,7 @@
   ScopedObjectAccess soa(Thread::Current());
   EXPECT_TRUE(ObjectOffsets().Check());
   EXPECT_TRUE(ClassOffsets().Check());
+  EXPECT_TRUE(ClassExtOffsets().Check());
   EXPECT_TRUE(StringOffsets().Check());
   EXPECT_TRUE(ThrowableOffsets().Check());
   EXPECT_TRUE(StackTraceElementOffsets().Check());
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 3409938..8226e60 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -520,17 +520,17 @@
 
   // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
   // We need to get the DexPathList and loop through it.
-  ArtField* cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* cookie_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Object> dex_path_list =
-      soa.DecodeField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
-      GetObject(class_loader.Get());
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
+          GetObject(class_loader.Get());
   if (dex_path_list != nullptr && dex_file_field!= nullptr && cookie_field != nullptr) {
     // DexPathList has an array dexElements of Elements[] which each contain a dex file.
     ObjPtr<mirror::Object> dex_elements_obj =
-        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
-        GetObject(dex_path_list);
+        jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+            GetObject(dex_path_list);
     // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
     // at the mCookie which is a DexFile vector.
     if (dex_elements_obj != nullptr) {
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 827e85e..17e3729 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -52,7 +52,7 @@
 
   ScratchFile(const ScratchFile& other, const char* suffix);
 
-  explicit ScratchFile(ScratchFile&& other);
+  ScratchFile(ScratchFile&& other);
 
   ScratchFile& operator=(ScratchFile&& other);
 
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index 00dedef..806653a 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -47,6 +47,7 @@
   virtual bool IsRelocationPossible() = 0;
 
   virtual verifier::VerifierDeps* GetVerifierDeps() const = 0;
+  virtual void SetVerifierDeps(verifier::VerifierDeps* deps ATTRIBUTE_UNUSED) {}
 
   bool IsBootImage() {
     return mode_ == CallbackMode::kCompileBootImage;
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 1da888e..dc2ae2e 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -39,6 +39,7 @@
 #include "handle_scope.h"
 #include "jdwp/jdwp_priv.h"
 #include "jdwp/object_registry.h"
+#include "jni_internal.h"
 #include "jvalue-inl.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
@@ -2007,7 +2008,7 @@
   mirror::Object* thread_object = gRegistry->Get<mirror::Object*>(thread_id, &error);
   CHECK(thread_object != nullptr) << error;
   ArtField* java_lang_Thread_name_field =
-      soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
+      jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name);
   ObjPtr<mirror::String> s(java_lang_Thread_name_field->GetObject(thread_object)->AsString());
   if (s != nullptr) {
     *name = s->ToModifiedUtf8();
@@ -2032,7 +2033,7 @@
   } else if (error == JDWP::ERR_NONE) {
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(WellKnownClasses::java_lang_Thread);
     CHECK(c != nullptr);
-    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
+    ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group);
     CHECK(f != nullptr);
     ObjPtr<mirror::Object> group = f->GetObject(thread_object);
     CHECK(group != nullptr);
@@ -2074,7 +2075,7 @@
     return error;
   }
   ScopedAssertNoThreadSuspension ants("Debugger: GetThreadGroupName");
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_name);
   CHECK(f != nullptr);
   ObjPtr<mirror::String> s = f->GetObject(thread_group)->AsString();
 
@@ -2093,7 +2094,7 @@
   ObjPtr<mirror::Object> parent;
   {
     ScopedAssertNoThreadSuspension ants("Debugger: GetThreadGroupParent");
-    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_parent);
+    ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_parent);
     CHECK(f != nullptr);
     parent = f->GetObject(thread_group);
   }
@@ -2102,13 +2103,13 @@
   return JDWP::ERR_NONE;
 }
 
-static void GetChildThreadGroups(ScopedObjectAccessUnchecked& soa, mirror::Object* thread_group,
+static void GetChildThreadGroups(mirror::Object* thread_group,
                                  std::vector<JDWP::ObjectId>* child_thread_group_ids)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   CHECK(thread_group != nullptr);
 
   // Get the int "ngroups" count of this thread group...
-  ArtField* ngroups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_ngroups);
+  ArtField* ngroups_field = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_ngroups);
   CHECK(ngroups_field != nullptr);
   const int32_t size = ngroups_field->GetInt(thread_group);
   if (size == 0) {
@@ -2116,7 +2117,7 @@
   }
 
   // Get the ThreadGroup[] "groups" out of this thread group...
-  ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
+  ArtField* groups_field = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_groups);
   ObjPtr<mirror::Object> groups_array = groups_field->GetObject(thread_group);
 
   CHECK(groups_array != nullptr);
@@ -2154,7 +2155,7 @@
   // Add child thread groups.
   {
     std::vector<JDWP::ObjectId> child_thread_groups_ids;
-    GetChildThreadGroups(soa, thread_group, &child_thread_groups_ids);
+    GetChildThreadGroups(thread_group, &child_thread_groups_ids);
     expandBufAdd4BE(pReply, child_thread_groups_ids.size());
     for (JDWP::ObjectId child_thread_group_id : child_thread_groups_ids) {
       expandBufAddObjectId(pReply, child_thread_group_id);
@@ -2166,7 +2167,7 @@
 
 JDWP::ObjectId Dbg::GetSystemThreadGroupId() {
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup);
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup);
   ObjPtr<mirror::Object> group = f->GetObject(f->GetDeclaringClass());
   return gRegistry->Add(group);
 }
@@ -2256,14 +2257,13 @@
   return JDWP::ERR_NONE;
 }
 
-static bool IsInDesiredThreadGroup(ScopedObjectAccessUnchecked& soa,
-                                   mirror::Object* desired_thread_group, mirror::Object* peer)
+static bool IsInDesiredThreadGroup(mirror::Object* desired_thread_group, mirror::Object* peer)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Do we want threads from all thread groups?
   if (desired_thread_group == nullptr) {
     return true;
   }
-  ArtField* thread_group_field = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
+  ArtField* thread_group_field = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group);
   DCHECK(thread_group_field != nullptr);
   ObjPtr<mirror::Object> group = thread_group_field->GetObject(peer);
   return (group == desired_thread_group);
@@ -2296,7 +2296,7 @@
       // Doing so might help us report ZOMBIE threads too.
       continue;
     }
-    if (IsInDesiredThreadGroup(soa, thread_group, peer)) {
+    if (IsInDesiredThreadGroup(thread_group, peer)) {
       thread_ids->push_back(gRegistry->Add(peer));
     }
   }
@@ -4093,7 +4093,7 @@
 
   // Invoke the method.
   ScopedLocalRef<jobject> ref(soa.Env(), soa.AddLocalReference<jobject>(pReq->receiver.Read()));
-  JValue result = InvokeWithJValues(soa, ref.get(), soa.EncodeMethod(m),
+  JValue result = InvokeWithJValues(soa, ref.get(), jni::EncodeArtMethod(m),
                                     reinterpret_cast<jvalue*>(pReq->arg_values.get()));
 
   // Prepare JDWP ids for the reply.
@@ -4371,7 +4371,7 @@
     CHECK(type == CHUNK_TYPE("THCR") || type == CHUNK_TYPE("THNM")) << type;
     ScopedObjectAccessUnchecked soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::String> name(hs.NewHandle(t->GetThreadName(soa)));
+    Handle<mirror::String> name(hs.NewHandle(t->GetThreadName()));
     size_t char_count = (name.Get() != nullptr) ? name->GetLength() : 0;
     const jchar* chars = (name.Get() != nullptr) ? name->GetValue() : nullptr;
     bool is_compressed = (name.Get() != nullptr) ? name->IsCompressed() : false;
@@ -5117,13 +5117,11 @@
 }
 
 ArtMethod* DeoptimizationRequest::Method() const {
-  ScopedObjectAccessUnchecked soa(Thread::Current());
-  return soa.DecodeMethod(method_);
+  return jni::DecodeArtMethod(method_);
 }
 
 void DeoptimizationRequest::SetMethod(ArtMethod* m) {
-  ScopedObjectAccessUnchecked soa(Thread::Current());
-  method_ = soa.EncodeMethod(m);
+  method_ = jni::EncodeArtMethod(m);
 }
 
 void Dbg::VisitRoots(RootVisitor* visitor) {
diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc
index 0765465..835f456 100644
--- a/runtime/dex_file_annotations.cc
+++ b/runtime/dex_file_annotations.cc
@@ -22,6 +22,7 @@
 #include "art_method-inl.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
+#include "jni_internal.h"
 #include "jvalue-inl.h"
 #include "mirror/field.h"
 #include "mirror/method.h"
@@ -281,7 +282,7 @@
 
   JValue result;
   ArtMethod* create_annotation_method =
-      soa.DecodeMethod(WellKnownClasses::libcore_reflect_AnnotationFactory_createAnnotation);
+      jni::DecodeArtMethod(WellKnownClasses::libcore_reflect_AnnotationFactory_createAnnotation);
   uint32_t args[2] = { static_cast<uint32_t>(reinterpret_cast<uintptr_t>(annotation_class.Get())),
                        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(h_element_array.Get())) };
   create_annotation_method->Invoke(self, args, sizeof(args), &result, "LLL");
@@ -633,7 +634,7 @@
 
   JValue result;
   ArtMethod* annotation_member_init =
-      soa.DecodeMethod(WellKnownClasses::libcore_reflect_AnnotationMember_init);
+      jni::DecodeArtMethod(WellKnownClasses::libcore_reflect_AnnotationMember_init);
   uint32_t args[5] = { static_cast<uint32_t>(reinterpret_cast<uintptr_t>(new_member.Get())),
                        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(string_name.Get())),
                        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(value_object.Get())),
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 7b1fb95..be25803 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -456,22 +456,22 @@
 
 #define DECODE_UNSIGNED_CHECKED_FROM_WITH_ERROR_VALUE(ptr, var, error_value)  \
   uint32_t var;                                                               \
-  if (!DecodeUnsignedLeb128Checked(&ptr, begin_ + size_, &var)) {             \
+  if (!DecodeUnsignedLeb128Checked(&(ptr), begin_ + size_, &(var))) {         \
     return error_value;                                                       \
   }
 
-#define DECODE_UNSIGNED_CHECKED_FROM(ptr, var)                      \
-  uint32_t var;                                                     \
-  if (!DecodeUnsignedLeb128Checked(&ptr, begin_ + size_, &var)) {   \
-    ErrorStringPrintf("Read out of bounds");                        \
-    return false;                                                   \
+#define DECODE_UNSIGNED_CHECKED_FROM(ptr, var)                        \
+  uint32_t var;                                                       \
+  if (!DecodeUnsignedLeb128Checked(&(ptr), begin_ + size_, &(var))) { \
+    ErrorStringPrintf("Read out of bounds");                          \
+    return false;                                                     \
   }
 
-#define DECODE_SIGNED_CHECKED_FROM(ptr, var)                      \
-  int32_t var;                                                    \
-  if (!DecodeSignedLeb128Checked(&ptr, begin_ + size_, &var)) {   \
-    ErrorStringPrintf("Read out of bounds");                      \
-    return false;                                                 \
+#define DECODE_SIGNED_CHECKED_FROM(ptr, var)                        \
+  int32_t var;                                                      \
+  if (!DecodeSignedLeb128Checked(&(ptr), begin_ + size_, &(var))) { \
+    ErrorStringPrintf("Read out of bounds");                        \
+    return false;                                                   \
   }
 
 bool DexFileVerifier::CheckAndGetHandlerOffsets(const DexFile::CodeItem* code_item,
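
The only behavioral content of the macro hunk above is the extra parentheses around the ptr and var arguments; the rest is re-indentation of the line continuations. Wrapping macro arguments in parentheses is standard hygiene so that an argument which happens to be an expression cannot re-associate with the surrounding expansion. A stand-alone illustration of the general pitfall (plain C++, not ART code):

    // Why macro arguments get parenthesized: a general illustration.
    #include <cassert>
    #define DOUBLE_BAD(x)  x * 2          // expands the argument verbatim
    #define DOUBLE_GOOD(x) ((x) * 2)      // parentheses keep the argument together
    int main() {
      int a = 3;
      assert(DOUBLE_BAD(a + 1) == 5);     // a + 1 * 2 == 5, probably not what was meant
      assert(DOUBLE_GOOD(a + 1) == 8);    // ((a + 1) * 2) == 8
      return 0;
    }
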
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index e392870..3801c22 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -58,7 +58,7 @@
 
   void VerifyModification(const char* dex_file_base64_content,
                           const char* location,
-                          std::function<void(DexFile*)> f,
+                          const std::function<void(DexFile*)>& f,
                           const char* expected_error) {
     size_t length;
     std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(dex_file_base64_content, &length));
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index dc5fd07..515fcbf 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -43,11 +43,8 @@
         obj = self->AllocTlab(byte_count); \
         DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
         obj->SetClass(klass); \
-        if (kUseBakerOrBrooksReadBarrier) { \
-          if (kUseBrooksReadBarrier) { \
-            obj->SetReadBarrierPointer(obj); \
-          } \
-          obj->AssertReadBarrierPointer(); \
+        if (kUseBakerReadBarrier) { \
+          obj->AssertReadBarrierState(); \
         } \
         QuasiAtomic::ThreadFenceForConstructor(); \
         return obj; \
@@ -69,11 +66,8 @@
         obj = self->AllocTlab(byte_count); \
         DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
         obj->SetClass(klass); \
-        if (kUseBakerOrBrooksReadBarrier) { \
-          if (kUseBrooksReadBarrier) { \
-            obj->SetReadBarrierPointer(obj); \
-          } \
-          obj->AssertReadBarrierPointer(); \
+        if (kUseBakerReadBarrier) { \
+          obj->AssertReadBarrierState(); \
         } \
         QuasiAtomic::ThreadFenceForConstructor(); \
         return obj; \
@@ -94,11 +88,8 @@
       obj = self->AllocTlab(byte_count); \
       DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
       obj->SetClass(klass); \
-      if (kUseBakerOrBrooksReadBarrier) { \
-        if (kUseBrooksReadBarrier) { \
-          obj->SetReadBarrierPointer(obj); \
-        } \
-        obj->AssertReadBarrierPointer(); \
+      if (kUseBakerReadBarrier) { \
+        obj->AssertReadBarrierState(); \
       } \
       QuasiAtomic::ThreadFenceForConstructor(); \
       return obj; \
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index 2732d68..083d578 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -27,4 +27,12 @@
   return klass->IsAssignableFrom(ref_class) ? 1 : 0;
 }
 
+// Instance-of test for code, won't throw.  Null and equality tests already performed.
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(obj != nullptr);
+  DCHECK(ref_class != nullptr);
+  return obj->InstanceOf(ref_class) ? 1 : 0;
+}
+
 }  // namespace art

diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index cfa5325..64030f3 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -31,7 +31,7 @@
 // These are extern declarations of assembly stubs with common names.
 
 // Cast entrypoints.
-extern "C" void art_quick_check_cast(const art::mirror::Class*, const art::mirror::Class*);
+extern "C" void art_quick_check_instance_of(art::mirror::Object*, art::mirror::Class*);
 
 // DexCache entrypoints.
 extern "C" void* art_quick_initialize_static_storage(uint32_t);
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 3cfee45..dd8fe55 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -33,8 +33,8 @@
   V(AllocStringFromChars, void*, int32_t, int32_t, void*) \
   V(AllocStringFromString, void*, void*) \
 \
-  V(InstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*) \
-  V(CheckCast, void, const mirror::Class*, const mirror::Class*) \
+  V(InstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*) \
+  V(CheckInstanceOf, void, mirror::Object*, mirror::Class*) \
 \
   V(InitializeStaticStorage, void*, uint32_t) \
   V(InitializeTypeAndVerifyAccess, void*, uint32_t) \
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index a205b17..c8ee99a 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -111,6 +111,14 @@
   self->QuickDeliverException();
 }
 
+extern "C" NO_RETURN void artThrowClassCastExceptionForObject(mirror::Object* obj,
+                                                              mirror::Class* dest_type,
+                                                              Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(obj != nullptr);
+  artThrowClassCastException(dest_type, obj->GetClass(), self);
+}
+
 extern "C" NO_RETURN void artThrowArrayStoreException(mirror::Object* array, mirror::Object* value,
                                                       Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index cdb1051..b0463d7 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -174,8 +174,9 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocStringFromString, pInstanceofNonTrivial,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInstanceofNonTrivial, pCheckCast, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckCast, pInitializeStaticStorage, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInstanceofNonTrivial, pCheckInstanceOf, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckInstanceOf, pInitializeStaticStorage,
+                         sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInitializeStaticStorage, pInitializeTypeAndVerifyAccess,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInitializeTypeAndVerifyAccess, pInitializeType,
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 76f500c..7c64952 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -32,7 +32,7 @@
     mirror::Object* ref, accounting::ContinuousSpaceBitmap* bitmap) {
   // For the Baker-style RB, in a rare case, we could incorrectly change the object from white
   // to gray even though the object has already been marked through. This happens if a mutator
-  // thread gets preempted before the AtomicSetReadBarrierPointer below, GC marks through the
+  // thread gets preempted before the AtomicSetReadBarrierState below, GC marks through the
   // object (changes it from white to gray and back to white), and the thread runs and
   // incorrectly changes it from white to gray. If this happens, the object will get added to the
   // mark stack again and get changed back to white after it is processed.
@@ -50,14 +50,14 @@
     // we can avoid an expensive CAS.
     // For the baker case, an object is marked if either the mark bit marked or the bitmap bit is
     // set.
-    success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+    success = ref->AtomicSetReadBarrierState(ReadBarrier::WhiteState(), ReadBarrier::GrayState());
   } else {
     success = !bitmap->AtomicTestAndSet(ref);
   }
   if (success) {
     // Newly marked.
     if (kUseBakerReadBarrier) {
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+      DCHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::GrayState());
     }
     PushOntoMarkStack(ref);
   }
@@ -84,8 +84,8 @@
       return ref;
     }
     // This may or may not succeed, which is ok because the object may already be gray.
-    bool success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
-                                                    ReadBarrier::GrayPtr());
+    bool success = ref->AtomicSetReadBarrierState(ReadBarrier::WhiteState(),
+                                                  ReadBarrier::GrayState());
     if (success) {
       MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
       immune_gray_stack_.push_back(ref);
@@ -125,10 +125,6 @@
       return from_ref;
     case space::RegionSpace::RegionType::kRegionTypeFromSpace: {
       mirror::Object* to_ref = GetFwdPtr(from_ref);
-      if (kUseBakerReadBarrier) {
-        DCHECK_NE(to_ref, ReadBarrier::GrayPtr())
-            << "from_ref=" << from_ref << " to_ref=" << to_ref;
-      }
       if (to_ref == nullptr) {
         // It isn't marked yet. Mark it by copying it to the to-space.
         to_ref = Copy(from_ref);
@@ -153,9 +149,7 @@
 
 inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
   mirror::Object* ret;
-  // TODO: Delete GetMarkBit check when all of the callers properly check the bit. Remaining caller
-  // is array allocations.
-  if (from_ref == nullptr || from_ref->GetMarkBit()) {
+  if (from_ref == nullptr) {
     return from_ref;
   }
   // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
@@ -192,9 +186,9 @@
 
 inline bool ConcurrentCopying::IsMarkedInUnevacFromSpace(mirror::Object* from_ref) {
   // Use load acquire on the read barrier pointer to ensure that we never see a white read barrier
-  // pointer with an unmarked bit due to reordering.
+  // state with an unmarked bit due to reordering.
   DCHECK(region_space_->IsInUnevacFromSpace(from_ref));
-  if (kUseBakerReadBarrier && from_ref->GetReadBarrierPointerAcquire() == ReadBarrier::GrayPtr()) {
+  if (kUseBakerReadBarrier && from_ref->GetReadBarrierStateAcquire() == ReadBarrier::GrayState()) {
     return true;
   }
   return region_space_bitmap_->Test(from_ref);
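
The hunks above (and those in concurrent_copying.cc below) rename the Baker read-barrier colour from a pointer to a small state value, but the marking protocol itself is unchanged: a thread claims an object by atomically flipping it from white to gray, and only the thread that wins the CAS pushes it onto the mark stack. A minimal stand-alone sketch of that claim step, using plain std::atomic rather than the lock-word encoding ART actually uses (the state values here are assumptions):

    // Illustrative "claim by CAS white -> gray" pattern; not the ART lock-word encoding.
    #include <atomic>
    #include <cstdint>

    enum : uint32_t { kWhiteState = 0, kGrayState = 1 };

    struct ToyObject {
      std::atomic<uint32_t> rb_state{kWhiteState};
    };

    // Returns true for exactly one caller per object; that caller is responsible
    // for pushing the object onto the mark stack.
    bool TryMarkGray(ToyObject* obj) {
      uint32_t expected = kWhiteState;
      return obj->rb_state.compare_exchange_strong(expected, kGrayState);
    }
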
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 6dfab8b..11d6849 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -418,7 +418,7 @@
                                   [&visitor](mirror::Object* obj)
         REQUIRES_SHARED(Locks::mutator_lock_) {
       // If an object is not gray, it should only have references to things in the immune spaces.
-      if (obj->GetReadBarrierPointer() != ReadBarrier::GrayPtr()) {
+      if (obj->GetReadBarrierState() != ReadBarrier::GrayState()) {
         obj->VisitReferences</*kVisitNativeRoots*/true,
                              kDefaultVerifyFlags,
                              kWithoutReadBarrier>(visitor, visitor);
@@ -463,7 +463,7 @@
       if (kIsDebugBuild) {
         Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
       }
-      obj->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+      obj->SetReadBarrierState(ReadBarrier::GrayState());
     }
   }
 
@@ -549,11 +549,11 @@
 
   ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) {
     if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
-      if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      if (obj->GetReadBarrierState() == ReadBarrier::GrayState()) {
         collector_->ScanImmuneObject(obj);
         // Done scanning the object, go back to white.
-        bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                        ReadBarrier::WhitePtr());
+        bool success = obj->AtomicSetReadBarrierState(ReadBarrier::GrayState(),
+                                                      ReadBarrier::WhiteState());
         CHECK(success);
       }
     } else {
@@ -620,9 +620,9 @@
       LOG(INFO) << "immune gray stack size=" << immune_gray_stack_.size();
     }
     for (mirror::Object* obj : immune_gray_stack_) {
-      DCHECK(obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                      ReadBarrier::WhitePtr());
+      DCHECK(obj->GetReadBarrierState() == ReadBarrier::GrayState());
+      bool success = obj->AtomicSetReadBarrierState(ReadBarrier::GrayState(),
+                                                    ReadBarrier::WhiteState());
       DCHECK(success);
     }
     immune_gray_stack_.clear();
@@ -821,11 +821,11 @@
   for (mirror::Object* obj : false_gray_stack_) {
     DCHECK(IsMarked(obj));
     // The object could be white here if a thread got preempted after a success at the
-    // AtomicSetReadBarrierPointer in Mark(), GC started marking through it (but not finished so
+    // AtomicSetReadBarrierState in Mark(), GC started marking through it (but not finished so
     // still gray), and the thread ran to register it onto the false gray stack.
-    if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
-      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                      ReadBarrier::WhitePtr());
+    if (obj->GetReadBarrierState() == ReadBarrier::GrayState()) {
+      bool success = obj->AtomicSetReadBarrierState(ReadBarrier::GrayState(),
+                                                    ReadBarrier::WhiteState());
       DCHECK(success);
     }
   }
@@ -955,9 +955,9 @@
     }
     collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
     if (kUseBakerReadBarrier) {
-      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
+      CHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::WhiteState())
           << "Ref " << ref << " " << ref->PrettyTypeOf()
-          << " has non-white rb_ptr ";
+          << " has non-white rb_state ";
     }
   }
 
@@ -1026,8 +1026,8 @@
     VerifyNoFromSpaceRefsFieldVisitor visitor(collector);
     obj->VisitReferences(visitor, visitor);
     if (kUseBakerReadBarrier) {
-      CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
-          << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
+      CHECK_EQ(obj->GetReadBarrierState(), ReadBarrier::WhiteState())
+          << "obj=" << obj << " non-white rb_state " << obj->GetReadBarrierState();
     }
   }
 
@@ -1333,8 +1333,8 @@
 inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   if (kUseBakerReadBarrier) {
-    DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
-        << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
+    DCHECK(to_ref->GetReadBarrierState() == ReadBarrier::GrayState())
+        << " " << to_ref << " " << to_ref->GetReadBarrierState()
         << " is_marked=" << IsMarked(to_ref);
   }
   bool add_to_live_bytes = false;
@@ -1351,8 +1351,8 @@
     Scan(to_ref);
   }
   if (kUseBakerReadBarrier) {
-    DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
-        << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
+    DCHECK(to_ref->GetReadBarrierState() == ReadBarrier::GrayState())
+        << " " << to_ref << " " << to_ref->GetReadBarrierState()
         << " is_marked=" << IsMarked(to_ref);
   }
 #ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
@@ -1368,9 +1368,9 @@
     // above IsInToSpace() evaluates to true and we change the color from gray to white here in this
     // else block.
     if (kUseBakerReadBarrier) {
-      bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
-          ReadBarrier::GrayPtr(),
-          ReadBarrier::WhitePtr());
+      bool success = to_ref->AtomicSetReadBarrierState</*kCasRelease*/true>(
+          ReadBarrier::GrayState(),
+          ReadBarrier::WhiteState());
       DCHECK(success) << "Must succeed as we won the race.";
     }
   }
@@ -1458,9 +1458,9 @@
         while (!mark_stack->IsEmpty()) {
           mirror::Object* obj = mark_stack->PopBack();
           if (kUseBakerReadBarrier) {
-            mirror::Object* rb_ptr = obj->GetReadBarrierPointer();
-            LOG(INFO) << "On mark queue : " << obj << " " << obj->PrettyTypeOf() << " rb_ptr="
-                      << rb_ptr << " is_marked=" << IsMarked(obj);
+            uint32_t rb_state = obj->GetReadBarrierState();
+            LOG(INFO) << "On mark queue : " << obj << " " << obj->PrettyTypeOf() << " rb_state="
+                      << rb_state << " is_marked=" << IsMarked(obj);
           } else {
             LOG(INFO) << "On mark queue : " << obj << " " << obj->PrettyTypeOf()
                       << " is_marked=" << IsMarked(obj);
@@ -1707,7 +1707,7 @@
 void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset offset) {
   if (kUseBakerReadBarrier) {
     LOG(INFO) << "holder=" << obj << " " << obj->PrettyTypeOf()
-              << " holder rb_ptr=" << obj->GetReadBarrierPointer();
+              << " holder rb_state=" << obj->GetReadBarrierState();
   } else {
     LOG(INFO) << "holder=" << obj << " " << obj->PrettyTypeOf();
   }
@@ -1762,10 +1762,10 @@
         return;
       }
       bool updated_all_immune_objects = updated_all_immune_objects_.LoadSequentiallyConsistent();
-      CHECK(updated_all_immune_objects || ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
-          << "Unmarked immune space ref. obj=" << obj << " rb_ptr="
-          << (obj != nullptr ? obj->GetReadBarrierPointer() : nullptr)
-          << " ref=" << ref << " ref rb_ptr=" << ref->GetReadBarrierPointer()
+      CHECK(updated_all_immune_objects || ref->GetReadBarrierState() == ReadBarrier::GrayState())
+          << "Unmarked immune space ref. obj=" << obj << " rb_state="
+          << (obj != nullptr ? obj->GetReadBarrierState() : 0U)
+          << " ref=" << ref << " ref rb_state=" << ref->GetReadBarrierState()
           << " updated_all_immune_objects=" << updated_all_immune_objects;
     }
   } else {
@@ -2158,7 +2158,7 @@
     to_ref->SetLockWord(old_lock_word, false);
     // Set the gray ptr.
     if (kUseBakerReadBarrier) {
-      to_ref->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+      to_ref->SetReadBarrierState(ReadBarrier::GrayState());
     }
 
     LockWord new_lock_word = LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref));
@@ -2176,7 +2176,7 @@
         DCHECK_EQ(bytes_allocated, non_moving_space_bytes_allocated);
       }
       if (kUseBakerReadBarrier) {
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+        DCHECK(to_ref->GetReadBarrierState() == ReadBarrier::GrayState());
       }
       DCHECK(GetFwdPtr(from_ref) == to_ref);
       CHECK_NE(to_ref->GetLockWord(false).GetState(), LockWord::kForwardingAddress);
@@ -2262,14 +2262,14 @@
   if (!is_los && mark_bitmap->Test(ref)) {
     // Already marked.
     if (kUseBakerReadBarrier) {
-      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+      DCHECK(ref->GetReadBarrierState() == ReadBarrier::GrayState() ||
+             ref->GetReadBarrierState() == ReadBarrier::WhiteState());
     }
   } else if (is_los && los_bitmap->Test(ref)) {
     // Already marked in LOS.
     if (kUseBakerReadBarrier) {
-      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+      DCHECK(ref->GetReadBarrierState() == ReadBarrier::GrayState() ||
+             ref->GetReadBarrierState() == ReadBarrier::WhiteState());
     }
   } else {
     // Not marked.
@@ -2282,7 +2282,7 @@
         DCHECK(!los_bitmap->Test(ref));
       }
       if (kUseBakerReadBarrier) {
-        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+        DCHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::WhiteState());
       }
     } else {
       // For the baker-style RB, we need to handle 'false-gray' cases. See the
@@ -2298,25 +2298,25 @@
       // This may or may not succeed, which is ok.
       bool cas_success = false;
       if (kUseBakerReadBarrier) {
-        cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
-                                                       ReadBarrier::GrayPtr());
+        cas_success = ref->AtomicSetReadBarrierState(ReadBarrier::WhiteState(),
+                                                     ReadBarrier::GrayState());
       }
       if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
         // Already marked.
         if (kUseBakerReadBarrier && cas_success &&
-            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+            ref->GetReadBarrierState() == ReadBarrier::GrayState()) {
           PushOntoFalseGrayStack(ref);
         }
       } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
         // Already marked in LOS.
         if (kUseBakerReadBarrier && cas_success &&
-            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+            ref->GetReadBarrierState() == ReadBarrier::GrayState()) {
           PushOntoFalseGrayStack(ref);
         }
       } else {
         // Newly marked.
         if (kUseBakerReadBarrier) {
-          DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+          DCHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::GrayState());
         }
         PushOntoMarkStack(ref);
       }
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index e0bf744..ddcb6c0 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -124,9 +124,9 @@
   if (obj == nullptr) {
     return nullptr;
   }
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct forward pointer installed.
-    obj->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    // Verify all the objects have the correct forward state installed.
+    obj->AssertReadBarrierState();
   }
   if (!immune_spaces_.IsInImmuneRegion(obj)) {
     if (objects_before_forwarding_->HasAddress(obj)) {
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 77d7274..7b73e43 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -462,9 +462,9 @@
                                          mirror::Object* holder,
                                          MemberOffset offset) {
   DCHECK(obj != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct pointer installed.
-    obj->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    // Verify all the objects have the correct state installed.
+    obj->AssertReadBarrierState();
   }
   if (immune_spaces_.IsInImmuneRegion(obj)) {
     if (kCountMarkedObjects) {
@@ -503,9 +503,9 @@
 
 inline bool MarkSweep::MarkObjectParallel(mirror::Object* obj) {
   DCHECK(obj != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct pointer installed.
-    obj->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    // Verify all the objects have the correct state installed.
+    obj->AssertReadBarrierState();
   }
   if (immune_spaces_.IsInImmuneRegion(obj)) {
     DCHECK(IsMarked(obj) != nullptr);
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 2ff4a3f..a815b83 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -589,13 +589,9 @@
   // references.
   saved_bytes_ +=
       CopyAvoidingDirtyingPages(reinterpret_cast<void*>(forward_address), obj, object_size);
-  if (kUseBakerOrBrooksReadBarrier) {
-    obj->AssertReadBarrierPointer();
-    if (kUseBrooksReadBarrier) {
-      DCHECK_EQ(forward_address->GetReadBarrierPointer(), obj);
-      forward_address->SetReadBarrierPointer(forward_address);
-    }
-    forward_address->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    obj->AssertReadBarrierState();
+    forward_address->AssertReadBarrierState();
   }
   DCHECK(to_space_->HasAddress(forward_address) ||
          fallback_space_->HasAddress(forward_address) ||
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 05ce9c7..97129e8 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -86,11 +86,8 @@
     obj = self->AllocTlab(byte_count);
     DCHECK(obj != nullptr) << "AllocTlab can't fail";
     obj->SetClass(klass);
-    if (kUseBakerOrBrooksReadBarrier) {
-      if (kUseBrooksReadBarrier) {
-        obj->SetReadBarrierPointer(obj.Ptr());
-      }
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     bytes_allocated = byte_count;
     usable_size = bytes_allocated;
@@ -102,11 +99,8 @@
       LIKELY(obj != nullptr)) {
     DCHECK(!is_running_on_memory_tool_);
     obj->SetClass(klass);
-    if (kUseBakerOrBrooksReadBarrier) {
-      if (kUseBrooksReadBarrier) {
-        obj->SetReadBarrierPointer(obj.Ptr());
-      }
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     usable_size = bytes_allocated;
     pre_fence_visitor(obj, usable_size);
@@ -143,11 +137,8 @@
     DCHECK_GT(bytes_allocated, 0u);
     DCHECK_GT(usable_size, 0u);
     obj->SetClass(klass);
-    if (kUseBakerOrBrooksReadBarrier) {
-      if (kUseBrooksReadBarrier) {
-        obj->SetReadBarrierPointer(obj.Ptr());
-      }
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     if (collector::SemiSpace::kUseRememberedSet && UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
       // (Note this if statement will be constant folded away for the
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 5de004b..19760af 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2406,13 +2406,9 @@
     }
     // Copy the object over to its new location. Don't use alloc_size to avoid valgrind error.
     memcpy(reinterpret_cast<void*>(forward_address), obj, obj_size);
-    if (kUseBakerOrBrooksReadBarrier) {
-      obj->AssertReadBarrierPointer();
-      if (kUseBrooksReadBarrier) {
-        DCHECK_EQ(forward_address->GetReadBarrierPointer(), obj);
-        forward_address->SetReadBarrierPointer(forward_address);
-      }
-      forward_address->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
+      forward_address->AssertReadBarrierState();
     }
     return forward_address;
   }
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 4e6f7da..a0eb197 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -75,19 +75,19 @@
     // collector (SemiSpace) is running.
     CHECK(ref != nullptr);
     collector::ConcurrentCopying* concurrent_copying = heap->ConcurrentCopyingCollector();
-    mirror::Object* rb_ptr = ref->GetReadBarrierPointer();
-    if (rb_ptr == ReadBarrier::GrayPtr()) {
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
-      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+    uint32_t rb_state = ref->GetReadBarrierState();
+    if (rb_state == ReadBarrier::GrayState()) {
+      ref->AtomicSetReadBarrierState(ReadBarrier::GrayState(), ReadBarrier::WhiteState());
+      CHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::WhiteState());
     } else {
       // In ConcurrentCopying::ProcessMarkStackRef() we may leave a white reference in the queue and
       // find it here, which is OK.
-      CHECK_EQ(rb_ptr, ReadBarrier::WhitePtr()) << "ref=" << ref << " rb_ptr=" << rb_ptr;
+      CHECK_EQ(rb_state, ReadBarrier::WhiteState()) << "ref=" << ref << " rb_state=" << rb_state;
       ObjPtr<mirror::Object> referent = ref->GetReferent<kWithoutReadBarrier>();
       // The referent could be null if it's cleared by a mutator (Reference.clear()).
       if (referent != nullptr) {
         CHECK(concurrent_copying->IsInToSpace(referent.Ptr()))
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer()
+            << "ref=" << ref << " rb_state=" << ref->GetReadBarrierState()
             << " referent=" << referent;
       }
     }
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 6035406..0b602e9 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -399,8 +399,8 @@
     auto* obj = reinterpret_cast<mirror::Object*>(current);
     CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
     CHECK(live_bitmap_->Test(obj)) << obj->PrettyTypeOf();
-    if (kUseBakerOrBrooksReadBarrier) {
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     current += RoundUp(obj->SizeOf(), kObjectAlignment);
   }
@@ -1606,7 +1606,7 @@
 
   std::ostringstream oss;
   bool first = true;
-  for (auto msg : error_msgs) {
+  for (const auto& msg : error_msgs) {
     if (!first) {
       oss << "\n    ";
     }
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 7778871..cbb3d73 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -108,13 +108,10 @@
     EXPECT_GE(size, SizeOfZeroLengthByteArray());
     EXPECT_TRUE(byte_array_class != nullptr);
     o->SetClass(byte_array_class);
-    if (kUseBakerOrBrooksReadBarrier) {
+    if (kUseBakerReadBarrier) {
       // Like the proper heap object allocation, install and verify
-      // the correct read barrier pointer.
-      if (kUseBrooksReadBarrier) {
-        o->SetReadBarrierPointer(o);
-      }
-      o->AssertReadBarrierPointer();
+      // the correct read barrier state.
+      o->AssertReadBarrierState();
     }
     mirror::Array* arr = o->AsArray<kVerifyNone>();
     size_t header_size = SizeOfZeroLengthByteArray();
diff --git a/runtime/gc/system_weak.h b/runtime/gc/system_weak.h
index 3910a28..887059b 100644
--- a/runtime/gc/system_weak.h
+++ b/runtime/gc/system_weak.h
@@ -69,6 +69,11 @@
     new_weak_condition_.Broadcast(Thread::Current());
   }
 
+  // WARNING: For lock annotations only.
+  Mutex* GetAllowDisallowLock() const RETURN_CAPABILITY(allow_disallow_lock_) {
+    return nullptr;
+  }
+
  protected:
   void Wait(Thread* self) REQUIRES_SHARED(allow_disallow_lock_) {
     // Wait for GC's sweeping to complete and allow new records
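
The new GetAllowDisallowLock() above never returns a real pointer; it exists purely so that clang's thread-safety analysis can name allow_disallow_lock_ in lock annotations on other declarations. A rough stand-alone illustration of the same annotation-only pattern, written against clang's raw thread-safety attributes (the ART RETURN_CAPABILITY macro is assumed to expand to something along these lines):

    // Annotation-only lock accessor: the returned pointer is never dereferenced,
    // it only tells the static analysis which capability is meant.
    #if defined(__clang__)
    #define TS_ATTR(x) __attribute__((x))
    #else
    #define TS_ATTR(x)
    #endif

    class TS_ATTR(capability("mutex")) ToyMutex {
     public:
      void Lock() TS_ATTR(acquire_capability()) {}
      void Unlock() TS_ATTR(release_capability()) {}
    };

    class Records {
     public:
      ToyMutex* GetLock() const TS_ATTR(lock_returned(lock_)) { return nullptr; }
     private:
      mutable ToyMutex lock_;
      int count_ TS_ATTR(guarded_by(lock_)) = 0;
    };
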
diff --git a/runtime/generate-operator-out.py b/runtime/generate-operator-out.py
new file mode 120000
index 0000000..cc291d2
--- /dev/null
+++ b/runtime/generate-operator-out.py
@@ -0,0 +1 @@
+../tools/generate-operator-out.py
\ No newline at end of file
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 6c189b0..2c95fe9 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -98,6 +98,12 @@
 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)))
 #define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
 DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS 0x3
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS), (static_cast<uint32_t>(art::LockWord::kStateForwardingAddress)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW 0x40000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), (static_cast<uint32_t>(art::LockWord::kStateForwardingAddressOverflow)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT 0x3
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), (static_cast<uint32_t>(art::LockWord::kForwardingAddressShift)))
 #define LOCK_WORD_GC_STATE_MASK_SHIFTED 0x30000000
 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShifted)))
 #define LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED 0xcfffffff
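
The new forwarding-address constants reflect how a forwarded object's address is packed into the 32-bit lock word: because heap objects are at least 8-byte aligned, the low three bits of the address carry no information and the address can be stored shifted right by LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT. A small arithmetic check of that round trip (the alignment assumption is the only input; this is not the full lock-word layout):

    // Round trip of an 8-byte-aligned address through a 3-bit shift (illustrative).
    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t addr = 0x12345678;                  // 8-byte aligned
      assert((addr & 7u) == 0u);                          // low bits are redundant
      const uint32_t encoded = static_cast<uint32_t>(addr >> 3);
      assert((static_cast<uintptr_t>(encoded) << 3) == addr);
      return 0;
    }
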
diff --git a/runtime/handle.h b/runtime/handle.h
index d33d4a6..3db3be2 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -42,13 +42,9 @@
   Handle() : reference_(nullptr) {
   }
 
-  ALWAYS_INLINE Handle(const Handle<T>& handle) : reference_(handle.reference_) {
-  }
+  ALWAYS_INLINE Handle(const Handle<T>& handle) = default;
 
-  ALWAYS_INLINE Handle<T>& operator=(const Handle<T>& handle) {
-    reference_ = handle.reference_;
-    return *this;
-  }
+  ALWAYS_INLINE Handle<T>& operator=(const Handle<T>& handle) = default;
 
   ALWAYS_INLINE explicit Handle(StackReference<T>* reference) : reference_(reference) {
   }
@@ -109,15 +105,10 @@
   }
 
   ALWAYS_INLINE MutableHandle(const MutableHandle<T>& handle)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      : Handle<T>(handle.reference_) {
-  }
+      REQUIRES_SHARED(Locks::mutator_lock_) = default;
 
   ALWAYS_INLINE MutableHandle<T>& operator=(const MutableHandle<T>& handle)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    Handle<T>::operator=(handle);
-    return *this;
-  }
+      REQUIRES_SHARED(Locks::mutator_lock_) = default;
 
   ALWAYS_INLINE explicit MutableHandle(StackReference<T>* reference)
       REQUIRES_SHARED(Locks::mutator_lock_)
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index 8a0aba6..adb7d8a 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -69,7 +69,7 @@
         number_of_references_(num_references) {}
 
   // Variable sized constructor.
-  BaseHandleScope(BaseHandleScope* link)
+  explicit BaseHandleScope(BaseHandleScope* link)
       : link_(link),
         number_of_references_(kNumReferencesVariableSized) {}
 
diff --git a/runtime/handle_scope_test.cc b/runtime/handle_scope_test.cc
index 92063c4..aab1d9c 100644
--- a/runtime/handle_scope_test.cc
+++ b/runtime/handle_scope_test.cc
@@ -14,15 +14,27 @@
  * limitations under the License.
  */
 
+#include <type_traits>
+
 #include "base/enums.h"
 #include "common_runtime_test.h"
 #include "gtest/gtest.h"
+#include "handle.h"
 #include "handle_scope-inl.h"
+#include "mirror/object.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 
 namespace art {
 
+// Handles are value objects and should be trivially copyable.
+static_assert(std::is_trivially_copyable<Handle<mirror::Object>>::value,
+              "Handle should be trivially copyable");
+static_assert(std::is_trivially_copyable<MutableHandle<mirror::Object>>::value,
+              "MutableHandle should be trivially copyable");
+static_assert(std::is_trivially_copyable<ScopedNullHandle<mirror::Object>>::value,
+              "ScopedNullHandle should be trivially copyable");
+
 class HandleScopeTest : public CommonRuntimeTest {};
 
 // Test the offsets computed for members of HandleScope. Because of cross-compiling
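
The static_asserts added above are the observable payoff of the handle.h change earlier in this diff: once Handle's copy constructor and copy assignment are declared = default instead of being written out by hand, the types become trivially copyable, and the asserts pin that property down. A small stand-alone example of the same effect (toy types, not the ART handles):

    // A user-provided copy constructor defeats trivial copyability; a defaulted one keeps it.
    #include <type_traits>

    struct Ref { int* p; };

    struct HandWritten {
      HandWritten(const HandWritten& other) : ref(other.ref) {}  // user-provided
      Ref ref;
    };

    struct Defaulted {
      Defaulted(const Defaulted&) = default;
      Defaulted& operator=(const Defaulted&) = default;
      Ref ref;
    };

    static_assert(!std::is_trivially_copyable<HandWritten>::value, "hand-written copy is not trivial");
    static_assert(std::is_trivially_copyable<Defaulted>::value, "defaulted copy stays trivial");
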
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 2336759..8cbe491 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -574,9 +574,9 @@
   }
 
   void WriteStringTable() {
-    for (const std::pair<std::string, HprofStringId>& p : strings_) {
+    for (const auto& p : strings_) {
       const std::string& string = p.first;
-      const size_t id = p.second;
+      const HprofStringId id = p.second;
 
       output_->StartNewRecord(HPROF_TAG_STRING, kHprofTime);
 
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 7b75109..d885226 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -199,7 +199,8 @@
   {
     ScopedThreadSuspension sts(self, kWaitingWeakGcRootRead);
     MutexLock mu(self, *Locks::intern_table_lock_);
-    while (weak_root_state_ == gc::kWeakRootStateNoReadsOrWrites) {
+    while ((!kUseReadBarrier && weak_root_state_ == gc::kWeakRootStateNoReadsOrWrites) ||
+           (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
       weak_intern_condition_.Wait(self);
     }
   }
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 4fc92a3..79a2a4d 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -48,7 +48,8 @@
                                            const ShadowFrame& shadow_frame,
                                            ObjPtr<mirror::Object>& obj,
                                            ArtField* field,
-                                           JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+                                           JValue* result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   field->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
   // Report this field access to instrumentation if needed.
@@ -299,6 +300,42 @@
 EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimNot);      // iget-object-quick.
 #undef EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL
 
+static JValue GetFieldValue(const ShadowFrame& shadow_frame,
+                            Primitive::Type field_type,
+                            uint32_t vreg)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  JValue field_value;
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      field_value.SetZ(static_cast<uint8_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimByte:
+      field_value.SetB(static_cast<int8_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimChar:
+      field_value.SetC(static_cast<uint16_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimShort:
+      field_value.SetS(static_cast<int16_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      field_value.SetI(shadow_frame.GetVReg(vreg));
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      field_value.SetJ(shadow_frame.GetVRegLong(vreg));
+      break;
+    case Primitive::kPrimNot:
+      field_value.SetL(shadow_frame.GetVRegReference(vreg));
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable: " << field_type;
+      UNREACHABLE();
+  }
+  return field_value;
+}
+
 template<Primitive::Type field_type>
 static JValue GetFieldValue(const ShadowFrame& shadow_frame, uint32_t vreg)
     REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -337,7 +374,8 @@
                                     const ShadowFrame& shadow_frame,
                                     ObjPtr<mirror::Object>& obj,
                                     ArtField* f,
-                                    size_t vregA) REQUIRES_SHARED(Locks::mutator_lock_) {
+                                    const JValue& value)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   f->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
   // Report this field access to instrumentation if needed. Since we only have the offset of
@@ -347,36 +385,35 @@
     StackHandleScope<1> hs(self);
     // Wrap in handle wrapper in case the listener does thread suspension.
     HandleWrapperObjPtr<mirror::Object> h(hs.NewHandleWrapper(&obj));
-    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
     ObjPtr<mirror::Object> this_object = f->IsStatic() ? nullptr : obj;
     instrumentation->FieldWriteEvent(self, this_object.Ptr(),
                                      shadow_frame.GetMethod(),
                                      shadow_frame.GetDexPC(),
                                      f,
-                                     field_value);
+                                     value);
   }
 
   switch (field_type) {
     case Primitive::kPrimBoolean:
-      f->SetBoolean<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetBoolean<transaction_active>(obj, value.GetZ());
       break;
     case Primitive::kPrimByte:
-      f->SetByte<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetByte<transaction_active>(obj, value.GetB());
       break;
     case Primitive::kPrimChar:
-      f->SetChar<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetChar<transaction_active>(obj, value.GetC());
       break;
     case Primitive::kPrimShort:
-      f->SetShort<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetShort<transaction_active>(obj, value.GetS());
       break;
     case Primitive::kPrimInt:
-      f->SetInt<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetInt<transaction_active>(obj, value.GetI());
       break;
     case Primitive::kPrimLong:
-      f->SetLong<transaction_active>(obj, shadow_frame.GetVRegLong(vregA));
+      f->SetLong<transaction_active>(obj, value.GetJ());
       break;
     case Primitive::kPrimNot: {
-      ObjPtr<mirror::Object> reg = shadow_frame.GetVRegReference(vregA);
+      ObjPtr<mirror::Object> reg = value.GetL();
       if (do_assignability_check && reg != nullptr) {
         // FieldHelper::GetType can resolve classes, use a handle wrapper which will restore the
         // object in the destructor.
@@ -434,11 +471,12 @@
   }
 
   uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
+  JValue value = GetFieldValue<field_type>(shadow_frame, vregA);
   return DoFieldPutCommon<field_type, do_assignability_check, transaction_active>(self,
                                                                                   shadow_frame,
                                                                                   obj,
                                                                                   f,
-                                                                                  vregA);
+                                                                                  value);
 }
 
 // Explicitly instantiate all DoFieldPut functions.
@@ -479,37 +517,34 @@
                                     ObjPtr<mirror::Object>& obj,
                                     ArtField* field,
                                     Primitive::Type field_type,
-                                    size_t vregA) REQUIRES_SHARED(Locks::mutator_lock_) {
+                                    const JValue& value)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   static const bool kDoCheckAssignability = false;
   static const bool kTransaction = false;
   switch (field_type) {
     case Primitive::kPrimBoolean:
       return DoFieldPutCommon<Primitive::kPrimBoolean, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimByte:
       return DoFieldPutCommon<Primitive::kPrimByte, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimChar:
       return DoFieldPutCommon<Primitive::kPrimChar, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimShort:
       return DoFieldPutCommon<Primitive::kPrimShort, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimInt:
-      return DoFieldPutCommon<Primitive::kPrimInt, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
-    case Primitive::kPrimLong:
-      return DoFieldPutCommon<Primitive::kPrimLong, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
     case Primitive::kPrimFloat:
       return DoFieldPutCommon<Primitive::kPrimInt, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
+    case Primitive::kPrimLong:
     case Primitive::kPrimDouble:
       return DoFieldPutCommon<Primitive::kPrimLong, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimNot:
       return DoFieldPutCommon<Primitive::kPrimNot, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable: " << field_type;
       UNREACHABLE();
@@ -653,53 +688,51 @@
 //
 
 template <bool is_range, bool do_assignability_check>
-    REQUIRES_SHARED(Locks::mutator_lock_)
-static inline bool DoCallCommon(ArtMethod* called_method,
-                                Thread* self,
-                                ShadowFrame& shadow_frame,
-                                JValue* result,
-                                uint16_t number_of_inputs,
-                                uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                uint32_t vregC) ALWAYS_INLINE;
-
-template <bool is_range> REQUIRES_SHARED(Locks::mutator_lock_)
-static inline bool DoCallPolymorphic(ArtMethod* called_method,
-                                     Handle<mirror::MethodType> callsite_type,
-                                     Handle<mirror::MethodType> target_type,
-                                     Thread* self,
-                                     ShadowFrame& shadow_frame,
-                                     JValue* result,
-                                     uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                     uint32_t vregC,
-                                     const MethodHandleKind handle_kind) ALWAYS_INLINE;
-
-template <bool is_range> REQUIRES_SHARED(Locks::mutator_lock_)
-static inline bool DoCallTransform(ArtMethod* called_method,
-                                   Handle<mirror::MethodType> callsite_type,
-                                   Handle<mirror::MethodType> callee_type,
-                                   Thread* self,
-                                   ShadowFrame& shadow_frame,
-                                   Handle<mirror::MethodHandleImpl> receiver,
-                                   JValue* result,
-                                   uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                   uint32_t vregC) ALWAYS_INLINE;
-
-REQUIRES_SHARED(Locks::mutator_lock_)
-inline void PerformCall(Thread* self,
-                        const DexFile::CodeItem* code_item,
-                        ArtMethod* caller_method,
-                        const size_t first_dest_reg,
-                        ShadowFrame* callee_frame,
-                        JValue* result) ALWAYS_INLINE;
+static ALWAYS_INLINE bool DoCallCommon(ArtMethod* called_method,
+                                       Thread* self,
+                                       ShadowFrame& shadow_frame,
+                                       JValue* result,
+                                       uint16_t number_of_inputs,
+                                       uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                       uint32_t vregC) REQUIRES_SHARED(Locks::mutator_lock_);
 
 template <bool is_range>
-REQUIRES_SHARED(Locks::mutator_lock_)
-inline void CopyRegisters(ShadowFrame& caller_frame,
-                          ShadowFrame* callee_frame,
-                          const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                          const size_t first_src_reg,
-                          const size_t first_dest_reg,
-                          const size_t num_regs) ALWAYS_INLINE;
+static ALWAYS_INLINE bool DoCallPolymorphic(ArtMethod* called_method,
+                                            Handle<mirror::MethodType> callsite_type,
+                                            Handle<mirror::MethodType> target_type,
+                                            Thread* self,
+                                            ShadowFrame& shadow_frame,
+                                            JValue* result,
+                                            uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                            uint32_t vregC,
+                                            const MethodHandleKind handle_kind)
+  REQUIRES_SHARED(Locks::mutator_lock_);
+
+template <bool is_range>
+static ALWAYS_INLINE bool DoCallTransform(ArtMethod* called_method,
+                                          Handle<mirror::MethodType> callsite_type,
+                                          Handle<mirror::MethodType> callee_type,
+                                          Thread* self,
+                                          ShadowFrame& shadow_frame,
+                                          Handle<mirror::MethodHandleImpl> receiver,
+                                          JValue* result,
+                                          uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                          uint32_t vregC) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ALWAYS_INLINE void PerformCall(Thread* self,
+                               const DexFile::CodeItem* code_item,
+                               ArtMethod* caller_method,
+                               const size_t first_dest_reg,
+                               ShadowFrame* callee_frame,
+                               JValue* result) REQUIRES_SHARED(Locks::mutator_lock_);
+
+template <bool is_range>
+ALWAYS_INLINE void CopyRegisters(ShadowFrame& caller_frame,
+                                 ShadowFrame* callee_frame,
+                                 const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                 const size_t first_src_reg,
+                                 const size_t first_dest_reg,
+                                 const size_t num_regs) REQUIRES_SHARED(Locks::mutator_lock_);
 
 // END DECLARATIONS.
 
@@ -776,12 +809,31 @@
   return is_invoke_exact;
 }
 
+inline static ObjPtr<mirror::Class> GetAndInitializeDeclaringClass(Thread* self, ArtField* field)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // Method handle invocations on static fields should ensure the class is
+  // initialized. This usually happens when an instance is constructed
+  // or class members are referenced, but this is not guaranteed when
+  // looking up method handles.
+  ObjPtr<mirror::Class> klass = field->GetDeclaringClass();
+  if (UNLIKELY(!klass->IsInitialized())) {
+    StackHandleScope<1> hs(self);
+    HandleWrapperObjPtr<mirror::Class> h(hs.NewHandleWrapper(&klass));
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, h, true, true)) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;
+    }
+  }
+  return klass;
+}
+
 template<bool is_range, bool do_access_check>
 inline bool DoInvokePolymorphic(Thread* self,
                                 ShadowFrame& shadow_frame,
                                 const Instruction* inst,
                                 uint16_t inst_data,
-                                JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+                                JValue* result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   // Invoke-polymorphic instructions always take a receiver, i.e., they are never static.
   const uint32_t vRegC = (is_range) ? inst->VRegC_4rcc() : inst->VRegC_45cc();
   const int invoke_method_idx = (is_range) ? inst->VRegB_4rcc() : inst->VRegB_45cc();
@@ -835,9 +887,23 @@
   const MethodHandleKind handle_kind = method_handle->GetHandleKind();
   Handle<mirror::MethodType> handle_type(hs.NewHandle(method_handle->GetMethodType()));
   CHECK(handle_type.Get() != nullptr);
-  if (UNLIKELY(is_invoke_exact && !callsite_type->IsExactMatch(handle_type.Get()))) {
-    ThrowWrongMethodTypeException(handle_type.Get(), callsite_type.Get());
-    return false;
+  if (is_invoke_exact) {
+    // We need to check the nominal type of the handle in addition to the
+    // real type. The "nominal" type is present when MethodHandle.asType is
+    // called on a handle; it changes the declared type of the handle.
+    ObjPtr<mirror::MethodType> nominal_type(method_handle->GetNominalType());
+    ObjPtr<mirror::MethodType> check_type(nullptr);
+    if (LIKELY(nominal_type.Ptr() == nullptr)) {
+      check_type.Assign(handle_type.Get());
+    } else {
+      check_type.Assign(nominal_type.Ptr());
+    }
+
+    if (UNLIKELY(!callsite_type->IsExactMatch(check_type.Ptr()))) {
+      ThrowWrongMethodTypeException(check_type.Ptr(), callsite_type.Get());
+      return false;
+    }
   }
 
   uint32_t arg[Instruction::kMaxVarArgRegs] = {};
@@ -868,16 +934,12 @@
       // frame, which means that it is unknown at this point. We perform these
       // checks inside DoCallPolymorphic right before we do the actual invoke.
     } else if (handle_kind == kInvokeDirect) {
-      if (called_method->IsConstructor()) {
-        // TODO(narayan) : We need to handle the case where the target method is a
-        // constructor here.
-        UNIMPLEMENTED(FATAL) << "Direct invokes for constructors are not implemented yet.";
-        return false;
+      // String constructors are a special case; they are replaced with
+      // StringFactory methods.
+      if (called_method->IsConstructor() && called_method->GetDeclaringClass()->IsStringClass()) {
+        DCHECK(handle_type->GetRType()->IsStringClass());
+        called_method = WellKnownClasses::StringInitToStringFactory(called_method);
       }
-
-      // Nothing special to do in the case where we're not dealing with a
-      // constructor. It's a private method, and we've already access checked at
-      // the point of creating the handle.
     } else if (handle_kind == kInvokeSuper) {
       ObjPtr<mirror::Class> declaring_class = called_method->GetDeclaringClass();
 
@@ -930,28 +992,55 @@
    Primitive::Type field_type = field->GetTypeAsPrimitiveType();
 
     if (!is_invoke_exact) {
-      // TODO(oth): conversion plumbing for invoke().
-      UNIMPLEMENTED(FATAL);
+      if (handle_type->GetPTypes()->GetLength() != callsite_type->GetPTypes()->GetLength()) {
+        // Argument count mismatch between the call site and the setter/getter.
+        ThrowWrongMethodTypeException(callsite_type.Get(), handle_type.Get());
+        return false;
+      }
     }
 
     switch (handle_kind) {
       case kInstanceGet: {
         ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(first_src_reg);
         DoFieldGetForInvokePolymorphic(self, shadow_frame, obj, field, field_type, result);
+        if (!ConvertReturnValue(callsite_type, handle_type, result)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
+        return true;
+      }
+      case kStaticGet: {
+        ObjPtr<mirror::Object> obj = GetAndInitializeDeclaringClass(self, field);
+        if (obj == nullptr) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
+        DoFieldGetForInvokePolymorphic(self, shadow_frame, obj, field, field_type, result);
+        if (!ConvertReturnValue(callsite_type, handle_type, result)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
         return true;
       }
       case kInstancePut: {
+        JValue value = GetFieldValue(shadow_frame, field_type, arg[1]);
+        if (!ConvertArgumentValue(callsite_type, handle_type, 1, &value)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
         ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(first_src_reg);
-        return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, arg[1]);
-      }
-      case kStaticGet: {
-        ObjPtr<mirror::Object> obj = field->GetDeclaringClass();
-        DoFieldGetForInvokePolymorphic(self, shadow_frame, obj, field, field_type, result);
-        return true;
+        result->SetL(0);
+        return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, value);
       }
       case kStaticPut: {
+        JValue value = GetFieldValue(shadow_frame, field_type, arg[0]);
+        if (!ConvertArgumentValue(callsite_type, handle_type, 0, &value)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
         ObjPtr<mirror::Object> obj = field->GetDeclaringClass();
-        return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, arg[0]);
+        result->SetL(0);
+        return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, value);
       }
       default:
         LOG(FATAL) << "Unreachable: " << handle_kind;
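
The rewritten accessor cases in DoInvokePolymorphic now share one shape: pull the raw value out of the shadow frame, apply the MethodType conversion for that argument (or, on the get paths, for the return value), and only then touch the field. A condensed, hypothetical C++ sketch of that ordering — the helpers below are stand-ins with made-up signatures, not the real ART functions named in the comments:

#include <cstdint>
#include <cstdio>
#include <optional>

// Hypothetical, simplified stand-ins for the runtime pieces used above;
// ConvertArg/ConvertRet play the role of ConvertArgumentValue/ConvertReturnValue.
struct Value { int64_t bits = 0; };

static bool ConvertArg(Value* v) { v->bits &= 0xFF; return true; }  // toy conversion
static bool ConvertRet(Value* v) { v->bits &= 0xFF; return true; }
static Value LoadFromShadowFrame(uint32_t vreg) { return Value{static_cast<int64_t>(vreg)}; }
static Value field_storage;
static void StoreField(const Value& v) { field_storage = v; }
static Value LoadField() { return field_storage; }

// Setter path (kInstancePut / kStaticPut): convert the argument *before*
// the field write, and bail out without writing anything if conversion fails.
static bool PutField(uint32_t vreg) {
  Value value = LoadFromShadowFrame(vreg);   // GetFieldValue(shadow_frame, field_type, vreg)
  if (!ConvertArg(&value)) {                 // ConvertArgumentValue(callsite, handle, idx, &value)
    return false;                            // exception already pending in the real code
  }
  StoreField(value);                         // DoFieldPutForInvokePolymorphic(..., value)
  return true;
}

// Getter path (kInstanceGet / kStaticGet): perform the read, then convert the result.
static std::optional<Value> GetField() {
  Value result = LoadField();                // DoFieldGetForInvokePolymorphic(...)
  if (!ConvertRet(&result)) {                // ConvertReturnValue(callsite, handle, &result)
    return std::nullopt;
  }
  return result;
}

int main() {
  PutField(0x1FF);
  std::printf("%lld\n", static_cast<long long>(GetField()->bits));  // prints 255
  return 0;
}
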
diff --git a/runtime/interpreter/mterp/mips/binop.S b/runtime/interpreter/mterp/mips/binop.S
index 66627e2..862d95a 100644
--- a/runtime/interpreter/mterp/mips/binop.S
+++ b/runtime/interpreter/mterp/mips/binop.S
@@ -30,4 +30,3 @@
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/mips/binop2addr.S b/runtime/interpreter/mterp/mips/binop2addr.S
index 548cbcb..17aa8eb 100644
--- a/runtime/interpreter/mterp/mips/binop2addr.S
+++ b/runtime/interpreter/mterp/mips/binop2addr.S
@@ -25,5 +25,4 @@
     $preinstr                              #  optional op
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 10-13 instructions */
+    SET_VREG_GOTO($result, rOBJ, t0)       #  vA <- $result
diff --git a/runtime/interpreter/mterp/mips/binopLit16.S b/runtime/interpreter/mterp/mips/binopLit16.S
index fc0c9ff..0696e7a 100644
--- a/runtime/interpreter/mterp/mips/binopLit16.S
+++ b/runtime/interpreter/mterp/mips/binopLit16.S
@@ -11,12 +11,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if $chkzero
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -26,5 +25,4 @@
     $preinstr                              #  optional op
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 10-13 instructions */
+    SET_VREG_GOTO($result, rOBJ, t0)       #  vA <- $result
diff --git a/runtime/interpreter/mterp/mips/binopLit8.S b/runtime/interpreter/mterp/mips/binopLit8.S
index a591408..382dd2b 100644
--- a/runtime/interpreter/mterp/mips/binopLit8.S
+++ b/runtime/interpreter/mterp/mips/binopLit8.S
@@ -12,7 +12,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -28,4 +28,3 @@
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/mips/binopWide.S b/runtime/interpreter/mterp/mips/binopWide.S
index 608525b..604134d 100644
--- a/runtime/interpreter/mterp/mips/binopWide.S
+++ b/runtime/interpreter/mterp/mips/binopWide.S
@@ -3,10 +3,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -32,4 +32,3 @@
     $instr                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vAA/vAA+1 <- $result0/$result1
-    /* 14-17 instructions */
diff --git a/runtime/interpreter/mterp/mips/binopWide2addr.S b/runtime/interpreter/mterp/mips/binopWide2addr.S
index cc92149..f96fdb2 100644
--- a/runtime/interpreter/mterp/mips/binopWide2addr.S
+++ b/runtime/interpreter/mterp/mips/binopWide2addr.S
@@ -3,22 +3,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64($arg2, $arg3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64($arg0, $arg1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64($arg2, $arg3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64($arg0, $arg1, t0)               #  a0/a1 <- vA/vA+1
     .if $chkzero
     or        t0, $arg2, $arg3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -28,6 +27,4 @@
     $preinstr                              #  optional op
     $instr                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64($result0, $result1, rOBJ)   #  vAA/vAA+1 <- $result0/$result1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vA/vA+1 <- $result0/$result1
diff --git a/runtime/interpreter/mterp/mips/fbinop.S b/runtime/interpreter/mterp/mips/fbinop.S
index d0d39ae..6c1468c 100644
--- a/runtime/interpreter/mterp/mips/fbinop.S
+++ b/runtime/interpreter/mterp/mips/fbinop.S
@@ -6,7 +6,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -14,6 +14,5 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     $instr                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
diff --git a/runtime/interpreter/mterp/mips/fbinop2addr.S b/runtime/interpreter/mterp/mips/fbinop2addr.S
index ccb67b1..2caaf9c 100644
--- a/runtime/interpreter/mterp/mips/fbinop2addr.S
+++ b/runtime/interpreter/mterp/mips/fbinop2addr.S
@@ -1,19 +1,18 @@
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     $instr
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
diff --git a/runtime/interpreter/mterp/mips/fbinopWide.S b/runtime/interpreter/mterp/mips/fbinopWide.S
index 3be9325..a1fe91e 100644
--- a/runtime/interpreter/mterp/mips/fbinopWide.S
+++ b/runtime/interpreter/mterp/mips/fbinopWide.S
@@ -1,6 +1,6 @@
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -9,7 +9,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -19,10 +19,5 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     $instr
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .L${opcode}_finish
-%break
-
-.L${opcode}_finish:
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
diff --git a/runtime/interpreter/mterp/mips/fbinopWide2addr.S b/runtime/interpreter/mterp/mips/fbinopWide2addr.S
index 8541f11..7303441 100644
--- a/runtime/interpreter/mterp/mips/fbinopWide2addr.S
+++ b/runtime/interpreter/mterp/mips/fbinopWide2addr.S
@@ -1,10 +1,11 @@
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -16,6 +17,5 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
diff --git a/runtime/interpreter/mterp/mips/funop.S b/runtime/interpreter/mterp/mips/funop.S
index bfb9346..b2b22c9 100644
--- a/runtime/interpreter/mterp/mips/funop.S
+++ b/runtime/interpreter/mterp/mips/funop.S
@@ -1,18 +1,15 @@
     /*
-     * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * Generic 32-bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: int-to-float, float-to-int
+     * for: int-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t0 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-
-.L${opcode}_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t1)         #  vA <- fv0
diff --git a/runtime/interpreter/mterp/mips/funopWide.S b/runtime/interpreter/mterp/mips/funopWide.S
deleted file mode 100644
index 3d4cf22..0000000
--- a/runtime/interpreter/mterp/mips/funopWide.S
+++ /dev/null
@@ -1,22 +0,0 @@
-%default {"preinstr":"", "ld_arg":"LOAD64_F(fa0, fa0f, a3)", "st_result":"SET_VREG64_F(fv0, fv0f, rOBJ)"}
-    /*
-     * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
-     * This could be a MIPS instruction or a function call.
-     *
-     * long-to-double, double-to-long
-     */
-    /* unop vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
-    GET_OPB(a3)                            #  a3 <- B
-    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    $ld_arg
-    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    $preinstr                              #  optional op
-    $instr                                 #  a0/a1 <- op, a2-a3 changed
-
-.L${opcode}_set_vreg:
-    $st_result                             #  vAA <- a0/a1
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
diff --git a/runtime/interpreter/mterp/mips/funopWider.S b/runtime/interpreter/mterp/mips/funopWider.S
index efb85f3..6862e24 100644
--- a/runtime/interpreter/mterp/mips/funopWider.S
+++ b/runtime/interpreter/mterp/mips/funopWider.S
@@ -1,10 +1,8 @@
-%default {"st_result":"SET_VREG64_F(fv0, fv0f, rOBJ)"}
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * Generic 32bit-to-64bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * For: int-to-double, float-to-double
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -12,8 +10,5 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-
-.L${opcode}_set_vreg:
-    $st_result                             #  vA/vA+1 <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- fv0
diff --git a/runtime/interpreter/mterp/mips/header.S b/runtime/interpreter/mterp/mips/header.S
index a3a6744..0ce7745 100644
--- a/runtime/interpreter/mterp/mips/header.S
+++ b/runtime/interpreter/mterp/mips/header.S
@@ -153,6 +153,58 @@
 #define fcc1   $$fcc1
 #endif
 
+#ifdef MIPS32REVGE2
+#define SEB(rd, rt) \
+    seb       rd, rt
+#define SEH(rd, rt) \
+    seh       rd, rt
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    ins       rd_lo, rt_hi, 16, 16
+#else
+#define SEB(rd, rt) \
+    sll       rd, rt, 24; \
+    sra       rd, rd, 24
+#define SEH(rd, rt) \
+    sll       rd, rt, 16; \
+    sra       rd, rd, 16
+/* Clobbers rt_hi on pre-R2. */
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    sll       rt_hi, rt_hi, 16; \
+    or        rd_lo, rt_hi
+#endif
+
+#ifdef FPU64
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mthc1     r, flo
+#else
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mtc1      r, fhi
+#endif
+
+#ifdef MIPS32REVGE6
+#define JR(rt) \
+    jic       rt, 0
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    lsa       rd, rs, rt, sa; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#else
+#define JR(rt) \
+    jalr      zero, rt
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    .set      push; \
+    .set      noat; \
+    sll       AT, rs, sa; \
+    addu      rd, AT, rt; \
+    .set      pop; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#endif
+
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
  * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
@@ -186,12 +238,12 @@
     sw        rPC, OFF_FP_DEX_PC_PTR(rFP)
 
 #define EXPORT_DEX_PC(tmp) \
-    lw   tmp, OFF_FP_CODE_ITEM(rFP) \
-    sw   rPC, OFF_FP_DEX_PC_PTR(rFP) \
-    addu tmp, CODEITEM_INSNS_OFFSET \
-    subu tmp, rPC, tmp \
-    sra  tmp, tmp, 1 \
-    sw   tmp, OFF_FP_DEX_PC(rFP)
+    lw        tmp, OFF_FP_CODE_ITEM(rFP); \
+    sw        rPC, OFF_FP_DEX_PC_PTR(rFP); \
+    addu      tmp, CODEITEM_INSNS_OFFSET; \
+    subu      tmp, rPC, tmp; \
+    sra       tmp, tmp, 1; \
+    sw        tmp, OFF_FP_DEX_PC(rFP)
 
 /*
  * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
@@ -206,18 +258,11 @@
  * exception catch may miss.  (This also implies that it must come after
  * EXPORT_PC().)
  */
-#define FETCH_ADVANCE_INST(_count) lhu rINST, ((_count)*2)(rPC); \
+#define FETCH_ADVANCE_INST(_count) \
+    lhu       rINST, ((_count)*2)(rPC); \
     addu      rPC, rPC, ((_count) * 2)
 
 /*
- * The operation performed here is similar to FETCH_ADVANCE_INST, except the
- * src and dest registers are parameterized (not hard-wired to rPC and rINST).
- */
-#define PREFETCH_ADVANCE_INST(_dreg, _sreg, _count) \
-    lhu       _dreg, ((_count)*2)(_sreg) ;            \
-    addu      _sreg, _sreg, (_count)*2
-
-/*
  * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
  * rINST ahead of possible exception point.  Be sure to manually advance rPC
  * later.
@@ -232,7 +277,8 @@
  * rPC to point to the next instruction.  "rd" must specify the distance
  * in bytes, *not* 16-bit code units, and may be a signed value.
  */
-#define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \
+#define FETCH_ADVANCE_INST_RB(rd) \
+    addu      rPC, rPC, rd; \
     lhu       rINST, (rPC)
 
 /*
@@ -257,38 +303,75 @@
 #define GET_INST_OPCODE(rd) and rd, rINST, 0xFF
 
 /*
- * Put the prefetched instruction's opcode field into the specified register.
+ * Transform opcode into branch target address.
  */
-#define GET_PREFETCHED_OPCODE(dreg, sreg)   andi     dreg, sreg, 255
+#define GET_OPCODE_TARGET(rd) \
+    sll       rd, rd, ${handler_size_bits}; \
+    addu      rd, rIBASE, rd
 
 /*
  * Begin executing the opcode in rd.
  */
-#define GOTO_OPCODE(rd) sll rd, rd, ${handler_size_bits}; \
-    addu      rd, rIBASE, rd; \
-    jalr      zero, rd
-
-#define GOTO_OPCODE_BASE(_base, rd)  sll rd, rd, ${handler_size_bits}; \
-    addu      rd, _base, rd; \
-    jalr      zero, rd
+#define GOTO_OPCODE(rd) \
+    GET_OPCODE_TARGET(rd); \
+    JR(rd)
 
 /*
  * Get/set the 32-bit value from a Dalvik register.
  */
 #define GET_VREG(rd, rix) LOAD_eas2(rd, rFP, rix)
 
-#define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
-    .set noat; l.s rd, (AT); .set at
+#define GET_VREG_F(rd, rix) \
+    .set noat; \
+    EAS2(AT, rFP, rix); \
+    l.s       rd, (AT); \
+    .set at
 
-#define SET_VREG(rd, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG(rd, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
     sw        zero, 0(t8)
+#endif
 
-#define SET_VREG64(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        rd, 0(t8)
+#else
+#define SET_VREG_OBJECT(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        rd, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#else
+#define SET_VREG64(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rlo, 0(t8); \
@@ -297,9 +380,39 @@
     .set at; \
     sw        zero, 0(t8); \
     sw        zero, 4(t8)
+#endif
 
-#ifdef FPU64
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_F(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG_F(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#elif defined(FPU64)
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rREFS, AT; \
     sw        zero, 0(t8); \
@@ -310,7 +423,8 @@
     .set at; \
     s.s       rlo, 0(t8)
 #else
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rlo, 0(t8); \
@@ -321,18 +435,21 @@
     sw        zero, 4(t8)
 #endif
 
-#define SET_VREG_OBJECT(rd, rix) .set noat; \
-    sll       AT, rix, 2; \
-    addu      t8, rFP, AT; \
-    sw        rd, 0(t8); \
-    addu      t8, rREFS, AT; \
-    .set at; \
-    sw        rd, 0(t8)
-
 /* Combination of the SET_VREG and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \
-    sll       dst, dst, ${handler_size_bits}; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -342,11 +459,51 @@
     jalr      zero, dst; \
     sw        zero, 0(t8); \
     .set reorder
+#endif
+
+/* Combination of the SET_VREG_OBJECT and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#endif
 
 /* Combination of the SET_VREG64 and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG64_GOTO(rlo, rhi, rix, dst) .set noreorder; \
-    sll       dst, dst, ${handler_size_bits}; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#else
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -358,14 +515,82 @@
     jalr      zero, dst; \
     sw        zero, 4(t8); \
     .set reorder
+#endif
 
-#define SET_VREG_F(rd, rix) .set noat; \
+/* Combination of the SET_VREG_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
-    sw        zero, 0(t8)
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#endif
+
+/* Combination of the SET_VREG64_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#elif defined(FPU64)
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rREFS, AT; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8); \
+    addu      t8, rFP, AT; \
+    mfhc1     AT, rlo; \
+    sw        AT, 4(t8); \
+    .set at; \
+    jalr      zero, dst; \
+    s.s       rlo, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rlo, 0(t8); \
+    s.s       rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#endif
 
 #define GET_OPA(rd) srl rd, rINST, 8
 #ifdef MIPS32REVGE2
@@ -376,60 +601,60 @@
 #define GET_OPB(rd) srl rd, rINST, 12
 
 /*
- * Form an Effective Address rd = rbase + roff<<n;
- * Uses reg AT
+ * Form an Effective Address rd = rbase + roff<<shift;
+ * Uses reg AT on pre-R6.
  */
-#define EASN(rd, rbase, roff, rshift) .set noat; \
-    sll       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
-    .set at
+#define EASN(rd, rbase, roff, shift) LSA(rd, roff, rbase, shift)
 
 #define EAS1(rd, rbase, roff) EASN(rd, rbase, roff, 1)
 #define EAS2(rd, rbase, roff) EASN(rd, rbase, roff, 2)
 #define EAS3(rd, rbase, roff) EASN(rd, rbase, roff, 3)
 #define EAS4(rd, rbase, roff) EASN(rd, rbase, roff, 4)
 
-/*
- * Form an Effective Shift Right rd = rbase + roff>>n;
- * Uses reg AT
- */
-#define ESRN(rd, rbase, roff, rshift) .set noat; \
-    srl       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
+#define LOAD_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    lw        rd, 0(AT); \
     .set at
 
-#define LOAD_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; lw rd, 0(AT); .set at
-
-#define STORE_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; sw rd, 0(AT); .set at
+#define STORE_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    sw        rd, 0(AT); \
+    .set at
 
 #define LOAD_RB_OFF(rd, rbase, off) lw rd, off(rbase)
 #define STORE_RB_OFF(rd, rbase, off) sw rd, off(rbase)
 
-#define STORE64_off(rlo, rhi, rbase, off) sw rlo, off(rbase); \
+#define STORE64_off(rlo, rhi, rbase, off) \
+    sw        rlo, off(rbase); \
     sw        rhi, (off+4)(rbase)
-#define LOAD64_off(rlo, rhi, rbase, off) lw rlo, off(rbase); \
+#define LOAD64_off(rlo, rhi, rbase, off) \
+    lw        rlo, off(rbase); \
     lw        rhi, (off+4)(rbase)
 
 #define STORE64(rlo, rhi, rbase) STORE64_off(rlo, rhi, rbase, 0)
 #define LOAD64(rlo, rhi, rbase) LOAD64_off(rlo, rhi, rbase, 0)
 
 #ifdef FPU64
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     .set noat; \
     mfhc1     AT, rlo; \
     sw        AT, (off+4)(rbase); \
     .set at
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     .set noat; \
     lw        AT, (off+4)(rbase); \
     mthc1     AT, rlo; \
     .set at
 #else
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     s.s       rhi, (off+4)(rbase)
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     l.s       rhi, (off+4)(rbase)
 #endif
 
@@ -490,3 +715,11 @@
 
 #define REFRESH_IBASE() \
     lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+
+/* Constants for float/double_to_int/long conversions */
+#define INT_MIN                 0x80000000
+#define INT_MIN_AS_FLOAT        0xCF000000
+#define INT_MIN_AS_DOUBLE_HIGH  0xC1E00000
+#define LONG_MIN_HIGH           0x80000000
+#define LONG_MIN_AS_FLOAT       0xDF000000
+#define LONG_MIN_AS_DOUBLE_HIGH 0xC3E00000
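
The conversion constants appended to header.S are the IEEE-754 encodings of INT_MIN (-2^31) and LONG_MIN (-2^63): the full float bit pattern and the high 32 bits of the double bit pattern. Because both values are exact powers of two, the encodings can be sanity-checked off-target with a small host-side C++ snippet (an independent check, not part of the mterp sources):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Raw IEEE-754 bits of a float / high 32 bits of a double.
static uint32_t FloatBits(float f) {
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));
  return u;
}
static uint32_t DoubleHighBits(double d) {
  uint64_t u;
  std::memcpy(&u, &d, sizeof(u));
  return static_cast<uint32_t>(u >> 32);
}

int main() {
  std::printf("%08x\n", FloatBits(-2147483648.0f));               // cf000000 (INT_MIN_AS_FLOAT)
  std::printf("%08x\n", DoubleHighBits(-2147483648.0));           // c1e00000 (INT_MIN_AS_DOUBLE_HIGH)
  std::printf("%08x\n", FloatBits(-9223372036854775808.0f));      // df000000 (LONG_MIN_AS_FLOAT)
  std::printf("%08x\n", DoubleHighBits(-9223372036854775808.0));  // c3e00000 (LONG_MIN_AS_DOUBLE_HIGH)
  return 0;
}
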
diff --git a/runtime/interpreter/mterp/mips/invoke.S b/runtime/interpreter/mterp/mips/invoke.S
index bcd3a57..db3b8af 100644
--- a/runtime/interpreter/mterp/mips/invoke.S
+++ b/runtime/interpreter/mterp/mips/invoke.S
@@ -2,8 +2,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern $helper
     EXPORT_PC()
     move    a0, rSELF
diff --git a/runtime/interpreter/mterp/mips/op_aget.S b/runtime/interpreter/mterp/mips/op_aget.S
index 8aa8992..e88402c 100644
--- a/runtime/interpreter/mterp/mips/op_aget.S
+++ b/runtime/interpreter/mterp/mips/op_aget.S
@@ -19,11 +19,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if $shift
     EASN(a0, a0, a1, $shift)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/mips/op_aget_object.S b/runtime/interpreter/mterp/mips/op_aget_object.S
index e3ab9d8..9c49dfe 100644
--- a/runtime/interpreter/mterp/mips/op_aget_object.S
+++ b/runtime/interpreter/mterp/mips/op_aget_object.S
@@ -14,7 +14,6 @@
     lw   a1, THREAD_EXCEPTION_OFFSET(rSELF)
     PREFETCH_INST(2)                       #  load rINST
     bnez a1, MterpException
-    SET_VREG_OBJECT(v0, rOBJ)              #  vAA <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, rOBJ, t0)     #  vAA <- v0
diff --git a/runtime/interpreter/mterp/mips/op_aput.S b/runtime/interpreter/mterp/mips/op_aput.S
index 53d6ae0..46dcaee 100644
--- a/runtime/interpreter/mterp/mips/op_aput.S
+++ b/runtime/interpreter/mterp/mips/op_aput.S
@@ -17,14 +17,11 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if $shift
     EASN(a0, a0, a1, $shift)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     $store a2, $data_offset(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_aput_wide.S b/runtime/interpreter/mterp/mips/op_aput_wide.S
index ef99261..c3cff56 100644
--- a/runtime/interpreter/mterp/mips/op_aput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_aput_wide.S
@@ -1,7 +1,5 @@
     /*
      * Array put, 64 bits.  vBB[vCC] <- vAA.
-     *
-     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
      */
     /* aput-wide vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
@@ -21,5 +19,6 @@
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     LOAD64(a2, a3, rOBJ)                   #  a2/a3 <- vAA/vAA+1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     STORE64_off(a2, a3, a0, MIRROR_WIDE_ARRAY_DATA_OFFSET) #  a2/a3 <- vBB[vCC]
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_array_length.S b/runtime/interpreter/mterp/mips/op_array_length.S
index 2b4a86f..ae2fe68 100644
--- a/runtime/interpreter/mterp/mips/op_array_length.S
+++ b/runtime/interpreter/mterp/mips/op_array_length.S
@@ -1,6 +1,7 @@
     /*
      * Return the length of an array.
      */
+    /* array-length vA, vB */
     GET_OPB(a1)                            #  a1 <- B
     GET_OPA4(a2)                           #  a2 <- A+
     GET_VREG(a0, a1)                       #  a0 <- vB (object ref)
diff --git a/runtime/interpreter/mterp/mips/op_check_cast.S b/runtime/interpreter/mterp/mips/op_check_cast.S
index 9a6cefa..3875ce6 100644
--- a/runtime/interpreter/mterp/mips/op_check_cast.S
+++ b/runtime/interpreter/mterp/mips/op_check_cast.S
@@ -1,7 +1,7 @@
     /*
      * Check to see if a cast from one class to another is allowed.
      */
-    # check-cast vAA, class                /* BBBB */
+    /* check-cast vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           #  a0 <- BBBB
     GET_OPA(a1)                            #  a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_cmpg_double.S b/runtime/interpreter/mterp/mips/op_cmpg_double.S
index e7965a7..b2e7532 100644
--- a/runtime/interpreter/mterp/mips/op_cmpg_double.S
+++ b/runtime/interpreter/mterp/mips/op_cmpg_double.S
@@ -1 +1 @@
-%include "mips/op_cmpl_double.S" { "naninst":"li rTEMP, 1" }
+%include "mips/op_cmpl_double.S" { "gt_bias":"1" }
diff --git a/runtime/interpreter/mterp/mips/op_cmpg_float.S b/runtime/interpreter/mterp/mips/op_cmpg_float.S
index 53519a6..76550b5 100644
--- a/runtime/interpreter/mterp/mips/op_cmpg_float.S
+++ b/runtime/interpreter/mterp/mips/op_cmpg_float.S
@@ -1 +1 @@
-%include "mips/op_cmpl_float.S" { "naninst":"li rTEMP, 1" }
+%include "mips/op_cmpl_float.S" { "gt_bias":"1" }
diff --git a/runtime/interpreter/mterp/mips/op_cmpl_double.S b/runtime/interpreter/mterp/mips/op_cmpl_double.S
index db89242..369e5b3 100644
--- a/runtime/interpreter/mterp/mips/op_cmpl_double.S
+++ b/runtime/interpreter/mterp/mips/op_cmpl_double.S
@@ -1,53 +1,51 @@
-%default { "naninst":"li rTEMP, -1" }
+%default { "gt_bias":"0" }
     /*
      * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
-     * into the destination register (rTEMP) based on the comparison results.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * See op_cmpl_float for more details.
+     * into the destination register based on the comparison results.
      *
      * For: cmpl-double, cmpg-double
      */
     /* op vAA, vBB, vCC */
 
     FETCH(a0, 1)                           #  a0 <- CCBB
-    and       rOBJ, a0, 255                #  s5 <- BB
+    and       rOBJ, a0, 255                #  rOBJ <- BB
     srl       t0, a0, 8                    #  t0 <- CC
-    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(rOBJ, rFP, rOBJ)                  #  rOBJ <- &fp[BB]
     EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
     LOAD64_F(ft0, ft0f, rOBJ)
     LOAD64_F(ft1, ft1f, t0)
 #ifdef MIPS32REVGE6
-    cmp.lt.d  ft2, ft0, ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .L${opcode}_finish
-    cmp.lt.d  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .L${opcode}_finish
     cmp.eq.d  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .L${opcode}_finish
-    b         .L${opcode}_nan
-#else
-    c.olt.d   fcc0, ft0, ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if $gt_bias
+    cmp.lt.d  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .L${opcode}_finish
-    c.olt.d   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.d  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .L${opcode}_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.d    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .L${opcode}_finish
-    b         .L${opcode}_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if $gt_bias
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
-%break
-
-.L${opcode}_nan:
-    $naninst
-
-.L${opcode}_finish:
+1:
     GET_OPA(rOBJ)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
diff --git a/runtime/interpreter/mterp/mips/op_cmpl_float.S b/runtime/interpreter/mterp/mips/op_cmpl_float.S
index b8c0961..1dd5506 100644
--- a/runtime/interpreter/mterp/mips/op_cmpl_float.S
+++ b/runtime/interpreter/mterp/mips/op_cmpl_float.S
@@ -1,60 +1,49 @@
-%default { "naninst":"li rTEMP, -1" }
+%default { "gt_bias":"0" }
     /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register rTEMP based on the results of the comparison.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * The operation we're implementing is:
-     *   if (x == y)
-     *     return 0;
-     *   else if (x < y)
-     *     return -1;
-     *   else if (x > y)
-     *     return 1;
-     *   else
-     *     return {-1 or 1};  // one or both operands was NaN
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into the destination register based on the comparison results.
      *
      * for: cmpl-float, cmpg-float
      */
     /* op vAA, vBB, vCC */
 
-    /* "clasic" form */
     FETCH(a0, 1)                           #  a0 <- CCBB
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8
     GET_VREG_F(ft0, a2)
     GET_VREG_F(ft1, a3)
 #ifdef MIPS32REVGE6
-    cmp.lt.s  ft2, ft0, ft1               # Is ft0 < ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .L${opcode}_finish
-    cmp.lt.s  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .L${opcode}_finish
     cmp.eq.s  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .L${opcode}_finish
-    b         .L${opcode}_nan
-#else
-    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if $gt_bias
+    cmp.lt.s  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .L${opcode}_finish
-    c.olt.s   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.s  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .L${opcode}_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.s    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .L${opcode}_finish
-    b         .L${opcode}_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if $gt_bias
+    c.olt.s   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
-%break
-
-.L${opcode}_nan:
-    $naninst
-
-.L${opcode}_finish:
+1:
     GET_OPA(rOBJ)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
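
For reference (illustrative sketch, not code from this patch): the new "gt_bias" parameter replaces the old "naninst" hook and selects the value produced when either operand is NaN, so the cmpg variants pass 1 and the cmpl variants default to 0. In Java terms, the semantics these handlers implement are roughly:

    public final class CmpSemantics {
        // gtBias == false -> cmpl-float/-double (NaN yields -1);
        // gtBias == true  -> cmpg-float/-double (NaN yields +1).
        static int compare(float x, float y, boolean gtBias) {
            if (x == y) return 0;       // ordered and equal
            if (x < y)  return -1;      // ordered, vBB < vCC
            if (x > y)  return 1;       // ordered, vBB > vCC
            return gtBias ? 1 : -1;     // unordered: at least one operand is NaN
        }
    }
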
diff --git a/runtime/interpreter/mterp/mips/op_const.S b/runtime/interpreter/mterp/mips/op_const.S
index c505761..bd9f873 100644
--- a/runtime/interpreter/mterp/mips/op_const.S
+++ b/runtime/interpreter/mterp/mips/op_const.S
@@ -1,9 +1,8 @@
-    # const vAA,                           /* +BBBBbbbb */
+    /* const vAA, +BBBBbbbb */
     GET_OPA(a3)                            #  a3 <- AA
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a1, a1, 16
-    or        a0, a1, a0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
diff --git a/runtime/interpreter/mterp/mips/op_const_16.S b/runtime/interpreter/mterp/mips/op_const_16.S
index 5e47633..2ffb30f 100644
--- a/runtime/interpreter/mterp/mips/op_const_16.S
+++ b/runtime/interpreter/mterp/mips/op_const_16.S
@@ -1,4 +1,4 @@
-    # const/16 vAA,                        /* +BBBB */
+    /* const/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/mips/op_const_4.S b/runtime/interpreter/mterp/mips/op_const_4.S
index 8b662f9..6866c78 100644
--- a/runtime/interpreter/mterp/mips/op_const_4.S
+++ b/runtime/interpreter/mterp/mips/op_const_4.S
@@ -1,4 +1,4 @@
-    # const/4 vA,                          /* +B */
+    /* const/4 vA, +B */
     sll       a1, rINST, 16                #  a1 <- Bxxx0000
     GET_OPA(a0)                            #  a0 <- A+
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/mips/op_const_class.S b/runtime/interpreter/mterp/mips/op_const_class.S
index 7202b11..9adea44 100644
--- a/runtime/interpreter/mterp/mips/op_const_class.S
+++ b/runtime/interpreter/mterp/mips/op_const_class.S
@@ -1,4 +1,4 @@
-    # const/class vAA, Class               /* BBBB */
+    /* const/class vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_const_high16.S b/runtime/interpreter/mterp/mips/op_const_high16.S
index 36c1c35..5162402 100644
--- a/runtime/interpreter/mterp/mips/op_const_high16.S
+++ b/runtime/interpreter/mterp/mips/op_const_high16.S
@@ -1,4 +1,4 @@
-    # const/high16 vAA,                    /* +BBBB0000 */
+    /* const/high16 vAA, +BBBB0000 */
     FETCH(a0, 1)                           #  a0 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sll       a0, a0, 16                   #  a0 <- BBBB0000
diff --git a/runtime/interpreter/mterp/mips/op_const_string.S b/runtime/interpreter/mterp/mips/op_const_string.S
index d8eeb46..006e114 100644
--- a/runtime/interpreter/mterp/mips/op_const_string.S
+++ b/runtime/interpreter/mterp/mips/op_const_string.S
@@ -1,4 +1,4 @@
-    # const/string vAA, String             /* BBBB */
+    /* const/string vAA, string@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_const_string_jumbo.S b/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
index d732ca1..54cec97 100644
--- a/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
+++ b/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
@@ -1,10 +1,9 @@
-    # const/string vAA, String          /* BBBBBBBB */
+    /* const/string vAA, string@BBBBBBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- bbbb (low)
     FETCH(a2, 2)                        # a2 <- BBBB (high)
     GET_OPA(a1)                         # a1 <- AA
-    sll    a2, a2, 16
-    or     a0, a0, a2                   # a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)            # a0 <- BBBBbbbb
     addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
     move   a3, rSELF
     JAL(MterpConstString)               # v0 <- Mterp(index, tgt_reg, shadow_frame, self)
diff --git a/runtime/interpreter/mterp/mips/op_const_wide.S b/runtime/interpreter/mterp/mips/op_const_wide.S
index 01d0f87..f8911e3 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide.S
@@ -1,14 +1,11 @@
-    # const-wide vAA,                      /* +HHHHhhhhBBBBbbbb */
+    /* const-wide vAA, +HHHHhhhhBBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (low middle)
     FETCH(a2, 3)                           #  a2 <- hhhh (high middle)
-    sll       a1, 16 #
-    or        a0, a1                       #  a0 <- BBBBbbbb (low word)
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb (low word)
     FETCH(a3, 4)                           #  a3 <- HHHH (high)
     GET_OPA(t1)                            #  t1 <- AA
-    sll       a3, 16
-    or        a1, a3, a2                   #  a1 <- HHHHhhhh (high word)
+    INSERT_HIGH_HALF(a2, a3)               #  a2 <- HHHHhhhh (high word)
     FETCH_ADVANCE_INST(5)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, t1)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a2, t1, t0)        #  vAA/vAA+1 <- a0/a2
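
For reference (illustrative sketch, not code from this patch): const-wide assembles its 64-bit literal from four 16-bit code units, and INSERT_HIGH_HALF merges each fetched high half into the matching low half. The arithmetic, written out in Java (register choices in the handler differ):

    public final class ConstWide {
        // bbbb/BBBB form the low word, hhhh/HHHH the high word.
        static long assemble(int bbbb, int BBBB, int hhhh, int HHHH) {
            int low  = (BBBB << 16) | (bbbb & 0xffff);   // INSERT_HIGH_HALF(a0, a1)
            int high = (HHHH << 16) | (hhhh & 0xffff);   // INSERT_HIGH_HALF(a2, a3)
            return ((long) high << 32) | (low & 0xffffffffL);
        }
    }
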
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_16.S b/runtime/interpreter/mterp/mips/op_const_wide_16.S
index 583d9ef..2ca5ab9 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide_16.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide_16.S
@@ -1,8 +1,7 @@
-    # const-wide/16 vAA,                   /* +BBBB */
+    /* const-wide/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sra       a1, a0, 31                   #  a1 <- ssssssss
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_32.S b/runtime/interpreter/mterp/mips/op_const_wide_32.S
index 3eb4574..bf802ca 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide_32.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide_32.S
@@ -1,11 +1,9 @@
-    # const-wide/32 vAA,                   /* +BBBBbbbb */
+    /* const-wide/32 vAA, +BBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- 0000bbbb (low)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_S(a2, 2)                         #  a2 <- ssssBBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a2, a2, 16
-    or        a0, a0, a2                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)               #  a0 <- BBBBbbbb
     sra       a1, a0, 31                   #  a1 <- ssssssss
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_high16.S b/runtime/interpreter/mterp/mips/op_const_wide_high16.S
index 88382c6..04b90fa 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide_high16.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide_high16.S
@@ -1,9 +1,8 @@
-    # const-wide/high16 vAA,               /* +BBBB000000000000 */
+    /* const-wide/high16 vAA, +BBBB000000000000 */
     FETCH(a1, 1)                           #  a1 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     li        a0, 0                        #  a0 <- 00000000
     sll       a1, 16                       #  a1 <- BBBB0000
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_double_to_int.S b/runtime/interpreter/mterp/mips/op_double_to_int.S
index b1792ec..3b44964 100644
--- a/runtime/interpreter/mterp/mips/op_double_to_int.S
+++ b/runtime/interpreter/mterp/mips/op_double_to_int.S
@@ -1,58 +1,39 @@
-%include "mips/unopNarrower.S" {"instr":"b d2i_doconv"}
-/*
- * Convert the double in a0/a1 to an int in a0.
- *
- * We have to clip values to int min/max per the specification.  The
- * expected common case is a "reasonable" value that converts directly
- * to modest integer.  The EABI convert function isn't doing this for us.
- */
-%break
+    /*
+     * double-to-int
+     *
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64_F(fa0, fa0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
-d2i_doconv:
+    li        t0, INT_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)
 #ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .L${opcode}_set_vreg_f
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.d  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
 #else
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
     c.ole.d   fcc0, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1t      .L${opcode}_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1t      .L${opcode}_set_vreg_f
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .L${opcode}_set_vreg_f
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.d    fcc0, fa0, fa0
+    mtc1      zero, fa0
+    MOVE_TO_FPU_HIGH(zero, fa0, fa0f)
+    movt.d    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
 #endif
-
-    trunc.w.d  fv0, fa0
-    b         .L${opcode}_set_vreg_f
-
-.LDOUBLE_TO_INT_max:
-    .dword 0x41dfffffffc00000
-.LDOUBLE_TO_INT_min:
-    .dword 0xc1e0000000000000              #  minint, as a double (high word)
-.LDOUBLE_TO_INT_maxret:
-    .word 0x7fffffff
-.LDOUBLE_TO_INT_minret:
-    .word 0x80000000
+1:
+    trunc.w.d fa0, fa0
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
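
For reference (illustrative sketch, not code from this patch): the rewritten handler appears to rely on the truncate instruction saturating too-large positive inputs, so only NaN and the INT_MIN bound need explicit checks. The target behavior is that of a Java (int) narrowing cast:

    public final class DoubleToInt {
        // Clamp to int range, map NaN to 0 -- what the handler must produce.
        static int convert(double v) {
            if (Double.isNaN(v)) return 0;
            if (v <= (double) Integer.MIN_VALUE) return Integer.MIN_VALUE;
            if (v >= (double) Integer.MAX_VALUE) return Integer.MAX_VALUE;
            return (int) v;               // in range: plain truncation
        }
    }
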
diff --git a/runtime/interpreter/mterp/mips/op_double_to_long.S b/runtime/interpreter/mterp/mips/op_double_to_long.S
index 7f7a799..78d4a8f 100644
--- a/runtime/interpreter/mterp/mips/op_double_to_long.S
+++ b/runtime/interpreter/mterp/mips/op_double_to_long.S
@@ -1,56 +1,61 @@
-%include "mips/funopWide.S" {"instr":"b d2l_doconv", "st_result":"SET_VREG64(rRESULT0, rRESULT1, rOBJ)"}
+    /*
+     * double-to-long
+     *
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64_F(fa0, fa0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    mthc1     t0, fa1
+    cmp.le.d  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_DOUBLE : 0
+1:
+    trunc.l.d fa0, fa0
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA <- result
+#else
+    c.eq.d    fcc0, fa0, fa0
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1f      fcc0, .L${opcode}_get_opcode
+
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)
+    c.ole.d   fcc0, fa0, fa1
+    li        rRESULT1, LONG_MIN_HIGH
+    bc1t      fcc0, .L${opcode}_get_opcode
+
+    neg.d     fa1, fa1
+    c.ole.d   fcc0, fa1, fa0
+    nor       rRESULT0, rRESULT0, zero
+    nor       rRESULT1, rRESULT1, zero
+    bc1t      fcc0, .L${opcode}_get_opcode
+
+    JAL(__fixdfdi)
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    b         .L${opcode}_set_vreg
+#endif
 %break
 
-d2l_doconv:
-#ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .L${opcode}_set_vreg
-#else
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .L${opcode}_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .L${opcode}_set_vreg
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1t      .L${opcode}_set_vreg
+#ifndef MIPS32REVGE6
+.L${opcode}_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.L${opcode}_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
 #endif
-    JAL(__fixdfdi)
-    b         .L${opcode}_set_vreg
-
-.LDOUBLE_TO_LONG_max:
-    .dword 0x43e0000000000000              #  maxlong, as a double (high word)
-.LDOUBLE_TO_LONG_min:
-    .dword 0xc3e0000000000000              #  minlong, as a double (high word)
-.LDOUBLE_TO_LONG_ret_max:
-    .dword 0x7fffffffffffffff
-.LDOUBLE_TO_LONG_ret_min:
-    .dword 0x8000000000000000
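
For reference (illustrative sketch, not code from this patch): the pre-R6 path checks NaN first, then the LONG_MIN and LONG_MAX bounds (the nor pair flips 0/LONG_MIN_HIGH into the LONG_MAX bit pattern), and only falls through to __fixdfdi for in-range values. The decision ladder in Java:

    public final class DoubleToLong {
        // Illustrative only; the handler keeps the result split across
        // rRESULT0/rRESULT1 and calls __fixdfdi for the in-range case.
        static long convert(double v) {
            if (Double.isNaN(v)) return 0L;                            // unordered -> 0
            if (v <= (double) Long.MIN_VALUE) return Long.MIN_VALUE;
            if (v >= -(double) Long.MIN_VALUE) return Long.MAX_VALUE;  // neg.d of the bound
            return (long) v;                                           // __fixdfdi
        }
    }
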
diff --git a/runtime/interpreter/mterp/mips/op_fill_array_data.S b/runtime/interpreter/mterp/mips/op_fill_array_data.S
index 8605746..c3cd371 100644
--- a/runtime/interpreter/mterp/mips/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/mips/op_fill_array_data.S
@@ -1,10 +1,9 @@
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC()
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    FETCH(a1, 1)                           #  a1 <- bbbb (lo)
+    FETCH(a0, 2)                           #  a0 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       a1, a1, 16                   #  a1 <- BBBBbbbb
-    or        a1, a0, a1                   #  a1 <- BBBBbbbb
+    INSERT_HIGH_HALF(a1, a0)               #  a1 <- BBBBbbbb
     GET_VREG(a0, a3)                       #  a0 <- vAA (array object)
     EAS1(a1, rPC, a1)                      #  a1 <- PC + BBBBbbbb*2 (array data off.)
     JAL(MterpFillArrayData)                #  v0 <- Mterp(obj, payload)
diff --git a/runtime/interpreter/mterp/mips/op_filled_new_array.S b/runtime/interpreter/mterp/mips/op_filled_new_array.S
index 3f62fae..9511578 100644
--- a/runtime/interpreter/mterp/mips/op_filled_new_array.S
+++ b/runtime/interpreter/mterp/mips/op_filled_new_array.S
@@ -4,8 +4,8 @@
      *
      * for: filled-new-array, filled-new-array/range
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
     .extern $helper
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
diff --git a/runtime/interpreter/mterp/mips/op_float_to_int.S b/runtime/interpreter/mterp/mips/op_float_to_int.S
index 8292652..087e50f 100644
--- a/runtime/interpreter/mterp/mips/op_float_to_int.S
+++ b/runtime/interpreter/mterp/mips/op_float_to_int.S
@@ -1,50 +1,36 @@
-%include "mips/funop.S" {"instr":"b f2i_doconv"}
-%break
+    /*
+     * float-to-int
+     *
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
-/*
- * Not an entry point as it is used only once !!
- */
-f2i_doconv:
+    li        t0, INT_MIN_AS_FLOAT
+    mtc1      t0, fa1
 #ifdef MIPS32REVGE6
-    l.s       fa1, .LFLOAT_TO_INT_max
-    cmp.le.s  ft2, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    cmp.le.s  ft2, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .L${opcode}_set_vreg_f
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.s  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
 #else
-    l.s       fa1, .LFLOAT_TO_INT_max
     c.ole.s   fcc0, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1t      .L${opcode}_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    c.ole.s   fcc0, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1t      .L${opcode}_set_vreg_f
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .L${opcode}_set_vreg_f
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.s    fcc0, fa0, fa0
+    mtc1      zero, fa0
+    movt.s    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
 #endif
-
-    trunc.w.s  fv0, fa0
-    b         .L${opcode}_set_vreg_f
-
-.LFLOAT_TO_INT_max:
-    .word 0x4f000000
-.LFLOAT_TO_INT_min:
-    .word 0xcf000000
-.LFLOAT_TO_INT_ret_max:
-    .word 0x7fffffff
-.LFLOAT_TO_INT_ret_min:
-    .word 0x80000000
+1:
+    trunc.w.s fa0, fa0
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
diff --git a/runtime/interpreter/mterp/mips/op_float_to_long.S b/runtime/interpreter/mterp/mips/op_float_to_long.S
index a51384f..dc88a78 100644
--- a/runtime/interpreter/mterp/mips/op_float_to_long.S
+++ b/runtime/interpreter/mterp/mips/op_float_to_long.S
@@ -1,51 +1,58 @@
-%include "mips/funopWider.S" {"instr":"b f2l_doconv", "st_result":"SET_VREG64(rRESULT0, rRESULT1, rOBJ)"}
-%break
+    /*
+     * float-to-long
+     *
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
-f2l_doconv:
 #ifdef MIPS32REVGE6
-    l.s       fa1, .LLONG_TO_max
-    cmp.le.s  ft2, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    cmp.le.s  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .L${opcode}_set_vreg
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_FLOAT
+    mtc1      t0, fa1
+    cmp.le.s  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_FLOAT : 0
+1:
+    trunc.l.s fa0, fa0
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA <- result
 #else
-    l.s       fa1, .LLONG_TO_max
-    c.ole.s   fcc0, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1t      .L${opcode}_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    c.ole.s   fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1t      .L${opcode}_set_vreg
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
+    c.eq.s    fcc0, fa0, fa0
     li        rRESULT0, 0
     li        rRESULT1, 0
-    bc1t      .L${opcode}_set_vreg
-#endif
+    bc1f      fcc0, .L${opcode}_get_opcode
+
+    li        t0, LONG_MIN_AS_FLOAT
+    mtc1      t0, fa1
+    c.ole.s   fcc0, fa0, fa1
+    li        rRESULT1, LONG_MIN_HIGH
+    bc1t      fcc0, .L${opcode}_get_opcode
+
+    neg.s     fa1, fa1
+    c.ole.s   fcc0, fa1, fa0
+    nor       rRESULT0, rRESULT0, zero
+    nor       rRESULT1, rRESULT1, zero
+    bc1t      fcc0, .L${opcode}_get_opcode
 
     JAL(__fixsfdi)
-
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
     b         .L${opcode}_set_vreg
+#endif
+%break
 
-.LLONG_TO_max:
-    .word 0x5f000000
-
-.LLONG_TO_min:
-    .word 0xdf000000
+#ifndef MIPS32REVGE6
+.L${opcode}_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.L${opcode}_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
+#endif
diff --git a/runtime/interpreter/mterp/mips/op_goto_32.S b/runtime/interpreter/mterp/mips/op_goto_32.S
index 67f52e9..ef5bf6b 100644
--- a/runtime/interpreter/mterp/mips/op_goto_32.S
+++ b/runtime/interpreter/mterp/mips/op_goto_32.S
@@ -8,8 +8,7 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
+    FETCH(rINST, 1)                        #  rINST <- aaaa (lo)
     FETCH(a1, 2)                           #  a1 <- AAAA (hi)
-    sll       a1, a1, 16
-    or        rINST, a0, a1                #  rINST <- AAAAaaaa
+    INSERT_HIGH_HALF(rINST, a1)            #  rINST <- AAAAaaaa
     b         MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_iget.S b/runtime/interpreter/mterp/mips/op_iget.S
index 86d44fa..01f42d9 100644
--- a/runtime/interpreter/mterp/mips/op_iget.S
+++ b/runtime/interpreter/mterp/mips/op_iget.S
@@ -4,6 +4,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -15,11 +16,10 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if $is_object
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if $is_object
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
diff --git a/runtime/interpreter/mterp/mips/op_iget_object_quick.S b/runtime/interpreter/mterp/mips/op_iget_object_quick.S
index 31d94b9..95c34d7 100644
--- a/runtime/interpreter/mterp/mips/op_iget_object_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iget_object_quick.S
@@ -9,7 +9,6 @@
     GET_OPA4(a2)                           #  a2<- A+
     PREFETCH_INST(2)                       #  load rINST
     bnez a3, MterpPossibleException        #  bail out
-    SET_VREG_OBJECT(v0, a2)                #  fp[A] <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       #  fp[A] <- v0
diff --git a/runtime/interpreter/mterp/mips/op_iget_quick.S b/runtime/interpreter/mterp/mips/op_iget_quick.S
index fbafa5b..46277d3 100644
--- a/runtime/interpreter/mterp/mips/op_iget_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iget_quick.S
@@ -1,6 +1,6 @@
 %default { "load":"lw" }
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
diff --git a/runtime/interpreter/mterp/mips/op_iget_wide.S b/runtime/interpreter/mterp/mips/op_iget_wide.S
index 8fe3089..cf5019e 100644
--- a/runtime/interpreter/mterp/mips/op_iget_wide.S
+++ b/runtime/interpreter/mterp/mips/op_iget_wide.S
@@ -3,6 +3,7 @@
      *
      * for: iget-wide
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field byte offset
     GET_OPB(a1)                            # a1 <- B
@@ -14,7 +15,6 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez a3, MterpException                # bail out
-    SET_VREG64(v0, v1, a2)                 # fp[A] <- v0/v1
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a2, t0)        # fp[A] <- v0/v1
diff --git a/runtime/interpreter/mterp/mips/op_iget_wide_quick.S b/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
index 4d2f291..128be57 100644
--- a/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
@@ -1,4 +1,4 @@
-    # iget-wide-quick vA, vB, offset       /* CCCC */
+    /* iget-wide-quick vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -9,5 +9,4 @@
     LOAD64(a0, a1, t0)                     #  a0 <- obj.field (64 bits, aligned)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_instance_of.S b/runtime/interpreter/mterp/mips/op_instance_of.S
index d2679bd..706dcf3 100644
--- a/runtime/interpreter/mterp/mips/op_instance_of.S
+++ b/runtime/interpreter/mterp/mips/op_instance_of.S
@@ -4,7 +4,7 @@
      * Most common situation is a non-null object, being compared against
      * an already-resolved class.
      */
-    # instance-of vA, vB, class            /* CCCC */
+    /* instance-of vA, vB, class@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- CCCC
     GET_OPB(a1)                            # a1 <- B
diff --git a/runtime/interpreter/mterp/mips/op_int_to_byte.S b/runtime/interpreter/mterp/mips/op_int_to_byte.S
index 77314c62..9266aab 100644
--- a/runtime/interpreter/mterp/mips/op_int_to_byte.S
+++ b/runtime/interpreter/mterp/mips/op_int_to_byte.S
@@ -1 +1 @@
-%include "mips/unop.S" {"preinstr":"sll a0, a0, 24", "instr":"sra a0, a0, 24"}
+%include "mips/unop.S" {"instr":"SEB(a0, a0)"}
diff --git a/runtime/interpreter/mterp/mips/op_int_to_short.S b/runtime/interpreter/mterp/mips/op_int_to_short.S
index 5649c2a..8749cd8 100644
--- a/runtime/interpreter/mterp/mips/op_int_to_short.S
+++ b/runtime/interpreter/mterp/mips/op_int_to_short.S
@@ -1 +1 @@
-%include "mips/unop.S" {"preinstr":"sll a0, 16", "instr":"sra a0, 16"}
+%include "mips/unop.S" {"instr":"SEH(a0, a0)"}
diff --git a/runtime/interpreter/mterp/mips/op_iput.S b/runtime/interpreter/mterp/mips/op_iput.S
index 732a9a4..9133d60 100644
--- a/runtime/interpreter/mterp/mips/op_iput.S
+++ b/runtime/interpreter/mterp/mips/op_iput.S
@@ -4,7 +4,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern $handler
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
diff --git a/runtime/interpreter/mterp/mips/op_iput_object.S b/runtime/interpreter/mterp/mips/op_iput_object.S
index 6b856e7..cfa56ec 100644
--- a/runtime/interpreter/mterp/mips/op_iput_object.S
+++ b/runtime/interpreter/mterp/mips/op_iput_object.S
@@ -3,7 +3,7 @@
      *
      * for: iput-object, iput-object-volatile
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
diff --git a/runtime/interpreter/mterp/mips/op_iput_object_quick.S b/runtime/interpreter/mterp/mips/op_iput_object_quick.S
index c3f1526..82044f5 100644
--- a/runtime/interpreter/mterp/mips/op_iput_object_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iput_object_quick.S
@@ -1,5 +1,5 @@
     /* For: iput-object-quick */
-    # op vA, vB, offset                 /* CCCC */
+    /* op vA, vB, offset@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
diff --git a/runtime/interpreter/mterp/mips/op_iput_quick.S b/runtime/interpreter/mterp/mips/op_iput_quick.S
index 0829666..d9753b1 100644
--- a/runtime/interpreter/mterp/mips/op_iput_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iput_quick.S
@@ -1,6 +1,6 @@
 %default { "store":"sw" }
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -9,6 +9,7 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     $store    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iput_wide.S b/runtime/interpreter/mterp/mips/op_iput_wide.S
index 6d23f8c..bc3d758 100644
--- a/runtime/interpreter/mterp/mips/op_iput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_iput_wide.S
@@ -1,4 +1,4 @@
-    # iput-wide vA, vB, field              /* CCCC */
+    /* iput-wide vA, vB, field@CCCC */
     .extern artSet64InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
diff --git a/runtime/interpreter/mterp/mips/op_iput_wide_quick.S b/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
index 9fdb847..0eb228d 100644
--- a/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
@@ -1,4 +1,4 @@
-    # iput-wide-quick vA, vB, offset       /* CCCC */
+    /* iput-wide-quick vA, vB, offset@CCCC */
     GET_OPA4(a0)                           #  a0 <- A(+)
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a2, a1)                       #  a2 <- fp[B], the object pointer
@@ -9,6 +9,7 @@
     FETCH(a3, 1)                           #  a3 <- field byte offset
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      a2, a2, a3                   #  obj.field (64 bits, aligned) <- a0/a1
-    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    GET_OPCODE_TARGET(t0)
+    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
+    JR(t0)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_long_to_double.S b/runtime/interpreter/mterp/mips/op_long_to_double.S
index b83aaf4..153f582 100644
--- a/runtime/interpreter/mterp/mips/op_long_to_double.S
+++ b/runtime/interpreter/mterp/mips/op_long_to_double.S
@@ -1 +1,20 @@
-%include "mips/funopWide.S" {"instr":"JAL(__floatdidf)", "ld_arg":"LOAD64(rARG0, rARG1, a3)"}
+    /*
+     * long-to-double
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.d.l   fv0, fv0
+#else
+    LOAD64(rARG0, rARG1, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    JAL(__floatdidf)                       #  a0/a1 <- op, a2-a3 changed
+#endif
+
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- result
diff --git a/runtime/interpreter/mterp/mips/op_long_to_float.S b/runtime/interpreter/mterp/mips/op_long_to_float.S
index 27faba5..dd1ab81 100644
--- a/runtime/interpreter/mterp/mips/op_long_to_float.S
+++ b/runtime/interpreter/mterp/mips/op_long_to_float.S
@@ -1 +1,20 @@
-%include "mips/unopNarrower.S" {"instr":"JAL(__floatdisf)", "load":"LOAD64(rARG0, rARG1, a3)"}
+    /*
+     * long-to-float
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.s.l   fv0, fv0
+#else
+    LOAD64(rARG0, rARG1, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    JAL(__floatdisf)
+#endif
+
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
diff --git a/runtime/interpreter/mterp/mips/op_move.S b/runtime/interpreter/mterp/mips/op_move.S
index 76588ba..547ea3a 100644
--- a/runtime/interpreter/mterp/mips/op_move.S
+++ b/runtime/interpreter/mterp/mips/op_move.S
@@ -7,8 +7,7 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_16.S b/runtime/interpreter/mterp/mips/op_move_16.S
index f7de6c2..91b7399 100644
--- a/runtime/interpreter/mterp/mips/op_move_16.S
+++ b/runtime/interpreter/mterp/mips/op_move_16.S
@@ -7,8 +7,7 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AAAA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AAAA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_exception.S b/runtime/interpreter/mterp/mips/op_move_exception.S
index f04a035..f1bece7 100644
--- a/runtime/interpreter/mterp/mips/op_move_exception.S
+++ b/runtime/interpreter/mterp/mips/op_move_exception.S
@@ -2,7 +2,8 @@
     GET_OPA(a2)                                 #  a2 <- AA
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)    #  get exception obj
     FETCH_ADVANCE_INST(1)                       #  advance rPC, load rINST
-    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     GET_INST_OPCODE(t0)                         #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
+    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     sw    zero, THREAD_EXCEPTION_OFFSET(rSELF)  #  clear exception
-    GOTO_OPCODE(t0)                             #  jump to next instruction
+    JR(t0)                                      #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_from16.S b/runtime/interpreter/mterp/mips/op_move_from16.S
index b8be741..90c25c9 100644
--- a/runtime/interpreter/mterp/mips/op_move_from16.S
+++ b/runtime/interpreter/mterp/mips/op_move_from16.S
@@ -7,8 +7,7 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_result.S b/runtime/interpreter/mterp/mips/op_move_result.S
index 315c68e..a4d5bfe 100644
--- a/runtime/interpreter/mterp/mips/op_move_result.S
+++ b/runtime/interpreter/mterp/mips/op_move_result.S
@@ -7,8 +7,7 @@
     lw    a0, 0(a0)                        #  a0 <- result.i
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    SET_VREG_OBJECT_GOTO(a0, a2, t0)       #  fp[AA] <- a0
     .else
-    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[AA] <- a0
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_result_wide.S b/runtime/interpreter/mterp/mips/op_move_result_wide.S
index 940c1ff..1259218 100644
--- a/runtime/interpreter/mterp/mips/op_move_result_wide.S
+++ b/runtime/interpreter/mterp/mips/op_move_result_wide.S
@@ -3,6 +3,5 @@
     lw    a3, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
     LOAD64(a0, a1, a3)                     #  a0/a1 <- retval.j
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_move_wide.S b/runtime/interpreter/mterp/mips/op_move_wide.S
index dd224c3..01d0949 100644
--- a/runtime/interpreter/mterp/mips/op_move_wide.S
+++ b/runtime/interpreter/mterp/mips/op_move_wide.S
@@ -5,6 +5,5 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[B]
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_move_wide_16.S b/runtime/interpreter/mterp/mips/op_move_wide_16.S
index d8761eb..587ba04 100644
--- a/runtime/interpreter/mterp/mips/op_move_wide_16.S
+++ b/runtime/interpreter/mterp/mips/op_move_wide_16.S
@@ -5,6 +5,5 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AAAA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AAAA] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_move_wide_from16.S b/runtime/interpreter/mterp/mips/op_move_wide_from16.S
index 2103fa1..5003fbd 100644
--- a/runtime/interpreter/mterp/mips/op_move_wide_from16.S
+++ b/runtime/interpreter/mterp/mips/op_move_wide_from16.S
@@ -5,6 +5,5 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_mul_long.S b/runtime/interpreter/mterp/mips/op_mul_long.S
index 803bbec..74b049a 100644
--- a/runtime/interpreter/mterp/mips/op_mul_long.S
+++ b/runtime/interpreter/mterp/mips/op_mul_long.S
@@ -39,5 +39,4 @@
 
 .L${opcode}_finish:
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, a0)                 #  vAA::vAA+1 <- v0(low) :: v1(high)
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a0, t0)        #  vAA/vAA+1 <- v0(low)/v1(high)
diff --git a/runtime/interpreter/mterp/mips/op_mul_long_2addr.S b/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
index 6950b71..683b055 100644
--- a/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
@@ -26,6 +26,4 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    # vAA <- v0 (low)
-    SET_VREG64(v0, v1, rOBJ)               #  vAA+1 <- v1 (high)
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, rOBJ, t1)      #  vA/vA+1 <- v0(low)/v1(high)
diff --git a/runtime/interpreter/mterp/mips/op_new_instance.S b/runtime/interpreter/mterp/mips/op_new_instance.S
index 51a09b2..3c9e83f 100644
--- a/runtime/interpreter/mterp/mips/op_new_instance.S
+++ b/runtime/interpreter/mterp/mips/op_new_instance.S
@@ -1,7 +1,7 @@
     /*
      * Create a new instance of a class.
      */
-    # new-instance vAA, class              /* BBBB */
+    /* new-instance vAA, class@BBBB */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rSELF
diff --git a/runtime/interpreter/mterp/mips/op_packed_switch.S b/runtime/interpreter/mterp/mips/op_packed_switch.S
index ffa4f47..0a1ff98 100644
--- a/runtime/interpreter/mterp/mips/op_packed_switch.S
+++ b/runtime/interpreter/mterp/mips/op_packed_switch.S
@@ -12,8 +12,7 @@
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_VREG(a1, a3)                       #  a1 <- vAA
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL($func)                             #  a0 <- code-unit branch offset
diff --git a/runtime/interpreter/mterp/mips/op_sget.S b/runtime/interpreter/mterp/mips/op_sget.S
index 3efcfbb..64ece1e 100644
--- a/runtime/interpreter/mterp/mips/op_sget.S
+++ b/runtime/interpreter/mterp/mips/op_sget.S
@@ -4,7 +4,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern $helper
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -15,11 +15,10 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if $is_object
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if $is_object
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
diff --git a/runtime/interpreter/mterp/mips/op_sget_wide.S b/runtime/interpreter/mterp/mips/op_sget_wide.S
index 7aee386..c729250 100644
--- a/runtime/interpreter/mterp/mips/op_sget_wide.S
+++ b/runtime/interpreter/mterp/mips/op_sget_wide.S
@@ -1,7 +1,7 @@
     /*
      * 64-bit SGET handler.
      */
-    # sget-wide vAA, field                 /* BBBB */
+    /* sget-wide vAA, field@BBBB */
     .extern artGet64StaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -12,6 +12,5 @@
     bnez  a3, MterpException
     GET_OPA(a1)                            # a1 <- AA
     FETCH_ADVANCE_INST(2)                  # advance rPC, load rINST
-    SET_VREG64(v0, v1, a1)                 # vAA/vAA+1 <- v0/v1
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a1, t0)        # vAA/vAA+1 <- v0/v1
diff --git a/runtime/interpreter/mterp/mips/op_shl_long.S b/runtime/interpreter/mterp/mips/op_shl_long.S
index 0121669..cc08112 100644
--- a/runtime/interpreter/mterp/mips/op_shl_long.S
+++ b/runtime/interpreter/mterp/mips/op_shl_long.S
@@ -24,7 +24,7 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
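
For reference (illustrative sketch, not code from this patch; the comment fixes above only correct which registers hold the result): shl-long composes a 64-bit shift from 32-bit halves, with the .L${opcode}_finish path covering shifts of 32 or more. Roughly, in Java:

    public final class ShlLong {
        static long shl(int lo, int hi, int shift) {
            int s = shift & 63;
            int rlo, rhi;
            if ((s & 32) != 0) {                 // finish path: shift >= 32
                rlo = 0;
                rhi = lo << (s & 31);
            } else {
                rlo = lo << s;
                rhi = (hi << s) | (s == 0 ? 0 : lo >>> (32 - s));
            }
            return ((long) rhi << 32) | (rlo & 0xffffffffL);
        }
    }
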
diff --git a/runtime/interpreter/mterp/mips/op_shl_long_2addr.S b/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
index 8ce6058..93c5783 100644
--- a/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
@@ -7,7 +7,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t2, rFP, rOBJ)                    #  t2 <- &fp[A]
-    LOAD64(a0, a1, t2)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t2)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -20,8 +20,8 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vA/vA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
-    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/op_shr_long.S b/runtime/interpreter/mterp/mips/op_shr_long.S
index 4c42758..ea032fe 100644
--- a/runtime/interpreter/mterp/mips/op_shr_long.S
+++ b/runtime/interpreter/mterp/mips/op_shr_long.S
@@ -23,7 +23,7 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/VAA+1 <- v0/v0
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/VAA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
diff --git a/runtime/interpreter/mterp/mips/op_shr_long_2addr.S b/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
index 3adc085..c805ea4 100644
--- a/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
@@ -7,7 +7,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t2)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
 
@@ -19,9 +19,9 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vA/vA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
     sra     a3, a1, 31                     #  a3<- sign(ah)
-    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/op_sput.S b/runtime/interpreter/mterp/mips/op_sput.S
index ee313b9..7034a0e 100644
--- a/runtime/interpreter/mterp/mips/op_sput.S
+++ b/runtime/interpreter/mterp/mips/op_sput.S
@@ -4,7 +4,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_sput_wide.S b/runtime/interpreter/mterp/mips/op_sput_wide.S
index 1e11466..3b347fc 100644
--- a/runtime/interpreter/mterp/mips/op_sput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_sput_wide.S
@@ -1,7 +1,7 @@
     /*
      * 64-bit SPUT handler.
      */
-    # sput-wide vAA, field                 /* BBBB */
+    /* sput-wide vAA, field@BBBB */
     .extern artSet64IndirectStaticFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
diff --git a/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S b/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
index ccf1f7e..9e93f34 100644
--- a/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
@@ -7,7 +7,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t3)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -20,8 +20,8 @@
     sll       a1, 1
     sll       a1, a0                       #  ahi<- ahi << (32-(shift&31))
     or        v0, a1                       #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vA/vA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
-    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/unop.S b/runtime/interpreter/mterp/mips/unop.S
index 52a8f0a..bc99263 100644
--- a/runtime/interpreter/mterp/mips/unop.S
+++ b/runtime/interpreter/mterp/mips/unop.S
@@ -1,11 +1,11 @@
 %default {"preinstr":"", "result0":"a0"}
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -15,5 +15,4 @@
     $preinstr                              #  optional op
     $instr                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO($result0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO($result0, t0, t1)        #  vA <- result0
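
The unop.S change above only tightens the template comment; the mechanism itself is that each opcode supplies its own "instr" line and the rest of the handler is shared. A hypothetical C analogue of that substitution (not the real mterp generator):

    #include <stdint.h>

    /* One skeleton, with the actual operation substituted per opcode. */
    #define DEFINE_UNOP(name, expr) \
        static int32_t name(int32_t a0) { return (expr); }

    DEFINE_UNOP(op_neg_int,      -a0)
    DEFINE_UNOP(op_not_int,      ~a0)
    DEFINE_UNOP(op_int_to_byte,  (int8_t)a0)    /* what SEB computes  */
    DEFINE_UNOP(op_int_to_char,  (uint16_t)a0)
    DEFINE_UNOP(op_int_to_short, (int16_t)a0)   /* what SEH computes  */
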
diff --git a/runtime/interpreter/mterp/mips/unopNarrower.S b/runtime/interpreter/mterp/mips/unopNarrower.S
index 9c38bad..0196e27 100644
--- a/runtime/interpreter/mterp/mips/unopNarrower.S
+++ b/runtime/interpreter/mterp/mips/unopNarrower.S
@@ -1,24 +1,16 @@
 %default {"load":"LOAD64_F(fa0, fa0f, a3)"}
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
+     * Generic 64bit-to-32bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * For: double-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     $load
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-
-.L${opcode}_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
diff --git a/runtime/interpreter/mterp/mips/unopWide.S b/runtime/interpreter/mterp/mips/unopWide.S
index fd25dff..135d9fa 100644
--- a/runtime/interpreter/mterp/mips/unopWide.S
+++ b/runtime/interpreter/mterp/mips/unopWide.S
@@ -1,7 +1,7 @@
 %default {"preinstr":"", "result0":"a0", "result1":"a1"}
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
      * This could be MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -10,11 +10,9 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vA
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $preinstr                              #  optional op
     $instr                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64($result0, $result1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/unopWider.S b/runtime/interpreter/mterp/mips/unopWider.S
index 1c18837..ca888ad 100644
--- a/runtime/interpreter/mterp/mips/unopWider.S
+++ b/runtime/interpreter/mterp/mips/unopWider.S
@@ -1,8 +1,7 @@
 %default {"preinstr":"", "result0":"a0", "result1":"a1"}
     /*
      * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * that specifies an instruction that performs "result0/result1 = op a0".
      *
      * For: int-to-long
      */
@@ -14,6 +13,4 @@
     $preinstr                              #  optional op
     $instr                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64($result0, $result1, rOBJ)   #  vA/vA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 10-11 instructions */
+    SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
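
For unopWider (int-to-long), result0/result1 is the low/high word pair that SET_VREG64_GOTO writes into vA/vA+1. A sketch of that widening, assuming the usual sign-extension preinstr:

    #include <stdint.h>

    struct RegPair { uint32_t lo; uint32_t hi; };

    static struct RegPair int_to_long(int32_t a0) {
        int64_t wide = (int64_t)a0;             /* sign-extend, e.g. sra a1, a0, 31 */
        struct RegPair r = {
            .lo = (uint32_t)wide,               /* result0 -> vA   */
            .hi = (uint32_t)(wide >> 32),       /* result1 -> vA+1 */
        };
        return r;
    }
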
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index c1ba794..d3b91e2 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -160,6 +160,58 @@
 #define fcc1   $fcc1
 #endif
 
+#ifdef MIPS32REVGE2
+#define SEB(rd, rt) \
+    seb       rd, rt
+#define SEH(rd, rt) \
+    seh       rd, rt
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    ins       rd_lo, rt_hi, 16, 16
+#else
+#define SEB(rd, rt) \
+    sll       rd, rt, 24; \
+    sra       rd, rd, 24
+#define SEH(rd, rt) \
+    sll       rd, rt, 16; \
+    sra       rd, rd, 16
+/* Clobbers rt_hi on pre-R2. */
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    sll       rt_hi, rt_hi, 16; \
+    or        rd_lo, rt_hi
+#endif
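
The macros above abstract over MIPS32 revision differences: R2+ has seb/seh/ins, older cores emulate them with shift pairs, and the pre-R2 INSERT_HIGH_HALF clobbers its rt_hi argument as the comment warns. Their arithmetic, as a small C sketch:

    #include <stdint.h>

    /* SEB/SEH: sign-extend the low 8/16 bits of rt. */
    static inline int32_t seb(int32_t rt) { return (int8_t)rt;  }   /* sll 24; sra 24 */
    static inline int32_t seh(int32_t rt) { return (int16_t)rt; }   /* sll 16; sra 16 */

    /* INSERT_HIGH_HALF: place the low 16 bits of hi into the high half of lo,
     * i.e. assemble a 32-bit value from two zero-extended 16-bit code units. */
    static inline uint32_t insert_high_half(uint32_t lo, uint32_t hi) {
        return (lo & 0xFFFFu) | (hi << 16);
    }
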
+
+#ifdef FPU64
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mthc1     r, flo
+#else
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mtc1      r, fhi
+#endif
+
+#ifdef MIPS32REVGE6
+#define JR(rt) \
+    jic       rt, 0
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    lsa       rd, rs, rt, sa; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#else
+#define JR(rt) \
+    jalr      zero, rt
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    .set      push; \
+    .set      noat; \
+    sll       AT, rs, sa; \
+    addu      rd, AT, rt; \
+    .set      pop; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#endif
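
JR and LSA likewise paper over the R6 differences: R6 uses jic for the indirect jump and has a real lsa, while pre-R6 falls back to jalr $zero and a shift+add through AT. LSA(rd, rs, rt, sa) computes rd = (rs << sa) + rt, with sa = 0 degenerating to a plain add. As a sketch:

    #include <stdint.h>

    /* LSA(rd, rs, rt, sa): scaled-index address computation. */
    static inline uint32_t lsa(uint32_t rs, uint32_t rt, unsigned sa) {
        return sa ? (rs << sa) + rt : rs + rt;
    }

    /* e.g. EAS2(rd, rFP, vreg) expands to LSA(rd, vreg, rFP, 2):
     * the address of a 4-byte vreg slot, lsa(vreg, rFP, 2). */
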
+
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
  * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
@@ -193,12 +245,12 @@
     sw        rPC, OFF_FP_DEX_PC_PTR(rFP)
 
 #define EXPORT_DEX_PC(tmp) \
-    lw   tmp, OFF_FP_CODE_ITEM(rFP) \
-    sw   rPC, OFF_FP_DEX_PC_PTR(rFP) \
-    addu tmp, CODEITEM_INSNS_OFFSET \
-    subu tmp, rPC, tmp \
-    sra  tmp, tmp, 1 \
-    sw   tmp, OFF_FP_DEX_PC(rFP)
+    lw        tmp, OFF_FP_CODE_ITEM(rFP); \
+    sw        rPC, OFF_FP_DEX_PC_PTR(rFP); \
+    addu      tmp, CODEITEM_INSNS_OFFSET; \
+    subu      tmp, rPC, tmp; \
+    sra       tmp, tmp, 1; \
+    sw        tmp, OFF_FP_DEX_PC(rFP)
 
 /*
  * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
@@ -213,18 +265,11 @@
  * exception catch may miss.  (This also implies that it must come after
  * EXPORT_PC().)
  */
-#define FETCH_ADVANCE_INST(_count) lhu rINST, ((_count)*2)(rPC); \
+#define FETCH_ADVANCE_INST(_count) \
+    lhu       rINST, ((_count)*2)(rPC); \
     addu      rPC, rPC, ((_count) * 2)
 
 /*
- * The operation performed here is similar to FETCH_ADVANCE_INST, except the
- * src and dest registers are parameterized (not hard-wired to rPC and rINST).
- */
-#define PREFETCH_ADVANCE_INST(_dreg, _sreg, _count) \
-    lhu       _dreg, ((_count)*2)(_sreg) ;            \
-    addu      _sreg, _sreg, (_count)*2
-
-/*
  * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
  * rINST ahead of possible exception point.  Be sure to manually advance rPC
  * later.
@@ -239,7 +284,8 @@
  * rPC to point to the next instruction.  "rd" must specify the distance
  * in bytes, *not* 16-bit code units, and may be a signed value.
  */
-#define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \
+#define FETCH_ADVANCE_INST_RB(rd) \
+    addu      rPC, rPC, rd; \
     lhu       rINST, (rPC)
 
 /*
@@ -264,38 +310,75 @@
 #define GET_INST_OPCODE(rd) and rd, rINST, 0xFF
 
 /*
- * Put the prefetched instruction's opcode field into the specified register.
+ * Transform opcode into branch target address.
  */
-#define GET_PREFETCHED_OPCODE(dreg, sreg)   andi     dreg, sreg, 255
+#define GET_OPCODE_TARGET(rd) \
+    sll       rd, rd, 7; \
+    addu      rd, rIBASE, rd
 
 /*
  * Begin executing the opcode in rd.
  */
-#define GOTO_OPCODE(rd) sll rd, rd, 7; \
-    addu      rd, rIBASE, rd; \
-    jalr      zero, rd
-
-#define GOTO_OPCODE_BASE(_base, rd)  sll rd, rd, 7; \
-    addu      rd, _base, rd; \
-    jalr      zero, rd
+#define GOTO_OPCODE(rd) \
+    GET_OPCODE_TARGET(rd); \
+    JR(rd)
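
GET_OPCODE_TARGET turns an opcode into a handler address by scaling it by the handler stride (each handler is padded to 128 bytes, hence the .balign 128 before every .L_op_* label further down) and adding the interpreter base in rIBASE; GOTO_OPCODE then simply jumps there. A sketch of the address arithmetic, with names chosen for illustration:

    #include <stdint.h>

    #define HANDLER_SIZE 128u   /* matches the .balign 128 padding of each handler */

    /* GET_OPCODE_TARGET + GOTO_OPCODE: computed dispatch off the interpreter base. */
    static inline const void *opcode_target(const uint8_t *ibase /* rIBASE */,
                                            uint32_t opcode) {
        return ibase + opcode * HANDLER_SIZE;   /* == ibase + (opcode << 7) */
    }
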
 
 /*
  * Get/set the 32-bit value from a Dalvik register.
  */
 #define GET_VREG(rd, rix) LOAD_eas2(rd, rFP, rix)
 
-#define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
-    .set noat; l.s rd, (AT); .set at
+#define GET_VREG_F(rd, rix) \
+    .set noat; \
+    EAS2(AT, rFP, rix); \
+    l.s       rd, (AT); \
+    .set at
 
-#define SET_VREG(rd, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG(rd, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
     sw        zero, 0(t8)
+#endif
 
-#define SET_VREG64(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        rd, 0(t8)
+#else
+#define SET_VREG_OBJECT(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        rd, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#else
+#define SET_VREG64(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rlo, 0(t8); \
@@ -304,9 +387,39 @@
     .set at; \
     sw        zero, 0(t8); \
     sw        zero, 4(t8)
+#endif
 
-#ifdef FPU64
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_F(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG_F(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#elif defined(FPU64)
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rREFS, AT; \
     sw        zero, 0(t8); \
@@ -317,7 +430,8 @@
     .set at; \
     s.s       rlo, 0(t8)
 #else
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rlo, 0(t8); \
@@ -328,18 +442,21 @@
     sw        zero, 4(t8)
 #endif
 
-#define SET_VREG_OBJECT(rd, rix) .set noat; \
-    sll       AT, rix, 2; \
-    addu      t8, rFP, AT; \
-    sw        rd, 0(t8); \
-    addu      t8, rREFS, AT; \
-    .set at; \
-    sw        rd, 0(t8)
-
 /* Combination of the SET_VREG and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \
-    sll       dst, dst, 7; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -349,11 +466,51 @@
     jalr      zero, dst; \
     sw        zero, 0(t8); \
     .set reorder
+#endif
+
+/* Combination of the SET_VREG_OBJECT and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#endif
 
 /* Combination of the SET_VREG64 and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG64_GOTO(rlo, rhi, rix, dst) .set noreorder; \
-    sll       dst, dst, 7; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#else
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -365,14 +522,82 @@
     jalr      zero, dst; \
     sw        zero, 4(t8); \
     .set reorder
+#endif
 
-#define SET_VREG_F(rd, rix) .set noat; \
+/* Combination of the SET_VREG_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
-    sw        zero, 0(t8)
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#endif
+
+/* Combination of the SET_VREG64_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#elif defined(FPU64)
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rREFS, AT; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8); \
+    addu      t8, rFP, AT; \
+    mfhc1     AT, rlo; \
+    sw        AT, 4(t8); \
+    .set at; \
+    jalr      zero, dst; \
+    s.s       rlo, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rlo, 0(t8); \
+    s.s       rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#endif
 
 #define GET_OPA(rd) srl rd, rINST, 8
 #ifdef MIPS32REVGE2
@@ -383,60 +608,60 @@
 #define GET_OPB(rd) srl rd, rINST, 12
 
 /*
- * Form an Effective Address rd = rbase + roff<<n;
- * Uses reg AT
+ * Form an Effective Address rd = rbase + roff<<shift;
+ * Uses reg AT on pre-R6.
  */
-#define EASN(rd, rbase, roff, rshift) .set noat; \
-    sll       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
-    .set at
+#define EASN(rd, rbase, roff, shift) LSA(rd, roff, rbase, shift)
 
 #define EAS1(rd, rbase, roff) EASN(rd, rbase, roff, 1)
 #define EAS2(rd, rbase, roff) EASN(rd, rbase, roff, 2)
 #define EAS3(rd, rbase, roff) EASN(rd, rbase, roff, 3)
 #define EAS4(rd, rbase, roff) EASN(rd, rbase, roff, 4)
 
-/*
- * Form an Effective Shift Right rd = rbase + roff>>n;
- * Uses reg AT
- */
-#define ESRN(rd, rbase, roff, rshift) .set noat; \
-    srl       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
+#define LOAD_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    lw        rd, 0(AT); \
     .set at
 
-#define LOAD_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; lw rd, 0(AT); .set at
-
-#define STORE_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; sw rd, 0(AT); .set at
+#define STORE_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    sw        rd, 0(AT); \
+    .set at
 
 #define LOAD_RB_OFF(rd, rbase, off) lw rd, off(rbase)
 #define STORE_RB_OFF(rd, rbase, off) sw rd, off(rbase)
 
-#define STORE64_off(rlo, rhi, rbase, off) sw rlo, off(rbase); \
+#define STORE64_off(rlo, rhi, rbase, off) \
+    sw        rlo, off(rbase); \
     sw        rhi, (off+4)(rbase)
-#define LOAD64_off(rlo, rhi, rbase, off) lw rlo, off(rbase); \
+#define LOAD64_off(rlo, rhi, rbase, off) \
+    lw        rlo, off(rbase); \
     lw        rhi, (off+4)(rbase)
 
 #define STORE64(rlo, rhi, rbase) STORE64_off(rlo, rhi, rbase, 0)
 #define LOAD64(rlo, rhi, rbase) LOAD64_off(rlo, rhi, rbase, 0)
 
 #ifdef FPU64
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     .set noat; \
     mfhc1     AT, rlo; \
     sw        AT, (off+4)(rbase); \
     .set at
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     .set noat; \
     lw        AT, (off+4)(rbase); \
     mthc1     AT, rlo; \
     .set at
 #else
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     s.s       rhi, (off+4)(rbase)
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     l.s       rhi, (off+4)(rbase)
 #endif
 
@@ -498,6 +723,14 @@
 #define REFRESH_IBASE() \
     lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
 
+/* Constants for float/double_to_int/long conversions */
+#define INT_MIN                 0x80000000
+#define INT_MIN_AS_FLOAT        0xCF000000
+#define INT_MIN_AS_DOUBLE_HIGH  0xC1E00000
+#define LONG_MIN_HIGH           0x80000000
+#define LONG_MIN_AS_FLOAT       0xDF000000
+#define LONG_MIN_AS_DOUBLE_HIGH 0xC3E00000
+
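
The new constants are the IEEE-754 encodings the float/double-to-int/long handlers use to detect out-of-range inputs: 0xCF000000 is the bit pattern of (float)INT_MIN, 0xC1E00000 the high word of (double)INT_MIN, 0xDF000000 is (float)LONG_MIN, and 0xC3E00000 the high word of (double)LONG_MIN. Java semantics saturate instead of trapping; roughly, in C:

    #include <math.h>
    #include <stdint.h>

    /* Java-style float-to-int: NaN maps to 0, out-of-range values saturate. */
    static int32_t java_f2i(float f) {
        if (isnan(f)) return 0;
        if (f >= (float)INT32_MAX) return INT32_MAX;
        if (f <= (float)INT32_MIN) return INT32_MIN;  /* (float)INT32_MIN == 0xCF000000 */
        return (int32_t)f;
    }
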
 /* File: mips/entry.S */
 /*
  * Copyright (C) 2016 The Android Open Source Project
@@ -599,11 +832,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -617,11 +849,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -635,11 +866,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AAAA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AAAA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -652,9 +882,8 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[B]
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -667,9 +896,8 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -682,9 +910,8 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AAAA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AAAA] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -699,11 +926,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -719,11 +945,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -739,11 +964,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AAAA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AAAA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -758,11 +982,10 @@
     lw    a0, 0(a0)                        #  a0 <- result.i
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    SET_VREG_OBJECT_GOTO(a0, a2, t0)       #  fp[AA] <- a0
     .else
-    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[AA] <- a0
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -773,9 +996,8 @@
     lw    a3, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
     LOAD64(a0, a1, a3)                     #  a0/a1 <- retval.j
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -790,11 +1012,10 @@
     lw    a0, 0(a0)                        #  a0 <- result.i
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    SET_VREG_OBJECT_GOTO(a0, a2, t0)       #  fp[AA] <- a0
     .else
-    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[AA] <- a0
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -805,10 +1026,11 @@
     GET_OPA(a2)                                 #  a2 <- AA
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)    #  get exception obj
     FETCH_ADVANCE_INST(1)                       #  advance rPC, load rINST
-    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     GET_INST_OPCODE(t0)                         #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
+    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     sw    zero, THREAD_EXCEPTION_OFFSET(rSELF)  #  clear exception
-    GOTO_OPCODE(t0)                             #  jump to next instruction
+    JR(t0)                                      #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -899,7 +1121,7 @@
     .balign 128
 .L_op_const_4: /* 0x12 */
 /* File: mips/op_const_4.S */
-    # const/4 vA,                          /* +B */
+    /* const/4 vA, +B */
     sll       a1, rINST, 16                #  a1 <- Bxxx0000
     GET_OPA(a0)                            #  a0 <- A+
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
@@ -912,7 +1134,7 @@
     .balign 128
 .L_op_const_16: /* 0x13 */
 /* File: mips/op_const_16.S */
-    # const/16 vAA,                        /* +BBBB */
+    /* const/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -923,13 +1145,12 @@
     .balign 128
 .L_op_const: /* 0x14 */
 /* File: mips/op_const.S */
-    # const vAA,                           /* +BBBBbbbb */
+    /* const vAA, +BBBBbbbb */
     GET_OPA(a3)                            #  a3 <- AA
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a1, a1, 16
-    or        a0, a1, a0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
 
@@ -937,7 +1158,7 @@
     .balign 128
 .L_op_const_high16: /* 0x15 */
 /* File: mips/op_const_high16.S */
-    # const/high16 vAA,                    /* +BBBB0000 */
+    /* const/high16 vAA, +BBBB0000 */
     FETCH(a0, 1)                           #  a0 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sll       a0, a0, 16                   #  a0 <- BBBB0000
@@ -949,69 +1170,62 @@
     .balign 128
 .L_op_const_wide_16: /* 0x16 */
 /* File: mips/op_const_wide_16.S */
-    # const-wide/16 vAA,                   /* +BBBB */
+    /* const-wide/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sra       a1, a0, 31                   #  a1 <- ssssssss
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_wide_32: /* 0x17 */
 /* File: mips/op_const_wide_32.S */
-    # const-wide/32 vAA,                   /* +BBBBbbbb */
+    /* const-wide/32 vAA, +BBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- 0000bbbb (low)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_S(a2, 2)                         #  a2 <- ssssBBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a2, a2, 16
-    or        a0, a0, a2                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)               #  a0 <- BBBBbbbb
     sra       a1, a0, 31                   #  a1 <- ssssssss
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_wide: /* 0x18 */
 /* File: mips/op_const_wide.S */
-    # const-wide vAA,                      /* +HHHHhhhhBBBBbbbb */
+    /* const-wide vAA, +HHHHhhhhBBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (low middle)
     FETCH(a2, 3)                           #  a2 <- hhhh (high middle)
-    sll       a1, 16 #
-    or        a0, a1                       #  a0 <- BBBBbbbb (low word)
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb (low word)
     FETCH(a3, 4)                           #  a3 <- HHHH (high)
     GET_OPA(t1)                            #  t1 <- AA
-    sll       a3, 16
-    or        a1, a3, a2                   #  a1 <- HHHHhhhh (high word)
+    INSERT_HIGH_HALF(a2, a3)               #  a2 <- HHHHhhhh (high word)
     FETCH_ADVANCE_INST(5)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, t1)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a2, t1, t0)        #  vAA/vAA+1 <- a0/a2
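
const-wide pulls four 16-bit code units and packs them into a register pair; with INSERT_HIGH_HALF the low word is built from bbbb/BBBB and the high word is assembled in place in a2 from hhhh/HHHH, so the pair handed to SET_VREG64_GOTO is now a0/a2. A small sketch of the packing:

    #include <stdint.h>

    /* const-wide vAA, +HHHHhhhhBBBBbbbb: pack four 16-bit code units into 64 bits. */
    static uint64_t pack_const_wide(uint16_t bbbb, uint16_t BBBB,
                                    uint16_t hhhh, uint16_t HHHH) {
        uint32_t lo = (uint32_t)bbbb | ((uint32_t)BBBB << 16);  /* INSERT_HIGH_HALF(a0, a1) */
        uint32_t hi = (uint32_t)hhhh | ((uint32_t)HHHH << 16);  /* INSERT_HIGH_HALF(a2, a3) */
        return ((uint64_t)hi << 32) | lo;
    }
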
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_wide_high16: /* 0x19 */
 /* File: mips/op_const_wide_high16.S */
-    # const-wide/high16 vAA,               /* +BBBB000000000000 */
+    /* const-wide/high16 vAA, +BBBB000000000000 */
     FETCH(a1, 1)                           #  a1 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     li        a0, 0                        #  a0 <- 00000000
     sll       a1, 16                       #  a1 <- BBBB0000
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_string: /* 0x1a */
 /* File: mips/op_const_string.S */
-    # const/string vAA, String             /* BBBB */
+    /* const/string vAA, string@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
@@ -1028,13 +1242,12 @@
     .balign 128
 .L_op_const_string_jumbo: /* 0x1b */
 /* File: mips/op_const_string_jumbo.S */
-    # const/string vAA, String          /* BBBBBBBB */
+    /* const/string vAA, string@BBBBBBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- bbbb (low)
     FETCH(a2, 2)                        # a2 <- BBBB (high)
     GET_OPA(a1)                         # a1 <- AA
-    sll    a2, a2, 16
-    or     a0, a0, a2                   # a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)            # a0 <- BBBBbbbb
     addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
     move   a3, rSELF
     JAL(MterpConstString)               # v0 <- Mterp(index, tgt_reg, shadow_frame, self)
@@ -1048,7 +1261,7 @@
     .balign 128
 .L_op_const_class: /* 0x1c */
 /* File: mips/op_const_class.S */
-    # const/class vAA, Class               /* BBBB */
+    /* const/class vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
@@ -1108,7 +1321,7 @@
     /*
      * Check to see if a cast from one class to another is allowed.
      */
-    # check-cast vAA, class                /* BBBB */
+    /* check-cast vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           #  a0 <- BBBB
     GET_OPA(a1)                            #  a1 <- AA
@@ -1132,7 +1345,7 @@
      * Most common situation is a non-null object, being compared against
      * an already-resolved class.
      */
-    # instance-of vA, vB, class            /* CCCC */
+    /* instance-of vA, vB, class@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -1155,6 +1368,7 @@
     /*
      * Return the length of an array.
      */
+    /* array-length vA, vB */
     GET_OPB(a1)                            #  a1 <- B
     GET_OPA4(a2)                           #  a2 <- A+
     GET_VREG(a0, a1)                       #  a0 <- vB (object ref)
@@ -1172,7 +1386,7 @@
     /*
      * Create a new instance of a class.
      */
-    # new-instance vAA, class              /* BBBB */
+    /* new-instance vAA, class@BBBB */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rSELF
@@ -1215,8 +1429,8 @@
      *
      * for: filled-new-array, filled-new-array/range
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
     .extern MterpFilledNewArray
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
@@ -1238,8 +1452,8 @@
      *
      * for: filled-new-array, filled-new-array/range
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
     .extern MterpFilledNewArrayRange
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
@@ -1258,11 +1472,10 @@
 /* File: mips/op_fill_array_data.S */
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC()
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    FETCH(a1, 1)                           #  a1 <- bbbb (lo)
+    FETCH(a0, 2)                           #  a0 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       a1, a1, 16                   #  a1 <- BBBBbbbb
-    or        a1, a0, a1                   #  a1 <- BBBBbbbb
+    INSERT_HIGH_HALF(a1, a0)               #  a1 <- BBBBbbbb
     GET_VREG(a0, a3)                       #  a0 <- vAA (array object)
     EAS1(a1, rPC, a1)                      #  a1 <- PC + BBBBbbbb*2 (array data off.)
     JAL(MterpFillArrayData)                #  v0 <- Mterp(obj, payload)
@@ -1330,10 +1543,9 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
+    FETCH(rINST, 1)                        #  rINST <- aaaa (lo)
     FETCH(a1, 2)                           #  a1 <- AAAA (hi)
-    sll       a1, a1, 16
-    or        rINST, a0, a1                #  rINST <- AAAAaaaa
+    INSERT_HIGH_HALF(rINST, a1)            #  rINST <- AAAAaaaa
     b         MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
@@ -1353,8 +1565,7 @@
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_VREG(a1, a3)                       #  a1 <- vAA
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL(MterpDoPackedSwitch)                             #  a0 <- code-unit branch offset
@@ -1379,8 +1590,7 @@
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_VREG(a1, a3)                       #  a1 <- vAA
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL(MterpDoSparseSwitch)                             #  a0 <- code-unit branch offset
@@ -1393,55 +1603,54 @@
 .L_op_cmpl_float: /* 0x2d */
 /* File: mips/op_cmpl_float.S */
     /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register rTEMP based on the results of the comparison.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * The operation we're implementing is:
-     *   if (x == y)
-     *     return 0;
-     *   else if (x < y)
-     *     return -1;
-     *   else if (x > y)
-     *     return 1;
-     *   else
-     *     return {-1 or 1};  // one or both operands was NaN
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into the destination register based on the comparison results.
      *
      * for: cmpl-float, cmpg-float
      */
     /* op vAA, vBB, vCC */
 
-    /* "clasic" form */
     FETCH(a0, 1)                           #  a0 <- CCBB
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8
     GET_VREG_F(ft0, a2)
     GET_VREG_F(ft1, a3)
 #ifdef MIPS32REVGE6
-    cmp.lt.s  ft2, ft0, ft1               # Is ft0 < ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpl_float_finish
-    cmp.lt.s  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpl_float_finish
     cmp.eq.s  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpl_float_finish
-    b         .Lop_cmpl_float_nan
-#else
-    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 0
+    cmp.lt.s  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpl_float_finish
-    c.olt.s   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.s  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpl_float_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.s    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpl_float_finish
-    b         .Lop_cmpl_float_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 0
+    c.olt.s   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
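
The rewritten compare checks equality first and then orders the operands, so the fall-through path (reached only when a strict ordering fails or an operand is NaN) leaves the NaN-biased default in rTEMP: cmpl-float falls back to -1, cmpg-float to +1, selected by the .if 0/.if 1 expansion. Roughly, in C:

    #include <stdint.h>

    /* gt_bias = 0 for cmpl-float (NaN -> -1), 1 for cmpg-float (NaN -> +1). */
    static int32_t cmp_float(float x, float y, int gt_bias) {
        if (x == y) return 0;            /* ordered, equal   */
        if (x <  y) return -1;           /* ordered, less    */
        if (x >  y) return 1;            /* ordered, greater */
        return gt_bias ? 1 : -1;         /* unordered: at least one operand is NaN */
    }
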
 
 /* ------------------------------ */
     .balign 128
@@ -1449,55 +1658,54 @@
 /* File: mips/op_cmpg_float.S */
 /* File: mips/op_cmpl_float.S */
     /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register rTEMP based on the results of the comparison.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * The operation we're implementing is:
-     *   if (x == y)
-     *     return 0;
-     *   else if (x < y)
-     *     return -1;
-     *   else if (x > y)
-     *     return 1;
-     *   else
-     *     return {-1 or 1};  // one or both operands was NaN
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into the destination register based on the comparison results.
      *
      * for: cmpl-float, cmpg-float
      */
     /* op vAA, vBB, vCC */
 
-    /* "clasic" form */
     FETCH(a0, 1)                           #  a0 <- CCBB
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8
     GET_VREG_F(ft0, a2)
     GET_VREG_F(ft1, a3)
 #ifdef MIPS32REVGE6
-    cmp.lt.s  ft2, ft0, ft1               # Is ft0 < ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpg_float_finish
-    cmp.lt.s  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpg_float_finish
     cmp.eq.s  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpg_float_finish
-    b         .Lop_cmpg_float_nan
-#else
-    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 1
+    cmp.lt.s  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpg_float_finish
-    c.olt.s   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.s  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpg_float_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.s    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpg_float_finish
-    b         .Lop_cmpg_float_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 1
+    c.olt.s   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 
 /* ------------------------------ */
@@ -1506,47 +1714,55 @@
 /* File: mips/op_cmpl_double.S */
     /*
      * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
-     * into the destination register (rTEMP) based on the comparison results.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * See op_cmpl_float for more details.
+     * into the destination register based on the comparison results.
      *
      * For: cmpl-double, cmpg-double
      */
     /* op vAA, vBB, vCC */
 
     FETCH(a0, 1)                           #  a0 <- CCBB
-    and       rOBJ, a0, 255                #  s5 <- BB
+    and       rOBJ, a0, 255                #  rOBJ <- BB
     srl       t0, a0, 8                    #  t0 <- CC
-    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(rOBJ, rFP, rOBJ)                  #  rOBJ <- &fp[BB]
     EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
     LOAD64_F(ft0, ft0f, rOBJ)
     LOAD64_F(ft1, ft1f, t0)
 #ifdef MIPS32REVGE6
-    cmp.lt.d  ft2, ft0, ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpl_double_finish
-    cmp.lt.d  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpl_double_finish
     cmp.eq.d  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpl_double_finish
-    b         .Lop_cmpl_double_nan
-#else
-    c.olt.d   fcc0, ft0, ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 0
+    cmp.lt.d  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpl_double_finish
-    c.olt.d   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.d  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpl_double_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.d    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpl_double_finish
-    b         .Lop_cmpl_double_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 0
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 /* ------------------------------ */
     .balign 128
@@ -1555,47 +1771,55 @@
 /* File: mips/op_cmpl_double.S */
     /*
      * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
-     * into the destination register (rTEMP) based on the comparison results.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * See op_cmpl_float for more details.
+     * into the destination register based on the comparison results.
      *
      * For: cmpl-double, cmpg-double
      */
     /* op vAA, vBB, vCC */
 
     FETCH(a0, 1)                           #  a0 <- CCBB
-    and       rOBJ, a0, 255                #  s5 <- BB
+    and       rOBJ, a0, 255                #  rOBJ <- BB
     srl       t0, a0, 8                    #  t0 <- CC
-    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(rOBJ, rFP, rOBJ)                  #  rOBJ <- &fp[BB]
     EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
     LOAD64_F(ft0, ft0f, rOBJ)
     LOAD64_F(ft1, ft1f, t0)
 #ifdef MIPS32REVGE6
-    cmp.lt.d  ft2, ft0, ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpg_double_finish
-    cmp.lt.d  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpg_double_finish
     cmp.eq.d  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpg_double_finish
-    b         .Lop_cmpg_double_nan
-#else
-    c.olt.d   fcc0, ft0, ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 1
+    cmp.lt.d  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpg_double_finish
-    c.olt.d   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.d  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpg_double_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.d    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpg_double_finish
-    b         .Lop_cmpg_double_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 1
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 
 /* ------------------------------ */
@@ -2015,11 +2239,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 2
     EASN(a0, a0, a1, 2)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2074,10 +2294,9 @@
     lw   a1, THREAD_EXCEPTION_OFFSET(rSELF)
     PREFETCH_INST(2)                       #  load rINST
     bnez a1, MterpException
-    SET_VREG_OBJECT(v0, rOBJ)              #  vAA <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, rOBJ, t0)     #  vAA <- v0
 
 /* ------------------------------ */
     .balign 128
@@ -2104,11 +2323,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2142,11 +2357,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2180,11 +2391,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2218,11 +2425,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2253,17 +2456,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 2
     EASN(a0, a0, a1, 2)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sw a2, MIRROR_INT_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
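
The aput handlers now split the old GOTO_OPCODE into GET_OPCODE_TARGET plus JR, so the next handler's address is known before the array store and the jump issues immediately after it. A hedged computed-goto sketch of that ordering (GNU C labels-as-values; the toy opcodes and names are mine, not ART's macros):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        static const uint16_t insns[] = { 1, 0 };   /* toy program: store, then halt */
        int32_t vregs[2] = { 42, 0 };
        const uint16_t *pc = insns;
        void *handlers[] = { &&op_halt, &&op_store };
        void *target;

        target = handlers[*pc];      /* GET_INST_OPCODE + GET_OPCODE_TARGET */
        goto *target;                /* JR(t0) */

    op_store:
        target = handlers[*++pc];    /* compute the next handler's address early */
        vregs[1] = vregs[0];         /* the memory op is scheduled before the jump */
        goto *target;

    op_halt:
        printf("%d\n", (int)vregs[1]);
        return 0;
    }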
 
 /* ------------------------------ */
     .balign 128
@@ -2271,8 +2471,6 @@
 /* File: mips/op_aput_wide.S */
     /*
      * Array put, 64 bits.  vBB[vCC] <- vAA.
-     *
-     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
      */
     /* aput-wide vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
@@ -2292,8 +2490,9 @@
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     LOAD64(a2, a3, rOBJ)                   #  a2/a3 <- vAA/vAA+1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     STORE64_off(a2, a3, a0, MIRROR_WIDE_ARRAY_DATA_OFFSET) #  a2/a3 <- vBB[vCC]
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -2337,17 +2536,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sb a2, MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2373,17 +2569,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sb a2, MIRROR_BYTE_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2409,17 +2602,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sh a2, MIRROR_CHAR_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2445,17 +2635,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sh a2, MIRROR_SHORT_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2467,6 +2654,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2478,14 +2666,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
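
The iget family still calls into the runtime for the actual field read; the change is only that the write-back is fused with dispatch (SET_VREG_GOTO / SET_VREG_OBJECT_GOTO) instead of preceding a separate GOTO_OPCODE. A hedged C outline of that flow, where get_field() merely stands in for the artGet*InstanceFromMterp call and its exception check:

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct { int32_t vregs[16]; } Frame;   /* simplified stand-in */

    static bool iget_int(Frame *fp, int dst_reg, int32_t (*get_field)(bool *threw)) {
        bool threw = false;
        int32_t value = get_field(&threw);
        if (threw) return false;        /* MterpPossibleException */
        fp->vregs[dst_reg] = value;     /* SET_VREG_GOTO: store, then jump to the next handler */
        return true;
    }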
 
 /* ------------------------------ */
     .balign 128
@@ -2496,6 +2683,7 @@
      *
      * for: iget-wide
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field byte offset
     GET_OPB(a1)                            # a1 <- B
@@ -2507,10 +2695,9 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez a3, MterpException                # bail out
-    SET_VREG64(v0, v1, a2)                 # fp[A] <- v0/v1
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a2, t0)        # fp[A] <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -2522,6 +2709,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2533,14 +2721,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 1
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 1
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2553,6 +2740,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2564,14 +2752,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2584,6 +2771,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2595,14 +2783,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2615,6 +2802,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2626,14 +2814,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2646,6 +2833,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2657,14 +2845,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2676,7 +2863,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet32InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2696,7 +2883,7 @@
     .balign 128
 .L_op_iput_wide: /* 0x5a */
 /* File: mips/op_iput_wide.S */
-    # iput-wide vA, vB, field              /* CCCC */
+    /* iput-wide vA, vB, field@CCCC */
     .extern artSet64InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2721,7 +2908,7 @@
      *
      * for: iput-object, iput-object-volatile
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
@@ -2743,7 +2930,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet8InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2770,7 +2957,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet8InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2797,7 +2984,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet16InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2824,7 +3011,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet16InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2850,7 +3037,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGet32StaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2861,14 +3048,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 /* ------------------------------ */
     .balign 128
@@ -2877,7 +3063,7 @@
     /*
      * 64-bit SGET handler.
      */
-    # sget-wide vAA, field                 /* BBBB */
+    /* sget-wide vAA, field@BBBB */
     .extern artGet64StaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2888,9 +3074,8 @@
     bnez  a3, MterpException
     GET_OPA(a1)                            # a1 <- AA
     FETCH_ADVANCE_INST(2)                  # advance rPC, load rINST
-    SET_VREG64(v0, v1, a1)                 # vAA/vAA+1 <- v0/v1
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a1, t0)        # vAA/vAA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -2902,7 +3087,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetObjStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2913,14 +3098,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 1
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 1
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
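
The `.if 1` arm here (sget-object) picks SET_VREG_OBJECT_GOTO, which, unlike the plain setter, also records the value in the frame's parallel reference array so the GC keeps seeing it. A hedged sketch of that distinction, assuming a simplified two-array frame rather than ART's real ShadowFrame:

    #include <stdint.h>

    typedef struct {
        uint32_t vregs[16];   /* primitive view */
        uint32_t refs[16];    /* reference view scanned by the GC (simplified) */
    } Frame;

    static void set_vreg(Frame *f, int i, uint32_t v) {
        f->vregs[i] = v;
        f->refs[i] = 0;       /* a primitive write clears any stale reference */
    }

    static void set_vreg_object(Frame *f, int i, uint32_t ref) {
        f->vregs[i] = ref;
        f->refs[i] = ref;     /* keep the reference view in sync */
    }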
 
 
 /* ------------------------------ */
@@ -2933,7 +3117,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetBooleanStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2944,14 +3128,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -2964,7 +3147,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetByteStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2975,14 +3158,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -2995,7 +3177,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetCharStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -3006,14 +3188,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -3026,7 +3207,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetShortStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -3037,14 +3218,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -3056,7 +3236,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3077,7 +3257,7 @@
     /*
      * 64-bit SPUT handler.
      */
-    # sput-wide vAA, field                 /* BBBB */
+    /* sput-wide vAA, field@BBBB */
     .extern artSet64IndirectStaticFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -3123,7 +3303,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3148,7 +3328,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3173,7 +3353,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3198,7 +3378,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3221,8 +3401,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtual
     EXPORT_PC()
     move    a0, rSELF
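
Every invoke wrapper in this family has the same shape: marshal (self, shadow_frame, dex_pc, inst_data) into a runtime helper, bail to the exception path on failure, otherwise advance rPC by three code units and dispatch; only the helper differs per opcode. A compressed C view with placeholder types (the real MterpInvoke* helpers live in the C++ runtime and take Thread*, ShadowFrame*, and so on):

    #include <stdbool.h>
    #include <stdint.h>

    typedef bool invoke_helper_t(void *self, void *shadow_frame,
                                 uint16_t *dex_pc, uint32_t inst_data);

    static bool do_invoke(invoke_helper_t *helper, void *self, void *frame,
                          uint16_t *pc, uint32_t inst_data) {
        if (!helper(self, frame, pc, inst_data)) {
            return false;               /* MterpException path */
        }
        return true;                    /* caller advances rPC by 3 and dispatches */
    }
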
@@ -3246,8 +3426,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeSuper
     EXPORT_PC()
     move    a0, rSELF
@@ -3271,8 +3451,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeDirect
     EXPORT_PC()
     move    a0, rSELF
@@ -3296,8 +3476,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeStatic
     EXPORT_PC()
     move    a0, rSELF
@@ -3321,8 +3501,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeInterface
     EXPORT_PC()
     move    a0, rSELF
@@ -3360,8 +3540,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3385,8 +3565,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeSuperRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3410,8 +3590,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeDirectRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3435,8 +3615,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeStaticRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3460,8 +3640,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeInterfaceRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3506,11 +3686,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -3520,8 +3700,7 @@
                                   #  optional op
     negu a0, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
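
The generic unop template reads vB, applies a single ALU op, and writes vA while dispatching. For neg-int the arithmetic reduces to the following hedged scalar model (not the macro expansion itself):

    #include <stdint.h>

    /* neg-int: vA <- -vB with ordinary two's-complement wraparound (negu never traps). */
    static int32_t neg_int(int32_t vB) {
        return (int32_t)(0u - (uint32_t)vB);
    }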
 
 
 /* ------------------------------ */
@@ -3531,11 +3710,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -3545,8 +3724,7 @@
                                   #  optional op
     not a0, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -3556,7 +3734,7 @@
 /* File: mips/unopWide.S */
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
     * This could be a MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -3565,14 +3743,12 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vA
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     negu v0, a0                              #  optional op
     negu v1, a1; sltu a0, zero, v0; subu v1, v1, a0                                 #  v0/v1 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
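
neg-long negates a 64-bit value held as two 32-bit halves: negate both words, then subtract a borrow from the high word whenever the low word is non-zero (the sltu/subu pair). The same arithmetic in C, assuming the low word sits first in the vreg pair:

    #include <stdint.h>

    static void neg_long(uint32_t lo, uint32_t hi, uint32_t *out_lo, uint32_t *out_hi) {
        uint32_t v0 = 0u - lo;      /* negu v0, a0 */
        uint32_t v1 = 0u - hi;      /* negu v1, a1 */
        v1 -= (v0 != 0);            /* sltu a0, zero, v0; subu v1, v1, a0 */
        *out_lo = v0;
        *out_hi = v1;
    }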
 
 
 /* ------------------------------ */
@@ -3582,7 +3758,7 @@
 /* File: mips/unopWide.S */
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
     * This could be a MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -3591,14 +3767,12 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vA
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     not a0, a0                              #  optional op
     not a1, a1                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -3608,11 +3782,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -3622,8 +3796,7 @@
                                   #  optional op
     addu a0, a0, 0x80000000                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -3633,7 +3806,7 @@
 /* File: mips/unopWide.S */
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
     * This could be a MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -3642,14 +3815,12 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vA
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
                                   #  optional op
     addu a1, a1, 0x80000000                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -3659,8 +3830,7 @@
 /* File: mips/unopWider.S */
     /*
      * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * that specifies an instruction that performs "result0/result1 = op a0".
      *
      * For: int-to-long
      */
@@ -3672,9 +3842,7 @@
                                   #  optional op
     sra a1, a0, 31                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vA/vA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 10-11 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
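
int-to-long only needs a sign extension: the high word is the low word shifted arithmetically right by 31, all ones for a negative value and all zeros otherwise. A small C sketch of the same idea:

    #include <stdint.h>

    static void int_to_long(int32_t vB, uint32_t *out_lo, uint32_t *out_hi) {
        *out_lo = (uint32_t)vB;
        *out_hi = (uint32_t)(vB >> 31);   /* relies on arithmetic right shift, as sra does */
    }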
 
 
 /* ------------------------------ */
@@ -3683,23 +3851,20 @@
 /* File: mips/op_int_to_float.S */
 /* File: mips/funop.S */
     /*
-     * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * Generic 32-bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: int-to-float, float-to-int
+     * for: int-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t0 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.s.w fv0, fa0
-
-.Lop_int_to_float_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t1)         #  vA <- fv0
 
 
 /* ------------------------------ */
@@ -3708,11 +3873,10 @@
 /* File: mips/op_int_to_double.S */
 /* File: mips/funopWider.S */
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * Generic 32bit-to-64bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * For: int-to-double, float-to-double
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -3720,11 +3884,8 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.d.w fv0, fa0
-
-.Lop_int_to_double_set_vreg:
-    SET_VREG64_F(fv0, fv0f, rOBJ)                             #  vA/vA+1 <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -3741,120 +3902,157 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
     .balign 128
 .L_op_long_to_float: /* 0x85 */
 /* File: mips/op_long_to_float.S */
-/* File: mips/unopNarrower.S */
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
-     *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * long-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.s.l   fv0, fv0
+#else
     LOAD64(rARG0, rARG1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     JAL(__floatdisf)
+#endif
 
-.Lop_long_to_float_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
 
 /* ------------------------------ */
     .balign 128
 .L_op_long_to_double: /* 0x86 */
 /* File: mips/op_long_to_double.S */
-/* File: mips/funopWide.S */
     /*
-     * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
-     * This could be a MIPS instruction or a function call.
-     *
-     * long-to-double, double-to-long
+     * long-to-double
      */
     /* unop vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.d.l   fv0, fv0
+#else
     LOAD64(rARG0, rARG1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-                                  #  optional op
-    JAL(__floatdidf)                                 #  a0/a1 <- op, a2-a3 changed
+    JAL(__floatdidf)                       #  a0/a1 <- op, a2-a3 changed
+#endif
 
-.Lop_long_to_double_set_vreg:
-    SET_VREG64_F(fv0, fv0f, rOBJ)                             #  vAA <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
-
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- result
 
 /* ------------------------------ */
     .balign 128
 .L_op_float_to_int: /* 0x87 */
 /* File: mips/op_float_to_int.S */
-/* File: mips/funop.S */
     /*
-     * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
-     * This could be a MIPS instruction or a function call.
+     * float-to-int
      *
-     * for: int-to-float, float-to-int
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to a modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t0 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    b f2i_doconv
 
-.Lop_float_to_int_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)
+    li        t0, INT_MIN_AS_FLOAT
+    mtc1      t0, fa1
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.s  ft0, fa1, fa0
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    GOTO_OPCODE(t1)                        #  jump to next instruction
-
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
+#else
+    c.ole.s   fcc0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.s    fcc0, fa0, fa0
+    mtc1      zero, fa0
+    movt.s    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
+#endif
+1:
+    trunc.w.s fa0, fa0
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
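
The inlined sequence exists to honor the Dalvik/Java float-to-int rule that the bare EABI conversion does not: NaN becomes 0 and out-of-range values saturate. A plain C statement of the target semantics, i.e. what the handler must produce rather than how the FPU sequence produces it:

    #include <math.h>
    #include <stdint.h>

    static int32_t float_to_int(float f) {
        if (isnan(f)) return 0;
        if (f >= 2147483648.0f) return INT32_MAX;    /* 2^31 and above saturate high */
        if (f <= -2147483648.0f) return INT32_MIN;   /* -2^31 and below saturate low */
        return (int32_t)f;                           /* truncate toward zero */
    }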
 
 /* ------------------------------ */
     .balign 128
 .L_op_float_to_long: /* 0x88 */
 /* File: mips/op_float_to_long.S */
-/* File: mips/funopWider.S */
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * float-to-long
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to a modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    b f2l_doconv
 
-.Lop_float_to_long_set_vreg:
-    SET_VREG64(rRESULT0, rRESULT1, rOBJ)                             #  vA/vA+1 <- a0/a1
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_FLOAT
+    mtc1      t0, fa1
+    cmp.le.s  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_FLOAT : 0
+1:
+    trunc.l.s fa0, fa0
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA/vA+1 <- result
+#else
+    c.eq.s    fcc0, fa0, fa0
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1f      fcc0, .Lop_float_to_long_get_opcode
 
+    li        t0, LONG_MIN_AS_FLOAT
+    mtc1      t0, fa1
+    c.ole.s   fcc0, fa0, fa1
+    li        rRESULT1, LONG_MIN_HIGH
+    bc1t      fcc0, .Lop_float_to_long_get_opcode
+
+    neg.s     fa1, fa1
+    c.ole.s   fcc0, fa1, fa0
+    nor       rRESULT0, rRESULT0, zero
+    nor       rRESULT1, rRESULT1, zero
+    bc1t      fcc0, .Lop_float_to_long_get_opcode
+
+    JAL(__fixsfdi)
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    b         .Lop_float_to_long_set_vreg
+#endif
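
On pre-R6 cores the handler screens the awkward cases itself before falling back to the __fixsfdi soft conversion: NaN yields 0, values at or below -2^63 yield Long.MIN_VALUE, values at or above 2^63 yield Long.MAX_VALUE (built by NOR-ing the MIN pattern), and only in-range inputs reach the library call. The same decision ladder in C, with the bounds written out for clarity:

    #include <math.h>
    #include <stdint.h>

    static int64_t float_to_long(float f) {
        if (isnan(f)) return 0;                               /* c.eq.s fails on NaN */
        if (f <= -9223372036854775808.0f) return INT64_MIN;   /* at or below -2^63 */
        if (f >= 9223372036854775808.0f) return INT64_MAX;    /* at or above 2^63 */
        return (int64_t)f;                                    /* the __fixsfdi case */
    }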
 
 /* ------------------------------ */
     .balign 128
@@ -3862,11 +4060,10 @@
 /* File: mips/op_float_to_double.S */
 /* File: mips/funopWider.S */
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * Generic 32bit-to-64bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * For: int-to-double, float-to-double
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -3874,77 +4071,111 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.d.s fv0, fa0
-
-.Lop_float_to_double_set_vreg:
-    SET_VREG64_F(fv0, fv0f, rOBJ)                             #  vA/vA+1 <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
     .balign 128
 .L_op_double_to_int: /* 0x8a */
 /* File: mips/op_double_to_int.S */
-/* File: mips/unopNarrower.S */
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
+     * double-to-int
      *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to a modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    b d2i_doconv
 
-.Lop_double_to_int_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/*
- * Convert the double in a0/a1 to an int in a0.
- *
- * We have to clip values to int min/max per the specification.  The
- * expected common case is a "reasonable" value that converts directly
- * to modest integer.  The EABI convert function isn't doing this for us.
- */
+    li        t0, INT_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.d  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
+#else
+    c.ole.d   fcc0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.d    fcc0, fa0, fa0
+    mtc1      zero, fa0
+    MOVE_TO_FPU_HIGH(zero, fa0, fa0f)
+    movt.d    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
+#endif
+1:
+    trunc.w.d fa0, fa0
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
 
 /* ------------------------------ */
     .balign 128
 .L_op_double_to_long: /* 0x8b */
 /* File: mips/op_double_to_long.S */
-/* File: mips/funopWide.S */
     /*
-     * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
-     * This could be a MIPS instruction or a function call.
+     * double-to-long
      *
-     * long-to-double, double-to-long
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to a modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-                                  #  optional op
-    b d2l_doconv                                 #  a0/a1 <- op, a2-a3 changed
 
-.Lop_double_to_long_set_vreg:
-    SET_VREG64(rRESULT0, rRESULT1, rOBJ)                             #  vAA <- a0/a1
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    mthc1     t0, fa1
+    cmp.le.d  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_DOUBLE : 0
+1:
+    trunc.l.d fa0, fa0
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA/vA+1 <- result
+#else
+    c.eq.d    fcc0, fa0, fa0
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1f      fcc0, .Lop_double_to_long_get_opcode
 
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)
+    c.ole.d   fcc0, fa0, fa1
+    li        rRESULT1, LONG_MIN_HIGH
+    bc1t      fcc0, .Lop_double_to_long_get_opcode
+
+    neg.d     fa1, fa1
+    c.ole.d   fcc0, fa1, fa0
+    nor       rRESULT0, rRESULT0, zero
+    nor       rRESULT1, rRESULT1, zero
+    bc1t      fcc0, .Lop_double_to_long_get_opcode
+
+    JAL(__fixdfdi)
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    b         .Lop_double_to_long_set_vreg
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -3952,28 +4183,20 @@
 /* File: mips/op_double_to_float.S */
 /* File: mips/unopNarrower.S */
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
+     * Generic 64bit-to-32bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * For: double-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.s.d fv0, fa0
-
-.Lop_double_to_float_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
 
 
 /* ------------------------------ */
@@ -3983,22 +4206,21 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
     GET_OPA4(t0)                           #  t0 <- A+
     GET_VREG(a0, a3)                       #  a0 <- vB
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    sll a0, a0, 24                              #  optional op
-    sra a0, a0, 24                                 #  a0 <- op, a0-a3 changed
+                                  #  optional op
+    SEB(a0, a0)                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
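
int-to-byte now uses the single seb instruction (MIPS32R2 and later) where the template used a shift-left/shift-right pair; both forms sign-extend bit 7 of vB into the upper 24 bits. Equivalent C, as a quick sanity check:

    #include <stdint.h>

    static int32_t int_to_byte(int32_t vB) {
        return (int32_t)(int8_t)vB;                     /* seb: sign-extend the low byte */
    }

    static int32_t int_to_byte_shifts(int32_t vB) {
        return (int32_t)((uint32_t)vB << 24) >> 24;     /* the old sll/sra 24 pair (arithmetic shift assumed) */
    }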
 
 
 /* ------------------------------ */
@@ -4008,11 +4230,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -4022,8 +4244,7 @@
                                   #  optional op
     and a0, 0xffff                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -4033,22 +4254,21 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
     GET_OPA4(t0)                           #  t0 <- A+
     GET_VREG(a0, a3)                       #  a0 <- vB
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    sll a0, 16                              #  optional op
-    sra a0, 16                                 #  a0 <- op, a0-a3 changed
+                                  #  optional op
+    SEH(a0, a0)                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -4087,7 +4307,6 @@
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4126,7 +4345,6 @@
     subu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4165,7 +4383,6 @@
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4205,7 +4422,6 @@
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #else
 /* File: mips/binop.S */
@@ -4240,7 +4456,6 @@
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #endif
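The #if/#else pairs above select between the MIPS32R6 divide, which writes its result straight to a general-purpose register, and the legacy form, which divides into the HI/LO pair and needs an mflo or mfhi to fetch the result. A side-by-side sketch (the guard macro itself is not shown in these hunks):

    # MIPS32R6: three-operand div/mod, result in a GPR
    div     a0, a0, a1                     #  a0 <- a0 / a1
    mod     a2, a2, a3                     #  a2 <- a2 % a3
    # Pre-R6: divide into HI/LO, then move the half you need
    div     zero, a0, a1                   #  LO <- quotient, HI <- remainder
    mflo    a0                             #  a0 <- quotient   (mfhi for rem-int)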
 
@@ -4281,7 +4496,6 @@
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #else
 /* File: mips/binop.S */
@@ -4316,7 +4530,6 @@
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #endif
 
@@ -4356,7 +4569,6 @@
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4395,7 +4607,6 @@
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4434,7 +4645,6 @@
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4473,7 +4683,6 @@
     sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4512,7 +4721,6 @@
     sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4551,7 +4759,6 @@
     srl a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4571,10 +4778,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4600,7 +4807,6 @@
     addu a1, a3, a1; sltu v1, v0, a2; addu v1, v1, a1                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4619,10 +4825,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4648,7 +4854,6 @@
     subu v1, a1, a3; sltu a0, a0, v0; subu v1, v1, a0                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4702,10 +4907,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4731,7 +4936,6 @@
     JAL(__divdi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
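64-bit division has no single MIPS instruction here, so the handler calls the compiler-runtime helper; under the o32 ABI the operands are already sitting in the right argument registers. A sketch of what the call relies on (signature from libgcc):

    #   long long __divdi3(long long num, long long den);   (__moddi3 for rem-long)
    # o32 passes num in a0/a1 and den in a2/a3 and returns the 64-bit result in v0/v1,
    # so no register shuffling is needed around the call:
    JAL(__divdi3)                          #  v0/v1 <- (a0/a1) / (a2/a3)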
 
 
 /* ------------------------------ */
@@ -4743,10 +4947,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4772,7 +4976,6 @@
     JAL(__moddi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4784,10 +4987,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4813,7 +5016,6 @@
     and a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4825,10 +5027,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4854,7 +5056,6 @@
     or a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4866,10 +5067,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4895,7 +5096,6 @@
     xor a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4928,7 +5128,7 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- v0/v1
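The long-shift handlers assemble a 64-bit shift from 32-bit shifts. Because MIPS shift instructions use only the low five bits of the amount, the "alo >> (32-(shift&31))" step is split into a fixed shift by 1 plus a shift by (31-s), which also keeps the s == 0 case well defined. A sketch of the s < 32 path for shl-long, operand layout as in the comments above (the s >= 32 case is handled separately and is not visible in this hunk):

    sll     v0, a0, a2                     #  rlo <- alo << s
    not     v1, a2                         #  v1  <- ~s  (low 5 bits == 31 - s)
    srl     a0, 1                          #  alo >> 1 ...
    srl     a0, v1                         #  ... then >> (31 - s): total 32 - s, yields 0 when s == 0
    sll     v1, a1, a2                     #  rhi <- ahi << s
    or      v1, a0                         #  rhi <- rhi | (alo >> (32 - s))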
 
 /* ------------------------------ */
     .balign 128
@@ -4959,7 +5159,7 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/VAA+1 <- v0/v0
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -5006,7 +5206,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5014,9 +5214,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     add.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5032,7 +5231,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5040,9 +5239,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     sub.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5058,7 +5256,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5066,9 +5264,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     mul.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5084,7 +5281,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5092,9 +5289,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     div.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5110,7 +5306,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5118,9 +5314,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     JAL(fmodf)                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5129,8 +5324,8 @@
 /* File: mips/op_add_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5139,7 +5334,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5149,8 +5344,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     add.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_add_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5159,8 +5354,8 @@
 /* File: mips/op_sub_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5169,7 +5364,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5179,8 +5374,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     sub.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_sub_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5189,8 +5384,8 @@
 /* File: mips/op_mul_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5199,7 +5394,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5209,8 +5404,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     mul.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_mul_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5219,8 +5414,8 @@
 /* File: mips/op_div_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5229,7 +5424,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5239,8 +5434,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     div.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_div_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5249,8 +5444,8 @@
 /* File: mips/op_rem_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5259,7 +5454,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5269,8 +5464,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     JAL(fmod)
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_rem_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5304,8 +5499,7 @@
                                   #  optional op
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5339,8 +5533,7 @@
                                   #  optional op
     subu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5374,8 +5567,7 @@
                                   #  optional op
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5410,8 +5602,7 @@
                                   #  optional op
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binop2addr.S */
@@ -5441,8 +5632,7 @@
     div zero, a0, a1                              #  optional op
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -5478,8 +5668,7 @@
                                   #  optional op
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binop2addr.S */
@@ -5509,8 +5698,7 @@
     div zero, a0, a1                              #  optional op
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -5545,8 +5733,7 @@
                                   #  optional op
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5580,8 +5767,7 @@
                                   #  optional op
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5615,8 +5801,7 @@
                                   #  optional op
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5650,8 +5835,7 @@
                                   #  optional op
     sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5685,8 +5869,7 @@
                                   #  optional op
     sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5720,8 +5903,7 @@
                                   #  optional op
     srl a0, a0, a1                                  #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5736,22 +5918,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5761,9 +5942,7 @@
     addu v0, a2, a0                              #  optional op
     addu a1, a3, a1; sltu v1, v0, a2; addu v1, v1, a1                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5778,22 +5957,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5803,9 +5981,7 @@
     subu v0, a0, a2                              #  optional op
     subu v1, a1, a3; sltu a0, a0, v0; subu v1, v1, a0                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
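MIPS has no carry flag, so the add-long and sub-long templates recover the carry or borrow with an unsigned compare (sltu) against one of the inputs. Annotated restatement of the two sequences above, operands in low/high order as described in the template comments:

    # add-long: (a0,a1) + (a2,a3) -> (v0,v1)
    addu    v0, a2, a0                     #  v0 <- low sum (mod 2^32)
    addu    a1, a3, a1                     #  a1 <- high sum, carry not yet added
    sltu    v1, v0, a2                     #  v1 <- 1 iff the low sum wrapped (carry out)
    addu    v1, v1, a1                     #  v1 <- high sum + carry
    # sub-long: (a0,a1) - (a2,a3) -> (v0,v1)
    subu    v0, a0, a2                     #  v0 <- low difference (mod 2^32)
    subu    v1, a1, a3                     #  v1 <- high difference, borrow not yet applied
    sltu    a0, a0, v0                     #  a0 <- 1 iff the low difference wrapped (borrow out)
    subu    v1, v1, a0                     #  v1 <- high difference - borrow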
 
 
 /* ------------------------------ */
@@ -5840,9 +6016,7 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    # vAA <- v0 (low)
-    SET_VREG64(v0, v1, rOBJ)               #  vAA+1 <- v1 (high)
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, rOBJ, t1)      #  vA/vA+1 <- v0(low)/v1(high)
 
 /* ------------------------------ */
     .balign 128
@@ -5853,22 +6027,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 1
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5878,9 +6051,7 @@
                                   #  optional op
     JAL(__divdi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5892,22 +6063,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 1
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5917,9 +6087,7 @@
                                   #  optional op
     JAL(__moddi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5931,22 +6099,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5956,9 +6123,7 @@
     and a0, a0, a2                              #  optional op
     and a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -5970,22 +6135,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5995,9 +6159,7 @@
     or a0, a0, a2                              #  optional op
     or a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -6009,22 +6171,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -6034,9 +6195,7 @@
     xor a0, a0, a2                              #  optional op
     xor a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -6052,7 +6211,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t2, rFP, rOBJ)                    #  t2 <- &fp[A]
-    LOAD64(a0, a1, t2)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t2)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -6065,7 +6224,7 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vA/vA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -6080,7 +6239,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t2)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
 
@@ -6092,7 +6251,7 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vA/vA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -6107,7 +6266,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t3)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -6120,7 +6279,7 @@
     sll       a1, 1
     sll       a1, a0                       #  ahi<- ahi << (32-(shift&31))
     or        v0, a1                       #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vA/vA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -6129,23 +6288,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     add.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6155,23 +6313,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     sub.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6181,23 +6338,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     mul.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6207,23 +6363,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     div.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6233,23 +6388,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     JAL(fmodf)
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6258,12 +6412,13 @@
 /* File: mips/op_add_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6275,9 +6430,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     add.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6286,12 +6440,13 @@
 /* File: mips/op_sub_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6303,9 +6458,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     sub.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6314,12 +6468,13 @@
 /* File: mips/op_mul_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6331,9 +6486,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     mul.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6342,12 +6496,13 @@
 /* File: mips/op_div_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6359,9 +6514,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     div.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6370,12 +6524,13 @@
 /* File: mips/op_rem_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6387,9 +6542,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     JAL(fmod)
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6409,12 +6563,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6424,8 +6577,7 @@
                                   #  optional op
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6446,12 +6598,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6461,8 +6612,7 @@
                                   #  optional op
     subu a0, a1, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6482,12 +6632,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6497,8 +6646,7 @@
                                   #  optional op
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6519,12 +6667,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6534,8 +6681,7 @@
                                   #  optional op
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binopLit16.S */
@@ -6551,12 +6697,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6566,8 +6711,7 @@
     div zero, a0, a1                              #  optional op
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -6589,12 +6733,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6604,8 +6747,7 @@
                                   #  optional op
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binopLit16.S */
@@ -6621,12 +6763,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6636,8 +6777,7 @@
     div zero, a0, a1                              #  optional op
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -6658,12 +6798,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6673,8 +6812,7 @@
                                   #  optional op
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6694,12 +6832,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6709,8 +6846,7 @@
                                   #  optional op
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6730,12 +6866,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6745,8 +6880,7 @@
                                   #  optional op
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6767,7 +6901,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6783,7 +6917,6 @@
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -6804,7 +6937,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6820,7 +6953,6 @@
     subu a0, a1, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -6841,7 +6973,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6857,7 +6989,6 @@
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -6879,7 +7010,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6895,7 +7026,6 @@
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #else
 /* File: mips/binopLit8.S */
@@ -6912,7 +7042,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6928,7 +7058,6 @@
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #endif
 
@@ -6951,7 +7080,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6967,7 +7096,6 @@
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #else
 /* File: mips/binopLit8.S */
@@ -6984,7 +7112,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7000,7 +7128,6 @@
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #endif
 
@@ -7022,7 +7149,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7038,7 +7165,6 @@
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7059,7 +7185,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7075,7 +7201,6 @@
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7096,7 +7221,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7112,7 +7237,6 @@
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7133,7 +7257,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7149,7 +7273,6 @@
     sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7170,7 +7293,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7186,7 +7309,6 @@
     sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7207,7 +7329,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7223,7 +7345,6 @@
     srl a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7231,7 +7352,7 @@
 .L_op_iget_quick: /* 0xe3 */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7248,7 +7369,7 @@
     .balign 128
 .L_op_iget_wide_quick: /* 0xe4 */
 /* File: mips/op_iget_wide_quick.S */
-    # iget-wide-quick vA, vB, offset       /* CCCC */
+    /* iget-wide-quick vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7259,8 +7380,7 @@
     LOAD64(a0, a1, t0)                     #  a0 <- obj.field (64 bits, aligned)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -7277,17 +7397,16 @@
     GET_OPA4(a2)                           #  a2<- A+
     PREFETCH_INST(2)                       #  load rINST
     bnez a3, MterpPossibleException        #  bail out
-    SET_VREG_OBJECT(v0, a2)                #  fp[A] <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       #  fp[A] <- v0
 
 /* ------------------------------ */
     .balign 128
 .L_op_iput_quick: /* 0xe6 */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7296,15 +7415,16 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sw    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
 .L_op_iput_wide_quick: /* 0xe7 */
 /* File: mips/op_iput_wide_quick.S */
-    # iput-wide-quick vA, vB, offset       /* CCCC */
+    /* iput-wide-quick vA, vB, offset@CCCC */
     GET_OPA4(a0)                           #  a0 <- A(+)
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a2, a1)                       #  a2 <- fp[B], the object pointer
@@ -7315,16 +7435,17 @@
     FETCH(a3, 1)                           #  a3 <- field byte offset
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      a2, a2, a3                   #  obj.field (64 bits, aligned) <- a0/a1
-    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    GET_OPCODE_TARGET(t0)
+    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
+    JR(t0)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
 .L_op_iput_object_quick: /* 0xe8 */
 /* File: mips/op_iput_object_quick.S */
     /* For: iput-object-quick */
-    # op vA, vB, offset                 /* CCCC */
+    /* op vA, vB, offset@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
@@ -7343,8 +7464,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualQuick
     EXPORT_PC()
     move    a0, rSELF
@@ -7368,8 +7489,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualQuickRange
     EXPORT_PC()
     move    a0, rSELF
@@ -7391,7 +7512,7 @@
 /* File: mips/op_iput_boolean_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7400,9 +7521,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sb    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7411,7 +7533,7 @@
 /* File: mips/op_iput_byte_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7420,9 +7542,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sb    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7431,7 +7554,7 @@
 /* File: mips/op_iput_char_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7440,9 +7563,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sh    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7451,7 +7575,7 @@
 /* File: mips/op_iput_short_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7460,9 +7584,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sh    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7471,7 +7596,7 @@
 /* File: mips/op_iget_boolean_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7491,7 +7616,7 @@
 /* File: mips/op_iget_byte_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7511,7 +7636,7 @@
 /* File: mips/op_iget_char_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7531,7 +7656,7 @@
 /* File: mips/op_iget_short_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7694,264 +7819,29 @@
     .balign 4
 artMterpAsmSisterStart:
 
-/* continuation for op_cmpl_float */
-
-.Lop_cmpl_float_nan:
-    li rTEMP, -1
-
-.Lop_cmpl_float_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_cmpg_float */
-
-.Lop_cmpg_float_nan:
-    li rTEMP, 1
-
-.Lop_cmpg_float_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_cmpl_double */
-
-.Lop_cmpl_double_nan:
-    li rTEMP, -1
-
-.Lop_cmpl_double_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_cmpg_double */
-
-.Lop_cmpg_double_nan:
-    li rTEMP, 1
-
-.Lop_cmpg_double_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_float_to_int */
-
-/*
- * Not an entry point as it is used only once !!
- */
-f2i_doconv:
-#ifdef MIPS32REVGE6
-    l.s       fa1, .LFLOAT_TO_INT_max
-    cmp.le.s  ft2, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    cmp.le.s  ft2, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
-#else
-    l.s       fa1, .LFLOAT_TO_INT_max
-    c.ole.s   fcc0, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1t      .Lop_float_to_int_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    c.ole.s   fcc0, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1t      .Lop_float_to_int_set_vreg_f
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .Lop_float_to_int_set_vreg_f
-#endif
-
-    trunc.w.s  fv0, fa0
-    b         .Lop_float_to_int_set_vreg_f
-
-.LFLOAT_TO_INT_max:
-    .word 0x4f000000
-.LFLOAT_TO_INT_min:
-    .word 0xcf000000
-.LFLOAT_TO_INT_ret_max:
-    .word 0x7fffffff
-.LFLOAT_TO_INT_ret_min:
-    .word 0x80000000
-
 /* continuation for op_float_to_long */
 
-f2l_doconv:
-#ifdef MIPS32REVGE6
-    l.s       fa1, .LLONG_TO_max
-    cmp.le.s  ft2, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1nez    ft2, .Lop_float_to_long_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    cmp.le.s  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1nez    ft2, .Lop_float_to_long_set_vreg
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .Lop_float_to_long_set_vreg
-#else
-    l.s       fa1, .LLONG_TO_max
-    c.ole.s   fcc0, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1t      .Lop_float_to_long_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    c.ole.s   fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1t      .Lop_float_to_long_set_vreg
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1t      .Lop_float_to_long_set_vreg
+#ifndef MIPS32REVGE6
+.Lop_float_to_long_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.Lop_float_to_long_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
 #endif
 
-    JAL(__fixsfdi)
-
-    b         .Lop_float_to_long_set_vreg
-
-.LLONG_TO_max:
-    .word 0x5f000000
-
-.LLONG_TO_min:
-    .word 0xdf000000
-
-/* continuation for op_double_to_int */
-
-d2i_doconv:
-#ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
-#else
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1t      .Lop_double_to_int_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1t      .Lop_double_to_int_set_vreg_f
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .Lop_double_to_int_set_vreg_f
-#endif
-
-    trunc.w.d  fv0, fa0
-    b         .Lop_double_to_int_set_vreg_f
-
-.LDOUBLE_TO_INT_max:
-    .dword 0x41dfffffffc00000
-.LDOUBLE_TO_INT_min:
-    .dword 0xc1e0000000000000              #  minint, as a double (high word)
-.LDOUBLE_TO_INT_maxret:
-    .word 0x7fffffff
-.LDOUBLE_TO_INT_minret:
-    .word 0x80000000
-
 /* continuation for op_double_to_long */
 
-d2l_doconv:
-#ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .Lop_double_to_long_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .Lop_double_to_long_set_vreg
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .Lop_double_to_long_set_vreg
-#else
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .Lop_double_to_long_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .Lop_double_to_long_set_vreg
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1t      .Lop_double_to_long_set_vreg
+#ifndef MIPS32REVGE6
+.Lop_double_to_long_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.Lop_double_to_long_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
 #endif
-    JAL(__fixdfdi)
-    b         .Lop_double_to_long_set_vreg
-
-.LDOUBLE_TO_LONG_max:
-    .dword 0x43e0000000000000              #  maxlong, as a double (high word)
-.LDOUBLE_TO_LONG_min:
-    .dword 0xc3e0000000000000              #  minlong, as a double (high word)
-.LDOUBLE_TO_LONG_ret_max:
-    .dword 0x7fffffffffffffff
-.LDOUBLE_TO_LONG_ret_min:
-    .dword 0x8000000000000000
 
 /* continuation for op_mul_long */
 
 .Lop_mul_long_finish:
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, a0)                 #  vAA::vAA+1 <- v0(low) :: v1(high)
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a0, t0)        #  vAA/vAA+1 <- v0(low)/v1(high)
 
 /* continuation for op_shl_long */
 
@@ -7969,51 +7859,21 @@
 .Lop_ushr_long_finish:
     SET_VREG64_GOTO(v1, zero, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
 
-/* continuation for op_add_double */
-
-.Lop_add_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_sub_double */
-
-.Lop_sub_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_mul_double */
-
-.Lop_mul_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_div_double */
-
-.Lop_div_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_rem_double */
-
-.Lop_rem_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
 /* continuation for op_shl_long_2addr */
 
 .Lop_shl_long_2addr_finish:
-    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vA/vA+1 <- rlo/rhi
 
 /* continuation for op_shr_long_2addr */
 
 .Lop_shr_long_2addr_finish:
     sra     a3, a1, 31                     #  a3<- sign(ah)
-    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vA/vA+1 <- rlo/rhi
 
 /* continuation for op_ushr_long_2addr */
 
 .Lop_ushr_long_2addr_finish:
-    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vA/vA+1 <- rlo/rhi
 
     .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
     .global artMterpAsmSisterEnd
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 85bfd17..fad7d90 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -781,7 +781,7 @@
   SendRequestAndPossiblySuspend(pReq, suspend_policy, threadId);
 }
 
-static void LogMatchingEventsAndThread(const std::vector<JdwpEvent*> match_list,
+static void LogMatchingEventsAndThread(const std::vector<JdwpEvent*>& match_list,
                                        ObjectId thread_id)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   for (size_t i = 0, e = match_list.size(); i < e; ++i) {
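
The jdwp_event.cc hunk above changes only the parameter from pass-by-value to pass-by-const-reference, so calls to LogMatchingEventsAndThread stop copying the whole match list. A minimal stand-alone illustration of that difference (hypothetical LogAll* functions, not ART code):

    #include <cstdio>
    #include <vector>

    // Pass-by-value: the vector (and its heap buffer) is copied on every call.
    static void LogAllByValue(std::vector<int> events) { std::printf("%zu\n", events.size()); }

    // Pass-by-const-reference: the caller's vector is read in place, no copy is made.
    static void LogAllByRef(const std::vector<int>& events) { std::printf("%zu\n", events.size()); }

    int main() {
      std::vector<int> match_list = {1, 2, 3};
      LogAllByValue(match_list);  // copies all elements
      LogAllByRef(match_list);    // copies nothing
      return 0;
    }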
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index a26d850..2fbf5ef 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -80,8 +80,18 @@
 
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
+  // Map in low 4gb to simplify accessing root tables for x86_64.
+  // We could do PC-relative addressing to avoid this problem, but that
+  // would require reserving code and data area before submitting, which
+  // means more windows for the code memory to be RWX.
   MemMap* data_map = MemMap::MapAnonymous(
-      "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str, use_ashmem);
+      "data-code-cache", nullptr,
+      max_capacity,
+      kProtAll,
+      /* low_4gb */ true,
+      /* reuse */ false,
+      &error_str,
+      use_ashmem);
   if (data_map == nullptr) {
     std::ostringstream oss;
     oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
@@ -197,34 +207,40 @@
 
 uint8_t* JitCodeCache::CommitCode(Thread* self,
                                   ArtMethod* method,
-                                  const uint8_t* vmap_table,
+                                  uint8_t* stack_map,
+                                  uint8_t* roots_data,
                                   size_t frame_size_in_bytes,
                                   size_t core_spill_mask,
                                   size_t fp_spill_mask,
                                   const uint8_t* code,
                                   size_t code_size,
-                                  bool osr) {
+                                  bool osr,
+                                  Handle<mirror::ObjectArray<mirror::Object>> roots) {
   uint8_t* result = CommitCodeInternal(self,
                                        method,
-                                       vmap_table,
+                                       stack_map,
+                                       roots_data,
                                        frame_size_in_bytes,
                                        core_spill_mask,
                                        fp_spill_mask,
                                        code,
                                        code_size,
-                                       osr);
+                                       osr,
+                                       roots);
   if (result == nullptr) {
     // Retry.
     GarbageCollectCache(self);
     result = CommitCodeInternal(self,
                                 method,
-                                vmap_table,
+                                stack_map,
+                                roots_data,
                                 frame_size_in_bytes,
                                 core_spill_mask,
                                 fp_spill_mask,
                                 code,
                                 code_size,
-                                osr);
+                                osr,
+                                roots);
   }
   return result;
 }
@@ -243,20 +259,66 @@
   return reinterpret_cast<uintptr_t>(code) - RoundUp(sizeof(OatQuickMethodHeader), alignment);
 }
 
+static uint32_t ComputeRootTableSize(uint32_t number_of_roots) {
+  return sizeof(uint32_t) + number_of_roots * sizeof(GcRoot<mirror::Object>);
+}
+
+static uint32_t GetNumberOfRoots(const uint8_t* stack_map) {
+  // The length of the table is stored just before the stack map (and therefore at the end of
+  // the table itself), in order to be able to fetch it from a `stack_map` pointer.
+  return reinterpret_cast<const uint32_t*>(stack_map)[-1];
+}
+
+static void FillRootTable(uint8_t* roots_data, Handle<mirror::ObjectArray<mirror::Object>> roots)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  GcRoot<mirror::Object>* gc_roots = reinterpret_cast<GcRoot<mirror::Object>*>(roots_data);
+  uint32_t length = roots->GetLength();
+  // Put all roots in `roots_data`.
+  for (uint32_t i = 0; i < length; ++i) {
+    gc_roots[i] = GcRoot<mirror::Object>(roots->Get(i));
+  }
+  // Store the length of the table at the end. This will allow fetching it from a `stack_map`
+  // pointer.
+  reinterpret_cast<uint32_t*>(gc_roots + length)[0] = length;
+}
+
+static uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr) {
+  OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+  uint8_t* data = method_header->GetOptimizedCodeInfoPtr();
+  uint32_t roots = GetNumberOfRoots(data);
+  if (number_of_roots != nullptr) {
+    *number_of_roots = roots;
+  }
+  return data - ComputeRootTableSize(roots);
+}
+
+void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) {
+  MutexLock mu(Thread::Current(), lock_);
+  for (const auto& entry : method_code_map_) {
+    uint32_t number_of_roots = 0;
+    uint8_t* roots_data = GetRootTable(entry.first, &number_of_roots);
+    GcRoot<mirror::Object>* roots = reinterpret_cast<GcRoot<mirror::Object>*>(roots_data);
+    for (uint32_t i = 0; i < number_of_roots; ++i) {
+      // This does not need a read barrier because this is called by GC.
+      mirror::Object* object = roots[i].Read<kWithoutReadBarrier>();
+      DCHECK(object->IsString());
+      mirror::Object* new_string = visitor->IsMarked(object);
+      // We know the string is marked because it's a strongly-interned string that
+      // is always alive.
+      // TODO: Do not use IsMarked for j.l.Class, and adjust once we move this method
+      // out of the weak access/creation pause. b/32167580
+      DCHECK(new_string != nullptr);
+      roots[i] = GcRoot<mirror::Object>(new_string);
+    }
+  }
+}
+
 void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UNUSED) {
   uintptr_t allocation = FromCodeToAllocation(code_ptr);
-  const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
   DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
-
-  // Use the offset directly to prevent sanity check that the method is
-  // compiled with optimizing.
-  // TODO(ngeoffray): Clean up.
-  if (method_header->vmap_table_offset_ != 0) {
-    const uint8_t* data = method_header->code_ - method_header->vmap_table_offset_;
-    FreeData(const_cast<uint8_t*>(data));
-  }
+  FreeData(GetRootTable(code_ptr));
   FreeCode(reinterpret_cast<uint8_t*>(allocation));
 }
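
The helpers above pin down the in-memory layout of each data allocation: the GC root slots come first, the 32-bit root count is stored immediately after them, and the stack map starts right after that count, which is why GetNumberOfRoots() can simply read the word at `stack_map[-1]`. A stand-alone sketch of that arithmetic, using plain `uint32_t` slots in place of `GcRoot<mirror::Object>` and `malloc` in place of the cache's data allocator (illustration only, not ART code):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    // Same shape as ComputeRootTableSize(): N root slots plus one 32-bit length word.
    static uint32_t RootTableSize(uint32_t number_of_roots) {
      return sizeof(uint32_t) + number_of_roots * sizeof(uint32_t);
    }

    int main() {
      const uint32_t kRoots = 3;
      const size_t kStackMapSize = 16;
      uint8_t* region = static_cast<uint8_t*>(std::malloc(RootTableSize(kRoots) + kStackMapSize));
      assert(region != nullptr);

      // ReserveData()-style split: root table at the front, stack map right behind it.
      uint8_t* roots_data = region;
      uint8_t* stack_map = region + RootTableSize(kRoots);

      // FillRootTable()-style write: slots first, then the count directly before the stack map.
      uint32_t* slots = reinterpret_cast<uint32_t*>(roots_data);
      for (uint32_t i = 0; i < kRoots; ++i) slots[i] = 0x1000 + i;  // stand-ins for GcRoots
      slots[kRoots] = kRoots;

      // GetNumberOfRoots()-style read: the count is reachable from the stack map pointer alone.
      assert(reinterpret_cast<uint32_t*>(stack_map)[-1] == kRoots);
      std::free(region);
      return 0;
    }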
 
@@ -308,13 +370,16 @@
 
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
-                                          const uint8_t* vmap_table,
+                                          uint8_t* stack_map,
+                                          uint8_t* roots_data,
                                           size_t frame_size_in_bytes,
                                           size_t core_spill_mask,
                                           size_t fp_spill_mask,
                                           const uint8_t* code,
                                           size_t code_size,
-                                          bool osr) {
+                                          bool osr,
+                                          Handle<mirror::ObjectArray<mirror::Object>> roots) {
+  DCHECK(stack_map != nullptr);
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   // Ensure the header ends up at expected instruction alignment.
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
@@ -338,7 +403,7 @@
       std::copy(code, code + code_size, code_ptr);
       method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
       new (method_header) OatQuickMethodHeader(
-          (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
+          code_ptr - stack_map,
           frame_size_in_bytes,
           core_spill_mask,
           fp_spill_mask,
@@ -353,6 +418,8 @@
   {
     MutexLock mu(self, lock_);
     method_code_map_.Put(code_ptr, method);
+    // Fill the root table before updating the entry point.
+    FillRootTable(roots_data, roots);
     if (osr) {
       number_of_osr_compilations_++;
       osr_code_map_.Put(method, code_ptr);
@@ -408,8 +475,14 @@
   FreeData(reinterpret_cast<uint8_t*>(data));
 }
 
-uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size, ArtMethod* method) {
-  size = RoundUp(size, sizeof(void*));
+void JitCodeCache::ReserveData(Thread* self,
+                               size_t stack_map_size,
+                               size_t number_of_roots,
+                               ArtMethod* method,
+                               uint8_t** stack_map_data,
+                               uint8_t** roots_data) {
+  size_t table_size = ComputeRootTableSize(number_of_roots);
+  size_t size = RoundUp(stack_map_size + table_size, sizeof(void*));
   uint8_t* result = nullptr;
 
   {
@@ -436,7 +509,8 @@
               << " for stack maps of "
               << ArtMethod::PrettyMethod(method);
   }
-  return result;
+  *roots_data = result;
+  *stack_map_data = result + table_size;
 }
 
 class MarkCodeVisitor FINAL : public StackVisitor {
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index e15c93a..a97ef68 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -92,13 +92,15 @@
   // Allocate and write code and its metadata to the code cache.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
-                      const uint8_t* vmap_table,
+                      uint8_t* stack_map,
+                      uint8_t* roots_data,
                       size_t frame_size_in_bytes,
                       size_t core_spill_mask,
                       size_t fp_spill_mask,
                       const uint8_t* code,
                       size_t code_size,
-                      bool osr)
+                      bool osr,
+                      Handle<mirror::ObjectArray<mirror::Object>> roots)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -108,8 +110,14 @@
   // Return true if the code cache contains this method.
   bool ContainsMethod(ArtMethod* method) REQUIRES(!lock_);
 
-  // Reserve a region of data of size at least "size". Returns null if there is no more room.
-  uint8_t* ReserveData(Thread* self, size_t size, ArtMethod* method)
+  // Allocate a region of data that contains `size` bytes, and potentially space for
+  // storing `number_of_roots` roots; the regions are returned via `stack_map_data` and `roots_data`.
+  void ReserveData(Thread* self,
+                   size_t size,
+                   size_t number_of_roots,
+                   ArtMethod* method,
+                   uint8_t** stack_map_data,
+                   uint8_t** roots_data)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -188,6 +196,10 @@
 
   bool IsOsrCompiled(ArtMethod* method) REQUIRES(!lock_);
 
+  void SweepRootTables(IsMarkedVisitor* visitor)
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -201,13 +213,15 @@
   // allocation fails. Return null if the allocation fails.
   uint8_t* CommitCodeInternal(Thread* self,
                               ArtMethod* method,
-                              const uint8_t* vmap_table,
+                              uint8_t* stack_map,
+                              uint8_t* roots_data,
                               size_t frame_size_in_bytes,
                               size_t core_spill_mask,
                               size_t fp_spill_mask,
                               const uint8_t* code,
                               size_t code_size,
-                              bool osr)
+                              bool osr,
+                              Handle<mirror::ObjectArray<mirror::Object>> roots)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
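
The compiler-side caller of these declarations is not part of this change, but the header already fixes the handshake: ReserveData() hands back a roots_data/stack_map_data pair carved from one allocation, the caller emits its stack map into the latter, and CommitCode() later receives both pointers plus the literal roots so the cache can fill the root table before publishing the code. A toy sketch of that ordering, with a hypothetical ToyJitCache and plain integer root slots standing in for the real Thread/ArtMethod/Handle/GcRoot types:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Hypothetical stand-in: models only the reserve-then-commit ordering, nothing else.
    class ToyJitCache {
     public:
      // Analogue of ReserveData(): one buffer, root table in front, stack map behind it.
      void ReserveData(size_t stack_map_size, size_t number_of_roots,
                       uint8_t** stack_map_data, uint8_t** roots_data) {
        size_t table_size = sizeof(uint32_t) + number_of_roots * sizeof(uint32_t);
        buffer_.assign(table_size + stack_map_size, 0);
        *roots_data = buffer_.data();
        *stack_map_data = buffer_.data() + table_size;
      }

      // Analogue of CommitCode(): the cache, not the caller, copies the roots and stores
      // their count just before the stack map, then would publish the code.
      void CommitCode(const uint8_t* stack_map, uint8_t* roots_data,
                      const std::vector<uint32_t>& roots) {
        uint32_t* slots = reinterpret_cast<uint32_t*>(roots_data);
        for (size_t i = 0; i < roots.size(); ++i) {
          slots[i] = roots[i];
        }
        slots[roots.size()] = static_cast<uint32_t>(roots.size());
        assert(reinterpret_cast<const uint32_t*>(stack_map)[-1] == roots.size());
      }

     private:
      std::vector<uint8_t> buffer_;
    };

    int main() {
      ToyJitCache cache;
      uint8_t* stack_map = nullptr;
      uint8_t* roots_data = nullptr;
      const std::vector<uint32_t> roots = {7, 8, 9};   // stand-ins for string references

      cache.ReserveData(/*stack_map_size=*/32, roots.size(), &stack_map, &roots_data);
      std::memset(stack_map, 0, 32);                   // caller "emits" its stack map here
      cache.CommitCode(stack_map, roots_data, roots);
      return 0;
    }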
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 0217a67..01a2ad8 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -157,14 +157,14 @@
     ThrowNoSuchMethodError(soa, c, name, sig, is_static ? "static" : "non-static");
     return nullptr;
   }
-  return soa.EncodeMethod(method);
+  return jni::EncodeArtMethod(method);
 }
 
 static ObjPtr<mirror::ClassLoader> GetClassLoader(const ScopedObjectAccess& soa)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtMethod* method = soa.Self()->GetCurrentMethod(nullptr);
   // If we are running Runtime.nativeLoad, use the overriding ClassLoader it set.
-  if (method == soa.DecodeMethod(WellKnownClasses::java_lang_Runtime_nativeLoad)) {
+  if (method == jni::DecodeArtMethod(WellKnownClasses::java_lang_Runtime_nativeLoad)) {
     return soa.Decode<mirror::ClassLoader>(soa.Self()->GetClassLoaderOverride());
   }
   // If we have a method, use its ClassLoader for context.
@@ -235,7 +235,7 @@
                                    sig, name, c->GetDescriptor(&temp));
     return nullptr;
   }
-  return soa.EncodeField(field);
+  return jni::EncodeArtField(field);
 }
 
 static void ThrowAIOOBE(ScopedObjectAccess& soa, mirror::Array* array, jsize start,
@@ -368,7 +368,7 @@
   static jmethodID FromReflectedMethod(JNIEnv* env, jobject jlr_method) {
     CHECK_NON_NULL_ARGUMENT(jlr_method);
     ScopedObjectAccess soa(env);
-    return soa.EncodeMethod(ArtMethod::FromReflectedMethod(soa, jlr_method));
+    return jni::EncodeArtMethod(ArtMethod::FromReflectedMethod(soa, jlr_method));
   }
 
   static jfieldID FromReflectedField(JNIEnv* env, jobject jlr_field) {
@@ -380,13 +380,13 @@
       return nullptr;
     }
     ObjPtr<mirror::Field> field = ObjPtr<mirror::Field>::DownCast(obj_field);
-    return soa.EncodeField(field->GetArtField());
+    return jni::EncodeArtField(field->GetArtField());
   }
 
   static jobject ToReflectedMethod(JNIEnv* env, jclass, jmethodID mid, jboolean) {
     CHECK_NON_NULL_ARGUMENT(mid);
     ScopedObjectAccess soa(env);
-    ArtMethod* m = soa.DecodeMethod(mid);
+    ArtMethod* m = jni::DecodeArtMethod(mid);
     mirror::Executable* method;
     DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
     DCHECK(!Runtime::Current()->IsActiveTransaction());
@@ -401,7 +401,7 @@
   static jobject ToReflectedField(JNIEnv* env, jclass, jfieldID fid, jboolean) {
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     return soa.AddLocalReference<jobject>(
         mirror::Field::CreateFromArtField<kRuntimePointerSize>(soa.Self(), f, true));
   }
@@ -631,8 +631,8 @@
     }
     if (c->IsStringClass()) {
       // Replace calls to String.<init> with equivalent StringFactory call.
-      jmethodID sf_mid = soa.EncodeMethod(
-          WellKnownClasses::StringInitToStringFactory(soa.DecodeMethod(mid)));
+      jmethodID sf_mid = jni::EncodeArtMethod(
+          WellKnownClasses::StringInitToStringFactory(jni::DecodeArtMethod(mid)));
       return CallStaticObjectMethodV(env, WellKnownClasses::java_lang_StringFactory, sf_mid, args);
     }
     ObjPtr<mirror::Object> result = c->AllocObject(soa.Self());
@@ -658,8 +658,8 @@
     }
     if (c->IsStringClass()) {
       // Replace calls to String.<init> with equivalent StringFactory call.
-      jmethodID sf_mid = soa.EncodeMethod(
-          WellKnownClasses::StringInitToStringFactory(soa.DecodeMethod(mid)));
+      jmethodID sf_mid = jni::EncodeArtMethod(
+          WellKnownClasses::StringInitToStringFactory(jni::DecodeArtMethod(mid)));
       return CallStaticObjectMethodA(env, WellKnownClasses::java_lang_StringFactory, sf_mid, args);
     }
     ObjPtr<mirror::Object> result = c->AllocObject(soa.Self());
@@ -1237,14 +1237,14 @@
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(obj);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     return soa.AddLocalReference<jobject>(f->GetObject(o));
   }
 
   static jobject GetStaticObjectField(JNIEnv* env, jclass, jfieldID fid) {
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     return soa.AddLocalReference<jobject>(f->GetObject(f->GetDeclaringClass()));
   }
 
@@ -1254,7 +1254,7 @@
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(java_object);
     ObjPtr<mirror::Object> v = soa.Decode<mirror::Object>(java_value);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     f->SetObject<false>(o, v);
   }
 
@@ -1262,7 +1262,7 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid);
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> v = soa.Decode<mirror::Object>(java_value);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     f->SetObject<false>(f->GetDeclaringClass(), v);
   }
 
@@ -1271,13 +1271,13 @@
   CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(fid); \
   ScopedObjectAccess soa(env); \
   ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(instance); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   return f->Get ##fn (o)
 
 #define GET_STATIC_PRIMITIVE_FIELD(fn) \
   CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(fid); \
   ScopedObjectAccess soa(env); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   return f->Get ##fn (f->GetDeclaringClass())
 
 #define SET_PRIMITIVE_FIELD(fn, instance, value) \
@@ -1285,13 +1285,13 @@
   CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid); \
   ScopedObjectAccess soa(env); \
   ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(instance); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   f->Set ##fn <false>(o, value)
 
 #define SET_STATIC_PRIMITIVE_FIELD(fn, value) \
   CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid); \
   ScopedObjectAccess soa(env); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   f->Set ##fn <false>(f->GetDeclaringClass(), value)
 
   static jboolean GetBooleanField(JNIEnv* env, jobject obj, jfieldID fid) {
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index b829934..b3837c4 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -20,6 +20,8 @@
 #include <jni.h>
 #include <iosfwd>
 
+#include "base/macros.h"
+
 #ifndef NATIVE_METHOD
 #define NATIVE_METHOD(className, functionName, signature) \
   { #functionName, signature, reinterpret_cast<void*>(className ## _ ## functionName) }
@@ -36,6 +38,9 @@
 
 namespace art {
 
+class ArtField;
+class ArtMethod;
+
 const JNINativeInterface* GetJniNativeInterface();
 const JNINativeInterface* GetRuntimeShutdownNativeInterface();
 
@@ -46,6 +51,29 @@
 
 int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause);
 
+namespace jni {
+
+ALWAYS_INLINE
+static inline ArtField* DecodeArtField(jfieldID fid) {
+  return reinterpret_cast<ArtField*>(fid);
+}
+
+ALWAYS_INLINE
+static inline jfieldID EncodeArtField(ArtField* field) {
+  return reinterpret_cast<jfieldID>(field);
+}
+
+ALWAYS_INLINE
+static inline jmethodID EncodeArtMethod(ArtMethod* art_method) {
+  return reinterpret_cast<jmethodID>(art_method);
+}
+
+ALWAYS_INLINE
+static inline ArtMethod* DecodeArtMethod(jmethodID method_id) {
+  return reinterpret_cast<ArtMethod*>(method_id);
+}
+
+}  // namespace jni
 }  // namespace art
 
 std::ostream& operator<<(std::ostream& os, const jobjectRefType& rhs);
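As the helpers above show, after this change a jmethodID / jfieldID is just a type-punned ArtMethod* / ArtField*, which is why the ScopedObjectAccess-based EncodeMethod/DecodeField calls in jni_internal.cc could be replaced. A standalone illustration of the round trip (toy types, not the real ART headers):

#include <cassert>

struct ArtMethod {};   // stand-in for art::ArtMethod
struct _jmethodID;     // opaque handle type, as declared by <jni.h>
using jmethodID = _jmethodID*;

static inline jmethodID EncodeArtMethod(ArtMethod* method) {
  return reinterpret_cast<jmethodID>(method);
}
static inline ArtMethod* DecodeArtMethod(jmethodID mid) {
  return reinterpret_cast<ArtMethod*>(mid);
}

int main() {
  ArtMethod m;
  // Encode/decode is a pure reinterpret_cast round trip; no runtime lookup involved.
  assert(DecodeArtMethod(EncodeArtMethod(&m)) == &m);
  return 0;
}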
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 538b6eb..2f2565b 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -61,7 +61,7 @@
  */
 class LockWord {
  public:
-  enum SizeShiftsAndMasks {  // private marker to avoid generate-operator-out.py from processing.
+  enum SizeShiftsAndMasks : uint32_t {  // private marker to avoid generate-operator-out.py from processing.
     // Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
     kStateSize = 2,
     kReadBarrierStateSize = 1,
@@ -91,6 +91,8 @@
     kStateFat = 1,
     kStateHash = 2,
     kStateForwardingAddress = 3,
+    kStateForwardingAddressShifted = kStateForwardingAddress << kStateShift,
+    kStateForwardingAddressOverflow = (1 + kStateMask - kStateForwardingAddress) << kStateShift,
 
     // Read barrier bit.
     kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
@@ -140,7 +142,7 @@
 
   static LockWord FromForwardingAddress(size_t target) {
     DCHECK_ALIGNED(target, (1 << kStateSize));
-    return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift));
+    return LockWord((target >> kForwardingAddressShift) | kStateForwardingAddressShifted);
   }
 
   static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) {
@@ -202,6 +204,8 @@
 
   void SetReadBarrierState(uint32_t rb_state) {
     DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+    DCHECK(rb_state == ReadBarrier::WhiteState() ||
+           rb_state == ReadBarrier::GrayState()) << rb_state;
     DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
     // Clear and or the bits.
     value_ &= ~(kReadBarrierStateMask << kReadBarrierStateShift);
@@ -256,6 +260,14 @@
   LockWord();
 
   explicit LockWord(uint32_t val) : value_(val) {
+    // Make sure adding the overflow causes an overflow.
+    constexpr uint64_t overflow = static_cast<uint64_t>(kStateForwardingAddressShifted) +
+        static_cast<uint64_t>(kStateForwardingAddressOverflow);
+    constexpr bool is_larger = overflow > static_cast<uint64_t>(0xFFFFFFFF);
+    static_assert(is_larger, "should have overflowed");
+    static_assert(
+        (~kStateForwardingAddress & kStateMask) == 0,
+        "READ_BARRIER_MARK_REG relies on the forwarding address state being all one bits");
     CheckReadBarrierState();
   }
 
@@ -270,9 +282,8 @@
       if (!kUseReadBarrier) {
         DCHECK_EQ(rb_state, 0U);
       } else {
-        DCHECK(rb_state == ReadBarrier::white_ptr_ ||
-               rb_state == ReadBarrier::gray_ptr_ ||
-               rb_state == ReadBarrier::black_ptr_) << rb_state;
+        DCHECK(rb_state == ReadBarrier::WhiteState() ||
+               rb_state == ReadBarrier::GrayState()) << rb_state;
       }
     }
   }
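The two new enumerators let assembly stubs such as READ_BARRIER_MARK_REG detect a forwarding-address lock word with a single add that overflows 32 bits. Assuming the usual layout where the 2-bit state sits in the top bits (kStateShift of 30 and kStateMask of 3; values inferred from the surrounding definitions, not spelled out in this hunk), the arithmetic works out as:

#include <cstdint>

constexpr uint32_t kStateShift = 30;  // assumed: 2-bit state field in the top bits
constexpr uint32_t kStateMask = 3;
constexpr uint32_t kStateForwardingAddress = 3;
constexpr uint32_t kStateForwardingAddressShifted =
    kStateForwardingAddress << kStateShift;                       // 0xC0000000
constexpr uint32_t kStateForwardingAddressOverflow =
    (1 + kStateMask - kStateForwardingAddress) << kStateShift;    // 0x40000000

// Any lock word in the forwarding-address state plus the overflow constant wraps
// past 2^32, so the stub only needs an add and a carry/overflow check.
static_assert(uint64_t{kStateForwardingAddressShifted} +
                  uint64_t{kStateForwardingAddressOverflow} > UINT32_MAX,
              "forwarding-address state must overflow a 32-bit add");

int main() { return 0; }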
diff --git a/runtime/method_handles-inl.h b/runtime/method_handles-inl.h
index ff5d2a1..1240792 100644
--- a/runtime/method_handles-inl.h
+++ b/runtime/method_handles-inl.h
@@ -31,122 +31,70 @@
 
 namespace art {
 
-// Assigns |type| to the primitive type associated with |dst_class|. Returns
-// true iff. |dst_class| was a boxed type (Integer, Long etc.), false otherwise.
-REQUIRES_SHARED(Locks::mutator_lock_)
-static inline bool GetPrimitiveType(ObjPtr<mirror::Class> dst_class, Primitive::Type* type) {
-  if (dst_class->DescriptorEquals("Ljava/lang/Boolean;")) {
-    (*type) = Primitive::kPrimBoolean;
+inline bool ConvertArgumentValue(Handle<mirror::MethodType> callsite_type,
+                                 Handle<mirror::MethodType> callee_type,
+                                 int index,
+                                 JValue* value) REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Class> from_class(callsite_type->GetPTypes()->GetWithoutChecks(index));
+  ObjPtr<mirror::Class> to_class(callee_type->GetPTypes()->GetWithoutChecks(index));
+  if (from_class == to_class) {
     return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Byte;")) {
-    (*type) = Primitive::kPrimByte;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Character;")) {
-    (*type) = Primitive::kPrimChar;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Float;")) {
-    (*type) = Primitive::kPrimFloat;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Double;")) {
-    (*type) = Primitive::kPrimDouble;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Integer;")) {
-    (*type) = Primitive::kPrimInt;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Long;")) {
-    (*type) = Primitive::kPrimLong;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Short;")) {
-    (*type) = Primitive::kPrimShort;
+  }
+
+  // |value| may contain a bare heap pointer which is generally
+  // unsafe. ConvertJValueCommon() saves |value|, |from_class|, and
+  // |to_class| to Handles where necessary to avoid issues if the heap
+  // changes.
+  if (ConvertJValueCommon(callsite_type, callee_type, from_class, to_class, value)) {
+    DCHECK(!Thread::Current()->IsExceptionPending());
     return true;
   } else {
+    DCHECK(Thread::Current()->IsExceptionPending());
+    value->SetJ(0);
     return false;
   }
 }
 
-REQUIRES_SHARED(Locks::mutator_lock_)
-inline bool ConvertJValue(Handle<mirror::Class> from,
-                          Handle<mirror::Class> to,
-                          const JValue& from_value,
-                          JValue* to_value) {
-  const Primitive::Type from_type = from->GetPrimitiveType();
-  const Primitive::Type to_type = to->GetPrimitiveType();
-
-  // This method must be called only when the types don't match.
-  DCHECK(from.Get() != to.Get());
-
-  if ((from_type != Primitive::kPrimNot) && (to_type != Primitive::kPrimNot)) {
-    // Throws a ClassCastException if we're unable to convert a primitive value.
-    return ConvertPrimitiveValue(false, from_type, to_type, from_value, to_value);
-  } else if ((from_type == Primitive::kPrimNot) && (to_type == Primitive::kPrimNot)) {
-    // They're both reference types. If "from" is null, we can pass it
-    // through unchanged. If not, we must generate a cast exception if
-    // |to| is not assignable from the dynamic type of |ref|.
-    mirror::Object* const ref = from_value.GetL();
-    if (ref == nullptr || to->IsAssignableFrom(ref->GetClass())) {
-      to_value->SetL(ref);
-      return true;
-    } else {
-      ThrowClassCastException(to.Get(), ref->GetClass());
-      return false;
-    }
-  } else {
-    // Precisely one of the source or the destination are reference types.
-    // We must box or unbox.
-    if (to_type == Primitive::kPrimNot) {
-      // The target type is a reference, we must box.
-      Primitive::Type type;
-      // TODO(narayan): This is a CHECK for now. There might be a few corner cases
-      // here that we might not have handled yet. For exmple, if |to| is java/lang/Number;,
-      // we will need to box this "naturally".
-      CHECK(GetPrimitiveType(to.Get(), &type));
-      // First perform a primitive conversion to the unboxed equivalent of the target,
-      // if necessary. This should be for the rarer cases like (int->Long) etc.
-      if (UNLIKELY(from_type != type)) {
-         if (!ConvertPrimitiveValue(false, from_type, type, from_value, to_value)) {
-           return false;
-         }
-      } else {
-        *to_value = from_value;
-      }
-
-      // Then perform the actual boxing, and then set the reference.
-      ObjPtr<mirror::Object> boxed = BoxPrimitive(type, from_value);
-      to_value->SetL(boxed.Ptr());
-      return true;
-    } else {
-      // The target type is a primitive, we must unbox.
-      ObjPtr<mirror::Object> ref(from_value.GetL());
-
-      // Note that UnboxPrimitiveForResult already performs all of the type
-      // conversions that we want, based on |to|.
-      JValue unboxed_value;
-      return UnboxPrimitiveForResult(ref, to.Get(), to_value);
-    }
+inline bool ConvertReturnValue(Handle<mirror::MethodType> callsite_type,
+                               Handle<mirror::MethodType> callee_type,
+                               JValue* value)  REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Class> from_class(callee_type->GetRType());
+  ObjPtr<mirror::Class> to_class(callsite_type->GetRType());
+  if (to_class->GetPrimitiveType() == Primitive::kPrimVoid || from_class == to_class) {
+    return true;
   }
 
-  return true;
+  // |value| may contain a bare heap pointer which is generally
+  // unsafe. ConvertJValueCommon() saves |value|, |from_class|, and
+  // |to_class| to Handles where necessary to avoid issues if the heap
+  // changes.
+  if (ConvertJValueCommon(callsite_type, callee_type, from_class, to_class, value)) {
+    DCHECK(!Thread::Current()->IsExceptionPending());
+    return true;
+  } else {
+    DCHECK(Thread::Current()->IsExceptionPending());
+    value->SetJ(0);
+    return false;
+  }
 }
 
 template <typename G, typename S>
 bool PerformConversions(Thread* self,
-                        Handle<mirror::ObjectArray<mirror::Class>> from_types,
-                        Handle<mirror::ObjectArray<mirror::Class>> to_types,
+                        Handle<mirror::MethodType> callsite_type,
+                        Handle<mirror::MethodType> callee_type,
                         G* getter,
                         S* setter,
-                        int32_t num_conversions) {
+                        int32_t num_conversions) REQUIRES_SHARED(Locks::mutator_lock_) {
   StackHandleScope<2> hs(self);
-  MutableHandle<mirror::Class> from(hs.NewHandle<mirror::Class>(nullptr));
-  MutableHandle<mirror::Class> to(hs.NewHandle<mirror::Class>(nullptr));
+  Handle<mirror::ObjectArray<mirror::Class>> from_types(hs.NewHandle(callsite_type->GetPTypes()));
+  Handle<mirror::ObjectArray<mirror::Class>> to_types(hs.NewHandle(callee_type->GetPTypes()));
 
   for (int32_t i = 0; i < num_conversions; ++i) {
-    from.Assign(from_types->GetWithoutChecks(i));
-    to.Assign(to_types->GetWithoutChecks(i));
-
-    const Primitive::Type from_type = from->GetPrimitiveType();
-    const Primitive::Type to_type = to->GetPrimitiveType();
-
-    if (from.Get() == to.Get()) {
+    ObjPtr<mirror::Class> from(from_types->GetWithoutChecks(i));
+    ObjPtr<mirror::Class> to(to_types->GetWithoutChecks(i));
+    const Primitive::Type from_type = from_types->GetWithoutChecks(i)->GetPrimitiveType();
+    const Primitive::Type to_type = to_types->GetWithoutChecks(i)->GetPrimitiveType();
+    if (from == to) {
       // Easy case - the types are identical. Nothing left to do except to pass
       // the arguments along verbatim.
       if (Primitive::Is64BitType(from_type)) {
@@ -156,31 +104,29 @@
       } else {
         setter->Set(getter->Get());
       }
-
-      continue;
     } else {
-      JValue from_value;
-      JValue to_value;
+      JValue value;
 
       if (Primitive::Is64BitType(from_type)) {
-        from_value.SetJ(getter->GetLong());
+        value.SetJ(getter->GetLong());
       } else if (from_type == Primitive::kPrimNot) {
-        from_value.SetL(getter->GetReference());
+        value.SetL(getter->GetReference());
       } else {
-        from_value.SetI(getter->Get());
+        value.SetI(getter->Get());
       }
 
-      if (!ConvertJValue(from, to, from_value, &to_value)) {
+      // Caveat emptor - ObjPtrs are not guaranteed to be valid after this call.
+      if (!ConvertArgumentValue(callsite_type, callee_type, i, &value)) {
         DCHECK(self->IsExceptionPending());
         return false;
       }
 
       if (Primitive::Is64BitType(to_type)) {
-        setter->SetLong(to_value.GetJ());
+        setter->SetLong(value.GetJ());
       } else if (to_type == Primitive::kPrimNot) {
-        setter->SetReference(to_value.GetL());
+        setter->SetReference(value.GetL());
       } else {
-        setter->Set(to_value.GetI());
+        setter->Set(value.GetI());
       }
     }
   }
@@ -196,10 +142,10 @@
                                             uint32_t first_src_reg,
                                             uint32_t first_dest_reg,
                                             const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                            ShadowFrame* callee_frame) {
-  StackHandleScope<4> hs(self);
-  Handle<mirror::ObjectArray<mirror::Class>> from_types(hs.NewHandle(callsite_type->GetPTypes()));
-  Handle<mirror::ObjectArray<mirror::Class>> to_types(hs.NewHandle(callee_type->GetPTypes()));
+                                            ShadowFrame* callee_frame)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::ObjectArray<mirror::Class>> from_types(callsite_type->GetPTypes());
+  ObjPtr<mirror::ObjectArray<mirror::Class>> to_types(callee_type->GetPTypes());
 
   const int32_t num_method_params = from_types->GetLength();
   if (to_types->GetLength() != num_method_params) {
@@ -211,8 +157,8 @@
   ShadowFrameSetter setter(callee_frame, first_dest_reg);
 
   return PerformConversions<ShadowFrameGetter<is_range>, ShadowFrameSetter>(self,
-                                                                            from_types,
-                                                                            to_types,
+                                                                            callsite_type,
+                                                                            callee_type,
                                                                             &getter,
                                                                             &setter,
                                                                             num_method_params);
diff --git a/runtime/method_handles.cc b/runtime/method_handles.cc
new file mode 100644
index 0000000..491d139
--- /dev/null
+++ b/runtime/method_handles.cc
@@ -0,0 +1,317 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "method_handles.h"
+
+#include "method_handles-inl.h"
+#include "jvalue.h"
+#include "jvalue-inl.h"
+#include "reflection.h"
+#include "reflection-inl.h"
+
+namespace art {
+
+namespace {
+
+static const char* kBoxedBooleanClass = "Ljava/lang/Boolean;";
+static const char* kBoxedByteClass = "Ljava/lang/Byte;";
+static const char* kBoxedCharacterClass = "Ljava/lang/Character;";
+static const char* kBoxedDoubleClass = "Ljava/lang/Double;";
+static const char* kBoxedFloatClass = "Ljava/lang/Float;";
+static const char* kBoxedIntegerClass = "Ljava/lang/Integer;";
+static const char* kBoxedLongClass = "Ljava/lang/Long;";
+static const char* kBoxedShortClass = "Ljava/lang/Short;";
+
+// Assigns |type| to the primitive type associated with |klass|. Returns
+// true iff. |klass| was a boxed type (Integer, Long etc.), false otherwise.
+bool GetUnboxedPrimitiveType(ObjPtr<mirror::Class> klass, Primitive::Type* type)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+  if (klass->DescriptorEquals(kBoxedBooleanClass)) {
+    (*type) = Primitive::kPrimBoolean;
+    return true;
+  } else if (klass->DescriptorEquals(kBoxedByteClass)) {
+    (*type) = Primitive::kPrimByte;
+    return true;
+  } else if (klass->DescriptorEquals(kBoxedCharacterClass)) {
+    (*type) = Primitive::kPrimChar;
+    return true;
+  } else if (klass->DescriptorEquals(kBoxedFloatClass)) {
+    (*type) = Primitive::kPrimFloat;
+    return true;
+  } else if (klass->DescriptorEquals(kBoxedDoubleClass)) {
+    (*type) = Primitive::kPrimDouble;
+    return true;
+  } else if (klass->DescriptorEquals(kBoxedIntegerClass)) {
+    (*type) = Primitive::kPrimInt;
+    return true;
+  } else if (klass->DescriptorEquals(kBoxedLongClass)) {
+    (*type) = Primitive::kPrimLong;
+    return true;
+  } else if (klass->DescriptorEquals(kBoxedShortClass)) {
+    (*type) = Primitive::kPrimShort;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Returns the class corresponding to the boxed type for the primitive |type|.
+ObjPtr<mirror::Class> GetBoxedPrimitiveClass(Primitive::Type type)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  switch (type) {
+    case Primitive::kPrimBoolean:
+      return class_linker->FindSystemClass(Thread::Current(), kBoxedBooleanClass);
+    case Primitive::kPrimByte:
+      return class_linker->FindSystemClass(Thread::Current(), kBoxedByteClass);
+    case Primitive::kPrimChar:
+      return class_linker->FindSystemClass(Thread::Current(), kBoxedCharacterClass);
+    case Primitive::kPrimShort:
+      return class_linker->FindSystemClass(Thread::Current(), kBoxedShortClass);
+    case Primitive::kPrimInt:
+      return class_linker->FindSystemClass(Thread::Current(), kBoxedIntegerClass);
+    case Primitive::kPrimLong:
+      return class_linker->FindSystemClass(Thread::Current(), kBoxedLongClass);
+    case Primitive::kPrimFloat:
+      return class_linker->FindSystemClass(Thread::Current(), kBoxedFloatClass);
+    case Primitive::kPrimDouble:
+      return class_linker->FindSystemClass(Thread::Current(), kBoxedDoubleClass);
+    case Primitive::kPrimNot:
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable";
+      return nullptr;
+  }
+}
+
+// Returns true if |klass| is a boxed primitive type or a sub-class of a boxed primitive type.
+bool IsSubClassOfBoxedPrimitive(const Handle<mirror::Class>& klass)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  StackHandleScope<1> hs(Thread::Current());
+  MutableHandle<mirror::Class> h_klass(hs.NewHandle(klass.Get()));
+  do {
+    Primitive::Type type;
+    if (GetUnboxedPrimitiveType(h_klass.Get(), &type)) {
+      return true;
+    }
+    h_klass.Assign(h_klass->GetSuperClass());
+  } while (h_klass.Get() != nullptr);
+  return false;
+}
+
+// Unboxes the value |o| to |unboxed_value| of type |dst_class|.
+// |unboxed_value| must be zero on entry to avoid dangling pointers.
+// Returns true on success, false if an exception is raised.
+bool UnboxPrimitiveForMethodHandles(ObjPtr<mirror::Object> o,
+                                    ObjPtr<mirror::Class> dst_class,
+                                    JValue* unboxed_value)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // Check unboxed_value does not contain a dangling pointer.
+  DCHECK_EQ(unboxed_value->GetJ(), 0);
+  DCHECK(dst_class->IsPrimitive());
+
+  // This is derived from UnboxPrimitive() in reflection.cc, but with
+  // exceptions appropriate to method handles.
+  if (UNLIKELY(dst_class->GetPrimitiveType() == Primitive::kPrimVoid)) {
+    ThrowClassCastException(o->GetClass(), dst_class);
+    return false;
+  }
+  if (UNLIKELY(o == nullptr)) {
+    ThrowNullPointerException(
+        StringPrintf("Expected to unbox a '%s' primitive type but was returned null",
+                     dst_class->PrettyDescriptor().c_str()).c_str());
+    return false;
+  }
+
+  JValue boxed_value;
+  ObjPtr<mirror::Class> klass = o->GetClass();
+  ObjPtr<mirror::Class> src_class = nullptr;
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  ArtField* primitive_field = &klass->GetIFieldsPtr()->At(0);
+  if (klass->DescriptorEquals(kBoxedBooleanClass)) {
+    src_class = class_linker->FindPrimitiveClass('Z');
+    boxed_value.SetZ(primitive_field->GetBoolean(o));
+  } else if (klass->DescriptorEquals(kBoxedByteClass)) {
+    src_class = class_linker->FindPrimitiveClass('B');
+    boxed_value.SetB(primitive_field->GetByte(o));
+  } else if (klass->DescriptorEquals(kBoxedCharacterClass)) {
+    src_class = class_linker->FindPrimitiveClass('C');
+    boxed_value.SetC(primitive_field->GetChar(o));
+  } else if (klass->DescriptorEquals(kBoxedFloatClass)) {
+    src_class = class_linker->FindPrimitiveClass('F');
+    boxed_value.SetF(primitive_field->GetFloat(o));
+  } else if (klass->DescriptorEquals(kBoxedDoubleClass)) {
+    src_class = class_linker->FindPrimitiveClass('D');
+    boxed_value.SetD(primitive_field->GetDouble(o));
+  } else if (klass->DescriptorEquals(kBoxedIntegerClass)) {
+    src_class = class_linker->FindPrimitiveClass('I');
+    boxed_value.SetI(primitive_field->GetInt(o));
+  } else if (klass->DescriptorEquals(kBoxedLongClass)) {
+    src_class = class_linker->FindPrimitiveClass('J');
+    boxed_value.SetJ(primitive_field->GetLong(o));
+  } else if (klass->DescriptorEquals(kBoxedShortClass)) {
+    src_class = class_linker->FindPrimitiveClass('S');
+    boxed_value.SetS(primitive_field->GetShort(o));
+  } else {
+    std::string temp;
+    ThrowIllegalArgumentException(
+        StringPrintf("result has type %s, got %s",
+                     dst_class->PrettyDescriptor().c_str(),
+                     PrettyDescriptor(o->GetClass()->GetDescriptor(&temp)).c_str()).c_str());
+    return false;
+  }
+
+  if (!ConvertPrimitiveValueNoThrow(src_class->GetPrimitiveType(),
+                                    dst_class->GetPrimitiveType(),
+                                    boxed_value,
+                                    unboxed_value)) {
+    ThrowClassCastException(src_class, dst_class);
+    return false;
+  }
+  return true;
+}
+
+inline bool IsReferenceType(Primitive::Type type) {
+  return type == Primitive::kPrimNot;
+}
+
+inline bool IsPrimitiveType(Primitive::Type type) {
+  return !IsReferenceType(type);
+}
+
+}  // namespace
+
+bool ConvertJValueCommon(
+    Handle<mirror::MethodType> callsite_type,
+    Handle<mirror::MethodType> callee_type,
+    ObjPtr<mirror::Class> from,
+    ObjPtr<mirror::Class> to,
+    JValue* value) {
+  // The reader may be concerned about the safety of the heap object
+  // that may be in |value|. There is only one case where allocation
+  // is obviously needed and that's for boxing. However, in the case
+  // of boxing |value| contains a non-reference type.
+
+  const Primitive::Type from_type = from->GetPrimitiveType();
+  const Primitive::Type to_type = to->GetPrimitiveType();
+
+  // This method must be called only when the types don't match.
+  DCHECK(from != to);
+
+  if (IsPrimitiveType(from_type) && IsPrimitiveType(to_type)) {
+    // The source and target types are both primitives.
+    if (UNLIKELY(!ConvertPrimitiveValueNoThrow(from_type, to_type, *value, value))) {
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      value->SetJ(0);
+      return false;
+    }
+    return true;
+  } else if (IsReferenceType(from_type) && IsReferenceType(to_type)) {
+    // They're both reference types. If "from" is null, we can pass it
+    // through unchanged. If not, we must generate a cast exception if
+    // |to| is not assignable from the dynamic type of |ref|.
+    //
+    // Playing it safe with StackHandleScope here, not expecting any allocation
+    // in mirror::Class::IsAssignableFrom().
+    StackHandleScope<2> hs(Thread::Current());
+    Handle<mirror::Class> h_to(hs.NewHandle(to));
+    Handle<mirror::Object> h_obj(hs.NewHandle(value->GetL()));
+
+    // |value| will now be the result value, invalidate its existing value
+    // as |h_obj| now owns it.
+    value->SetJ(0);
+
+    if (h_obj.Get() != nullptr && !to->IsAssignableFrom(h_obj->GetClass())) {
+      ThrowClassCastException(h_to.Get(), h_obj->GetClass());
+      return false;
+    }
+    value->SetL(h_obj.Get());
+    return true;
+  } else if (IsReferenceType(to_type)) {
+    DCHECK(IsPrimitiveType(from_type));
+    // Playing it safe with StackHandleScope here with regard to
+    // GetUnboxedPrimitiveType() and GetBoxedPrimitiveClass().
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::Class> h_to(hs.NewHandle(to));
+    // The source type is a primitive and the target type is a reference, so we must box.
+    // The target type may be a superclass of the boxed source type; for example,
+    // if the source type is int, its boxed type is java.lang.Integer, and the target
+    // type could be java.lang.Number.
+    Primitive::Type type;
+    if (!GetUnboxedPrimitiveType(to, &type)) {
+      ObjPtr<mirror::Class> boxed_from_class = GetBoxedPrimitiveClass(from_type);
+      if (boxed_from_class->IsSubClass(h_to.Get())) {
+        type = from_type;
+      } else {
+        value->SetJ(0);
+        ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+        return false;
+      }
+    }
+
+    if (UNLIKELY(from_type != type)) {
+      value->SetJ(0);
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+
+    if (!ConvertPrimitiveValueNoThrow(from_type, type, *value, value)) {
+      value->SetJ(0);
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+
+    // Then perform the actual boxing, and then set the reference.
+    ObjPtr<mirror::Object> boxed = BoxPrimitive(type, *value);
+    value->SetL(boxed.Ptr());
+    return true;
+  } else {
+    // The source type is a reference and the target type is a primitive, so we must unbox.
+    DCHECK(IsReferenceType(from_type));
+    DCHECK(IsPrimitiveType(to_type));
+
+    // Use StackHandleScope to protect |from|, |to|, and the reference
+    // in |value| from heap re-arrangements that could be triggered
+    // ahead of unboxing step.
+    StackHandleScope<3> hs(Thread::Current());
+    Handle<mirror::Class> h_to(hs.NewHandle(to));
+    Handle<mirror::Class> h_from(hs.NewHandle(from));
+    Handle<mirror::Object> h_obj(hs.NewHandle(value->GetL()));
+
+    // |value| will now be the result value, invalidate its existing value
+    // as |h_obj| now owns it.
+    value->SetJ(0);
+
+    // Check source type is a boxed primitive or has a boxed primitive super-class.
+    ObjPtr<mirror::Class> boxed_to_class = GetBoxedPrimitiveClass(to_type);
+    if (!IsSubClassOfBoxedPrimitive(h_from) && !boxed_to_class->IsSubClass(h_from.Get())) {
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+
+    if (h_obj.Get() == nullptr) {
+      ThrowNullPointerException(
+        StringPrintf("Expected to unbox a '%s' but instance was null",
+                     h_from->PrettyDescriptor().c_str()).c_str());
+      return false;
+    }
+
+    return UnboxPrimitiveForMethodHandles(h_obj.Get(), h_to.Get(), value);
+  }
+}
+
+}  // namespace art
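ConvertJValueCommon() above reduces every argument or return-value mismatch to one of four cases: primitive-to-primitive, reference-to-reference, boxing, and unboxing. The toy sketch below only names those cases with stand-in types; it is not the ART API and elides the Handle and exception plumbing:

#include <iostream>

enum class Kind { kPrimitive, kReference };

// Illustrative decision tree mirroring the branches in ConvertJValueCommon().
const char* ClassifyConversion(Kind from, Kind to) {
  if (from == Kind::kPrimitive && to == Kind::kPrimitive) {
    return "primitive conversion (WrongMethodTypeException on failure)";
  }
  if (from == Kind::kReference && to == Kind::kReference) {
    return "checked reference cast (ClassCastException on failure)";
  }
  if (to == Kind::kReference) {
    return "box, e.g. int -> Integer or a boxed supertype such as Number";
  }
  return "unbox, e.g. Integer -> int, after a null check";
}

int main() {
  std::cout << ClassifyConversion(Kind::kPrimitive, Kind::kReference) << "\n";
  return 0;
}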
diff --git a/runtime/method_handles.h b/runtime/method_handles.h
index 0d3f9f1..0cc69f2 100644
--- a/runtime/method_handles.h
+++ b/runtime/method_handles.h
@@ -20,7 +20,10 @@
 #include <ostream>
 
 #include "dex_instruction.h"
+#include "handle.h"
 #include "jvalue.h"
+#include "mirror/class.h"
+#include "mirror/method_type.h"
 
 namespace art {
 
@@ -56,13 +59,36 @@
   return handle_kind <= kLastInvokeKind;
 }
 
-// Performs a single argument conversion from type |from| to a distinct
-// type |to|. Returns true on success, false otherwise.
-REQUIRES_SHARED(Locks::mutator_lock_)
-inline bool ConvertJValue(Handle<mirror::Class> from,
-                          Handle<mirror::Class> to,
-                          const JValue& from_value,
-                          JValue* to_value) ALWAYS_INLINE;
+// Performs a conversion from type |from| to a distinct type |to| as
+// part of conversion of |caller_type| to |callee_type|. The value to
+// be converted is in |value|. Returns true on success and updates
+// |value| with the converted value, false otherwise.
+bool ConvertJValueCommon(Handle<mirror::MethodType> callsite_type,
+                         Handle<mirror::MethodType> callee_type,
+                         ObjPtr<mirror::Class> from,
+                         ObjPtr<mirror::Class> to,
+                         JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+// Converts the value of the argument at position |index| from type
+// expected by |callee_type| to type used by |callsite_type|. |value|
+// represents the value to be converted. Returns true on success and
+// updates |value|, false otherwise.
+ALWAYS_INLINE bool ConvertArgumentValue(Handle<mirror::MethodType> callsite_type,
+                                        Handle<mirror::MethodType> callee_type,
+                                        int index,
+                                        JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+// Converts the return value from return type yielded by
+// |callee_type| to the return type yielded by
+// |callsite_type|. |value| represents the value to be
+// converted. Returns true on success and updates |value|, false
+// otherwise.
+ALWAYS_INLINE bool ConvertReturnValue(Handle<mirror::MethodType> callsite_type,
+                                      Handle<mirror::MethodType> callee_type,
+                                      JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_);
 
 // Perform argument conversions between |callsite_type| (the type of the
 // incoming arguments) and |callee_type| (the type of the method being
@@ -109,17 +135,16 @@
 // up too much space, we can make G / S abstract base classes that are
 // overridden by concrete classes.
 template <typename G, typename S>
-REQUIRES_SHARED(Locks::mutator_lock_)
 bool PerformConversions(Thread* self,
-                        Handle<mirror::ObjectArray<mirror::Class>> from_types,
-                        Handle<mirror::ObjectArray<mirror::Class>> to_types,
+                        Handle<mirror::MethodType> callsite_type,
+                        Handle<mirror::MethodType> callee_type,
                         G* getter,
                         S* setter,
-                        int32_t num_conversions);
+                        int32_t num_conversions) REQUIRES_SHARED(Locks::mutator_lock_);
 
 // A convenience wrapper around |PerformConversions|, for the case where
 // the setter and getter are both ShadowFrame based.
-template <bool is_range> REQUIRES_SHARED(Locks::mutator_lock_)
+template <bool is_range>
 bool ConvertAndCopyArgumentsFromCallerFrame(Thread* self,
                                             Handle<mirror::MethodType> callsite_type,
                                             Handle<mirror::MethodType> callee_type,
@@ -127,7 +152,8 @@
                                             uint32_t first_src_reg,
                                             uint32_t first_dest_reg,
                                             const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                            ShadowFrame* callee_frame);
+                                            ShadowFrame* callee_frame)
+    REQUIRES_SHARED(Locks::mutator_lock_);
 
 // A convenience class that allows for iteration through a list of
 // input argument registers |arg| for non-range invokes or a list of
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 6a357b3..db46027 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "class_ext.h"
 #include "class_linker-inl.h"
 #include "class_loader.h"
 #include "class-inl.h"
@@ -29,6 +30,7 @@
 #include "method.h"
 #include "object_array-inl.h"
 #include "object-inl.h"
+#include "object_lock.h"
 #include "runtime.h"
 #include "thread.h"
 #include "throwable.h"
@@ -58,12 +60,49 @@
   java_lang_Class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
-inline void Class::SetVerifyError(ObjPtr<Object> error) {
-  CHECK(error != nullptr) << PrettyClass();
-  if (Runtime::Current()->IsActiveTransaction()) {
-    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+ClassExt* Class::GetExtData() {
+  return GetFieldObject<ClassExt>(OFFSET_OF_OBJECT_MEMBER(Class, ext_data_));
+}
+
+ClassExt* Class::EnsureExtDataPresent(Thread* self) {
+  ObjPtr<ClassExt> existing(GetExtData());
+  if (!existing.IsNull()) {
+    return existing.Ptr();
+  }
+  StackHandleScope<3> hs(self);
+  // Handlerize 'this' since we are allocating here.
+  Handle<Class> h_this(hs.NewHandle(this));
+  // Clear exception so we can allocate.
+  Handle<Throwable> throwable(hs.NewHandle(self->GetException()));
+  self->ClearException();
+  // Allocate the ClassExt
+  Handle<ClassExt> new_ext(hs.NewHandle(ClassExt::Alloc(self)));
+  if (new_ext.Get() == nullptr) {
+    // OOM allocating the classExt.
+    // TODO Should we restore the suppressed exception?
+    self->AssertPendingOOMException();
+    return nullptr;
   } else {
-    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+    MemberOffset ext_offset(OFFSET_OF_OBJECT_MEMBER(Class, ext_data_));
+    bool set;
+    // Set the ext_data_ field using CAS semantics.
+    if (Runtime::Current()->IsActiveTransaction()) {
+      set = h_this->CasFieldStrongSequentiallyConsistentObject<true>(ext_offset,
+                                                                     ObjPtr<ClassExt>(nullptr),
+                                                                     new_ext.Get());
+    } else {
+      set = h_this->CasFieldStrongSequentiallyConsistentObject<false>(ext_offset,
+                                                                      ObjPtr<ClassExt>(nullptr),
+                                                                      new_ext.Get());
+    }
+    ObjPtr<ClassExt> ret(set ? new_ext.Get() : h_this->GetExtData());
+    DCHECK(!set || h_this->GetExtData() == new_ext.Get());
+    CHECK(!ret.IsNull());
+    // Restore the exception if there was one.
+    if (throwable.Get() != nullptr) {
+      self->SetException(throwable.Get());
+    }
+    return ret.Ptr();
   }
 }
 
@@ -95,10 +134,16 @@
       }
     }
 
-    // Remember the current exception.
-    CHECK(self->GetException() != nullptr);
-    h_this->SetVerifyError(self->GetException());
+    ObjPtr<ClassExt> ext(h_this->EnsureExtDataPresent(self));
+    if (!ext.IsNull()) {
+      self->AssertPendingException();
+      ext->SetVerifyError(self->GetException());
+    } else {
+      self->AssertPendingOOMException();
+    }
+    self->AssertPendingException();
   }
+
   static_assert(sizeof(Status) == sizeof(uint32_t), "Size of status not equal to uint32");
   if (Runtime::Current()->IsActiveTransaction()) {
     h_this->SetField32Volatile<true>(StatusOffset(), new_status);
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 57bb2ed..711914d 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -49,6 +49,7 @@
 
 namespace mirror {
 
+class ClassExt;
 class ClassLoader;
 class Constructor;
 class DexCache;
@@ -1130,10 +1131,13 @@
 
   void SetClinitThreadId(pid_t new_clinit_thread_id) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  Object* GetVerifyError() REQUIRES_SHARED(Locks::mutator_lock_) {
-    // DCHECK(IsErroneous());
-    return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_));
-  }
+  ClassExt* GetExtData() REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns the ExtData for this class, allocating one if necessary. This should be the only way
+  // to force ext_data_ to be set. No functions are available for changing an already set ext_data_
+  // since doing so is not allowed.
+  ClassExt* EnsureExtDataPresent(Thread* self)
+      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   uint16_t GetDexClassDefIndex() REQUIRES_SHARED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_class_def_idx_));
@@ -1322,8 +1326,6 @@
   ALWAYS_INLINE void SetMethodsPtrInternal(LengthPrefixedArray<ArtMethod>* new_methods)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetVerifyError(ObjPtr<Object> klass) REQUIRES_SHARED(Locks::mutator_lock_);
-
   template <bool throw_on_failure, bool use_referrers_cache>
   bool ResolvedFieldAccessTest(ObjPtr<Class> access_to,
                                ArtField* field,
@@ -1388,6 +1390,12 @@
   // runtime such as arrays and primitive classes).
   HeapReference<DexCache> dex_cache_;
 
+  // Extraneous class data that is not always needed. This field is allocated lazily and may
+  // only be set with 'this' locked. This is synchronized on 'this'.
+  // TODO(allight) We should probably synchronize it on something external or handle allocation in
+  // some other (safe) way to prevent possible deadlocks.
+  HeapReference<ClassExt> ext_data_;
+
   // The interface table (iftable_) contains pairs of a interface class and an array of the
   // interface methods. There is one pair per interface supported by this class.  That means one
   // pair for each interface we support directly, indirectly via superclass, or indirectly via a
@@ -1412,10 +1420,6 @@
   // check for interfaces and return null.
   HeapReference<Class> super_class_;
 
-  // If class verify fails, we must return same error on subsequent tries. We may store either
-  // the class of the error, or an actual instance of Throwable here.
-  HeapReference<Object> verify_error_;
-
   // Virtual method table (vtable), for use by "invoke-virtual".  The vtable from the superclass is
   // copied in, and virtual methods from our class either replace those from the super or are
   // appended. For abstract classes, methods may be created in the vtable that aren't in
diff --git a/runtime/mirror/class_ext.cc b/runtime/mirror/class_ext.cc
new file mode 100644
index 0000000..cc208e4
--- /dev/null
+++ b/runtime/mirror/class_ext.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "class_ext.h"
+
+#include "art_method-inl.h"
+#include "base/casts.h"
+#include "base/enums.h"
+#include "class-inl.h"
+#include "dex_file-inl.h"
+#include "gc/accounting/card_table-inl.h"
+#include "object-inl.h"
+#include "object_array.h"
+#include "object_array-inl.h"
+#include "stack_trace_element.h"
+#include "utils.h"
+#include "well_known_classes.h"
+
+namespace art {
+namespace mirror {
+
+GcRoot<Class> ClassExt::dalvik_system_ClassExt_;
+
+ClassExt* ClassExt::Alloc(Thread* self) {
+  DCHECK(dalvik_system_ClassExt_.Read() != nullptr);
+  return down_cast<ClassExt*>(dalvik_system_ClassExt_.Read()->AllocObject(self).Ptr());
+}
+
+void ClassExt::SetVerifyError(ObjPtr<Object> err) {
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_), err);
+  } else {
+    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_), err);
+  }
+}
+
+void ClassExt::SetClass(ObjPtr<Class> dalvik_system_ClassExt) {
+  CHECK(dalvik_system_ClassExt != nullptr);
+  dalvik_system_ClassExt_ = GcRoot<Class>(dalvik_system_ClassExt);
+}
+
+void ClassExt::ResetClass() {
+  CHECK(!dalvik_system_ClassExt_.IsNull());
+  dalvik_system_ClassExt_ = GcRoot<Class>(nullptr);
+}
+
+void ClassExt::VisitRoots(RootVisitor* visitor) {
+  dalvik_system_ClassExt_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
+}
+
+}  // namespace mirror
+}  // namespace art
diff --git a/runtime/mirror/class_ext.h b/runtime/mirror/class_ext.h
new file mode 100644
index 0000000..35eaae1
--- /dev/null
+++ b/runtime/mirror/class_ext.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_CLASS_EXT_H_
+#define ART_RUNTIME_MIRROR_CLASS_EXT_H_
+
+#include "class-inl.h"
+
+#include "gc_root.h"
+#include "object.h"
+#include "object_callbacks.h"
+#include "string.h"
+
+namespace art {
+
+struct ClassExtOffsets;
+
+namespace mirror {
+
+// C++ mirror of dalvik.system.ClassExt
+class MANAGED ClassExt : public Object {
+ public:
+  static uint32_t ClassSize(PointerSize pointer_size) {
+    uint32_t vtable_entries = Object::kVTableLength;
+    return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
+  }
+
+  // Size of an instance of dalvik.system.ClassExt.
+  static constexpr uint32_t InstanceSize() {
+    return sizeof(ClassExt);
+  }
+
+  void SetVerifyError(ObjPtr<Object> obj) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  Object* GetVerifyError() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<ClassExt>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_));
+  }
+
+  static void SetClass(ObjPtr<Class> dalvik_system_ClassExt);
+  static void ResetClass();
+  static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static ClassExt* Alloc(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+  // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
+  HeapReference<Object> verify_error_;
+
+  static GcRoot<Class> dalvik_system_ClassExt_;
+
+  friend struct art::ClassExtOffsets;  // for verifying offset information
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ClassExt);
+};
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_CLASS_EXT_H_
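Class::EnsureExtDataPresent() (in the class.cc hunk above) publishes this lazily allocated side structure with a compare-and-swap so racing threads agree on a single ClassExt. A generic sketch of the same publish-once pattern with std::atomic (illustrative only; the real code also handles transactions, handles, and a possibly pending exception):

#include <atomic>
#include <memory>

struct ClassExtData {};  // stand-in for mirror::ClassExt

class KlassData {
 public:
  // Returns the extension object, allocating and publishing it at most once.
  ClassExtData* EnsureExtDataPresent() {
    ClassExtData* existing = ext_data_.load(std::memory_order_acquire);
    if (existing != nullptr) {
      return existing;
    }
    auto fresh = std::make_unique<ClassExtData>();
    ClassExtData* expected = nullptr;
    if (ext_data_.compare_exchange_strong(expected, fresh.get(),
                                          std::memory_order_acq_rel)) {
      // We won the race; the object is now published (intentionally released to
      // the atomic here, standing in for a GC-managed object).
      return fresh.release();
    }
    // Another thread installed its object first; ours is freed, theirs is returned.
    return expected;
  }

 private:
  std::atomic<ClassExtData*> ext_data_{nullptr};
};

int main() {
  KlassData k;
  return k.EnsureExtDataPresent() == k.EnsureExtDataPresent() ? 0 : 1;
}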
diff --git a/runtime/mirror/emulated_stack_frame.cc b/runtime/mirror/emulated_stack_frame.cc
index 4ba71ea..d607040 100644
--- a/runtime/mirror/emulated_stack_frame.cc
+++ b/runtime/mirror/emulated_stack_frame.cc
@@ -173,13 +173,22 @@
 
   Handle<mirror::ObjectArray<mirror::Object>> references(hs.NewHandle(
       mirror::ObjectArray<mirror::Object>::Alloc(self, array_class, refs_size)));
+  if (references.Get() == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return nullptr;
+  }
+
   Handle<ByteArray> stack_frame(hs.NewHandle(ByteArray::Alloc(self, frame_size)));
+  if (stack_frame.Get() == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return nullptr;
+  }
 
   // Step 4 : Perform argument conversions (if required).
   ShadowFrameGetter<is_range> getter(first_src_reg, arg, caller_frame);
   EmulatedStackFrameAccessor setter(references, stack_frame, stack_frame->GetLength());
   if (!PerformConversions<ShadowFrameGetter<is_range>, EmulatedStackFrameAccessor>(
-      self, from_types, to_types, &getter, &setter, num_method_params)) {
+          self, caller_type, callee_type, &getter, &setter, num_method_params)) {
     return nullptr;
   }
 
@@ -197,9 +206,8 @@
                                             Handle<mirror::MethodType> callee_type,
                                             const uint32_t first_dest_reg,
                                             ShadowFrame* callee_frame) {
-  StackHandleScope<4> hs(self);
-  Handle<mirror::ObjectArray<mirror::Class>> from_types(hs.NewHandle(GetType()->GetPTypes()));
-  Handle<mirror::ObjectArray<mirror::Class>> to_types(hs.NewHandle(callee_type->GetPTypes()));
+  ObjPtr<mirror::ObjectArray<mirror::Class>> from_types(GetType()->GetPTypes());
+  ObjPtr<mirror::ObjectArray<mirror::Class>> to_types(callee_type->GetPTypes());
 
   const int32_t num_method_params = from_types->GetLength();
   if (to_types->GetLength() != num_method_params) {
@@ -207,6 +215,8 @@
     return false;
   }
 
+  StackHandleScope<3> hs(self);
+  Handle<mirror::MethodType> frame_callsite_type(hs.NewHandle(GetType()));
   Handle<mirror::ObjectArray<mirror::Object>> references(hs.NewHandle(GetReferences()));
   Handle<ByteArray> stack_frame(hs.NewHandle(GetStackFrame()));
 
@@ -214,7 +224,7 @@
   ShadowFrameSetter setter(callee_frame, first_dest_reg);
 
   return PerformConversions<EmulatedStackFrameAccessor, ShadowFrameSetter>(
-      self, from_types, to_types, &getter, &setter, num_method_params);
+      self, frame_callsite_type, callee_type, &getter, &setter, num_method_params);
 }
 
 void EmulatedStackFrame::GetReturnValue(Thread* self, JValue* value) {
diff --git a/runtime/mirror/method_handle_impl.h b/runtime/mirror/method_handle_impl.h
index 7bf9c5b..9054216 100644
--- a/runtime/mirror/method_handle_impl.h
+++ b/runtime/mirror/method_handle_impl.h
@@ -36,6 +36,10 @@
     return GetFieldObject<mirror::MethodType>(OFFSET_OF_OBJECT_MEMBER(MethodHandle, method_type_));
   }
 
+  mirror::MethodType* GetNominalType() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<mirror::MethodType>(OFFSET_OF_OBJECT_MEMBER(MethodHandle, nominal_type_));
+  }
+
   ArtField* GetTargetField() REQUIRES_SHARED(Locks::mutator_lock_) {
     return reinterpret_cast<ArtField*>(
         GetField64(OFFSET_OF_OBJECT_MEMBER(MethodHandle, art_field_or_method_)));
@@ -54,14 +58,14 @@
   }
 
  private:
-  HeapReference<mirror::Object> as_type_cache_;
+  HeapReference<mirror::MethodType> nominal_type_;
   HeapReference<mirror::MethodType> method_type_;
   uint64_t art_field_or_method_;
   uint32_t handle_kind_;
 
  private:
-  static MemberOffset AsTypeCacheOffset() {
-    return MemberOffset(OFFSETOF_MEMBER(MethodHandle, as_type_cache_));
+  static MemberOffset NominalTypeOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(MethodHandle, nominal_type_));
   }
   static MemberOffset MethodTypeOffset() {
     return MemberOffset(OFFSETOF_MEMBER(MethodHandle, method_type_));
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 3bf9d94..6d29ed3 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -135,25 +135,82 @@
   Monitor::Wait(self, this, ms, ns, true, kTimedWaiting);
 }
 
-inline Object* Object::GetReadBarrierPointer() {
+inline uint32_t Object::GetReadBarrierState(uintptr_t* fake_address_dependency) {
+#ifdef USE_BAKER_READ_BARRIER
+  CHECK(kUseBakerReadBarrier);
+#if defined(__arm__)
+  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
+  uintptr_t result;
+  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);
+  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
+  __asm__ __volatile__(
+      "ldr %[result], [%[obj], #4]\n\t"
+      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
+      // zero, without them being able to assume that fact.
+      "eor %[fad], %[result], %[result]\n\t"
+      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
+      : [obj] "r" (obj));
+  DCHECK_EQ(*fake_address_dependency, 0U);
+  LockWord lw(static_cast<uint32_t>(result));
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#elif defined(__aarch64__)
+  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
+  uintptr_t result;
+  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);
+  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
+  __asm__ __volatile__(
+      "ldr %w[result], [%[obj], #4]\n\t"
+      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
+      // zero, without them being able to assume that fact.
+      "eor %[fad], %[result], %[result]\n\t"
+      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
+      : [obj] "r" (obj));
+  DCHECK_EQ(*fake_address_dependency, 0U);
+  LockWord lw(static_cast<uint32_t>(result));
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#elif defined(__i386__) || defined(__x86_64__)
+  LockWord lw = GetLockWord(false);
+  // i386/x86_64 don't need fake address dependency. Use a compiler fence to avoid compiler
+  // reordering.
+  *fake_address_dependency = 0;
+  std::atomic_signal_fence(std::memory_order_acquire);
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#else
+  // mips/mips64
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+  UNUSED(fake_address_dependency);
+#endif
+#else  // !USE_BAKER_READ_BARRIER
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+  UNUSED(fake_address_dependency);
+#endif
+}
+
+inline uint32_t Object::GetReadBarrierState() {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
-  return reinterpret_cast<Object*>(GetLockWord(false).ReadBarrierState());
-#elif USE_BROOKS_READ_BARRIER
-  DCHECK(kUseBrooksReadBarrier);
-  return GetFieldObject<Object, kVerifyNone, kWithoutReadBarrier>(
-      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_));
+  LockWord lw(GetField<uint32_t, /*kIsVolatile*/false>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
+  uint32_t rb_state = lw.ReadBarrierState();
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  return rb_state;
 #else
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 #endif
 }
 
-inline Object* Object::GetReadBarrierPointerAcquire() {
+inline uint32_t Object::GetReadBarrierStateAcquire() {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
   LockWord lw(GetFieldAcquire<uint32_t>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-  return reinterpret_cast<Object*>(lw.ReadBarrierState());
+  uint32_t rb_state = lw.ReadBarrierState();
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  return rb_state;
 #else
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
@@ -169,48 +226,38 @@
 #endif
 }
 
-inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
+inline void Object::SetReadBarrierState(uint32_t rb_state) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
-  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
-  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
   LockWord lw = GetLockWord(false);
-  lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+  lw.SetReadBarrierState(rb_state);
   SetLockWord(lw, false);
-#elif USE_BROOKS_READ_BARRIER
-  DCHECK(kUseBrooksReadBarrier);
-  // We don't mark the card as this occurs as part of object allocation. Not all objects have
-  // backing cards, such as large objects.
-  SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(
-      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), rb_ptr);
 #else
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
-  UNUSED(rb_ptr);
+  UNUSED(rb_state);
 #endif
 }
 
 template<bool kCasRelease>
-inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
+inline bool Object::AtomicSetReadBarrierState(uint32_t expected_rb_state, uint32_t rb_state) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
-  DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
-  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
-  DCHECK_NE(expected_rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
-  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+  DCHECK(ReadBarrier::IsValidReadBarrierState(expected_rb_state)) << expected_rb_state;
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
   LockWord expected_lw;
   LockWord new_lw;
   do {
     LockWord lw = GetLockWord(false);
-    if (UNLIKELY(reinterpret_cast<Object*>(lw.ReadBarrierState()) != expected_rb_ptr)) {
+    if (UNLIKELY(lw.ReadBarrierState() != expected_rb_state)) {
       // Lost the race.
       return false;
     }
     expected_lw = lw;
-    expected_lw.SetReadBarrierState(
-        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
+    expected_lw.SetReadBarrierState(expected_rb_state);
     new_lw = lw;
-    new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+    new_lw.SetReadBarrierState(rb_state);
     // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
     // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
     // an object and then changes the object from gray to black, the field updates (stores) will be
@@ -219,23 +266,8 @@
              CasLockWordWeakRelease(expected_lw, new_lw) :
              CasLockWordWeakRelaxed(expected_lw, new_lw)));
   return true;
-#elif USE_BROOKS_READ_BARRIER
-  DCHECK(kUseBrooksReadBarrier);
-  MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_);
-  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + offset.SizeValue();
-  Atomic<uint32_t>* atomic_rb_ptr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
-  HeapReference<Object> expected_ref(HeapReference<Object>::FromMirrorPtr(expected_rb_ptr));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(rb_ptr));
-  do {
-    if (UNLIKELY(atomic_rb_ptr->LoadRelaxed() != expected_ref.reference_)) {
-      // Lost the race.
-      return false;
-    }
-  } while (!atomic_rb_ptr->CompareExchangeWeakSequentiallyConsistent(expected_ref.reference_,
-                                                                     new_ref.reference_));
-  return true;
 #else
-  UNUSED(expected_rb_ptr, rb_ptr);
+  UNUSED(expected_rb_state, rb_state);
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 #endif
@@ -259,19 +291,12 @@
 }
 
 
-inline void Object::AssertReadBarrierPointer() const {
-  if (kUseBakerReadBarrier) {
-    Object* obj = const_cast<Object*>(this);
-    DCHECK(obj->GetReadBarrierPointer() == nullptr)
-        << "Bad Baker pointer: obj=" << reinterpret_cast<void*>(obj)
-        << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer());
-  } else {
-    CHECK(kUseBrooksReadBarrier);
-    Object* obj = const_cast<Object*>(this);
-    DCHECK_EQ(obj, obj->GetReadBarrierPointer())
-        << "Bad Brooks pointer: obj=" << reinterpret_cast<void*>(obj)
-        << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer());
-  }
+inline void Object::AssertReadBarrierState() const {
+  CHECK(kUseBakerReadBarrier);
+  Object* obj = const_cast<Object*>(this);
+  DCHECK(obj->GetReadBarrierState() == ReadBarrier::WhiteState())
+      << "Bad Baker pointer: obj=" << reinterpret_cast<void*>(obj)
+      << " rb_state" << reinterpret_cast<void*>(obj->GetReadBarrierState());
 }
 
 template<VerifyObjectFlags kVerifyFlags>
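
The AtomicSetReadBarrierState path above keeps the existing lock-free pattern: read the lock
word, bail out if the read barrier bits no longer match the expected state, otherwise CAS in a
copy with only those bits changed, retrying if unrelated bits moved underneath. A standalone
sketch of that retry structure over a plain std::atomic follows; LockWord, the CAS helpers and
the bit layout (low bit = rb state, as in rb_state_mask_) are simplified stand-ins, not the ART
types.

#include <atomic>
#include <cstdint>

namespace sketch {

constexpr uint32_t kRbStateMask = 0x1;  // low bit: 0 = white, 1 = gray

// Change only the read-barrier bit of a packed word. Returns false if another
// thread changed the state first; retries if other bits of the word changed.
inline bool AtomicSetReadBarrierState(std::atomic<uint32_t>& lock_word,
                                      uint32_t expected_rb_state,
                                      uint32_t rb_state) {
  uint32_t expected;
  uint32_t desired;
  do {
    uint32_t current = lock_word.load(std::memory_order_relaxed);
    if ((current & kRbStateMask) != expected_rb_state) {
      return false;  // Lost the race.
    }
    expected = (current & ~kRbStateMask) | expected_rb_state;
    desired = (current & ~kRbStateMask) | rb_state;
    // A weak CAS may fail spuriously or because unrelated bits of the word
    // changed concurrently; both cases simply loop and re-read.
  } while (!lock_word.compare_exchange_weak(expected, desired,
                                            std::memory_order_relaxed));
  return true;
}

}  // namespace sketch
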
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 8cfb60e..f5b9ab3 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -235,8 +235,6 @@
   }
   for (ObjPtr<Class> cur = c; cur != nullptr; cur = cur->GetSuperClass()) {
     for (ArtField& field : cur->GetIFields()) {
-      StackHandleScope<1> hs(Thread::Current());
-      Handle<Object> h_object(hs.NewHandle(new_value));
       if (field.GetOffset().Int32Value() == field_offset.Int32Value()) {
         CHECK_NE(field.GetTypeAsPrimitiveType(), Primitive::kPrimNot);
         // TODO: resolve the field type for moving GC.
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 886637b..67b5ddb 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -94,19 +94,22 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   void SetClass(ObjPtr<Class> new_klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // TODO: Clean these up and change to return int32_t
-  Object* GetReadBarrierPointer() REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Get the read barrier pointer with release semantics, only supported for baker.
-  Object* GetReadBarrierPointerAcquire() REQUIRES_SHARED(Locks::mutator_lock_);
+  // Get the read barrier state with a fake address dependency.
+  // '*fake_address_dependency' will be set to 0.
+  ALWAYS_INLINE uint32_t GetReadBarrierState(uintptr_t* fake_address_dependency)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  // This version does not offer any special mechanism to prevent load-load reordering.
+  ALWAYS_INLINE uint32_t GetReadBarrierState() REQUIRES_SHARED(Locks::mutator_lock_);
+  // Get the read barrier state with a load-acquire.
+  ALWAYS_INLINE uint32_t GetReadBarrierStateAcquire() REQUIRES_SHARED(Locks::mutator_lock_);
 
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
   NO_RETURN
 #endif
-  void SetReadBarrierPointer(Object* rb_ptr) REQUIRES_SHARED(Locks::mutator_lock_);
+  ALWAYS_INLINE void SetReadBarrierState(uint32_t rb_state) REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<bool kCasRelease = false>
-  ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
+  ALWAYS_INLINE bool AtomicSetReadBarrierState(uint32_t expected_rb_state, uint32_t rb_state)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE uint32_t GetMarkBit() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -114,7 +117,8 @@
   ALWAYS_INLINE bool AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void AssertReadBarrierPointer() const REQUIRES_SHARED(Locks::mutator_lock_);
+  // Assert that the read barrier state is in the default (white) state.
+  ALWAYS_INLINE void AssertReadBarrierState() const REQUIRES_SHARED(Locks::mutator_lock_);
 
   // The verifier treats all interfaces as java.lang.Object and relies on runtime checks in
   // invoke-interface to detect incompatible interface types.
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index eb74fcf..f583167 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -771,7 +771,7 @@
         return false;
       }
       // Can't deflate if our lock count is too high.
-      if (monitor->lock_count_ > LockWord::kThinLockMaxCount) {
+      if (static_cast<uint32_t>(monitor->lock_count_) > LockWord::kThinLockMaxCount) {
         return false;
       }
       // Deflate to a thin lock.
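
The cast added above makes the comparison against LockWord::kThinLockMaxCount explicitly
unsigned. The sketch below (with an illustrative constant, not ART's) shows the pitfall the
cast documents: when a signed count meets an unsigned operand, the signed side is converted,
so a negative value compares as a very large number and the compiler warns under
-Wsign-compare.

#include <cstdint>
#include <iostream>

int main() {
  constexpr uint32_t kThinLockMaxCount = 4095;  // illustrative value only
  int lock_count = -1;                          // a negative count, for demonstration

  // Explicitly unsigned comparison: -1 converts to 0xFFFFFFFF, so this is true.
  if (static_cast<uint32_t>(lock_count) > kThinLockMaxCount) {
    std::cout << "negative count compares as too high\n";
  }
  // Widening both sides to a signed 64-bit type keeps the arithmetic meaning.
  if (static_cast<int64_t>(lock_count) > static_cast<int64_t>(kThinLockMaxCount)) {
    std::cout << "not printed: -1 is less than 4095\n";
  }
  return 0;
}
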
diff --git a/runtime/monitor_test.cc b/runtime/monitor_test.cc
index 4ee46dc..4fbfe47 100644
--- a/runtime/monitor_test.cc
+++ b/runtime/monitor_test.cc
@@ -401,14 +401,11 @@
   Thread* const self = Thread::Current();
   ThreadPool thread_pool("the pool", 2);
   ScopedObjectAccess soa(self);
-  StackHandleScope<3> hs(self);
+  StackHandleScope<1> hs(self);
   Handle<mirror::Object> obj1(
       hs.NewHandle<mirror::Object>(mirror::String::AllocFromModifiedUtf8(self, "hello, world!")));
-  Handle<mirror::Object> obj2(
-      hs.NewHandle<mirror::Object>(mirror::String::AllocFromModifiedUtf8(self, "hello, world!")));
   {
     ObjectLock<mirror::Object> lock1(self, obj1);
-    ObjectLock<mirror::Object> lock2(self, obj1);
     {
       ObjectTryLock<mirror::Object> trylock(self, obj1);
       EXPECT_TRUE(trylock.Acquired());
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 8d85425..adf35b6 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -420,8 +420,10 @@
   }
 }
 
-static bool SetRuntimeStatValue(JNIEnv* env, jobjectArray result, VMDebugRuntimeStatId id,
-                                std::string value) {
+static bool SetRuntimeStatValue(JNIEnv* env,
+                                jobjectArray result,
+                                VMDebugRuntimeStatId id,
+                                const std::string& value) {
   ScopedLocalRef<jstring> jvalue(env, env->NewStringUTF(value.c_str()));
   if (jvalue.get() == nullptr) {
     return false;
diff --git a/runtime/native/java_lang_reflect_Executable.cc b/runtime/native/java_lang_reflect_Executable.cc
index 1b128fb..73b81a7 100644
--- a/runtime/native/java_lang_reflect_Executable.cc
+++ b/runtime/native/java_lang_reflect_Executable.cc
@@ -136,7 +136,7 @@
   Handle<mirror::Class> parameter_class =
       hs.NewHandle(soa.Decode<mirror::Class>(WellKnownClasses::java_lang_reflect_Parameter));
   ArtMethod* parameter_init =
-      soa.DecodeMethod(WellKnownClasses::java_lang_reflect_Parameter_init);
+      jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Parameter_init);
 
   // Mutable handles used in the loop below to ensure cleanup without scaling the number of
   // handles by the number of parameters.
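
This file is one of many below that switch from soa.DecodeMethod()/soa.EncodeMethod() to the
free functions jni::DecodeArtMethod()/jni::EncodeArtMethod() from jni_internal.h, so callers no
longer need a ScopedObjectAccess just to convert a jmethodID. The removed helpers later in this
diff show that the conversion itself is a reinterpret_cast; the sketch below captures that shape
with simplified stand-in types (it is not ART's actual jni_internal.h).

#include <cstdint>

struct ArtMethod;                      // opaque runtime representation (stand-in)
struct ArtField;
typedef struct _jmethodID* jmethodID;  // opaque JNI handles (stand-ins)
typedef struct _jfieldID* jfieldID;

namespace jni_sketch {

// A jmethodID is modeled as a type-erased ArtMethod pointer, so decoding is a
// plain reinterpret_cast and needs no thread-state or lock assertions.
inline ArtMethod* DecodeArtMethod(jmethodID method_id) {
  return reinterpret_cast<ArtMethod*>(method_id);
}

inline jmethodID EncodeArtMethod(ArtMethod* method) {
  return reinterpret_cast<jmethodID>(method);
}

inline ArtField* DecodeArtField(jfieldID field_id) {
  return reinterpret_cast<ArtField*>(field_id);
}

inline jfieldID EncodeArtField(ArtField* field) {
  return reinterpret_cast<jfieldID>(field);
}

}  // namespace jni_sketch
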
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index 5ab6097..c58854b 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -25,6 +25,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "dex_file-inl.h"
+#include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "sigchain.h"
@@ -33,7 +34,7 @@
 
 static const char* GetMethodShorty(JNIEnv* env, jmethodID mid) {
   ScopedObjectAccess soa(env);
-  ArtMethod* m = soa.DecodeMethod(mid);
+  ArtMethod* m = jni::DecodeArtMethod(mid);
   return m->GetShorty();
 }
 
@@ -90,14 +91,14 @@
   GetMethodShorty, GetNativeMethodCount, GetNativeMethods
 };
 
-bool LoadNativeBridge(std::string& native_bridge_library_filename) {
+bool LoadNativeBridge(const std::string& native_bridge_library_filename) {
   VLOG(startup) << "Runtime::Setup native bridge library: "
       << (native_bridge_library_filename.empty() ? "(empty)" : native_bridge_library_filename);
   return android::LoadNativeBridge(native_bridge_library_filename.c_str(),
                                    &native_bridge_art_callbacks_);
 }
 
-void PreInitializeNativeBridge(std::string dir) {
+void PreInitializeNativeBridge(const std::string& dir) {
   VLOG(startup) << "Runtime::Pre-initialize native bridge";
 #ifndef __APPLE__  // Mac OS does not support CLONE_NEWNS.
   if (unshare(CLONE_NEWNS) == -1) {
diff --git a/runtime/native_bridge_art_interface.h b/runtime/native_bridge_art_interface.h
index 090cddb..c86e5da 100644
--- a/runtime/native_bridge_art_interface.h
+++ b/runtime/native_bridge_art_interface.h
@@ -26,10 +26,10 @@
 // Mirror libnativebridge interface. Done to have the ART callbacks out of line, and not require
 // the system/core header file in other files.
 
-bool LoadNativeBridge(std::string& native_bridge_library_filename);
+bool LoadNativeBridge(const std::string& native_bridge_library_filename);
 
 // This is mostly for testing purposes, as in a full system this is called by Zygote code.
-void PreInitializeNativeBridge(std::string dir);
+void PreInitializeNativeBridge(const std::string& dir);
 
 void InitializeNativeBridge(JNIEnv* env, const char* instruction_set);
 
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index 00ab577..2376889 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -256,7 +256,7 @@
   Drain(2U, prefix, pipe, os);
 }
 
-static bool RunCommand(std::string cmd) {
+static bool RunCommand(const std::string& cmd) {
   FILE* stream = popen(cmd.c_str(), "r");
   if (stream) {
     pclose(stream);
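
RunCommand here, like SetRuntimeStatValue, LoadNativeBridge, PreInitializeNativeBridge and
several functions later in this diff, now takes its std::string argument by const reference
instead of by value, so each call binds to the caller's string rather than copying it. A
minimal illustration (the names and the command string are examples only):

#include <iostream>
#include <string>

// Copies `cmd` into a fresh std::string on every invocation.
static bool RunCommandByValue(std::string cmd) {
  return !cmd.empty();
}

// Binds to the caller's string; no allocation or copy is made.
static bool RunCommandByRef(const std::string& cmd) {
  return !cmd.empty();
}

int main() {
  const std::string cmd = "addr2line -f -e libart.so 0xdeadbeef";
  std::cout << RunCommandByValue(cmd) << '\n';  // copies the buffer
  std::cout << RunCommandByRef(cmd) << '\n';    // no copy
  return 0;
}
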
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index d18e946..d4337b9 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -999,7 +999,7 @@
 
 // Turn an absolute path into a path relative to the current working
 // directory.
-static std::string MakePathRelative(std::string target) {
+static std::string MakePathRelative(const std::string& target) {
   char buf[MAXPATHLEN];
   std::string cwd = getcwd(buf, MAXPATHLEN);
 
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index fde0a2c..651a6ee 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -28,6 +28,7 @@
 #include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
+#include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "oat_file_assistant.h"
 #include "obj_ptr-inl.h"
@@ -224,9 +225,10 @@
   }
 }
 
+template <typename T>
 static void IterateOverJavaDexFile(ObjPtr<mirror::Object> dex_file,
                                    ArtField* const cookie_field,
-                                   std::function<bool(const DexFile*)> fn)
+                                   const T& fn)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (dex_file != nullptr) {
     mirror::LongArray* long_array = cookie_field->GetObject(dex_file)->AsLongArray();
@@ -247,26 +249,27 @@
   }
 }
 
+template <typename T>
 static void IterateOverPathClassLoader(
-    ScopedObjectAccessAlreadyRunnable& soa,
     Handle<mirror::ClassLoader> class_loader,
     MutableHandle<mirror::ObjectArray<mirror::Object>> dex_elements,
-    std::function<bool(const DexFile*)> fn) REQUIRES_SHARED(Locks::mutator_lock_) {
+    const T& fn) REQUIRES_SHARED(Locks::mutator_lock_) {
   // Handle this step.
   // Handle as if this is the child PathClassLoader.
   // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
   // We need to get the DexPathList and loop through it.
-  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const cookie_field =
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Object> dex_path_list =
-      soa.DecodeField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
-      GetObject(class_loader.Get());
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
+          GetObject(class_loader.Get());
   if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
     // DexPathList has an array dexElements of Elements[] which each contain a dex file.
     ObjPtr<mirror::Object> dex_elements_obj =
-        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
-        GetObject(dex_path_list);
+        jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+            GetObject(dex_path_list);
     // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
     // at the mCookie which is a DexFile vector.
     if (dex_elements_obj != nullptr) {
@@ -323,7 +326,7 @@
       hs.NewHandle<mirror::ObjectArray<mirror::Object>>(nullptr));
   Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(class_loader));
 
-  IterateOverPathClassLoader(soa, h_class_loader, dex_elements, GetDexFilesFn);
+  IterateOverPathClassLoader(h_class_loader, dex_elements, GetDexFilesFn);
 
   return true;
 }
@@ -337,9 +340,10 @@
     return;
   }
 
-  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const cookie_field =
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Class> const element_class = soa.Decode<mirror::Class>(
       WellKnownClasses::dalvik_system_DexPathList__Element);
   ObjPtr<mirror::Class> const dexfile_class = soa.Decode<mirror::Class>(
@@ -377,7 +381,7 @@
   }
 }
 
-static bool AreSharedLibrariesOk(const std::string shared_libraries,
+static bool AreSharedLibrariesOk(const std::string& shared_libraries,
                                  std::priority_queue<DexFileAndClassPair>& queue) {
   if (shared_libraries.empty()) {
     if (queue.empty()) {
@@ -398,7 +402,7 @@
     while (!temp.empty() && index < shared_libraries_split.size() - 1) {
       DexFileAndClassPair pair(temp.top());
       const DexFile* dex_file = pair.GetDexFile();
-      std::string dex_filename(dex_file->GetLocation());
+      const std::string& dex_filename = dex_file->GetLocation();
       uint32_t dex_checksum = dex_file->GetLocationChecksum();
       if (dex_filename != shared_libraries_split[index] ||
           dex_checksum != std::stoul(shared_libraries_split[index + 1])) {
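
The iteration helpers above now take the callback as a template parameter (`const T& fn`)
instead of a std::function<bool(const DexFile*)>, so the lambda's concrete type is preserved:
no type-erased wrapper is constructed per call and the callback can be inlined. A reduced
sketch of the before/after shape, with a stand-in DexFile type:

#include <functional>
#include <vector>

struct DexFile { int id; };  // stand-in, not ART's DexFile

// Before: every call wraps the callable in a std::function, which type-erases
// it and may heap-allocate for large captures.
static void IterateErased(const std::vector<const DexFile*>& files,
                          const std::function<bool(const DexFile*)>& fn) {
  for (const DexFile* file : files) {
    if (!fn(file)) {
      return;  // the callback asked to stop early
    }
  }
}

// After: the callable keeps its concrete type, so the compiler can inline it.
template <typename T>
static void IterateDirect(const std::vector<const DexFile*>& files, const T& fn) {
  for (const DexFile* file : files) {
    if (!fn(file)) {
      return;
    }
  }
}

int main() {
  DexFile a{1};
  DexFile b{2};
  std::vector<const DexFile*> files = {&a, &b};
  int seen = 0;
  IterateErased(files, [&](const DexFile*) { ++seen; return true; });
  IterateDirect(files, [&](const DexFile*) { ++seen; return true; });
  return seen == 4 ? 0 : 1;
}
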
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index ee5002f..4afca7d 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -67,6 +67,11 @@
     return data;
   }
 
+  uint8_t* GetOptimizedCodeInfoPtr() {
+    DCHECK(IsOptimized());
+    return code_ - vmap_table_offset_;
+  }
+
   CodeInfo GetOptimizedCodeInfo() const {
     return CodeInfo(GetOptimizedCodeInfoPtr());
   }
diff --git a/runtime/obj_ptr.h b/runtime/obj_ptr.h
index 9318232..d24c6fb 100644
--- a/runtime/obj_ptr.h
+++ b/runtime/obj_ptr.h
@@ -20,6 +20,7 @@
 #include <ostream>
 #include <type_traits>
 
+#include "base/macros.h"
 #include "base/mutex.h"  // For Locks::mutator_lock_.
 #include "globals.h"
 
@@ -41,17 +42,26 @@
  public:
   ALWAYS_INLINE ObjPtr() REQUIRES_SHARED(Locks::mutator_lock_) : reference_(0u) {}
 
-  ALWAYS_INLINE ObjPtr(std::nullptr_t) REQUIRES_SHARED(Locks::mutator_lock_) : reference_(0u) {}
+  // Note: The following constructors allow implicit conversion. This simplifies code that uses
+  //       them, e.g., for parameter passing. However, in general, implicit-conversion constructors
+  //       are discouraged and detected by cpplint and clang-tidy. So mark these constructors
+  //       as NOLINT (without category, as the categories are different).
+
+  ALWAYS_INLINE ObjPtr(std::nullptr_t)  // NOLINT
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      : reference_(0u) {}
 
   template <typename Type>
-  ALWAYS_INLINE ObjPtr(Type* ptr) REQUIRES_SHARED(Locks::mutator_lock_)
+  ALWAYS_INLINE ObjPtr(Type* ptr)  // NOLINT
+      REQUIRES_SHARED(Locks::mutator_lock_)
       : reference_(Encode(static_cast<MirrorType*>(ptr))) {
     static_assert(std::is_base_of<MirrorType, Type>::value,
                   "Input type must be a subtype of the ObjPtr type");
   }
 
   template <typename Type>
-  ALWAYS_INLINE ObjPtr(const ObjPtr<Type, kPoison>& other) REQUIRES_SHARED(Locks::mutator_lock_)
+  ALWAYS_INLINE ObjPtr(const ObjPtr<Type, kPoison>& other)  // NOLINT
+      REQUIRES_SHARED(Locks::mutator_lock_)
       : reference_(Encode(static_cast<MirrorType*>(other.Ptr()))) {
     static_assert(std::is_base_of<MirrorType, Type>::value,
                   "Input type must be a subtype of the ObjPtr type");
@@ -154,6 +164,9 @@
   uintptr_t reference_;
 };
 
+static_assert(std::is_trivially_copyable<ObjPtr<void>>::value,
+              "ObjPtr should be trivially copyable");
+
 // Hash function for stl data structures.
 class HashObjPtr {
  public:
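
Two properties of ObjPtr are touched above: its single-argument constructors are deliberately
implicit (hence the NOLINT markers) so that raw mirror pointers convert at call sites, and the
new static_assert pins down that the class stays trivially copyable. The simplified stand-in
below (not ART's ObjPtr; it skips poisoning and the mutator-lock annotations) shows why both
hold for a class that is just one encoded word:

#include <cstdint>
#include <type_traits>

template <typename MirrorType>
class ObjPtrSketch {
 public:
  ObjPtrSketch() : reference_(0u) {}

  // Implicit on purpose: lets a function taking ObjPtrSketch<Object> accept an
  // Object* argument directly.
  ObjPtrSketch(MirrorType* ptr)  // NOLINT
      : reference_(reinterpret_cast<uintptr_t>(ptr)) {}

  MirrorType* Ptr() const { return reinterpret_cast<MirrorType*>(reference_); }

 private:
  uintptr_t reference_;  // a single word; copies and destruction stay trivial
};

struct Object {};

// Mirrors the static_assert added in the diff: trivially copyable types can be
// passed and returned by value with no user-defined copy machinery involved.
static_assert(std::is_trivially_copyable<ObjPtrSketch<Object>>::value,
              "ObjPtrSketch should be trivially copyable");

void TakesObjPtr(ObjPtrSketch<Object> ptr) { (void)ptr.Ptr(); }

int main() {
  Object obj;
  TakesObjPtr(&obj);  // implicit conversion from Object* at the call site
  return 0;
}
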
diff --git a/runtime/openjdkjvmti/object_tagging.cc b/runtime/openjdkjvmti/object_tagging.cc
index b359f36..b983e79 100644
--- a/runtime/openjdkjvmti/object_tagging.cc
+++ b/runtime/openjdkjvmti/object_tagging.cc
@@ -47,6 +47,16 @@
 
 namespace openjdkjvmti {
 
+void ObjectTagTable::Lock() {
+  allow_disallow_lock_.ExclusiveLock(art::Thread::Current());
+}
+void ObjectTagTable::Unlock() {
+  allow_disallow_lock_.ExclusiveUnlock(art::Thread::Current());
+}
+void ObjectTagTable::AssertLocked() {
+  allow_disallow_lock_.AssertHeld(art::Thread::Current());
+}
+
 void ObjectTagTable::UpdateTableWithReadBarrier() {
   update_since_last_sweep_ = true;
 
@@ -80,6 +90,13 @@
 
   return RemoveLocked(self, obj, tag);
 }
+bool ObjectTagTable::RemoveLocked(art::mirror::Object* obj, jlong* tag) {
+  art::Thread* self = art::Thread::Current();
+  allow_disallow_lock_.AssertHeld(self);
+  Wait(self);
+
+  return RemoveLocked(self, obj, tag);
+}
 
 bool ObjectTagTable::RemoveLocked(art::Thread* self, art::mirror::Object* obj, jlong* tag) {
   auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
@@ -109,12 +126,29 @@
 }
 
 bool ObjectTagTable::Set(art::mirror::Object* obj, jlong new_tag) {
+  if (new_tag == 0) {
+    jlong tmp;
+    return Remove(obj, &tmp);
+  }
+
   art::Thread* self = art::Thread::Current();
   art::MutexLock mu(self, allow_disallow_lock_);
   Wait(self);
 
   return SetLocked(self, obj, new_tag);
 }
+bool ObjectTagTable::SetLocked(art::mirror::Object* obj, jlong new_tag) {
+  if (new_tag == 0) {
+    jlong tmp;
+    return RemoveLocked(obj, &tmp);
+  }
+
+  art::Thread* self = art::Thread::Current();
+  allow_disallow_lock_.AssertHeld(self);
+  Wait(self);
+
+  return SetLocked(self, obj, new_tag);
+}
 
 bool ObjectTagTable::SetLocked(art::Thread* self, art::mirror::Object* obj, jlong new_tag) {
   auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
diff --git a/runtime/openjdkjvmti/object_tagging.h b/runtime/openjdkjvmti/object_tagging.h
index 071d139..997cedb 100644
--- a/runtime/openjdkjvmti/object_tagging.h
+++ b/runtime/openjdkjvmti/object_tagging.h
@@ -46,10 +46,16 @@
   bool Remove(art::mirror::Object* obj, jlong* tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
+  bool RemoveLocked(art::mirror::Object* obj, jlong* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
 
   bool Set(art::mirror::Object* obj, jlong tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
+  bool SetLocked(art::mirror::Object* obj, jlong tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
 
   bool GetTag(art::mirror::Object* obj, jlong* result)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
@@ -60,6 +66,30 @@
 
     return GetTagLocked(self, obj, result);
   }
+  bool GetTagLocked(art::mirror::Object* obj, jlong* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    art::Thread* self = art::Thread::Current();
+    allow_disallow_lock_.AssertHeld(self);
+    Wait(self);
+
+    return GetTagLocked(self, obj, result);
+  }
+
+  jlong GetTagOrZero(art::mirror::Object* obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_) {
+    jlong tmp = 0;
+    GetTag(obj, &tmp);
+    return tmp;
+  }
+  jlong GetTagOrZeroLocked(art::mirror::Object* obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    jlong tmp = 0;
+    GetTagLocked(obj, &tmp);
+    return tmp;
+  }
 
   void Sweep(art::IsMarkedVisitor* visitor)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
@@ -74,6 +104,10 @@
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
 
+  void Lock() ACQUIRE(allow_disallow_lock_);
+  void Unlock() RELEASE(allow_disallow_lock_);
+  void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
+
  private:
   bool SetLocked(art::Thread* self, art::mirror::Object* obj, jlong tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
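
The new Lock()/Unlock()/AssertLocked() entry points and the *Locked variants lean on Clang's
thread-safety analysis: ACQUIRE/RELEASE/ASSERT_CAPABILITY and REQUIRES expand to capability
attributes, so callers of the *Locked methods are checked to actually hold allow_disallow_lock_.
A self-contained sketch of the same arrangement, using the raw Clang attributes rather than
ART's macros and a stand-in tag table (compile with clang -Wthread-safety to see the checking):

#include <map>
#include <mutex>

#if defined(__clang__)
#define TS_ATTR(x) __attribute__((x))
#else
#define TS_ATTR(x)  // the attributes are a no-op elsewhere
#endif
#define CAPABILITY(x)  TS_ATTR(capability(x))
#define ACQUIRE(...)   TS_ATTR(acquire_capability(__VA_ARGS__))
#define RELEASE(...)   TS_ATTR(release_capability(__VA_ARGS__))
#define REQUIRES(...)  TS_ATTR(requires_capability(__VA_ARGS__))
#define GUARDED_BY(x)  TS_ATTR(guarded_by(x))

class CAPABILITY("mutex") Mutex {
 public:
  void Lock() ACQUIRE() { mu_.lock(); }
  void Unlock() RELEASE() { mu_.unlock(); }
 private:
  std::mutex mu_;
};

class TagTableSketch {
 public:
  void Lock() ACQUIRE(lock_) { lock_.Lock(); }
  void Unlock() RELEASE(lock_) { lock_.Unlock(); }

  // Callers must already hold the lock; calling this without it is a
  // -Wthread-safety error at compile time.
  long GetTagOrZeroLocked(const void* obj) REQUIRES(lock_) {
    auto it = tags_.find(obj);
    return it == tags_.end() ? 0 : it->second;
  }

  // Convenience wrapper that takes and releases the lock itself.
  long GetTagOrZero(const void* obj) {
    Lock();
    long tag = GetTagOrZeroLocked(obj);
    Unlock();
    return tag;
  }

 private:
  Mutex lock_;
  std::map<const void*, long> tags_ GUARDED_BY(lock_);
};
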
diff --git a/runtime/openjdkjvmti/ti_method.cc b/runtime/openjdkjvmti/ti_method.cc
index 6210936..e391a9d 100644
--- a/runtime/openjdkjvmti/ti_method.cc
+++ b/runtime/openjdkjvmti/ti_method.cc
@@ -34,6 +34,7 @@
 #include "art_jvmti.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "jni_internal.h"
 #include "modifiers.h"
 #include "scoped_thread_state_change-inl.h"
 
@@ -45,7 +46,7 @@
                                      char** signature_ptr,
                                      char** generic_ptr) {
   art::ScopedObjectAccess soa(art::Thread::Current());
-  art::ArtMethod* art_method = soa.DecodeMethod(method);
+  art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
   art_method = art_method->GetInterfaceMethodIfProxy(art::kRuntimePointerSize);
 
   JvmtiUniquePtr name_copy;
@@ -93,10 +94,10 @@
     return ERR(NULL_POINTER);
   }
 
-  art::ScopedObjectAccess soa(art::Thread::Current());
-  art::ArtMethod* art_method = soa.DecodeMethod(method);
+  art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
  // Note: No GetInterfaceMethodIfProxy, we want the actual class.
 
+  art::ScopedObjectAccess soa(art::Thread::Current());
   art::mirror::Class* klass = art_method->GetDeclaringClass();
   *declaring_class_ptr = soa.AddLocalReference<jclass>(klass);
 
@@ -110,9 +111,7 @@
     return ERR(NULL_POINTER);
   }
 
-  art::ScopedObjectAccess soa(art::Thread::Current());
-  art::ArtMethod* art_method = soa.DecodeMethod(method);
-
+  art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
   uint32_t modifiers = art_method->GetAccessFlags();
 
   // Note: Keep this code in sync with Executable.fixMethodFlags.
diff --git a/runtime/openjdkjvmti/ti_stack.cc b/runtime/openjdkjvmti/ti_stack.cc
index 33e677f..6f8976f 100644
--- a/runtime/openjdkjvmti/ti_stack.cc
+++ b/runtime/openjdkjvmti/ti_stack.cc
@@ -37,6 +37,7 @@
 #include "dex_file.h"
 #include "dex_file_annotations.h"
 #include "jni_env_ext.h"
+#include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/dex_cache.h"
 #include "scoped_thread_state_change-inl.h"
@@ -64,7 +65,7 @@
 
     if (start == 0) {
       m = m->GetInterfaceMethodIfProxy(art::kRuntimePointerSize);
-      jmethodID id = soa.EncodeMethod(m);
+      jmethodID id = art::jni::EncodeArtMethod(m);
 
       art::mirror::DexCache* dex_cache = m->GetDexCache();
       int32_t line_number = -1;
diff --git a/runtime/openjdkjvmti/transform.cc b/runtime/openjdkjvmti/transform.cc
index 3443aea..fa2983c 100644
--- a/runtime/openjdkjvmti/transform.cc
+++ b/runtime/openjdkjvmti/transform.cc
@@ -283,7 +283,7 @@
 // Install the new dex file.
 // TODO do error checks for bad state (method in a stack, changes to number of methods/fields/etc).
 jvmtiError MoveTransformedFileIntoRuntime(jclass jklass,
-                                          std::string original_location,
+                                          const std::string& original_location,
                                           jint data_len,
                                           unsigned char* dex_data) {
   const char* dex_file_name = "Ldalvik/system/DexFile;";
diff --git a/runtime/openjdkjvmti/transform.h b/runtime/openjdkjvmti/transform.h
index 85bcb00..a76ed93 100644
--- a/runtime/openjdkjvmti/transform.h
+++ b/runtime/openjdkjvmti/transform.h
@@ -54,7 +54,7 @@
 
 // Install the new dex file.
 jvmtiError MoveTransformedFileIntoRuntime(jclass jklass,
-                                          std::string original_location,
+                                          const std::string& original_location,
                                           jint data_len,
                                           unsigned char* dex_data);
 
diff --git a/runtime/plugin.h b/runtime/plugin.h
index 18f3977..f077aaf 100644
--- a/runtime/plugin.h
+++ b/runtime/plugin.h
@@ -34,7 +34,7 @@
 // single-threaded fashion so not much need
 class Plugin {
  public:
-  static Plugin Create(std::string lib) {
+  static Plugin Create(const std::string& lib) {
     return Plugin(lib);
   }
 
@@ -66,7 +66,7 @@
   }
 
  private:
-  explicit Plugin(std::string library) : library_(library), dlopen_handle_(nullptr) { }
+  explicit Plugin(const std::string& library) : library_(library), dlopen_handle_(nullptr) { }
 
   std::string library_;
   void* dlopen_handle_;
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 32a5582..fd7e56d 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -199,8 +199,6 @@
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Interfaces");
   StackHandleScope<7> hs(soa.Self());
-  Handle<mirror::ClassLoader> class_loader(
-      hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader)));
 
   Handle<mirror::Class> proxyClass0;
   Handle<mirror::Class> proxyClass1;
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index be95600..37cf257 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -40,14 +40,16 @@
       }
     }
     if (kUseBakerReadBarrier) {
-      // The higher bits of the rb_ptr, rb_ptr_high_bits (must be zero)
-      // is used to create artificial data dependency from the is_gray
-      // load to the ref field (ptr) load to avoid needing a load-load
-      // barrier between the two.
-      uintptr_t rb_ptr_high_bits;
-      bool is_gray = HasGrayReadBarrierPointer(obj, &rb_ptr_high_bits);
+      // fake_address_dependency (must be zero) is used to create an artificial data dependency from
+      // the is_gray load to the ref field (ptr) load to avoid needing a load-load barrier between
+      // the two.
+      uintptr_t fake_address_dependency;
+      bool is_gray = IsGray(obj, &fake_address_dependency);
+      if (kEnableReadBarrierInvariantChecks) {
+        CHECK_EQ(fake_address_dependency, 0U) << obj << " rb_state=" << obj->GetReadBarrierState();
+      }
       ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
-          rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
+          fake_address_dependency | reinterpret_cast<uintptr_t>(ref_addr));
       MirrorType* ref = ref_addr->AsMirrorPtr();
       MirrorType* old_ref = ref;
       if (is_gray) {
@@ -60,9 +62,6 @@
               offset, old_ref, ref);
         }
       }
-      if (kEnableReadBarrierInvariantChecks) {
-        CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
-      }
       AssertToSpaceInvariant(obj, offset, ref);
       return ref;
     } else if (kUseBrooksReadBarrier) {
@@ -223,20 +222,14 @@
   return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj);
 }
 
-inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj,
-                                                   uintptr_t* out_rb_ptr_high_bits) {
-  mirror::Object* rb_ptr = obj->GetReadBarrierPointer();
-  uintptr_t rb_ptr_bits = reinterpret_cast<uintptr_t>(rb_ptr);
-  uintptr_t rb_ptr_low_bits = rb_ptr_bits & rb_ptr_mask_;
-  if (kEnableReadBarrierInvariantChecks) {
-    CHECK(rb_ptr_low_bits == white_ptr_ || rb_ptr_low_bits == gray_ptr_ ||
-          rb_ptr_low_bits == black_ptr_)
-        << "obj=" << obj << " rb_ptr=" << rb_ptr << " " << obj->PrettyTypeOf();
-  }
-  bool is_gray = rb_ptr_low_bits == gray_ptr_;
-  // The high bits are supposed to be zero. We check this on the caller side.
-  *out_rb_ptr_high_bits = rb_ptr_bits & ~rb_ptr_mask_;
-  return is_gray;
+inline bool ReadBarrier::IsGray(mirror::Object* obj, uintptr_t* fake_address_dependency) {
+  return obj->GetReadBarrierState(fake_address_dependency) == gray_state_;
+}
+
+inline bool ReadBarrier::IsGray(mirror::Object* obj) {
+  // Use a load-acquire to load the read barrier bit to avoid reordering with the subsequent load.
+  // GetReadBarrierStateAcquire() has load-acquire semantics.
+  return obj->GetReadBarrierStateAcquire() == gray_state_;
 }
 
 }  // namespace art
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index a861861..cbc2697 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -82,26 +82,32 @@
   // ALWAYS_INLINE on this caused a performance regression b/26744236.
   static mirror::Object* Mark(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  static mirror::Object* WhitePtr() {
-    return reinterpret_cast<mirror::Object*>(white_ptr_);
+  static constexpr uint32_t WhiteState() {
+    return white_state_;
   }
-  static mirror::Object* GrayPtr() {
-    return reinterpret_cast<mirror::Object*>(gray_ptr_);
-  }
-  static mirror::Object* BlackPtr() {
-    return reinterpret_cast<mirror::Object*>(black_ptr_);
+  static constexpr uint32_t GrayState() {
+    return gray_state_;
   }
 
-  ALWAYS_INLINE static bool HasGrayReadBarrierPointer(mirror::Object* obj,
-                                                      uintptr_t* out_rb_ptr_high_bits)
+  // *fake_address_dependency will be set to zero. It should be bitwise-or'ed into the address
+  // of the subsequent load so that the read barrier bit load and the subsequent object
+  // reference load (from one of `obj`'s fields) cannot be reordered; the artificial dependency
+  // replaces an explicit load-load barrier.
+  ALWAYS_INLINE static bool IsGray(mirror::Object* obj, uintptr_t* fake_address_dependency)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Note: These couldn't be constexpr pointers as reinterpret_cast isn't compatible with them.
-  static constexpr uintptr_t white_ptr_ = 0x0;    // Not marked.
-  static constexpr uintptr_t gray_ptr_ = 0x1;     // Marked, but not marked through. On mark stack.
-  // TODO: black_ptr_ is unused, we should remove it.
-  static constexpr uintptr_t black_ptr_ = 0x2;    // Marked through. Used for non-moving objects.
-  static constexpr uintptr_t rb_ptr_mask_ = 0x1;  // The low bits for white|gray.
+  // This uses a load-acquire to load the read barrier bit internally to prevent the reordering of
+  // the read barrier bit load and the subsequent load.
+  ALWAYS_INLINE static bool IsGray(mirror::Object* obj)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static bool IsValidReadBarrierState(uint32_t rb_state) {
+    return rb_state == white_state_ || rb_state == gray_state_;
+  }
+
+  static constexpr uint32_t white_state_ = 0x0;    // Not marked.
+  static constexpr uint32_t gray_state_ = 0x1;     // Marked, but not marked through. On mark stack.
+  static constexpr uint32_t rb_state_mask_ = 0x1;  // The low bits for white|gray.
 };
 
 }  // namespace art
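
The new IsGray(obj, fake_address_dependency) overload relies on the trick implemented in
Object::GetReadBarrierState(uintptr_t*) earlier in this diff: an instruction produces a value
that is always zero but that the compiler and CPU must treat as depending on the state load,
and or-ing that value into the address of the following reference load keeps the two loads
ordered without a load-load barrier. The sketch below shows the shape of the idea on a stand-in
struct (not ART's Object/LockWord); on non-ARM64 hosts it falls back to a compiler fence,
mirroring the i386/x86_64 branch above.

#include <atomic>
#include <cstdint>

struct Node {
  uint32_t state;  // stand-in for the lock word holding the rb_state bit
  Node* payload;   // stand-in for a reference field loaded after the state
};

inline uint32_t LoadStateWithFakeDependency(const Node* node,
                                            uintptr_t* fake_address_dependency) {
#if defined(__aarch64__)
  uintptr_t addr = reinterpret_cast<uintptr_t>(node);
  uintptr_t result;
  __asm__ __volatile__(
      "ldr %w[result], [%[addr]]\n\t"
      // eor x, y, y is always zero but carries a data dependency on the load.
      "eor %[fad], %[result], %[result]\n\t"
      : [result] "=r" (result), [fad] "=r" (*fake_address_dependency)
      : [addr] "r" (addr));
  return static_cast<uint32_t>(result);
#else
  uint32_t result = node->state;
  std::atomic_signal_fence(std::memory_order_acquire);  // compiler-fence fallback
  *fake_address_dependency = 0;
  return result;
#endif
}

inline Node* LoadPayloadOrdered(const Node* node) {
  uintptr_t fad;
  uint32_t state = LoadStateWithFakeDependency(node, &fad);
  (void)state;  // a real caller would branch on the gray bit here
  // Or-ing the always-zero dependency into the address makes the payload load
  // address-dependent on the state load, so it cannot be hoisted above it.
  const Node* addr =
      reinterpret_cast<const Node*>(fad | reinterpret_cast<uintptr_t>(node));
  return addr->payload;
}
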
diff --git a/runtime/reflection-inl.h b/runtime/reflection-inl.h
index c4d4fae..68e7a10 100644
--- a/runtime/reflection-inl.h
+++ b/runtime/reflection-inl.h
@@ -29,11 +29,10 @@
 
 namespace art {
 
-inline bool ConvertPrimitiveValue(bool unbox_for_result,
-                                  Primitive::Type srcType,
-                                  Primitive::Type dstType,
-                                  const JValue& src,
-                                  JValue* dst) {
+inline bool ConvertPrimitiveValueNoThrow(Primitive::Type srcType,
+                                         Primitive::Type dstType,
+                                         const JValue& src,
+                                         JValue* dst) {
   DCHECK(srcType != Primitive::kPrimNot && dstType != Primitive::kPrimNot);
   if (LIKELY(srcType == dstType)) {
     dst->SetJ(src.GetJ());
@@ -91,6 +90,18 @@
   default:
     break;
   }
+  return false;
+}
+
+inline bool ConvertPrimitiveValue(bool unbox_for_result,
+                                  Primitive::Type srcType,
+                                  Primitive::Type dstType,
+                                  const JValue& src,
+                                  JValue* dst) {
+  if (ConvertPrimitiveValueNoThrow(srcType, dstType, src, dst)) {
+    return true;
+  }
+
   if (!unbox_for_result) {
     ThrowIllegalArgumentException(StringPrintf("Invalid primitive conversion from %s to %s",
                                                PrettyDescriptor(srcType).c_str(),
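
ConvertPrimitiveValue is split above into ConvertPrimitiveValueNoThrow, which holds the
conversion logic and only reports success or failure, and a thin wrapper that keeps the
exception-raising behaviour. A generic sketch of that split on a much-simplified conversion
(the types and rules here are illustrative, not ART's):

#include <cstdint>
#include <stdexcept>

enum class PrimType { kInt, kLong, kFloat };

// Core: pure conversion, never throws, usable from code that must not raise.
inline bool ConvertNoThrow(PrimType src_type, PrimType dst_type,
                           int64_t src, int64_t* dst) {
  if (src_type == dst_type) {
    *dst = src;
    return true;
  }
  if (src_type == PrimType::kInt && dst_type == PrimType::kLong) {
    *dst = src;  // the only widening conversion this sketch allows
    return true;
  }
  return false;
}

// Wrapper: same entry point shape as before the split; it now owns only the
// error path.
inline bool Convert(PrimType src_type, PrimType dst_type,
                    int64_t src, int64_t* dst) {
  if (ConvertNoThrow(src_type, dst_type, src, dst)) {
    return true;
  }
  throw std::invalid_argument("invalid primitive conversion");
}
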
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index f88309b..3128380 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -453,7 +453,7 @@
     return JValue();
   }
 
-  ArtMethod* method = soa.DecodeMethod(mid);
+  ArtMethod* method = jni::DecodeArtMethod(mid);
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -484,7 +484,7 @@
     return JValue();
   }
 
-  ArtMethod* method = soa.DecodeMethod(mid);
+  ArtMethod* method = jni::DecodeArtMethod(mid);
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -516,7 +516,7 @@
   }
 
   ObjPtr<mirror::Object> receiver = soa.Decode<mirror::Object>(obj);
-  ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
+  ArtMethod* method = FindVirtualMethod(receiver, jni::DecodeArtMethod(mid));
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -548,7 +548,7 @@
   }
 
   ObjPtr<mirror::Object> receiver = soa.Decode<mirror::Object>(obj);
-  ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
+  ArtMethod* method = FindVirtualMethod(receiver, jni::DecodeArtMethod(mid));
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -739,8 +739,11 @@
     arg_array.Append(value.GetI());
   }
 
-  soa.DecodeMethod(m)->Invoke(soa.Self(), arg_array.GetArray(), arg_array.GetNumBytes(),
-                              &result, shorty);
+  jni::DecodeArtMethod(m)->Invoke(soa.Self(),
+                                  arg_array.GetArray(),
+                                  arg_array.GetNumBytes(),
+                                  &result,
+                                  shorty);
   return result.GetL();
 }
 
diff --git a/runtime/reflection.h b/runtime/reflection.h
index 6e5ef71..f2652fd 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -47,6 +47,12 @@
                              JValue* unboxed_value)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
+ALWAYS_INLINE bool ConvertPrimitiveValueNoThrow(Primitive::Type src_class,
+                                                Primitive::Type dst_class,
+                                                const JValue& src,
+                                                JValue* dst)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
 ALWAYS_INLINE bool ConvertPrimitiveValue(bool unbox_for_result,
                                          Primitive::Type src_class,
                                          Primitive::Type dst_class,
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index 22076bb..e254dfe 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -23,6 +23,7 @@
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "common_compiler_test.h"
+#include "jni_internal.h"
 #include "scoped_thread_state_change-inl.h"
 
 namespace art {
@@ -136,7 +137,7 @@
     ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "nop", "()V");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
-    InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), nullptr);
+    InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), nullptr);
   }
 
   void InvokeIdentityByteMethod(bool is_static) {
@@ -148,20 +149,20 @@
     jvalue args[1];
 
     args[0].b = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetB());
 
     args[0].b = -1;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-1, result.GetB());
 
     args[0].b = SCHAR_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(SCHAR_MAX, result.GetB());
 
     static_assert(SCHAR_MIN == -128, "SCHAR_MIN unexpected");
     args[0].b = SCHAR_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(SCHAR_MIN, result.GetB());
   }
 
@@ -174,19 +175,19 @@
     jvalue args[1];
 
     args[0].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = -1;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-1, result.GetI());
 
     args[0].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(INT_MAX, result.GetI());
 
     args[0].i = INT_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(INT_MIN, result.GetI());
   }
 
@@ -199,19 +200,19 @@
     jvalue args[1];
 
     args[0].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = -1.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(-1.0, result.GetD());
 
     args[0].d = DBL_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(DBL_MAX, result.GetD());
 
     args[0].d = DBL_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(DBL_MIN, result.GetD());
   }
 
@@ -225,22 +226,22 @@
 
     args[0].i = 1;
     args[1].i = 2;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(3, result.GetI());
 
     args[0].i = -2;
     args[1].i = 5;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(3, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-1, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-2, result.GetI());
   }
 
@@ -255,31 +256,31 @@
     args[0].i = 0;
     args[1].i = 0;
     args[2].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = 1;
     args[1].i = 2;
     args[2].i = 3;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(6, result.GetI());
 
     args[0].i = -1;
     args[1].i = 2;
     args[2].i = -3;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-2, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MIN;
     args[2].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483646, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MAX;
     args[2].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483645, result.GetI());
   }
 
@@ -295,35 +296,35 @@
     args[1].i = 0;
     args[2].i = 0;
     args[3].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = 1;
     args[1].i = 2;
     args[2].i = 3;
     args[3].i = 4;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(10, result.GetI());
 
     args[0].i = -1;
     args[1].i = 2;
     args[2].i = -3;
     args[3].i = 4;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MIN;
     args[2].i = INT_MAX;
     args[3].i = INT_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-2, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MAX;
     args[2].i = INT_MAX;
     args[3].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-4, result.GetI());
   }
 
@@ -340,7 +341,7 @@
     args[2].i = 0;
     args[3].i = 0;
     args[4].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = 1;
@@ -348,7 +349,7 @@
     args[2].i = 3;
     args[3].i = 4;
     args[4].i = 5;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(15, result.GetI());
 
     args[0].i = -1;
@@ -356,7 +357,7 @@
     args[2].i = -3;
     args[3].i = 4;
     args[4].i = -5;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-3, result.GetI());
 
     args[0].i = INT_MAX;
@@ -364,7 +365,7 @@
     args[2].i = INT_MAX;
     args[3].i = INT_MIN;
     args[4].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483645, result.GetI());
 
     args[0].i = INT_MAX;
@@ -372,7 +373,7 @@
     args[2].i = INT_MAX;
     args[3].i = INT_MAX;
     args[4].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483643, result.GetI());
   }
 
@@ -386,27 +387,27 @@
 
     args[0].d = 0.0;
     args[1].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = 2.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(3.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = -2.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(-1.0, result.GetD());
 
     args[0].d = DBL_MAX;
     args[1].d = DBL_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(1.7976931348623157e308, result.GetD());
 
     args[0].d = DBL_MAX;
     args[1].d = DBL_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(INFINITY, result.GetD());
   }
 
@@ -421,19 +422,19 @@
     args[0].d = 0.0;
     args[1].d = 0.0;
     args[2].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = 2.0;
     args[2].d = 3.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(6.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = -2.0;
     args[2].d = 3.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(2.0, result.GetD());
   }
 
@@ -449,21 +450,21 @@
     args[1].d = 0.0;
     args[2].d = 0.0;
     args[3].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = 2.0;
     args[2].d = 3.0;
     args[3].d = 4.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(10.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = -2.0;
     args[2].d = 3.0;
     args[3].d = -4.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(-2.0, result.GetD());
   }
 
@@ -480,7 +481,7 @@
     args[2].d = 0.0;
     args[3].d = 0.0;
     args[4].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
@@ -488,7 +489,7 @@
     args[2].d = 3.0;
     args[3].d = 4.0;
     args[4].d = 5.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(15.0, result.GetD());
 
     args[0].d = 1.0;
@@ -496,7 +497,7 @@
     args[2].d = 3.0;
     args[3].d = -4.0;
     args[4].d = 5.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(3.0, result.GetD());
   }
 
@@ -531,7 +532,7 @@
 
   jvalue args[1];
   args[0].l = nullptr;
-  InvokeWithJValues(soa, nullptr, soa.EncodeMethod(method), args);
+  InvokeWithJValues(soa, nullptr, jni::EncodeArtMethod(method), args);
 }
 
 TEST_F(ReflectionTest, StaticNopMethod) {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 262608d..d90e896 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -81,10 +81,12 @@
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "jni_internal.h"
 #include "linear_alloc.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/class_loader.h"
 #include "mirror/emulated_stack_frame.h"
 #include "mirror/field.h"
@@ -490,6 +492,14 @@
   GetMonitorList()->SweepMonitorList(visitor);
   GetJavaVM()->SweepJniWeakGlobals(visitor);
   GetHeap()->SweepAllocationRecords(visitor);
+  if (GetJit() != nullptr) {
+    // Visit JIT literal tables. Objects in these tables are classes and strings
+    // and only classes can be affected by class unloading. The strings always
+    // stay alive as they are strongly interned.
+    // TODO: Move this closer to CleanupClassLoaders, to avoid blocking weak accesses
+    // from mutators. See b/32167580.
+    GetJit()->GetCodeCache()->SweepRootTables(visitor);
+  }
 
   // All other generic system-weak holders.
   for (gc::AbstractSystemWeakHolder* holder : system_weak_holders_) {
@@ -558,7 +568,10 @@
       "getSystemClassLoader", "()Ljava/lang/ClassLoader;", pointer_size);
   CHECK(getSystemClassLoader != nullptr);
 
-  JValue result = InvokeWithJValues(soa, nullptr, soa.EncodeMethod(getSystemClassLoader), nullptr);
+  JValue result = InvokeWithJValues(soa,
+                                    nullptr,
+                                    jni::EncodeArtMethod(getSystemClassLoader),
+                                    nullptr);
   JNIEnv* env = soa.Self()->GetJniEnv();
   ScopedLocalRef<jobject> system_class_loader(env, soa.AddLocalReference<jobject>(result.GetL()));
   CHECK(system_class_loader.get() != nullptr);
@@ -1595,6 +1608,7 @@
   mirror::MethodType::VisitRoots(visitor);
   mirror::MethodHandleImpl::VisitRoots(visitor);
   mirror::EmulatedStackFrame::VisitRoots(visitor);
+  mirror::ClassExt::VisitRoots(visitor);
   // Visit all the primitive array types classes.
   mirror::PrimitiveArray<uint8_t>::VisitRoots(visitor);   // BooleanArray
   mirror::PrimitiveArray<int8_t>::VisitRoots(visitor);    // ByteArray
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 77ec238..6abe682 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -182,7 +182,7 @@
     return compiler_options_;
   }
 
-  void AddCompilerOption(std::string option) {
+  void AddCompilerOption(const std::string& option) {
     compiler_options_.push_back(option);
   }
 
@@ -326,6 +326,7 @@
   // Visit all the roots. If only_dirty is true then non-dirty roots won't be visited. If
   // clean_dirty is true then dirty roots will be marked as non-dirty after visiting.
   void VisitRoots(RootVisitor* visitor, VisitRootFlags flags = kVisitRootFlagAllRoots)
+      REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Visit image roots, only used for hprof since the GC uses the image space mod union table
@@ -335,6 +336,7 @@
   // Visit all of the roots we can safely do concurrently.
   void VisitConcurrentRoots(RootVisitor* visitor,
                             VisitRootFlags flags = kVisitRootFlagAllRoots)
+      REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Visit all of the non thread roots, we can do this with mutators unpaused.
diff --git a/runtime/scoped_thread_state_change-inl.h b/runtime/scoped_thread_state_change-inl.h
index bde23c8..d4469f4 100644
--- a/runtime/scoped_thread_state_change-inl.h
+++ b/runtime/scoped_thread_state_change-inl.h
@@ -86,30 +86,6 @@
   return ObjPtr<T, kPoison>::DownCast(Self()->DecodeJObject(obj));
 }
 
-inline ArtField* ScopedObjectAccessAlreadyRunnable::DecodeField(jfieldID fid) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<ArtField*>(fid);
-}
-
-inline jfieldID ScopedObjectAccessAlreadyRunnable::EncodeField(ArtField* field) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<jfieldID>(field);
-}
-
-inline ArtMethod* ScopedObjectAccessAlreadyRunnable::DecodeMethod(jmethodID mid) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<ArtMethod*>(mid);
-}
-
-inline jmethodID ScopedObjectAccessAlreadyRunnable::EncodeMethod(ArtMethod* method) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<jmethodID>(method);
-}
-
 inline bool ScopedObjectAccessAlreadyRunnable::IsRunnable() const {
   return self_->GetState() == kRunnable;
 }
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index 04fd914..b499258 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -94,14 +94,6 @@
   template<typename T, bool kPoison = kIsDebugBuild>
   ObjPtr<T, kPoison> Decode(jobject obj) const REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtField* DecodeField(jfieldID fid) const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  jfieldID EncodeField(ArtField* field) const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  ArtMethod* DecodeMethod(jmethodID mid) const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  jmethodID EncodeMethod(ArtMethod* method) const REQUIRES_SHARED(Locks::mutator_lock_);
-
   ALWAYS_INLINE bool IsRunnable() const;
 
  protected:
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 3f7d086..8ce9661 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -410,9 +410,9 @@
     self->tlsPtr_.opeer = soa.Decode<mirror::Object>(self->tlsPtr_.jpeer).Ptr();
     self->GetJniEnv()->DeleteGlobalRef(self->tlsPtr_.jpeer);
     self->tlsPtr_.jpeer = nullptr;
-    self->SetThreadName(self->GetThreadName(soa)->ToModifiedUtf8().c_str());
+    self->SetThreadName(self->GetThreadName()->ToModifiedUtf8().c_str());
 
-    ArtField* priorityField = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority);
+    ArtField* priorityField = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_priority);
     self->SetNativePriority(priorityField->GetInt(self->tlsPtr_.opeer));
     Dbg::PostThreadStart(self);
 
@@ -430,7 +430,7 @@
 
 Thread* Thread::FromManagedThread(const ScopedObjectAccessAlreadyRunnable& soa,
                                   mirror::Object* thread_peer) {
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer);
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_nativePeer);
   Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetLong(thread_peer)));
   // Sanity check that if we have a result it is either suspended or we hold the thread_list_lock_
   // to stop it from going away.
@@ -562,7 +562,7 @@
   if (VLOG_IS_ON(threads)) {
     ScopedObjectAccess soa(env);
 
-    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
+    ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name);
     ObjPtr<mirror::String> java_name =
         f->GetObject(soa.Decode<mirror::Object>(java_peer))->AsString();
     std::string thread_name;
@@ -823,7 +823,7 @@
 
   ScopedObjectAccess soa(self);
   StackHandleScope<1> hs(self);
-  MutableHandle<mirror::String> peer_thread_name(hs.NewHandle(GetThreadName(soa)));
+  MutableHandle<mirror::String> peer_thread_name(hs.NewHandle(GetThreadName()));
   if (peer_thread_name.Get() == nullptr) {
     // The Thread constructor should have set the Thread.name to a
     // non-null value. However, because we can run without code
@@ -834,7 +834,7 @@
     } else {
       InitPeer<false>(soa, thread_is_daemon, thread_group, thread_name.get(), thread_priority);
     }
-    peer_thread_name.Assign(GetThreadName(soa));
+    peer_thread_name.Assign(GetThreadName());
   }
   // 'thread_name' may have been null, so don't trust 'peer_thread_name' to be non-null.
   if (peer_thread_name.Get() != nullptr) {
@@ -845,13 +845,13 @@
 template<bool kTransactionActive>
 void Thread::InitPeer(ScopedObjectAccess& soa, jboolean thread_is_daemon, jobject thread_group,
                       jobject thread_name, jint thread_priority) {
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_daemon)->
       SetBoolean<kTransactionActive>(tlsPtr_.opeer, thread_is_daemon);
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)->
       SetObject<kTransactionActive>(tlsPtr_.opeer, soa.Decode<mirror::Object>(thread_group));
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_name)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name)->
       SetObject<kTransactionActive>(tlsPtr_.opeer, soa.Decode<mirror::Object>(thread_name));
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_priority)->
       SetInt<kTransactionActive>(tlsPtr_.opeer, thread_priority);
 }
 
@@ -947,8 +947,8 @@
   DumpStack(os, dump_native_stack, backtrace_map);
 }
 
-mirror::String* Thread::GetThreadName(const ScopedObjectAccessAlreadyRunnable& soa) const {
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
+mirror::String* Thread::GetThreadName() const {
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name);
   if (tlsPtr_.opeer == nullptr) {
     return nullptr;
   }
@@ -1306,17 +1306,18 @@
   // cause ScopedObjectAccessUnchecked to deadlock.
   if (gAborting == 0 && self != nullptr && thread != nullptr && thread->tlsPtr_.opeer != nullptr) {
     ScopedObjectAccessUnchecked soa(self);
-    priority = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)
+    priority = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_priority)
         ->GetInt(thread->tlsPtr_.opeer);
-    is_daemon = soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)
+    is_daemon = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_daemon)
         ->GetBoolean(thread->tlsPtr_.opeer);
 
     ObjPtr<mirror::Object> thread_group =
-        soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->GetObject(thread->tlsPtr_.opeer);
+        jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)
+            ->GetObject(thread->tlsPtr_.opeer);
 
     if (thread_group != nullptr) {
       ArtField* group_name_field =
-          soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
+          jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_name);
       ObjPtr<mirror::String> group_name_string =
           group_name_field->GetObject(thread_group)->AsString();
       group_name = (group_name_string != nullptr) ? group_name_string->ToModifiedUtf8() : "<null>";
@@ -1792,10 +1793,10 @@
 
     // this.nativePeer = 0;
     if (Runtime::Current()->IsActiveTransaction()) {
-      soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer)
+      jni::DecodeArtField(WellKnownClasses::java_lang_Thread_nativePeer)
           ->SetLong<true>(tlsPtr_.opeer, 0);
     } else {
-      soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer)
+      jni::DecodeArtField(WellKnownClasses::java_lang_Thread_nativePeer)
           ->SetLong<false>(tlsPtr_.opeer, 0);
     }
     Dbg::PostThreadDeath(self);
@@ -1803,7 +1804,7 @@
     // Thread.join() is implemented as an Object.wait() on the Thread.lock object. Signal anyone
     // who is waiting.
     ObjPtr<mirror::Object> lock =
-        soa.DecodeField(WellKnownClasses::java_lang_Thread_lock)->GetObject(tlsPtr_.opeer);
+        jni::DecodeArtField(WellKnownClasses::java_lang_Thread_lock)->GetObject(tlsPtr_.opeer);
     // (This conditional is only needed for tests, where Thread.lock won't have been set.)
     if (lock != nullptr) {
       StackHandleScope<1> hs(self);
@@ -1894,7 +1895,7 @@
 void Thread::RemoveFromThreadGroup(ScopedObjectAccess& soa) {
   // this.group.removeThread(this);
   // group can be null if we're in the compiler or a test.
-  ObjPtr<mirror::Object> ogroup = soa.DecodeField(WellKnownClasses::java_lang_Thread_group)
+  ObjPtr<mirror::Object> ogroup = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)
       ->GetObject(tlsPtr_.opeer);
   if (ogroup != nullptr) {
     ScopedLocalRef<jobject> group(soa.Env(), soa.AddLocalReference<jobject>(ogroup));
@@ -2414,7 +2415,7 @@
       ++i;
     }
     ScopedLocalRef<jobject> ref(soa.Env(), soa.AddLocalReference<jobject>(exception.Get()));
-    InvokeWithJValues(soa, ref.get(), soa.EncodeMethod(exception_init_method), jv_args);
+    InvokeWithJValues(soa, ref.get(), jni::EncodeArtMethod(exception_init_method), jv_args);
     if (LIKELY(!IsExceptionPending())) {
       SetException(exception.Get());
     }
@@ -2503,7 +2504,7 @@
   QUICK_ENTRY_POINT_INFO(pAllocStringFromChars)
   QUICK_ENTRY_POINT_INFO(pAllocStringFromString)
   QUICK_ENTRY_POINT_INFO(pInstanceofNonTrivial)
-  QUICK_ENTRY_POINT_INFO(pCheckCast)
+  QUICK_ENTRY_POINT_INFO(pCheckInstanceOf)
   QUICK_ENTRY_POINT_INFO(pInitializeStaticStorage)
   QUICK_ENTRY_POINT_INFO(pInitializeTypeAndVerifyAccess)
   QUICK_ENTRY_POINT_INFO(pInitializeType)
diff --git a/runtime/thread.h b/runtime/thread.h
index 75b5b12..f3001be 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -333,8 +333,7 @@
   }
 
   // Returns the java.lang.Thread's name, or null if this Thread* doesn't have a peer.
-  mirror::String* GetThreadName(const ScopedObjectAccessAlreadyRunnable& ts) const
-      REQUIRES_SHARED(Locks::mutator_lock_);
+  mirror::String* GetThreadName() const REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Sets 'name' to the java.lang.Thread's name. This requires no transition to managed code,
   // allocation, or locking.
@@ -1567,7 +1566,8 @@
 
 class SCOPED_CAPABILITY ScopedAssertNoThreadSuspension {
  public:
-  ALWAYS_INLINE ScopedAssertNoThreadSuspension(const char* cause) ACQUIRE(Roles::uninterruptible_) {
+  ALWAYS_INLINE explicit ScopedAssertNoThreadSuspension(const char* cause)
+      ACQUIRE(Roles::uninterruptible_) {
     if (kIsDebugBuild) {
       self_ = Thread::Current();
       old_cause_ = self_->StartAssertNoThreadSuspension(cause);
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 3284925..d1e9751 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -113,8 +113,8 @@
   EXPECT_EQ(2u, CountModifiedUtf8Chars(reinterpret_cast<const char *>(kSurrogateEncoding)));
 }
 
-static void AssertConversion(const std::vector<uint16_t> input,
-                             const std::vector<uint8_t> expected) {
+static void AssertConversion(const std::vector<uint16_t>& input,
+                             const std::vector<uint8_t>& expected) {
   ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size()));
 
   std::vector<uint8_t> output(expected.size());
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 97bc79c..d9e3ea7 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -956,7 +956,7 @@
   delete last_fail_message;
 }
 
-void MethodVerifier::AppendToLastFailMessage(std::string append) {
+void MethodVerifier::AppendToLastFailMessage(const std::string& append) {
   size_t failure_num = failure_messages_.size();
   DCHECK_NE(failure_num, 0U);
   std::ostringstream* last_fail_message = failure_messages_[failure_num - 1];
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index eb8b7a6..c6ce583 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -318,7 +318,7 @@
   void PrependToLastFailMessage(std::string);
 
   // Adds the given string to the end of the last failure message.
-  void AppendToLastFailMessage(std::string);
+  void AppendToLastFailMessage(const std::string& append);
 
   // Verification result for method(s). Includes a (maximum) failure kind, and (the union of)
   // all failure types.
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 4ec2da6..da3d946 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -411,7 +411,7 @@
   }
 
   // Scan the map for the same value.
-  for (const std::pair<uint32_t, uint32_t>& pair : search_map) {
+  for (const std::pair<const uint32_t, uint32_t>& pair : search_map) {
     if (pair.first != src && pair.second == src_lock_levels) {
       return true;
     }
diff --git a/runtime/verifier/verifier_deps.cc b/runtime/verifier/verifier_deps.cc
index bdf63cb..c395612 100644
--- a/runtime/verifier/verifier_deps.cc
+++ b/runtime/verifier/verifier_deps.cc
@@ -607,7 +607,8 @@
   }
 }
 
-bool VerifierDeps::Verify(Handle<mirror::ClassLoader> class_loader, Thread* self) const {
+bool VerifierDeps::ValidateDependencies(Handle<mirror::ClassLoader> class_loader,
+                                        Thread* self) const {
   for (const auto& entry : dex_deps_) {
     if (!VerifyDexFile(class_loader, *entry.first, *entry.second, self)) {
       return false;
diff --git a/runtime/verifier/verifier_deps.h b/runtime/verifier/verifier_deps.h
index eea0299..7b419d4 100644
--- a/runtime/verifier/verifier_deps.h
+++ b/runtime/verifier/verifier_deps.h
@@ -109,11 +109,18 @@
   void Dump(VariableIndentationOutputStream* vios) const
       NO_THREAD_SAFETY_ANALYSIS;
 
-  // Verify the encoded dependencies of this `VerifierDeps`.
+  // Verify the encoded dependencies of this `VerifierDeps` are still valid.
   // NO_THREAD_SAFETY_ANALYSIS, as this must be called on a read-only `VerifierDeps`.
-  bool Verify(Handle<mirror::ClassLoader> class_loader, Thread* self) const
+  bool ValidateDependencies(Handle<mirror::ClassLoader> class_loader, Thread* self) const
       NO_THREAD_SAFETY_ANALYSIS;
 
+  // NO_THREAD_SAFETY_ANALYSIS, as this is queried when the VerifierDeps are
+  // fully created.
+  const std::vector<uint16_t>& GetUnverifiedClasses(const DexFile& dex_file) const
+      NO_THREAD_SAFETY_ANALYSIS {
+    return GetDexFileDeps(dex_file)->unverified_classes_;
+  }
+
  private:
   static constexpr uint16_t kUnresolvedMarker = static_cast<uint16_t>(-1);
 
@@ -317,6 +324,7 @@
   ART_FRIEND_TEST(VerifierDepsTest, EncodeDecode);
   ART_FRIEND_TEST(VerifierDepsTest, EncodeDecodeMulti);
   ART_FRIEND_TEST(VerifierDepsTest, VerifyDeps);
+  ART_FRIEND_TEST(VerifierDepsTest, CompilerDriver);
 };
 
 }  // namespace verifier
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 2797d85..3549586 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -22,6 +22,7 @@
 
 #include "base/logging.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/throwable.h"
 #include "obj_ptr-inl.h"
@@ -219,9 +220,9 @@
   ScopedObjectAccess soa(Thread::Current());
   #define LOAD_STRING_INIT(init_runtime_name, init_signature, new_runtime_name,             \
                            new_java_name, new_signature, ...)                               \
-      init_runtime_name = soa.DecodeMethod(                                                 \
+      init_runtime_name = jni::DecodeArtMethod(                                             \
           CacheMethod(env, java_lang_String, false, "<init>", init_signature));             \
-      new_runtime_name = soa.DecodeMethod(                                                  \
+      new_runtime_name = jni::DecodeArtMethod(                                              \
           CacheMethod(env, java_lang_StringFactory, true, new_java_name, new_signature));
       STRING_INIT_LIST(LOAD_STRING_INIT)
   #undef LOAD_STRING_INIT
@@ -239,8 +240,8 @@
 ArtMethod* WellKnownClasses::StringInitToStringFactory(ArtMethod* string_init) {
   #define TO_STRING_FACTORY(init_runtime_name, init_signature, new_runtime_name,            \
                             new_java_name, new_signature, entry_point_name)                 \
-      if (string_init == init_runtime_name) {                                               \
-        return new_runtime_name;                                                            \
+      if (string_init == (init_runtime_name)) {                                             \
+        return (new_runtime_name);                                                          \
       }
       STRING_INIT_LIST(TO_STRING_FACTORY)
   #undef TO_STRING_FACTORY
@@ -251,7 +252,7 @@
 uint32_t WellKnownClasses::StringInitToEntryPoint(ArtMethod* string_init) {
   #define TO_ENTRY_POINT(init_runtime_name, init_signature, new_runtime_name,               \
                          new_java_name, new_signature, entry_point_name)                    \
-      if (string_init == init_runtime_name) {                                               \
+      if (string_init == (init_runtime_name)) {                                             \
         return kQuick ## entry_point_name;                                                  \
       }
       STRING_INIT_LIST(TO_ENTRY_POINT)
@@ -393,7 +394,9 @@
 }
 
 ObjPtr<mirror::Class> WellKnownClasses::ToClass(jclass global_jclass) {
-  return ObjPtr<mirror::Class>::DownCast(Thread::Current()->DecodeJObject(global_jclass));
+  auto ret = ObjPtr<mirror::Class>::DownCast(Thread::Current()->DecodeJObject(global_jclass));
+  DCHECK(!ret.IsNull());
+  return ret;
 }
 
 }  // namespace art
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index 56d737f..41329af 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -466,7 +466,7 @@
   return 0;
 }
 
-extern "C" char* native_bridge_getError() {
+extern "C" const char* native_bridge_getError() {
   printf("dlerror() in native bridge.\n");
   return nullptr;
 }
diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java
index 141054d..0ca822f 100644
--- a/test/480-checker-dead-blocks/src/Main.java
+++ b/test/480-checker-dead-blocks/src/Main.java
@@ -30,7 +30,7 @@
     return false;
   }
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -39,13 +39,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Add:i\d+>>     Add [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Sub
   /// CHECK-NOT:                      Phi
@@ -62,7 +62,7 @@
     return z;
   }
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -71,13 +71,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>     Sub [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
   /// CHECK-NOT:                      Phi
@@ -94,10 +94,10 @@
     return z;
   }
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$after_inlining (before)
   /// CHECK:                          Mul
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static int testRemoveLoop(int x) {
@@ -109,11 +109,11 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      Return
   /// CHECK-DAG:                      Exit
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      Return
   /// CHECK-NOT:                      Exit
 
@@ -124,15 +124,15 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -143,16 +143,16 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -165,13 +165,13 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$after_inlining (before)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index e3617c7..cda6f73 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -23,7 +23,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
@@ -36,7 +36,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
@@ -73,7 +73,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -88,7 +88,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -129,7 +129,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -146,7 +146,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<SelX>>]                          loop:none
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -194,7 +194,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -217,7 +217,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java
index 7d5fd4f..95b1a93 100644
--- a/test/485-checker-dce-switch/src/Main.java
+++ b/test/485-checker-dce-switch/src/Main.java
@@ -20,14 +20,14 @@
     return 5;
   }
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:    <<Const100:i\d+>> IntConstant 100
   /// CHECK-DAG:                      Return [<<Const100>>]
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int wholeSwitchDead(int j) {
@@ -60,14 +60,14 @@
     return l;
   }
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (before)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:                      Return [<<Const7>>]
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_InRange() {
@@ -96,14 +96,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (before)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Const15:i\d+>> IntConstant 15
   /// CHECK-DAG:                      Return [<<Const15>>]
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_AboveRange() {
@@ -132,14 +132,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (before)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<ConstM5:i\d+>> IntConstant -5
   /// CHECK-DAG:                      Return [<<ConstM5>>]
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_BelowRange() {
diff --git a/test/527-checker-array-access-split/info.txt b/test/527-checker-array-access-split/info.txt
index 9206804..a39bea3 100644
--- a/test/527-checker-array-access-split/info.txt
+++ b/test/527-checker-array-access-split/info.txt
@@ -1 +1 @@
-Test arm64-specific array access optimization.
+Test arm- and arm64-specific array access optimization.
diff --git a/test/530-checker-lse/expected.txt b/test/530-checker-lse/expected.txt
index e69de29..ddae16a 100644
--- a/test/530-checker-lse/expected.txt
+++ b/test/530-checker-lse/expected.txt
@@ -0,0 +1 @@
+java.lang.ArrayIndexOutOfBoundsException: length=3; index=3
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 6b0dedf..9f4be6c 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -18,6 +18,9 @@
   Circle(double radius) {
     this.radius = radius;
   }
+  public double getRadius() {
+    return radius;
+  }
   public double getArea() {
     return radius * radius * Math.PI;
   }
@@ -744,6 +747,44 @@
     return 1.0f;
   }
 
+  /// CHECK-START: double Main.getCircleArea(double, boolean) load_store_elimination (before)
+  /// CHECK: NewInstance
+
+  /// CHECK-START: double Main.getCircleArea(double, boolean) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+
+  private static double getCircleArea(double radius, boolean b) {
+    double area = 0d;
+    if (b) {
+      area = new Circle(radius).getArea();
+    }
+    return area;
+  }
+
+  /// CHECK-START: double Main.testDeoptimize(int[], double[], double) load_store_elimination (before)
+  /// CHECK: Deoptimize
+  /// CHECK: NewInstance
+  /// CHECK: Deoptimize
+  /// CHECK: NewInstance
+
+  /// CHECK-START: double Main.testDeoptimize(int[], double[], double) load_store_elimination (after)
+  /// CHECK: Deoptimize
+  /// CHECK: NewInstance
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: NewInstance
+
+  private static double testDeoptimize(int[] iarr, double[] darr, double radius) {
+    iarr[0] = 1;  // One HDeoptimize here. Not triggered.
+    iarr[1] = 1;
+    Circle circle1 = new Circle(radius);
+    iarr[2] = 1;
+    darr[0] = circle1.getRadius();  // One HDeoptimize here, which holds circle1 live. Triggered.
+    darr[1] = circle1.getRadius();
+    darr[2] = circle1.getRadius();
+    darr[3] = circle1.getRadius();
+    return new Circle(Math.PI).getArea();
+  }
+
   static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -808,6 +849,22 @@
     assertIntEquals(sumWithinRange(array, 1, 5), 11);
     assertFloatEquals(testAllocationEliminationWithLoops(), 1.0f);
     assertFloatEquals(mF, 0f);
+    assertDoubleEquals(Math.PI * Math.PI * Math.PI, getCircleArea(Math.PI, true));
+    assertDoubleEquals(0d, getCircleArea(Math.PI, false));
+
+    int[] iarray = {0, 0, 0};
+    double[] darray = {0d, 0d, 0d};
+    try {
+      assertDoubleEquals(Math.PI * Math.PI * Math.PI, testDeoptimize(iarray, darray, Math.PI));
+    } catch (Exception e) {
+      System.out.println(e);
+    }
+    assertIntEquals(iarray[0], 1);
+    assertIntEquals(iarray[1], 1);
+    assertIntEquals(iarray[2], 1);
+    assertDoubleEquals(darray[0], Math.PI);
+    assertDoubleEquals(darray[1], Math.PI);
+    assertDoubleEquals(darray[2], Math.PI);
   }
 
   static boolean sFlag;
diff --git a/test/530-checker-lse2/expected.txt b/test/530-checker-lse2/expected.txt
new file mode 100644
index 0000000..e18fc7e
--- /dev/null
+++ b/test/530-checker-lse2/expected.txt
@@ -0,0 +1,8 @@
+Start....
+r  = 9.649776E8
+mZ = false
+mI = 0
+mJ = -576460752303423488
+mF = NaN
+mD = NaN
+Done....
diff --git a/test/530-checker-lse2/info.txt b/test/530-checker-lse2/info.txt
new file mode 100644
index 0000000..8dd3f50
--- /dev/null
+++ b/test/530-checker-lse2/info.txt
@@ -0,0 +1,2 @@
+Checker test for store/allocation elimination in the presence of
+HDeoptimize.
diff --git a/test/530-checker-lse2/src/Main.java b/test/530-checker-lse2/src/Main.java
new file mode 100644
index 0000000..0fe3d87
--- /dev/null
+++ b/test/530-checker-lse2/src/Main.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+
+// Modified from a fuzz test.
+public class Main {
+
+  private interface X {
+    int x();
+  }
+
+  private class A {
+    public int a() {
+      return (+ (Math.multiplyExact(mI, mI)));
+    }
+  }
+
+  private class B extends A implements X {
+    public int a() {
+      return super.a() + ((int) (Math.max(364746077.0f, ((float) mD))));
+    }
+    public int x() {
+      return (mI >> (mI++));
+    }
+  }
+
+  private static class C implements X {
+    public static int s() {
+      return 671468641;
+    }
+    public int c() {
+      return -383762838;
+    }
+    public int x() {
+      return -138813312;
+    }
+  }
+
+  private A mA  = new B();
+  private B mB  = new B();
+  private X mBX = new B();
+  private C mC  = new C();
+  private X mCX = new C();
+
+  private boolean mZ = false;
+  private int     mI = 0;
+  private long    mJ = 0;
+  private float   mF = 0;
+  private double  mD = 0;
+
+  private boolean[] mArray = new boolean[576];
+
+  private Main() {
+    boolean a = false;
+    for (int i0 = 0; i0 < 576; i0++) {
+      mArray[i0] = a;
+      a = !a;
+    }
+  }
+
+  /// CHECK-START: float Main.testMethod() load_store_elimination (before)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-NOT: NewInstance
+
+  /// CHECK-START: float Main.testMethod() load_store_elimination (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NewInstance
+
+  private float testMethod() {
+    {
+      int lI0 = (-1456058746 << mI);
+      mD = ((double)(int)(double) mD);
+      for (int i0 = 56 - 1; i0 >= 0; i0--) {
+        mArray[i0] &= (Boolean.logicalOr(((true ? ((boolean) new Boolean((mZ))) : mZ) || mArray[i0]), (mZ)));
+        mF *= (mF * mF);
+        if ((mZ ^ true)) {
+          mF *= ((float)(int)(float) 267827331.0f);
+          mZ ^= ((false & ((boolean) new Boolean(false))) | mZ);
+          for (int i1 = 576 - 1; i1 >= 0; i1--) {
+            mZ &= ((mArray[279]) | ((boolean) new Boolean(true)));
+            mD -= (--mD);
+            for (int i2 = 56 - 1; i2 >= 0; i2--) {
+              mF /= (mF - mF);
+              mI = (Math.min(((int) new Integer(mI)), (766538816 * (++mI))));
+              mF += (mZ ? (mB.a()) : ((! mZ) ? -752042357.0f : (++mF)));
+              mJ |= ((long) new Long((-2084191070L + (mJ | mJ))));
+              lI0 |= ((int) new Integer(((int) new Integer(mI))));
+              if (((boolean) new Boolean(false))) {
+                mZ &= (mZ);
+                mF *= (mF--);
+                mD = (Double.POSITIVE_INFINITY);
+                mF += ((float)(int)(float) (-2026938813.0f * 638401585.0f));
+                mJ = (--mJ);
+                for (int i3 = 56 - 1; i3 >= 0; i3--) {
+                  mI &= (- mI);
+                  mD = (--mD);
+                  mArray[426] = (mZ || false);
+                  mF -= (((this instanceof Main) ? mF : mF) + 976981405.0f);
+                  mZ &= ((mZ) & (this instanceof Main));
+                }
+                mZ ^= (Float.isFinite(-1975953895.0f));
+              } else {
+                mJ /= ((long) (Math.nextDown(-1519600008.0f)));
+                mJ <<= (Math.round(1237681786.0));
+              }
+            }
+            mArray[i0] &= (false || ((1256071300.0f != -353296391.0f) ? false : (mZ ^ mArray[i0])));
+            mF *= (+ ((float) mD));
+            for (int i2 = 0; i2 < 576; i2++) {
+              mD *= ((double) lI0);
+              lI0 = (lI0 & (Integer.MIN_VALUE));
+              mF -= (--mF);
+            }
+            if ((this instanceof Main)) {
+              mZ ^= ((boolean) new Boolean(true));
+            } else {
+              {
+                int lI1 = (mZ ? (--lI0) : 1099574344);
+                mJ >>= (Math.incrementExact(mJ));
+                mJ = (~ -2103354070L);
+              }
+            }
+          }
+        } else {
+          mJ *= (- ((long) new Long(479832084L)));
+          mJ %= (Long.MAX_VALUE);
+          mD /= (--mD);
+          if ((mI > ((mBX.x()) << mI))) {
+            {
+              long lJ0 = (mJ--);
+              mI >>>= (mBX.x());
+            }
+            mF = (+ 505094603.0f);
+            mD *= (((boolean) new Boolean((! false))) ? mD : 1808773781.0);
+            mI *= (Integer.MIN_VALUE);
+            for (int i1 = 576 - 1; i1 >= 0; i1--) {
+              if (((boolean) new Boolean(false))) {
+                mD += ((double)(float)(double) -1051436901.0);
+              } else {
+                mF -= ((float)(int)(float) (Float.min(mF, (mF--))));
+              }
+              for (int i2 = 0; i2 < 576; i2++) {
+                mJ -= ((long) new Long(-1968644857L));
+                mJ ^= (+ (mC.s()));
+              }
+            }
+          } else {
+            mF -= ((- mF) + -2145489966.0f);
+          }
+          mD -= (mD++);
+          mD = (949112777.0 * 1209996119.0);
+        }
+        mZ &= (Boolean.logicalAnd(true, ((mZ) & (((boolean) new Boolean(true)) && true))));
+      }
+    }
+    return ((float) 964977619L);
+  }
+
+  public static void main(String[] args) {
+    System.out.println("Start....");
+    Main t = new Main();
+    float r = 1883600237.0f;
+    try {
+      r = t.testMethod();
+    } catch (Exception e) {
+      // Arithmetic, null pointer, index out of bounds, etc.
+      System.out.println("An exception was caught.");
+    }
+    System.out.println("r  = " + r);
+    System.out.println("mZ = " + t.mZ);
+    System.out.println("mI = " + t.mI);
+    System.out.println("mJ = " + t.mJ);
+    System.out.println("mF = " + t.mF);
+    System.out.println("mD = " + t.mD);
+    System.out.println("Done....");
+  }
+}
+
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
index 5557c7b..f50e01e 100644
--- a/test/543-checker-dce-trycatch/smali/TestCase.smali
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -26,18 +26,18 @@
 # Test a case when one entering TryBoundary is dead but the rest of the try
 # block remains live.
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK: Add
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-NOT: Add
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
@@ -71,18 +71,18 @@
 # Test a case when one exiting TryBoundary is dead but the rest of the try
 # block remains live.
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK: Add
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-NOT: Add
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
@@ -117,21 +117,21 @@
 # Test that a catch block remains live and consistent if some of try blocks
 # throwing into it are removed.
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
@@ -203,7 +203,7 @@
 
 # Test that DCE removes catch phi uses of instructions defined in dead try blocks.
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<Arg0:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Arg1:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Const0xa:i\d+>>  IntConstant 10
@@ -220,7 +220,7 @@
 ## CHECK-DAG:                        Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
 ## CHECK-DAG:                        Phi [<<Select>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>>  IntConstant 12
 ## CHECK-DAG:     <<Const0xd:i\d+>>  IntConstant 13
@@ -277,7 +277,7 @@
 # Test that DCE does not remove catch phi uses of instructions defined outside
 # dead try blocks.
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
@@ -287,7 +287,7 @@
 ## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
 ## CHECK-DAG:                       Phi [<<Const0xf>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
diff --git a/test/543-checker-dce-trycatch/src/Main.java b/test/543-checker-dce-trycatch/src/Main.java
index 19587e7..0d7596a 100644
--- a/test/543-checker-dce-trycatch/src/Main.java
+++ b/test/543-checker-dce-trycatch/src/Main.java
@@ -35,10 +35,10 @@
   // where TryBoundary still has exception handler successors after having removed
   // some already.
 
-  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT: TryBoundary
 
-  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$after_inlining (after)
   /// CHECK: begin_block
   /// CHECK: begin_block
   /// CHECK: begin_block
@@ -63,4 +63,4 @@
   public static void main(String[] args) {
 
   }
-}
\ No newline at end of file
+}
diff --git a/test/562-no-intermediate/expected.txt b/test/562-checker-no-intermediate/expected.txt
similarity index 100%
rename from test/562-no-intermediate/expected.txt
rename to test/562-checker-no-intermediate/expected.txt
diff --git a/test/562-checker-no-intermediate/info.txt b/test/562-checker-no-intermediate/info.txt
new file mode 100644
index 0000000..38f1f65
--- /dev/null
+++ b/test/562-checker-no-intermediate/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing, checking that there is no
+intermediate address live across a Java call.
diff --git a/test/562-checker-no-intermediate/src/Main.java b/test/562-checker-no-intermediate/src/Main.java
new file mode 100644
index 0000000..104ba8b
--- /dev/null
+++ b/test/562-checker-no-intermediate/src/Main.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /**
+   * Check that the intermediate address computation is not reordered or merged
+   * across the call to Math.abs().
+   */
+
+  /// CHECK-START-ARM: void Main.main(java.lang.String[]) instruction_simplifier_arm (before)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:                                  ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM: void Main.main(java.lang.String[]) instruction_simplifier_arm (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM: void Main.main(java.lang.String[]) GVN$after_arch (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+
+  /// CHECK-START-ARM64: void Main.main(java.lang.String[]) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:                                  ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.main(java.lang.String[]) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.main(java.lang.String[]) GVN$after_arch (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  public static void main(String[] args) {
+    array[index] += Math.abs(-42);
+  }
+
+  static int index = 0;
+  static int[] array = new int[2];
+}
diff --git a/test/562-no-intermediate/info.txt b/test/562-no-intermediate/info.txt
deleted file mode 100644
index 4f21aeb..0000000
--- a/test/562-no-intermediate/info.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Regression test for optimizing, checking that there is no
-intermediate address between a Java call.
diff --git a/test/562-no-intermediate/src/Main.java b/test/562-no-intermediate/src/Main.java
deleted file mode 100644
index 3b74d6f..0000000
--- a/test/562-no-intermediate/src/Main.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class Main {
-
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
-
-  static int index = 0;
-  static double[] array = new double[2];
-}
diff --git a/test/611-checker-simplify-if/src/Main.java b/test/611-checker-simplify-if/src/Main.java
index 7dac007..774f239 100644
--- a/test/611-checker-simplify-if/src/Main.java
+++ b/test/611-checker-simplify-if/src/Main.java
@@ -64,13 +64,13 @@
 
   // Test when the phi is the input of the if.
 
-  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (before)
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG: <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:                   If
   /// CHECK-DAG: <<Phi:i\d+>>      Phi
   /// CHECK-DAG:                   If [<<Phi>>]
 
-  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (after)
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$after_inlining (after)
   /// CHECK:      If
   /// CHECK-NOT:  Phi
   /// CHECK-NOT:  If
diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java
index d8bc611..f85479a 100644
--- a/test/618-checker-induction/src/Main.java
+++ b/test/618-checker-induction/src/Main.java
@@ -92,6 +92,43 @@
     }
   }
 
+  /// CHECK-START: void Main.deadConditional(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none
+  //
+  /// CHECK-START: void Main.deadConditional(int) loop_optimization (after)
+  /// CHECK-NOT: Phi loop:{{B\d+}}
+  public static void deadConditional(int n) {
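+    // Neither k nor m is read after the loop, so the conditional updates and their phis
+    // are dead; loop optimization removes them, leaving no phi in the loop.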
+    int k = 0;
+    int m = 0;
+    for (int i = 0; i < n; i++) {
+      if (i == 3)
+        k = i;
+      else
+        m = i;
+    }
+  }
+
+  /// CHECK-START: void Main.deadConditionalCycle(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.deadConditionalCycle(int) loop_optimization (after)
+  /// CHECK-NOT: Phi loop:{{B\d+}}
+  public static void deadConditionalCycle(int n) {
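+    // k and m only update themselves (k--, m++) and are never read afterwards, so all
+    // phis in the loop are dead and removed by loop optimization.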
+    int k = 0;
+    int m = 0;
+    for (int i = 0; i < n; i++) {
+      if (i == 3)
+        k--;
+      else
+        m++;
+    }
+  }
+
+
   /// CHECK-START: void Main.deadInduction() loop_optimization (before)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
@@ -668,6 +705,8 @@
     potentialInfiniteLoop(4);
     deadNestedLoops();
     deadNestedAndFollowingLoops();
+    deadConditional(4);
+    deadConditionalCycle(4);
 
     deadInduction();
     for (int i = 0; i < a.length; i++) {
diff --git a/test/620-checker-bce-intrinsics/expected.txt b/test/620-checker-bce-intrinsics/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/620-checker-bce-intrinsics/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/620-checker-bce-intrinsics/info.txt b/test/620-checker-bce-intrinsics/info.txt
new file mode 100644
index 0000000..a868845
--- /dev/null
+++ b/test/620-checker-bce-intrinsics/info.txt
@@ -0,0 +1 @@
+Test on bounds check elimination in loops using intrinsics.
diff --git a/test/620-checker-bce-intrinsics/src/Main.java b/test/620-checker-bce-intrinsics/src/Main.java
new file mode 100644
index 0000000..afc3c65
--- /dev/null
+++ b/test/620-checker-bce-intrinsics/src/Main.java
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests on bounds check elimination in loops that use intrinsics.
+ * All bounds checks below should be statically eliminated.
+ */
+public class Main {
+
+  /// CHECK-START: int Main.oneArray(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  //
+  /// CHECK-START: int Main.oneArray(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int oneArray(int[] a) {
+    int x = 0;
+    for (int i = 0; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.oneArrayAbs(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  //
+  /// CHECK-START: int Main.oneArrayAbs(int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int oneArrayAbs(int[] a, int lo) {
+    int x = 0;
+    for (int i = Math.abs(lo); i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+
+  /// CHECK-START: int Main.twoArrays(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.twoArrays(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int twoArrays(int[] a, int[] b) {
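+    // Using Math.min(a.length, b.length) as the loop bound lets BCE prove i < a.length
+    // and i < b.length, so both bounds checks are eliminated without deoptimization.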
+    int x = 0;
+    for (int i = 0; i < Math.min(a.length, b.length); i++) {
+      x += a[i] + b[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.threeArrays(int[], int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.threeArrays(int[], int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int threeArrays(int[] a, int[] b, int[] c) {
+    int x = 0;
+    for (int i = 0; i < Math.min(Math.min(a.length, b.length), c.length); i++) {
+      x += a[i] + b[i] + c[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.fourArrays(int[], int[], int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.fourArrays(int[], int[], int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int fourArrays(int[] a, int[] b, int[] c, int[] d) {
+    int x = 0;
+    for (int i = 0; i < Math.min(Math.min(a.length, b.length), Math.min(c.length, d.length)); i++) {
+      x += a[i] + b[i] + c[i] + d[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.oneArrayWithCleanup(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.oneArrayWithCleanup(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int oneArrayWithCleanup(int[] a) {
+    int x = 0;
+    int n = Math.min(4, a.length);
+    for (int i = 0; i < n; i++) {
+      x += a[i];
+    }
+    for (int i = n; i < a.length; i++) {
+      x += a[i] * 10;
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.twoArraysWithCleanup(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.twoArraysWithCleanup(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int twoArraysWithCleanup(int[] a, int[] b) {
+    int x = 0;
+    int n = Math.min(a.length, b.length);
+    for (int i = n - 1; i >= 0; i--) {
+      x += a[i] + b[i];
+    }
+    for (int i = n; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.threeArraysWithCleanup(int[], int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.threeArraysWithCleanup(int[], int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int threeArraysWithCleanup(int[] a, int[] b, int[] c) {
+    int x = 0;
+    int n = Math.min(a.length, Math.min(b.length, c.length));
+    for (int i = n - 1; i >= 0; i--) {
+      x += a[i] + b[i] + c[i];
+    }
+    for (int i = n; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.altLoopLogic(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.altLoopLogic(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int altLoopLogic(int[] a, int[] b) {
+    int x = 0;
+    int n = Math.min(a.length, b.length);
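+    // Down-counting loop written with the "i-- > 0" idiom; BCE can still prove 0 <= i < n.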
+    for (int i = n; i-- > 0;) {
+      x += a[i] + b[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.hiddenMin(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.hiddenMin(int[], int[]) BCE (after)
+  //
+  // TODO: make BCE recognize the hidden min and eliminate these bounds checks as well.
+  static int hiddenMin(int[] a, int[] b) {
+    int x = 0;
+    for (int i = 0; i < a.length && i < b.length; i++) {
+      x += a[i] + b[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.hiddenMinWithCleanup(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.hiddenMinWithCleanup(int[], int[]) BCE (after)
+  //
+  // TODO: make BCE recognize the hidden min and eliminate these bounds checks as well.
+  static int hiddenMinWithCleanup(int[] a, int[] b) {
+    int x = 0;
+    int i = 0;
+    for (; i < a.length && i < b.length; i++) {
+      x += a[i] + b[i];
+    }
+    for (; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  public static void main(String[] args) {
+    int[] a = { 1, 2, 3, 4, 5 };
+    int[] b = { 6, 7, 8, 9, 4, 2 };
+    int[] c = { 1, 2, 3 };
+    int[] d = { 8, 5, 3, 2 };
+
+    expectEquals(15, oneArray(a));
+    expectEquals(36, oneArray(b));
+    expectEquals(6,  oneArray(c));
+    expectEquals(18, oneArray(d));
+
+    expectEquals(5,  oneArrayAbs(a, -4));
+    expectEquals(15, oneArrayAbs(a, 0));
+    expectEquals(5,  oneArrayAbs(a, 4));
+
+    expectEquals(30, twoArrays(a, a));
+    expectEquals(49, twoArrays(a, b));
+    expectEquals(12, twoArrays(a, c));
+    expectEquals(28, twoArrays(a, d));
+
+    expectEquals(45, threeArrays(a, a, a));
+    expectEquals(33, threeArrays(a, b, c));
+    expectEquals(58, threeArrays(a, b, d));
+    expectEquals(28, threeArrays(a, c, d));
+
+    expectEquals(60, fourArrays(a, a, a, a));
+    expectEquals(49, fourArrays(a, b, c, d));
+
+    expectEquals(60, oneArrayWithCleanup(a));
+    expectEquals(90, oneArrayWithCleanup(b));
+    expectEquals(6,  oneArrayWithCleanup(c));
+    expectEquals(18, oneArrayWithCleanup(d));
+
+    expectEquals(30, twoArraysWithCleanup(a, a));
+    expectEquals(49, twoArraysWithCleanup(a, b));
+    expectEquals(21, twoArraysWithCleanup(a, c));
+    expectEquals(33, twoArraysWithCleanup(a, d));
+
+    expectEquals(45, threeArraysWithCleanup(a, a, a));
+    expectEquals(42, threeArraysWithCleanup(a, b, c));
+    expectEquals(63, threeArraysWithCleanup(a, b, d));
+    expectEquals(37, threeArraysWithCleanup(a, c, d));
+
+    expectEquals(30, altLoopLogic(a, a));
+    expectEquals(49, altLoopLogic(a, b));
+    expectEquals(12, altLoopLogic(a, c));
+    expectEquals(28, altLoopLogic(a, d));
+
+    expectEquals(30, hiddenMin(a, a));
+    expectEquals(49, hiddenMin(a, b));
+    expectEquals(12, hiddenMin(a, c));
+    expectEquals(28, hiddenMin(a, d));
+
+    expectEquals(30, hiddenMinWithCleanup(a, a));
+    expectEquals(49, hiddenMinWithCleanup(a, b));
+    expectEquals(21, hiddenMinWithCleanup(a, c));
+    expectEquals(33, hiddenMinWithCleanup(a, d));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/622-checker-bce-regressions/expected.txt b/test/622-checker-bce-regressions/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/622-checker-bce-regressions/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/622-checker-bce-regressions/info.txt b/test/622-checker-bce-regressions/info.txt
new file mode 100644
index 0000000..a753dfa
--- /dev/null
+++ b/test/622-checker-bce-regressions/info.txt
@@ -0,0 +1 @@
+Regression tests on BCE.
diff --git a/test/622-checker-bce-regressions/src/Main.java b/test/622-checker-bce-regressions/src/Main.java
new file mode 100644
index 0000000..6ba2644
--- /dev/null
+++ b/test/622-checker-bce-regressions/src/Main.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression tests for BCE.
+ */
+public class Main {
+
+  static int[] array = new int[10];
+
+  /// CHECK-START: int Main.doNotVisitAfterForwardBCE(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.doNotVisitAfterForwardBCE(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  static int doNotVisitAfterForwardBCE(int[] a) {
+    if (a == null) {
+      throw new Error("Null");
+    }
+    int k = 0;
+    int j = 0;
+    for (int i = 1; i < 10; i++) {
+      j = i - 1;
+      // b/32547652: after DCE, bounds checks become consecutive,
+      // and second should not be revisited after forward BCE.
+      k = a[i] + a[i - 1];
+    }
+    return j;
+  }
+
+  public static void main(String[] args) {
+    expectEquals(8, doNotVisitAfterForwardBCE(array));
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/562-no-intermediate/expected.txt b/test/622-simplifyifs-exception-edges/expected.txt
similarity index 100%
copy from test/562-no-intermediate/expected.txt
copy to test/622-simplifyifs-exception-edges/expected.txt
diff --git a/test/622-simplifyifs-exception-edges/info.txt b/test/622-simplifyifs-exception-edges/info.txt
new file mode 100644
index 0000000..58c4bfb
--- /dev/null
+++ b/test/622-simplifyifs-exception-edges/info.txt
@@ -0,0 +1,2 @@
+Regression test for the SimplifyIfs() graph simplification erroneously trying
+to redirect exception handler edges.
\ No newline at end of file
diff --git a/test/622-simplifyifs-exception-edges/smali/Test.smali b/test/622-simplifyifs-exception-edges/smali/Test.smali
new file mode 100644
index 0000000..5e91258
--- /dev/null
+++ b/test/622-simplifyifs-exception-edges/smali/Test.smali
@@ -0,0 +1,76 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTest;
+
+.super Ljava/lang/Object;
+
+.method public static test([I)I
+    .locals 2
+    const/4 v0, 0
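+    # v0 is still 0 in the handler only when array-length threw (null input); a failing
+    # aget leaves v0 == length - 1, so the handler can distinguish the two cases.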
+    :try1_begin
+    array-length v1, p0
+    :try1_end
+    add-int/lit8 v0, v1, -1
+    :try2_begin
+    aget v0, p0, v0
+    :try2_end
+    :end
+    return v0
+
+    :catch_all
+    # Regression test for bug 32545860:
+    #     SimplifyIfs() would have redirected exception handler edges leading here.
+    # Note: There is no move-exception here to prevent matching the SimplifyIfs() pattern.
+    if-eqz v0, :is_zero
+    const/4 v0, -1
+    goto :end
+    :is_zero
+    const/4 v0, -2
+    goto :end
+
+    .catchall {:try1_begin .. :try1_end } :catch_all
+    .catchall {:try2_begin .. :try2_end } :catch_all
+.end method
+
+.method public static test2([II)I
+    .locals 3
+    move v0, p1
+    :try_begin
+    array-length v1, p0
+    add-int/lit8 v1, v1, -1
+    add-int/lit8 v0, v0, 1
+    aget v1, p0, v1
+    const/4 v0, 2
+    aget v2, p0, p1
+    const/4 v0, 3
+    :try_end
+    :end
+    return v0
+
+    :catch_all
+    # Regression test for bug 32546110:
+    #     SimplifyIfs() would have looked at predecessors of this block based on the indexes
+    #     of the catch Phi's inputs. For catch blocks these two arrays are unrelated, so
+    #     this caused out-of-range access triggering a DCHECK() in dchecked_vector<>.
+    # Note: There is no move-exception here to prevent matching the SimplifyIfs() pattern.
+    if-eqz v0, :is_zero
+    const/4 v0, -1
+    goto :end
+    :is_zero
+    const/4 v0, -2
+    goto :end
+
+    .catchall {:try_begin .. :try_end } :catch_all
+.end method
diff --git a/test/622-simplifyifs-exception-edges/src/Main.java b/test/622-simplifyifs-exception-edges/src/Main.java
new file mode 100644
index 0000000..636f047
--- /dev/null
+++ b/test/622-simplifyifs-exception-edges/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+    public static void main(String[] args) throws Exception {
+        Class<?> c = Class.forName("Test");
+        Method test = c.getDeclaredMethod("test", int[].class);
+        assertIntEquals(-2, (int)test.invoke(null, new Object[] { null }));
+        assertIntEquals(-1, (int)test.invoke(null, new Object[] { new int[0] }));
+        assertIntEquals(42, (int)test.invoke(null, new Object[] { new int[] { 42 } }));
+
+        Method test2 = c.getDeclaredMethod("test2", int[].class, int.class);
+        assertIntEquals(-2, (int)test2.invoke(null, new Object[] { null, 0 }));
+        assertIntEquals(-1, (int)test2.invoke(null, new Object[] { new int[0], 0 }));
+        assertIntEquals(-1, (int)test2.invoke(null, new Object[] { new int[0], 1 }));
+        assertIntEquals(3, (int)test2.invoke(null, new Object[] { new int[] { 42 }, 0 }));
+    }
+
+    public static void assertIntEquals(int expected, int result) {
+        if (expected != result) {
+            throw new Error("Expected: " + expected + ", found: " + result);
+        }
+    }
+
+    // Workaround for non-zero field ids offset in dex file with no fields. Bug: 18051191
+    static final boolean dummy = false;
+}
diff --git a/test/623-checker-loop-regressions/expected.txt b/test/623-checker-loop-regressions/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/623-checker-loop-regressions/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/623-checker-loop-regressions/info.txt b/test/623-checker-loop-regressions/info.txt
new file mode 100644
index 0000000..6271600
--- /dev/null
+++ b/test/623-checker-loop-regressions/info.txt
@@ -0,0 +1 @@
+Regression tests on loop optimizations.
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
new file mode 100644
index 0000000..ce5bda1
--- /dev/null
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression tests for loop optimizations.
+ */
+public class Main {
+
+  /// CHECK-START: int Main.earlyExitFirst(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.earlyExitFirst(int) loop_optimization (after)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  static int earlyExitFirst(int m) {
+    int k = 0;
+    for (int i = 0; i < 10; i++) {
+      if (i == m) {
+        return k;
+      }
+      k++;
+    }
+    return k;
+  }
+
+  /// CHECK-START: int Main.earlyExitLast(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.earlyExitLast(int) loop_optimization (after)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  static int earlyExitLast(int m) {
+    int k = 0;
+    for (int i = 0; i < 10; i++) {
+      k++;
+      if (i == m) {
+        return k;
+      }
+    }
+    return k;
+  }
+
+  /// CHECK-START: int Main.earlyExitNested() loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:<<Loop1>>
+  /// CHECK-DAG: Phi loop:<<Loop2>>      outer_loop:<<Loop1>>
+  //
+  /// CHECK-START: int Main.earlyExitNested() loop_optimization (after)
+  /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop1>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.earlyExitNested() loop_optimization (after)
+  /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:{{B\d+}}
+  static int earlyExitNested() {
+    int offset = 0;
+    for (int i = 0; i < 2; i++) {
+      int start = offset;
+      // This loop can be removed.
+      for (int j = 0; j < 2; j++) {
+        offset++;
+      }
+      if (i == 1) {
+        return start;
+      }
+    }
+    return 0;
+  }
+
+  public static void main(String[] args) {
+    expectEquals(10, earlyExitFirst(-1));
+    for (int i = 0; i <= 10; i++) {
+      expectEquals(i, earlyExitFirst(i));
+    }
+    expectEquals(10, earlyExitFirst(11));
+
+    expectEquals(10, earlyExitLast(-1));
+    for (int i = 0; i < 10; i++) {
+      expectEquals(i + 1, earlyExitLast(i));
+    }
+    expectEquals(10, earlyExitLast(10));
+    expectEquals(10, earlyExitLast(11));
+
+    expectEquals(2, earlyExitNested());
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/955-methodhandles-smali/expected.txt b/test/955-methodhandles-smali/expected.txt
index 047a287..5de1274 100644
--- a/test/955-methodhandles-smali/expected.txt
+++ b/test/955-methodhandles-smali/expected.txt
@@ -5,4 +5,5 @@
 40
 43
 44
-0-11
+0
+-1
diff --git a/test/955-methodhandles-smali/smali/Main.smali b/test/955-methodhandles-smali/smali/Main.smali
index 9681d56..52460a8 100644
--- a/test/955-methodhandles-smali/smali/Main.smali
+++ b/test/955-methodhandles-smali/smali/Main.smali
@@ -220,24 +220,22 @@
     invoke-polymorphic {v0, v1, v1}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;Ljava/lang/Long;)I
     move-result v3
     sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->print(I)V
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(I)V
 
     # Call compareTo(long) - this is an implicit box.
     const-wide v2, 44
     invoke-polymorphic {v0, v1, v2, v3}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;J)I
     move-result v3
     sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->print(I)V
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(I)V
 
     # Call compareTo(int) - this is an implicit box.
-    const v2, 40
-    invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;I)I
-    move-result v3
-    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->print(I)V
-
-    # Add a newline at the end of file.
-    invoke-virtual {v4}, Ljava/io/PrintStream;->println()V
+# This throws WrongMethodTypeException, as it would require a two-step conversion
+# (int->long->Long or int->Integer->Long).
+#    const v2, 40
+#    invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;I)I
+#    move-result v3
+#    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+#    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->print(I)V
 
     return-void
 .end method
diff --git a/test/956-methodhandles/expected.txt b/test/956-methodhandles/expected.txt
index ad1c43c..9ca448c 100644
--- a/test/956-methodhandles/expected.txt
+++ b/test/956-methodhandles/expected.txt
@@ -4,3 +4,4 @@
 foo_B
 privateRyan_D
 Received exception: Expected (java.lang.String, java.lang.String)java.lang.String but was (java.lang.String, java.lang.Object)void
+String constructors done.
diff --git a/test/956-methodhandles/src/Main.java b/test/956-methodhandles/src/Main.java
index 44c0447..d0c658f 100644
--- a/test/956-methodhandles/src/Main.java
+++ b/test/956-methodhandles/src/Main.java
@@ -19,6 +19,12 @@
 import java.lang.invoke.MethodHandles.Lookup;
 import java.lang.invoke.MethodType;
 import java.lang.invoke.WrongMethodTypeException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
 
 public class Main {
 
@@ -59,6 +65,10 @@
     testfindSpecial_invokeDirectBehaviour();
     testExceptionDetailMessages();
     testfindVirtual();
+    testUnreflects();
+    testAsType();
+    testConstructors();
+    testStringConstructors();
   }
 
   public static void testfindSpecial_invokeSuperBehaviour() throws Throwable {
@@ -262,6 +272,419 @@
       System.out.println("Unexpected return value for BarImpl#superPackageMethod: " + str);
     }
   }
+
+  static class UnreflectTester {
+    public String publicField;
+    private String privateField;
+
+    public static String publicStaticField = "publicStaticValue";
+    private static String privateStaticField = "privateStaticValue";
+
+    private UnreflectTester(String val) {
+      publicField = val;
+      privateField = val;
+    }
+
+    // NOTE: The boolean constructor argument only exists to give this a
+    // different signature.
+    public UnreflectTester(String val, boolean unused) {
+      this(val);
+    }
+
+    private static String privateStaticMethod() {
+      return "privateStaticMethod";
+    }
+
+    private String privateMethod() {
+      return "privateMethod";
+    }
+
+    public static String publicStaticMethod() {
+      return "publicStaticMethod";
+    }
+
+    public String publicMethod() {
+      return "publicMethod";
+    }
+  }
+
+  public static void testUnreflects() throws Throwable {
+    UnreflectTester instance = new UnreflectTester("unused");
+    Method publicMethod = UnreflectTester.class.getMethod("publicMethod");
+
+    MethodHandle mh = MethodHandles.lookup().unreflect(publicMethod);
+    assertEquals("publicMethod", (String) mh.invoke(instance));
+    assertEquals("publicMethod", (String) mh.invokeExact(instance));
+
+    Method publicStaticMethod = UnreflectTester.class.getMethod("publicStaticMethod");
+    mh = MethodHandles.lookup().unreflect(publicStaticMethod);
+    assertEquals("publicStaticMethod", (String) mh.invoke());
+    assertEquals("publicStaticMethod", (String) mh.invokeExact());
+
+    Method privateMethod = UnreflectTester.class.getDeclaredMethod("privateMethod");
+    try {
+      mh = MethodHandles.lookup().unreflect(privateMethod);
+      fail();
+    } catch (IllegalAccessException expected) {}
+
+    privateMethod.setAccessible(true);
+    mh = MethodHandles.lookup().unreflect(privateMethod);
+    assertEquals("privateMethod", (String) mh.invoke(instance));
+    assertEquals("privateMethod", (String) mh.invokeExact(instance));
+
+    Method privateStaticMethod = UnreflectTester.class.getDeclaredMethod("privateStaticMethod");
+    try {
+      mh = MethodHandles.lookup().unreflect(privateStaticMethod);
+      fail();
+    } catch (IllegalAccessException expected) {}
+
+    privateStaticMethod.setAccessible(true);
+    mh = MethodHandles.lookup().unreflect(privateStaticMethod);
+    assertEquals("privateStaticMethod", (String) mh.invoke());
+    assertEquals("privateStaticMethod", (String) mh.invokeExact());
+
+    Constructor privateConstructor = UnreflectTester.class.getDeclaredConstructor(String.class);
+    try {
+      mh = MethodHandles.lookup().unreflectConstructor(privateConstructor);
+      fail();
+    } catch (IllegalAccessException expected) {}
+
+    privateConstructor.setAccessible(true);
+    mh = MethodHandles.lookup().unreflectConstructor(privateConstructor);
+    instance = (UnreflectTester) mh.invokeExact("abc");
+    assertEquals("abc", instance.publicField);
+    instance = (UnreflectTester) mh.invoke("def");
+    assertEquals("def", instance.publicField);
+    Constructor publicConstructor = UnreflectTester.class.getConstructor(String.class,
+        boolean.class);
+    mh = MethodHandles.lookup().unreflectConstructor(publicConstructor);
+    instance = (UnreflectTester) mh.invokeExact("abc", false);
+    assertEquals("abc", instance.publicField);
+    instance = (UnreflectTester) mh.invoke("def", true);
+    assertEquals("def", instance.publicField);
+
+    // TODO(narayan): Non exact invokes for field sets/gets are not implemented yet.
+    //
+    // assertEquals("instanceValue", (String) mh.invoke(new UnreflectTester("instanceValue")));
+    Field publicField = UnreflectTester.class.getField("publicField");
+    mh = MethodHandles.lookup().unreflectGetter(publicField);
+    instance = new UnreflectTester("instanceValue");
+    assertEquals("instanceValue", (String) mh.invokeExact(instance));
+
+    mh = MethodHandles.lookup().unreflectSetter(publicField);
+    instance = new UnreflectTester("instanceValue");
+    mh.invokeExact(instance, "updatedInstanceValue");
+    assertEquals("updatedInstanceValue", instance.publicField);
+
+    Field publicStaticField = UnreflectTester.class.getField("publicStaticField");
+    mh = MethodHandles.lookup().unreflectGetter(publicStaticField);
+    UnreflectTester.publicStaticField = "updatedStaticValue";
+    assertEquals("updatedStaticValue", (String) mh.invokeExact());
+
+    mh = MethodHandles.lookup().unreflectSetter(publicStaticField);
+    UnreflectTester.publicStaticField = "updatedStaticValue";
+    mh.invokeExact("updatedStaticValue2");
+    assertEquals("updatedStaticValue2", UnreflectTester.publicStaticField);
+
+    Field privateField = UnreflectTester.class.getDeclaredField("privateField");
+    try {
+      mh = MethodHandles.lookup().unreflectGetter(privateField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+    try {
+      mh = MethodHandles.lookup().unreflectSetter(privateField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+
+    privateField.setAccessible(true);
+
+    mh = MethodHandles.lookup().unreflectGetter(privateField);
+    instance = new UnreflectTester("instanceValue");
+    assertEquals("instanceValue", (String) mh.invokeExact(instance));
+
+    mh = MethodHandles.lookup().unreflectSetter(privateField);
+    instance = new UnreflectTester("instanceValue");
+    mh.invokeExact(instance, "updatedInstanceValue");
+    assertEquals("updatedInstanceValue", instance.privateField);
+
+    Field privateStaticField = UnreflectTester.class.getDeclaredField("privateStaticField");
+    try {
+      mh = MethodHandles.lookup().unreflectGetter(privateStaticField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+    try {
+      mh = MethodHandles.lookup().unreflectSetter(privateStaticField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+
+    privateStaticField.setAccessible(true);
+    mh = MethodHandles.lookup().unreflectGetter(privateStaticField);
+    privateStaticField.set(null, "updatedStaticValue");
+    assertEquals("updatedStaticValue", (String) mh.invokeExact());
+
+    mh = MethodHandles.lookup().unreflectSetter(privateStaticField);
+    privateStaticField.set(null, "updatedStaticValue");
+    mh.invokeExact("updatedStaticValue2");
+    assertEquals("updatedStaticValue2", (String) privateStaticField.get(null));
+  }
+
+  // This method only exists to fool Jack's handling of types. See b/32536744.
+  public static CharSequence getSequence() {
+    return "foo";
+  }
+
+  public static void testAsType() throws Throwable {
+    // The type of this handle is (String, String)String.
+    MethodHandle mh = MethodHandles.lookup().findVirtual(String.class,
+        "concat", MethodType.methodType(String.class, String.class));
+
+    // Change it to (CharSequence, String)Object.
+    MethodHandle asType = mh.asType(
+        MethodType.methodType(Object.class, CharSequence.class, String.class));
+
+    Object obj = asType.invokeExact((CharSequence) getSequence(), "bar");
+    assertEquals("foobar", (String) obj);
+
+    // Should fail due to a wrong return type.
+    try {
+      String str = (String) asType.invokeExact((CharSequence) getSequence(), "bar");
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Should fail due to a wrong argument type (String instead of CharSequence).
+    try {
+      String str = (String) asType.invokeExact("baz", "bar");
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Calls to asType should fail if the types are not convertible.
+    //
+    // Bad return type conversion.
+    try {
+      mh.asType(MethodType.methodType(int.class, String.class, String.class));
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Bad argument conversion.
+    try {
+      mh.asType(MethodType.methodType(String.class, int.class, String.class));
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+  }
+
+  public static void assertEquals(String s1, String s2) {
+    if (s1 == s2) {
+      return;
+    }
+
+    if (s1 != null && s2 != null && s1.equals(s2)) {
+      return;
+    }
+
+    throw new AssertionError("assertEquals s1: " + s1 + ", s2: " + s2);
+  }
+
+  public static void fail() {
+    System.out.println("fail");
+    Thread.dumpStack();
+  }
+
+  public static void fail(String message) {
+    System.out.println("fail: " + message);
+    Thread.dumpStack();
+  }
+
+  public static void testConstructors() throws Throwable {
+    MethodHandle mh =
+        MethodHandles.lookup().findConstructor(Float.class,
+                                               MethodType.methodType(void.class,
+                                                                     float.class));
+    Float value = (Float) mh.invokeExact(0.33f);
+    if (value.floatValue() != 0.33f) {
+      fail("Unexpected float value from invokeExact " + value.floatValue());
+    }
+
+    value = (Float) mh.invoke(3.34f);
+    if (value.floatValue() != 3.34f) {
+      fail("Unexpected float value from invoke " + value.floatValue());
+    }
+
+    mh = MethodHandles.lookup().findConstructor(Double.class,
+                                                MethodType.methodType(void.class, String.class));
+    Double d = (Double) mh.invoke("8.45e3");
+    if (d.doubleValue() != 8.45e3) {
+      fail("Unexpected double value from Double(String) " + value.doubleValue());
+    }
+
+    mh = MethodHandles.lookup().findConstructor(Double.class,
+                                                MethodType.methodType(void.class, double.class));
+    d = (Double) mh.invoke(8.45e3);
+    if (d.doubleValue() != 8.45e3) {
+      fail("Unexpected double value from Double(double) " + value.doubleValue());
+    }
+
+    // Primitive type
+    try {
+      mh = MethodHandles.lookup().findConstructor(int.class, MethodType.methodType(void.class));
+      fail("Unexpected lookup success for primitive constructor");
+    } catch (NoSuchMethodException e) {}
+
+    // Interface
+    try {
+      mh = MethodHandles.lookup().findConstructor(Readable.class,
+                                                  MethodType.methodType(void.class));
+      fail("Unexpected lookup success for interface constructor");
+    } catch (NoSuchMethodException e) {}
+
+    // Abstract
+    mh = MethodHandles.lookup().findConstructor(Process.class, MethodType.methodType(void.class));
+    try {
+      mh.invoke();
+      fail("Unexpected ability to instantiate an abstract class");
+    } catch (InstantiationException e) {}
+
+    // Non-existent
+    try {
+        MethodHandle bad = MethodHandles.lookup().findConstructor(
+            String.class, MethodType.methodType(String.class, Float.class));
+        fail("Unexpected success for non-existent constructor");
+    } catch (NoSuchMethodException e) {}
+
+    // Non-void constructor search. (I)I instead of (I)V.
+    try {
+        MethodHandle foo = MethodHandles.lookup().findConstructor(
+            Integer.class, MethodType.methodType(Integer.class, Integer.class));
+        fail("Unexpected success for non-void type for findConstructor");
+    } catch (NoSuchMethodException e) {}
+  }
+
+  public static void testStringConstructors() throws Throwable {
+    final String testPattern = "The system as we know it is broken";
+
+    // String()
+    MethodHandle mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class));
+    String s = (String) mh.invokeExact();
+    if (!s.equals("")) {
+      fail("Unexpected empty string constructor result: '" + s + "'");
+    }
+
+    // String(String)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, String.class));
+    s = (String) mh.invokeExact(testPattern);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(char[])
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, char[].class));
+    s = (String) mh.invokeExact(testPattern.toCharArray());
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(char[], int, int)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, char[].class, int.class, int.class));
+    s = (String) mh.invokeExact(new char [] { 'a', 'b', 'c', 'd', 'e'}, 2, 3);
+    if (!s.equals("cde")) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(int[] codePoints, int offset, int count)
+    StringBuffer sb = new StringBuffer(testPattern);
+    int[] codePoints = new int[sb.codePointCount(0, sb.length())];
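+    // testPattern is ASCII-only, so its code point count equals its length and a
+    // straightforward per-char copy builds the code point array.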
+    for (int i = 0; i < sb.length(); ++i) {
+      codePoints[i] = sb.codePointAt(i);
+    }
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, int[].class, int.class, int.class));
+    s = (String) mh.invokeExact(codePoints, 0, codePoints.length);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], int offset, int length), over the full ASCII byte array.
+    byte [] ascii = testPattern.getBytes(StandardCharsets.US_ASCII);
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class, int.class, int.class));
+    s = (String) mh.invokeExact(ascii, 0, ascii.length);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], int offset, int length, String charsetName)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class,
+        MethodType.methodType(void.class, byte[].class, int.class, int.class, String.class));
+    s = (String) mh.invokeExact(ascii, 0, 5, StandardCharsets.US_ASCII.name());
+    if (!s.equals(testPattern.substring(0, 5))) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], int offset, int length, Charset charset)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class,
+        MethodType.methodType(void.class, byte[].class, int.class, int.class, Charset.class));
+    s = (String) mh.invokeExact(ascii, 0, 5, StandardCharsets.US_ASCII);
+    if (!s.equals(testPattern.substring(0, 5))) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], String charsetName)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class,
+        MethodType.methodType(void.class, byte[].class, String.class));
+    s = (String) mh.invokeExact(ascii, StandardCharsets.US_ASCII.name());
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], Charset charset)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class, Charset.class));
+    s = (String) mh.invokeExact(ascii, StandardCharsets.US_ASCII);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], int offset, int length)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class, int.class, int.class));
+    s = (String) mh.invokeExact(ascii, 1, ascii.length - 2);
+    s = testPattern.charAt(0) + s + testPattern.charAt(testPattern.length() - 1);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[])
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class));
+    s = (String) mh.invokeExact(ascii);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(StringBuffer buffer)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, StringBuffer.class));
+    s = (String) mh.invokeExact(sb);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    System.out.println("String constructors done.");
+  }
 }
 
 
diff --git a/test/957-methodhandle-transforms/expected.txt b/test/957-methodhandle-transforms/expected.txt
index fc4fdd6..7540ef7 100644
--- a/test/957-methodhandle-transforms/expected.txt
+++ b/test/957-methodhandle-transforms/expected.txt
@@ -4,6 +4,7 @@
 Message: foo, Message2: 42
 Message: foo, Message2: 42
 Message: foo, Message2: 42
+Message: foo, Message2: 42
 Target: Arg1: foo, Arg2: 42
 Target: Arg1: foo, Arg2: 42
 Handler: java.lang.IllegalArgumentException: exceptionMessage, Arg1: foo, Arg2: 42, ExMsg: exceptionMessage
@@ -14,3 +15,4 @@
 target: target, 42, 56
 fallback: fallback, 42, 56
 target: target, 42, 56
+target: target, 42, 56
diff --git a/test/957-methodhandle-transforms/src/Main.java b/test/957-methodhandle-transforms/src/Main.java
index 4a27086..3c6f119 100644
--- a/test/957-methodhandle-transforms/src/Main.java
+++ b/test/957-methodhandle-transforms/src/Main.java
@@ -26,6 +26,11 @@
     testDropArguments();
     testCatchException();
     testGuardWithTest();
+    testArrayElementGetter();
+    testArrayElementSetter();
+    testIdentity();
+    testConstant();
+    testBindTo();
   }
 
   public static void testThrowException() throws Throwable {
@@ -90,6 +95,12 @@
       // it's IAE and should be WMTE instead.
     }
 
+    // Check that asType works as expected.
+    transform = MethodHandles.dropArguments(delegate, 0, int.class, Object.class);
+    transform = transform.asType(MethodType.methodType(void.class,
+          new Class<?>[] { short.class, Object.class, String.class, long.class }));
+    transform.invokeExact((short) 45, new Object(), "foo", 42l);
+
     // Invalid argument location, should not be allowed.
     try {
       MethodHandles.dropArguments(delegate, -1, int.class, Object.class);
@@ -183,6 +194,14 @@
     assertEquals("java.lang.IllegalArgumentException: exceptionMessage", returnVal);
     returnVal = (String) adapter.invokeExact("foo", 42l, "exceptionMessage2");
     assertEquals("java.lang.IllegalArgumentException: exceptionMessage2", returnVal);
+
+    // Check that asType works as expected.
+    adapter = MethodHandles.catchException(target, IllegalArgumentException.class,
+        handler);
+    adapter = adapter.asType(MethodType.methodType(String.class,
+          new Class<?>[] { String.class, int.class, String.class }));
+    returnVal = (String) adapter.invokeExact("foo", 42, "exceptionMessage");
+    assertEquals("java.lang.IllegalArgumentException: exceptionMessage", returnVal);
   }
 
   public static boolean testGuardWithTest_test(String arg1, long arg2) {
@@ -225,6 +244,468 @@
     assertEquals("fallback", returnVal);
     returnVal = (String) adapter.invokeExact("target", 42l, 56);
     assertEquals("target", returnVal);
+
+    // Check that asType works as expected.
+    adapter = adapter.asType(MethodType.methodType(String.class,
+          new Class<?>[] { String.class, int.class, int.class }));
+    returnVal = (String) adapter.invokeExact("target", 42, 56);
+    assertEquals("target", returnVal);
+  }
+
+  public static void testArrayElementGetter() throws Throwable {
+    MethodHandle getter = MethodHandles.arrayElementGetter(int[].class);
+
+    {
+      int[] array = new int[1];
+      array[0] = 42;
+      int value = (int) getter.invoke(array, 0);
+      if (value != 42) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      try {
+        value = (int) getter.invoke(array, -1);
+        fail();
+      } catch (ArrayIndexOutOfBoundsException expected) {
+      }
+
+      try {
+        value = (int) getter.invoke(null, -1);
+        fail();
+      } catch (NullPointerException expected) {
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(long[].class);
+      long[] array = new long[1];
+      array[0] = 42;
+      long value = (long) getter.invoke(array, 0);
+      if (value != 42l) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(short[].class);
+      short[] array = new short[1];
+      array[0] = 42;
+      short value = (short) getter.invoke(array, 0);
+      if (value != 42l) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(char[].class);
+      char[] array = new char[1];
+      array[0] = 42;
+      char value = (char) getter.invoke(array, 0);
+      if (value != 42l) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(byte[].class);
+      byte[] array = new byte[1];
+      array[0] = (byte) 0x8;
+      byte value = (byte) getter.invoke(array, 0);
+      if (value != (byte) 0x8) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(boolean[].class);
+      boolean[] array = new boolean[1];
+      array[0] = true;
+      boolean value = (boolean) getter.invoke(array, 0);
+      if (!value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(float[].class);
+      float[] array = new float[1];
+      array[0] = 42.0f;
+      float value = (float) getter.invoke(array, 0);
+      if (value != 42.0f) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(double[].class);
+      double[] array = new double[1];
+      array[0] = 42.0;
+      double value = (double) getter.invoke(array, 0);
+      if (value != 42.0) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(String[].class);
+      String[] array = new String[3];
+      array[0] = "42";
+      array[1] = "48";
+      array[2] = "54";
+      String value = (String) getter.invoke(array, 0);
+      assertEquals("42", value);
+      value = (String) getter.invoke(array, 1);
+      assertEquals("48", value);
+      value = (String) getter.invoke(array, 2);
+      assertEquals("54", value);
+    }
+  }
+
+  public static void testArrayElementSetter() throws Throwable {
+    MethodHandle setter = MethodHandles.arrayElementSetter(int[].class);
+
+    {
+      int[] array = new int[2];
+      setter.invoke(array, 0, 42);
+      setter.invoke(array, 1, 43);
+
+      if (array[0] != 42) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+      if (array[1] != 43) {
+        System.out.println("Unexpected value: " + array[1]);
+      }
+
+      try {
+        setter.invoke(array, -1, 42);
+        fail();
+      } catch (ArrayIndexOutOfBoundsException expected) {
+      }
+
+      try {
+        setter.invoke(null, 0, 42);
+        fail();
+      } catch (NullPointerException expected) {
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(long[].class);
+      long[] array = new long[1];
+      setter.invoke(array, 0, 42l);
+      if (array[0] != 42l) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(short[].class);
+      short[] array = new short[1];
+      setter.invoke(array, 0, (short) 42);
+      if (array[0] != 42l) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(char[].class);
+      char[] array = new char[1];
+      setter.invoke(array, 0, (char) 42);
+      if (array[0] != 42) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(byte[].class);
+      byte[] array = new byte[1];
+      setter.invoke(array, 0, (byte) 0x8);
+      if (array[0] != (byte) 0x8) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(boolean[].class);
+      boolean[] array = new boolean[1];
+      setter.invoke(array, 0, true);
+      if (!array[0]) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(float[].class);
+      float[] array = new float[1];
+      setter.invoke(array, 0, 42.0f);
+      if (array[0] != 42.0f) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(double[].class);
+      double[] array = new double[1];
+      setter.invoke(array, 0, 42.0);
+      if (array[0] != 42.0) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(String[].class);
+      String[] array = new String[3];
+      setter.invoke(array, 0, "42");
+      setter.invoke(array, 1, "48");
+      setter.invoke(array, 2, "54");
+      assertEquals("42", array[0]);
+      assertEquals("48", array[1]);
+      assertEquals("54", array[2]);
+    }
+  }
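+
+  // Conversely, arrayElementSetter(T[].class) yields a handle of type (T[], int, T) -> void.
+  // As checked for int[] above, a negative index fails with ArrayIndexOutOfBoundsException
+  // and a null array reference fails with NullPointerException at invocation time.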
+
+  public static void testIdentity() throws Throwable {
+    {
+      MethodHandle identity = MethodHandles.identity(boolean.class);
+      boolean value = (boolean) identity.invoke(false);
+      if (value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(byte.class);
+      byte value = (byte) identity.invoke((byte) 0x8);
+      if (value != (byte) 0x8) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(char.class);
+      char value = (char) identity.invoke((char) -56);
+      if (value != (char) -56) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(short.class);
+      short value = (short) identity.invoke((short) -59);
+      if (value != (short) -59) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(int.class);
+      int value = (int) identity.invoke(52);
+      if (value != 52) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(long.class);
+      long value = (long) identity.invoke(-76l);
+      if (value != (long) -76) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(float.class);
+      float value = (float) identity.invoke(56.0f);
+      if (value != (float) 56.0f) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(double.class);
+      double value = (double) identity.invoke((double) 72.0);
+      if (value != (double) 72.0) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(String.class);
+      String value = (String) identity.invoke("bazman");
+      assertEquals("bazman", value);
+    }
+  }
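+
+  // identity(T.class) returns a handle of type (T) -> T that passes its argument through
+  // unchanged; the blocks above verify this for every primitive type and for a reference
+  // type (String).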
+
+  public static void testConstant() throws Throwable {
+    // int constants.
+    {
+      MethodHandle constant = MethodHandles.constant(int.class, 56);
+      int value = (int) constant.invoke();
+      if (value != 56) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // short constant values are converted to int.
+      constant = MethodHandles.constant(int.class, (short) 52);
+      value = (int) constant.invoke();
+      if (value != 52) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // char constant values are converted to int.
+      constant = MethodHandles.constant(int.class, (char) 'b');
+      value = (int) constant.invoke();
+      if (value != (int) 'b') {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // byte constant values are converted to int.
+      constant = MethodHandles.constant(int.class, (byte) 0x1);
+      value = (int) constant.invoke();
+      if (value != 1) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // boolean, float, double and long primitive constants are not convertible
+      // to int, so the handle creation must fail with a CCE.
+      try {
+        MethodHandles.constant(int.class, false);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+
+      try {
+        MethodHandles.constant(int.class, 0.1f);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+
+      try {
+        MethodHandles.constant(int.class, 0.2);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+
+      try {
+        MethodHandles.constant(int.class, 73l);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+    }
+
+    // long constants.
+    {
+      MethodHandle constant = MethodHandles.constant(long.class, 56l);
+      long value = (long) constant.invoke();
+      if (value != 56l) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      constant = MethodHandles.constant(long.class, (int) 56);
+      value = (long) constant.invoke();
+      if (value != 56l) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // byte constants.
+    {
+      MethodHandle constant = MethodHandles.constant(byte.class, (byte) 0x12);
+      byte value = (byte) constant.invoke();
+      if (value != (byte) 0x12) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // boolean constants.
+    {
+      MethodHandle constant = MethodHandles.constant(boolean.class, true);
+      boolean value = (boolean) constant.invoke();
+      if (!value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // char constants.
+    {
+      MethodHandle constant = MethodHandles.constant(char.class, 'f');
+      char value = (char) constant.invoke();
+      if (value != 'f') {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // short constants.
+    {
+      MethodHandle constant = MethodHandles.constant(short.class, (short) 123);
+      short value = (short) constant.invoke();
+      if (value != (short) 123) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // float constants.
+    {
+      MethodHandle constant = MethodHandles.constant(float.class, 56.0f);
+      float value = (float) constant.invoke();
+      if (value != 56.0f) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // double constants.
+    {
+      MethodHandle constant = MethodHandles.constant(double.class, 256.0);
+      double value = (double) constant.invoke();
+      if (value != 256.0) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // reference constants.
+    {
+      MethodHandle constant = MethodHandles.constant(String.class, "256.0");
+      String value = (String) constant.invoke();
+      assertEquals("256.0", value);
+    }
+  }
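+
+  // constant(T.class, value) returns a handle of type () -> T that always produces the
+  // given value. For a primitive T the value is unboxed and, if necessary, widened to T
+  // (hence byte/short/char are accepted for int, and int for long), whereas values that
+  // would require a narrowing or boolean conversion are rejected with ClassCastException
+  // when the handle is created, as the int.class cases above demonstrate.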
+
+  public static void testBindTo() throws Throwable {
+    MethodHandle stringCharAt = MethodHandles.lookup().findVirtual(
+        String.class, "charAt", MethodType.methodType(char.class, int.class));
+
+    char value = (char) stringCharAt.invoke("foo", 0);
+    if (value != 'f') {
+      System.out.println("Unexpected value: " + value);
+    }
+
+    MethodHandle bound = stringCharAt.bindTo("foo");
+    value = (char) bound.invoke(0);
+    if (value != 'f') {
+      System.out.println("Unexpected value: " + value);
+    }
+
+    try {
+      stringCharAt.bindTo(new Object());
+      fail();
+    } catch (ClassCastException expected) {
+    }
+
+    bound = stringCharAt.bindTo(null);
+    try {
+      bound.invoke(0);
+      fail();
+    } catch (NullPointerException expected) {
+    }
+
+    MethodHandle integerParseInt = MethodHandles.lookup().findStatic(
+        Integer.class, "parseInt", MethodType.methodType(int.class, String.class));
+
+    bound = integerParseInt.bindTo("78452");
+    int intValue = (int) bound.invoke();
+    if (intValue != 78452) {
+      System.out.println("Unexpected value: " + intValue);
+    }
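+
+    // bindTo() folds a leading reference argument into the handle: (String, int) -> char
+    // becomes (int) -> char once "foo" is bound, and (String) -> int becomes () -> int for
+    // parseInt. Binding a value of an incompatible reference type fails eagerly with
+    // ClassCastException, while binding null succeeds and only fails with
+    // NullPointerException when the bound handle is invoked.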
   }
 
   public static void fail() {
diff --git a/test/979-invoke-polymorphic-accessors/build b/test/959-invoke-polymorphic-accessors/build
similarity index 100%
rename from test/979-invoke-polymorphic-accessors/build
rename to test/959-invoke-polymorphic-accessors/build
diff --git a/test/959-invoke-polymorphic-accessors/expected.txt b/test/959-invoke-polymorphic-accessors/expected.txt
new file mode 100644
index 0000000..de2916b
--- /dev/null
+++ b/test/959-invoke-polymorphic-accessors/expected.txt
@@ -0,0 +1,4 @@
+1515870810
+Passed MethodHandles.Lookup tests for accessors.
+Passed MethodHandle.invokeExact() tests for accessors.
+Passed MethodHandle.invoke() tests for accessors.
diff --git a/test/979-invoke-polymorphic-accessors/info.txt b/test/959-invoke-polymorphic-accessors/info.txt
similarity index 100%
rename from test/979-invoke-polymorphic-accessors/info.txt
rename to test/959-invoke-polymorphic-accessors/info.txt
diff --git a/test/979-invoke-polymorphic-accessors/run b/test/959-invoke-polymorphic-accessors/run
similarity index 100%
rename from test/979-invoke-polymorphic-accessors/run
rename to test/959-invoke-polymorphic-accessors/run
diff --git a/test/979-invoke-polymorphic-accessors/src/Main.java b/test/959-invoke-polymorphic-accessors/src/Main.java
similarity index 81%
rename from test/979-invoke-polymorphic-accessors/src/Main.java
rename to test/959-invoke-polymorphic-accessors/src/Main.java
index 6cdcd10..824a436 100644
--- a/test/979-invoke-polymorphic-accessors/src/Main.java
+++ b/test/959-invoke-polymorphic-accessors/src/Main.java
@@ -44,7 +44,26 @@
         public static final int s_fi = 0x5a5a5a5a;
     }
 
-    public static class InvokeExactTester {
+    public static class Tester {
+        public static void assertActualAndExpectedMatch(boolean actual, boolean expected)
+                throws AssertionError {
+            if (actual != expected) {
+                throw new AssertionError("Actual != Expected (" + actual + " != " + expected + ")");
+            }
+        }
+
+        public static void assertTrue(boolean value) throws AssertionError {
+            if (!value) {
+                throw new AssertionError("Value is not true");
+            }
+        }
+
+        public static void unreachable() throws Throwable {
+            throw new Error("unreachable");
+        }
+    }
+
+    public static class InvokeExactTester extends Tester {
         private enum PrimitiveType {
             Boolean,
             Byte,
@@ -64,19 +83,6 @@
             SGET,
         }
 
-        private static void assertActualAndExpectedMatch(boolean actual, boolean expected)
-                throws AssertionError {
-            if (actual != expected) {
-                throw new AssertionError("Actual != Expected (" + actual + " != " + expected + ")");
-            }
-        }
-
-        private static void assertTrue(boolean value) throws AssertionError {
-            if (!value) {
-                throw new AssertionError("Value is not true");
-            }
-        }
-
         static void setByte(MethodHandle m, ValueHolder v, byte value, boolean expectFailure)
                 throws Throwable {
             boolean exceptionThrown = false;
@@ -677,16 +683,26 @@
                 assertTrue(s.equals(ValueHolder.s_l));
             }
 
-            System.out.println("Passed InvokeExact tests for accessors.");
+            System.out.println("Passed MethodHandle.invokeExact() tests for accessors.");
         }
     }
 
-    public static class FindAccessorTester {
+    public static class FindAccessorTester extends Tester {
         public static void main() throws Throwable {
-            ValueHolder valueHolder = new ValueHolder();
+            // NB: exercising a static field getter here is essential.
+            // MethodHandles must ensure the class (ValueHolder) is
+            // initialized, and this happens during invoke-polymorphic
+            // dispatch.
             MethodHandles.Lookup lookup = MethodHandles.lookup();
-
-            lookup.findStaticGetter(ValueHolder.class, "s_fi", int.class);
+            try {
+                MethodHandle mh = lookup.findStaticGetter(ValueHolder.class, "s_fi", int.class);
+                int initialValue = (int)mh.invokeExact();
+                System.out.println(initialValue);
+            } catch (NoSuchFieldException e) { unreachable(); }
+            try {
+                MethodHandle mh = lookup.findStaticSetter(ValueHolder.class, "s_i", int.class);
+                mh.invokeExact(0);
+            } catch (NoSuchFieldException e) { unreachable(); }
             try {
                 lookup.findStaticGetter(ValueHolder.class, "s_fi", byte.class);
                 unreachable();
@@ -713,15 +729,169 @@
                 lookup.findSetter(ValueHolder.class, "m_fi", int.class);
                 unreachable();
             } catch (IllegalAccessException e) {}
+
+            System.out.println("Passed MethodHandles.Lookup tests for accessors.");
+        }
+    }
+
+    public static class InvokeTester extends Tester {
+        private static void testStaticGetter() throws Throwable {
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findStaticGetter(ValueHolder.class, "s_fi", int.class);
+            h0.invoke();
+            Number t = (Number)h0.invoke();
+            int u = (int)h0.invoke();
+            Integer v = (Integer)h0.invoke();
+            long w = (long)h0.invoke();
+            try {
+                byte x = (byte)h0.invoke();
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String y = (String)h0.invoke();
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Long z = (Long)h0.invoke();
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
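+            // Unlike invokeExact(), invoke() applies asType() conversions to the return
+            // value: the int read from s_fi may be widened to long or boxed to Integer or
+            // Number, but narrowing to byte or casting to an unrelated reference type
+            // (String, Long) fails with WrongMethodTypeException.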
         }
 
-        public static void unreachable() throws Throwable{
-            throw new Error("unreachable");
+        private static void testMemberGetter() throws Throwable {
+            ValueHolder valueHolder = new ValueHolder();
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findGetter(ValueHolder.class, "m_fi", int.class);
+            h0.invoke(valueHolder);
+            Number t = (Number)h0.invoke(valueHolder);
+            int u = (int)h0.invoke(valueHolder);
+            Integer v = (Integer)h0.invoke(valueHolder);
+            long w = (long)h0.invoke(valueHolder);
+            try {
+                byte x = (byte)h0.invoke(valueHolder);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String y = (String)h0.invoke(valueHolder);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Long z = (Long)h0.invoke(valueHolder);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+        }
+
+        private static void testMemberSetter() throws Throwable {
+            ValueHolder valueHolder = new ValueHolder();
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findSetter(ValueHolder.class, "m_f", float.class);
+            h0.invoke(valueHolder, 0.22f);
+            h0.invoke(valueHolder, new Float(1.11f));
+            Number floatNumber = new Float(0.88f);
+            h0.invoke(valueHolder, floatNumber);
+            assertTrue(valueHolder.m_f == floatNumber.floatValue());
+
+            try {
+              h0.invoke(valueHolder, (Float)null);
+              unreachable();
+            } catch (NullPointerException e) {}
+
+            h0.invoke(valueHolder, (byte)1);
+            h0.invoke(valueHolder, (short)2);
+            h0.invoke(valueHolder, 3);
+            h0.invoke(valueHolder, 4l);
+            try {
+                h0.invoke(valueHolder, 0.33);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Number doubleNumber = new Double(0.89);
+                h0.invoke(valueHolder, doubleNumber);
+                unreachable();
+            } catch (ClassCastException e) {}
+            try {
+                Number doubleNumber = null;
+                h0.invoke(valueHolder, doubleNumber);
+                unreachable();
+            } catch (NullPointerException e) {}
+            try {
+                // Mismatched return type: the setter returns void, which invoke()
+                // converts to the default float value 0.0f.
+                float tmp = (float)h0.invoke(valueHolder, 0.45f);
+                assertTrue(tmp == 0.0);
+            } catch (Exception e) { unreachable(); }
+            try {
+                h0.invoke(valueHolder, "bam");
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String s = null;
+                h0.invoke(valueHolder, s);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+        }
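+
+        // For a float-typed setter, invoke() widens byte/short/int/long arguments and
+        // unboxes Float (or a Number that is dynamically a Float), but a double or String
+        // argument is rejected with WrongMethodTypeException, a Number that is really a
+        // Double fails with ClassCastException, and a null Float unboxes to a
+        // NullPointerException. testStaticSetter() below checks the same conversions for
+        // a static field.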
+
+        private static void testStaticSetter() throws Throwable {
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findStaticSetter(ValueHolder.class, "s_f", float.class);
+            h0.invoke(0.22f);
+            h0.invoke(new Float(1.11f));
+            Number floatNumber = new Float(0.88f);
+            h0.invoke(floatNumber);
+            assertTrue(ValueHolder.s_f == floatNumber.floatValue());
+
+            try {
+              h0.invoke((Float)null);
+              unreachable();
+            } catch (NullPointerException e) {}
+
+            h0.invoke((byte)1);
+            h0.invoke((short)2);
+            h0.invoke(3);
+            h0.invoke(4l);
+            try {
+                h0.invoke(0.33);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Number doubleNumber = new Double(0.89);
+                h0.invoke(doubleNumber);
+                unreachable();
+            } catch (ClassCastException e) {}
+            try {
+                Number doubleNumber = new Double(1.01);
+                doubleNumber = (doubleNumber.doubleValue() != 0.1) ? null : doubleNumber;
+                h0.invoke(doubleNumber);
+                unreachable();
+            } catch (NullPointerException e) {}
+            try {
+                // Mismatched return type: the setter returns void, which invoke()
+                // converts to the default float value 0.0f.
+                float tmp = (float)h0.invoke(0.45f);
+                assertTrue(tmp == 0.0);
+            } catch (Exception e) { unreachable(); }
+            try {
+                h0.invoke("bam");
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String s = null;
+                h0.invoke(s);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+        }
+
+        public static void main() throws Throwable {
+            testStaticGetter();
+            testMemberGetter();
+            testStaticSetter();
+            testMemberSetter();
+            System.out.println("Passed MethodHandle.invoke() tests for accessors.");
         }
     }
 
     public static void main(String[] args) throws Throwable {
+        // FindAccessorTester must run first so that the class
+        // initialization check it performs is actually exercised.
         FindAccessorTester.main();
         InvokeExactTester.main();
+        InvokeTester.main();
     }
 }
diff --git a/test/979-invoke-polymorphic-accessors/expected.txt b/test/979-invoke-polymorphic-accessors/expected.txt
deleted file mode 100644
index 2987b6c..0000000
--- a/test/979-invoke-polymorphic-accessors/expected.txt
+++ /dev/null
@@ -1 +0,0 @@
-Passed InvokeExact tests for accessors.
diff --git a/test/Android.arm_vixl.mk b/test/Android.arm_vixl.mk
index 2d9708d..e562812 100644
--- a/test/Android.arm_vixl.mk
+++ b/test/Android.arm_vixl.mk
@@ -22,7 +22,6 @@
   004-JniTest \
   004-NativeAllocations \
   004-ThreadStress \
-  004-UnsafeTest \
   004-checker-UnsafeTest18 \
   005-annotations \
   009-instanceof \
@@ -208,6 +207,7 @@
   570-checker-osr \
   570-checker-select \
   573-checker-checkcast-regression \
+  574-irreducible-and-constant-area \
   575-checker-string-init-alias \
   580-checker-round \
   584-checker-div-bool \
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index ae569f9..8f8f998 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -384,7 +384,7 @@
   908-gc-start-finish \
   913-heaps \
   961-default-iface-resolution-gen \
-  964-default-iface-init-gen \  
+  964-default-iface-init-gen
 
 ifneq (,$(filter gcstress,$(GC_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -608,11 +608,8 @@
 # Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
 # 484: Baker's fast path based read barrier compiler instrumentation generates code containing
 #      more parallel moves on x86, thus some Checker assertions may fail.
-# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
-#      instruction yet (b/26601270).
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
-  484-checker-register-hints \
-  527-checker-array-access-split
+  484-checker-register-hints
 
 # Tests that should fail in the read barrier configuration with JIT (Optimizing compiler).
 TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index cc0d008..c525b2b 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -397,9 +397,30 @@
 fi
 
 if [ "$HOST" = "n" ]; then
-  ISA=$(adb shell ls -F /data/dalvik-cache | grep -Ewo "${ARCHITECTURES_PATTERN}")
+  # Need to be root to query /data/dalvik-cache
+  adb root > /dev/null
+  adb wait-for-device
+  ISA=
+  ISA_adb_invocation=
+  ISA_outcome=
+  # We iterate a few times to work around an adb issue. b/32655576
+  for i in {1..10}; do
+    ISA_adb_invocation=$(adb shell ls -F /data/dalvik-cache)
+    ISA_outcome=$?
+    ISA=$(echo $ISA_adb_invocation | grep -Ewo "${ARCHITECTURES_PATTERN}")
+    if [ x"$ISA" != "x" ]; then
+      break;
+    fi
+  done
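+  # At this point ISA is either a name matched by ARCHITECTURES_PATTERN or empty
+  # if none of the attempts produced a match.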
   if [ x"$ISA" = "x" ]; then
     echo "Unable to determine architecture"
+    # Print a few things to help diagnose the problem.
+    echo "adb invocation output: $ISA_adb_invocation"
+    echo "adb invocation outcome: $ISA_outcome"
+    echo $(adb shell ls -F /data/dalvik-cache)
+    echo $(adb shell ls /data/dalvik-cache)
+    echo ${ARCHITECTURES_PATTERN}
+    echo $(adb shell ls -F /data/dalvik-cache | grep -Ewo "${ARCHITECTURES_PATTERN}")
     exit 1
   fi
 fi
diff --git a/test/run-test b/test/run-test
index 7a4afaf..37eefb3 100755
--- a/test/run-test
+++ b/test/run-test
@@ -758,8 +758,8 @@
 if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then
   # We will need to `adb pull` the .cfg output from the target onto the host to
   # run checker on it. This file can be big.
-  build_file_size_limit=24576
-  run_file_size_limit=24576
+  build_file_size_limit=32768
+  run_file_size_limit=32768
 fi
 if [ ${USE_JACK} = "false" ]; then
   # Set ulimit if we build with dx only, Jack can generate big temp files.
diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def
index 67ed5b5..08d5885 100644
--- a/tools/cpp-define-generator/constant_lockword.def
+++ b/tools/cpp-define-generator/constant_lockword.def
@@ -30,6 +30,10 @@
 DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled)
 DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE,       int32_t,  kThinLockCountOne)
 
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS, uint32_t, kStateForwardingAddress)
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS_OVERFLOW, uint32_t, kStateForwardingAddressOverflow)
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS_SHIFT, uint32_t, kForwardingAddressShift)
+
 DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED,   uint32_t,  kGCStateMaskShifted)
 DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED_TOGGLED, uint32_t, kGCStateMaskShiftedToggled)
 DEFINE_LOCK_WORD_EXPR(GC_STATE_SHIFT,   int32_t,  kGCStateShift)
diff --git a/tools/cpp-define-generator/main.cc b/tools/cpp-define-generator/main.cc
index a1b463a..fc99f8a 100644
--- a/tools/cpp-define-generator/main.cc
+++ b/tools/cpp-define-generator/main.cc
@@ -59,12 +59,12 @@
 }
 
 template <typename T>
-void cpp_define(std::string name, T value) {
+void cpp_define(const std::string& name, T value) {
   std::cout << "#define " << name << " " << pretty_format(value) << std::endl;
 }
 
 template <typename T>
-void emit_check_eq(T value, std::string expr) {
+void emit_check_eq(T value, const std::string& expr) {
   std::cout << "DEFINE_CHECK_EQ(" << value << ", (" << expr << "))" << std::endl;
 }