Merge "Remove two ReadBarrierMarkRegX entrypoints."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index cc96cf0..7f8fa8e 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -71,7 +71,7 @@
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
-ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS)
+ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
 ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
 ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
diff --git a/compiler/Android.mk b/compiler/Android.mk
index e3f8a5c..0b7bd9c 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -157,7 +157,6 @@
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES := \
   compiled_method.h \
-  dex/compiler_enums.h \
   dex/dex_to_dex_compiler.h \
   driver/compiler_driver.h \
   driver/compiler_options.h \
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
deleted file mode 100644
index 8800e4b..0000000
--- a/compiler/dex/compiler_enums.h
+++ /dev/null
@@ -1,677 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_COMPILER_ENUMS_H_
-#define ART_COMPILER_DEX_COMPILER_ENUMS_H_
-
-#include "dex_instruction.h"
-
-namespace art {
-
-enum RegisterClass {
-  kInvalidRegClass,
-  kCoreReg,
-  kFPReg,
-  kRefReg,
-  kAnyReg,
-};
-std::ostream& operator<<(std::ostream& os, const RegisterClass& rhs);
-
-enum BitsUsed {
-  kSize32Bits,
-  kSize64Bits,
-  kSize128Bits,
-  kSize256Bits,
-  kSize512Bits,
-  kSize1024Bits,
-};
-std::ostream& operator<<(std::ostream& os, const BitsUsed& rhs);
-
-enum SpecialTargetRegister {
-  kSelf,            // Thread pointer.
-  kSuspend,         // Used to reduce suspend checks for some targets.
-  kLr,
-  kPc,
-  kSp,
-  kArg0,
-  kArg1,
-  kArg2,
-  kArg3,
-  kArg4,
-  kArg5,
-  kArg6,
-  kArg7,
-  kFArg0,
-  kFArg1,
-  kFArg2,
-  kFArg3,
-  kFArg4,
-  kFArg5,
-  kFArg6,
-  kFArg7,
-  kFArg8,
-  kFArg9,
-  kFArg10,
-  kFArg11,
-  kFArg12,
-  kFArg13,
-  kFArg14,
-  kFArg15,
-  kRet0,
-  kRet1,
-  kInvokeTgt,
-  kHiddenArg,
-  kHiddenFpArg,
-  kCount
-};
-std::ostream& operator<<(std::ostream& os, const SpecialTargetRegister& code);
-
-enum RegLocationType {
-  kLocDalvikFrame = 0,  // Normal Dalvik register
-  kLocPhysReg,
-  kLocCompilerTemp,
-  kLocInvalid
-};
-std::ostream& operator<<(std::ostream& os, const RegLocationType& rhs);
-
-enum BBType {
-  kNullBlock,
-  kEntryBlock,
-  kDalvikByteCode,
-  kExitBlock,
-  kExceptionHandling,
-  kDead,
-};
-std::ostream& operator<<(std::ostream& os, const BBType& code);
-
-// Shared pseudo opcodes - must be < 0.
-enum LIRPseudoOpcode {
-  kPseudoPrologueBegin = -18,
-  kPseudoPrologueEnd = -17,
-  kPseudoEpilogueBegin = -16,
-  kPseudoEpilogueEnd = -15,
-  kPseudoExportedPC = -14,
-  kPseudoSafepointPC = -13,
-  kPseudoIntrinsicRetry = -12,
-  kPseudoSuspendTarget = -11,
-  kPseudoThrowTarget = -10,
-  kPseudoCaseLabel = -9,
-  kPseudoBarrier = -8,
-  kPseudoEntryBlock = -7,
-  kPseudoExitBlock = -6,
-  kPseudoTargetLabel = -5,
-  kPseudoDalvikByteCodeBoundary = -4,
-  kPseudoPseudoAlign4 = -3,
-  kPseudoEHBlockLabel = -2,
-  kPseudoNormalBlockLabel = -1,
-};
-std::ostream& operator<<(std::ostream& os, const LIRPseudoOpcode& rhs);
-
-enum ExtendedMIROpcode {
-  kMirOpFirst = kNumPackedOpcodes,
-  kMirOpPhi = kMirOpFirst,
-
-  // @brief Copy from one VR to another.
-  // @details
-  // vA: destination VR
-  // vB: source VR
-  kMirOpCopy,
-
-  // @brief Used to do float comparison with less-than bias.
-  // @details Unlike cmpl-float, this does not store result of comparison in VR.
-  // vA: left-hand side VR for comparison.
-  // vB: right-hand side VR for comparison.
-  kMirOpFusedCmplFloat,
-
-  // @brief Used to do float comparison with greater-than bias.
-  // @details Unlike cmpg-float, this does not store result of comparison in VR.
-  // vA: left-hand side VR for comparison.
-  // vB: right-hand side VR for comparison.
-  kMirOpFusedCmpgFloat,
-
-  // @brief Used to do double comparison with less-than bias.
-  // @details Unlike cmpl-double, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmplDouble,
-
-  // @brief Used to do double comparison with greater-than bias.
-  // @details Unlike cmpl-double, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmpgDouble,
-
-  // @brief Used to do comparison of 64-bit long integers.
-  // @details Unlike cmp-long, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmpLong,
-
-  // @brief This represents no-op.
-  kMirOpNop,
-
-  // @brief Do a null check on the object register.
-  // @details The backends may implement this implicitly or explicitly. This MIR is guaranteed
-  // to have the correct offset as an exception thrower.
-  // vA: object register
-  kMirOpNullCheck,
-
-  kMirOpRangeCheck,
-  kMirOpDivZeroCheck,
-  kMirOpCheck,
-  kMirOpSelect,
-
-  // Vector opcodes:
-  // TypeSize is an encoded field giving the element type and the vector size.
-  // It is encoded as OpSize << 16 | (number of bits in vector)
-  //
-  // Destination and source are integers that will be interpreted by the
-  // backend that supports Vector operations.  Backends are permitted to support only
-  // certain vector register sizes.
-  //
-  // At this point, only two operand instructions are supported.  Three operand instructions
-  // could be supported by using a bit in TypeSize and arg[0] where needed.
-
-  // @brief MIR to move constant data to a vector register
-  // vA: destination
-  // vB: number of bits in register
-  // args[0]~args[3]: up to 128 bits of data for initialization
-  kMirOpConstVector,
-
-  // @brief MIR to move a vectorized register to another
-  // vA: destination
-  // vB: source
-  // vC: TypeSize
-  kMirOpMoveVector,
-
-  // @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedMultiply,
-
-  // @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedAddition,
-
-  // @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedSubtract,
-
-  // @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedShiftLeft,
-
-  // @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedSignedShiftRight,
-
-  // @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedUnsignedShiftRight,
-
-  // @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedAnd,
-
-  // @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedOr,
-
-  // @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedXor,
-
-  // @brief Reduce a 128-bit packed element into a single VR by taking lower bits
-  // @details Instruction does a horizontal addition of the packed elements and then adds it to VR
-  // vA: destination and source VR (not vector register)
-  // vB: source (vector register)
-  // vC: TypeSize
-  kMirOpPackedAddReduce,
-
-  // @brief Extract a packed element into a single VR.
-  // vA: destination VR (not vector register)
-  // vB: source (vector register)
-  // vC: TypeSize
-  // arg[0]: The index to use for extraction from vector register (which packed element)
-  kMirOpPackedReduce,
-
-  // @brief Create a vector value, with all TypeSize values equal to vC
-  // vA: destination vector register
-  // vB: source VR (not vector register)
-  // vC: TypeSize
-  kMirOpPackedSet,
-
-  // @brief Reserve a range of vector registers.
-  // vA: Start vector register to reserve.
-  // vB: Inclusive end vector register to reserve.
-  // @note: The backend may choose to map vector numbers used in vector opcodes.
-  //  Reserved registers are removed from the list of backend temporary pool.
-  kMirOpReserveVectorRegisters,
-
-  // @brief Free a range of reserved vector registers
-  // vA: Start vector register to unreserve.
-  // vB: Inclusive end vector register to unreserve.
-  // @note: All currently reserved vector registers are returned to the temporary pool.
-  kMirOpReturnVectorRegisters,
-
-  // @brief Create a memory barrier.
-  // vA: a constant defined by enum MemBarrierKind.
-  kMirOpMemBarrier,
-
-  // @brief Used to fill a vector register with array values.
-  // @details Just as with normal arrays, access on null object register must ensure NullPointerException
-  // and invalid index must ensure ArrayIndexOutOfBoundsException. Exception behavior must be the same
-  // as the aget it replaced and must happen at same index. Therefore, it is generally recommended that
-  // before using this MIR, it is proven that exception is guaranteed to not be thrown and marked with
-  // MIR_IGNORE_NULL_CHECK and MIR_IGNORE_RANGE_CHECK.
-  // vA: destination vector register
-  // vB: array register
-  // vC: index register
-  // arg[0]: TypeSize (most other vector opcodes have this in vC)
-  kMirOpPackedArrayGet,
-
-  // @brief Used to store a vector register into array.
-  // @details Just as with normal arrays, access on null object register must ensure NullPointerException
-  // and invalid index must ensure ArrayIndexOutOfBoundsException. Exception behavior must be the same
-  // as the aget it replaced and must happen at same index. Therefore, it is generally recommended that
-  // before using this MIR, it is proven that exception is guaranteed to not be thrown and marked with
-  // MIR_IGNORE_NULL_CHECK and MIR_IGNORE_RANGE_CHECK.
-  // vA: source vector register
-  // vB: array register
-  // vC: index register
-  // arg[0]: TypeSize (most other vector opcodes have this in vC)
-  kMirOpPackedArrayPut,
-
-  // @brief Multiply-add integer.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: addend
-  kMirOpMaddInt,
-
-  // @brief Multiply-subtract integer.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: minuend
-  kMirOpMsubInt,
-
-  // @brief Multiply-add long.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: addend
-  kMirOpMaddLong,
-
-  // @brief Multiply-subtract long.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: minuend
-  kMirOpMsubLong,
-
-  kMirOpLast,
-};
-
-enum MIROptimizationFlagPositions {
-  kMIRIgnoreNullCheck = 0,
-  kMIRIgnoreRangeCheck,
-  kMIRIgnoreCheckCast,
-  kMIRStoreNonNullValue,              // Storing non-null value, always mark GC card.
-  kMIRClassIsInitialized,
-  kMIRClassIsInDexCache,
-  kMirIgnoreDivZeroCheck,
-  kMIRInlined,                        // Invoke is inlined (ie dead).
-  kMIRInlinedPred,                    // Invoke is inlined via prediction.
-  kMIRCallee,                         // Instruction is inlined from callee.
-  kMIRIgnoreSuspendCheck,
-  kMIRDup,
-  kMIRMark,                           // Temporary node mark can be used by
-                                      // opt passes for their private needs.
-  kMIRStoreNonTemporal,
-  kMIRLastMIRFlag,
-};
-
-// For successor_block_list.
-enum BlockListType {
-  kNotUsed = 0,
-  kCatch,
-  kPackedSwitch,
-  kSparseSwitch,
-};
-std::ostream& operator<<(std::ostream& os, const BlockListType& rhs);
-
-enum AssemblerStatus {
-  kSuccess,
-  kRetryAll,
-};
-std::ostream& operator<<(std::ostream& os, const AssemblerStatus& rhs);
-
-enum OpSize {
-  kWord,            // Natural word size of target (32/64).
-  k32,
-  k64,
-  kReference,       // Object reference; compressed on 64-bit targets.
-  kSingle,
-  kDouble,
-  kUnsignedHalf,
-  kSignedHalf,
-  kUnsignedByte,
-  kSignedByte,
-};
-std::ostream& operator<<(std::ostream& os, const OpSize& kind);
-
-enum OpKind {
-  kOpMov,
-  kOpCmov,
-  kOpMvn,
-  kOpCmp,
-  kOpLsl,
-  kOpLsr,
-  kOpAsr,
-  kOpRor,
-  kOpNot,
-  kOpAnd,
-  kOpOr,
-  kOpXor,
-  kOpNeg,
-  kOpAdd,
-  kOpAdc,
-  kOpSub,
-  kOpSbc,
-  kOpRsub,
-  kOpMul,
-  kOpDiv,
-  kOpRem,
-  kOpBic,
-  kOpCmn,
-  kOpTst,
-  kOpRev,
-  kOpRevsh,
-  kOpBkpt,
-  kOpBlx,
-  kOpPush,
-  kOpPop,
-  kOp2Char,
-  kOp2Short,
-  kOp2Byte,
-  kOpCondBr,
-  kOpUncondBr,
-  kOpBx,
-  kOpInvalid,
-};
-std::ostream& operator<<(std::ostream& os, const OpKind& rhs);
-
-enum MoveType {
-  kMov8GP,      // Move 8-bit general purpose register.
-  kMov16GP,     // Move 16-bit general purpose register.
-  kMov32GP,     // Move 32-bit general purpose register.
-  kMov64GP,     // Move 64-bit general purpose register.
-  kMov32FP,     // Move 32-bit FP register.
-  kMov64FP,     // Move 64-bit FP register.
-  kMovLo64FP,   // Move low 32-bits of 64-bit FP register.
-  kMovHi64FP,   // Move high 32-bits of 64-bit FP register.
-  kMovU128FP,   // Move 128-bit FP register to/from possibly unaligned region.
-  kMov128FP = kMovU128FP,
-  kMovA128FP,   // Move 128-bit FP register to/from region surely aligned to 16-bytes.
-  kMovLo128FP,  // Move low 64-bits of 128-bit FP register.
-  kMovHi128FP,  // Move high 64-bits of 128-bit FP register.
-};
-std::ostream& operator<<(std::ostream& os, const MoveType& kind);
-
-enum ConditionCode {
-  kCondEq,  // equal
-  kCondNe,  // not equal
-  kCondCs,  // carry set
-  kCondCc,  // carry clear
-  kCondUlt,  // unsigned less than
-  kCondUge,  // unsigned greater than or same
-  kCondMi,  // minus
-  kCondPl,  // plus, positive or zero
-  kCondVs,  // overflow
-  kCondVc,  // no overflow
-  kCondHi,  // unsigned greater than
-  kCondLs,  // unsigned lower or same
-  kCondGe,  // signed greater than or equal
-  kCondLt,  // signed less than
-  kCondGt,  // signed greater than
-  kCondLe,  // signed less than or equal
-  kCondAl,  // always
-  kCondNv,  // never
-};
-std::ostream& operator<<(std::ostream& os, const ConditionCode& kind);
-
-// Target specific condition encodings
-enum ArmConditionCode {
-  kArmCondEq = 0x0,  // 0000
-  kArmCondNe = 0x1,  // 0001
-  kArmCondCs = 0x2,  // 0010
-  kArmCondCc = 0x3,  // 0011
-  kArmCondMi = 0x4,  // 0100
-  kArmCondPl = 0x5,  // 0101
-  kArmCondVs = 0x6,  // 0110
-  kArmCondVc = 0x7,  // 0111
-  kArmCondHi = 0x8,  // 1000
-  kArmCondLs = 0x9,  // 1001
-  kArmCondGe = 0xa,  // 1010
-  kArmCondLt = 0xb,  // 1011
-  kArmCondGt = 0xc,  // 1100
-  kArmCondLe = 0xd,  // 1101
-  kArmCondAl = 0xe,  // 1110
-  kArmCondNv = 0xf,  // 1111
-};
-std::ostream& operator<<(std::ostream& os, const ArmConditionCode& kind);
-
-enum X86ConditionCode {
-  kX86CondO   = 0x0,    // overflow
-  kX86CondNo  = 0x1,    // not overflow
-
-  kX86CondB   = 0x2,    // below
-  kX86CondNae = kX86CondB,  // not-above-equal
-  kX86CondC   = kX86CondB,  // carry
-
-  kX86CondNb  = 0x3,    // not-below
-  kX86CondAe  = kX86CondNb,  // above-equal
-  kX86CondNc  = kX86CondNb,  // not-carry
-
-  kX86CondZ   = 0x4,    // zero
-  kX86CondEq  = kX86CondZ,  // equal
-
-  kX86CondNz  = 0x5,    // not-zero
-  kX86CondNe  = kX86CondNz,  // not-equal
-
-  kX86CondBe  = 0x6,    // below-equal
-  kX86CondNa  = kX86CondBe,  // not-above
-
-  kX86CondNbe = 0x7,    // not-below-equal
-  kX86CondA   = kX86CondNbe,  // above
-
-  kX86CondS   = 0x8,    // sign
-  kX86CondNs  = 0x9,    // not-sign
-
-  kX86CondP   = 0xa,    // 8-bit parity even
-  kX86CondPE  = kX86CondP,
-
-  kX86CondNp  = 0xb,    // 8-bit parity odd
-  kX86CondPo  = kX86CondNp,
-
-  kX86CondL   = 0xc,    // less-than
-  kX86CondNge = kX86CondL,  // not-greater-equal
-
-  kX86CondNl  = 0xd,    // not-less-than
-  kX86CondGe  = kX86CondNl,  // not-greater-equal
-
-  kX86CondLe  = 0xe,    // less-than-equal
-  kX86CondNg  = kX86CondLe,  // not-greater
-
-  kX86CondNle = 0xf,    // not-less-than
-  kX86CondG   = kX86CondNle,  // greater
-};
-std::ostream& operator<<(std::ostream& os, const X86ConditionCode& kind);
-
-enum DividePattern {
-  DivideNone,
-  Divide3,
-  Divide5,
-  Divide7,
-};
-std::ostream& operator<<(std::ostream& os, const DividePattern& pattern);
-
-/**
- * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
- * @details We define the combined barrier types that are actually required
- * by the Java Memory Model, rather than using exactly the terminology from
- * the JSR-133 cookbook.  These should, in many cases, be replaced by acquire/release
- * primitives.  Note that the JSR-133 cookbook generally does not deal with
- * store atomicity issues, and the recipes there are not always entirely sufficient.
- * The current recipe is as follows:
- * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
- * -# Use AnyAny barrier after volatile store.  (StoreLoad is as expensive.)
- * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
- * -# Use StoreStore barrier after all stores but before return from any constructor whose
- *    class has final fields.
- * -# Use NTStoreStore to order non-temporal stores with respect to all later
- *    store-to-memory instructions.  Only generated together with non-temporal stores.
- */
-enum MemBarrierKind {
-  kAnyStore,
-  kLoadAny,
-  kStoreStore,
-  kAnyAny,
-  kNTStoreStore,
-  kLastBarrierKind = kNTStoreStore
-};
-std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
-
-enum OpFeatureFlags {
-  kIsBranch = 0,
-  kNoOperand,
-  kIsUnaryOp,
-  kIsBinaryOp,
-  kIsTertiaryOp,
-  kIsQuadOp,
-  kIsQuinOp,
-  kIsSextupleOp,
-  kIsIT,
-  kIsMoveOp,
-  kMemLoad,
-  kMemStore,
-  kMemVolatile,
-  kMemScaledx0,
-  kMemScaledx2,
-  kMemScaledx4,
-  kPCRelFixup,  // x86 FIXME: add NEEDS_FIXUP to instruction attributes.
-  kRegDef0,
-  kRegDef1,
-  kRegDef2,
-  kRegDefA,
-  kRegDefD,
-  kRegDefFPCSList0,
-  kRegDefFPCSList2,
-  kRegDefList0,
-  kRegDefList1,
-  kRegDefList2,
-  kRegDefLR,
-  kRegDefSP,
-  kRegUse0,
-  kRegUse1,
-  kRegUse2,
-  kRegUse3,
-  kRegUse4,
-  kRegUseA,
-  kRegUseC,
-  kRegUseD,
-  kRegUseB,
-  kRegUseFPCSList0,
-  kRegUseFPCSList2,
-  kRegUseList0,
-  kRegUseList1,
-  kRegUseLR,
-  kRegUsePC,
-  kRegUseSP,
-  kSetsCCodes,
-  kUsesCCodes,
-  kUseFpStack,
-  kUseHi,
-  kUseLo,
-  kDefHi,
-  kDefLo
-};
-std::ostream& operator<<(std::ostream& os, const OpFeatureFlags& rhs);
-
-enum SelectInstructionKind {
-  kSelectNone,
-  kSelectConst,
-  kSelectMove,
-  kSelectGoto
-};
-std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind);
-
-// LIR fixup kinds for Arm and X86.
-enum FixupKind {
-  kFixupNone,
-  kFixupLabel,             // For labels we just adjust the offset.
-  kFixupLoad,              // Mostly for immediates.
-  kFixupVLoad,             // FP load which *may* be pc-relative.
-  kFixupCBxZ,              // Cbz, Cbnz.
-  kFixupTBxZ,              // Tbz, Tbnz.
-  kFixupCondBranch,        // Conditional branch
-  kFixupT1Branch,          // Thumb1 Unconditional branch
-  kFixupT2Branch,          // Thumb2 Unconditional branch
-  kFixupBlx1,              // Blx1 (start of Blx1/Blx2 pair).
-  kFixupBl1,               // Bl1 (start of Bl1/Bl2 pair).
-  kFixupAdr,               // Adr.
-  kFixupMovImmLST,         // kThumb2MovImm16LST.
-  kFixupMovImmHST,         // kThumb2MovImm16HST.
-  kFixupAlign4,            // Align to 4-byte boundary.
-  kFixupA53Erratum835769,  // Cortex A53 Erratum 835769.
-  kFixupSwitchTable,       // X86_64 packed switch table.
-};
-std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
-
-enum VolatileKind {
-  kNotVolatile,      // Load/Store is not volatile
-  kVolatile          // Load/Store is volatile
-};
-std::ostream& operator<<(std::ostream& os, const VolatileKind& kind);
-
-enum WideKind {
-  kNotWide,      // Non-wide view
-  kWide,         // Wide view
-  kRef           // Ref width
-};
-std::ostream& operator<<(std::ostream& os, const WideKind& kind);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_COMPILER_ENUMS_H_
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index fbe403f..50dc032 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -21,7 +21,6 @@
 #include "base/mutex.h"
 #include "base/macros.h"
 #include "safe_map.h"
-#include "dex/compiler_enums.h"
 #include "dex_file.h"
 #include "quick/inline_method_analyser.h"
 
@@ -31,6 +30,15 @@
 class MethodVerifier;
 }  // namespace verifier
 
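+// A subset of the OpSize values formerly defined in the (now removed)
+// dex/compiler_enums.h; only the sizes the inliner still uses are kept.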
+enum OpSize {
+  k32,
+  k64,
+  kSignedHalf,
+  kSignedByte,
+};
+
 /**
  * Handles inlining of methods from a particular DexFile.
  *
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
index 7c0423b..c09950c 100644
--- a/compiler/linker/mips/relative_patcher_mips.cc
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -49,6 +49,9 @@
                                                    uint32_t target_offset) {
   uint32_t anchor_literal_offset = patch.PcInsnOffset();
   uint32_t literal_offset = patch.LiteralOffset();
+  bool dex_cache_array = (patch.GetType() == LinkerPatch::Type::kDexCacheArray);
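+  // String patches share this PC-relative sequence but take the target
+  // address directly; only dex cache arrays add kDexCacheArrayLwOffset.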
 
   // Basic sanity checks.
   if (is_r6) {
@@ -68,12 +71,16 @@
     DCHECK_GE(code->size(), 16u);
     DCHECK_LE(literal_offset, code->size() - 12u);
     DCHECK_GE(literal_offset, 4u);
-    DCHECK_EQ(literal_offset + 4u, anchor_literal_offset);
-    // NAL
-    DCHECK_EQ((*code)[literal_offset - 4], 0x00);
-    DCHECK_EQ((*code)[literal_offset - 3], 0x00);
-    DCHECK_EQ((*code)[literal_offset - 2], 0x10);
-    DCHECK_EQ((*code)[literal_offset - 1], 0x04);
+    // The NAL instruction may not immediately precede the anchor, as the PC+0
+    // value may come from HMipsComputeBaseMethodAddress instead.
+    if (dex_cache_array) {
+      DCHECK_EQ(literal_offset + 4u, anchor_literal_offset);
+      // NAL
+      DCHECK_EQ((*code)[literal_offset - 4], 0x00);
+      DCHECK_EQ((*code)[literal_offset - 3], 0x00);
+      DCHECK_EQ((*code)[literal_offset - 2], 0x10);
+      DCHECK_EQ((*code)[literal_offset - 1], 0x04);
+    }
     // LUI reg, offset_high
     DCHECK_EQ((*code)[literal_offset + 0], 0x34);
     DCHECK_EQ((*code)[literal_offset + 1], 0x12);
@@ -83,16 +90,25 @@
     DCHECK_EQ((*code)[literal_offset + 4], 0x78);
     DCHECK_EQ((*code)[literal_offset + 5], 0x56);
     DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x34);
-    // ADDU reg, reg, RA
+    // ADDU reg, reg, reg2
     DCHECK_EQ((*code)[literal_offset + 8], 0x21);
     DCHECK_EQ(((*code)[literal_offset + 9] & 0x07), 0x00);
-    DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F);
+    if (dex_cache_array) {
+      // For dex cache arrays reg2 is RA; otherwise it comes from HMipsComputeBaseMethodAddress.
+      DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F);
+    }
     DCHECK_EQ(((*code)[literal_offset + 11] & 0xFC), 0x00);
   }
 
   // Apply patch.
   uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
-  uint32_t diff = target_offset - anchor_offset + kDexCacheArrayLwOffset;
+  uint32_t diff = target_offset - anchor_offset;
+  if (dex_cache_array) {
+    diff += kDexCacheArrayLwOffset;
+  }
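+  // Worked example for R6 (assuming diff == 0x12348000): ADDIU sign-extends
+  // the low half 0x8000, so the adjustment below adds 0x10000, splitting the
+  // halves as 0x1235/0x8000; R2 uses ORI, which zero-extends, so no fixup.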
   if (is_r6) {
     diff += (diff & 0x8000) << 1;  // Account for sign extension in ADDIU.
   }
diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
index 0f1dcbc..a16aaca 100644
--- a/compiler/linker/mips/relative_patcher_mips32r6_test.cc
+++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
@@ -29,40 +29,79 @@
   Mips32r6RelativePatcherTest() : RelativePatcherTest(kMips, "mips32r6") {}
 
  protected:
+  static const uint8_t UnpatchedPcRelativeRawCode[];
+  static const uint32_t LiteralOffset;
+  static const uint32_t AnchorOffset;
+  static const ArrayRef<const uint8_t> UnpatchedPcRelativeCode;
+
   uint32_t GetMethodOffset(uint32_t method_idx) {
     auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
     CHECK(result.first);
     return result.second;
   }
+
+  void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringReference(uint32_t string_offset);
 };
 
-TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) {
-  dex_cache_arrays_begin_ = 0x12345678;
-  constexpr size_t kElementOffset = 0x1234;
-  static const uint8_t raw_code[] = {
-      0x34, 0x12, 0x5E, 0xEE,  // auipc s2, high(diff); placeholder = 0x1234
-      0x78, 0x56, 0x52, 0x26,  // addiu s2, s2, low(diff); placeholder = 0x5678
-  };
-  constexpr uint32_t literal_offset = 0;  // At auipc (where patching starts).
-  constexpr uint32_t anchor_offset = literal_offset;  // At auipc (where PC+0 points).
-  ArrayRef<const uint8_t> code(raw_code);
-  LinkerPatch patches[] = {
-      LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset),
-  };
-  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+const uint8_t Mips32r6RelativePatcherTest::UnpatchedPcRelativeRawCode[] = {
+    0x34, 0x12, 0x5E, 0xEE,  // auipc s2, high(diff); placeholder = 0x1234
+    0x78, 0x56, 0x52, 0x26,  // addiu s2, s2, low(diff); placeholder = 0x5678
+};
+const uint32_t Mips32r6RelativePatcherTest::LiteralOffset = 0;  // At auipc (where patching starts).
+const uint32_t Mips32r6RelativePatcherTest::AnchorOffset = 0;  // At auipc (where PC+0 points).
+const ArrayRef<const uint8_t> Mips32r6RelativePatcherTest::UnpatchedPcRelativeCode(
+    UnpatchedPcRelativeRawCode);
+
+void Mips32r6RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches,
+                                                       uint32_t target_offset) {
+  AddCompiledMethod(MethodRef(1u), UnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
   ASSERT_TRUE(result.first);
-  uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) +
-      kDexCacheArrayLwOffset;
+
+  uint32_t diff = target_offset - (result.second + AnchorOffset);
+  if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) {
+    diff += kDexCacheArrayLwOffset;
+  }
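+  // Mirrors the patcher: only dex cache array references add the LW offset.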
   diff += (diff & 0x8000) << 1;  // Account for sign extension in addiu.
-  static const uint8_t expected_code[] = {
+
+  const uint8_t expected_code[] = {
       static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE,
       static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26,
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+void Mips32r6RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
+                                                        uint32_t element_offset) {
+  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(LiteralOffset, nullptr, AnchorOffset, element_offset)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
+                       dex_cache_arrays_begin_ + element_offset);
+}
+
+void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_offset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(LiteralOffset, nullptr, AnchorOffset, kStringIndex)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
+}
+
+TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) {
+  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+}
+
+TEST_F(Mips32r6RelativePatcherTest, StringReference) {
+  TestStringReference(/* string_offset */ 0x87651234);
+}
+
 }  // namespace linker
 }  // namespace art
diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc
index 8391b53..335ce2e 100644
--- a/compiler/linker/mips/relative_patcher_mips_test.cc
+++ b/compiler/linker/mips/relative_patcher_mips_test.cc
@@ -29,36 +29,48 @@
   MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {}
 
  protected:
+  static const uint8_t UnpatchedPcRelativeRawCode[];
+  static const uint32_t LiteralOffset;
+  static const uint32_t AnchorOffset;
+  static const ArrayRef<const uint8_t> UnpatchedPcRelativeCode;
+
   uint32_t GetMethodOffset(uint32_t method_idx) {
     auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
     CHECK(result.first);
     return result.second;
   }
+
+  void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringReference(uint32_t string_offset);
 };
 
-TEST_F(MipsRelativePatcherTest, DexCacheReference) {
-  dex_cache_arrays_begin_ = 0x12345678;
-  constexpr size_t kElementOffset = 0x1234;
-  static const uint8_t raw_code[] = {
-      0x00, 0x00, 0x10, 0x04,  // nal
-      0x34, 0x12, 0x12, 0x3C,  // lui  s2, high(diff); placeholder = 0x1234
-      0x78, 0x56, 0x52, 0x36,  // ori  s2, s2, low(diff); placeholder = 0x5678
-      0x21, 0x90, 0x5F, 0x02,  // addu s2, s2, ra
-  };
-  constexpr uint32_t literal_offset = 4;  // At lui (where patching starts).
-  constexpr uint32_t anchor_offset = 8;  // At ori (where PC+0 points).
-  ArrayRef<const uint8_t> code(raw_code);
-  LinkerPatch patches[] = {
-      LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset),
-  };
-  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+const uint8_t MipsRelativePatcherTest::UnpatchedPcRelativeRawCode[] = {
+    0x00, 0x00, 0x10, 0x04,  // nal
+    0x34, 0x12, 0x12, 0x3C,  // lui  s2, high(diff); placeholder = 0x1234
+    0x78, 0x56, 0x52, 0x36,  // ori  s2, s2, low(diff); placeholder = 0x5678
+    0x21, 0x90, 0x5F, 0x02,  // addu s2, s2, ra
+};
+const uint32_t MipsRelativePatcherTest::LiteralOffset = 4;  // At lui (where patching starts).
+const uint32_t MipsRelativePatcherTest::AnchorOffset = 8;  // At ori (where PC+0 points).
+const ArrayRef<const uint8_t> MipsRelativePatcherTest::UnpatchedPcRelativeCode(
+    UnpatchedPcRelativeRawCode);
+
+void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches,
+                                                   uint32_t target_offset) {
+  AddCompiledMethod(MethodRef(1u), UnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
   ASSERT_TRUE(result.first);
-  uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) +
-      kDexCacheArrayLwOffset;
-  static const uint8_t expected_code[] = {
+
+  uint32_t diff = target_offset - (result.second + AnchorOffset);
+  if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) {
+    diff += kDexCacheArrayLwOffset;
+  }
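+  // Mirrors the patcher: only dex cache array references add the LW offset.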
+
+  const uint8_t expected_code[] = {
       0x00, 0x00, 0x10, 0x04,
       static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C,
       static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x36,
@@ -67,5 +79,32 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+void MipsRelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
+                                                    uint32_t element_offset) {
+  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(LiteralOffset, nullptr, AnchorOffset, element_offset)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
+                       dex_cache_arrays_begin_ + element_offset);
+}
+
+void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_offset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(LiteralOffset, nullptr, AnchorOffset, kStringIndex)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
+}
+
+TEST_F(MipsRelativePatcherTest, DexCacheReference) {
+  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+}
+
+TEST_F(MipsRelativePatcherTest, StringReference) {
+  TestStringReference(/* string_offset */ 0x87651234);
+}
+
 }  // namespace linker
 }  // namespace art
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 1fc247f..8aefd9e 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -533,9 +533,6 @@
         first_index_bounds_check_map_(
             std::less<int>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
-        dynamic_bce_standby_(
-            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
-        record_dynamic_bce_standby_(true),
         early_exit_loop_(
             std::less<uint32_t>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
@@ -560,14 +557,6 @@
   }
 
   void Finish() {
-    // Retry dynamic bce candidates on standby that are still in the graph.
-    record_dynamic_bce_standby_ = false;
-    for (HBoundsCheck* bounds_check : dynamic_bce_standby_) {
-      if (bounds_check->IsInBlock()) {
-        TryDynamicBCE(bounds_check);
-      }
-    }
-
     // Preserve SSA structure which may have been broken by adding one or more
     // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
     InsertPhiNodes();
@@ -576,7 +565,6 @@
     early_exit_loop_.clear();
     taken_test_loop_.clear();
     finite_loop_.clear();
-    dynamic_bce_standby_.clear();
   }
 
  private:
@@ -832,7 +820,6 @@
            array_length->IsArrayLength() ||
            array_length->IsPhi());
     bool try_dynamic_bce = true;
-
     // Analyze index range.
     if (!index->IsIntConstant()) {
       // Non-constant index.
@@ -896,10 +883,20 @@
     // If static analysis fails, and OOB is not certain, try dynamic elimination.
     if (try_dynamic_bce) {
       // Try loop-based dynamic elimination.
-      if (TryDynamicBCE(bounds_check)) {
+      HLoopInformation* loop = bounds_check->GetBlock()->GetLoopInformation();
+      bool needs_finite_test = false;
+      bool needs_taken_test = false;
+      if (DynamicBCESeemsProfitable(loop, bounds_check->GetBlock()) &&
+          induction_range_.CanGenerateCode(
+              bounds_check, index, &needs_finite_test, &needs_taken_test) &&
+          CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+          // Do this test last, since it may generate code.
+          CanHandleLength(loop, array_length, needs_taken_test)) {
+        TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        TransformLoopForDynamicBCE(loop, bounds_check);
         return;
       }
-      // Prepare dominator-based dynamic elimination.
+      // Otherwise, prepare dominator-based dynamic elimination.
       if (first_index_bounds_check_map_.find(array_length->GetId()) ==
           first_index_bounds_check_map_.end()) {
         // Remember the first bounds check against each array_length. That bounds check
@@ -1180,7 +1177,7 @@
     }
   }
 
-  // Perform dominator-based dynamic elimination on suitable set of bounds checks.
+  /** Performs dominator-based dynamic elimination on a suitable set of bounds checks. */
   void AddCompareWithDeoptimization(HBasicBlock* block,
                                     HInstruction* array_length,
                                     HInstruction* base,
@@ -1190,6 +1187,12 @@
     // Construct deoptimization on single or double bounds on range [base-min_c,base+max_c],
     // for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1
     // and base+3, since we made the assumption any in between value may occur too.
+    // In code, using unsigned comparisons:
+    // (1) constants only
+    //       if (max_c >= a.length) deoptimize;
+    // (2) general case
+    //       if (base-min_c >  base+max_c) deoptimize;
+    //       if (base+max_c >= a.length  ) deoptimize;
     static_assert(kMaxLengthForAddingDeoptimize < std::numeric_limits<int32_t>::max(),
                   "Incorrect max length may be subject to arithmetic wrap-around");
     HInstruction* upper = GetGraph()->GetIntConstant(max_c);
@@ -1208,7 +1211,7 @@
     has_dom_based_dynamic_bce_ = true;
   }
 
-  // Attempt dominator-based dynamic elimination on remaining candidates.
+  /** Attempts dominator-based dynamic elimination on remaining candidates. */
   void AddComparesWithDeoptimization(HBasicBlock* block) {
     for (const auto& entry : first_index_bounds_check_map_) {
       HBoundsCheck* bounds_check = entry.second;
@@ -1272,17 +1275,19 @@
           candidates.push_back(other_bounds_check);
         }
       }
-      // Perform dominator-based deoptimization if it seems profitable. Note that we reject cases
-      // where the distance min_c:max_c range gets close to the maximum possible array length,
-      // since those cases are likely to always deopt (such situations do not necessarily go
-      // OOB, though, since the programmer could rely on wrap-around from max to min).
+      // Perform dominator-based deoptimization if it seems profitable, where we eliminate
+      // bounds checks and replace these with deopt checks that guard against any possible
+      // OOB. Note that we reject cases where the distance min_c:max_c range gets close to
+      // the maximum possible array length, since those cases are likely to always deopt
+      // (such situations do not necessarily go OOB, though, since the array could be really
+      // large, or the programmer could rely on arithmetic wrap-around from max to min).
       size_t threshold = kThresholdForAddingDeoptimize + (base == nullptr ? 0 : 1);  // extra test?
       uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
       if (candidates.size() >= threshold &&
           (base != nullptr || min_c >= 0) &&  // reject certain OOB
            distance <= kMaxLengthForAddingDeoptimize) {  // reject likely/certain deopt
         AddCompareWithDeoptimization(block, array_length, base, min_c, max_c);
-        for (HInstruction* other_bounds_check : candidates) {
+        for (HBoundsCheck* other_bounds_check : candidates) {
           // Only replace if still in the graph. This avoids visiting the same
           // bounds check twice if it occurred multiple times in the use list.
           if (other_bounds_check->IsInBlock()) {
@@ -1328,45 +1333,129 @@
   }
 
   /**
-   * When the compiler fails to remove a bounds check statically, we try to remove the bounds
-   * check dynamically by adding runtime tests that trigger a deoptimization in case bounds
-   * will go out of range (we want to be rather certain of that given the slowdown of
-   * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
-   * bounds checks and related null checks removed.
+   * Performs loop-based dynamic elimination on a bounds check. To minimize the number of
+   * runtime tests that are eventually generated, related bounds checks whose tests can be
+   * combined with those for the given bounds check are collected first.
    */
-  bool TryDynamicBCE(HBoundsCheck* instruction) {
-    HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
-    HInstruction* index = instruction->InputAt(0);
-    HInstruction* length = instruction->InputAt(1);
-    // If dynamic bounds check elimination seems profitable and is possible, then proceed.
-    bool needs_finite_test = false;
-    bool needs_taken_test = false;
-    if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
-        induction_range_.CanGenerateCode(
-            instruction, index, &needs_finite_test, &needs_taken_test) &&
-        CanHandleInfiniteLoop(loop, instruction, index, needs_finite_test) &&
-        CanHandleLength(loop, length, needs_taken_test)) {  // do this test last (may code gen)
-      HInstruction* lower = nullptr;
-      HInstruction* upper = nullptr;
-      // Generate the following unsigned comparisons
-      //     if (lower > upper)   deoptimize;
-      //     if (upper >= length) deoptimize;
-      // or, for a non-induction index, just the unsigned comparison on its 'upper' value
-      //     if (upper >= length) deoptimize;
-      // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
-      // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests
-      // correctly guard against any possible OOB (including arithmetic wrap-around cases).
-      TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
-      HBasicBlock* block = GetPreHeader(loop, instruction);
-      induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
-      if (lower != nullptr) {
-        InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+  void TransformLoopForDynamicBCE(HLoopInformation* loop, HBoundsCheck* bounds_check) {
+    HInstruction* index = bounds_check->InputAt(0);
+    HInstruction* array_length = bounds_check->InputAt(1);
+    DCHECK(loop->IsDefinedOutOfTheLoop(array_length));  // pre-checked
+    DCHECK(loop->DominatesAllBackEdges(bounds_check->GetBlock()));
+    // Collect all bounds checks in the same loop that are related as "a[base + constant]"
+    // for a base instruction (possibly absent) and various constants.
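+    // (E.g., accesses a[i-3], a[i], and a[i+5] in the same loop share base i
+    // and constants -3, 0, and 5, giving min_c = -3 and max_c = 5.)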
+    ValueBound value = ValueBound::AsValueBound(index);
+    HInstruction* base = value.GetInstruction();
+    int32_t min_c = base == nullptr ? 0 : value.GetConstant();
+    int32_t max_c = value.GetConstant();
+    ArenaVector<HBoundsCheck*> candidates(
+        GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+    ArenaVector<HBoundsCheck*> standby(
+        GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+    for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) {
+      HInstruction* user = use.GetUser();
+      if (user->IsBoundsCheck() && loop == user->GetBlock()->GetLoopInformation()) {
+        HBoundsCheck* other_bounds_check = user->AsBoundsCheck();
+        HInstruction* other_index = other_bounds_check->InputAt(0);
+        HInstruction* other_array_length = other_bounds_check->InputAt(1);
+        ValueBound other_value = ValueBound::AsValueBound(other_index);
+        int32_t other_c = other_value.GetConstant();
+        if (array_length == other_array_length && base == other_value.GetInstruction()) {
+          // Does the current basic block dominate all back edges? If not,
+          // add this candidate later only if it falls into the range.
+          if (!loop->DominatesAllBackEdges(user->GetBlock())) {
+            standby.push_back(other_bounds_check);
+            continue;
+          }
+          min_c = std::min(min_c, other_c);
+          max_c = std::max(max_c, other_c);
+          candidates.push_back(other_bounds_check);
+        }
       }
-      InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
-      ReplaceInstruction(instruction, index);
-      return true;
     }
-    return false;
+    // Add standby candidates that fall in selected range.
+    for (HBoundsCheck* other_bounds_check : standby) {
+      HInstruction* other_index = other_bounds_check->InputAt(0);
+      int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+      if (min_c <= other_c && other_c <= max_c) {
+        candidates.push_back(other_bounds_check);
+      }
+    }
+    // Perform loop-based deoptimization if it seems profitable, where we eliminate bounds
+    // checks and replace these with deopt checks that guard against any possible OOB.
+    DCHECK_LT(0u, candidates.size());
+    uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
+    if ((base != nullptr || min_c >= 0) &&  // reject certain OOB
+        distance <= kMaxLengthForAddingDeoptimize) {  // reject likely/certain deopt
+      HBasicBlock* block = GetPreHeader(loop, bounds_check);
+      HInstruction* min_lower = nullptr;
+      HInstruction* min_upper = nullptr;
+      HInstruction* max_lower = nullptr;
+      HInstruction* max_upper = nullptr;
+      // Iterate over all bounds checks.
+      for (HBoundsCheck* other_bounds_check : candidates) {
+        // Only handle if still in the graph. This avoids visiting the same
+        // bounds check twice if it occurred multiple times in the use list.
+        if (other_bounds_check->IsInBlock()) {
+          HInstruction* other_index = other_bounds_check->InputAt(0);
+          int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+          // Generate code for either the maximum or minimum. Range analysis was already
+          // queried to verify that code generation on the original (and thus any related)
+          // bounds check is possible. It handles loop invariants (lower not set) or unit strides.
+          if (other_c == max_c) {
+            induction_range_.GenerateRangeCode(
+                other_bounds_check, other_index, GetGraph(), block, &max_lower, &max_upper);
+          } else if (other_c == min_c && base != nullptr) {
+            induction_range_.GenerateRangeCode(
+                other_bounds_check, other_index, GetGraph(), block, &min_lower, &min_upper);
+          }
+          ReplaceInstruction(other_bounds_check, other_index);
+        }
+      }
+      // In code, using unsigned comparisons:
+      // (1) constants only
+      //       if (max_upper >= a.length ) deoptimize;
+      // (2) two symbolic invariants
+      //       if (min_upper >  max_upper) deoptimize;   unless min_c == max_c
+      //       if (max_upper >= a.length ) deoptimize;
+      // (3) general case, unit strides (where lower would exceed upper for arithmetic wrap-around)
+      //       if (min_lower >  max_lower) deoptimize;   unless min_c == max_c
+      //       if (max_lower >  max_upper) deoptimize;
+      //       if (max_upper >= a.length ) deoptimize;
+      if (base == nullptr) {
+        // Constants only.
+        DCHECK_GE(min_c, 0);
+        DCHECK(min_lower == nullptr && min_upper == nullptr &&
+               max_lower == nullptr && max_upper != nullptr);
+      } else if (max_lower == nullptr) {
+        // Two symbolic invariants.
+        if (min_c != max_c) {
+          DCHECK(min_lower == nullptr && min_upper != nullptr &&
+                 max_lower == nullptr && max_upper != nullptr);
+          InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_upper, max_upper));
+        } else {
+          DCHECK(min_lower == nullptr && min_upper == nullptr &&
+                 max_lower == nullptr && max_upper != nullptr);
+        }
+      } else {
+        // General case, unit strides.
+        if (min_c != max_c) {
+          DCHECK(min_lower != nullptr && min_upper != nullptr &&
+                 max_lower != nullptr && max_upper != nullptr);
+          InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_lower, max_lower));
+        } else {
+          DCHECK(min_lower == nullptr && min_upper == nullptr &&
+                 max_lower != nullptr && max_upper != nullptr);
+        }
+        InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(max_lower, max_upper));
+      }
+      InsertDeoptInLoop(
+          loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(max_upper, array_length));
+    } else {
+      // TODO: if rejected, avoid doing this again for subsequent instructions in this set?
+    }
   }
 
   /**
@@ -1474,8 +1563,7 @@
    * of the loop to use, dynamic bce in such cases is only allowed if other tests
    * ensure the loop is finite.
    */
-  bool CanHandleInfiniteLoop(
-      HLoopInformation* loop, HBoundsCheck* check, HInstruction* index, bool needs_infinite_test) {
+  bool CanHandleInfiniteLoop(HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
     if (needs_infinite_test) {
       // If we already forced the loop to be finite, allow directly.
       const uint32_t loop_id = loop->GetHeader()->GetBlockId();
@@ -1497,11 +1585,6 @@
           }
         }
       }
-      // If bounds check made it this far, it is worthwhile to check later if
-      // the loop was forced finite by another candidate.
-      if (record_dynamic_bce_standby_) {
-        dynamic_bce_standby_.push_back(check);
-      }
       return false;
     }
     return true;
@@ -1727,10 +1810,6 @@
   // in a block that checks an index against that HArrayLength.
   ArenaSafeMap<int, HBoundsCheck*> first_index_bounds_check_map_;
 
-  // Stand by list for dynamic bce.
-  ArenaVector<HBoundsCheck*> dynamic_bce_standby_;
-  bool record_dynamic_bce_standby_;
-
   // Early-exit loop bookkeeping.
   ArenaSafeMap<uint32_t, bool> early_exit_loop_;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 9c6dcaa..3269dc6 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1178,19 +1178,19 @@
         << "instruction->DebugName()=" << instruction->DebugName()
         << " slow_path->GetDescription()=" << slow_path->GetDescription();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
-           // When read barriers are enabled, some instructions use a
-           // slow path to emit a read barrier, which does not trigger
-           // GC, is not fatal, nor is emitted by HDeoptimize
-           // instructions.
+           // When (non-Baker) read barriers are enabled, some instructions
+           // use a slow path to emit a read barrier, which does not trigger
+           // GC.
            (kEmitCompilerReadBarrier &&
+            !kUseBakerReadBarrier &&
             (instruction->IsInstanceFieldGet() ||
              instruction->IsStaticFieldGet() ||
-             instruction->IsArraySet() ||
              instruction->IsArrayGet() ||
              instruction->IsLoadClass() ||
              instruction->IsLoadString() ||
              instruction->IsInstanceOf() ||
-             instruction->IsCheckCast())))
+             instruction->IsCheckCast() ||
+             (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))))
         << "instruction->DebugName()=" << instruction->DebugName()
         << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString()
         << " slow_path->GetDescription()=" << slow_path->GetDescription();
@@ -1204,6 +1204,27 @@
       << instruction->DebugName() << ((slow_path != nullptr) ? slow_path->GetDescription() : "");
 }
 
+void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+                                                                SlowPathCode* slow_path) {
+  DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath())
+      << "instruction->DebugName()=" << instruction->DebugName()
+      << " slow_path->GetDescription()=" << slow_path->GetDescription();
+  // Only the Baker read barrier marking slow path used by certain
+  // instructions is expected to invoke the runtime without recording
+  // PC-related information.
+  DCHECK(kUseBakerReadBarrier);
+  DCHECK(instruction->IsInstanceFieldGet() ||
+         instruction->IsStaticFieldGet() ||
+         instruction->IsArrayGet() ||
+         instruction->IsLoadClass() ||
+         instruction->IsLoadString() ||
+         instruction->IsInstanceOf() ||
+         instruction->IsCheckCast() ||
+         (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))
+      << "instruction->DebugName()=" << instruction->DebugName()
+      << " slow_path->GetDescription()=" << slow_path->GetDescription();
+}
+
 void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
   RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 68de1aa..62dd1cc 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -380,8 +380,13 @@
     return type == Primitive::kPrimNot && !value->IsNullConstant();
   }
 
+  // Performs checks pertaining to an InvokeRuntime call.
   void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);
 
+  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
+  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+                                                          SlowPathCode* slow_path);
+
   void AddAllocatedRegister(Location location) {
     allocated_registers_.Add(location);
   }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 236ed20..124a61f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -431,8 +431,7 @@
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -440,7 +439,6 @@
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
-    InvokeRuntimeCallingConvention calling_convention;
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     DCHECK_NE(reg, SP);
     DCHECK_NE(reg, LR);
@@ -462,11 +460,8 @@
     //
     int32_t entry_point_offset =
         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmWordSize>(reg);
-    // TODO: Do not emit a stack map for this runtime call.
-    arm_codegen->InvokeRuntime(entry_point_offset,
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
+    // This runtime call does not require a stack map.
+    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ b(GetExitLabel());
   }
 
@@ -516,8 +511,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -1240,6 +1234,16 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorARM::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                           HInstruction* instruction,
+                                                           SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
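+  // Load the entrypoint address from the Thread object (TR) and call it directly.
+  // Unlike InvokeRuntime(), this deliberately skips RecordPcInfo(), so no stack map is emitted.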
+  __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
+  __ blx(LR);
+}
+
 void InstructionCodeGeneratorARM::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   DCHECK(!successor->IsExitBlock());
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index ef7913b..a07a233 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -18,7 +18,6 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_
 
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "string_reference.h"
@@ -394,6 +393,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   // Emit a write barrier.
   void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 76b0797..efeef7b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -597,8 +597,7 @@
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -606,7 +605,6 @@
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
-    InvokeRuntimeCallingConvention calling_convention;
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     DCHECK_NE(obj_.reg(), LR);
     DCHECK_NE(obj_.reg(), WSP);
@@ -628,11 +626,8 @@
     //
     int32_t entry_point_offset =
         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64WordSize>(obj_.reg());
-    // TODO: Do not emit a stack map for this runtime call.
-    arm64_codegen->InvokeRuntime(entry_point_offset,
-                                 instruction_,
-                                 instruction_->GetDexPc(),
-                                 this);
+    // This runtime call does not require a stack map.
+    arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ B(GetExitLabel());
   }
 
@@ -682,8 +677,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
     // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
@@ -1502,6 +1496,17 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                             HInstruction* instruction,
+                                                             SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  BlockPoolsScope block_pools(GetVIXLAssembler());
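+  // Load the entrypoint address from the Thread object (tr) and call it directly.
+  // Unlike InvokeRuntime(), this deliberately skips RecordPcInfo(), so no stack map is emitted.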
+  __ Ldr(lr, MemOperand(tr, entry_point_offset));
+  __ Blr(lr);
+}
+
 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
                                                                      Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7201e59..03f5a33 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -20,7 +20,6 @@
 #include "arch/arm64/quick_method_frame_info_arm64.h"
 #include "code_generator.h"
 #include "common_arm64.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -499,6 +498,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 39248aa..334d30d 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -482,11 +482,22 @@
       move_resolver_(graph->GetArena(), this),
       assembler_(graph->GetArena(), &isa_features),
       isa_features_(isa_features),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       method_patches_(MethodReferenceComparator(),
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_string_patches_(StringReferenceValueComparator(),
+                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_address_patches_(std::less<uint32_t>(),
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      clobbered_ra_(false) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
 }
@@ -688,6 +699,16 @@
   if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) {
     core_spill_mask_ |= (1 << ZERO);
   }
+  // If RA is clobbered by PC-relative operations on R2 and it's the only spilled register
+  // (this can happen in leaf methods), artificially spill the ZERO register in order to
+  // force explicit saving and restoring of RA. RA isn't saved/restored when it's the only
+  // spilled register.
+  // TODO: Can this be improved? It causes creation of a stack frame (while RA might be
+  // saved in an unused temporary register) and saving of RA and the current method pointer
+  // in the frame.
+  if (clobbered_ra_ && core_spill_mask_ == (1u << RA) && fpu_spill_mask_ == 0) {
+    core_spill_mask_ |= (1 << ZERO);
+  }
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
@@ -962,7 +983,12 @@
   size_t size =
       method_patches_.size() +
       call_patches_.size() +
-      pc_relative_dex_cache_patches_.size();
+      pc_relative_dex_cache_patches_.size() +
+      pc_relative_string_patches_.size() +
+      pc_relative_type_patches_.size() +
+      boot_image_string_patches_.size() +
+      boot_image_type_patches_.size() +
+      boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -994,6 +1020,71 @@
                                                               pc_rel_offset,
                                                               base_element_offset));
   }
+  for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t string_index = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+    // the assembler's base label used for PC-relative literals.
+    uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+        ? __ GetLabelLocation(&info.pc_rel_label)
+        : __ GetPcRelBaseLabelLocation();
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset,
+                                                               &dex_file,
+                                                               pc_rel_offset,
+                                                               string_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t type_index = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+    // the assembler's base label used for PC-relative literals.
+    uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+        ? __ GetLabelLocation(&info.pc_rel_label)
+        : __ GetPcRelBaseLabelLocation();
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset,
+                                                             &dex_file,
+                                                             pc_rel_offset,
+                                                             type_index));
+  }
+  for (const auto& entry : boot_image_string_patches_) {
+    const StringReference& target_string = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
+                                                       target_string.dex_file,
+                                                       target_string.string_index));
+  }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                     target_type.dex_file,
+                                                     target_type.type_index));
+  }
+  for (const auto& entry : boot_image_address_patches_) {
+    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
+    const DexFile& dex_file, uint32_t string_index) {
+  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
+    const DexFile& dex_file, uint32_t type_index) {
+  return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_);
 }
 
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
@@ -1007,6 +1098,12 @@
   return &patches->back();
 }
 
+Literal* CodeGeneratorMIPS::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) {
+  return map->GetOrCreate(
+      value,
+      [this, value]() { return __ NewLiteral<uint32_t>(value); });
+}
+
 Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method,
                                                      MethodToLiteralMap* map) {
   return map->GetOrCreate(
@@ -1022,6 +1119,28 @@
   return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
 
+Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                              uint32_t string_index) {
+  return boot_image_string_patches_.GetOrCreate(
+      StringReference(&dex_file, string_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                            uint32_t type_index) {
+  return boot_image_type_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) {
+  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
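+  // Patchable literals go in a separate map so that EmitLinkerPatches() can record
+  // their positions; non-patchable addresses are deduplicated as plain 32-bit literals.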
+  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+}
+
 void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
   MipsLabel done;
   Register card = AT;
@@ -1067,6 +1184,15 @@
     blocked_fpu_registers_[i] = true;
   }
 
+  if (GetGraph()->IsDebuggable()) {
+    // Stubs do not save callee-save floating point registers. If the graph
+    // is debuggable, we need to deal with these registers differently. For
+    // now, just block them.
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+    }
+  }
+
   UpdateBlockedPairRegisters();
 }
 
@@ -3440,7 +3566,8 @@
     if (field_type == Primitive::kPrimLong) {
       locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimLong));
     } else {
-      locations->SetOut(Location::RequiresFpuRegister());
+      // Use Location::Any() to avoid running out of available FP registers.
+      locations->SetOut(Location::Any());
       // Need some temp core regs since FP results are returned in core registers
       Location reg = calling_convention.GetReturnLocation(Primitive::kPrimLong);
       locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairLow<Register>()));
@@ -3505,11 +3632,23 @@
                             IsDirectEntrypoint(kQuickA64Load));
     CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
     if (type == Primitive::kPrimDouble) {
-      // Need to move to FP regs since FP results are returned in core registers.
-      __ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
-              locations->Out().AsFpuRegister<FRegister>());
-      __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
-                       locations->Out().AsFpuRegister<FRegister>());
+      // FP results are returned in core registers. Need to move them.
+      Location out = locations->Out();
+      if (out.IsFpuRegister()) {
+        __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>());
+        __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                         out.AsFpuRegister<FRegister>());
+      } else {
+        DCHECK(out.IsDoubleStackSlot());
+        __ StoreToOffset(kStoreWord,
+                         locations->GetTemp(1).AsRegister<Register>(),
+                         SP,
+                         out.GetStackIndex());
+        __ StoreToOffset(kStoreWord,
+                         locations->GetTemp(2).AsRegister<Register>(),
+                         SP,
+                         out.GetStackIndex() + 4);
+      }
     }
   } else {
     if (!Primitive::IsFloatingPointType(type)) {
@@ -3568,7 +3707,8 @@
       locations->SetInAt(1, Location::RegisterPairLocation(
           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
     } else {
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      // Use Location::Any() to avoid running out of available FP registers.
+      locations->SetInAt(1, Location::Any());
       // Pass FP parameters in core registers.
       locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
       locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
@@ -3627,10 +3767,28 @@
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
     if (type == Primitive::kPrimDouble) {
       // Pass FP parameters in core registers.
-      __ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
-              locations->InAt(1).AsFpuRegister<FRegister>());
-      __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
-                         locations->InAt(1).AsFpuRegister<FRegister>());
+      Location in = locations->InAt(1);
+      if (in.IsFpuRegister()) {
+        __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), in.AsFpuRegister<FRegister>());
+        __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                           in.AsFpuRegister<FRegister>());
+      } else if (in.IsDoubleStackSlot()) {
+        __ LoadFromOffset(kLoadWord,
+                          locations->GetTemp(1).AsRegister<Register>(),
+                          SP,
+                          in.GetStackIndex());
+        __ LoadFromOffset(kLoadWord,
+                          locations->GetTemp(2).AsRegister<Register>(),
+                          SP,
+                          in.GetStackIndex() + 4);
+      } else {
+        DCHECK(in.IsConstant());
+        DCHECK(in.GetConstant()->IsDoubleConstant());
+        int64_t value = bit_cast<int64_t, double>(in.GetConstant()->AsDoubleConstant()->GetValue());
+        __ LoadConst64(locations->GetTemp(2).AsRegister<Register>(),
+                       locations->GetTemp(1).AsRegister<Register>(),
+                       value);
+      }
     }
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
                             instruction,
@@ -3696,6 +3854,23 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc());
 }
 
+void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location root,
+    Register obj,
+    uint32_t offset) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
+
 void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind =
       instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
@@ -3861,16 +4036,80 @@
 }
 
 HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
-    HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
-  // TODO: Implement other kinds.
-  return HLoadString::LoadKind::kDexCacheViaMethod;
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  }
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = has_irreducible_loops;
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      fallback_load = false;
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      fallback_load = false;
+      break;
+  }
+  if (fallback_load) {
+    desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+  }
+  return desired_string_load_kind;
 }
 
 HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
     HLoadClass::LoadKind desired_class_load_kind) {
-  DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass);
-  // TODO: Implement other kinds.
-  return HLoadClass::LoadKind::kDexCacheViaMethod;
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  }
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = has_irreducible_loops;
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      fallback_load = false;
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      fallback_load = false;
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      fallback_load = false;
+      break;
+  }
+  if (fallback_load) {
+    desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+  }
+  return desired_class_load_kind;
 }
 
 Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
@@ -4107,11 +4346,40 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(V0));
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(V0),
+        /* code_generator_supports_read_barrier */ false);  // TODO: revisit this bool.
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      if (codegen_->GetInstructionSetFeatures().IsR6()) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kReferrersClass:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      locations->SetInAt(0, Location::RequiresRegister());
+      break;
+    default:
+      break;
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
@@ -4127,34 +4395,126 @@
     return;
   }
 
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    __ LoadFromOffset(kLoadWord, out, current_method,
-                      ArtMethod::DeclaringClassOffset().Int32Value());
-  } else {
-    __ LoadFromOffset(kLoadWord, out, current_method,
-                      ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
-    __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  Register base_or_current_method_reg;
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+      break;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kReferrersClass:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
+      break;
+    default:
+      base_or_current_method_reg = ZERO;
+      break;
+  }
 
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
-          cls,
-          cls,
-          cls->GetDexPc(),
-          cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
-      if (!cls->IsInDexCache()) {
-        __ Beqz(out, slow_path->GetEntryLabel());
-      }
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
+  bool generate_null_check = false;
+  switch (load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      GenerateGcRootFieldLoad(cls,
+                              out_loc,
+                              base_or_current_method_reg,
+                              ArtMethod::DeclaringClassOffset().Int32Value());
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                               cls->GetTypeIndex()));
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+      if (isR6) {
+        __ Bind(&info->high_label);
+        __ Bind(&info->pc_rel_label);
+        // Add a 32-bit offset to PC.
+        __ Auipc(out, /* placeholder */ 0x1234);
+        __ Addiu(out, out, /* placeholder */ 0x5678);
       } else {
-        __ Bind(slow_path->GetExitLabel());
+        __ Bind(&info->high_label);
+        __ Lui(out, /* placeholder */ 0x1234);
+        // We do not bind info->pc_rel_label here; we use the assembler's base label
+        // for PC-relative literals and the base from HMipsComputeBaseMethodAddress instead.
+        __ Ori(out, out, /* placeholder */ 0x5678);
+        // Add a 32-bit offset to PC.
+        __ Addu(out, out, base_or_current_method_reg);
       }
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageAddressLiteral(address));
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(cls->GetAddress(), 4u);
+      int16_t offset = Low16Bits(address);
+      uint32_t base_address = address - offset;  // This accounts for offset sign extension.
+      __ Lui(out, High16Bits(base_address));
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      HMipsDexCacheArraysBase* base = cls->InputAt(0)->AsMipsDexCacheArraysBase();
+      int32_t offset =
+          cls->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, base_or_current_method_reg, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      __ LoadFromOffset(kLoadWord,
+                        out,
+                        base_or_current_method_reg,
+                        ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+    }
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    if (generate_null_check) {
+      __ Beqz(out, slow_path->GetEntryLabel());
+    }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
@@ -4183,21 +4543,132 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      if (codegen_->GetInstructionSetFeatures().IsR6()) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadString::LoadKind::kDexCachePcRelative:
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      locations->SetInAt(0, Location::RequiresRegister());
+      break;
+    default:
+      break;
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
   LocationSummary* locations = load->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  Register base_or_current_method_reg;
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+      break;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadString::LoadKind::kDexCachePcRelative:
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
+      break;
+    default:
+      base_or_current_method_reg = ZERO;
+      break;
+  }
+
+  switch (load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
+                                                                 load->GetStringIndex()));
+      return;  // No dex cache slow path.
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+      if (isR6) {
+        __ Bind(&info->high_label);
+        __ Bind(&info->pc_rel_label);
+        // Add a 32-bit offset to PC.
+        __ Auipc(out, /* placeholder */ 0x1234);
+        __ Addiu(out, out, /* placeholder */ 0x5678);
+      } else {
+        __ Bind(&info->high_label);
+        __ Lui(out, /* placeholder */ 0x1234);
+        // We do not bind info->pc_rel_label here; we use the assembler's base label
+        // for PC-relative literals and the base from HMipsComputeBaseMethodAddress instead.
+        __ Ori(out, out, /* placeholder */ 0x5678);
+        // Add a 32-bit offset to PC.
+        __ Addu(out, out, base_or_current_method_reg);
+      }
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageAddressLiteral(address));
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(load->GetAddress(), 4u);
+      int16_t offset = Low16Bits(address);
+      uint32_t base_address = address - offset;  // This accounts for offset sign extension.
+      __ Lui(out, High16Bits(base_address));
+      // /* GcRoot<mirror::String> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(load, out_loc, out, offset);
+      break;
+    }
+    case HLoadString::LoadKind::kDexCachePcRelative: {
+      HMipsDexCacheArraysBase* base = load->InputAt(0)->AsMipsDexCacheArraysBase();
+      int32_t offset =
+          load->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+      // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset)
+      GenerateGcRootFieldLoad(load, out_loc, base_or_current_method_reg, offset);
+      break;
+    }
+    case HLoadString::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              base_or_current_method_reg,
+                              ArtMethod::DeclaringClassOffset().Int32Value());
+      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+      __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
+      // /* GcRoot<mirror::String> */ out = out[string_index]
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              out,
+                              CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
+      UNREACHABLE();
+  }
 
   if (!load->IsInDexCache()) {
     SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
@@ -5327,6 +5798,7 @@
   __ Nal();
   // Grab the return address off RA.
   __ Move(reg, RA);
+  // TODO: Can we share this code with that of VisitMipsDexCacheArraysBase()?
 
   // Remember this offset (the obtained PC value) for later use with constant area.
   __ BindPcRelBaseLabel();
@@ -5357,6 +5829,7 @@
     __ Ori(reg, reg, /* placeholder */ 0x5678);
     // Add a 32-bit offset to PC.
     __ Addu(reg, reg, RA);
+    // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()?
   }
 }
 
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 08f74c0..63a0345 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -18,11 +18,12 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_
 
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
+#include "string_reference.h"
 #include "utils/mips/assembler_mips.h"
+#include "utils/type_reference.h"
 
 namespace art {
 namespace mips {
@@ -226,6 +227,15 @@
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               Register obj,
+                               uint32_t offset);
   void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
   void GenerateIntCompareAndBranch(IfCondition cond,
                                    LocationSummary* locations,
@@ -298,6 +308,9 @@
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+  void ClobberRA() {
+    clobbered_ra_ = true;
+  }
 
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -383,7 +396,7 @@
     PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
 
     const DexFile& target_dex_file;
-    // Either the dex cache array element offset or the string index.
+    // Either the dex cache array element offset or the string/type index.
     uint32_t offset_or_index;
     // Label for the instruction loading the most significant half of the offset that's added to PC
     // to form the base address (the least significant half is loaded with the instruction that
@@ -393,14 +406,27 @@
     MipsLabel pc_rel_label;
   };
 
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
+  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
+  Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
 
  private:
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
+                                              Literal*,
+                                              StringReferenceValueComparator>;
+  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                            Literal*,
+                                            TypeReferenceValueComparator>;
 
+  Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
   Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
@@ -417,11 +443,27 @@
   MipsAssembler assembler_;
   const MipsInstructionSetFeatures& isa_features_;
 
+  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
   // Method patch info, map MethodReference to a literal for method address and method code.
   MethodToLiteralMap method_patches_;
   MethodToLiteralMap call_patches_;
   // PC-relative patch info for each HMipsDexCacheArraysBase.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
+  BootStringToLiteralMap boot_image_string_patches_;
+  // PC-relative String patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  BootTypeToLiteralMap boot_image_type_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // Deduplication map for patchable boot image addresses.
+  Uint32ToLiteralMap boot_image_address_patches_;
+
+  // PC-relative loads on R2 clobber RA, which may need to be preserved explicitly in leaf methods.
+  // This is a flag set by pc_relative_fixups_mips and dex_cache_array_fixups_mips optimizations.
+  bool clobbered_ra_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS);
 };
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 4b462cc..197f86b 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -18,7 +18,6 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_
 
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 82baaa0..528e94f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -466,8 +466,7 @@
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -475,7 +474,6 @@
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
-    InvokeRuntimeCallingConvention calling_convention;
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     DCHECK_NE(reg, ESP);
     DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
@@ -495,11 +493,8 @@
     //
     int32_t entry_point_offset =
         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86WordSize>(reg);
-    // TODO: Do not emit a stack map for this runtime call.
-    x86_codegen->InvokeRuntime(entry_point_offset,
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
+    // This runtime call does not require a stack map.
+    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ jmp(GetExitLabel());
   }
 
@@ -549,8 +544,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -824,6 +818,15 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                           HInstruction* instruction,
+                                                           SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
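+  // Call the entrypoint through its slot in the Thread object, addressed via the FS
+  // segment register; RecordPcInfo() is deliberately skipped, so no stack map is emitted.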
+  __ fs()->call(Address::Absolute(entry_point_offset));
+}
+
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                    const X86InstructionSetFeatures& isa_features,
                                    const CompilerOptions& compiler_options,
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 2a9fb80..39ea7d5 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -19,7 +19,6 @@
 
 #include "arch/x86/instruction_set_features_x86.h"
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -336,6 +335,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   size_t GetWordSize() const OVERRIDE {
     return kX86WordSize;
   }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index b6ba30e..0f0129b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -487,8 +487,7 @@
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -496,7 +495,6 @@
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
-    InvokeRuntimeCallingConvention calling_convention;
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     DCHECK_NE(reg, RSP);
     DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
@@ -516,11 +514,8 @@
     //
     int32_t entry_point_offset =
         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64WordSize>(reg);
-    // TODO: Do not emit a stack map for this runtime call.
-    x86_64_codegen->InvokeRuntime(entry_point_offset,
-                                  instruction_,
-                                  instruction_->GetDexPc(),
-                                  this);
+    // This runtime call does not require a stack map.
+    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ jmp(GetExitLabel());
   }
 
@@ -570,8 +565,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -1052,6 +1046,15 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                              HInstruction* instruction,
+                                                              SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
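+  // Call the entrypoint through its slot in the Thread object, addressed via the GS
+  // segment register; RecordPcInfo() is deliberately skipped, so no stack map is emitted.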
+  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
+}
+
 static constexpr int kNumberOfCpuRegisterPairs = 0;
 // Use a fake return address register to mimic Quick.
 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d7cfd37..fbb78bc 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -19,7 +19,6 @@
 
 #include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -318,6 +317,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   size_t GetWordSize() const OVERRIDE {
     return kX86_64WordSize;
   }
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
index 0f42d9c..19bab08 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "code_generator_mips.h"
 #include "dex_cache_array_fixups_mips.h"
 
 #include "base/arena_containers.h"
@@ -27,8 +28,9 @@
  */
 class DexCacheArrayFixupsVisitor : public HGraphVisitor {
  public:
-  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen)
       : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorMIPS*>(codegen)),
         dex_cache_array_bases_(std::less<const DexFile*>(),
                                // Attribute memory use to code generator.
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
@@ -41,9 +43,45 @@
       HMipsDexCacheArraysBase* base = entry.second;
       base->MoveBeforeFirstUserAndOutOfLoops();
     }
+    // Computing the dex cache base for PC-relative accesses will clobber RA with
+    // the NAL instruction on R2. Take a note of this before generating the method
+    // entry.
+    if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) {
+      codegen_->ClobberRA();
+    }
   }
 
  private:
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache types array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_class->GetDexFile();
+      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex()));
+      // Add the special argument base to the load.
+      load_class->AddSpecialInput(base);
+    }
+  }
+
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache strings array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_string->GetDexFile();
+      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex()));
+      // Add the special argument base to the load.
+      load_string->AddSpecialInput(base);
+    }
+  }
+
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     // If this is an invoke with PC-relative access to the dex cache methods array,
     // we need to add the dex cache arrays base as the special input.
@@ -74,6 +112,8 @@
         });
   }
 
+  CodeGeneratorMIPS* codegen_;
+
   using DexCacheArraysBaseMap =
       ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>;
   DexCacheArraysBaseMap dex_cache_array_bases_;
@@ -85,7 +125,7 @@
     // that can be live-in at the irreducible loop header.
     return;
   }
-  DexCacheArrayFixupsVisitor visitor(graph_);
+  DexCacheArrayFixupsVisitor visitor(graph_, codegen_);
   visitor.VisitInsertionOrder();
   visitor.MoveBasesIfNeeded();
 }
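For context: NAL on pre-R6 MIPS is a branch-and-link to the next instruction, used only to capture the PC into RA, so computing a PC-relative base this way clobbers RA even in leaf methods. Under that assumption, the check above boils down to this sketch:

    // Hedged restatement of MoveBasesIfNeeded()'s tail: only R2 needs the
    // NAL trick, so only R2 must treat RA as clobbered.
    if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) {
      codegen_->ClobberRA();  // Presumably makes the prologue save RA even for leaf methods.
    }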
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h
index c8def28..21056e1 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.h
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.h
@@ -21,14 +21,21 @@
 #include "optimization.h"
 
 namespace art {
+
+class CodeGenerator;
+
 namespace mips {
 
 class DexCacheArrayFixups : public HOptimization {
  public:
-  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "dex_cache_array_fixups_mips", stats) {}
+  DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "dex_cache_array_fixups_mips", stats),
+        codegen_(codegen) {}
 
   void Run() OVERRIDE;
+
+ private:
+  CodeGenerator* codegen_;
 };
 
 }  // namespace mips
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index afac5f9..e5dab56 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -19,6 +19,7 @@
 #include "art_method-inl.h"
 #include "bytecode_utils.h"
 #include "class_linker.h"
+#include "dex_instruction-inl.h"
 #include "driver/compiler_options.h"
 #include "scoped_thread_state_change.h"
 
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 9cfc065..517cf76 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -30,6 +30,8 @@
 
 namespace art {
 
+class Instruction;
+
 class HInstructionBuilder : public ValueObject {
  public:
   HInstructionBuilder(HGraph* graph,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d557f42..2808e1b 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2632,4 +2632,23 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind) {
+  switch (kind) {
+    case MemBarrierKind::kAnyStore:
+      return os << "AnyStore";
+    case MemBarrierKind::kLoadAny:
+      return os << "LoadAny";
+    case MemBarrierKind::kStoreStore:
+      return os << "StoreStore";
+    case MemBarrierKind::kAnyAny:
+      return os << "AnyAny";
+    case MemBarrierKind::kNTStoreStore:
+      return os << "NTStoreStore";
+
+    default:
+      LOG(FATAL) << "Unknown MemBarrierKind: " << static_cast<int>(kind);
+      UNREACHABLE();
+  }
+}
+
 }  // namespace art
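A small usage illustration of the new stream operator (not part of the patch; assumes <sstream> and the art namespace are available):

    // Hedged example: barrier kinds now print readably instead of as raw ints.
    std::string Describe(MemBarrierKind kind) {
      std::ostringstream oss;
      oss << "barrier: " << kind;  // e.g. "barrier: LoadAny"
      return oss.str();
    }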
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 23ac457..dfa8276 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -25,7 +25,6 @@
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/stl_util.h"
-#include "dex/compiler_enums.h"
 #include "dex_file.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
@@ -5626,9 +5625,12 @@
 
 // Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
 inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
-  // The special input is used for PC-relative loads on some architectures.
+  // The special input is used for PC-relative loads on some architectures;
+  // this includes literal pool loads, which are PC-relative too.
   DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
-         GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind();
+         GetLoadKind() == LoadKind::kDexCachePcRelative ||
+         GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
+         GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
   DCHECK(special_input_.GetInstruction() == nullptr);
   special_input_ = HUserRecord<HInstruction*>(special_input);
   special_input->AddUseAt(this, 0);
@@ -5836,9 +5838,12 @@
 
 // Note: defined outside class to see operator<<(., HLoadString::LoadKind).
 inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
-  // The special input is used for PC-relative loads on some architectures.
+  // The special input is used for PC-relative loads on some architectures;
+  // this includes literal pool loads, which are PC-relative too.
   DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
-         GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind();
+         GetLoadKind() == LoadKind::kDexCachePcRelative ||
+         GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
+         GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
   // HLoadString::GetInputRecords() returns an empty array at this point,
   // so use the GetInputRecords() from the base class to set the input record.
   DCHECK(special_input_.GetInstruction() == nullptr);
@@ -6305,6 +6310,32 @@
   DISALLOW_COPY_AND_ASSIGN(HCheckCast);
 };
 
+/**
+ * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
+ * @details We define the combined barrier types that are actually required
+ * by the Java Memory Model, rather than using exactly the terminology from
+ * the JSR-133 cookbook.  These should, in many cases, be replaced by acquire/release
+ * primitives.  Note that the JSR-133 cookbook generally does not deal with
+ * store atomicity issues, and the recipes there are not always entirely sufficient.
+ * The current recipe is as follows:
+ * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
+ * -# Use AnyAny barrier after volatile store.  (A StoreLoad alone would cost the same.)
+ * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
+ * -# Use StoreStore barrier after all stores but before return from any constructor whose
+ *    class has final fields.
+ * -# Use NTStoreStore to order non-temporal stores with respect to all later
+ *    store-to-memory instructions.  Only generated together with non-temporal stores.
+ */
+enum MemBarrierKind {
+  kAnyStore,
+  kLoadAny,
+  kStoreStore,
+  kAnyAny,
+  kNTStoreStore,
+  kLastBarrierKind = kNTStoreStore
+};
+std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
+
 class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
  public:
   explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc)
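To make the recipe in the comment block concrete, here is a hedged sketch of how a backend would bracket a volatile store with these kinds; EmitBarrier, EmitStore, FieldRef and ValueRef are placeholders, not ART APIs:

    // Items 1 and 2 of the recipe above, as straight-line pseudocode.
    void GenerateVolatileStore(FieldRef field, ValueRef value) {
      EmitBarrier(kAnyStore);  // release: prior loads/stores complete before the store
      EmitStore(field, value);
      EmitBarrier(kAnyAny);    // full barrier after the volatile store
    }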
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 0bca186..d5b0d77 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -305,6 +305,18 @@
       OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+ protected:
+  virtual void RunOptimizations(HGraph* graph,
+                                CodeGenerator* codegen,
+                                CompilerDriver* driver,
+                                const DexCompilationUnit& dex_compilation_unit,
+                                PassObserver* pass_observer,
+                                StackHandleScopeCollection* handles) const;
+
+  virtual void RunOptimizations(HOptimization* optimizations[],
+                                size_t length,
+                                PassObserver* pass_observer) const;
+
  private:
   // Create a 'CompiledMethod' for an optimized graph.
   CompiledMethod* Emit(ArenaAllocator* arena,
@@ -333,6 +345,18 @@
                             ArtMethod* method,
                             bool osr) const;
 
+  void MaybeRunInliner(HGraph* graph,
+                       CodeGenerator* codegen,
+                       CompilerDriver* driver,
+                       const DexCompilationUnit& dex_compilation_unit,
+                       PassObserver* pass_observer,
+                       StackHandleScopeCollection* handles) const;
+
+  void RunArchOptimizations(InstructionSet instruction_set,
+                            HGraph* graph,
+                            CodeGenerator* codegen,
+                            PassObserver* pass_observer) const;
+
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
 
   std::unique_ptr<std::ostream> visualizer_output_;
@@ -396,22 +420,22 @@
       || instruction_set == kX86_64;
 }
 
-static void RunOptimizations(HOptimization* optimizations[],
-                             size_t length,
-                             PassObserver* pass_observer) {
+void OptimizingCompiler::RunOptimizations(HOptimization* optimizations[],
+                                          size_t length,
+                                          PassObserver* pass_observer) const {
   for (size_t i = 0; i < length; ++i) {
     PassScope scope(optimizations[i]->GetPassName(), pass_observer);
     optimizations[i]->Run();
   }
 }
 
-static void MaybeRunInliner(HGraph* graph,
-                            CodeGenerator* codegen,
-                            CompilerDriver* driver,
-                            OptimizingCompilerStats* stats,
-                            const DexCompilationUnit& dex_compilation_unit,
-                            PassObserver* pass_observer,
-                            StackHandleScopeCollection* handles) {
+void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
+                                         CodeGenerator* codegen,
+                                         CompilerDriver* driver,
+                                         const DexCompilationUnit& dex_compilation_unit,
+                                         PassObserver* pass_observer,
+                                         StackHandleScopeCollection* handles) const {
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   const CompilerOptions& compiler_options = driver->GetCompilerOptions();
   bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
       && (compiler_options.GetInlineMaxCodeUnits() > 0);
@@ -435,11 +459,11 @@
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
 }
 
-static void RunArchOptimizations(InstructionSet instruction_set,
-                                 HGraph* graph,
-                                 CodeGenerator* codegen,
-                                 OptimizingCompilerStats* stats,
-                                 PassObserver* pass_observer) {
+void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
+                                              HGraph* graph,
+                                              CodeGenerator* codegen,
+                                              PassObserver* pass_observer) const {
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
@@ -480,7 +504,7 @@
       mips::PcRelativeFixups* pc_relative_fixups =
           new (arena) mips::PcRelativeFixups(graph, codegen, stats);
       mips::DexCacheArrayFixups* dex_cache_array_fixups =
-          new (arena) mips::DexCacheArrayFixups(graph, stats);
+          new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
       HOptimization* mips_optimizations[] = {
           pc_relative_fixups,
           dex_cache_array_fixups
@@ -539,13 +563,13 @@
   }
 }
 
-static void RunOptimizations(HGraph* graph,
-                             CodeGenerator* codegen,
-                             CompilerDriver* driver,
-                             OptimizingCompilerStats* stats,
-                             const DexCompilationUnit& dex_compilation_unit,
-                             PassObserver* pass_observer,
-                             StackHandleScopeCollection* handles) {
+void OptimizingCompiler::RunOptimizations(HGraph* graph,
+                                          CodeGenerator* codegen,
+                                          CompilerDriver* driver,
+                                          const DexCompilationUnit& dex_compilation_unit,
+                                          PassObserver* pass_observer,
+                                          StackHandleScopeCollection* handles) const {
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
       graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
@@ -578,7 +602,7 @@
   };
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
-  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
+  MaybeRunInliner(graph, codegen, driver, dex_compilation_unit, pass_observer, handles);
 
   HOptimization* optimizations2[] = {
     // SelectGenerator depends on the InstructionSimplifier removing
@@ -601,7 +625,7 @@
   };
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
-  RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, stats, pass_observer);
+  RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer);
   AllocateRegisters(graph, codegen, pass_observer);
 }
 
@@ -813,7 +837,6 @@
     RunOptimizations(graph,
                      codegen.get(),
                      compiler_driver,
-                     compilation_stats_.get(),
                      dex_compilation_unit,
                      &pass_observer,
                      &handles);
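The point of promoting these helpers from file-static functions to protected virtual methods is that stats now come from compilation_stats_ and a subclass can specialize the pass pipeline. A hypothetical override, purely illustrative (no such subclass exists in this change; the constructor signature is assumed):

    // Hedged sketch: a subclass hooking the generic pass loop.
    class TracingCompiler : public OptimizingCompiler {
     public:
      explicit TracingCompiler(CompilerDriver* driver) : OptimizingCompiler(driver) {}

     protected:
      void RunOptimizations(HOptimization* optimizations[],
                            size_t length,
                            PassObserver* pass_observer) const OVERRIDE {
        VLOG(compiler) << "Running " << length << " passes";
        OptimizingCompiler::RunOptimizations(optimizations, length, pass_observer);
      }
    };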
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index ba405cd..c6acc45 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -37,6 +37,10 @@
       // entry block) and relieve some pressure on the register allocator
       // while avoiding recalculation of the base in a loop.
       base_->MoveBeforeFirstUserAndOutOfLoops();
+      // Computing the base for PC-relative literals clobbers RA with the NAL
+      // instruction on R2. Note this before generating the method entry so
+      // that RA is saved and restored in the prologue/epilogue.
+      codegen_->ClobberRA();
     }
   }
 
@@ -58,6 +62,36 @@
     DCHECK(base_ != nullptr);
   }
 
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
+    switch (load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+        // Add a base register for PC-relative literals on R2.
+        InitializePCRelativeBasePointer();
+        load_class->AddSpecialInput(base_);
+        break;
+      default:
+        break;
+    }
+  }
+
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    HLoadString::LoadKind load_kind = load_string->GetLoadKind();
+    switch (load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+        // Add a base register for PC-relative literals on R2.
+        InitializePCRelativeBasePointer();
+        load_string->AddSpecialInput(base_);
+        break;
+      default:
+        break;
+    }
+  }
+
   void HandleInvoke(HInvoke* invoke) {
     // If this is an invoke-static/-direct with PC-relative dex cache array
     // addressing, we need the PC-relative address base.
@@ -77,7 +111,7 @@
       // method pointer from the invoke.
       if (invoke_static_or_direct->HasCurrentMethodInput()) {
         DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
-        CHECK(!has_extra_input);  // TODO: review this.
+        CHECK(!has_extra_input);
         return;
       }
 
@@ -116,7 +150,6 @@
   CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
   if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
     // Do nothing for R6 because it has PC-relative addressing.
-    // TODO: review. Move this check into RunArchOptimizations()?
     return;
   }
   if (graph_->HasIrreducibleLoops()) {
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index ebaf1c0..608b3bc 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -2024,6 +2024,10 @@
   Bind(&pc_rel_base_label_);
 }
 
+uint32_t MipsAssembler::GetPcRelBaseLabelLocation() const {
+  return GetLabelLocation(&pc_rel_base_label_);
+}
+
 void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) {
   uint32_t length = branches_.back().GetLength();
   if (!label->IsBound()) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 1f7781f..8367e68 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -646,6 +646,9 @@
   // The assembler then computes literal offsets relative to this label.
   void BindPcRelBaseLabel();
 
+  // Returns the location of the label bound with BindPcRelBaseLabel().
+  uint32_t GetPcRelBaseLabelLocation() const;
+
   // Note that PC-relative literal loads are handled as pseudo branches because they need very
   // similar relocation and may similarly expand in size to accommodate larger offsets relative
   // to PC.
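A hedged sketch of what the new getter enables: turning a code-buffer position into a displacement relative to the base register on R2 (all names other than the getter itself are illustrative):

    // After BindPcRelBaseLabel() has been emitted and the label bound:
    uint32_t anchor = assembler.GetPcRelBaseLabelLocation();
    // A literal placed at `literal_pos` in the code buffer is then addressed
    // as base_reg + disp on R2.
    int32_t disp = static_cast<int32_t>(literal_pos - anchor);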
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 93351e9..c076b5a 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
+#include <regex>
+#include <sstream>
 #include <string>
 #include <vector>
-#include <sstream>
 
 #include "common_runtime_test.h"
 
@@ -207,7 +208,7 @@
     std::string dex_location = GetScratchDir() + "/Dex2OatSwapTest.jar";
     std::string odex_location = GetOdexDir() + "/Dex2OatSwapTest.odex";
 
-    Copy(GetDexSrc1(), dex_location);
+    Copy(GetTestDexFileName(), dex_location);
 
     std::vector<std::string> copy(extra_args);
 
@@ -226,7 +227,11 @@
     CheckResult(expect_use);
   }
 
-  void CheckResult(bool expect_use) {
+  virtual std::string GetTestDexFileName() {
+    return GetDexSrc1();
+  }
+
+  virtual void CheckResult(bool expect_use) {
     if (kIsTargetBuild) {
       CheckTargetResult(expect_use);
     } else {
@@ -234,13 +239,13 @@
     }
   }
 
-  void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) {
+  virtual void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) {
     // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
     //       something for variants with file descriptor where we can control the lifetime of
     //       the swap file and thus take a look at it.
   }
 
-  void CheckHostResult(bool expect_use) {
+  virtual void CheckHostResult(bool expect_use) {
     if (!kIsTargetBuild) {
       if (expect_use) {
         EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
@@ -253,7 +258,7 @@
   }
 
   // Check whether the dex2oat run was really successful.
-  void CheckValidity() {
+  virtual void CheckValidity() {
     if (kIsTargetBuild) {
       CheckTargetValidity();
     } else {
@@ -261,14 +266,14 @@
     }
   }
 
-  void CheckTargetValidity() {
+  virtual void CheckTargetValidity() {
     // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
     //       something for variants with file descriptor where we can control the lifetime of
     //       the swap file and thus take a look at it.
   }
 
   // On the host, we can get the dex2oat output. Here, look for "dex2oat took."
-  void CheckHostValidity() {
+  virtual void CheckHostValidity() {
     EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
   }
 };
@@ -297,6 +302,122 @@
           { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
 }
 
+class Dex2oatSwapUseTest : public Dex2oatSwapTest {
+ protected:
+  void CheckHostResult(bool expect_use) OVERRIDE {
+    if (!kIsTargetBuild) {
+      if (expect_use) {
+        EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      } else {
+        EXPECT_EQ(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      }
+    }
+  }
+
+  std::string GetTestDexFileName() OVERRIDE {
+    // Use Statics as it has a handful of functions.
+    return CommonRuntimeTest::GetTestDexFileName("Statics");
+  }
+
+  void GrabResult1() {
+    if (!kIsTargetBuild) {
+      native_alloc_1_ = ParseNativeAlloc();
+      swap_1_ = ParseSwap(false /* expected */);
+    } else {
+      native_alloc_1_ = std::numeric_limits<size_t>::max();
+      swap_1_ = 0;
+    }
+  }
+
+  void GrabResult2() {
+    if (!kIsTargetBuild) {
+      native_alloc_2_ = ParseNativeAlloc();
+      swap_2_ = ParseSwap(true /* expected */);
+    } else {
+      native_alloc_2_ = 0;
+      swap_2_ = std::numeric_limits<size_t>::max();
+    }
+  }
+
+ private:
+  size_t ParseNativeAlloc() {
+    std::regex native_alloc_regex("dex2oat took.*native alloc=[^ ]+ \\(([0-9]+)B\\)");
+    std::smatch native_alloc_match;
+    bool found = std::regex_search(output_, native_alloc_match, native_alloc_regex);
+    if (!found) {
+      EXPECT_TRUE(found);
+      return 0;
+    }
+    if (native_alloc_match.size() != 2U) {
+      EXPECT_EQ(native_alloc_match.size(), 2U);
+      return 0;
+    }
+
+    std::istringstream stream(native_alloc_match[1].str());
+    size_t value;
+    stream >> value;
+
+    return value;
+  }
+
+  size_t ParseSwap(bool expected) {
+    std::regex swap_regex("dex2oat took[^\\n]+swap=[^ ]+ \\(([0-9]+)B\\)");
+    std::smatch swap_match;
+    bool found = std::regex_search(output_, swap_match, swap_regex);
+    if (found != expected) {
+      EXPECT_EQ(expected, found);
+      return 0;
+    }
+
+    if (!found) {
+      return 0;
+    }
+
+    if (swap_match.size() != 2U) {
+      EXPECT_EQ(swap_match.size(), 2U);
+      return 0;
+    }
+
+    std::istringstream stream(swap_match[1].str());
+    size_t value;
+    stream >> value;
+
+    return value;
+  }
+
+ protected:
+  size_t native_alloc_1_;
+  size_t native_alloc_2_;
+
+  size_t swap_1_;
+  size_t swap_2_;
+};
+
+TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) {
+  RunTest(false /* use_fd */,
+          false /* expect_use */);
+  GrabResult1();
+  std::string output_1 = output_;
+
+  output_ = "";
+
+  RunTest(false /* use_fd */,
+          true /* expect_use */,
+          { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
+  GrabResult2();
+  std::string output_2 = output_;
+
+  if (native_alloc_2_ >= native_alloc_1_ || swap_1_ >= swap_2_) {
+    EXPECT_LT(native_alloc_2_, native_alloc_1_);
+    EXPECT_LT(swap_1_, swap_2_);
+
+    LOG(ERROR) << output_1;
+    LOG(ERROR) << output_2;
+  }
+}
+
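The two parsers above share the same shape; a self-contained sketch of the extraction step, assuming the log matches the `dex2oat took ... key=... (NNNB)` format:

    #include <regex>
    #include <string>

    // Hedged sketch: pull the byte count for `key` out of a dex2oat log.
    size_t ParseBytes(const std::string& log, const std::string& key) {
      std::regex re(key + "=[^ ]+ \\(([0-9]+)B\\)");
      std::smatch m;
      return std::regex_search(log, m, re) ? std::stoul(m[1].str()) : 0u;
    }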
 class Dex2oatVeryLargeTest : public Dex2oatTest {
  protected:
   void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED,
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 6173ae7..a5be52d 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1276,8 +1276,18 @@
      * name mismatch between instructions. This macro uses the lower 32b of register when possible.
      * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
      */
-.macro READ_BARRIER xDest, wDest, xObj, offset
+.macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
 #ifdef USE_READ_BARRIER
+#ifdef USE_BAKER_READ_BARRIER
+    ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
+    // False dependency to avoid needing load/load fence.
+    add \xObj, \xObj, \xTemp, lsr #32
+    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
+    UNPOISON_HEAP_REF \wDest
+    b .Lrb_exit\number
+#endif
+.Lrb_slowpath\number:
     // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
     stp x0, x1, [sp, #-48]!
     .cfi_adjust_cfa_offset 48
@@ -1311,6 +1321,7 @@
     .cfi_restore x30
     add sp, sp, #48
     .cfi_adjust_cfa_offset -48
+.Lrb_exit\number:
 #else
     ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
     UNPOISON_HEAP_REF \wDest
@@ -1349,12 +1360,12 @@
 #endif
 ENTRY art_quick_aput_obj
     cbz x2, .Ldo_aput_null
-    READ_BARRIER x3, w3, x0, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
-                                                         // This also zero-extends to x3
-    READ_BARRIER x4, w4, x2, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
-                                                         // This also zero-extends to x4
-    READ_BARRIER x3, w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET // Heap reference = 32b
-                                                         // This also zero-extends to x3
+    READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0  // Heap reference = 32b
+                                                                    // This also zero-extends to x3
+    READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1 // Heap reference = 32b
+    // This also zero-extends to x3
+    READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2  // Heap reference = 32b
+                                                                    // This also zero-extends to x4
     cmp w3, w4  // value's type == array's component type - trivial assignability
     bne .Lcheck_assignability
 .Ldo_aput:
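In C++ terms, the new Baker fast path behaves roughly like the following sketch; the helper functions and pointer arithmetic are illustrative shorthand, and the authoritative version is the assembly above:

    // Hedged C++ analogue of the READ_BARRIER fast path.
    uint64_t lock_word = LoadLockWord32ZeroExtended(obj);  // ldr wTemp, [xObj, #lock word offset]
    if ((lock_word >> LOCK_WORD_READ_BARRIER_STATE_SHIFT) & 1u) {
      ref = SlowPathMark(obj, offset);  // spill registers, call the runtime, reload
    } else {
      // (lock_word >> 32) is always zero, but the address dependency on the
      // first load orders the two loads without a load/load fence.
      ref = LoadHeapReference(obj + (lock_word >> 32), offset);
    }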
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 32425d8..70a907f 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -90,10 +90,12 @@
   if (kIsDebugBuild) {
     Thread* self = Thread::Current();
     if (!Locks::mutator_lock_->IsSharedHeld(self)) {
-      ScopedObjectAccess soa(self);
-      CHECK(IsRuntimeMethod() ||
-            GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
-            GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
+      if (self->IsThreadSuspensionAllowable()) {
+        ScopedObjectAccess soa(self);
+        CHECK(IsRuntimeMethod() ||
+              GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+              GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
+      }
     } else {
       // We cannot use SOA in this case. We might be holding the lock, but may not be in the
       // runnable state (e.g., during GC).
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 9075931..b7df90d 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -63,54 +63,10 @@
 ADD_TEST_EQ(static_cast<size_t>(1U << POINTER_SIZE_SHIFT),
             static_cast<size_t>(__SIZEOF_POINTER__))
 
-// Size of references to the heap on the stack.
-#define STACK_REFERENCE_SIZE 4
-ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReference<art::mirror::Object>))
-
-// Size of heap references
-#define COMPRESSED_REFERENCE_SIZE 4
-ADD_TEST_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE),
-            sizeof(art::mirror::CompressedReference<art::mirror::Object>))
-
-#define COMPRESSED_REFERENCE_SIZE_SHIFT 2
-ADD_TEST_EQ(static_cast<size_t>(1U << COMPRESSED_REFERENCE_SIZE_SHIFT),
-            static_cast<size_t>(COMPRESSED_REFERENCE_SIZE))
-
-// Note: these callee save methods loads require read barriers.
-// Offset of field Runtime::callee_save_methods_[kSaveAll]
-#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
-ADD_TEST_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET),
-            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kSaveAll))
-
-// Offset of field Runtime::callee_save_methods_[kRefsOnly]
-#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 8
-ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET),
-            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsOnly))
-
-// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
-#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET (2 * 8)
-ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET),
-            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsAndArgs))
-
-// Offset of field Thread::tls32_.state_and_flags.
-#define THREAD_FLAGS_OFFSET 0
-ADD_TEST_EQ(THREAD_FLAGS_OFFSET,
-            art::Thread::ThreadFlagsOffset<__SIZEOF_POINTER__>().Int32Value())
-
-// Offset of field Thread::tls32_.thin_lock_thread_id.
-#define THREAD_ID_OFFSET 12
-ADD_TEST_EQ(THREAD_ID_OFFSET,
-            art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
-
-// Offset of field Thread::tls32_.is_gc_marking.
-#define THREAD_IS_GC_MARKING_OFFSET 52
-ADD_TEST_EQ(THREAD_IS_GC_MARKING_OFFSET,
-            art::Thread::IsGcMarkingOffset<__SIZEOF_POINTER__>().Int32Value())
-
-// Offset of field Thread::tlsPtr_.card_table.
-#define THREAD_CARD_TABLE_OFFSET 128
-ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
-            art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
+// Import platform-independent constant defines from our autogenerated list.
+// Export new defines (for assembly use) by editing cpp-define-generator def files.
+#define DEFINE_CHECK_EQ ADD_TEST_EQ
+#include "generated/asm_support_gen.h"
 
 // Offset of field Thread::tlsPtr_.exception.
 #define THREAD_EXCEPTION_OFFSET (THREAD_CARD_TABLE_OFFSET + __SIZEOF_POINTER__)
@@ -199,17 +155,6 @@
 ADD_TEST_EQ(SHADOWFRAME_VREGS_OFFSET,
             static_cast<int32_t>(art::ShadowFrame::VRegsOffset()))
 
-// Offsets within CodeItem
-#define CODEITEM_INSNS_OFFSET 16
-ADD_TEST_EQ(CODEITEM_INSNS_OFFSET,
-            static_cast<int32_t>(OFFSETOF_MEMBER(art::DexFile::CodeItem, insns_)))
-
-// Offsets within java.lang.Object.
-#define MIRROR_OBJECT_CLASS_OFFSET 0
-ADD_TEST_EQ(MIRROR_OBJECT_CLASS_OFFSET, art::mirror::Object::ClassOffset().Int32Value())
-#define MIRROR_OBJECT_LOCK_WORD_OFFSET 4
-ADD_TEST_EQ(MIRROR_OBJECT_LOCK_WORD_OFFSET, art::mirror::Object::MonitorOffset().Int32Value())
-
 #if defined(USE_BROOKS_READ_BARRIER)
 #define MIRROR_OBJECT_HEADER_SIZE 16
 #else
@@ -231,16 +176,6 @@
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
 
-#define MIRROR_CLASS_STATUS_INITIALIZED 10
-ADD_TEST_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED),
-            static_cast<uint32_t>(art::mirror::Class::kStatusInitialized))
-#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000
-ADD_TEST_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE),
-            static_cast<uint32_t>(art::kAccClassIsFinalizable))
-#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT 31
-ADD_TEST_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE),
-            static_cast<uint32_t>(1U << ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT))
-
 // Array offsets.
 #define MIRROR_ARRAY_LENGTH_OFFSET      MIRROR_OBJECT_HEADER_SIZE
 ADD_TEST_EQ(MIRROR_ARRAY_LENGTH_OFFSET, art::mirror::Array::LengthOffset().Int32Value())
@@ -289,118 +224,7 @@
 #define MIRROR_STRING_VALUE_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
 
-// Offsets within java.lang.reflect.ArtMethod.
-#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_32 20
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_METHODS_OFFSET_32,
-            art::ArtMethod::DexCacheResolvedMethodsOffset(4).Int32Value())
 
-#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_64 24
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_METHODS_OFFSET_64,
-            art::ArtMethod::DexCacheResolvedMethodsOffset(8).Int32Value())
-
-#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_32 24
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_TYPES_OFFSET_32,
-            art::ArtMethod::DexCacheResolvedTypesOffset(4).Int32Value())
-
-#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_64 32
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_TYPES_OFFSET_64,
-            art::ArtMethod::DexCacheResolvedTypesOffset(8).Int32Value())
-
-#define ART_METHOD_JNI_OFFSET_32 28
-ADD_TEST_EQ(ART_METHOD_JNI_OFFSET_32,
-            art::ArtMethod::EntryPointFromJniOffset(4).Int32Value())
-
-#define ART_METHOD_JNI_OFFSET_64 40
-ADD_TEST_EQ(ART_METHOD_JNI_OFFSET_64,
-            art::ArtMethod::EntryPointFromJniOffset(8).Int32Value())
-
-#define ART_METHOD_QUICK_CODE_OFFSET_32 32
-ADD_TEST_EQ(ART_METHOD_QUICK_CODE_OFFSET_32,
-            art::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())
-
-#define ART_METHOD_QUICK_CODE_OFFSET_64 48
-ADD_TEST_EQ(ART_METHOD_QUICK_CODE_OFFSET_64,
-            art::ArtMethod::EntryPointFromQuickCompiledCodeOffset(8).Int32Value())
-
-#define LOCK_WORD_STATE_SHIFT 30
-ADD_TEST_EQ(LOCK_WORD_STATE_SHIFT, static_cast<int32_t>(art::LockWord::kStateShift))
-
-#define LOCK_WORD_STATE_MASK 0xC0000000
-ADD_TEST_EQ(LOCK_WORD_STATE_MASK, static_cast<uint32_t>(art::LockWord::kStateMaskShifted))
-
-#define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28
-ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_SHIFT,
-            static_cast<int32_t>(art::LockWord::kReadBarrierStateShift))
-
-#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x30000000
-ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_MASK,
-            static_cast<int32_t>(art::LockWord::kReadBarrierStateMaskShifted))
-
-#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xCFFFFFFF
-ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED,
-            static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled))
-
-#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
-ADD_TEST_EQ(LOCK_WORD_THIN_LOCK_COUNT_ONE, static_cast<int32_t>(art::LockWord::kThinLockCountOne))
-
-#define OBJECT_ALIGNMENT_MASK 7
-ADD_TEST_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), art::kObjectAlignment - 1)
-
-#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xFFFFFFF8
-ADD_TEST_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED),
-            ~static_cast<uint32_t>(art::kObjectAlignment - 1))
-
-#define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128
-ADD_TEST_EQ(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 3
-ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSizeShift))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 7
-ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff8
-ADD_TEST_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32),
-            ~static_cast<uint32_t>(
-                art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff8
-ADD_TEST_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64),
-            ~static_cast<uint64_t>(
-                art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
-
-#define ROSALLOC_RUN_FREE_LIST_OFFSET 8
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListOffset()))
-
-#define ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET 0
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListHeadOffset()))
-
-#define ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET 16
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListSizeOffset()))
-
-#define ROSALLOC_SLOT_NEXT_OFFSET 0
-ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunSlotNextOffset()))
-// Assert this so that we can avoid zeroing the next field by installing the class pointer.
-ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, MIRROR_OBJECT_CLASS_OFFSET)
-
-#define THREAD_SUSPEND_REQUEST 1
-ADD_TEST_EQ(THREAD_SUSPEND_REQUEST, static_cast<int32_t>(art::kSuspendRequest))
-
-#define THREAD_CHECKPOINT_REQUEST 2
-ADD_TEST_EQ(THREAD_CHECKPOINT_REQUEST, static_cast<int32_t>(art::kCheckpointRequest))
-
-#define JIT_CHECK_OSR (-1)
-ADD_TEST_EQ(JIT_CHECK_OSR, static_cast<int32_t>(art::jit::kJitCheckForOSR))
-
-#define JIT_HOTNESS_DISABLE (-2)
-ADD_TEST_EQ(JIT_HOTNESS_DISABLE, static_cast<int32_t>(art::jit::kJitHotnessDisabled))
 
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 5d9ae14..2a2d2c0 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -1638,8 +1638,15 @@
   mirror::Class* annotation_member_class =
       WellKnownClasses::ToClass(WellKnownClasses::libcore_reflect_AnnotationMember);
   Handle<mirror::Object> new_member(hs.NewHandle(annotation_member_class->AllocObject(self)));
-  Handle<mirror::Method> method_object(
-      hs.NewHandle(mirror::Method::CreateFromArtMethod(self, annotation_method)));
+  mirror::Method* method_obj_ptr;
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  if (pointer_size == 8U) {
+    method_obj_ptr = mirror::Method::CreateFromArtMethod<8U, false>(self, annotation_method);
+  } else {
+    DCHECK_EQ(pointer_size, 4U);
+    method_obj_ptr = mirror::Method::CreateFromArtMethod<4U, false>(self, annotation_method);
+  }
+  Handle<mirror::Method> method_object(hs.NewHandle(method_obj_ptr));
 
   if (new_member.Get() == nullptr || string_name.Get() == nullptr ||
       method_object.Get() == nullptr || method_return.Get() == nullptr) {
@@ -1947,16 +1954,29 @@
         StackHandleScope<2> hs(self);
         Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
         Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
-        ArtMethod* method = Runtime::Current()->GetClassLinker()->ResolveMethodWithoutInvokeType(
+        ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+        ArtMethod* method = class_linker->ResolveMethodWithoutInvokeType(
             klass->GetDexFile(), index, dex_cache, class_loader);
         if (method == nullptr) {
           return false;
         }
+        size_t pointer_size = class_linker->GetImagePointerSize();
         set_object = true;
+        DCHECK(!Runtime::Current()->IsActiveTransaction());
         if (method->IsConstructor()) {
-          element_object = mirror::Constructor::CreateFromArtMethod(self, method);
+          if (pointer_size == 8U) {
+            element_object = mirror::Constructor::CreateFromArtMethod<8U, false>(self, method);
+          } else {
+            DCHECK_EQ(pointer_size, 4U);
+            element_object = mirror::Constructor::CreateFromArtMethod<4U, false>(self, method);
+          }
         } else {
-          element_object = mirror::Method::CreateFromArtMethod(self, method);
+          if (pointer_size == 8U) {
+            element_object = mirror::Method::CreateFromArtMethod<8U, false>(self, method);
+          } else {
+            DCHECK_EQ(pointer_size, 4U);
+            element_object = mirror::Method::CreateFromArtMethod<4U, false>(self, method);
+          }
         }
         if (element_object == nullptr) {
           return false;
@@ -1978,7 +1998,13 @@
           return false;
         }
         set_object = true;
-        element_object = mirror::Field::CreateFromArtField(self, field, true);
+        size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        if (pointer_size == 8) {
+          element_object = mirror::Field::CreateFromArtField<8U>(self, field, true);
+        } else {
+          DCHECK_EQ(pointer_size, 4U);
+          element_object = mirror::Field::CreateFromArtField<4U>(self, field, true);
+        }
         if (element_object == nullptr) {
           return false;
         }
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 49043f6..0306bd6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -876,8 +876,10 @@
   DCHECK(interface_method != nullptr) << PrettyMethod(proxy_method);
   DCHECK(!interface_method->IsProxyMethod()) << PrettyMethod(interface_method);
   self->EndAssertNoThreadSuspension(old_cause);
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
   jobject interface_method_jobj = soa.AddLocalReference<jobject>(
-      mirror::Method::CreateFromArtMethod(soa.Self(), interface_method));
+      mirror::Method::CreateFromArtMethod<sizeof(void*), false>(soa.Self(), interface_method));
 
   // All naked Object*s should now be in jobjects, so its safe to go into the main invoke code
   // that performs allocations.
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 4e40aea..4e83913 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -318,6 +318,18 @@
   const std::set<mirror::Object*>& references_;
 };
 
+class EmptyMarkObjectVisitor : public MarkObjectVisitor {
+ public:
+  mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE { return obj; }
+  void MarkHeapReference(mirror::HeapReference<mirror::Object>*) OVERRIDE {}
+};
+
+void ModUnionTable::FilterCards() {
+  EmptyMarkObjectVisitor visitor;
+  // Use empty visitor since filtering is automatically done by UpdateAndMarkReferences.
+  UpdateAndMarkReferences(&visitor);
+}
+
 void ModUnionTableReferenceCache::Verify() {
   // Start by checking that everything in the mod union table is marked.
   for (const auto& ref_pair : references_) {
@@ -364,6 +376,31 @@
   }
 }
 
+void ModUnionTableReferenceCache::VisitObjects(ObjectCallback* callback, void* arg) {
+  CardTable* const card_table = heap_->GetCardTable();
+  ContinuousSpaceBitmap* live_bitmap = space_->GetLiveBitmap();
+  for (uint8_t* card : cleared_cards_) {
+    uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
+    uintptr_t end = start + CardTable::kCardSize;
+    live_bitmap->VisitMarkedRange(start,
+                                  end,
+                                  [this, callback, arg](mirror::Object* obj) {
+      callback(obj, arg);
+    });
+  }
+  // This may visit the same card twice; TODO: avoid this.
+  for (const auto& pair : references_) {
+    const uint8_t* card = pair.first;
+    uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
+    uintptr_t end = start + CardTable::kCardSize;
+    live_bitmap->VisitMarkedRange(start,
+                                  end,
+                                  [this, callback, arg](mirror::Object* obj) {
+      callback(obj, arg);
+    });
+  }
+}
+
 void ModUnionTableReferenceCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
   CardTable* const card_table = heap_->GetCardTable();
   std::vector<mirror::HeapReference<mirror::Object>*> cards_references;
@@ -502,6 +539,22 @@
       0, RoundUp(space_->Size(), CardTable::kCardSize) / CardTable::kCardSize, bit_visitor);
 }
 
+void ModUnionTableCardCache::VisitObjects(ObjectCallback* callback, void* arg) {
+  card_bitmap_->VisitSetBits(
+      0,
+      RoundUp(space_->Size(), CardTable::kCardSize) / CardTable::kCardSize,
+      [this, callback, arg](size_t bit_index) {
+        const uintptr_t start = card_bitmap_->AddrFromBitIndex(bit_index);
+        DCHECK(space_->HasAddress(reinterpret_cast<mirror::Object*>(start)))
+            << start << " " << *space_;
+        space_->GetLiveBitmap()->VisitMarkedRange(start,
+                                                  start + CardTable::kCardSize,
+                                                  [this, callback, arg](mirror::Object* obj) {
+          callback(obj, arg);
+        });
+      });
+}
+
 void ModUnionTableCardCache::Dump(std::ostream& os) {
   os << "ModUnionTable dirty cards: [";
   // TODO: Find cleaner way of doing this.
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index a7a4246..bf665c5 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -68,6 +68,9 @@
   // spaces which are stored in the mod-union table.
   virtual void UpdateAndMarkReferences(MarkObjectVisitor* visitor) = 0;
 
+  // Visit all of the objects that may contain references to other spaces.
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) = 0;
+
   // Verification, sanity checks that we don't have clean cards which conflict with our cached data
   // for said cards. Exclusive lock is required since verify sometimes uses
   // SpaceBitmap::VisitMarkedRange and VisitMarkedRange can't know if the callback will modify the
@@ -78,6 +81,9 @@
   // doesn't need to be aligned.
   virtual bool ContainsCardFor(uintptr_t addr) = 0;
 
+  // Filter out cards that don't need to be marked. Automatically done with UpdateAndMarkReferences.
+  void FilterCards();
+
   virtual void Dump(std::ostream& os) = 0;
 
   space::ContinuousSpace* GetSpace() {
@@ -115,6 +121,10 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(Locks::heap_bitmap_lock_);
 
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Exclusive lock is required since verify uses SpaceBitmap::VisitMarkedRange and
   // VisitMarkedRange can't know if the callback will modify the bitmap or not.
   void Verify() OVERRIDE
@@ -156,6 +166,10 @@
       REQUIRES(Locks::heap_bitmap_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Nothing to verify.
   virtual void Verify() OVERRIDE {}
 
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d2d2f23..bd07bb8 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -22,6 +22,7 @@
 #include "base/systrace.h"
 #include "debugger.h"
 #include "gc/accounting/heap_bitmap-inl.h"
+#include "gc/accounting/mod_union_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/reference_processor.h"
 #include "gc/space/image_space.h"
@@ -40,6 +41,12 @@
 namespace collector {
 
 static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
+// If kGrayDirtyImmuneObjects is true then we gray only immune objects on dirty cards during the
+// GC pause, which avoids dirtying the pages of immune objects that were not already dirty.
+static constexpr bool kGrayDirtyImmuneObjects = true;
+// If kFilterModUnionCards is true then we attempt to filter out cards that don't need to stay
+// dirty in the mod-union table. Enabled only in debug builds since it does not seem to help
+// the pause much.
+static constexpr bool kFilterModUnionCards = kIsDebugBuild;
 
 ConcurrentCopying::ConcurrentCopying(Heap* heap,
                                      const std::string& name_prefix,
@@ -315,12 +322,87 @@
       TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
       Runtime::Current()->VisitTransactionRoots(cc);
     }
+    if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+      cc->GrayAllDirtyImmuneObjects();
+      if (kIsDebugBuild) {
+        // Check that all non-gray immune objects only reference immune objects.
+        cc->VerifyGrayImmuneObjects();
+      }
+    }
   }
 
  private:
   ConcurrentCopying* const concurrent_copying_;
 };
 
+class ConcurrentCopying::VerifyGrayImmuneObjectsVisitor {
+ public:
+  explicit VerifyGrayImmuneObjectsVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */)
+      const ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_)
+      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
+    CheckReference(obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(offset),
+                   obj, offset);
+  }
+
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE {
+    CHECK(klass->IsTypeOfReferenceClass());
+    CheckReference(ref->GetReferent<kWithoutReadBarrier>(),
+                   ref,
+                   mirror::Reference::ReferentOffset());
+  }
+
+  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    CheckReference(root->AsMirrorPtr(), nullptr, MemberOffset(0));
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+
+  void CheckReference(mirror::Object* ref, mirror::Object* holder, MemberOffset offset) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (ref != nullptr) {
+      CHECK(collector_->immune_spaces_.ContainsObject(ref))
+          << "Non gray object references non immune object "<< ref << " " << PrettyTypeOf(ref)
+          << " in holder " << holder << " " << PrettyTypeOf(holder) << " offset="
+          << offset.Uint32Value();
+    }
+  }
+};
+
+void ConcurrentCopying::VerifyGrayImmuneObjects() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  for (auto& space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    VerifyGrayImmuneObjectsVisitor visitor(this);
+    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                  reinterpret_cast<uintptr_t>(space->Limit()),
+                                  [&visitor](mirror::Object* obj)
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+      // If an object is not gray, it should only have references to things in the immune spaces.
+      if (obj->GetReadBarrierPointer() != ReadBarrier::GrayPtr()) {
+        obj->VisitReferences</*kVisitNativeRoots*/true,
+                             kDefaultVerifyFlags,
+                             kWithoutReadBarrier>(visitor, visitor);
+      }
+    });
+  }
+}
+
 // Switch threads from from-space refs to to-space refs. Forward/mark the thread roots.
 void ConcurrentCopying::FlipThreadRoots() {
   TimingLogger::ScopedTiming split("FlipThreadRoots", GetTimings());
@@ -350,6 +432,52 @@
   }
 }
 
+class ConcurrentCopying::GrayImmuneObjectVisitor {
+ public:
+  explicit GrayImmuneObjectVisitor() {}
+
+  ALWAYS_INLINE void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (kUseBakerReadBarrier) {
+      if (kIsDebugBuild) {
+        Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+      }
+      obj->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+    }
+  }
+
+  static void Callback(mirror::Object* obj, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
+    reinterpret_cast<GrayImmuneObjectVisitor*>(arg)->operator()(obj);
+  }
+};
+
+void ConcurrentCopying::GrayAllDirtyImmuneObjects() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  accounting::CardTable* const card_table = heap->GetCardTable();
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    GrayImmuneObjectVisitor visitor;
+    accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
+    // Mark all the objects on dirty cards since these may point to objects in other spaces.
+    // Once these are marked, the GC clears the cards later on.
+    // The table is non-null for the boot image and zygote spaces; it is null only for
+    // application image spaces.
+    if (table != nullptr) {
+      // TODO: Add preclean outside the pause.
+      table->ClearCards();
+      table->VisitObjects(GrayImmuneObjectVisitor::Callback, &visitor);
+    } else {
+      // TODO: Consider having a mark bitmap for app image spaces and avoid scanning during the
+      // pause because app image spaces are all dirty pages anyway.
+      card_table->Scan<false>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor);
+    }
+  }
+  // Since all of the objects that may point to other spaces are marked, we can avoid all the read
+  // barriers in the immune spaces.
+  updated_all_immune_objects_.StoreRelaxed(true);
+}
+
 void ConcurrentCopying::SwapStacks() {
   heap_->SwapStacks();
 }
@@ -393,7 +521,17 @@
       : collector_(cc) {}
 
   void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    collector_->ScanImmuneObject(obj);
+    if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+      if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+        collector_->ScanImmuneObject(obj);
+        // Done scanning the object, go back to white.
+        bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                        ReadBarrier::WhitePtr());
+        CHECK(success);
+      }
+    } else {
+      collector_->ScanImmuneObject(obj);
+    }
   }
 
  private:
@@ -415,13 +553,16 @@
   if (kUseBakerReadBarrier) {
     gc_grays_immune_objects_ = false;
   }
-  for (auto& space : immune_spaces_.GetSpaces()) {
-    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-    ImmuneSpaceScanObjVisitor visitor(this);
-    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                  reinterpret_cast<uintptr_t>(space->Limit()),
-                                  visitor);
+  {
+    TimingLogger::ScopedTiming split2("ScanImmuneSpaces", GetTimings());
+    for (auto& space : immune_spaces_.GetSpaces()) {
+      DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      ImmuneSpaceScanObjVisitor visitor(this);
+      live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                    reinterpret_cast<uintptr_t>(space->Limit()),
+                                    visitor);
+    }
   }
   if (kUseBakerReadBarrier) {
     // This release fence makes the field updates in the above loop visible before allowing mutator
@@ -1573,12 +1714,19 @@
 
 // Scan ref fields of an object.
 inline void ConcurrentCopying::Scan(mirror::Object* to_ref) {
+  if (kIsDebugBuild) {
+    // Avoid all read barriers while visiting references, to help performance.
+    Thread::Current()->ModifyDebugDisallowReadBarrier(1);
+  }
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   DCHECK_EQ(Thread::Current(), thread_running_gc_);
   RefFieldsVisitor visitor(this);
   // Disable the read barrier for a performance reason.
   to_ref->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
       visitor, visitor);
+  if (kIsDebugBuild) {
+    Thread::Current()->ModifyDebugDisallowReadBarrier(-1);
+  }
 }
 
 // Process a field.
@@ -1695,7 +1843,7 @@
   mirror::Class* int_array_class = mirror::IntArray::GetArrayClass();
   CHECK(int_array_class != nullptr);
   AssertToSpaceInvariant(nullptr, MemberOffset(0), int_array_class);
-  size_t component_size = int_array_class->GetComponentSize();
+  size_t component_size = int_array_class->GetComponentSize<kWithoutReadBarrier>();
   CHECK_EQ(component_size, sizeof(int32_t));
   size_t data_offset = mirror::Array::DataOffset(component_size).SizeValue();
   if (data_offset > byte_size) {
@@ -1708,13 +1856,14 @@
   } else {
     // Use an int array.
     dummy_obj->SetClass(int_array_class);
-    CHECK(dummy_obj->IsArrayInstance());
+    CHECK((dummy_obj->IsArrayInstance<kVerifyNone, kWithoutReadBarrier>()));
     int32_t length = (byte_size - data_offset) / component_size;
-    dummy_obj->AsArray()->SetLength(length);
-    CHECK_EQ(dummy_obj->AsArray()->GetLength(), length)
+    mirror::Array* dummy_arr = dummy_obj->AsArray<kVerifyNone, kWithoutReadBarrier>();
+    dummy_arr->SetLength(length);
+    CHECK_EQ(dummy_arr->GetLength(), length)
         << "byte_size=" << byte_size << " length=" << length
         << " component_size=" << component_size << " data_offset=" << data_offset;
-    CHECK_EQ(byte_size, dummy_obj->SizeOf())
+    CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone, kWithoutReadBarrier>()))
         << "byte_size=" << byte_size << " length=" << length
         << " component_size=" << component_size << " data_offset=" << data_offset;
   }
@@ -2052,8 +2201,23 @@
   }
   {
     ReaderMutexLock mu(self, *Locks::mutator_lock_);
-    WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
-    heap_->ClearMarkedObjects();
+    {
+      WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+      heap_->ClearMarkedObjects();
+    }
+    if (kUseBakerReadBarrier && kFilterModUnionCards) {
+      TimingLogger::ScopedTiming split("FilterModUnionCards", GetTimings());
+      ReaderMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+      gc::Heap* const heap = Runtime::Current()->GetHeap();
+      for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+        DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+        accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
+        // Filter out cards that don't need to be set.
+        if (table != nullptr) {
+          table->FilterCards();
+        }
+      }
+    }
   }
   if (measure_read_barrier_slow_path_) {
     MutexLock mu(self, rb_slow_path_histogram_lock_);
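
Tying this file's pieces together: the visitor change near the top scans an immune object only if it is gray, then whitens it, so a clean object is never touched twice. A hedged C++ model of that protocol, with a plain atomic int standing in for the Baker read-barrier pointer (not ART's actual types):

#include <atomic>
#include <cassert>

enum State : int { kWhite = 0, kGray = 1 };

struct Object {
  std::atomic<int> rb_state{kWhite};
};

void ScanImmuneObject(Object* obj, int* scanned) { (void)obj; ++*scanned; }

void VisitImmune(Object* obj, bool gray_dirty_immune_objects, int* scanned) {
  if (gray_dirty_immune_objects) {
    if (obj->rb_state.load() == kGray) {
      ScanImmuneObject(obj, scanned);
      int expected = kGray;  // done scanning: transition back to white
      bool success = obj->rb_state.compare_exchange_strong(expected, kWhite);
      assert(success);
    }
  } else {
    ScanImmuneObject(obj, scanned);  // no filtering: scan everything
  }
}

int main() {
  Object clean, dirty;
  dirty.rb_state = kGray;
  int scanned = 0;
  VisitImmune(&clean, /*gray_dirty_immune_objects=*/true, &scanned);
  VisitImmune(&dirty, /*gray_dirty_immune_objects=*/true, &scanned);
  assert(scanned == 1 && dirty.rb_state.load() == kWhite);
  return 0;
}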
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 6a8d052..32b05fa 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -152,6 +152,12 @@
   bool ProcessMarkStackOnce() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void ProcessMarkStackRef(mirror::Object* to_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
+  void GrayAllDirtyImmuneObjects()
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void VerifyGrayImmuneObjects()
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
   size_t ProcessThreadLocalMarkStacks(bool disable_weak_ref_access)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void RevokeThreadLocalMarkStacks(bool disable_weak_ref_access)
@@ -294,12 +300,14 @@
   class ComputeUnevacFromSpaceLiveRatioVisitor;
   class DisableMarkingCheckpoint;
   class FlipCallback;
+  class GrayImmuneObjectVisitor;
   class ImmuneSpaceScanObjVisitor;
   class LostCopyVisitor;
   class RefFieldsVisitor;
   class RevokeThreadLocalMarkStackCheckpoint;
   class ScopedGcGraysImmuneObjects;
   class ThreadFlipVisitor;
+  class VerifyGrayImmuneObjectsVisitor;
   class VerifyNoFromSpaceRefsFieldVisitor;
   class VerifyNoFromSpaceRefsObjectVisitor;
   class VerifyNoFromSpaceRefsVisitor;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 6f4767e..74e6c3c 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2546,6 +2546,9 @@
   // Set all the cards in the mod-union table since we don't know which objects contain references
   // to large objects.
   mod_union_table->SetCards();
+  // Filter out cards that do not need to be dirty. This is mostly for the CC collector, so
+  // that it does not gray the objects on all the cards in the zygote space.
+  mod_union_table->FilterCards();
   AddModUnionTable(mod_union_table);
   large_object_space_->SetAllLargeObjectsAsZygoteObjects(self);
   if (collector::SemiSpace::kUseRememberedSet) {
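
FilterCards() undoes most of the blanket SetCards() above. A rough sketch of the operation, assuming filtering keeps exactly the cards whose objects hold references into other spaces (CardHasReferencesToOtherSpaces is a hypothetical oracle; the real table inspects the objects on each card):

#include <cassert>
#include <cstdint>
#include <iterator>
#include <set>

using CardAddr = uintptr_t;

// Hypothetical predicate; the real implementation scans the card's objects.
bool CardHasReferencesToOtherSpaces(CardAddr card) { return (card & 1) != 0; }

// Keep only the cards that genuinely need to stay set, so the CC collector
// does not later gray every object in the space.
void FilterCards(std::set<CardAddr>& cards) {
  for (auto it = cards.begin(); it != cards.end();) {
    it = CardHasReferencesToOtherSpaces(*it) ? std::next(it) : cards.erase(it);
  }
}

int main() {
  std::set<CardAddr> cards = {0, 1, 2, 3};
  FilterCards(cards);
  assert(cards == (std::set<CardAddr>{1, 3}));
  return 0;
}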
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 6088a43..62625c4 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -44,7 +44,9 @@
     // 1 element cyclic queue, i.e.: Reference ref = ..; ref.pendingNext = ref;
     list_ = ref;
   } else {
-    mirror::Reference* head = list_->GetPendingNext();
+    // The list is owned by the GC; everything that has been inserted must already be at least
+    // gray.
+    mirror::Reference* head = list_->GetPendingNext<kWithoutReadBarrier>();
     DCHECK(head != nullptr);
     ref->SetPendingNext(head);
   }
@@ -54,14 +56,14 @@
 
 mirror::Reference* ReferenceQueue::DequeuePendingReference() {
   DCHECK(!IsEmpty());
-  mirror::Reference* ref = list_->GetPendingNext();
+  mirror::Reference* ref = list_->GetPendingNext<kWithoutReadBarrier>();
   DCHECK(ref != nullptr);
   // Note: the following code is thread-safe because it is only called from ProcessReferences which
   // is single threaded.
   if (list_ == ref) {
     list_ = nullptr;
   } else {
-    mirror::Reference* next = ref->GetPendingNext();
+    mirror::Reference* next = ref->GetPendingNext<kWithoutReadBarrier>();
     list_->SetPendingNext(next);
   }
   ref->SetPendingNext(nullptr);
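
For context, the queue being modified is a singly linked circular list threaded through Reference.pendingNext, with list_ as a fixed anchor element. A plain-pointer sketch of the enqueue/dequeue shape (the kWithoutReadBarrier loads above are safe precisely because the GC owns this list and everything on it is already at least gray):

#include <cassert>
#include <cstddef>

struct Reference { Reference* pending_next = nullptr; };

struct ReferenceQueue {
  Reference* list_ = nullptr;  // anchor of the circular list

  void Enqueue(Reference* ref) {
    if (list_ == nullptr) {
      list_ = ref;                       // becomes a 1-element cyclic queue below
    } else {
      Reference* head = list_->pending_next;
      ref->pending_next = head;          // splice the new element in after list_
    }
    list_->pending_next = ref;
  }

  Reference* Dequeue() {
    Reference* ref = list_->pending_next;  // current head
    if (list_ == ref) {
      list_ = nullptr;                   // last element: queue is now empty
    } else {
      list_->pending_next = ref->pending_next;
    }
    ref->pending_next = nullptr;
    return ref;
  }
};

int main() {
  ReferenceQueue q;
  Reference a, b;
  q.Enqueue(&a);
  q.Enqueue(&b);
  // Elements splice in after the anchor, so the anchor comes out last.
  assert(q.Dequeue() == &b && q.Dequeue() == &a && q.list_ == nullptr);
  return 0;
}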
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
new file mode 100644
index 0000000..f1a3256
--- /dev/null
+++ b/runtime/generated/asm_support_gen.h
@@ -0,0 +1,115 @@
+
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+#define ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+
+// This file has been auto-generated by cpp-define-generator; do not edit directly.
+
+#define STACK_REFERENCE_SIZE 0x4
+DEFINE_CHECK_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), (static_cast<size_t>(sizeof(art::StackReference<art::mirror::Object>))))
+#define COMPRESSED_REFERENCE_SIZE 0x4
+DEFINE_CHECK_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE), (static_cast<size_t>(sizeof(art::mirror::CompressedReference<art::mirror::Object>))))
+#define COMPRESSED_REFERENCE_SIZE_SHIFT 0x2
+DEFINE_CHECK_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE_SHIFT), (static_cast<size_t>(art::WhichPowerOf2(sizeof(art::mirror::CompressedReference<art::mirror::Object>)))))
+#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveAll))))
+#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 0x8
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kRefsOnly))))
+#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 0x10
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kRefsAndArgs))))
+#define THREAD_FLAGS_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_FLAGS_OFFSET), (static_cast<int32_t>(art::Thread:: ThreadFlagsOffset<sizeof(void*)>().Int32Value())))
+#define THREAD_ID_OFFSET 12
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_ID_OFFSET), (static_cast<int32_t>(art::Thread:: ThinLockIdOffset<sizeof(void*)>().Int32Value())))
+#define THREAD_IS_GC_MARKING_OFFSET 52
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_IS_GC_MARKING_OFFSET), (static_cast<int32_t>(art::Thread:: IsGcMarkingOffset<sizeof(void*)>().Int32Value())))
+#define THREAD_CARD_TABLE_OFFSET 128
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CARD_TABLE_OFFSET), (static_cast<int32_t>(art::Thread:: CardTableOffset<sizeof(void*)>().Int32Value())))
+#define CODEITEM_INSNS_OFFSET 16
+DEFINE_CHECK_EQ(static_cast<int32_t>(CODEITEM_INSNS_OFFSET), (static_cast<int32_t>(__builtin_offsetof(art::DexFile::CodeItem, insns_))))
+#define MIRROR_OBJECT_CLASS_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(MIRROR_OBJECT_CLASS_OFFSET), (static_cast<int32_t>(art::mirror::Object:: ClassOffset().Int32Value())))
+#define MIRROR_OBJECT_LOCK_WORD_OFFSET 4
+DEFINE_CHECK_EQ(static_cast<int32_t>(MIRROR_OBJECT_LOCK_WORD_OFFSET), (static_cast<int32_t>(art::mirror::Object:: MonitorOffset().Int32Value())))
+#define MIRROR_CLASS_STATUS_INITIALIZED 0xa
+DEFINE_CHECK_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED), (static_cast<uint32_t>((art::mirror::Class::kStatusInitialized))))
+#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), (static_cast<uint32_t>((art::kAccClassIsFinalizable))))
+#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT 0x1f
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT), (static_cast<uint32_t>((art::MostSignificantBit(art::kAccClassIsFinalizable)))))
+#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_32 20
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_METHODS_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedMethodsOffset(4).Int32Value())))
+#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_64 24
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_METHODS_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedMethodsOffset(8).Int32Value())))
+#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_32 24
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_TYPES_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedTypesOffset(4).Int32Value())))
+#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_64 32
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_TYPES_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedTypesOffset(8).Int32Value())))
+#define ART_METHOD_JNI_OFFSET_32 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_JNI_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromJniOffset(4).Int32Value())))
+#define ART_METHOD_JNI_OFFSET_64 40
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_JNI_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromJniOffset(8).Int32Value())))
+#define ART_METHOD_QUICK_CODE_OFFSET_32 32
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(4).Int32Value())))
+#define ART_METHOD_QUICK_CODE_OFFSET_64 48
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(8).Int32Value())))
+#define LOCK_WORD_STATE_SHIFT 30
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kStateShift)))
+#define LOCK_WORD_STATE_MASK 0xc0000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kStateMaskShifted)))
+#define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_READ_BARRIER_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kReadBarrierStateShift)))
+#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x30000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShifted)))
+#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xcfffffff
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)))
+#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne)))
+#define OBJECT_ALIGNMENT_MASK 0x7
+DEFINE_CHECK_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), (static_cast<size_t>(art::kObjectAlignment - 1)))
+#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xfffffff8
+DEFINE_CHECK_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED), (static_cast<uint32_t>(~static_cast<uint32_t>(art::kObjectAlignment - 1))))
+#define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), (static_cast<int32_t>((art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 3
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), (static_cast<int32_t>((art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSizeShift))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 7
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK), (static_cast<int32_t>((static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1)))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff8
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32), (static_cast<uint32_t>((~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1)))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff8
+DEFINE_CHECK_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64), (static_cast<uint64_t>((~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1)))))
+#define ROSALLOC_RUN_FREE_LIST_OFFSET 8
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_RUN_FREE_LIST_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunFreeListOffset()))))
+#define ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunFreeListHeadOffset()))))
+#define ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET 16
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunFreeListSizeOffset()))))
+#define ROSALLOC_SLOT_NEXT_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_SLOT_NEXT_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunSlotNextOffset()))))
+#define THREAD_SUSPEND_REQUEST 1
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_REQUEST), (static_cast<int32_t>((art::kSuspendRequest))))
+#define THREAD_CHECKPOINT_REQUEST 2
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kCheckpointRequest))))
+#define JIT_CHECK_OSR (-1)
+DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_CHECK_OSR), (static_cast<int16_t>((art::jit::kJitCheckForOSR))))
+#define JIT_HOTNESS_DISABLE (-2)
+DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_HOTNESS_DISABLE), (static_cast<int16_t>((art::jit::kJitHotnessDisabled))))
+
+#endif  // ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+
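
Note that the generated header never defines DEFINE_CHECK_EQ itself; the including code supplies it. A toy illustration of the pattern, with a hypothetical StackReference stand-in and a runtime assert as the check (the real consumer's check mechanism may differ):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Stand-ins for an assembly-visible constant and the C++ type it must match.
#define STACK_REFERENCE_SIZE_EXAMPLE 0x4
template <typename T> struct StackReference { uint32_t ref_; };  // hypothetical

// A consumer defines DEFINE_CHECK_EQ before pulling in the generated header;
// here each invocation becomes a runtime assert.
#define DEFINE_CHECK_EQ(expected, actual) assert((expected) == (actual));

int main() {
  DEFINE_CHECK_EQ(static_cast<std::size_t>(STACK_REFERENCE_SIZE_EXAMPLE),
                  sizeof(StackReference<int>))
  return 0;
}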
diff --git a/runtime/interpreter/mterp/arm64/binopLit8.S b/runtime/interpreter/mterp/arm64/binopLit8.S
index 326c657..0b7c68a 100644
--- a/runtime/interpreter/mterp/arm64/binopLit8.S
+++ b/runtime/interpreter/mterp/arm64/binopLit8.S
@@ -13,7 +13,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 4257200..c791eb5 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -272,6 +272,14 @@
 .endm
 
 /*
+ * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit.
+ * Used to avoid an extra instruction in int-to-long.
+ */
+.macro GET_VREG_S reg, vreg
+    ldrsw   \reg, [xFP, \vreg, uxtw #2]
+.endm
+
+/*
  * Convert a virtual register index into an address.
  */
 .macro VREG_INDEX_TO_ADDR reg, vreg
diff --git a/runtime/interpreter/mterp/arm64/op_const_16.S b/runtime/interpreter/mterp/arm64/op_const_16.S
index 27f5273..f0e8192 100644
--- a/runtime/interpreter/mterp/arm64/op_const_16.S
+++ b/runtime/interpreter/mterp/arm64/op_const_16.S
@@ -1,5 +1,5 @@
     /* const/16 vAA, #+BBBB */
-    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
     lsr     w3, wINST, #8               // w3<- AA
     FETCH_ADVANCE_INST 2                // advance xPC, load wINST
     SET_VREG w0, w3                     // vAA<- w0
diff --git a/runtime/interpreter/mterp/arm64/op_const_4.S b/runtime/interpreter/mterp/arm64/op_const_4.S
index 04cd4f8..9a36115 100644
--- a/runtime/interpreter/mterp/arm64/op_const_4.S
+++ b/runtime/interpreter/mterp/arm64/op_const_4.S
@@ -1,8 +1,7 @@
     /* const/4 vA, #+B */
-    lsl     w1, wINST, #16              // w1<- Bxxx0000
+    sbfx    w1, wINST, #12, #4          // w1<- sssssssB
     ubfx    w0, wINST, #8, #4           // w0<- A
     FETCH_ADVANCE_INST 1                // advance xPC, load wINST
-    asr     w1, w1, #28                 // w1<- sssssssB (sign-extended)
     GET_INST_OPCODE ip                  // ip<- opcode from xINST
     SET_VREG w1, w0                     // fp[A]<- w1
     GOTO_OPCODE ip                      // execute next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_high16.S b/runtime/interpreter/mterp/arm64/op_const_high16.S
index dd51ce1..3a9edff 100644
--- a/runtime/interpreter/mterp/arm64/op_const_high16.S
+++ b/runtime/interpreter/mterp/arm64/op_const_high16.S
@@ -1,5 +1,5 @@
     /* const/high16 vAA, #+BBBB0000 */
-    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended
+    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended)
     lsr     w3, wINST, #8               // r3<- AA
     lsl     w0, w0, #16                 // r0<- BBBB0000
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm64/op_if_eqz.S b/runtime/interpreter/mterp/arm64/op_if_eqz.S
index 1d3202e1..47c1dee 100644
--- a/runtime/interpreter/mterp/arm64/op_if_eqz.S
+++ b/runtime/interpreter/mterp/arm64/op_if_eqz.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"eq" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"cbz     w2," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gez.S b/runtime/interpreter/mterp/arm64/op_if_gez.S
index 8e3abd3..087e094 100644
--- a/runtime/interpreter/mterp/arm64/op_if_gez.S
+++ b/runtime/interpreter/mterp/arm64/op_if_gez.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"ge" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"tbz     w2, #31," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gtz.S b/runtime/interpreter/mterp/arm64/op_if_gtz.S
index a4f2f6b..476b265 100644
--- a/runtime/interpreter/mterp/arm64/op_if_gtz.S
+++ b/runtime/interpreter/mterp/arm64/op_if_gtz.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"gt" }
+%include "arm64/zcmp.S" { "branch":"b.gt" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_lez.S b/runtime/interpreter/mterp/arm64/op_if_lez.S
index c1425fdd..2717a60 100644
--- a/runtime/interpreter/mterp/arm64/op_if_lez.S
+++ b/runtime/interpreter/mterp/arm64/op_if_lez.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"le" }
+%include "arm64/zcmp.S" { "branch":"b.le" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_ltz.S b/runtime/interpreter/mterp/arm64/op_if_ltz.S
index 03cd3d6..86089c1 100644
--- a/runtime/interpreter/mterp/arm64/op_if_ltz.S
+++ b/runtime/interpreter/mterp/arm64/op_if_ltz.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"lt" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"tbnz    w2, #31," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_nez.S b/runtime/interpreter/mterp/arm64/op_if_nez.S
index 21e1bc2..efacc88 100644
--- a/runtime/interpreter/mterp/arm64/op_if_nez.S
+++ b/runtime/interpreter/mterp/arm64/op_if_nez.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"ne" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"cbnz    w2," }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
index 30b30c2..e9388e4 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
@@ -4,8 +4,7 @@
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
     cbz     w3, common_errNullObject    // object was null
-    add     x4, x3, x4                  // create direct pointer
-    ldr     x0, [x4]
+    ldr     x0, [x3, x4]                // x0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     SET_VREG_WIDE x0, w2
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S
index 35830f3..45e3112 100644
--- a/runtime/interpreter/mterp/arm64/op_int_to_long.S
+++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S
@@ -1 +1,8 @@
-%include "arm64/funopWider.S" {"instr":"sxtw x0, w0", "srcreg":"w0", "tgtreg":"x0"}
+    /* int-to-long vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_S x0, w3                   // x0<- sign_extend(fp[B])
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // fp[A]<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
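
The point of the rewrite is that ldrsw (via the new GET_VREG_S) sign-extends during the load, so the old funopWider path's separate sxtw disappears. Semantically the two shapes are identical, as this C++ equivalence sketch shows; the saving is purely one instruction:

#include <cassert>
#include <cstdint>

// Old shape: 32-bit load (GET_VREG), then an explicit widening (sxtw x0, w0).
int64_t IntToLongOld(const int32_t* fp, unsigned vreg) {
  int32_t w0 = fp[vreg];
  return static_cast<int64_t>(w0);
}

// New shape: one sign-extending load (ldrsw), no separate extend instruction.
int64_t IntToLongNew(const int32_t* fp, unsigned vreg) {
  return fp[vreg];  // implicit sign extension on load
}

int main() {
  int32_t regs[2] = {-1, 42};
  assert(IntToLongOld(regs, 0) == IntToLongNew(regs, 0));
  assert(IntToLongNew(regs, 0) == -1 && IntToLongNew(regs, 1) == 42);
  return 0;
}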
diff --git a/runtime/interpreter/mterp/arm64/op_iput_quick.S b/runtime/interpreter/mterp/arm64/op_iput_quick.S
index 2afc51b..e95da76 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_quick.S
@@ -5,7 +5,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
index 566e2bf..6cec363 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
@@ -6,7 +6,6 @@
     cbz     w2, common_errNullObject    // object was null
     GET_VREG_WIDE x0, w0                // x0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
-    add     x1, x2, x3                  // create a direct pointer
-    str     x0, [x1]
+    str     x0, [x2, x3]                // obj.field<- x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_long_to_int.S b/runtime/interpreter/mterp/arm64/op_long_to_int.S
index 360a69b..73f58d8 100644
--- a/runtime/interpreter/mterp/arm64/op_long_to_int.S
+++ b/runtime/interpreter/mterp/arm64/op_long_to_int.S
@@ -1 +1,2 @@
-%include "arm64/funopNarrower.S" {"instr":"", "srcreg":"x0", "tgtreg":"w0"}
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "arm64/op_move.S"
diff --git a/runtime/interpreter/mterp/arm64/op_neg_double.S b/runtime/interpreter/mterp/arm64/op_neg_double.S
index e9064c4..d77859d 100644
--- a/runtime/interpreter/mterp/arm64/op_neg_double.S
+++ b/runtime/interpreter/mterp/arm64/op_neg_double.S
@@ -1 +1 @@
-%include "arm64/unopWide.S" {"preinstr":"mov x1, #0x8000000000000000", "instr":"add     x0, x0, x1"}
+%include "arm64/unopWide.S" {"instr":"eor     x0, x0, #0x8000000000000000"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_float.S b/runtime/interpreter/mterp/arm64/op_neg_float.S
index 49d51af..6652aec 100644
--- a/runtime/interpreter/mterp/arm64/op_neg_float.S
+++ b/runtime/interpreter/mterp/arm64/op_neg_float.S
@@ -1 +1 @@
-%include "arm64/unop.S" {"preinstr":"mov w4, #0x80000000", "instr":"add     w0, w0, w4"}
+%include "arm64/unop.S" {"instr":"eor     w0, w0, #0x80000000"}
diff --git a/runtime/interpreter/mterp/arm64/unop.S b/runtime/interpreter/mterp/arm64/unop.S
index 474a961..e681968 100644
--- a/runtime/interpreter/mterp/arm64/unop.S
+++ b/runtime/interpreter/mterp/arm64/unop.S
@@ -1,4 +1,3 @@
-%default {"preinstr":""}
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = op w0".
@@ -11,7 +10,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-    $preinstr                           // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     $instr                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm64/unopWide.S b/runtime/interpreter/mterp/arm64/unopWide.S
index 109302a..6ee4f92 100644
--- a/runtime/interpreter/mterp/arm64/unopWide.S
+++ b/runtime/interpreter/mterp/arm64/unopWide.S
@@ -1,4 +1,4 @@
-%default {"instr":"sub x0, xzr, x0", "preinstr":""}
+%default {"instr":"sub x0, xzr, x0"}
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = op x0".
@@ -10,7 +10,6 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    $preinstr
     $instr
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
index b303e6a..510a3c1 100644
--- a/runtime/interpreter/mterp/arm64/zcmp.S
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -1,3 +1,4 @@
+%default { "compare":"1" }
     /*
      * Generic one-operand compare-and-branch operation.  Provide a "condition"
      * fragment that specifies the comparison to perform.
@@ -8,8 +9,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if ${compare}
     cmp     w2, #0                      // compare (vA, 0)
-    b.${condition} MterpCommonTakenBranchNoFlags
+    .endif
+    ${branch} MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
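
These substitutions let each zero-compare opcode pick the fused AArch64 form when one exists: cbz/cbnz for eq/ne, and tbz/tbnz on bit 31 for ge/lt, since testing the sign bit is exactly a signed comparison against zero. gt/le have no fused form, so they keep compare=1 and the cmp + b.cond pair. A C++ sanity check of the sign-bit identity:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x : {-2, -1, 0, 1, 2}) {
    // tbnz w2, #31 branches iff the sign bit is set, i.e. x < 0 ...
    bool sign_bit = (static_cast<uint32_t>(x) >> 31) != 0;
    assert(sign_bit == (x < 0));
    // ... and tbz w2, #31 branches iff it is clear, i.e. x >= 0.
    assert(!sign_bit == (x >= 0));
  }
  return 0;
}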
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
index 57206d2..6427ead 100644
--- a/runtime/interpreter/mterp/config_arm64
+++ b/runtime/interpreter/mterp/config_arm64
@@ -20,9 +20,6 @@
 handler-style computed-goto
 handler-size 128
 
-# source for alternate entry stub
-asm-alt-stub arm64/alt_stub.S
-
 # file header and basic definitions
 import arm64/header.S
 
@@ -295,5 +292,12 @@
     # op op_unused_ff FALLBACK
 op-end
 
-# common subroutines for asm
+# common subroutines for asm; we emit the footer before the alternate
+# entry stubs, so that TBZ/TBNZ branches in ops can reach targets in the footer
 import arm64/footer.S
+
+# source for alternate entry stub
+asm-alt-stub arm64/alt_stub.S
+
+# emit alternate entry stubs
+alt-ops
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index e318782..de37e07 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -279,6 +279,14 @@
 .endm
 
 /*
+ * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit.
+ * Used to avoid an extra instruction in int-to-long.
+ */
+.macro GET_VREG_S reg, vreg
+    ldrsw   \reg, [xFP, \vreg, uxtw #2]
+.endm
+
+/*
  * Convert a virtual register index into an address.
  */
 .macro VREG_INDEX_TO_ADDR reg, vreg
@@ -695,10 +703,9 @@
 .L_op_const_4: /* 0x12 */
 /* File: arm64/op_const_4.S */
     /* const/4 vA, #+B */
-    lsl     w1, wINST, #16              // w1<- Bxxx0000
+    sbfx    w1, wINST, #12, #4          // w1<- sssssssB
     ubfx    w0, wINST, #8, #4           // w0<- A
     FETCH_ADVANCE_INST 1                // advance xPC, load wINST
-    asr     w1, w1, #28                 // w1<- sssssssB (sign-extended)
     GET_INST_OPCODE ip                  // ip<- opcode from xINST
     SET_VREG w1, w0                     // fp[A]<- w1
     GOTO_OPCODE ip                      // execute next instruction
@@ -708,7 +715,7 @@
 .L_op_const_16: /* 0x13 */
 /* File: arm64/op_const_16.S */
     /* const/16 vAA, #+BBBB */
-    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
     lsr     w3, wINST, #8               // w3<- AA
     FETCH_ADVANCE_INST 2                // advance xPC, load wINST
     SET_VREG w0, w3                     // vAA<- w0
@@ -734,7 +741,7 @@
 .L_op_const_high16: /* 0x15 */
 /* File: arm64/op_const_high16.S */
     /* const/high16 vAA, #+BBBB0000 */
-    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended
+    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended)
     lsr     w3, wINST, #8               // r3<- AA
     lsl     w0, w0, #16                 // r0<- BBBB0000
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -1465,8 +1472,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.eq MterpCommonTakenBranchNoFlags
+    .endif
+    cbz     w2, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1489,8 +1498,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.ne MterpCommonTakenBranchNoFlags
+    .endif
+    cbnz    w2, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1513,8 +1524,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.lt MterpCommonTakenBranchNoFlags
+    .endif
+    tbnz    w2, #31, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1537,8 +1550,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.ge MterpCommonTakenBranchNoFlags
+    .endif
+    tbz     w2, #31, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1561,7 +1576,9 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 1
     cmp     w2, #0                      // compare (vA, 0)
+    .endif
     b.gt MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
@@ -1585,7 +1602,9 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 1
     cmp     w2, #0                      // compare (vA, 0)
+    .endif
     b.le MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
@@ -3192,7 +3211,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     sub     w0, wzr, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3218,7 +3236,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     mvn     w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3243,7 +3260,6 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    
     sub x0, xzr, x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
@@ -3267,7 +3283,6 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    
     mvn     x0, x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
@@ -3292,9 +3307,8 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-    mov w4, #0x80000000                           // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    add     w0, w0, w4                              // w0<- op, w0-w3 changed
+    eor     w0, w0, #0x80000000                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                     // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -3317,8 +3331,7 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    mov x1, #0x8000000000000000
-    add     x0, x0, x1
+    eor     x0, x0, #0x8000000000000000
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
     GOTO_OPCODE ip                      // jump to next instruction
@@ -3329,24 +3342,15 @@
     .balign 128
 .L_op_int_to_long: /* 0x81 */
 /* File: arm64/op_int_to_long.S */
-/* File: arm64/funopWider.S */
-    /*
-     * Generic 32bit-to-64bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "x0 = op w0".
-     *
-     * For: int-to-double, float-to-double, float-to-long
-     */
-    /* unop vA, vB */
+    /* int-to-long vA, vB */
     lsr     w3, wINST, #12              // w3<- B
     ubfx    w4, wINST, #8, #4           // w4<- A
-    GET_VREG w0, w3
+    GET_VREG_S x0, w3                   // x0<- sign_extend(fp[B])
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    sxtw x0, w0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    SET_VREG_WIDE x0, w4           // vA<- d0
+    SET_VREG_WIDE x0, w4                // fp[A]<- x0
     GOTO_OPCODE ip                      // jump to next instruction
 
-
 /* ------------------------------ */
     .balign 128
 .L_op_int_to_float: /* 0x82 */
@@ -3396,22 +3400,21 @@
     .balign 128
 .L_op_long_to_int: /* 0x84 */
 /* File: arm64/op_long_to_int.S */
-/* File: arm64/funopNarrower.S */
-    /*
-     * Generic 64bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "w0 = op x0".
-     *
-     * For: int-to-double, float-to-double, float-to-long
-     */
-    /* unop vA, vB */
-    lsr     w3, wINST, #12              // w3<- B
-    ubfx    w4, wINST, #8, #4           // w4<- A
-    GET_VREG_WIDE x0, w3
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: arm64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // x1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // x0<- A from 11:8
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-                                  // d0<- op
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    SET_VREG w0, w4                // vA<- d0
-    GOTO_OPCODE ip                      // jump to next instruction
+    GET_VREG w2, w1                     // x2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if 0
+    SET_VREG_OBJECT w2, w0              // fp[A]<- x2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- x2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
 
 
 /* ------------------------------ */
@@ -3608,7 +3611,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     sxtb    w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3634,7 +3636,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     uxth    w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3660,7 +3661,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     sxth    w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -6052,7 +6052,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6088,7 +6088,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6125,7 +6125,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6161,7 +6161,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6197,7 +6197,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6233,7 +6233,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6269,7 +6269,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6305,7 +6305,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6341,7 +6341,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6377,7 +6377,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6413,7 +6413,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6458,8 +6458,7 @@
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
     cbz     w3, common_errNullObject    // object was null
-    add     x4, x3, x4                  // create direct pointer
-    ldr     x0, [x4]
+    ldr     x0, [x3, x4]                // x0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     SET_VREG_WIDE x0, w2
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -6495,7 +6494,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6515,8 +6513,7 @@
     cbz     w2, common_errNullObject    // object was null
     GET_VREG_WIDE x0, w0                // x0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
-    add     x1, x2, x3                  // create a direct pointer
-    str     x0, [x1]
+    str     x0, [x2, x3]                // obj.field<- x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -6597,7 +6594,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6617,7 +6613,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6637,7 +6632,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6657,7 +6651,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6885,6 +6878,321 @@
     .global artMterpAsmSisterEnd
 artMterpAsmSisterEnd:
 
+/* File: arm64/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    ldr  x2, [xSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     x0, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    cbz     x0, MterpFallback                       // If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    // (self, shadow_frame)
+    cbz     w0, MterpExceptionReturn                // no local catch, back to caller.
+    ldr     x0, [xFP, #OFF_FP_CODE_ITEM]
+    ldr     w1, [xFP, #OFF_FP_DEX_PC]
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     xPC, x0, #CODEITEM_INSNS_OFFSET
+    add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    /* resume execution at catch block */
+    EXPORT_PC
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    wINST          <= signed offset
+ *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    cmp     wINST, #0
+    b.gt    .L_forward_branch           // don't add forward branches to hotness
+    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
+    subs    wPROFILE, wPROFILE, #1      // countdown
+    b.eq    .L_add_batch                // counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    REFRESH_IBASE
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L_suspend_request_pending
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback
+    REFRESH_IBASE                       // might have changed during suspend
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_no_count_backwards:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.ne    .L_resume_backward_branch
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_osr_forward
+.L_resume_forward_branch:
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_check_osr_forward:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    mov     x2, xSELF
+    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
+    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/*
+ * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
+ * still needs to get the opcode and branch to it, and flags are in lr.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    check1
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+check1:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback           // Something in the environment changed, switch interpreters
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    sxtw x2, wINST
+    bl MterpLogOSR
+#endif
+    mov  x0, #1                         // Signal normal return
+    b    MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     x0, #0                                  // signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* xFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     x0, #1                                  // signal return to caller.
+    b MterpDone
+MterpReturn:
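+    // Store the result through the shadow frame's result register pointer, then
+    // honor any pending suspend request before leaving the interpreter.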
+    ldr     x2, [xFP, #OFF_FP_RESULT_REGISTER]
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    str     x0, [x2]
+    mov     x0, xSELF
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.eq    check2
+    bl      MterpSuspendCheck                       // (self)
+check2:
+    mov     x0, #1                                  // signal return to caller.
+MterpDone:
+/*
+ * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     wPROFILE, #0
+    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
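+    /* Restore the callee-save registers spilled by ExecuteMterpImpl and pop the 80-byte frame. */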
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
+    ret
+
+MterpProfileActive:
+    mov     xINST, x0                               // stash return value
+    /* Report cached hotness counts */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xSELF
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
+    mov     x0, xINST                               // restore return value
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
+
 
     .global artMterpAsmAltInstructionStart
     .type   artMterpAsmAltInstructionStart, %function
@@ -11247,318 +11555,3 @@
     .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
     .global artMterpAsmAltInstructionEnd
 artMterpAsmAltInstructionEnd:
-/* File: arm64/footer.S */
-/*
- * ===========================================================================
- *  Common subroutines and data
- * ===========================================================================
- */
-
-
-/*
- * We've detected a condition that will result in an exception, but the exception
- * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
- * TUNING: for consistency, we may want to just go ahead and handle these here.
- */
-common_errDivideByZero:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogDivideByZeroException
-#endif
-    b MterpCommonFallback
-
-common_errArrayIndex:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogArrayIndexException
-#endif
-    b MterpCommonFallback
-
-common_errNegativeArraySize:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogNegativeArraySizeException
-#endif
-    b MterpCommonFallback
-
-common_errNoSuchMethod:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogNoSuchMethodException
-#endif
-    b MterpCommonFallback
-
-common_errNullObject:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogNullObjectException
-#endif
-    b MterpCommonFallback
-
-common_exceptionThrown:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogExceptionThrownException
-#endif
-    b MterpCommonFallback
-
-MterpSuspendFallback:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    ldr  x2, [xSELF, #THREAD_FLAGS_OFFSET]
-    bl MterpLogSuspendFallback
-#endif
-    b MterpCommonFallback
-
-/*
- * If we're here, something is out of the ordinary.  If there is a pending
- * exception, handle it.  Otherwise, roll back and retry with the reference
- * interpreter.
- */
-MterpPossibleException:
-    ldr     x0, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    cbz     x0, MterpFallback                       // If not, fall back to reference interpreter.
-    /* intentional fallthrough - handle pending exception. */
-/*
- * On return from a runtime helper routine, we've found a pending exception.
- * Can we handle it here - or need to bail out to caller?
- *
- */
-MterpException:
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    bl      MterpHandleException                    // (self, shadow_frame)
-    cbz     w0, MterpExceptionReturn                // no local catch, back to caller.
-    ldr     x0, [xFP, #OFF_FP_CODE_ITEM]
-    ldr     w1, [xFP, #OFF_FP_DEX_PC]
-    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
-    add     xPC, x0, #CODEITEM_INSNS_OFFSET
-    add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
-    /* Do we need to switch interpreters? */
-    bl      MterpShouldSwitchInterpreters
-    cbnz    w0, MterpFallback
-    /* resume execution at catch block */
-    EXPORT_PC
-    FETCH_INST
-    GET_INST_OPCODE ip
-    GOTO_OPCODE ip
-    /* NOTE: no fallthrough */
-/*
- * Common handling for branches with support for Jit profiling.
- * On entry:
- *    wINST          <= signed offset
- *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
- *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
- *
- * We have quite a few different cases for branch profiling, OSR detection and
- * suspend check support here.
- *
- * Taken backward branches:
- *    If profiling active, do hotness countdown and report if we hit zero.
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *    Is there a pending suspend request?  If so, suspend.
- *
- * Taken forward branches and not-taken backward branches:
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *
- * Our most common case is expected to be a taken backward branch with active jit profiling,
- * but no full OSR check and no pending suspend request.
- * Next most common case is not-taken branch with no full OSR check.
- *
- */
-MterpCommonTakenBranchNoFlags:
-    cmp     wINST, #0
-    b.gt    .L_forward_branch           // don't add forward branches to hotness
-    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
-    subs    wPROFILE, wPROFILE, #1      // countdown
-    b.eq    .L_add_batch                // counted down to zero - report
-.L_resume_backward_branch:
-    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
-    add     w2, wINST, wINST            // w2<- byte offset
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    REFRESH_IBASE
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    b.ne    .L_suspend_request_pending
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_suspend_request_pending:
-    EXPORT_PC
-    mov     x0, xSELF
-    bl      MterpSuspendCheck           // (self)
-    cbnz    x0, MterpFallback
-    REFRESH_IBASE                       // might have changed during suspend
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_no_count_backwards:
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.ne    .L_resume_backward_branch
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    b       .L_resume_backward_branch
-
-.L_forward_branch:
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_osr_forward
-.L_resume_forward_branch:
-    add     w2, wINST, wINST            // w2<- byte offset
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_check_osr_forward:
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    b       .L_resume_forward_branch
-
-.L_add_batch:
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    ldr     x0, [xFP, #OFF_FP_METHOD]
-    mov     x2, xSELF
-    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
-    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
-    b       .L_no_count_backwards
-
-/*
- * Entered from the conditional branch handlers when OSR check request active on
- * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
- */
-.L_check_not_taken_osr:
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, #2
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    FETCH_ADVANCE_INST 2
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-
-/*
- * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
- */
-MterpCheckSuspendAndContinue:
-    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    b.ne    check1
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-check1:
-    EXPORT_PC
-    mov     x0, xSELF
-    bl      MterpSuspendCheck           // (self)
-    cbnz    x0, MterpFallback           // Something in the environment changed, switch interpreters
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-/*
- * On-stack replacement has happened, and now we've returned from the compiled method.
- */
-MterpOnStackReplacement:
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    sxtw x2, wINST
-    bl MterpLogOSR
-#endif
-    mov  x0, #1                         // Signal normal return
-    b    MterpDone
-
-/*
- * Bail out to reference interpreter.
- */
-MterpFallback:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogFallback
-#endif
-MterpCommonFallback:
-    mov     x0, #0                                  // signal retry with reference interpreter.
-    b       MterpDone
-
-/*
- * We pushed some registers on the stack in ExecuteMterpImpl, then saved
- * SP and LR.  Here we restore SP, restore the registers, and then restore
- * LR to PC.
- *
- * On entry:
- *  uint32_t* xFP  (should still be live, pointer to base of vregs)
- */
-MterpExceptionReturn:
-    mov     x0, #1                                  // signal return to caller.
-    b MterpDone
-MterpReturn:
-    ldr     x2, [xFP, #OFF_FP_RESULT_REGISTER]
-    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
-    str     x0, [x2]
-    mov     x0, xSELF
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    b.eq    check2
-    bl      MterpSuspendCheck                       // (self)
-check2:
-    mov     x0, #1                                  // signal return to caller.
-MterpDone:
-/*
- * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
- * checking for OSR.  If greater than zero, we might have unreported hotness to register
- * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
- * should only reach zero immediately after a hotness decrement, and is then reset to either
- * a negative special state or the new non-zero countdown value.
- */
-    cmp     wPROFILE, #0
-    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
-    ret
-
-MterpProfileActive:
-    mov     xINST, x0                               // stash return value
-    /* Report cached hotness counts */
-    ldr     x0, [xFP, #OFF_FP_METHOD]
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xSELF
-    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
-    mov     x0, xINST                               // restore return value
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
-    ret
-
-    .cfi_endproc
-    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
-
-
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 1f473e4..793260d 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -300,11 +300,25 @@
                            PrettyDescriptor(klass).c_str());
     return;
   }
-  if (Runtime::Current()->IsActiveTransaction()) {
-    result->SetL(mirror::Field::CreateFromArtField<true>(self, found, true));
+  Runtime* runtime = Runtime::Current();
+  size_t pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
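+  // The image pointer size can differ from sizeof(void*) (e.g. a 64-bit dex2oat
+  // compiling a 32-bit image), so select the matching template instantiation at runtime.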
+  mirror::Field* field;
+  if (runtime->IsActiveTransaction()) {
+    if (pointer_size == 8U) {
+      field = mirror::Field::CreateFromArtField<8U, true>(self, found, true);
+    } else {
+      DCHECK_EQ(pointer_size, 4U);
+      field = mirror::Field::CreateFromArtField<4U, true>(self, found, true);
+    }
   } else {
-    result->SetL(mirror::Field::CreateFromArtField<false>(self, found, true));
+    if (pointer_size == 8U) {
+      field = mirror::Field::CreateFromArtField<8U, false>(self, found, true);
+    } else {
+      DCHECK_EQ(pointer_size, 4U);
+      field = mirror::Field::CreateFromArtField<4U, false>(self, found, true);
+    }
   }
+  result->SetL(field);
 }
 
 // This is required for Enum(Set) code, as that uses reflection to inspect enum classes.
@@ -319,11 +333,26 @@
   mirror::String* name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
   mirror::ObjectArray<mirror::Class>* args =
       shadow_frame->GetVRegReference(arg_offset + 2)->AsObjectArray<mirror::Class>();
-  if (Runtime::Current()->IsActiveTransaction()) {
-    result->SetL(mirror::Class::GetDeclaredMethodInternal<true>(self, klass, name, args));
+  Runtime* runtime = Runtime::Current();
+  bool transaction = runtime->IsActiveTransaction();
+  size_t pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
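+  // Same runtime dispatch on the image pointer size as in the field lookup above.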
+  mirror::Method* method;
+  if (transaction) {
+    if (pointer_size == 8U) {
+      method = mirror::Class::GetDeclaredMethodInternal<8U, true>(self, klass, name, args);
+    } else {
+      DCHECK_EQ(pointer_size, 4U);
+      method = mirror::Class::GetDeclaredMethodInternal<4U, true>(self, klass, name, args);
+    }
   } else {
-    result->SetL(mirror::Class::GetDeclaredMethodInternal<false>(self, klass, name, args));
+    if (pointer_size == 8U) {
+      method = mirror::Class::GetDeclaredMethodInternal<8U, false>(self, klass, name, args);
+    } else {
+      DCHECK_EQ(pointer_size, 4U);
+      method = mirror::Class::GetDeclaredMethodInternal<4U, false>(self, klass, name, args);
+    }
   }
+  result->SetL(method);
 }
 
 // Special managed code cut-out to allow constructor lookup in an unstarted runtime.
@@ -336,11 +365,26 @@
   }
   mirror::ObjectArray<mirror::Class>* args =
       shadow_frame->GetVRegReference(arg_offset + 1)->AsObjectArray<mirror::Class>();
-  if (Runtime::Current()->IsActiveTransaction()) {
-    result->SetL(mirror::Class::GetDeclaredConstructorInternal<true>(self, klass, args));
+  Runtime* runtime = Runtime::Current();
+  bool transaction = runtime->IsActiveTransaction();
+  size_t pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
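+  // Same pointer-size dispatch as above.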
+  mirror::Constructor* constructor;
+  if (transaction) {
+    if (pointer_size == 8U) {
+      constructor = mirror::Class::GetDeclaredConstructorInternal<8U, true>(self, klass, args);
+    } else {
+      DCHECK_EQ(pointer_size, 4U);
+      constructor = mirror::Class::GetDeclaredConstructorInternal<4U, true>(self, klass, args);
+    }
   } else {
-    result->SetL(mirror::Class::GetDeclaredConstructorInternal<false>(self, klass, args));
+    if (pointer_size == 8U) {
+      constructor = mirror::Class::GetDeclaredConstructorInternal<8U, false>(self, klass, args);
+    } else {
+      DCHECK_EQ(pointer_size, 4U);
+      constructor = mirror::Class::GetDeclaredConstructorInternal<4U, false>(self, klass, args);
+    }
   }
+  result->SetL(constructor);
 }
 
 void UnstartedRuntime::UnstartedClassGetEnclosingClass(
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index 5039d2d..aa606a2 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -637,7 +637,7 @@
     os << "\n\tclasses: ";
     for (const auto class_it : dex_data.class_set) {
       if (dex_file != nullptr) {
-        os << "\n\t\t" << PrettyType(class_it, *dex_file);
+        os << "\n\t\t" << dex_file->GetClassDescriptor(dex_file->GetClassDef(class_it));
       } else {
         os << class_it << ",";
       }
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 8cdf96d..6ef3999 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -375,10 +375,12 @@
     ScopedObjectAccess soa(env);
     ArtMethod* m = soa.DecodeMethod(mid);
     mirror::AbstractMethod* method;
+    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+    DCHECK(!Runtime::Current()->IsActiveTransaction());
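+    // JNI only runs in a started runtime, so the native pointer size and the
+    // no-transaction template arguments are safe, as the checks above assert.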
     if (m->IsConstructor()) {
-      method = mirror::Constructor::CreateFromArtMethod(soa.Self(), m);
+      method = mirror::Constructor::CreateFromArtMethod<sizeof(void*), false>(soa.Self(), m);
     } else {
-      method = mirror::Method::CreateFromArtMethod(soa.Self(), m);
+      method = mirror::Method::CreateFromArtMethod<sizeof(void*), false>(soa.Self(), m);
     }
     return soa.AddLocalReference<jobject>(method);
   }
@@ -387,7 +389,8 @@
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
     ArtField* f = soa.DecodeField(fid);
-    return soa.AddLocalReference<jobject>(mirror::Field::CreateFromArtField(soa.Self(), f, true));
+    return soa.AddLocalReference<jobject>(
+        mirror::Field::CreateFromArtField<sizeof(void*)>(soa.Self(), f, true));
   }
 
   static jclass GetObjectClass(JNIEnv* env, jobject java_object) {
diff --git a/runtime/mirror/abstract_method.cc b/runtime/mirror/abstract_method.cc
index 5a07dee..ef39132 100644
--- a/runtime/mirror/abstract_method.cc
+++ b/runtime/mirror/abstract_method.cc
@@ -21,12 +21,9 @@
 namespace art {
 namespace mirror {
 
-template <bool kTransactionActive>
+template <size_t kPointerSize, bool kTransactionActive>
 bool AbstractMethod::CreateFromArtMethod(ArtMethod* method) {
-  auto* interface_method = method->GetInterfaceMethodIfProxy(
-      kTransactionActive
-          ? Runtime::Current()->GetClassLinker()->GetImagePointerSize()
-          : sizeof(void*));
+  auto* interface_method = method->GetInterfaceMethodIfProxy(kPointerSize);
   SetArtMethod<kTransactionActive>(method);
   SetFieldObject<kTransactionActive>(DeclaringClassOffset(), method->GetDeclaringClass());
   SetFieldObject<kTransactionActive>(
@@ -36,8 +33,10 @@
   return true;
 }
 
-template bool AbstractMethod::CreateFromArtMethod<false>(ArtMethod* method);
-template bool AbstractMethod::CreateFromArtMethod<true>(ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<4U, false>(ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<4U, true>(ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<8U, false>(ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<8U, true>(ArtMethod* method);
 
 ArtMethod* AbstractMethod::GetArtMethod() {
   return reinterpret_cast<ArtMethod*>(GetField64(ArtMethodOffset()));
diff --git a/runtime/mirror/abstract_method.h b/runtime/mirror/abstract_method.h
index a39f94d..936b14c 100644
--- a/runtime/mirror/abstract_method.h
+++ b/runtime/mirror/abstract_method.h
@@ -34,7 +34,7 @@
 class MANAGED AbstractMethod : public AccessibleObject {
  public:
   // Called from Constructor::CreateFromArtMethod, Method::CreateFromArtMethod.
-  template <bool kTransactionActive = false>
+  template <size_t kPointerSize, bool kTransactionActive>
   bool CreateFromArtMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index b783a01..9a9fd87 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -457,6 +457,7 @@
   Class* declaring_class = method->GetDeclaringClass();
   DCHECK(declaring_class != nullptr) << PrettyClass(this);
   DCHECK(declaring_class->IsInterface()) << PrettyMethod(method);
+  DCHECK(!method->IsCopied());
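+  // The iftable lookup below relies on the method's position within its declaring
+  // interface, which a copied method does not have.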
   // TODO cache to improve lookup speed
   const int32_t iftable_count = GetIfTableCount();
   IfTable* iftable = GetIfTable();
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 1c31c57..375cb2f 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -1053,7 +1053,7 @@
   return (type_id == nullptr) ? DexFile::kDexNoIndex : dex_file.GetIndexForTypeId(*type_id);
 }
 
-template <bool kTransactionActive>
+template <size_t kPointerSize, bool kTransactionActive>
 mirror::Method* Class::GetDeclaredMethodInternal(Thread* self,
                                                  mirror::Class* klass,
                                                  mirror::String* name,
@@ -1074,11 +1074,8 @@
   auto h_args = hs.NewHandle(args);
   Handle<mirror::Class> h_klass = hs.NewHandle(klass);
   ArtMethod* result = nullptr;
-  const size_t pointer_size = kTransactionActive
-                                  ? Runtime::Current()->GetClassLinker()->GetImagePointerSize()
-                                  : sizeof(void*);
-  for (auto& m : h_klass->GetDeclaredVirtualMethods(pointer_size)) {
-    auto* np_method = m.GetInterfaceMethodIfProxy(pointer_size);
+  for (auto& m : h_klass->GetDeclaredVirtualMethods(kPointerSize)) {
+    auto* np_method = m.GetInterfaceMethodIfProxy(kPointerSize);
     // May cause thread suspension.
     mirror::String* np_name = np_method->GetNameAsString(self);
     if (!np_name->Equals(h_method_name.Get()) || !np_method->EqualParameters(h_args)) {
@@ -1089,19 +1086,19 @@
     }
     auto modifiers = m.GetAccessFlags();
     if ((modifiers & kSkipModifiers) == 0) {
-      return mirror::Method::CreateFromArtMethod<kTransactionActive>(self, &m);
+      return mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, &m);
     }
     if ((modifiers & kAccMiranda) == 0) {
       result = &m;  // Remember as potential result if it's not a miranda method.
     }
   }
   if (result == nullptr) {
-    for (auto& m : h_klass->GetDirectMethods(pointer_size)) {
+    for (auto& m : h_klass->GetDirectMethods(kPointerSize)) {
       auto modifiers = m.GetAccessFlags();
       if ((modifiers & kAccConstructor) != 0) {
         continue;
       }
-      auto* np_method = m.GetInterfaceMethodIfProxy(pointer_size);
+      auto* np_method = m.GetInterfaceMethodIfProxy(kPointerSize);
       // May cause thread suspension.
       mirror::String* np_name = np_method->GetNameAsString(self);
       if (np_name == nullptr) {
@@ -1115,50 +1112,69 @@
         continue;
       }
       if ((modifiers & kSkipModifiers) == 0) {
-        return mirror::Method::CreateFromArtMethod<kTransactionActive>(self, &m);
+        return mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, &m);
       }
       // Direct methods cannot be miranda methods, so this potential result must be synthetic.
       result = &m;
     }
   }
   return result != nullptr
-      ? mirror::Method::CreateFromArtMethod<kTransactionActive>(self, result)
+      ? mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, result)
       : nullptr;
 }
 
 template
-mirror::Method* Class::GetDeclaredMethodInternal<false>(Thread* self,
-                                                        mirror::Class* klass,
-                                                        mirror::String* name,
-                                                        mirror::ObjectArray<mirror::Class>* args);
+mirror::Method* Class::GetDeclaredMethodInternal<4U, false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
 template
-mirror::Method* Class::GetDeclaredMethodInternal<true>(Thread* self,
-                                                       mirror::Class* klass,
-                                                       mirror::String* name,
-                                                       mirror::ObjectArray<mirror::Class>* args);
+mirror::Method* Class::GetDeclaredMethodInternal<4U, true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Method* Class::GetDeclaredMethodInternal<8U, false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Method* Class::GetDeclaredMethodInternal<8U, true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
 
-template <bool kTransactionActive>
+template <size_t kPointerSize, bool kTransactionActive>
 mirror::Constructor* Class::GetDeclaredConstructorInternal(
     Thread* self,
     mirror::Class* klass,
     mirror::ObjectArray<mirror::Class>* args) {
   StackHandleScope<1> hs(self);
-  const size_t pointer_size = kTransactionActive
-                                  ? Runtime::Current()->GetClassLinker()->GetImagePointerSize()
-                                  : sizeof(void*);
-  ArtMethod* result = klass->GetDeclaredConstructor(self, hs.NewHandle(args), pointer_size);
+  ArtMethod* result = klass->GetDeclaredConstructor(self, hs.NewHandle(args), kPointerSize);
   return result != nullptr
-      ? mirror::Constructor::CreateFromArtMethod<kTransactionActive>(self, result)
+      ? mirror::Constructor::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, result)
       : nullptr;
 }
 
 
-template mirror::Constructor* Class::GetDeclaredConstructorInternal<false>(
+template mirror::Constructor* Class::GetDeclaredConstructorInternal<4U, false>(
     Thread* self,
     mirror::Class* klass,
     mirror::ObjectArray<mirror::Class>* args);
-template mirror::Constructor* Class::GetDeclaredConstructorInternal<true>(
+template mirror::Constructor* Class::GetDeclaredConstructorInternal<4U, true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+template mirror::Constructor* Class::GetDeclaredConstructorInternal<8U, false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+template mirror::Constructor* Class::GetDeclaredConstructorInternal<8U, true>(
     Thread* self,
     mirror::Class* klass,
     mirror::ObjectArray<mirror::Class>* args);
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 9be9f01..3ba9e1a 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -471,7 +471,7 @@
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t GetComponentSize() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return 1U << GetComponentSizeShift();
+    return 1U << GetComponentSizeShift<kReadBarrierOption>();
   }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
@@ -754,13 +754,13 @@
         size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template <bool kTransactionActive = false>
+  template <size_t kPointerSize, bool kTransactionActive>
   static Method* GetDeclaredMethodInternal(Thread* self,
                                            mirror::Class* klass,
                                            mirror::String* name,
                                            mirror::ObjectArray<mirror::Class>* args)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  template <bool kTransactionActive = false>
+  template <size_t kPointerSize, bool kTransactionActive>
   static Constructor* GetDeclaredConstructorInternal(Thread* self,
                                                      mirror::Class* klass,
                                                      mirror::ObjectArray<mirror::Class>* args)
diff --git a/runtime/mirror/field-inl.h b/runtime/mirror/field-inl.h
index 8a0daec..4183476 100644
--- a/runtime/mirror/field-inl.h
+++ b/runtime/mirror/field-inl.h
@@ -27,7 +27,7 @@
 
 namespace mirror {
 
-template <bool kTransactionActive>
+template <size_t kPointerSize, bool kTransactionActive>
 inline mirror::Field* Field::CreateFromArtField(Thread* self, ArtField* field,
                                                 bool force_resolve) {
   StackHandleScope<2> hs(self);
@@ -54,10 +54,8 @@
     self->AssertPendingOOMException();
     return nullptr;
   }
-  const auto pointer_size = kTransactionActive ?
-      Runtime::Current()->GetClassLinker()->GetImagePointerSize() : sizeof(void*);
   auto dex_field_index = field->GetDexFieldIndex();
-  auto* resolved_field = field->GetDexCache()->GetResolvedField(dex_field_index, pointer_size);
+  auto* resolved_field = field->GetDexCache()->GetResolvedField(dex_field_index, kPointerSize);
   if (field->GetDeclaringClass()->IsProxyClass()) {
     DCHECK(field->IsStatic());
     DCHECK_LT(dex_field_index, 2U);
@@ -70,7 +68,7 @@
     } else {
       // We rely on the field being resolved so that we can get back to the ArtField
       // (i.e. FromReflectedMethod).
-      field->GetDexCache()->SetResolvedField(dex_field_index, field, pointer_size);
+      field->GetDexCache()->SetResolvedField(dex_field_index, field, kPointerSize);
     }
   }
   ret->SetType<kTransactionActive>(type.Get());
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index edaddbd..2bd6132 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -92,7 +92,7 @@
   // Slow, try to use only for PrettyField and such.
   ArtField* GetArtField() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template <bool kTransactionActive = false>
+  template <size_t kPointerSize, bool kTransactionActive = false>
   static mirror::Field* CreateFromArtField(Thread* self, ArtField* field,
                                            bool force_resolve)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
diff --git a/runtime/mirror/method.cc b/runtime/mirror/method.cc
index 9838b71..3cc70e1 100644
--- a/runtime/mirror/method.cc
+++ b/runtime/mirror/method.cc
@@ -51,18 +51,21 @@
   array_class_ = GcRoot<Class>(nullptr);
 }
 
-template <bool kTransactionActive>
+template <size_t kPointerSize, bool kTransactionActive>
 Method* Method::CreateFromArtMethod(Thread* self, ArtMethod* method) {
   DCHECK(!method->IsConstructor()) << PrettyMethod(method);
   auto* ret = down_cast<Method*>(StaticClass()->AllocObject(self));
   if (LIKELY(ret != nullptr)) {
-    static_cast<AbstractMethod*>(ret)->CreateFromArtMethod<kTransactionActive>(method);
+    static_cast<AbstractMethod*>(ret)->
+        CreateFromArtMethod<kPointerSize, kTransactionActive>(method);
   }
   return ret;
 }
 
-template Method* Method::CreateFromArtMethod<false>(Thread* self, ArtMethod* method);
-template Method* Method::CreateFromArtMethod<true>(Thread* self, ArtMethod* method);
+template Method* Method::CreateFromArtMethod<4U, false>(Thread* self, ArtMethod* method);
+template Method* Method::CreateFromArtMethod<4U, true>(Thread* self, ArtMethod* method);
+template Method* Method::CreateFromArtMethod<8U, false>(Thread* self, ArtMethod* method);
+template Method* Method::CreateFromArtMethod<8U, true>(Thread* self, ArtMethod* method);
 
 void Method::VisitRoots(RootVisitor* visitor) {
   static_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
@@ -96,18 +99,21 @@
   array_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
-template <bool kTransactionActive>
+template <size_t kPointerSize, bool kTransactionActive>
 Constructor* Constructor::CreateFromArtMethod(Thread* self, ArtMethod* method) {
   DCHECK(method->IsConstructor()) << PrettyMethod(method);
   auto* ret = down_cast<Constructor*>(StaticClass()->AllocObject(self));
   if (LIKELY(ret != nullptr)) {
-    static_cast<AbstractMethod*>(ret)->CreateFromArtMethod<kTransactionActive>(method);
+    static_cast<AbstractMethod*>(ret)->
+        CreateFromArtMethod<kPointerSize, kTransactionActive>(method);
   }
   return ret;
 }
 
-template Constructor* Constructor::CreateFromArtMethod<false>(Thread* self, ArtMethod* method);
-template Constructor* Constructor::CreateFromArtMethod<true>(Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<4U, false>(Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<4U, true>(Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<8U, false>(Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<8U, true>(Thread* self, ArtMethod* method);
 
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/method.h b/runtime/mirror/method.h
index 0b56964..ecd6a74 100644
--- a/runtime/mirror/method.h
+++ b/runtime/mirror/method.h
@@ -28,7 +28,7 @@
 // C++ mirror of java.lang.reflect.Method.
 class MANAGED Method : public AbstractMethod {
  public:
-  template <bool kTransactionActive = false>
+  template <size_t kPointerSize, bool kTransactionActive>
   static Method* CreateFromArtMethod(Thread* self, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -60,7 +60,7 @@
 // C++ mirror of java.lang.reflect.Constructor.
 class MANAGED Constructor: public AbstractMethod {
  public:
-  template <bool kTransactionActive = false>
+  template <size_t kPointerSize, bool kTransactionActive>
   static Constructor* CreateFromArtMethod(Thread* self, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 3baa12e..e8ad5fa 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -76,8 +76,9 @@
     SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr);
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Reference* GetPendingNext() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObject<Reference>(PendingNextOffset());
+    return GetFieldObject<Reference, kDefaultVerifyFlags, kReadBarrierOption>(PendingNextOffset());
   }
 
   void SetPendingNext(Reference* pending_next)
@@ -102,7 +103,7 @@
   // removed from the list after having determined the reference is not ready
   // to be enqueued on a java ReferenceQueue.
   bool IsUnprocessed() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetPendingNext() == nullptr;
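+    // A read barrier is unnecessary for a null check: the barrier never changes
+    // whether a reference is null.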
+    return GetPendingNext<kWithoutReadBarrier>() == nullptr;
   }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 0624da3..02a97f5 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -136,7 +136,9 @@
   }
   for (ArtField& field : ifields) {
     if (!public_only || field.IsPublic()) {
-      auto* reflect_field = mirror::Field::CreateFromArtField(self, &field, force_resolve);
+      auto* reflect_field = mirror::Field::CreateFromArtField<sizeof(void*)>(self,
+                                                                             &field,
+                                                                             force_resolve);
       if (reflect_field == nullptr) {
         if (kIsDebugBuild) {
           self->AssertPendingException();
@@ -149,7 +151,9 @@
   }
   for (ArtField& field : sfields) {
     if (!public_only || field.IsPublic()) {
-      auto* reflect_field = mirror::Field::CreateFromArtField(self, &field, force_resolve);
+      auto* reflect_field = mirror::Field::CreateFromArtField<sizeof(void*)>(self,
+                                                                             &field,
+                                                                             force_resolve);
       if (reflect_field == nullptr) {
         if (kIsDebugBuild) {
           self->AssertPendingException();
@@ -222,11 +226,15 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* art_field = FindFieldByName(self, name, c->GetIFieldsPtr());
   if (art_field != nullptr) {
-    return mirror::Field::CreateFromArtField(self, art_field, true);
+    return mirror::Field::CreateFromArtField<sizeof(void*)>(self,
+                                                            art_field,
+                                                            true);
   }
   art_field = FindFieldByName(self, name, c->GetSFieldsPtr());
   if (art_field != nullptr) {
-    return mirror::Field::CreateFromArtField(self, art_field, true);
+    return mirror::Field::CreateFromArtField<sizeof(void*)>(self,
+                                                            art_field,
+                                                            true);
   }
   return nullptr;
 }
@@ -323,7 +331,9 @@
 static jobject Class_getDeclaredConstructorInternal(
     JNIEnv* env, jobject javaThis, jobjectArray args) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::Constructor* result = mirror::Class::GetDeclaredConstructorInternal(
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
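+  // Managed reflection entry points only run in a started runtime, as the checks above assert.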
+  mirror::Constructor* result = mirror::Class::GetDeclaredConstructorInternal<sizeof(void*), false>(
       soa.Self(),
       DecodeClass(soa, javaThis),
       soa.Decode<mirror::ObjectArray<mirror::Class>*>(args));
@@ -355,7 +365,10 @@
   constructor_count = 0;
   for (auto& m : h_klass->GetDirectMethods(sizeof(void*))) {
     if (MethodMatchesConstructor(&m, publicOnly != JNI_FALSE)) {
-      auto* constructor = mirror::Constructor::CreateFromArtMethod(soa.Self(), &m);
+      DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+      DCHECK(!Runtime::Current()->IsActiveTransaction());
+      auto* constructor = mirror::Constructor::CreateFromArtMethod<sizeof(void*), false>(
+          soa.Self(), &m);
       if (UNLIKELY(constructor == nullptr)) {
         soa.Self()->AssertPendingOOMException();
         return nullptr;
@@ -369,7 +382,9 @@
 static jobject Class_getDeclaredMethodInternal(JNIEnv* env, jobject javaThis,
                                                jobject name, jobjectArray args) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::Method* result = mirror::Class::GetDeclaredMethodInternal(
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  mirror::Method* result = mirror::Class::GetDeclaredMethodInternal<sizeof(void*), false>(
       soa.Self(),
       DecodeClass(soa, javaThis),
       soa.Decode<mirror::String*>(name),
@@ -398,7 +413,9 @@
     auto modifiers = m.GetAccessFlags();
     if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
         (modifiers & kAccConstructor) == 0) {
-      auto* method = mirror::Method::CreateFromArtMethod(soa.Self(), &m);
+      DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+      DCHECK(!Runtime::Current()->IsActiveTransaction());
+      auto* method = mirror::Method::CreateFromArtMethod<sizeof(void*), false>(soa.Self(), &m);
       if (method == nullptr) {
         soa.Self()->AssertPendingException();
         return nullptr;
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 4d9ca6d..1ce7e48 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -60,29 +60,31 @@
 
     jsize array_index = 0;
     // Fill the method array
+    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
     ArtMethod* method = javaLangObject->FindDeclaredVirtualMethod(
         "equals", "(Ljava/lang/Object;)Z", sizeof(void*));
     CHECK(method != nullptr);
+    DCHECK(!Runtime::Current()->IsActiveTransaction());
     soa.Env()->SetObjectArrayElement(
         proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-            mirror::Method::CreateFromArtMethod(soa.Self(), method)));
+            mirror::Method::CreateFromArtMethod<sizeof(void*), false>(soa.Self(), method)));
     method = javaLangObject->FindDeclaredVirtualMethod("hashCode", "()I", sizeof(void*));
     CHECK(method != nullptr);
     soa.Env()->SetObjectArrayElement(
         proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-            mirror::Method::CreateFromArtMethod(soa.Self(), method)));
+            mirror::Method::CreateFromArtMethod<sizeof(void*), false>(soa.Self(), method)));
     method = javaLangObject->FindDeclaredVirtualMethod(
         "toString", "()Ljava/lang/String;", sizeof(void*));
     CHECK(method != nullptr);
     soa.Env()->SetObjectArrayElement(
         proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-            mirror::Method::CreateFromArtMethod(soa.Self(), method)));
+            mirror::Method::CreateFromArtMethod<sizeof(void*), false>(soa.Self(), method)));
     // Now add all the interfaces' virtual methods.
     for (mirror::Class* interface : interfaces) {
       for (auto& m : interface->GetDeclaredVirtualMethods(sizeof(void*))) {
         soa.Env()->SetObjectArrayElement(
             proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-                mirror::Method::CreateFromArtMethod(soa.Self(), &m)));
+                mirror::Method::CreateFromArtMethod<sizeof(void*), false>(soa.Self(), &m)));
       }
     }
     CHECK_EQ(array_index, methods_count);
@@ -226,14 +228,20 @@
   EXPECT_EQ(static_fields1->At(0).GetDeclaringClass(), proxyClass1.Get());
   EXPECT_EQ(static_fields1->At(1).GetDeclaringClass(), proxyClass1.Get());
 
+  ASSERT_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+  ASSERT_FALSE(Runtime::Current()->IsActiveTransaction());
   Handle<mirror::Field> field00 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields0->At(0), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<sizeof(void*), false>(
+          soa.Self(), &static_fields0->At(0), true));
   Handle<mirror::Field> field01 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields0->At(1), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<sizeof(void*), false>(
+          soa.Self(), &static_fields0->At(1), true));
   Handle<mirror::Field> field10 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields1->At(0), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<sizeof(void*), false>(
+          soa.Self(), &static_fields1->At(0), true));
   Handle<mirror::Field> field11 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields1->At(1), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<sizeof(void*), false>(
+          soa.Self(), &static_fields1->At(1), true));
   EXPECT_EQ(field00->GetArtField(), &static_fields0->At(0));
   EXPECT_EQ(field01->GetArtField(), &static_fields0->At(1));
   EXPECT_EQ(field10->GetArtField(), &static_fields1->At(0));
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 3fd66a7..3aa1fc2 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -93,6 +93,18 @@
   return static_cast<ThreadState>(old_state_and_flags.as_struct.state);
 }
 
+inline bool Thread::IsThreadSuspensionAllowable() const {
+  if (tls32_.no_thread_suspension != 0) {
+    return false;
+  }
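+  // Mirror of AssertThreadSuspensionIsAllowable: suspending while holding any
+  // mutex other than the mutator lock risks deadlock.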
+  for (int i = kLockLevelCount - 1; i >= 0; --i) {
+    if (i != kMutatorLock && GetHeldMutex(static_cast<LockLevel>(i)) != nullptr) {
+      return false;
+    }
+  }
+  return true;
+}
+
 inline void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const {
   if (kIsDebugBuild) {
     if (gAborting == 0) {
diff --git a/runtime/thread.h b/runtime/thread.h
index a3a4005..9a4eb97 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -287,6 +287,9 @@
 
   void AssertThreadSuspensionIsAllowable(bool check_locks = true) const;
 
+  // Return true if thread suspension is allowable.
+  bool IsThreadSuspensionAllowable() const;
+
   bool IsDaemon() const {
     return tls32_.daemon;
   }
diff --git a/runtime/trace.cc b/runtime/trace.cc
index e77a11e..1e15960 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -645,31 +645,11 @@
   }
 }
 
-static void GetVisitedMethodsFromBitSets(
-    const std::map<const DexFile*, DexIndexBitSet*>& seen_methods,
-    std::set<ArtMethod*>* visited_methods) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  Thread* const self = Thread::Current();
-  for (auto& e : seen_methods) {
-    DexIndexBitSet* bit_set = e.second;
-    // TODO: Visit trace methods as roots.
-    mirror::DexCache* dex_cache = class_linker->FindDexCache(self, *e.first, false);
-    for (uint32_t i = 0; i < bit_set->size(); ++i) {
-      if ((*bit_set)[i]) {
-        visited_methods->insert(dex_cache->GetResolvedMethod(i, sizeof(void*)));
-      }
-    }
-  }
-}
-
 void Trace::FinishTracing() {
   size_t final_offset = 0;
 
   std::set<ArtMethod*> visited_methods;
   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    // Write the secondary file with all the method names.
-    GetVisitedMethodsFromBitSets(seen_methods_, &visited_methods);
-
     // Clean up.
     STLDeleteValues(&seen_methods_);
   } else {
@@ -850,11 +830,6 @@
 bool Trace::RegisterMethod(ArtMethod* method) {
   mirror::DexCache* dex_cache = method->GetDexCache();
   const DexFile* dex_file = dex_cache->GetDexFile();
-  auto* resolved_method = dex_cache->GetResolvedMethod(method->GetDexMethodIndex(), sizeof(void*));
-  if (resolved_method != method) {
-    DCHECK(resolved_method == nullptr);
-    dex_cache->SetResolvedMethod(method->GetDexMethodIndex(), method, sizeof(void*));
-  }
   if (seen_methods_.find(dex_file) == seen_methods_.end()) {
     seen_methods_.insert(std::make_pair(dex_file, new DexIndexBitSet()));
   }
@@ -880,8 +855,7 @@
 
 std::string Trace::GetMethodLine(ArtMethod* method) {
   method = method->GetInterfaceMethodIfProxy(sizeof(void*));
-  return StringPrintf("%p\t%s\t%s\t%s\t%s\n",
-                      reinterpret_cast<void*>((EncodeTraceMethod(method) << TraceActionBits)),
+  return StringPrintf("%#x\t%s\t%s\t%s\t%s\n", (EncodeTraceMethod(method) << TraceActionBits),
       PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(),
       method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile());
 }
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 41771b5..c125e33 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -1204,9 +1204,6 @@
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
   /// CHECK: Goto
@@ -1217,7 +1214,7 @@
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
     }
-    // Several HDeoptimize will be added. Two for each index.
+    // Three HDeoptimize will be added for the bounds.
     // The null check is not necessary.
     for (int i = end - 2 ; i > 0; i--) {
       if (expectInterpreter) {
@@ -1266,20 +1263,12 @@
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
   /// CHECK: Goto
   /// CHECK: Goto
 
   void foo6(int[] array, int start, int end, boolean expectInterpreter) {
-    // Several HDeoptimize will be added.
     for (int i = end; i >= start; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -1398,8 +1387,8 @@
   /// CHECK-NOT: Deoptimize
 
   void foo9(int[] array, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. Two for the index
-    // and one for null check on array.
+    // Three HDeoptimize will be added. Two for the index and one for null check on array. Then
+    // simplification removes one redundant HDeoptimize.
     for (int i = 0 ; i < 10; i++) {
       if (expectInterpreter) {
         assertIsInterpreted();
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java
index 8de6318..78e8a40 100644
--- a/test/496-checker-inlining-and-class-loader/src/Main.java
+++ b/test/496-checker-inlining-and-class-loader/src/Main.java
@@ -107,7 +107,8 @@
                 /* Load and initialize FirstSeenByMyClassLoader */
   /// CHECK:      LoadClass gen_clinit_check:true
                 /* Load and initialize System */
-  /// CHECK-NEXT: LoadClass gen_clinit_check:true
+  // There may be MipsComputeBaseMethodAddress here.
+  /// CHECK:      LoadClass gen_clinit_check:true
   /// CHECK-NEXT: StaticFieldGet
   // There may be HArmDexCacheArraysBase or HX86ComputeBaseMethodAddress here.
   /// CHECK:      LoadString
diff --git a/test/530-checker-loops3/expected.txt b/test/530-checker-loops3/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/530-checker-loops3/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/530-checker-loops3/info.txt b/test/530-checker-loops3/info.txt
new file mode 100644
index 0000000..07d99a3
--- /dev/null
+++ b/test/530-checker-loops3/info.txt
@@ -0,0 +1 @@
+Test on loop optimizations, in particular loop-based dynamic bce.
diff --git a/test/530-checker-loops3/src/Main.java b/test/530-checker-loops3/src/Main.java
new file mode 100644
index 0000000..5ffcbe9
--- /dev/null
+++ b/test/530-checker-loops3/src/Main.java
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on loop optimizations, in particular dynamic BCE. In all cases,
+// bounds check on a[] is resolved statically. Bounds checks on b[]
+// exercise various different scenarios. In all cases, loop-based
+// dynamic BCE is better than the dominator-based BCE, since it
+// generates the test outside the loop.
+//
+public class Main {
+
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneConstantIndex(int[] a, int[] b) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[1];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleConstantIndices(int[] a, int[] b) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[0] + b[1] + b[2];
+    }
+  }
+
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneInvariantIndex(int[] a, int[] b, int c) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[c];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleInvariantIndices(int[] a, int[] b, int c) {
+    // Dynamic bce on b requires three deopts: one null and two bounds.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[c-1] + b[c] + b[c+1];
+    }
+  }
+
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneUnitStride(int[] a, int[] b) {
+    // Dynamic bce on b requires three deopts: one null and two bounds.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[i];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleUnitStrides(int[] a, int[] b) {
+    // Dynamic bce on b requires four deopts: one null and three bounds.
+    // One redundant deopt is removed by simplifier.
+    // TODO: Range information could remove another deopt.
+    for (int i = 1; i < a.length - 1; i++) {
+      a[i] = b[i-1] + b[i] + b[i+1];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleUnitStridesConditional(int[] a, int[] b) {
+    // Dynamic bce on b requires four deopts: one null and three bounds.
+    // The two conditional references may be included, since they are in range.
+    // One redundant deopt is removed by simplifier.
+    for (int i = 2; i < a.length - 2; i++) {
+      int t = b[i-2] + b[i] + b[i+2] + (((i & 1) == 0) ? b[i+1] : b[i-1]);
+      a[i] = t;
+    }
+  }
+
+  /// CHECK-START: void Main.shifter(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.shifter(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.shifter(int[]) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.shifter(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void shifter(int[] x) {
+    // Real-life example: should have four deopts: one null and three bounds.
+    // Two redundant deopts are removed by simplifier.
+    for (int i = 16; i < 80; i++) {
+      int t = x[i - 3] ^ x[i - 8] ^ x[i - 14] ^ x[i - 16];
+      x[i] = t << 1 | t >>> 31;
+    }
+  }
+
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void stencil(int[] array, int start, int end) {
+    // Real-life example: should have four deopts: one null and three bounds.
+    for (int i = end; i >= start; i--) {
+      array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5;
+    }
+  }
+
+  //
+  // Verifier.
+  //
+
+  public static void main(String[] args) {
+    int[] a = new int[10];
+    int[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int[] b1 = { 100 };
+
+    oneConstantIndex(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(2, a[i]);
+    }
+    try {
+      oneConstantIndex(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleConstantIndices(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(6, a[i]);
+    }
+    try {
+      multipleConstantIndices(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    oneInvariantIndex(a, b, 1);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(2, a[i]);
+    }
+    try {
+      oneInvariantIndex(a, b1, 1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleInvariantIndices(a, b, 1);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(6, a[i]);
+    }
+    try {
+      multipleInvariantIndices(a, b1, 1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    oneUnitStride(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(i + 1, a[i]);
+    }
+    try {
+      oneUnitStride(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+      expectEquals(100, a[0]);
+    }
+
+    multipleUnitStrides(a, b);
+    for (int i = 1; i < a.length - 1; i++) {
+      expectEquals(3 * i + 3, a[i]);
+    }
+    try {
+      multipleUnitStrides(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleUnitStridesConditional(a, b);
+    for (int i = 2; i < a.length - 2; i++) {
+      int e = 3 * i + 3 + (((i & 1) == 0) ? i + 2 : i);
+      expectEquals(e, a[i]);
+    }
+    try {
+      multipleUnitStridesConditional(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
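The comments in the tests above outline how dynamic bounds-check elimination works: the null check and the bound checks on b become Deoptimize guards hoisted out of the loop, leaving a check-free loop body, and a failed guard falls back to fully checked execution. A minimal C++ analogue of that fast/slow-path split follows; the function name, raw pointers, and explicit lengths are stand-ins for the Java arrays, and in ART the slow path is a deoptimization to the interpreter rather than a branch.

    #include <cstddef>
    #include <stdexcept>

    // Analogue of Main.oneConstantIndex(int[], int[]) after dynamic BCE:
    // two hoisted guards (one null, one bound), then an unchecked loop.
    void oneConstantIndexAfterBce(int* a, size_t a_len, const int* b, size_t b_len) {
      if (b != nullptr && 1 < b_len) {
        for (size_t i = 0; i < a_len; i++) {
          a[i] = b[1];  // Fast path: no per-iteration checks remain.
        }
      } else {
        // Slow path (ART: Deoptimize): rerun with the original per-access
        // checks, so an empty 'a' still throws nothing even when 'b' is bad.
        for (size_t i = 0; i < a_len; i++) {
          if (b == nullptr) throw std::runtime_error("NullPointerException");
          if (b_len <= 1) throw std::out_of_range("ArrayIndexOutOfBoundsException");
          a[i] = b[1];
        }
      }
    }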
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index 09a77ed..2232ff4 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -51,6 +51,10 @@
   /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
 
+  /// CHECK-START-MIPS: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
   /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
@@ -62,6 +66,10 @@
   /// CHECK:                ArmDexCacheArraysBase
   /// CHECK-NOT:            ArmDexCacheArraysBase
 
+  /// CHECK-START-MIPS: int Main.testSimple(int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
   /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
   /// CHECK:                X86ComputeBaseMethodAddress
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
@@ -83,6 +91,11 @@
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
 
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
   /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
@@ -100,6 +113,14 @@
   /// CHECK:                ArmDexCacheArraysBase
   /// CHECK-NEXT:           If
 
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NEXT:           If
+
   /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
   /// CHECK:                X86ComputeBaseMethodAddress
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
@@ -110,7 +131,7 @@
 
   public static int testDiamond(boolean negate, int x) {
     // These calls should use PC-relative dex cache array loads to retrieve the target method.
-    // PC-relative bases used by X86 and ARM should be pulled before the If.
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the If.
     if (negate) {
       return $noinline$foo(-x);
     } else {
@@ -154,8 +175,26 @@
   /// CHECK:                begin_block
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
 
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (before)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           MipsDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
   public static int testLoop(int[] array, int x) {
-    // PC-relative bases used by X86 and ARM should be pulled before the loop.
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop.
     for (int i : array) {
       x += $noinline$foo(i);
     }
@@ -182,8 +221,18 @@
   /// CHECK-NEXT:           ArmDexCacheArraysBase
   /// CHECK-NEXT:           Goto
 
+  /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_mips (before)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           MipsDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+
   public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
-    // PC-relative bases used by X86 and ARM should be pulled before the loop
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop
     // but not outside the if.
     if (array != null) {
       for (int i : array) {
@@ -220,6 +269,11 @@
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
   /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
 
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
   public static String $noinline$getBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
@@ -250,6 +304,13 @@
   /// CHECK-START-ARM64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
   /// CHECK:                LoadString load_kind:DexCachePcRelative
 
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() dex_cache_array_fixups_mips (after)
+  /// CHECK-DAG:            MipsDexCacheArraysBase
+  /// CHECK-DAG:            LoadString load_kind:DexCachePcRelative
+
   public static String $noinline$getNonBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
@@ -280,6 +341,11 @@
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
   /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
 
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
   public static Class<?> $noinline$getStringClass() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
@@ -310,6 +376,13 @@
   /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
   /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
 
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() dex_cache_array_fixups_mips (after)
+  /// CHECK-DAG:            MipsDexCacheArraysBase
+  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+
   public static Class<?> $noinline$getOtherClass() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
diff --git a/test/601-verifier-fails/expected.txt b/test/601-verifier-fails/expected.txt
deleted file mode 100644
index 8399969..0000000
--- a/test/601-verifier-fails/expected.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-passed A
-passed B
-passed C
-passed D
diff --git a/test/601-verifier-fails/info.txt b/test/601-verifier-fails/info.txt
deleted file mode 100644
index f77de05..0000000
--- a/test/601-verifier-fails/info.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-The situations in these tests were discovered by running the mutating
-dexfuzz on the DEX files of fuzzingly random generated Java test.
-
-(A) b/28908555:
-    soft verification failure (on the final field modification) should
-    not hide the hard verification failure (on the type mismatch) to
-    avoid compiler crash later on
-(B) b/29070461:
-    hard verification failure (not calling super in constructor) should
-    bail immediately and not allow soft verification failures to pile up
-    behind it to avoid fatal message later on
-(C) b/29068831:
-    access validation should occur prior to null reference check
-(D) b/29126870:
-    soft verification failure (cannot access) should not hide the hard
-    verification failure (non-reference type) to avoid a compiler crash
-    later on
-
diff --git a/test/601-verifier-fails/smali/construct.smali b/test/601-verifier-fails/smali/construct.smali
deleted file mode 100644
index 417ced9..0000000
--- a/test/601-verifier-fails/smali/construct.smali
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-# Copyright (C) 2016 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-.class public LB;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-    .registers 1
-    if-eqz v0, :bail
-    invoke-direct {v0}, LB;->append(Ljava/lang/String;)V
-:bail
-    return-void
-.end method
diff --git a/test/601-verifier-fails/smali/iget.smali b/test/601-verifier-fails/smali/iget.smali
deleted file mode 100644
index 5c045e6..0000000
--- a/test/601-verifier-fails/smali/iget.smali
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-# Copyright (C) 2016 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-.class public LD;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-    .registers 2
-    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
-    const v0, 2
-    iget v1, v0, LMain;->privateField:I
-    return-void
-.end method
diff --git a/test/601-verifier-fails/smali/iput.smali b/test/601-verifier-fails/smali/iput.smali
deleted file mode 100644
index bd8b928..0000000
--- a/test/601-verifier-fails/smali/iput.smali
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-# Copyright (C) 2016 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-.class public LC;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-    .registers 2
-    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
-    const v0, 0
-    iput-object v0, v0, LMain;->staticPrivateField:Ljava/lang/String;
-    return-void
-.end method
diff --git a/test/601-verifier-fails/smali/sput.smali b/test/601-verifier-fails/smali/sput.smali
deleted file mode 100644
index e8e56ac..0000000
--- a/test/601-verifier-fails/smali/sput.smali
+++ /dev/null
@@ -1,23 +0,0 @@
-#
-# Copyright (C) 2016 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-.class public LA;
-.super Ljava/lang/Object;
-
-.method public foo(I)V
-.registers 2
-    sput v1, LMain;->staticFinalField:Ljava/lang/String;
-    return-void
-.end method
diff --git a/test/601-verifier-fails/src/Main.java b/test/601-verifier-fails/src/Main.java
deleted file mode 100644
index a6a07fd..0000000
--- a/test/601-verifier-fails/src/Main.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class Main {
-
-  public static final String staticFinalField = null;
-
-  private static String staticPrivateField = null;
-
-  private int privateField = 0;
-
-  private static void test(String name) throws Exception {
-    try {
-      Class<?> a = Class.forName(name);
-      a.newInstance();
-    } catch (java.lang.LinkageError e) {
-      System.out.println("passed " + name);
-    }
-  }
-
-  public static void main(String[] args) throws Exception {
-    test("A");
-    test("B");
-    test("C");
-    test("D");
-  }
-}
diff --git a/test/etc/default-build b/test/etc/default-build
index 962ae38..37ce0f2 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -64,6 +64,9 @@
 SKIP_DX_MERGER="false"
 EXPERIMENTAL=""
 
+# The key for the default arguments used when no experimental features are enabled.
+DEFAULT_EXPERIMENT="no-experiment"
+
 # Setup experimental flag mappings in a bash associative array.
 declare -A JACK_EXPERIMENTAL_ARGS
 JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
@@ -72,6 +75,11 @@
 declare -A SMALI_EXPERIMENTAL_ARGS
 SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api-level 24"
 
+declare -A JAVAC_EXPERIMENTAL_ARGS
+JAVAC_EXPERIMENTAL_ARGS["default-methods"]="-source 1.8 -target 1.8"
+JAVAC_EXPERIMENTAL_ARGS["lambdas"]="-source 1.8 -target 1.8"
+JAVAC_EXPERIMENTAL_ARGS[${DEFAULT_EXPERIMENT}]="-source 1.7 -target 1.7"
+
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
     shift
@@ -100,6 +108,8 @@
     shift
   elif [ "x$1" = "x--experimental" ]; then
     shift
+    # We have a specific experimental configuration so don't use the default.
+    DEFAULT_EXPERIMENT=""
     EXPERIMENTAL="${EXPERIMENTAL} $1"
     shift
   elif expr "x$1" : "x--" >/dev/null 2>&1; then
@@ -110,10 +120,14 @@
   fi
 done
 
+# Be sure to get any default arguments if not doing any experiments.
+EXPERIMENTAL="${EXPERIMENTAL} ${DEFAULT_EXPERIMENT}"
+
 # Add args from the experimental mappings.
 for experiment in ${EXPERIMENTAL}; do
   JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}"
   SMALI_ARGS="${SMALI_ARGS} ${SMALI_EXPERIMENTAL_ARGS[${experiment}]}"
+  JAVAC_ARGS="${JAVAC_ARGS} ${JAVAC_EXPERIMENTAL_ARGS[${experiment}]}"
 done
 
 if [ -e classes.dex ]; then
diff --git a/test/run-test b/test/run-test
index 1ef5428..edee4ae 100755
--- a/test/run-test
+++ b/test/run-test
@@ -41,7 +41,7 @@
 fi
 checker="${progdir}/../tools/checker/checker.py"
 export JAVA="java"
-export JAVAC="javac -g -source 1.7 -target 1.7 -Xlint:-options"
+export JAVAC="javac -g -Xlint:-options"
 export RUN="${progdir}/etc/run-test-jar"
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
diff --git a/tools/ahat/src/Main.java b/tools/ahat/src/Main.java
index d784599..fdc5a86 100644
--- a/tools/ahat/src/Main.java
+++ b/tools/ahat/src/Main.java
@@ -67,12 +67,14 @@
       return;
     }
 
-    System.out.println("Processing hprof file...");
-    AhatSnapshot ahat = AhatSnapshot.fromHprof(hprof);
-
+    // Launch the server before parsing the hprof file so we get
+    // BindExceptions quickly.
     InetAddress loopback = InetAddress.getLoopbackAddress();
     InetSocketAddress addr = new InetSocketAddress(loopback, port);
     HttpServer server = HttpServer.create(addr, 0);
+
+    System.out.println("Processing hprof file...");
+    AhatSnapshot ahat = AhatSnapshot.fromHprof(hprof);
     server.createContext("/", new AhatHttpHandler(new OverviewHandler(ahat, hprof)));
     server.createContext("/rooted", new AhatHttpHandler(new RootedHandler(ahat)));
     server.createContext("/object", new AhatHttpHandler(new ObjectHandler(ahat)));
diff --git a/tools/cpp-define-generator/Android.mk b/tools/cpp-define-generator/Android.mk
new file mode 100644
index 0000000..6ba643c
--- /dev/null
+++ b/tools/cpp-define-generator/Android.mk
@@ -0,0 +1,34 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.executable.mk
+
+CPP_DEFINE_GENERATOR_SRC_FILES := \
+  main.cc
+
+CPP_DEFINE_GENERATOR_EXTRA_SHARED_LIBRARIES :=
+CPP_DEFINE_GENERATOR_EXTRA_INCLUDE :=
+CPP_DEFINE_GENERATOR_MULTILIB :=
+
+# Build a "data" binary which will hold all the symbol values that will be parsed by the other scripts.
+#
+# Builds are for host only; target-specific define generation is possible but trickier and would need extra tooling.
+#
+# In the future we may wish to parameterize this on (32,64)x(read_barrier,no_read_barrier).
+$(eval $(call build-art-executable,cpp-define-generator-data,$(CPP_DEFINE_GENERATOR_SRC_FILES),$(CPP_DEFINE_GENERATOR_EXTRA_SHARED_LIBRARIES),$(CPP_DEFINE_GENERATOR_EXTRA_INCLUDE),host,debug,$(CPP_DEFINE_GENERATOR_MULTILIB),shared))
+
diff --git a/tools/cpp-define-generator/common.def b/tools/cpp-define-generator/common.def
new file mode 100644
index 0000000..76c64c9
--- /dev/null
+++ b/tools/cpp-define-generator/common.def
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Convenience macro to define an offset expression.
+
+#ifndef DEFINE_OFFSET_EXPR
+#define DEFINE_OFFSET_EXPR(holder_type, field_name, field_type, expr) \
+  DEFINE_EXPR(holder_type ## _ ## field_name ## _OFFSET, field_type, expr)
+#define DEFINE_OFFSET_EXPR_STANDARD_DEFINITION
+#endif
+
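These .def files follow the X-macro pattern: a consumer defines DEFINE_EXPR to mean whatever it needs, includes the .def file, and each entry expands through the token-pasting helper above. A self-contained sketch of the mechanism, with demo names only (no ART headers involved):

    #include <cstdio>

    // Same shape as the helper above: paste holder and field into one macro name.
    #define DEMO_DEFINE_OFFSET_EXPR(holder_type, field_name, field_type, expr) \
      DEFINE_EXPR(holder_type ## _ ## field_name ## _OFFSET, field_type, expr)

    // One possible consumer: print each entry as a #define line.
    #define DEFINE_EXPR(macro_name, field_type, expr) \
      std::printf("#define %s %d\n", #macro_name, static_cast<int>(expr));

    int main() {
      DEMO_DEFINE_OFFSET_EXPR(Demo, FIELD, int, 8)  // Prints: #define Demo_FIELD_OFFSET 8
      return 0;
    }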
diff --git a/tools/cpp-define-generator/common_undef.def b/tools/cpp-define-generator/common_undef.def
new file mode 100644
index 0000000..c44aba7
--- /dev/null
+++ b/tools/cpp-define-generator/common_undef.def
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef DEFINE_OFFSET_EXPR_STANDARD_DEFINITION
+#undef DEFINE_OFFSET_EXPR_STANDARD_DEFINITION
+#undef DEFINE_OFFSET_EXPR
+#endif
diff --git a/tools/cpp-define-generator/constant_class.def b/tools/cpp-define-generator/constant_class.def
new file mode 100644
index 0000000..58372f9
--- /dev/null
+++ b/tools/cpp-define-generator/constant_class.def
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/class.h"         // kStatusInitialized
+#include "modifiers.h"            // kAccClassIsFinalizable
+#include "base/bit_utils.h"       // MostSignificantBit
+#endif
+
+#define DEFINE_FLAG_OFFSET(type_name, field_name, expr) \
+  DEFINE_EXPR(type_name ## _ ## field_name, uint32_t, (expr))
+
+DEFINE_FLAG_OFFSET(MIRROR_CLASS, STATUS_INITIALIZED,       art::mirror::Class::kStatusInitialized)
+DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_FINALIZABLE,     art::kAccClassIsFinalizable)
+// TODO: We should really have a BitPosition which also checks it's a power of 2.
+DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_FINALIZABLE_BIT, art::MostSignificantBit(art::kAccClassIsFinalizable))
+
+#undef DEFINE_FLAG_OFFSET
diff --git a/tools/cpp-define-generator/constant_globals.def b/tools/cpp-define-generator/constant_globals.def
new file mode 100644
index 0000000..1e24d64
--- /dev/null
+++ b/tools/cpp-define-generator/constant_globals.def
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export global values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "globals.h"         // art::kObjectAlignment
+#endif
+
+#define DEFINE_OBJECT_EXPR(macro_name, type, constant_field_name) \
+  DEFINE_EXPR(OBJECT_ ## macro_name, type, constant_field_name)
+
+DEFINE_OBJECT_EXPR(ALIGNMENT_MASK,         size_t,   art::kObjectAlignment - 1)
+DEFINE_OBJECT_EXPR(ALIGNMENT_MASK_TOGGLED, uint32_t, ~static_cast<uint32_t>(art::kObjectAlignment - 1))
+
+#undef DEFINE_OBJECT_EXPR
+
diff --git a/tools/cpp-define-generator/constant_jit.def b/tools/cpp-define-generator/constant_jit.def
new file mode 100644
index 0000000..5fa5194
--- /dev/null
+++ b/tools/cpp-define-generator/constant_jit.def
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Constants within jit.h.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "jit/jit.h"   // art::kSuspendRequest, etc.
+#endif
+
+#define DEFINE_JIT_CONSTANT(macro_name, type, expr) \
+  DEFINE_EXPR(JIT_ ## macro_name, type, (expr))
+
+DEFINE_JIT_CONSTANT(CHECK_OSR,       int16_t, art::jit::kJitCheckForOSR)
+DEFINE_JIT_CONSTANT(HOTNESS_DISABLE, int16_t, art::jit::kJitHotnessDisabled)
+
+#undef DEFINE_JIT_CONSTANT
diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def
new file mode 100644
index 0000000..c1e6099
--- /dev/null
+++ b/tools/cpp-define-generator/constant_lockword.def
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export lockword values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "lock_word.h"         // art::LockWord
+#endif
+
+#define DEFINE_LOCK_WORD_EXPR(macro_name, type, constant_field_name) \
+  DEFINE_EXPR(LOCK_WORD_ ## macro_name, type, art::LockWord::constant_field_name)
+
+DEFINE_LOCK_WORD_EXPR(STATE_SHIFT,               int32_t,  kStateShift)
+DEFINE_LOCK_WORD_EXPR(STATE_MASK,                uint32_t, kStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_SHIFT,  int32_t,  kReadBarrierStateShift)
+DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK,   uint32_t,  kReadBarrierStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled)
+DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE,       int32_t,  kThinLockCountOne)
+
+#undef DEFINE_LOCK_WORD_EXPR
+
diff --git a/tools/cpp-define-generator/constant_reference.def b/tools/cpp-define-generator/constant_reference.def
new file mode 100644
index 0000000..d312f76
--- /dev/null
+++ b/tools/cpp-define-generator/constant_reference.def
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/object.h"            // mirror::Object
+#include "stack.h"                    // StackReference
+#include "mirror/object_reference.h"  // mirror::CompressedReference
+#include "base/bit_utils.h"           // WhichPowerOf2
+#endif
+
+// Size of references to the heap on the stack.
+DEFINE_EXPR(STACK_REFERENCE_SIZE,            size_t, sizeof(art::StackReference<art::mirror::Object>))
+// Size of heap references.
+DEFINE_EXPR(COMPRESSED_REFERENCE_SIZE,       size_t, sizeof(art::mirror::CompressedReference<art::mirror::Object>))
+DEFINE_EXPR(COMPRESSED_REFERENCE_SIZE_SHIFT, size_t, art::WhichPowerOf2(sizeof(art::mirror::CompressedReference<art::mirror::Object>)))
+
+#undef DEFINE_REFERENCE_OFFSET
diff --git a/tools/cpp-define-generator/constant_rosalloc.def b/tools/cpp-define-generator/constant_rosalloc.def
new file mode 100644
index 0000000..2007cef
--- /dev/null
+++ b/tools/cpp-define-generator/constant_rosalloc.def
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Constants within RosAlloc.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "gc/allocator/rosalloc.h"   // art::gc::allocator::RosAlloc
+#endif
+
+#define DEFINE_ROSALLOC_CONSTANT(macro_name, type, expr) \
+  DEFINE_EXPR(ROSALLOC_ ## macro_name, type, (expr))
+
+DEFINE_ROSALLOC_CONSTANT(MAX_THREAD_LOCAL_BRACKET_SIZE, int32_t, art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize)
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_SHIFT,    int32_t, art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSizeShift)
+// TODO: This should be a BitUtils helper, e.g. BitMaskFromSize or something like that.
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_MASK,     int32_t, static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_MASK_TOGGLED32,\
+                                                        uint32_t, ~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_MASK_TOGGLED64,\
+                                                        uint64_t, ~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
+DEFINE_ROSALLOC_CONSTANT(RUN_FREE_LIST_OFFSET,          int32_t, art::gc::allocator::RosAlloc::RunFreeListOffset())
+DEFINE_ROSALLOC_CONSTANT(RUN_FREE_LIST_HEAD_OFFSET,     int32_t, art::gc::allocator::RosAlloc::RunFreeListHeadOffset())
+DEFINE_ROSALLOC_CONSTANT(RUN_FREE_LIST_SIZE_OFFSET,     int32_t, art::gc::allocator::RosAlloc::RunFreeListSizeOffset())
+DEFINE_ROSALLOC_CONSTANT(SLOT_NEXT_OFFSET,              int32_t, art::gc::allocator::RosAlloc::RunSlotNextOffset())
+
+
+#undef DEFINE_ROSALLOC_CONSTANT
diff --git a/tools/cpp-define-generator/constant_thread.def b/tools/cpp-define-generator/constant_thread.def
new file mode 100644
index 0000000..af5ca21
--- /dev/null
+++ b/tools/cpp-define-generator/constant_thread.def
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Constants within thread.h.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "thread.h"   // art::kSuspendRequest, etc.
+#endif
+
+#define DEFINE_THREAD_CONSTANT(macro_name, type, expr) \
+  DEFINE_EXPR(THREAD_ ## macro_name, type, (expr))
+
+DEFINE_THREAD_CONSTANT(SUSPEND_REQUEST,    int32_t, art::kSuspendRequest)
+DEFINE_THREAD_CONSTANT(CHECKPOINT_REQUEST, int32_t, art::kCheckpointRequest)
+
+#undef DEFINE_THREAD_CONSTANT
diff --git a/tools/cpp-define-generator/generate-asm-support b/tools/cpp-define-generator/generate-asm-support
new file mode 100755
index 0000000..f95648b
--- /dev/null
+++ b/tools/cpp-define-generator/generate-asm-support
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Generates asm_support_gen.h
+# - This must be run after a build since it uses cpp-define-generator-data
+
+[[ -z ${ANDROID_BUILD_TOP+x} ]] && { echo "Run source build/envsetup.sh first" >&2; exit 1; }
+
+cpp-define-generator-datad > ${ANDROID_BUILD_TOP}/art/runtime/generated/asm_support_gen.h
diff --git a/tools/cpp-define-generator/main.cc b/tools/cpp-define-generator/main.cc
new file mode 100644
index 0000000..a1b463a
--- /dev/null
+++ b/tools/cpp-define-generator/main.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <sstream>
+#include <type_traits>
+#include <ios>
+#include <algorithm>
+#include <string>
+
+// ART offset file dependencies.
+#define DEFINE_INCLUDE_DEPENDENCIES
+#include "offsets_all.def"
+
+std::string to_upper(std::string input) {
+  std::transform(input.begin(), input.end(), input.begin(), ::toupper);
+  return input;
+}
+
+template <typename T, typename = void>
+typename std::enable_if<!std::is_signed<T>::value, std::string>::type
+pretty_format(T value) {
+  // Print most values as hex.
+  std::stringstream ss;
+  ss << std::showbase << std::hex << value;
+  return ss.str();
+}
+
+template <typename T, typename = void>
+typename std::enable_if<std::is_signed<T>::value, std::string>::type
+pretty_format(T value) {
+  // Print "signed" values as decimal so that the negativity doesn't get lost.
+  std::stringstream ss;
+
+  // For negative values add a (). Omit it from positive values for conciseness.
+  if (value < 0) {
+    ss << "(";
+  }
+
+  ss << value;
+
+  if (value < 0) {
+    ss << ")";
+  }
+  return ss.str();
+}
+
+template <typename T>
+void cpp_define(std::string name, T value) {
+  std::cout << "#define " << name << " " << pretty_format(value) << std::endl;
+}
+
+template <typename T>
+void emit_check_eq(T value, std::string expr) {
+  std::cout << "DEFINE_CHECK_EQ(" << value << ", (" << expr << "))" << std::endl;
+}
+
+const char *kFileHeader = /* // NOLINT [readability/multiline_string] [5] */ R"L1C3NS3(
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+#define ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+
+// This file has been auto-generated by cpp-define-generator; do not edit directly.
+)L1C3NS3";  // NOLINT [readability/multiline_string] [5]
+
+const char *kFileFooter = /* // NOLINT [readability/multiline_string] [5] */ R"F00T3R(
+#endif  // ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+)F00T3R";  // NOLINT [readability/multiline_string] [5]
+
+#define MACROIZE(holder_type, field_name) to_upper(#holder_type "_" #field_name "_OFFSET")
+
+int main() {
+  std::cout << kFileHeader << std::endl;
+
+  std::string z = "";
+
+  // Print every constant expression to stdout as a #define plus a DEFINE_CHECK_EQ line.
+#define DEFINE_EXPR(macro_name, field_type, expr) \
+  cpp_define(to_upper(#macro_name), static_cast<field_type>(expr)); \
+  emit_check_eq(z + "static_cast<" #field_type ">(" + to_upper(#macro_name) + ")", \
+                "static_cast<" #field_type ">(" #expr ")");
+#include "offsets_all.def"
+
+  std::cout << kFileFooter << std::endl;
+  return 0;
+}
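With the DEFINE_EXPR above, each entry processed from offsets_all.def emits one #define plus one DEFINE_CHECK_EQ line into the generated header. A hypothetical excerpt for the STACK_REFERENCE_SIZE entry (the 0x4 value is illustrative; unsigned constants are printed as hex by pretty_format):

    #define STACK_REFERENCE_SIZE 0x4
    DEFINE_CHECK_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), (static_cast<size_t>(sizeof(art::StackReference<art::mirror::Object>))))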
diff --git a/tools/cpp-define-generator/offset_codeitem.def b/tools/cpp-define-generator/offset_codeitem.def
new file mode 100644
index 0000000..e5acd1d
--- /dev/null
+++ b/tools/cpp-define-generator/offset_codeitem.def
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within CodeItem.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include <cstddef>      // offsetof
+#include "dex_file.h"   // art::DexFile
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_CODEITEM_OFFSET(field_name) \
+  DEFINE_OFFSET_EXPR(CodeItem, field_name, int32_t, offsetof(art::DexFile::CodeItem, field_name ## _))
+
+//                     Field Name
+DEFINE_CODEITEM_OFFSET(insns)
+
+#undef DEFINE_CODEITEM_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_dexcache.def b/tools/cpp-define-generator/offset_dexcache.def
new file mode 100644
index 0000000..ec4b248
--- /dev/null
+++ b/tools/cpp-define-generator/offset_dexcache.def
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within art::ArtMethod.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "art_method.h"         // art::ArtMethod
+#endif
+
+#define DEFINE_ART_METHOD_OFFSET(field_name, method_name) \
+  DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET_32, int32_t, art::ArtMethod::method_name##Offset(4).Int32Value()) \
+  DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET_64, int32_t, art::ArtMethod::method_name##Offset(8).Int32Value())
+
+//                         New macro suffix          Method Name (of the Offset method)
+DEFINE_ART_METHOD_OFFSET(DEX_CACHE_METHODS,          DexCacheResolvedMethods)
+DEFINE_ART_METHOD_OFFSET(DEX_CACHE_TYPES,            DexCacheResolvedTypes)
+DEFINE_ART_METHOD_OFFSET(JNI,                        EntryPointFromJni)
+DEFINE_ART_METHOD_OFFSET(QUICK_CODE,                 EntryPointFromQuickCompiledCode)
+
+#undef DEFINE_ART_METHOD_OFFSET
diff --git a/tools/cpp-define-generator/offset_mirror_object.def b/tools/cpp-define-generator/offset_mirror_object.def
new file mode 100644
index 0000000..9b99634
--- /dev/null
+++ b/tools/cpp-define-generator/offset_mirror_object.def
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within java.lang.Object (mirror::Object).
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/object.h"         // art::mirror::Object
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_MIRROR_OBJECT_OFFSET(field_name, method_name) \
+  DEFINE_OFFSET_EXPR(MIRROR_OBJECT, field_name, int32_t, art::mirror::Object::method_name##Offset().Int32Value())
+
+//                          New macro suffix            Method Name (of the Offset method)
+DEFINE_MIRROR_OBJECT_OFFSET(CLASS,                      Class)
+DEFINE_MIRROR_OBJECT_OFFSET(LOCK_WORD,                  Monitor)
+
+#undef DEFINE_MIRROR_OBJECT_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_runtime.def b/tools/cpp-define-generator/offset_runtime.def
new file mode 100644
index 0000000..b327ca3
--- /dev/null
+++ b/tools/cpp-define-generator/offset_runtime.def
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within art::Runtime.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "runtime.h"         // art::Runtime
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+// Note: these callee save method loads require read barriers.
+
+#define DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(field_name, constant_name) \
+  DEFINE_OFFSET_EXPR(Runtime, field_name ## _CALLEE_SAVE_FRAME, size_t, art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: constant_name))
+
+                    //     Macro substring       Constant name
+// Offset of field Runtime::callee_save_methods_[kSaveAll]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_ALL,      kSaveAll)
+// Offset of field Runtime::callee_save_methods_[kRefsOnly]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(REFS_ONLY,     kRefsOnly)
+// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(REFS_AND_ARGS, kRefsAndArgs)
+
+#undef DEFINE_RUNTIME_CALLEE_SAVE_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_shadow_frame.def b/tools/cpp-define-generator/offset_shadow_frame.def
new file mode 100644
index 0000000..b49a340
--- /dev/null
+++ b/tools/cpp-define-generator/offset_shadow_frame.def
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within ShadowFrame.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "stack.h"         // art::ShadowFrame
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_SHADOW_FRAME_OFFSET(field_name, method_name) \
+  DEFINE_OFFSET_EXPR(ShadowFrame, field_name, int32_t, art::ShadowFrame::method_name##Offset())
+
+//                         New macro suffix            Method Name (of the Offset method)
+DEFINE_SHADOW_FRAME_OFFSET(LINK,                       Link)
+DEFINE_SHADOW_FRAME_OFFSET(METHOD,                     Method)
+DEFINE_SHADOW_FRAME_OFFSET(RESULT_REGISTER,            ResultRegister)
+DEFINE_SHADOW_FRAME_OFFSET(DEX_PC_PTR,                 DexPCPtr)
+DEFINE_SHADOW_FRAME_OFFSET(CODE_ITEM,                  CodeItem)
+DEFINE_SHADOW_FRAME_OFFSET(LOCK_COUNT_DATA,            LockCountData)
+DEFINE_SHADOW_FRAME_OFFSET(NUMBER_OF_VREGS,            NumberOfVRegs)
+DEFINE_SHADOW_FRAME_OFFSET(DEX_PC,                     DexPC)
+DEFINE_SHADOW_FRAME_OFFSET(CACHED_HOTNESS_COUNTDOWN,   CachedHotnessCountdown)
+DEFINE_SHADOW_FRAME_OFFSET(VREGS,                      VRegs)
+
+#undef DEFINE_SHADOW_FRAME_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_thread.def b/tools/cpp-define-generator/offset_thread.def
new file mode 100644
index 0000000..71648e6
--- /dev/null
+++ b/tools/cpp-define-generator/offset_thread.def
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within art::Thread.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "stack.h"         // art::ShadowFrame
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_THREAD_OFFSET(field_name, method_name) \
+  DEFINE_OFFSET_EXPR(Thread, field_name, int32_t, art::Thread::method_name##Offset<sizeof(void*)>().Int32Value())
+
+//                   New macro suffix            Method Name (of the Offset method)
+DEFINE_THREAD_OFFSET(FLAGS,                      ThreadFlags)
+DEFINE_THREAD_OFFSET(ID,                         ThinLockId)
+DEFINE_THREAD_OFFSET(IS_GC_MARKING,              IsGcMarking)
+DEFINE_THREAD_OFFSET(CARD_TABLE,                 CardTable)
+
+// TODO: The rest of the offsets
+// are dependent on __SIZEOF_POINTER__
+
+#undef DEFINE_THREAD_OFFSET
+
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offsets_all.def b/tools/cpp-define-generator/offsets_all.def
new file mode 100644
index 0000000..01e4d5b
--- /dev/null
+++ b/tools/cpp-define-generator/offsets_all.def
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Includes every single offset file in art.
+// Useful for processing every single offset together.
+
+// Usage:
+// #define DEFINE_INCLUDE_DEPENDENCIES
+// #include "offsets_all.def"
+// to automatically include each def file's header dependencies.
+//
+// Afterwards,
+// #define DEFINE_EXPR(define_name, field_type, expr) ...
+// #include "offsets_all.def"
+// to process each offset however one wants.
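+//
+// For example, a hypothetical consumer (not part of this change) could print
+// each offset as an assembler constant, assuming <iostream> is included:
+//   #define DEFINE_EXPR(define_name, field_type, expr) \
+//     std::cout << "#define " #define_name " " << (expr) << "\n";
+//   #include "offsets_all.def"
+//   #undef DEFINE_EXPR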
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
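+// In dependency mode each DEFINE_EXPR expands to nothing, so including the
+// .def files below only pulls in their header dependencies.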
+#define DEFINE_EXPR(define_name, field_type, expr)
+#endif
+
+#if !defined(DEFINE_EXPR)
+#error "Either DEFINE_INCLUDE_DEPENDENCIES or DEFINE_EXPR must be defined"
+#endif
+
+#include "constant_reference.def"
+#include "offset_runtime.def"
+// TODO: rest of THREAD_ offsets (depends on __SIZEOF_POINTER__).
+#include "offset_thread.def"
+// TODO: SHADOW_FRAME depends on __SIZEOF_POINTER__
+// #include "offset_shadow_frame.def"
+#include "offset_codeitem.def"
+// TODO: MIRROR_OBJECT_HEADER_SIZE (depends on #ifdef read barrier)
+// TODO: MIRROR_CLASS offsets (see above)
+#include "offset_mirror_object.def"
+#include "constant_class.def"
+// TODO: MIRROR_*_ARRAY offsets (depends on header size)
+// TODO: MIRROR_STRING offsets (depends on header size)
+#include "offset_dexcache.def"
+#include "constant_lockword.def"
+#include "constant_globals.def"
+#include "constant_rosalloc.def"
+#include "constant_thread.def"
+#include "constant_jit.def"
+
+// TODO: MIRROR_OBJECT_HEADER_SIZE #ifdef depends on read barriers
+// TODO: Array offsets (depends on MIRROR_OBJECT_HEADER_SIZE)
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#undef DEFINE_EXPR
+#undef DEFINE_INCLUDE_DEPENDENCIES
+#endif
+
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 996f2f8..bf8d12b 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -260,5 +260,12 @@
   bug: 30107038,
   modes: [device],
   names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_destroyForcibly"]
+},
+{
+  description: "Flaky failure, possibly caused by a kernel bug accessing /proc/",
+  result: EXEC_FAILED,
+  bug: 27464570,
+  modes: [device],
+  names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit"]
 }
 ]
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 976e1d8..bdb2d4b 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -19,8 +19,12 @@
   exit 1
 fi
 
+if [ -z "$ANDROID_HOST_OUT" ] ; then
+  ANDROID_HOST_OUT=${OUT_DIR-$ANDROID_BUILD_TOP/out}/host/linux-x86
+fi
+
 # Jar containing all the tests.
-test_jack=${OUT_DIR-out}/host/common/obj/JAVA_LIBRARIES/apache-harmony-jdwp-tests-hostdex_intermediates/classes.jack
+test_jack=${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/apache-harmony-jdwp-tests-hostdex_intermediates/classes.jack
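+# (classes.jack is built into host/common, a sibling of ${ANDROID_HOST_OUT}.)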
 
 if [ ! -f $test_jack ]; then
   echo "Before running, you must build jdwp tests and vogar:" \
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 3e2a512..3605aa0 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -19,12 +19,17 @@
   exit 1
 fi
 
+if [ -z "$ANDROID_PRODUCT_OUT" ] ; then
+  JAVA_LIBRARIES=out/target/common/obj/JAVA_LIBRARIES
+else
+  JAVA_LIBRARIES=${ANDROID_PRODUCT_OUT}/../../common/obj/JAVA_LIBRARIES
+fi
+
 # Jar containing jsr166 tests.
-jsr166_test_jack=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/jsr166-tests_intermediates/classes.jack
+jsr166_test_jack=${JAVA_LIBRARIES}/jsr166-tests_intermediates/classes.jack
 
 # Jar containing all the other tests.
-test_jack=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/core-tests_intermediates/classes.jack
-
+test_jack=${JAVA_LIBRARIES}/core-tests_intermediates/classes.jack
 
 if [ ! -f $test_jack ]; then
   echo "Before running, you must build core-tests, jsr166-tests and vogar: \