Merge "creating workflow for mirror::String compression"
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 1e2cfa3..c8e3654 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -152,7 +152,7 @@
# The architectures the compiled tools are able to run on. Setting this to 'all' will cause all
# architectures to be included.
-ART_TARGET_CODEGEN_ARCHS ?= all
+ART_TARGET_CODEGEN_ARCHS ?= svelte
ART_HOST_CODEGEN_ARCHS ?= all
ifeq ($(ART_TARGET_CODEGEN_ARCHS),all)
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 410b2d0..16c6a7b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -92,6 +92,8 @@
linker/arm/relative_patcher_thumb2.cc \
optimizing/code_generator_arm.cc \
optimizing/dex_cache_array_fixups_arm.cc \
+ optimizing/instruction_simplifier_arm.cc \
+ optimizing/instruction_simplifier_shared.cc \
optimizing/intrinsics_arm.cc \
utils/arm/assembler_arm.cc \
utils/arm/assembler_arm32.cc \
@@ -109,7 +111,6 @@
linker/arm64/relative_patcher_arm64.cc \
optimizing/nodes_arm64.cc \
optimizing/code_generator_arm64.cc \
- optimizing/instruction_simplifier_arm.cc \
optimizing/instruction_simplifier_arm64.cc \
optimizing/instruction_simplifier_shared.cc \
optimizing/intrinsics_arm64.cc \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 55e1ab2..6e5eb66 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2456,16 +2456,18 @@
__ FloorWS(FTMP, in);
__ Mfc1(out, FTMP);
- __ LoadConst32(TMP, 1);
+ if (!IsR6()) {
+ __ LoadConst32(TMP, -1);
+ }
- // TMP = (out = java.lang.Integer.MAX_VALUE) ? 1 : 0;
+ // TMP = (out == java.lang.Integer.MAX_VALUE) ? -1 : 0;
__ LoadConst32(AT, std::numeric_limits<int32_t>::max());
__ Bne(AT, out, &finite);
__ Mtc1(ZERO, FTMP);
if (IsR6()) {
__ CmpLtS(FTMP, in, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Mfc1(TMP, FTMP);
} else {
__ ColtS(in, FTMP);
}
@@ -2474,28 +2476,26 @@
__ Bind(&finite);
- // TMP = (0.5f <= (in - out)) ? 1 : 0;
+ // TMP = (0.5f <= (in - out)) ? -1 : 0;
__ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
__ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
__ SubS(FTMP, in, FTMP);
__ Mtc1(AT, half);
if (IsR6()) {
__ CmpLeS(FTMP, half, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Mfc1(TMP, FTMP);
} else {
__ ColeS(half, FTMP);
}
__ Bind(&add);
- if (IsR6()) {
- __ Selnez(TMP, TMP, AT);
- } else {
+ if (!IsR6()) {
__ Movf(TMP, ZERO);
}
- // Return out += TMP.
- __ Addu(out, out, TMP);
+ // Return out -= TMP.
+ __ Subu(out, out, TMP);
__ Bind(&done);
}
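
A minimal C++ sketch of the finite-path rounding rule described by the comments in this hunk; RoundFiniteFloatSketch is a hypothetical helper, not the generated MIPS code. The condition value is kept as 0 or -1 (what the R6 compares naturally produce), so subtracting it adds 1 when the fractional part reaches 0.5:

    #include <cmath>
    #include <cstdint>

    // Sketch only: mirrors "TMP = (0.5f <= (in - out)) ? -1 : 0" and
    // "out -= TMP" from the comments above, for a finite input.
    int32_t RoundFiniteFloatSketch(float in) {
      int32_t out = static_cast<int32_t>(std::floor(in));              // floor.w.s
      int32_t tmp = (0.5f <= (in - static_cast<float>(out))) ? -1 : 0;
      return out - tmp;                                                // Subu(out, out, TMP)
    }
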
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index a1da20b..cc9cbda 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -428,8 +428,14 @@
|| instruction_set == kX86_64;
}
+// Strip pass name suffix to get optimization name.
+static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) {
+ size_t pos = pass_name.find(kPassNameSeparator);
+ return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
+}
+
static HOptimization* BuildOptimization(
- const std::string& opt_name,
+ const std::string& pass_name,
ArenaAllocator* arena,
HGraph* graph,
OptimizingCompilerStats* stats,
@@ -439,6 +445,7 @@
StackHandleScopeCollection* handles,
SideEffectsAnalysis* most_recent_side_effects,
HInductionVarAnalysis* most_recent_induction) {
+ std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
return new (arena) BoundsCheckElimination(graph,
@@ -446,11 +453,11 @@
most_recent_induction);
} else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) {
CHECK(most_recent_side_effects != nullptr);
- return new (arena) GVNOptimization(graph, *most_recent_side_effects);
+ return new (arena) GVNOptimization(graph, *most_recent_side_effects, pass_name.c_str());
} else if (opt_name == HConstantFolding::kConstantFoldingPassName) {
- return new (arena) HConstantFolding(graph);
+ return new (arena) HConstantFolding(graph, pass_name.c_str());
} else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) {
- return new (arena) HDeadCodeElimination(graph, stats);
+ return new (arena) HDeadCodeElimination(graph, stats, pass_name.c_str());
} else if (opt_name == HInliner::kInlinerPassName) {
size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
return new (arena) HInliner(graph, // outer_graph
@@ -470,7 +477,7 @@
} else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
return new (arena) HInductionVarAnalysis(graph);
} else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
- return new (arena) InstructionSimplifier(graph, stats);
+ return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str());
} else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
return new (arena) IntrinsicsRecognizer(graph, driver, stats);
} else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
@@ -522,12 +529,9 @@
SideEffectsAnalysis* most_recent_side_effects = nullptr;
HInductionVarAnalysis* most_recent_induction = nullptr;
ArenaVector<HOptimization*> ret(arena->Adapter());
- for (std::string pass_name : pass_names) {
- size_t pos = pass_name.find(kPassNameSeparator); // Strip suffix to get base pass name.
- std::string opt_name = pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
-
+ for (const std::string& pass_name : pass_names) {
HOptimization* opt = BuildOptimization(
- opt_name,
+ pass_name,
arena,
graph,
stats,
@@ -540,6 +544,7 @@
CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
ret.push_back(opt);
+ std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
} else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
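
The refactor above forwards the full pass name (with any instance suffix) to the constructed optimization, while matching still uses the stripped base name. A hedged sketch of that split, where StripSuffixSketch, the '#' separator, and the example pass names are illustrative only (the real separator is kPassNameSeparator):

    #include <string>

    // Sketch only: everything before the first separator selects which
    // optimization to build; the full pass name is kept so repeated instances
    // (e.g. two GVN passes) remain distinguishable by name.
    std::string StripSuffixSketch(const std::string& pass_name, char separator) {
      size_t pos = pass_name.find(separator);
      return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
    }
    // Hypothetical: "gvn#early" and "gvn#late" both match "gvn" here.
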
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index c4ec726..e25e93f 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -191,7 +191,7 @@
.cfi_rel_offset r11, 44
.cfi_rel_offset ip, 48
.cfi_rel_offset lr, 52
- vpush {d0-d15} @ 32 words of float args.
+ vpush {d0-d15} @ 32 words, 2 for each of the 16 saved doubles.
.cfi_adjust_cfa_offset 128
sub sp, #8 @ 2 words of space, alignment padding and Method*
.cfi_adjust_cfa_offset 8
@@ -1030,11 +1030,49 @@
END art_quick_set64_instance
/*
- * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
- * exception on error. On success the String is returned. R0 holds the string index. The fast
- * path check for hit in strings cache has already been performed.
+ * Entry from managed code to resolve a string. This stub first checks the
+ * dex cache for a matching string (the fast path) and, if not found, it
+ * allocates a String and delivers an exception on error.
+ * On success the String is returned. R0 holds the string index.
*/
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+ENTRY art_quick_resolve_string
+ ldr r1, [sp] @ load referrer
+ ldr r1, [r1, #ART_METHOD_DECLARING_CLASS_OFFSET] @ load declaring class
+ ldr r1, [r1, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache
+ ubfx r2, r0, #0, #STRING_DEX_CACHE_HASH_BITS
+ add r1, r1, r2, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
+ ldrd r2, r3, [r1] @ load index into r3 and pointer into r2
+ cmp r0, r3
+ bne .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+ ldr r3, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
+ cbnz r3, .Lart_quick_resolve_string_marking
+#endif
+ mov r0, r2
+ bx lr
+// Slow path case, the index did not match
+.Lart_quick_resolve_string_slow_path:
+ SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC
+ mov r1, r9 @ pass Thread::Current
+ mov r3, sp
+ bl artResolveStringFromCode @ (uint32_t type_idx, Method* method, Thread*)
+ RESTORE_SAVE_REFS_ONLY_FRAME
+ RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// GC is marking case, need to check the mark bit.
+.Lart_quick_resolve_string_marking:
+ ldr r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ tst r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+ mov r0, r2
+ bne .Lart_quick_resolve_string_no_rb
+ push {r1, r2, r3, lr} @ Save r1-r3 and LR; r1-r3 only for stack alignment.
+ .cfi_adjust_cfa_offset 16
+ bl artReadBarrierMark @ Get the marked string back.
+ pop {r1, r2, r3, lr} @ Restore registers.
+ .cfi_adjust_cfa_offset -16
+.Lart_quick_resolve_string_no_rb:
+ bx lr
+END art_quick_resolve_string
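
The ARM64, x86, and x86-64 stubs below implement the same dex-cache probe. A hedged C++ sketch of the fast path, with illustrative names (StringSlotSketch, ResolveStringFastPathSketch); the real slot type is art::mirror::StringDexCachePair and the sizes come from the STRING_DEX_CACHE_* constants added in asm_support:

    #include <cstdint>

    // Sketch only: each slot packs a 32-bit compressed String reference and the
    // 32-bit dex string index it caches (reference in the low word on the
    // little-endian layouts these stubs assume). The probe masks the index by
    // the power-of-two cache size and falls back to artResolveStringFromCode on
    // a mismatch.
    struct StringSlotSketch {
      uint32_t string_ref;  // low word: compressed mirror::String pointer
      uint32_t string_idx;  // high word: dex string index
    };

    inline uint32_t ResolveStringFastPathSketch(uint32_t string_idx,
                                                const StringSlotSketch* cache,
                                                uint32_t cache_size) {
      const StringSlotSketch& slot = cache[string_idx & (cache_size - 1u)];
      return (slot.string_idx == string_idx) ? slot.string_ref : 0u;  // 0 => slow path
    }
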
// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 4289cab..202846a 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -331,6 +331,7 @@
#endif
// Save FP registers.
+ // For better performance, store d0 and d31 separately, so that all STPs are 16-byte aligned.
str d0, [sp, #8]
stp d1, d2, [sp, #16]
stp d3, d4, [sp, #32]
@@ -431,6 +432,7 @@
.macro RESTORE_SAVE_EVERYTHING_FRAME
// Restore FP registers.
+ // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
ldr d0, [sp, #8]
ldp d1, d2, [sp, #16]
ldp d3, d4, [sp, #32]
@@ -1784,11 +1786,48 @@
END art_quick_set64_static
/*
- * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
- * exception on error. On success the String is returned. w0 holds the string index. The fast
- * path check for hit in strings cache has already been performed.
+ * Entry from managed code to resolve a string. This stub first checks the
+ * dex cache for a matching string (the fast path) and, if not found, it
+ * allocates a String and delivers an exception on error.
+ * On success the String is returned. w0 holds the string index.
*/
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+ENTRY art_quick_resolve_string
+ ldr x1, [sp] // load referrer
+ ldr w2, [x1, #ART_METHOD_DECLARING_CLASS_OFFSET] // load declaring class
+ ldr x1, [x2, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] // load string dex cache
+ and x2, x0, #STRING_DEX_CACHE_SIZE_MINUS_ONE // get masked string index into x2
+ ldr x2, [x1, x2, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT] // load dex cache pair into x2
+ cmp x0, x2, lsr #32 // compare against upper 32 bits
+ bne .Lart_quick_resolve_string_slow_path
+ ubfx x0, x2, #0, #32 // extract lower 32 bits into x0
+#ifdef USE_READ_BARRIER
+ // Most common case: GC is not marking.
+ ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+ cbnz x3, .Lart_quick_resolve_string_marking
+#endif
+ ret
+
+// Slow path case, the index did not match.
+.Lart_quick_resolve_string_slow_path:
+ SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
+ mov x1, xSELF // pass Thread::Current
+ bl artResolveStringFromCode // (int32_t string_idx, Thread* self)
+ RESTORE_SAVE_REFS_ONLY_FRAME
+ RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+// GC is marking case, need to check the mark bit.
+.Lart_quick_resolve_string_marking:
+ ldr x3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ tbnz x3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
+ // Save LR so that we can return, also x1 for alignment purposes.
+ stp x1, xLR, [sp, #-16]! // Save x1, LR.
+ bl artReadBarrierMark // Get the marked string back.
+ ldp x1, xLR, [sp], #16 // Restore registers.
+.Lart_quick_resolve_string_no_rb:
+ ret
+
+END art_quick_resolve_string
// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2e9682e..d685ace 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1108,7 +1108,44 @@
ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB
END_FUNCTION art_quick_alloc_object_region_tlab
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+DEFINE_FUNCTION art_quick_resolve_string
+ SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx
+ movl FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx // get referrer
+ movl ART_METHOD_DECLARING_CLASS_OFFSET(%ecx), %ecx // get declaring class
+ movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %ecx // get string dex cache
+ movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %edx
+ andl %eax, %edx
+ shl LITERAL(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), %edx
+ addl %ecx, %edx
+ movlps (%edx), %xmm0 // load string idx and pointer to xmm0
+ movd %xmm0, %ecx // extract pointer
+ pshufd LITERAL(0x55), %xmm0, %xmm0 // shuffle index into lowest bits
+ movd %xmm0, %edx // extract index
+ cmp %edx, %eax
+ jne .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+ jz .Lart_quick_resolve_string_slow_path
+#endif
+ movl %ecx, %eax
+ RESTORE_SAVE_REFS_ONLY_FRAME
+ ret
+
+.Lart_quick_resolve_string_slow_path:
+ // Outgoing argument set up
+ subl LITERAL(8), %esp // push padding
+ CFI_ADJUST_CFA_OFFSET(8)
+ pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
+ CFI_ADJUST_CFA_OFFSET(4)
+ PUSH eax // pass arg1
+ call SYMBOL(artResolveStringFromCode)
+ addl LITERAL(16), %esp // pop arguments
+ CFI_ADJUST_CFA_OFFSET(-16)
+ RESTORE_SAVE_REFS_ONLY_FRAME
+ RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END_FUNCTION art_quick_resolve_string
+
ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 32768b0..647fe1d 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1330,7 +1330,36 @@
ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+DEFINE_FUNCTION art_quick_resolve_string
+ movq 8(%rsp), %rcx // get referrer
+ movl ART_METHOD_DECLARING_CLASS_OFFSET(%rcx), %ecx // get declaring class
+ movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %rcx // get string dex cache
+ movq LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %rdx
+ andq %rdi, %rdx
+ shlq LITERAL(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), %rdx
+ addq %rcx, %rdx
+ movq %rax, %rcx
+ movq (%rdx), %rdx
+ movq %rdx, %rax
+ movl %eax, %eax
+ shrq LITERAL(32), %rdx
+ cmp %rdx, %rdi
+ jne .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%rax)
+ jz .Lart_quick_resolve_string_slow_path
+#endif
+ ret
+.Lart_quick_resolve_string_slow_path:
+ SETUP_SAVE_REFS_ONLY_FRAME
+ movq %rcx, %rax
+ // Outgoing argument set up
+ movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
+ call SYMBOL(artResolveStringFromCode) // artResolveStringFromCode(arg0, referrer, Thread*)
+ RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
+ RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END_FUNCTION art_quick_resolve_string
+
ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 848f8e5..102b993 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -19,12 +19,15 @@
#if defined(__cplusplus)
#include "art_method.h"
+#include "base/bit_utils.h"
#include "gc/allocator/rosalloc.h"
#include "gc/heap.h"
#include "jit/jit.h"
#include "lock_word.h"
#include "mirror/class.h"
+#include "mirror/dex_cache.h"
#include "mirror/string.h"
+#include "utils/dex_cache_arrays_layout.h"
#include "runtime.h"
#include "thread.h"
#endif
diff --git a/runtime/base/arena_allocator_test.cc b/runtime/base/arena_allocator_test.cc
index 9de3cc4..fd48a3f 100644
--- a/runtime/base/arena_allocator_test.cc
+++ b/runtime/base/arena_allocator_test.cc
@@ -16,6 +16,7 @@
#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
+#include "base/memory_tool.h"
#include "gtest/gtest.h"
namespace art {
@@ -124,4 +125,221 @@
}
}
+TEST_F(ArenaAllocatorTest, AllocAlignment) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ for (size_t iterations = 0; iterations <= 10; ++iterations) {
+ for (size_t size = 1; size <= ArenaAllocator::kAlignment + 1; ++size) {
+ void* allocation = arena.Alloc(size);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(allocation))
+ << reinterpret_cast<uintptr_t>(allocation);
+ }
+ }
+}
+
+TEST_F(ArenaAllocatorTest, ReallocReuse) {
+ // Realloc does not reuse arenas when running under sanitization. So we cannot do those checks.
+ if (RUNNING_ON_MEMORY_TOOL != 0) {
+ printf("WARNING: TEST DISABLED FOR MEMORY_TOOL\n");
+ return;
+ }
+
+ {
+ // Case 1: small aligned allocation, aligned extend inside arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = ArenaAllocator::kAlignment * 2;
+ void* original_allocation = arena.Alloc(original_size);
+
+ const size_t new_size = ArenaAllocator::kAlignment * 3;
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_EQ(original_allocation, realloc_allocation);
+ }
+
+ {
+ // Case 2: small aligned allocation, non-aligned extend inside arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = ArenaAllocator::kAlignment * 2;
+ void* original_allocation = arena.Alloc(original_size);
+
+ const size_t new_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_EQ(original_allocation, realloc_allocation);
+ }
+
+ {
+ // Case 3: small non-aligned allocation, aligned extend inside arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+ void* original_allocation = arena.Alloc(original_size);
+
+ const size_t new_size = ArenaAllocator::kAlignment * 4;
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_EQ(original_allocation, realloc_allocation);
+ }
+
+ {
+ // Case 4: small non-aligned allocation, aligned non-extend inside arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+ void* original_allocation = arena.Alloc(original_size);
+
+ const size_t new_size = ArenaAllocator::kAlignment * 3;
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_EQ(original_allocation, realloc_allocation);
+ }
+
+ // The next part is brittle, as the default size for an arena is variable, and we don't know about
+ // sanitization.
+
+ {
+ // Case 5: large allocation, aligned extend into next arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = Arena::kDefaultSize - ArenaAllocator::kAlignment * 5;
+ void* original_allocation = arena.Alloc(original_size);
+
+ const size_t new_size = Arena::kDefaultSize + ArenaAllocator::kAlignment * 2;
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_NE(original_allocation, realloc_allocation);
+ }
+
+ {
+ // Case 6: large allocation, non-aligned extend into next arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = Arena::kDefaultSize -
+ ArenaAllocator::kAlignment * 4 -
+ ArenaAllocator::kAlignment / 2;
+ void* original_allocation = arena.Alloc(original_size);
+
+ const size_t new_size = Arena::kDefaultSize +
+ ArenaAllocator::kAlignment * 2 +
+ ArenaAllocator::kAlignment / 2;
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_NE(original_allocation, realloc_allocation);
+ }
+}
+
+TEST_F(ArenaAllocatorTest, ReallocAlignment) {
+ {
+ // Case 1: small aligned allocation, aligned extend inside arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = ArenaAllocator::kAlignment * 2;
+ void* original_allocation = arena.Alloc(original_size);
+ ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+ const size_t new_size = ArenaAllocator::kAlignment * 3;
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+ void* after_alloc = arena.Alloc(1);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+ }
+
+ {
+ // Case 2: small aligned allocation, non-aligned extend inside arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = ArenaAllocator::kAlignment * 2;
+ void* original_allocation = arena.Alloc(original_size);
+ ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+ const size_t new_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+ void* after_alloc = arena.Alloc(1);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+ }
+
+ {
+ // Case 3: small non-aligned allocation, aligned extend inside arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+ void* original_allocation = arena.Alloc(original_size);
+ ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+ const size_t new_size = ArenaAllocator::kAlignment * 4;
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+ void* after_alloc = arena.Alloc(1);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+ }
+
+ {
+ // Case 4: small non-aligned allocation, aligned non-extend inside arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+ void* original_allocation = arena.Alloc(original_size);
+ ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+ const size_t new_size = ArenaAllocator::kAlignment * 3;
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+ void* after_alloc = arena.Alloc(1);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+ }
+
+ // The next part is brittle, as the default size for an arena is variable, and we don't know about
+ // sanitization.
+
+ {
+ // Case 5: large allocation, aligned extend into next arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = Arena::kDefaultSize - ArenaAllocator::kAlignment * 5;
+ void* original_allocation = arena.Alloc(original_size);
+ ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+ const size_t new_size = Arena::kDefaultSize + ArenaAllocator::kAlignment * 2;
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+ void* after_alloc = arena.Alloc(1);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+ }
+
+ {
+ // Case 6: large allocation, non-aligned extend into next arena.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+
+ const size_t original_size = Arena::kDefaultSize -
+ ArenaAllocator::kAlignment * 4 -
+ ArenaAllocator::kAlignment / 2;
+ void* original_allocation = arena.Alloc(original_size);
+ ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+ const size_t new_size = Arena::kDefaultSize +
+ ArenaAllocator::kAlignment * 2 +
+ ArenaAllocator::kAlignment / 2;
+ void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+ void* after_alloc = arena.Alloc(1);
+ EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+ }
+}
+
+
} // namespace art
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index cbdf3dc..a5b0689 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -4058,7 +4058,7 @@
// Prepare JDWP ids for the reply.
JDWP::JdwpTag result_tag = BasicTagFromDescriptor(m->GetShorty());
const bool is_object_result = (result_tag == JDWP::JT_OBJECT);
- StackHandleScope<2> hs(soa.Self());
+ StackHandleScope<3> hs(soa.Self());
Handle<mirror::Object> object_result = hs.NewHandle(is_object_result ? result.GetL() : nullptr);
Handle<mirror::Throwable> exception = hs.NewHandle(soa.Self()->GetException());
soa.Self()->ClearException();
@@ -4097,10 +4097,17 @@
// unless we threw, in which case we return null.
DCHECK_EQ(JDWP::JT_VOID, result_tag);
if (exceptionObjectId == 0) {
- // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the
- // object registry.
- result_value = GetObjectRegistry()->Add(pReq->receiver.Read());
- result_tag = TagFromObject(soa, pReq->receiver.Read());
+ if (m->GetDeclaringClass()->IsStringClass()) {
+ // For string constructors, the new string is remapped to the receiver (stored in ref).
+ Handle<mirror::Object> decoded_ref = hs.NewHandle(soa.Self()->DecodeJObject(ref.get()));
+ result_value = gRegistry->Add(decoded_ref);
+ result_tag = TagFromObject(soa, decoded_ref.Get());
+ } else {
+ // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the
+ // object registry.
+ result_value = GetObjectRegistry()->Add(pReq->receiver.Read());
+ result_tag = TagFromObject(soa, pReq->receiver.Read());
+ }
} else {
result_value = 0;
result_tag = JDWP::JT_OBJECT;
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 7afe6f9..42816a0 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -435,10 +435,8 @@
gc_barrier_->Init(self, 0);
ThreadFlipVisitor thread_flip_visitor(this, heap_->use_tlab_);
FlipCallback flip_callback(this);
- heap_->ThreadFlipBegin(self); // Sync with JNI critical calls.
size_t barrier_count = Runtime::Current()->FlipThreadRoots(
&thread_flip_visitor, &flip_callback, this);
- heap_->ThreadFlipEnd(self);
{
ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
gc_barrier_->Increment(self, barrier_count);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 39f26e7..638c1d8 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -878,9 +878,13 @@
MutexLock mu(self, *thread_flip_lock_);
bool has_waited = false;
uint64_t wait_start = NanoTime();
- while (thread_flip_running_) {
- has_waited = true;
- thread_flip_cond_->Wait(self);
+ if (thread_flip_running_) {
+ TimingLogger::ScopedTiming split("IncrementDisableThreadFlip",
+ GetCurrentGcIteration()->GetTimings());
+ while (thread_flip_running_) {
+ has_waited = true;
+ thread_flip_cond_->Wait(self);
+ }
}
++disable_thread_flip_count_;
if (has_waited) {
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index ae6c321..c87312b 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -454,8 +454,7 @@
const std::string& image_filename,
bool is_zygote,
bool is_global_cache,
- bool is_system,
- bool relocated_version_used,
+ bool validate_oat_file,
std::string* error_msg)
SHARED_REQUIRES(Locks::mutator_lock_) {
// Note that we must not use the file descriptor associated with
@@ -483,7 +482,7 @@
// file name.
return Init(image_filename.c_str(),
image_location,
- !(is_system || relocated_version_used),
+ validate_oat_file,
/* oat_file */nullptr,
error_msg);
}
@@ -1473,8 +1472,7 @@
cache_filename,
is_zygote,
is_global_cache,
- /* is_system */ false,
- /* relocated_version_used */ true,
+ /* validate_oat_file */ false,
&local_error_msg);
if (relocated_space != nullptr) {
return relocated_space;
@@ -1491,8 +1489,7 @@
cache_filename,
is_zygote,
is_global_cache,
- /* is_system */ false,
- /* relocated_version_used */ true,
+ /* validate_oat_file */ true,
&local_error_msg);
if (cache_space != nullptr) {
return cache_space;
@@ -1512,8 +1509,7 @@
system_filename,
is_zygote,
is_global_cache,
- /* is_system */ true,
- /* relocated_version_used */ false,
+ /* validate_oat_file */ false,
&local_error_msg);
if (system_space != nullptr) {
return system_space;
@@ -1538,8 +1534,7 @@
cache_filename,
is_zygote,
is_global_cache,
- /* is_system */ false,
- /* relocated_version_used */ true,
+ /* validate_oat_file */ false,
&local_error_msg);
if (patched_space != nullptr) {
return patched_space;
@@ -1568,8 +1563,7 @@
cache_filename,
is_zygote,
is_global_cache,
- /* is_system */ false,
- /* relocated_version_used */ true,
+ /* validate_oat_file */ false,
&local_error_msg);
if (compiled_space != nullptr) {
return compiled_space;
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 716c23d..40b71c4 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -70,6 +70,16 @@
DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k32).Int32Value())))
#define ART_METHOD_QUICK_CODE_OFFSET_64 48
DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).Int32Value())))
+#define ART_METHOD_DECLARING_CLASS_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DECLARING_CLASS_OFFSET), (static_cast<int32_t>(art::ArtMethod:: DeclaringClassOffset().Int32Value())))
+#define DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET 40
+DEFINE_CHECK_EQ(static_cast<int32_t>(DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET), (static_cast<int32_t>(art::mirror::Class:: DexCacheStringsOffset().Int32Value())))
+#define STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT 3
+DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), (static_cast<int32_t>(art::WhichPowerOf2(sizeof(art::mirror::StringDexCachePair)))))
+#define STRING_DEX_CACHE_SIZE_MINUS_ONE 1023
+DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_SIZE_MINUS_ONE), (static_cast<int32_t>(art::mirror::DexCache::kDexCacheStringCacheSize - 1)))
+#define STRING_DEX_CACHE_HASH_BITS 10
+DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_HASH_BITS), (static_cast<int32_t>(art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize))))
#define MIN_LARGE_OBJECT_THRESHOLD 0x3000
DEFINE_CHECK_EQ(static_cast<size_t>(MIN_LARGE_OBJECT_THRESHOLD), (static_cast<size_t>(art::gc::Heap::kMinLargeObjectThreshold)))
#define LOCK_WORD_STATE_SHIFT 30
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index f1f7f42..101c9a1 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -20,6 +20,9 @@
#include "common_throws.h"
#include "interpreter_common.h"
+#include "interpreter_goto_table_impl.h"
+#include "interpreter_mterp_impl.h"
+#include "interpreter_switch_impl.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change.h"
#include "ScopedLocalRef.h"
@@ -242,28 +245,6 @@
static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
-#if defined(__clang__)
-// Clang 3.4 fails to build the goto interpreter implementation.
-template<bool do_access_check, bool transaction_active>
-JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) {
- LOG(FATAL) << "UNREACHABLE";
- UNREACHABLE();
-}
-// Explicit definitions of ExecuteGotoImpl.
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
- ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
- ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
- ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
- ShadowFrame& shadow_frame, JValue result_register);
-#endif
-
static inline JValue Execute(
Thread* self,
const DexFile::CodeItem* code_item,
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index acdc270..7b38473 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -65,21 +65,6 @@
namespace art {
namespace interpreter {
-// External references to all interpreter implementations.
-
-template<bool do_access_check, bool transaction_active>
-extern JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
- ShadowFrame& shadow_frame, JValue result_register,
- bool interpret_one_instruction);
-
-template<bool do_access_check, bool transaction_active>
-extern JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item,
- ShadowFrame& shadow_frame, JValue result_register);
-
-// Mterp does not support transactions or access check, thus no templated versions.
-extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item,
- ShadowFrame* shadow_frame, JValue* result_register);
-
void ThrowNullPointerExceptionFromInterpreter()
SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 6aba898..37dd63b 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -14,18 +14,29 @@
* limitations under the License.
*/
+#include "interpreter_goto_table_impl.h"
+
+// Common includes
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "stack.h"
+#include "thread.h"
+
+// Clang compiles the GOTO interpreter very slowly. So we skip it. These are the implementation
+// details only necessary when compiling it.
#if !defined(__clang__)
-// Clang 3.4 fails to build the goto interpreter implementation.
-
-
#include "experimental_flags.h"
#include "interpreter_common.h"
#include "jit/jit.h"
#include "safe_math.h"
+#endif
namespace art {
namespace interpreter {
+#if !defined(__clang__)
+
// In the following macros, we expect the following local variables exist:
// - "self": the current Thread*.
// - "inst" : the current Instruction*.
@@ -2557,20 +2568,40 @@
} // NOLINT(readability/fn_size)
// Explicit definitions of ExecuteGotoImpl.
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+template
JValue ExecuteGotoImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+template
JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
ShadowFrame& shadow_frame, JValue result_register);
+#else
+
+template<bool do_access_check, bool transaction_active>
+JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) {
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+}
+// Explicit definitions of ExecuteGotoImpl.
+template<>
+JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
+ ShadowFrame& shadow_frame, JValue result_register);
+template<>
+JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
+ ShadowFrame& shadow_frame, JValue result_register);
+template<>
+JValue ExecuteGotoImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
+ ShadowFrame& shadow_frame, JValue result_register);
+template<>
+JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
+ ShadowFrame& shadow_frame, JValue result_register);
+#endif
+
} // namespace interpreter
} // namespace art
-
-#endif
diff --git a/runtime/interpreter/interpreter_goto_table_impl.h b/runtime/interpreter/interpreter_goto_table_impl.h
new file mode 100644
index 0000000..bb9be88
--- /dev/null
+++ b/runtime/interpreter/interpreter_goto_table_impl.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "jvalue.h"
+
+namespace art {
+
+class ShadowFrame;
+class Thread;
+
+namespace interpreter {
+
+template<bool do_access_check, bool transaction_active>
+JValue ExecuteGotoImpl(Thread* self,
+ const DexFile::CodeItem* code_item,
+ ShadowFrame& shadow_frame,
+ JValue result_register) SHARED_REQUIRES(Locks::mutator_lock_);
+
+} // namespace interpreter
+} // namespace art
+
+#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_
diff --git a/runtime/interpreter/interpreter_mterp_impl.h b/runtime/interpreter/interpreter_mterp_impl.h
new file mode 100644
index 0000000..322df4e
--- /dev/null
+++ b/runtime/interpreter/interpreter_mterp_impl.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "jvalue.h"
+
+namespace art {
+
+class ShadowFrame;
+class Thread;
+
+namespace interpreter {
+
+// Mterp does not support transactions or access check, thus no templated versions.
+extern "C" bool ExecuteMterpImpl(Thread* self,
+ const DexFile::CodeItem* code_item,
+ ShadowFrame* shadow_frame,
+ JValue* result_register) SHARED_REQUIRES(Locks::mutator_lock_);
+
+} // namespace interpreter
+} // namespace art
+
+#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 582fc9b..fd37737 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include "interpreter_switch_impl.h"
+
#include "base/enums.h"
#include "experimental_flags.h"
#include "interpreter_common.h"
@@ -2336,19 +2338,19 @@
} // NOLINT(readability/fn_size)
// Explicit definitions of ExecuteSwitchImpl.
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
JValue ExecuteSwitchImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
ShadowFrame& shadow_frame, JValue result_register,
bool interpret_one_instruction);
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
JValue ExecuteSwitchImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
ShadowFrame& shadow_frame, JValue result_register,
bool interpret_one_instruction);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+template
JValue ExecuteSwitchImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
ShadowFrame& shadow_frame, JValue result_register,
bool interpret_one_instruction);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+template
JValue ExecuteSwitchImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
ShadowFrame& shadow_frame, JValue result_register,
bool interpret_one_instruction);
diff --git a/runtime/interpreter/interpreter_switch_impl.h b/runtime/interpreter/interpreter_switch_impl.h
new file mode 100644
index 0000000..90ec908
--- /dev/null
+++ b/runtime/interpreter/interpreter_switch_impl.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "jvalue.h"
+
+namespace art {
+
+class ShadowFrame;
+class Thread;
+
+namespace interpreter {
+
+template<bool do_access_check, bool transaction_active>
+JValue ExecuteSwitchImpl(Thread* self,
+ const DexFile::CodeItem* code_item,
+ ShadowFrame& shadow_frame,
+ JValue result_register,
+ bool interpret_one_instruction) SHARED_REQUIRES(Locks::mutator_lock_);
+
+} // namespace interpreter
+} // namespace art
+
+#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_
diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk
index a34a841..e39af2d 100644
--- a/runtime/simulator/Android.mk
+++ b/runtime/simulator/Android.mk
@@ -22,6 +22,9 @@
code_simulator.cc \
code_simulator_arm64.cc
+LIBART_SIMULATOR_CFLAGS := \
+ -DVIXL_INCLUDE_SIMULATOR_AARCH64
+
# $(1): target or host
# $(2): ndebug or debug
define build-libart-simulator
@@ -54,6 +57,7 @@
LOCAL_MODULE_CLASS := SHARED_LIBRARIES
LOCAL_SRC_FILES := $$(LIBART_SIMULATOR_SRC_FILES)
+ LOCAL_CFLAGS := $$(LIBART_SIMULATOR_CFLAGS)
ifeq ($$(art_target_or_host),target)
$(call set-target-local-clang-vars)
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 3aa1fc2..216d8a7 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -224,6 +224,7 @@
thread_to_pass = this;
}
MutexLock mu(thread_to_pass, *Locks::thread_suspend_count_lock_);
+ ScopedTransitioningToRunnable scoped_transitioning_to_runnable(this);
old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index b35a614..79b9f02 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1217,10 +1217,8 @@
ScopedTrace trace(__FUNCTION__);
VLOG(threads) << this << " self-suspending";
// Make thread appear suspended to other threads, release mutator_lock_.
- tls32_.suspended_at_suspend_check = true;
// Transition to suspended and back to runnable, re-acquire share on mutator_lock_.
ScopedThreadSuspension(this, kSuspended);
- tls32_.suspended_at_suspend_check = false;
VLOG(threads) << this << " self-reviving";
}
@@ -1433,6 +1431,12 @@
if (o == nullptr) {
os << "an unknown object";
} else {
+ if (kUseReadBarrier && Thread::Current()->GetIsGcMarking()) {
+ // We may call Thread::Dump() in the middle of the CC thread flip and this thread's stack
+ // may have not been flipped yet and "o" may be a from-space (stale) ref, in which case the
+ // IdentityHashCode call below will crash. So explicitly mark/forward it here.
+ o = ReadBarrier::Mark(o);
+ }
if ((o->GetLockWord(false).GetState() == LockWord::kThinLocked) &&
Locks::mutator_lock_->IsExclusiveHeld(Thread::Current())) {
// Getting the identity hashcode here would result in lock inflation and suspension of the
@@ -1635,7 +1639,7 @@
}
tlsPtr_.flip_function = nullptr;
tlsPtr_.thread_local_mark_stack = nullptr;
- tls32_.suspended_at_suspend_check = false;
+ tls32_.is_transitioning_to_runnable = false;
}
bool Thread::IsStillStarting() const {
@@ -1773,7 +1777,7 @@
CHECK(tlsPtr_.checkpoint_function == nullptr);
CHECK_EQ(checkpoint_overflow_.size(), 0u);
CHECK(tlsPtr_.flip_function == nullptr);
- CHECK_EQ(tls32_.suspended_at_suspend_check, false);
+ CHECK_EQ(tls32_.is_transitioning_to_runnable, false);
// Make sure we processed all deoptimization requests.
CHECK(tlsPtr_.deoptimization_context_stack == nullptr) << "Missed deoptimization";
diff --git a/runtime/thread.h b/runtime/thread.h
index 840b781..1c2d4ab 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1085,8 +1085,12 @@
return tlsPtr_.nested_signal_state;
}
- bool IsSuspendedAtSuspendCheck() const {
- return tls32_.suspended_at_suspend_check;
+ bool IsTransitioningToRunnable() const {
+ return tls32_.is_transitioning_to_runnable;
+ }
+
+ void SetIsTransitioningToRunnable(bool value) {
+ tls32_.is_transitioning_to_runnable = value;
}
void PushVerifier(verifier::MethodVerifier* verifier);
@@ -1264,7 +1268,7 @@
suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
thread_exit_check_count(0), handling_signal_(false),
- suspended_at_suspend_check(false), ready_for_debug_invoke(false),
+ is_transitioning_to_runnable(false), ready_for_debug_invoke(false),
debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true),
disable_thread_flip_count(0) {
}
@@ -1306,10 +1310,10 @@
// True if signal is being handled by this thread.
bool32_t handling_signal_;
- // True if the thread is suspended in FullSuspendCheck(). This is
- // used to distinguish runnable threads that are suspended due to
- // a normal suspend check from other threads.
- bool32_t suspended_at_suspend_check;
+ // True if the thread is in TransitionFromSuspendedToRunnable(). This is used to distinguish the
+ // non-runnable threads (e.g. kNative, kWaiting) that are about to transition to runnable from
+ // the rest of them.
+ bool32_t is_transitioning_to_runnable;
// True if the thread has been suspended by a debugger event. This is
// used to invoke method from the debugger which is only allowed when
@@ -1588,6 +1592,26 @@
Thread* const self_;
};
+class ScopedTransitioningToRunnable : public ValueObject {
+ public:
+ explicit ScopedTransitioningToRunnable(Thread* self)
+ : self_(self) {
+ DCHECK_EQ(self, Thread::Current());
+ if (kUseReadBarrier) {
+ self_->SetIsTransitioningToRunnable(true);
+ }
+ }
+
+ ~ScopedTransitioningToRunnable() {
+ if (kUseReadBarrier) {
+ self_->SetIsTransitioningToRunnable(false);
+ }
+ }
+
+ private:
+ Thread* const self_;
+};
+
std::ostream& operator<<(std::ostream& os, const Thread& thread);
std::ostream& operator<<(std::ostream& os, const StackedShadowFrameType& thread);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 419ecec..688514c 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -405,6 +405,8 @@
Locks::thread_suspend_count_lock_->AssertNotHeld(self);
CHECK_NE(self->GetState(), kRunnable);
+ collector->GetHeap()->ThreadFlipBegin(self); // Sync with JNI critical calls.
+
SuspendAllInternal(self, self, nullptr);
// Run the flip callback for the collector.
@@ -414,26 +416,31 @@
collector->RegisterPause(NanoTime() - start_time);
// Resume runnable threads.
- std::vector<Thread*> runnable_threads;
+ size_t runnable_thread_count = 0;
std::vector<Thread*> other_threads;
{
+ TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings());
MutexLock mu(self, *Locks::thread_list_lock_);
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
--suspend_all_count_;
for (const auto& thread : list_) {
+ // Set the flip function for all threads because Thread::DumpState/DumpJavaStack() (invoked by
+ // a checkpoint) may cause the flip function to be run for a runnable/suspended thread before
+ // a runnable thread runs it for itself or we run it for a suspended thread below.
+ thread->SetFlipFunction(thread_flip_visitor);
if (thread == self) {
continue;
}
- // Set the flip function for both runnable and suspended threads
- // because Thread::DumpState/DumpJavaStack() (invoked by a
- // checkpoint) may cause the flip function to be run for a
- // runnable/suspended thread before a runnable threads runs it
- // for itself or we run it for a suspended thread below.
- thread->SetFlipFunction(thread_flip_visitor);
- if (thread->IsSuspendedAtSuspendCheck()) {
+ // Resume early the threads that were runnable but are suspended just for this thread flip or
+ // are about to transition from non-runnable (e.g. kNative at the SOA entry in a JNI function)
+ // to runnable (both cases waiting inside Thread::TransitionFromSuspendedToRunnable), or are
+ // waiting for the thread flip to end at the JNI critical section entry (kWaitingForGcThreadFlip).
+ ThreadState state = thread->GetState();
+ if (state == kWaitingForGcThreadFlip ||
+ thread->IsTransitioningToRunnable()) {
// The thread will resume right after the broadcast.
thread->ModifySuspendCount(self, -1, nullptr, false);
- runnable_threads.push_back(thread);
+ ++runnable_thread_count;
} else {
other_threads.push_back(thread);
}
@@ -441,8 +448,11 @@
Thread::resume_cond_->Broadcast(self);
}
+ collector->GetHeap()->ThreadFlipEnd(self);
+
// Run the closure on the other threads and let them resume.
{
+ TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings());
ReaderMutexLock mu(self, *Locks::mutator_lock_);
for (const auto& thread : other_threads) {
Closure* flip_func = thread->GetFlipFunction();
@@ -451,11 +461,15 @@
}
}
// Run it for self.
- thread_flip_visitor->Run(self);
+ Closure* flip_func = self->GetFlipFunction();
+ if (flip_func != nullptr) {
+ flip_func->Run(self);
+ }
}
// Resume other threads.
{
+ TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (const auto& thread : other_threads) {
thread->ModifySuspendCount(self, -1, nullptr, false);
@@ -463,7 +477,7 @@
Thread::resume_cond_->Broadcast(self);
}
- return runnable_threads.size() + other_threads.size() + 1; // +1 for self.
+ return runnable_thread_count + other_threads.size() + 1; // +1 for self.
}
void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
diff --git a/tools/cpp-define-generator/constant_dexcache.def b/tools/cpp-define-generator/constant_dexcache.def
new file mode 100644
index 0000000..fd197f2
--- /dev/null
+++ b/tools/cpp-define-generator/constant_dexcache.def
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/dex_cache.h" // art::mirror::DexCache, StringDexCachePair
+#endif
+
+DEFINE_EXPR(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT, int32_t, art::WhichPowerOf2(sizeof(art::mirror::StringDexCachePair)))
+DEFINE_EXPR(STRING_DEX_CACHE_SIZE_MINUS_ONE, int32_t, art::mirror::DexCache::kDexCacheStringCacheSize - 1)
+DEFINE_EXPR(STRING_DEX_CACHE_HASH_BITS, int32_t,
+ art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize))
\ No newline at end of file
diff --git a/tools/cpp-define-generator/offset_dexcache.def b/tools/cpp-define-generator/offset_dexcache.def
index 3b26518..4b9d481 100644
--- a/tools/cpp-define-generator/offset_dexcache.def
+++ b/tools/cpp-define-generator/offset_dexcache.def
@@ -19,16 +19,27 @@
#if defined(DEFINE_INCLUDE_DEPENDENCIES)
#include "art_method.h" // art::ArtMethod
#include "base/enums.h" // PointerSize
+#include "mirror/dex_cache.h" // art::DexCache
#endif
-#define DEFINE_ART_METHOD_OFFSET(field_name, method_name) \
+#define DEFINE_ART_METHOD_OFFSET_SIZED(field_name, method_name) \
DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET_32, int32_t, art::ArtMethod::method_name##Offset(art::PointerSize::k32).Int32Value()) \
DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET_64, int32_t, art::ArtMethod::method_name##Offset(art::PointerSize::k64).Int32Value())
+#define DEFINE_ART_METHOD_OFFSET(field_name, method_name) \
+ DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET, int32_t, art::ArtMethod::method_name##Offset().Int32Value())
+
+#define DEFINE_DECLARING_CLASS_OFFSET(field_name, method_name) \
+ DEFINE_EXPR(DECLARING_CLASS_ ## field_name ## _OFFSET, int32_t, art::mirror::Class::method_name##Offset().Int32Value())
+
// New macro suffix Method Name (of the Offset method)
-DEFINE_ART_METHOD_OFFSET(DEX_CACHE_METHODS, DexCacheResolvedMethods)
-DEFINE_ART_METHOD_OFFSET(DEX_CACHE_TYPES, DexCacheResolvedTypes)
-DEFINE_ART_METHOD_OFFSET(JNI, EntryPointFromJni)
-DEFINE_ART_METHOD_OFFSET(QUICK_CODE, EntryPointFromQuickCompiledCode)
+DEFINE_ART_METHOD_OFFSET_SIZED(DEX_CACHE_METHODS, DexCacheResolvedMethods)
+DEFINE_ART_METHOD_OFFSET_SIZED(DEX_CACHE_TYPES, DexCacheResolvedTypes)
+DEFINE_ART_METHOD_OFFSET_SIZED(JNI, EntryPointFromJni)
+DEFINE_ART_METHOD_OFFSET_SIZED(QUICK_CODE, EntryPointFromQuickCompiledCode)
+DEFINE_ART_METHOD_OFFSET(DECLARING_CLASS, DeclaringClass)
+DEFINE_DECLARING_CLASS_OFFSET(DEX_CACHE_STRINGS, DexCacheStrings)
#undef DEFINE_ART_METHOD_OFFSET
+#undef DEFINE_ART_METHOD_OFFSET_SIZED
+#undef DEFINE_DECLARING_CLASS_OFFSET
diff --git a/tools/cpp-define-generator/offsets_all.def b/tools/cpp-define-generator/offsets_all.def
index d2d8777..13371a1 100644
--- a/tools/cpp-define-generator/offsets_all.def
+++ b/tools/cpp-define-generator/offsets_all.def
@@ -48,6 +48,7 @@
// TODO: MIRROR_*_ARRAY offsets (depends on header size)
// TODO: MIRROR_STRING offsets (depends on header size)
#include "offset_dexcache.def"
+#include "constant_dexcache.def"
#include "constant_heap.def"
#include "constant_lockword.def"
#include "constant_globals.def"
diff --git a/tools/javafuzz/README.md b/tools/javafuzz/README.md
index 35c057c..68fc171 100644
--- a/tools/javafuzz/README.md
+++ b/tools/javafuzz/README.md
@@ -8,7 +8,7 @@
or using various target architectures. Any difference between the outputs
(**divergence**) may indicate a bug in one of the execution modes.
-JavaFuzz can be combined with dexfuzz to get multilayered fuzz testing.
+JavaFuzz can be combined with dexfuzz to get multi-layered fuzz testing.
How to run JavaFuzz
===================
@@ -40,19 +40,20 @@
===============================
run_java_fuzz_test.py [--num_tests]
+ [--device]
[--mode1=mode] [--mode2=mode]
where
- --num_tests: number of tests to run (10000 by default)
- --mode1:m1
- --mode2:m2
- with m1 != m2, and one of
- ri : reference implementation on host (default for m1)
- hint : Art interpreter on host
- hopt : Art optimizing on host (default for m2)
- tint : Art interpreter on target
- topt : Art optimizing on target
+ --num_tests : number of tests to run (10000 by default)
+ --device : target device serial number (passed to adb -s)
+ --mode1 : m1
+ --mode2 : m2, with m1 != m2, where each mode is one of
+ ri = reference implementation on host (default for m1)
+ hint = Art interpreter on host
+ hopt = Art optimizing on host (default for m2)
+ tint = Art interpreter on target
+ topt = Art optimizing on target
Background
==========
@@ -67,14 +68,15 @@
and flaws still linger in the system.
Over the years, fuzz testing has gained popularity as a testing technique for
-discovering such lingering bugs, including bugs that can bring down a system in
-an unexpected way. Fuzzing refers to feeding a large amount of random data as
-input to a system in an attempt to find bugs or make it crash. Mutation-based
-fuzz testing is a special form of fuzzing that applies small random changes to
-existing inputs in order to detect shortcomings in a system. Profile-guided or
-coverage-guided fuzzing adds a direction to the way these random changes are
-applied. Multilayer approaches generate random inputs that are subsequently
-mutated at various stages of execution.
+discovering such lingering bugs, including bugs that can bring down a system
+in an unexpected way. Fuzzing refers to feeding a large amount of random data
+as input to a system in an attempt to find bugs or make it crash. Generation-
+based fuzz testing constructs random, but properly formatted input data.
+Mutation-based fuzz testing applies small random changes to existing inputs
+in order to detect shortcomings in a system. Profile-guided or coverage-guided
+fuzzing adds a direction to the way these random changes are applied. Multi-
+layered approaches generate random inputs that are subsequently mutated at
+various stages of execution.
The randomness of fuzz testing implies that the size and scope of testing is no
longer bounded. Every new run can potentially discover bugs and crashes that were
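
For the usage section above, an example invocation combining the new --device flag with the existing modes (SERIAL is a placeholder for whatever adb -s accepts):

    run_java_fuzz_test.py --num_tests=100 --device=SERIAL --mode1=ri --mode2=topt
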
diff --git a/tools/javafuzz/javafuzz.cc b/tools/javafuzz/javafuzz.cc
index 4e6e978..161ae0a 100644
--- a/tools/javafuzz/javafuzz.cc
+++ b/tools/javafuzz/javafuzz.cc
@@ -53,7 +53,9 @@
* to preserve the property that a given version of JavaFuzz yields the same
* fuzzed Java program for a deterministic random seed.
*/
-const char* VERSION = "1.0";
+const char* VERSION = "1.1";
+
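+// Maximum array size for each dimension count (index = number of dimensions);
+// sizes shrink as dimensionality grows, keeping the worst-case element count
+// (size^dims) roughly constant.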
+static const uint32_t MAX_DIMS[11] = { 0, 1000, 32, 10, 6, 4, 3, 3, 2, 2, 2 };
/**
* A class that generates a random Java program that compiles correctly. The program
@@ -83,8 +85,8 @@
fuzz_loop_nest_(loop_nest),
return_type_(randomType()),
array_type_(randomType()),
- array_dim_(random1(3)),
- array_size_(random1(10)),
+ array_dim_(random1(10)),
+ array_size_(random1(MAX_DIMS[array_dim_])),
indentation_(0),
expr_depth_(0),
stmt_length_(0),
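
A quick way to check the MAX_DIMS table above: the worst-case element count, size^dims, stays in the same ballpark for every dimension count. A throwaway sketch (not part of the change):

    #include <cstdint>
    #include <cstdio>

    static const uint32_t MAX_DIMS[11] = { 0, 1000, 32, 10, 6, 4, 3, 3, 2, 2, 2 };

    int main() {
      for (uint32_t d = 1; d <= 10; d++) {
        uint64_t elements = 1;
        for (uint32_t i = 0; i < d; i++) {
          elements *= MAX_DIMS[d];
        }
        // Prints 1000, 1024, 1000, 1296, 1024, 729, 2187, 256, 512, 1024.
        printf("dims=%2u  worst-case elements=%llu\n", d, (unsigned long long) elements);
      }
      return 0;
    }
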
@@ -169,7 +171,7 @@
// Emit an unary operator (same type in-out).
void emitUnaryOp(Type tp) {
if (tp == kBoolean) {
- fputs("!", out_);
+ fputc('!', out_);
} else if (isInteger(tp)) {
EMIT(kIntUnaryOps);
} else { // isFP(tp)
@@ -239,16 +241,21 @@
case 6: fputs("(long)(int)(long)", out_); return kLong;
}
} else if (tp == kFloat) {
- switch (random1(3)) {
+ switch (random1(4)) {
case 1: fputs("(float)", out_); return kInt;
case 2: fputs("(float)", out_); return kLong;
case 3: fputs("(float)", out_); return kDouble;
+ // Narrowing-widening.
+ case 4: fputs("(float)(int)(float)", out_); return kFloat;
}
} else if (tp == kDouble) {
- switch (random1(3)) {
+ switch (random1(5)) {
case 1: fputs("(double)", out_); return kInt;
case 2: fputs("(double)", out_); return kLong;
case 3: fputs("(double)", out_); return kFloat;
+ // Narrowing-widening.
+ case 4: fputs("(double)(int)(double)", out_); return kDouble;
+ case 5: fputs("(double)(float)(double)", out_); return kDouble;
}
}
return tp; // nothing suitable, just keep type
@@ -273,15 +280,17 @@
// Emit an unary intrinsic (out type given, new suitable in type picked).
Type emitIntrinsic1(Type tp) {
if (tp == kBoolean) {
- switch (random1(4)) {
+ switch (random1(6)) {
case 1: fputs("Float.isNaN", out_); return kFloat;
- case 2: fputs("Float.isInfinite", out_); return kFloat;
- case 3: fputs("Double.isNaN", out_); return kDouble;
- case 4: fputs("Double.isInfinite", out_); return kDouble;
+ case 2: fputs("Float.isFinite", out_); return kFloat;
+ case 3: fputs("Float.isInfinite", out_); return kFloat;
+ case 4: fputs("Double.isNaN", out_); return kDouble;
+ case 5: fputs("Double.isFinite", out_); return kDouble;
+ case 6: fputs("Double.isInfinite", out_); return kDouble;
}
} else if (isInteger(tp)) {
const char* prefix = tp == kLong ? "Long" : "Integer";
- switch (random1(9)) {
+ switch (random1(13)) {
case 1: fprintf(out_, "%s.highestOneBit", prefix); break;
case 2: fprintf(out_, "%s.lowestOneBit", prefix); break;
case 3: fprintf(out_, "%s.numberOfLeadingZeros", prefix); break;
@@ -290,15 +299,27 @@
case 6: fprintf(out_, "%s.signum", prefix); break;
case 7: fprintf(out_, "%s.reverse", prefix); break;
case 8: fprintf(out_, "%s.reverseBytes", prefix); break;
- case 9: fputs("Math.abs", out_); break;
+ case 9: fputs("Math.incrementExact", out_); break;
+ case 10: fputs("Math.decrementExact", out_); break;
+ case 11: fputs("Math.negateExact", out_); break;
+ case 12: fputs("Math.abs", out_); break;
+ case 13: fputs("Math.round", out_);
+ return tp == kLong ? kDouble : kFloat;
}
} else { // isFP(tp)
- switch (random1(5)) {
+ switch (random1(6)) {
case 1: fputs("Math.abs", out_); break;
case 2: fputs("Math.ulp", out_); break;
case 3: fputs("Math.signum", out_); break;
case 4: fputs("Math.nextUp", out_); break;
case 5: fputs("Math.nextDown", out_); break;
+ case 6: if (tp == kDouble) {
+ fputs("Double.longBitsToDouble", out_);
+ return kLong;
+ } else {
+ fputs("Float.intBitsToFloat", out_);
+ return kInt;
+ }
}
}
return tp; // same type in-out
@@ -314,15 +335,27 @@
}
} else if (isInteger(tp)) {
const char* prefix = tp == kLong ? "Long" : "Integer";
- switch (random1(3)) {
+ switch (random1(11)) {
case 1: fprintf(out_, "%s.compare", prefix); break;
- case 2: fputs("Math.min", out_); break;
- case 3: fputs("Math.max", out_); break;
+ case 2: fprintf(out_, "%s.sum", prefix); break;
+ case 3: fprintf(out_, "%s.min", prefix); break;
+ case 4: fprintf(out_, "%s.max", prefix); break;
+ case 5: fputs("Math.min", out_); break;
+ case 6: fputs("Math.max", out_); break;
+ case 7: fputs("Math.floorDiv", out_); break;
+ case 8: fputs("Math.floorMod", out_); break;
+ case 9: fputs("Math.addExact", out_); break;
+ case 10: fputs("Math.subtractExact", out_); break;
+ case 11: fputs("Math.multiplyExact", out_); break;
}
} else { // isFP(tp)
- switch (random1(2)) {
- case 1: fputs("Math.min", out_); break;
- case 2: fputs("Math.max", out_); break;
+ const char* prefix = tp == kDouble ? "Double" : "Float";
+ switch (random1(5)) {
+ case 1: fprintf(out_, "%s.sum", prefix); break;
+ case 2: fprintf(out_, "%s.min", prefix); break;
+ case 3: fprintf(out_, "%s.max", prefix); break;
+ case 4: fputs("Math.min", out_); break;
+ case 5: fputs("Math.max", out_); break;
}
}
return tp; // same type in-out
@@ -358,12 +391,24 @@
// Emit miscellaneous constructs.
void emitMisc(Type tp) {
- switch (tp) {
- case kBoolean: fputs("this instanceof Test", out_); break;
- case kInt: fputs("mArray.length", out_); break;
- case kLong: fputs("Long.MAX_VALUE", out_); break;
- case kFloat: fputs("Float.MAX_VALUE", out_); break;
- case kDouble: fputs("Double.MAX_VALUE", out_); break;
+ if (tp == kBoolean) {
+ fputs("this instanceof Test", out_);
+ } else if (isInteger(tp)) {
+ const char* prefix = tp == kLong ? "Long" : "Integer";
+ switch (random1(2)) {
+ case 1: fprintf(out_, "%s.MIN_VALUE", prefix); break;
+ case 2: fprintf(out_, "%s.MAX_VALUE", prefix); break;
+ }
+ } else { // isFP(tp)
+ const char* prefix = tp == kDouble ? "Double" : "Float";
+ switch (random1(6)) {
+ case 1: fprintf(out_, "%s.MIN_NORMAL", prefix); break;
+ case 2: fprintf(out_, "%s.MIN_VALUE", prefix); break;
+ case 3: fprintf(out_, "%s.MAX_VALUE", prefix); break;
+ case 4: fprintf(out_, "%s.POSITIVE_INFINITY", prefix); break;
+ case 5: fprintf(out_, "%s.NEGATIVE_INFINITY", prefix); break;
+ case 6: fprintf(out_, "%s.NaN", prefix); break;
+ }
}
}
@@ -412,10 +457,10 @@
void emitLiteral(Type tp) {
switch (tp) {
case kBoolean: fputs(random1(2) == 1 ? "true" : "false", out_); break;
- case kInt: fprintf(out_, "%d", random0(100)); break;
- case kLong: fprintf(out_, "%dL", random0(100)); break;
- case kFloat: fprintf(out_, "%d.0f", random0(100)); break;
- case kDouble: fprintf(out_, "%d.0", random0(100)); break;
+ case kInt: fprintf(out_, "%d", random()); break;
+ case kLong: fprintf(out_, "%dL", random()); break;
+ case kFloat: fprintf(out_, "%d.0f", random()); break;
+ case kDouble: fprintf(out_, "%d.0", random()); break;
}
}
@@ -433,17 +478,6 @@
return false;
}
- // Emit a loop variable, if available.
- bool emitLoopVariable(Type tp) {
- if (tp == kInt) {
- if (loop_nest_ > 0) {
- fprintf(out_, "i%u", random0(loop_nest_));
- return true;
- }
- }
- return false;
- }
-
// Emit a local variable, if available.
bool emitLocalVariable(Type tp) {
uint32_t locals = adjustLocal(tp, 0);
@@ -483,10 +517,6 @@
if (emitLocalVariable(tp))
return;
// FALL-THROUGH
- case 3:
- if (emitLoopVariable(tp))
- return;
- // FALL-THROUGH
default:
emitFieldVariable(tp);
break;
@@ -510,8 +540,9 @@
fputc('(', out_);
switch (random1(12)) { // favor binary operations
case 1:
- // Unary operator: ~x
+ // Unary operator: ~ x
emitUnaryOp(tp);
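+ // A separating space keeps the operator from fusing with a leading '-' of the
+ // operand (e.g. '- -5' rather than '--5', which would parse as a decrement).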
+ fputc(' ', out_);
emitExpression(tp);
break;
case 2:
@@ -761,7 +792,7 @@
bool mayFollow = false;
fputs("switch (", out_);
- emitExpression(kInt);
+ emitArrayIndex(); // restrict its range
fputs(") {\n", out_);
++if_nest_;
@@ -771,7 +802,7 @@
for (uint32_t i = 0; i < 2; i++) {
emitIndentation();
if (i == 0) {
- fprintf(out_, "case %d: {\n", random0(100));
+ fprintf(out_, "case %u: {\n", random0(array_size_));
} else {
fprintf(out_, "default: {\n");
}
@@ -977,6 +1008,11 @@
// Random integers.
//
+ // Return random integer.
+ int32_t random() {
+ return fuzz_random_engine_();
+ }
+
// Return random integer in range [0,max).
uint32_t random0(uint32_t max) {
std::uniform_int_distribution<uint32_t> gen(0, max - 1);
@@ -1025,7 +1061,7 @@
// Defaults.
uint32_t seed = time(NULL);
uint32_t expr_depth = 1;
- uint32_t stmt_length = 4;
+ uint32_t stmt_length = 8;
uint32_t if_nest = 2;
uint32_t loop_nest = 3;
diff --git a/tools/javafuzz/run_java_fuzz_test.py b/tools/javafuzz/run_java_fuzz_test.py
index 4f192e7..5f527b8 100755
--- a/tools/javafuzz/run_java_fuzz_test.py
+++ b/tools/javafuzz/run_java_fuzz_test.py
@@ -78,10 +78,11 @@
return libdir + '/core-libart-hostdex_intermediates/classes.jack:' \
+ libdir + '/core-oj-hostdex_intermediates/classes.jack'
-def GetExecutionModeRunner(mode):
+def GetExecutionModeRunner(device, mode):
"""Returns a runner for the given execution mode.
Args:
+ device: string, target device serial number (or None)
mode: string, execution mode
Returns:
TestRunner with given execution mode
@@ -95,9 +96,9 @@
if mode == 'hopt':
return TestRunnerArtOnHost(False)
if mode == 'tint':
- return TestRunnerArtOnTarget(True)
+ return TestRunnerArtOnTarget(device, True)
if mode == 'topt':
- return TestRunnerArtOnTarget(False)
+ return TestRunnerArtOnTarget(device, False)
raise FatalError('Unknown execution mode')
def GetReturnCode(retc):
@@ -210,13 +211,14 @@
class TestRunnerArtOnTarget(TestRunner):
"""Concrete test runner of Art on target (interpreter or optimizing)."""
- def __init__(self, interpreter):
+ def __init__(self, device, interpreter):
"""Constructor for the Art on target tester.
Args:
+ device: string, target device serial number (or None)
interpreter: boolean, selects between interpreter or optimizing
"""
- self._dalvik_args = '-cp /data/local/tmp/classes.dex Test'
+ self._dalvik_args = 'shell dalvikvm -cp /data/local/tmp/classes.dex Test'
if interpreter:
self._description = 'Art interpreter on target'
self._id = 'TInt'
@@ -224,16 +226,19 @@
else:
self._description = 'Art optimizing on target'
self._id = 'TOpt'
+ self._adb = 'adb'
+ if device is not None:
+ self._adb = self._adb + ' -s ' + device
self._jack_args = '-cp ' + GetJackClassPath() + ' --output-dex . Test.java'
def CompileAndRunTest(self):
if RunCommand('jack', self._jack_args,
out=None, err='jackerr.txt', timeout=30) == EXIT_SUCCESS:
- if RunCommand('adb push', 'classes.dex /data/local/tmp/',
+ if RunCommand(self._adb, 'push classes.dex /data/local/tmp/',
'adb.txt', err=None) != EXIT_SUCCESS:
raise FatalError('Cannot push to target device')
out = self.GetId() + '_run_out.txt'
- retc = RunCommand('adb shell dalvikvm', self._dalvik_args, out, err=None)
+ retc = RunCommand(self._adb, self._dalvik_args, out, err=None)
if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT:
retc = EXIT_NOTRUN
else:
@@ -241,7 +246,7 @@
# Cleanup and return.
RunCommand('rm', '-f classes.dex jackerr.txt adb.txt',
out=None, err=None)
- RunCommand('adb shell', 'rm -f /data/local/tmp/classes.dex',
+ RunCommand(self._adb, 'shell rm -f /data/local/tmp/classes.dex',
out=None, err=None)
return retc
@@ -256,17 +261,19 @@
class JavaFuzzTester(object):
"""Tester that runs JavaFuzz many times and report divergences."""
- def __init__(self, num_tests, mode1, mode2):
+ def __init__(self, num_tests, device, mode1, mode2):
"""Constructor for the tester.
Args:
num_tests: int, number of tests to run
+ device: string, target device serial number (or None)
mode1: string, execution mode for first runner
mode2: string, execution mode for second runner
"""
self._num_tests = num_tests
- self._runner1 = GetExecutionModeRunner(mode1)
- self._runner2 = GetExecutionModeRunner(mode2)
+ self._device = device
+ self._runner1 = GetExecutionModeRunner(device, mode1)
+ self._runner2 = GetExecutionModeRunner(device, mode2)
self._save_dir = None
self._tmp_dir = None
# Statistics.
@@ -302,6 +309,7 @@
print '**\n**** JavaFuzz Testing\n**'
print
print '#Tests :', self._num_tests
+ print 'Device :', self._device
print 'Directory :', self._tmp_dir
print 'Exec-mode1:', self._runner1.GetDescription()
print 'Exec-mode2:', self._runner2.GetDescription()
@@ -391,6 +399,7 @@
parser = argparse.ArgumentParser()
parser.add_argument('--num_tests', default=10000,
type=int, help='number of tests to run')
+ parser.add_argument('--device', help='target device serial number')
parser.add_argument('--mode1', default='ri',
help='execution mode 1 (default: ri)')
parser.add_argument('--mode2', default='hopt',
@@ -399,7 +408,8 @@
if args.mode1 == args.mode2:
raise FatalError("Identical execution modes given")
# Run the JavaFuzz tester.
- with JavaFuzzTester(args.num_tests, args.mode1, args.mode2) as fuzzer:
+ with JavaFuzzTester(args.num_tests, args.device,
+ args.mode1, args.mode2) as fuzzer:
fuzzer.Run()
if __name__ == "__main__":