Merge "Add an optimization for removing redundant suspend tests in ART"
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 3ef1dbf..a2676c8 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -111,7 +111,8 @@
       merged_df_flags_(0u),
       ifield_lowering_infos_(arena, 0u),
       sfield_lowering_infos_(arena, 0u),
-      method_lowering_infos_(arena, 0u) {
+      method_lowering_infos_(arena, 0u),
+      gen_suspend_test_list_(arena, 0u) {
   try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
   max_available_special_compiler_temps_ = std::abs(static_cast<int>(kVRegNonSpecialTempBaseReg))
       - std::abs(static_cast<int>(kVRegTempBaseReg));
@@ -1533,6 +1534,24 @@
   return false;
 }
 
+bool MIRGraph::HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id) {
+  BasicBlock* target = GetBasicBlock(target_id);
+
+  if (source == nullptr || target == nullptr)
+    return false;
+
+  int idx;
+  for (idx = gen_suspend_test_list_.Size() - 1; idx >= 0; idx--) {
+    BasicBlock* bb = gen_suspend_test_list_.Get(idx);
+    if (bb == source)
+      return true;  // A suspend check has already been generated in the source block.
+    if (source->dominators->IsBitSet(bb->id) && bb->dominators->IsBitSet(target_id))
+      return true;
+  }
+
+  return false;
+}
+
 ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph)
     : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false),
       visited_taken_(false), have_successors_(false) {
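
For reference, the check above is consumed when lowering backedges in mir_to_lir.cc
(see the hunks further below); condensed, the GOTO case becomes:

    if (mir_graph_->IsBackedge(bb, bb->taken) &&
        (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken))) {
      // No recorded suspend point covers the backedge: keep the test-and-branch.
      GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken]);
    } else {
      // Not a backedge, or a dominating call already polls for suspension.
      OpUnconditionalBranch(&label_list[bb->taken]);
    }
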
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 38cd5ee..b6cec66 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -192,6 +192,7 @@
 
 typedef uint16_t BasicBlockId;
 static const BasicBlockId NullBasicBlockId = 0;
+static constexpr bool kLeafOptimization = false;
 
 /*
  * In general, vreg/sreg describe Dalvik registers that originated with dx.  However,
@@ -1055,6 +1056,20 @@
   void HandleSSADef(int* defs, int dalvik_reg, int reg_index);
   bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed);
 
+  // Used for removing redundant suspend tests.
+  void AppendGenSuspendTestList(BasicBlock* bb) {
+    if (gen_suspend_test_list_.Size() == 0 ||
+        gen_suspend_test_list_.Get(gen_suspend_test_list_.Size() - 1) != bb) {
+      gen_suspend_test_list_.Insert(bb);
+    }
+  }
+
+  /* Checks whether there is already a suspend point (e.g. a non-inlined
+   * method call) that dominates the source basic block of a backedge and is
+   * dominated by the backedge's target basic block.
+   */
+  bool HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id);
+
  protected:
   int FindCommonParent(int block1, int block2);
   void ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1,
@@ -1162,6 +1177,7 @@
   GrowableArray<MirSFieldLoweringInfo> sfield_lowering_infos_;
   GrowableArray<MirMethodLoweringInfo> method_lowering_infos_;
   static const uint64_t oat_data_flow_attributes_[kMirOpLast];
+  GrowableArray<BasicBlock*> gen_suspend_test_list_;  // List of blocks containing suspend tests
 
   friend class ClassInitCheckEliminationTest;
   friend class LocalValueNumberingTest;
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index e384f6b..e32e7cb 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -29,7 +29,8 @@
  *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
  *        registers.
  * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
- * r4   : (rARM_SUSPEND) is reserved (suspend check/debugger assist)
+ * r4   : If ARM_R4_SUSPEND_FLAG is set, reserved as a suspend check/debugger
+ *        assist flag; otherwise a callee save (promotion target).
  * r5   : Callee save (promotion target)
  * r6   : Callee save (promotion target)
  * r7   : Callee save (promotion target)
@@ -95,6 +96,8 @@
 
 // First FP callee save.
 #define ARM_FP_CALLEE_SAVE_BASE 16
+// Flag for using R4 to do the suspend check
+#define ARM_R4_SUSPEND_FLAG
 
 enum ArmResourceEncodingPos {
   kArmGPReg0   = 0,
@@ -117,7 +120,11 @@
   r1           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  1,
   r2           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  2,
   r3           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  3,
+#ifdef ARM_R4_SUSPEND_FLAG
   rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
+#else
+  r4           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
+#endif
   r5           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  5,
   r6           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  6,
   r7           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  7,
@@ -207,7 +214,11 @@
 constexpr RegStorage rs_r1(RegStorage::kValid | r1);
 constexpr RegStorage rs_r2(RegStorage::kValid | r2);
 constexpr RegStorage rs_r3(RegStorage::kValid | r3);
+#ifdef ARM_R4_SUSPEND_FLAG
 constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND);
+#else
+constexpr RegStorage rs_r4(RegStorage::kValid | r4);
+#endif
 constexpr RegStorage rs_r5(RegStorage::kValid | r5);
 constexpr RegStorage rs_r6(RegStorage::kValid | r6);
 constexpr RegStorage rs_r7(RegStorage::kValid | r7);
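
Note that ARM_R4_SUSPEND_FLAG is defined in two places: here for the compiler backend
and in runtime/arch/arm/asm_support_arm.h for the assembly entrypoints, so the two
definitions have to agree. A preprocessor flag is used rather than a C++ constant,
presumably so the .S sources can test it as well; a hypothetical C++ mirror of the
same choice would look like:

    // Hypothetical sketch, not in the patch: the real code uses #ifdef directly
    // because the assembly sources share the same macro.
    #ifdef ARM_R4_SUSPEND_FLAG
    constexpr bool kArmR4IsSuspendFlag = true;   // r4 reserved for the suspend check
    #else
    constexpr bool kArmR4IsSuspendFlag = false;  // r4 available to the register allocator
    #endif
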
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 769122d..4732e52 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -953,8 +953,18 @@
 
 // Test suspend flag, return target of taken suspend branch
 LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
+#ifdef ARM_R4_SUSPEND_FLAG
   NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
   return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
+#else
+  RegStorage t_reg = AllocTemp();
+  LoadBaseDisp(rs_rARM_SELF, Thread::ThreadFlagsOffset<4>().Int32Value(),
+    t_reg, kUnsignedHalf);
+  LIR* cmp_branch = OpCmpImmBranch((target == NULL) ? kCondNe : kCondEq, t_reg,
+    0, target);
+  FreeTemp(t_reg);
+  return cmp_branch;
+#endif
 }
 
 // Decrement register and branch on condition
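
In the non-R4 path above, the emitted sequence loads the 16-bit thread flags word and
branches to the suspend handler only when some flag is set. A conceptual C++ model of
that sequence (a sketch with placeholder names, not actual runtime code):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    void SuspendSlowPath() {}                      // stand-in for the art_quick_test_suspend entrypoint
    constexpr std::size_t kThreadFlagsOffset = 0;  // placeholder for Thread::ThreadFlagsOffset<4>()

    inline void TestSuspend(const uint8_t* self) {
      uint16_t flags;                              // kUnsignedHalf load in the generated code
      std::memcpy(&flags, self + kThreadFlagsOffset, sizeof(flags));
      if (flags != 0) {                            // a suspend or checkpoint request is pending
        SuspendSlowPath();
      }
    }
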
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 5340d83..bd9c8b4 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -25,9 +25,15 @@
 
 namespace art {
 
+#ifdef ARM_R4_SUSPEND_FLAG
 static constexpr RegStorage core_regs_arr[] =
     {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
      rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+#else
+static constexpr RegStorage core_regs_arr[] =
+    {rs_r0, rs_r1, rs_r2, rs_r3, rs_r4, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
+     rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+#endif
 static constexpr RegStorage sp_regs_arr[] =
     {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
      rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20,
@@ -36,9 +42,15 @@
 static constexpr RegStorage dp_regs_arr[] =
     {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10,
      rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15};
+#ifdef ARM_R4_SUSPEND_FLAG
 static constexpr RegStorage reserved_regs_arr[] =
     {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
 static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12};
+#else
+static constexpr RegStorage reserved_regs_arr[] =
+    {rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r4, rs_r12};
+#endif
 static constexpr RegStorage sp_temps_arr[] =
     {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
      rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15};
@@ -79,7 +91,11 @@
   RegStorage res_reg = RegStorage::InvalidReg();
   switch (reg) {
     case kSelf: res_reg = rs_rARM_SELF; break;
+#ifdef ARM_R4_SUSPEND_FLAG
     case kSuspend: res_reg =  rs_rARM_SUSPEND; break;
+#else
+    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
+#endif
     case kLr: res_reg =  rs_rARM_LR; break;
     case kPc: res_reg =  rs_rARM_PC; break;
     case kSp: res_reg =  rs_rARM_SP; break;
@@ -578,11 +594,13 @@
     }
   }
 
+#ifdef ARM_R4_SUSPEND_FLAG
   // TODO: re-enable this when we can safely save r4 over the suspension code path.
   bool no_suspend = NO_SUSPEND;  // || !Runtime::Current()->ExplicitSuspendChecks();
   if (no_suspend) {
     GetRegInfo(rs_rARM_SUSPEND)->MarkFree();
   }
+#endif
 
   // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
   // TODO: adjust when we roll to hard float calling convention.
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 9621995..1f12b6f 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -399,7 +399,7 @@
                                                           cu_->class_def_idx)) {
         GenMemBarrier(kStoreStore);
       }
-      if (!mir_graph_->MethodIsLeaf()) {
+      if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
         GenSuspendTest(opt_flags);
       }
       break;
@@ -408,7 +408,7 @@
       DCHECK(rl_src[0].ref);
       // Intentional fallthrough.
     case Instruction::RETURN:
-      if (!mir_graph_->MethodIsLeaf()) {
+      if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
         GenSuspendTest(opt_flags);
       }
       DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0]));
@@ -416,7 +416,7 @@
       break;
 
     case Instruction::RETURN_WIDE:
-      if (!mir_graph_->MethodIsLeaf()) {
+      if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
         GenSuspendTest(opt_flags);
       }
       DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0]));
@@ -543,7 +543,8 @@
     case Instruction::GOTO:
     case Instruction::GOTO_16:
     case Instruction::GOTO_32:
-      if (mir_graph_->IsBackedge(bb, bb->taken)) {
+      if (mir_graph_->IsBackedge(bb, bb->taken) &&
+          (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken))) {
         GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken]);
       } else {
         OpUnconditionalBranch(&label_list[bb->taken]);
@@ -582,12 +583,15 @@
         bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg),
                                        mir_graph_->ConstantValue(rl_src[1].orig_sreg));
         BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through;
-        if (mir_graph_->IsBackedge(bb, target_id)) {
+        if (mir_graph_->IsBackedge(bb, target_id) &&
+            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, target_id))) {
           GenSuspendTest(opt_flags);
         }
         OpUnconditionalBranch(&label_list[target_id]);
       } else {
-        if (mir_graph_->IsBackwardsBranch(bb)) {
+        if (mir_graph_->IsBackwardsBranch(bb) &&
+            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
+             !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
           GenSuspendTest(opt_flags);
         }
         GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken, fall_through);
@@ -607,12 +611,15 @@
       if (rl_src[0].is_const) {
         bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), 0);
         BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through;
-        if (mir_graph_->IsBackedge(bb, target_id)) {
+        if (mir_graph_->IsBackedge(bb, target_id) &&
+            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, target_id))) {
           GenSuspendTest(opt_flags);
         }
         OpUnconditionalBranch(&label_list[target_id]);
       } else {
-        if (mir_graph_->IsBackwardsBranch(bb)) {
+        if (mir_graph_->IsBackwardsBranch(bb) &&
+            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
+             !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
           GenSuspendTest(opt_flags);
         }
         GenCompareZeroAndBranch(opcode, rl_src[0], taken, fall_through);
@@ -756,37 +763,69 @@
 
     case Instruction::INVOKE_STATIC_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, true));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        // If the invocation is not inlined, we can assume there is already a
+        // suspend check at the return site.
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
     case Instruction::INVOKE_STATIC:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, false));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
 
     case Instruction::INVOKE_DIRECT:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, false));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
     case Instruction::INVOKE_DIRECT_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, true));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
 
     case Instruction::INVOKE_VIRTUAL:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, false));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
     case Instruction::INVOKE_VIRTUAL_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, true));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
 
     case Instruction::INVOKE_SUPER:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, false));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
     case Instruction::INVOKE_SUPER_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, true));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
 
     case Instruction::INVOKE_INTERFACE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, false));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
     case Instruction::INVOKE_INTERFACE_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, true));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
 
     case Instruction::NEG_INT:
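
The nine INVOKE_* cases above repeat the same bookkeeping after GenInvoke(). A
hypothetical helper (not part of the patch) that would factor it out:

    // Hypothetical refactor sketch: record the invoke's block as a suspend point
    // when the new elimination is active (kLeafOptimization is false) and the
    // call was not inlined, since a non-inlined call checks for suspension on return.
    void Mir2Lir::GenInvokeAndRecord(BasicBlock* bb, MIR* mir, InvokeType type,
                                     bool is_range, int opt_flags) {
      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, type, is_range));
      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
        mir_graph_->AppendGenSuspendTestList(bb);
      }
    }
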
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 9e1db96..ae30aee 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -34,5 +34,7 @@
 
 // Expected size of a heap reference
 #define HEAP_REFERENCE_SIZE 4
+// Flag for using R4 as the suspend check flag in the ARM runtime
+#define ARM_R4_SUSPEND_FLAG
 
 #endif  // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5212576..83a683d 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -330,7 +330,9 @@
     mov    r11, sp                         @ save the stack pointer
     .cfi_def_cfa_register r11
     mov    r9, r3                          @ move managed thread pointer into r9
+#ifdef ARM_R4_SUSPEND_FLAG
     mov    r4, #SUSPEND_CHECK_INTERVAL     @ reset r4 to suspend check interval
+#endif
     add    r5, r2, #16                     @ create space for method pointer in frame
     and    r5, #0xFFFFFFF0                 @ align frame size to 16 bytes
     sub    sp, r5                          @ reserve stack space for argument array
@@ -932,11 +934,13 @@
      */
     .extern artTestSuspendFromCode
 ENTRY art_quick_test_suspend
+#ifdef ARM_R4_SUSPEND_FLAG
     ldrh    r0, [rSELF, #THREAD_FLAGS_OFFSET]
     mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL  @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     cbnz   r0, 1f                             @ check Thread::Current()->suspend_count_ == 0
     bx     lr                                 @ return if suspend_count_ == 0
 1:
+#endif
     mov    r0, rSELF
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME          @ save callee saves for stack crawl
     mov    r1, sp