diff --git a/src/asm_support.h b/src/asm_support.h
index 6eda4bf..097ab7a 100644
--- a/src/asm_support.h
+++ b/src/asm_support.h
@@ -3,9 +3,16 @@
 #ifndef ART_SRC_ASM_SUPPORT_H_
 #define ART_SRC_ASM_SUPPORT_H_
 
+#if defined(__arm__)
+#define rSUSPEND r4
+#define rSELF r9
+#define rLR r14
+#define SUSPEND_CHECK_INTERVAL (1000)
+#endif
+
 #if defined(__i386__)
 // Offset of field Thread::self_ verified in InitCpu
-#define THREAD_SELF_OFFSET 0x161
+#define THREAD_SELF_OFFSET 0x165
 #endif
 
 #endif  // ART_SRC_ASM_SUPPORT_H_
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index b697292..0965c14 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -87,6 +87,7 @@
     kMIRInlined,                        // Invoke is inlined (ie dead)
     kMIRInlinedPred,                    // Invoke is inlined via prediction
     kMIRCallee,                         // Instruction is inlined from callee
+    kMIRIgnoreSuspendCheck,
 } MIROptimizationFlagPositons;
 
 #define MIR_IGNORE_NULL_CHECK           (1 << kMIRIgnoreNullCheck)
@@ -96,6 +97,7 @@
 #define MIR_INLINED                     (1 << kMIRInlined)
 #define MIR_INLINED_PRED                (1 << kMIRInlinedPred)
 #define MIR_CALLEE                      (1 << kMIRCallee)
+#define MIR_IGNORE_SUSPEND_CHECK        (1 << kMIRIgnoreSuspendCheck)
 
 typedef struct CallsiteInfo {
     const char* classDescriptor;
@@ -239,6 +241,7 @@
     GrowableList dfsOrder;
     GrowableList domPostOrderTraversal;
     GrowableList throwLaunchpads;
+    GrowableList suspendLaunchpads;
     ArenaBitVector* tryBlockAddr;
     ArenaBitVector** defBlockMatrix;    // numDalvikRegister x numBlocks
     ArenaBitVector* tempBlockV;
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index fdcce9c..6a01e36 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -725,6 +725,9 @@
     /* Intialize the throwLaunchpads list */
     oatInitGrowableList(&cUnit.throwLaunchpads, 4);
 
+    /* Intialize the suspendLaunchpads list */
+    oatInitGrowableList(&cUnit.suspendLaunchpads, 4);
+
     /* Allocate the bit-vector to track the beginning of basic blocks */
     ArenaBitVector *tryBlockAddr = oatAllocBitVector(cUnit.insnsSize,
                                                      true /* expandable */);
diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc
index 1d6bb41..45e1b19 100644
--- a/src/compiler/codegen/arm/ArchUtility.cc
+++ b/src/compiler/codegen/arm/ArchUtility.cc
@@ -354,6 +354,9 @@
         case kArmPseudoThrowTarget:
             LOG(INFO) << "LT" << (intptr_t)lir << ":";
             break;
+        case kArmPseudoSuspendTarget:
+            LOG(INFO) << "LS" << (intptr_t)lir << ":";
+            break;
         case kArmPseudoCaseLabel:
             LOG(INFO) << "LC" << (intptr_t)lir << ": Case target 0x" <<
                 std::hex << lir->operands[0] << "|" << std::dec <<
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 07e2e97..e436eea 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -28,7 +28,7 @@
  *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
  *        registers.
  * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
- * r4   : Callee save (promotion target)
+ * r4   : (rSUSPEND) is reserved (suspend check assist)
  * r5   : Callee save (promotion target)
  * r6   : Callee save (promotion target)
  * r7   : Callee save (promotion target)
@@ -243,7 +243,7 @@
 
 /*
  * Annotate special-purpose core registers:
- *   - VM: r4PC, r5FP, and r6SELF
+ *   - VM: r6SELF
  *   - ARM architecture: r13sp, r14lr, and r15pc
  *
  * rPC, rFP, and rSELF are for architecture-independent code to use.
@@ -253,7 +253,7 @@
     r1     = 1,
     r2     = 2,
     r3     = 3,
-    r4     = 4,
+    rSUSPEND = 4,
     r5     = 5,
     r6     = 6,
     r7     = 7,
@@ -366,6 +366,7 @@
  * Assemble.c.
  */
 typedef enum ArmOpcode {
+    kArmPseudoSuspendTarget = -15,
     kArmPseudoThrowTarget = -14,
     kArmPseudoCaseLabel = -13,
     kArmPseudoMethodEntry = -12,
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index ce65803..41053a2 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -1113,10 +1113,12 @@
 
         case OP_RETURN:
         case OP_RETURN_OBJECT:
+            genSuspendPoll(cUnit, mir);
             storeValue(cUnit, retLoc, rlSrc[0]);
             break;
 
         case OP_RETURN_WIDE:
+            genSuspendPoll(cUnit, mir);
             rlDest = retLocWide;
             rlDest.fp = rlSrc[0].fp;
             storeValueWide(cUnit, rlDest, rlSrc[0]);
@@ -1277,11 +1279,8 @@
         case OP_GOTO:
         case OP_GOTO_16:
         case OP_GOTO_32:
-            // TUNING: add MIR flag to disable when unnecessary
-            bool backwardBranch;
-            backwardBranch = (bb->taken->startOffset <= mir->offset);
-            if (backwardBranch) {
-                genSuspendPoll(cUnit, mir);
+            if (bb->taken->startOffset <= mir->offset) {
+                genSuspendTest(cUnit, mir);
             }
             genUnconditionalBranch(cUnit, &labelList[bb->taken->id]);
             break;
@@ -1315,7 +1314,7 @@
             ArmConditionCode cond;
             backwardBranch = (bb->taken->startOffset <= mir->offset);
             if (backwardBranch) {
-                genSuspendPoll(cUnit, mir);
+                genSuspendTest(cUnit, mir);
             }
             rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
             rlSrc[1] = loadValue(cUnit, rlSrc[1], kCoreReg);
@@ -1358,7 +1357,7 @@
             ArmConditionCode cond;
             backwardBranch = (bb->taken->startOffset <= mir->offset);
             if (backwardBranch) {
-                genSuspendPoll(cUnit, mir);
+                genSuspendTest(cUnit, mir);
             }
             rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
             opRegImm(cUnit, kOpCmp, rlSrc[0].lowReg, 0);
@@ -1999,6 +1998,27 @@
     }
 }
 
+static void handleSuspendLaunchpads(CompilationUnit *cUnit)
+{
+    ArmLIR** suspendLabel =
+        (ArmLIR **) cUnit->suspendLaunchpads.elemList;
+    int numElems = cUnit->suspendLaunchpads.numUsed;
+
+    for (int i = 0; i < numElems; i++) {
+        /* TUNING: move suspend count load into helper */
+        ArmLIR* lab = suspendLabel[i];
+        ArmLIR* resumeLab = (ArmLIR*)lab->operands[0];
+        cUnit->currentDalvikOffset = lab->operands[1];
+        oatAppendLIR(cUnit, (LIR *)lab);
+        loadWordDisp(cUnit, rSELF,
+                     OFFSETOF_MEMBER(Thread, pTestSuspendFromCode), rLR);
+        loadWordDisp(cUnit, rSELF,
+            art::Thread::SuspendCountOffset().Int32Value(), rSUSPEND);
+        opReg(cUnit, kOpBlx, rLR);
+        genUnconditionalBranch(cUnit, resumeLab);
+    }
+}
+
 static void handleThrowLaunchpads(CompilationUnit *cUnit)
 {
     ArmLIR** throwLabel =
@@ -2084,9 +2104,11 @@
 
     oatDataFlowAnalysisDispatcher(cUnit, methodBlockCodeGen,
                                   kPreOrderDFSTraversal, false /* Iterative */);
-    removeRedundantBranches(cUnit);
+    handleSuspendLaunchpads(cUnit);
 
     handleThrowLaunchpads(cUnit);
+
+    removeRedundantBranches(cUnit);
 }
 
 /* Common initialization routine for an architecture family */
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index 254802d..9321753 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -22,9 +22,9 @@
  *
  */
 
-static int coreRegs[] = {r0, r1, r2, r3, r4, r5, r6, r7, rSELF, r8, r10, r11,
-                         r12, rSP, rLR, rPC};
-static int reservedRegs[] = {rSELF, rSP, rLR, rPC};
+static int coreRegs[] = {r0, r1, r2, r3, rSUSPEND, r5, r6, r7, rSELF, r8, r10,
+                         r11, r12, rSP, rLR, rPC};
+static int reservedRegs[] = {rSUSPEND, rSELF, rSP, rLR, rPC};
 static int fpRegs[] = {fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
                        fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15,
                        fr16, fr17, fr18, fr19, fr20, fr21, fr22, fr23,
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 2404ca7..76d8b45 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -1683,9 +1683,31 @@
     return false;
 }
 
+/* Check if we need to check for pending suspend request */
+static void genSuspendTest(CompilationUnit* cUnit, MIR* mir)
+{
+    if (mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
+        return;
+    }
+    newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
+    ArmLIR* branch = opCondBranch(cUnit, kArmCondEq);
+    ArmLIR* retLab = newLIR0(cUnit, kArmPseudoTargetLabel);
+    retLab->defMask = ENCODE_ALL;
+    ArmLIR* target = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
+    target->generic.dalvikOffset = cUnit->currentDalvikOffset;
+    target->opcode = kArmPseudoSuspendTarget;
+    target->operands[0] = (intptr_t)retLab;
+    target->operands[1] = mir->offset;
+    branch->generic.target = (LIR*)target;
+    oatInsertGrowableList(&cUnit->suspendLaunchpads, (intptr_t)target);
+}
+
 /* Check for pending suspend request.  */
 static void genSuspendPoll(CompilationUnit* cUnit, MIR* mir)
 {
+    if (mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
+        return;
+    }
     oatLockCallTemps(cUnit);   // Explicit register usage
     int rSuspendCount = r1;
     ArmLIR* ld;
diff --git a/src/jni_internal_arm.cc b/src/jni_internal_arm.cc
index 65dc380..8ba5717 100644
--- a/src/jni_internal_arm.cc
+++ b/src/jni_internal_arm.cc
@@ -31,21 +31,21 @@
   UniquePtr<ArmAssembler> assembler(
       down_cast<ArmAssembler*>(Assembler::Create(kArm)));
 #define __ assembler->
-  // Size of frame - spill of R9/LR + Method* + possible receiver + arg array
-  size_t unpadded_frame_size = (3 * kPointerSize) +
+  // Size of frame - spill of R4,R9/LR + Method* + possible receiver + arg array
+  size_t unpadded_frame_size = (4 * kPointerSize) +
                                (method->IsStatic() ? 0 : kPointerSize) +
                                method->NumArgArrayBytes();
   size_t frame_size = RoundUp(unpadded_frame_size, kStackAlignment);
 
-  // Spill R9 and LR
-  RegList save = (1 << R9);
+  // Spill R4,R9 and LR
+  RegList save = (1 << R9) | (1 << R4);
   __ PushList(save | (1 << LR));
 
   // Move the managed thread pointer into R9.
   __ mov(R9, ShifterOperand(R2));
 
-  // Move frame down for arguments less 2 pushed values above
-  __ AddConstant(SP, -frame_size + (2 * kPointerSize));
+  // Move frame down for arguments less 3 pushed values above
+  __ AddConstant(SP, -frame_size + (3 * kPointerSize));
 
   // Can either get 3 or 2 arguments into registers
   size_t reg_bytes = (method->IsStatic() ? 3 : 2) * kPointerSize;
@@ -112,10 +112,10 @@
     }
   }
 
-  // Remove the frame less the spilled R9 and LR
-  __ AddConstant(SP, frame_size - (2 * kPointerSize));
+  // Remove the frame less the spilled R4, R9 and LR
+  __ AddConstant(SP, frame_size - (3 * kPointerSize));
 
-  // Pop R9 and the LR into PC
+  // Pop R4, R9 and the LR into PC
   __ PopList(save | (1 << PC));
   // TODO: store native_entry in the stub table
   ByteArray* code = ByteArray::Alloc(assembler->CodeSize());
diff --git a/src/runtime_support.S b/src/runtime_support.S
index 24883fc..6522243 100644
--- a/src/runtime_support.S
+++ b/src/runtime_support.S
@@ -159,6 +159,23 @@
     mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
     bx      lr
 
+    .balign 4
+    .global art_test_suspend
+    .extern artCheckSuspendFromCode
+art_test_suspend:
+    /*
+     * Check to see if there's a pending suspend request on our thread.
+     * reset rSUSPEND to SUSPEND_CHECK_INTERVAL.
+     * On entry, rSUSPEND holds the suspend request value
+     * [TUNING: move load of suspend check value into this stub.
+     */
+    cmp    rSUSPEND, #0
+    mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL
+    bxeq   rLR
+    mov    r0, rSELF
+    b      artCheckSuspendFromCode
+
+
 #endif
 
 #if defined(__i386__)
diff --git a/src/runtime_support.h b/src/runtime_support.h
index d421fcc..ed047c9 100644
--- a/src/runtime_support.h
+++ b/src/runtime_support.h
@@ -15,6 +15,7 @@
   extern "C" void art_throw_div_zero_from_code();
   extern "C" void art_throw_array_bounds_from_code(int32_t index, int32_t limit);
   extern "C" void art_invoke_interface_trampoline(void*, void*, void*, void*);
+  extern "C" void art_test_suspend();
 
   /* Conversions */
   extern "C" float __aeabi_i2f(int op1);             // OP_INT_TO_FLOAT
diff --git a/src/thread.cc b/src/thread.cc
index d86a0c5..8ab10af 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -178,7 +178,7 @@
   // TODO: throw and unwind on failure.
 }
 
-void CheckSuspendFromCode(Thread* thread) {
+extern "C" void artCheckSuspendFromCode(Thread* thread) {
   Runtime::Current()->GetThreadList()->FullSuspendCheck(thread);
 }
 
@@ -367,6 +367,7 @@
   pThrowArrayBoundsFromCode = art_throw_array_bounds_from_code;
   pThrowDivZeroFromCode = art_throw_div_zero_from_code;
   pInvokeInterfaceTrampoline = art_invoke_interface_trampoline;
+  pTestSuspendFromCode = art_test_suspend;
 #endif
   pDeliverException = art_deliver_exception;
   pF2l = F2L;
@@ -391,7 +392,7 @@
   pLockObjectFromCode = LockObjectFromCode;
   pUnlockObjectFromCode = UnlockObjectFromCode;
   pFindInstanceFieldFromCode = Field::FindInstanceFieldFromCode;
-  pCheckSuspendFromCode = CheckSuspendFromCode;
+  pCheckSuspendFromCode = artCheckSuspendFromCode;
   pStackOverflowFromCode = StackOverflowFromCode;
   pThrowVerificationErrorFromCode = ThrowVerificationErrorFromCode;
   pThrowNegArraySizeFromCode = ThrowNegArraySizeFromCode;
diff --git a/src/thread.h b/src/thread.h
index 03f3ef6..212abdc 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -224,6 +224,7 @@
   StaticStorageBase* (*pInitializeStaticStorage)(uint32_t, const Method*);
   Field* (*pFindInstanceFieldFromCode)(uint32_t, const Method*);
   void (*pCheckSuspendFromCode)(Thread*);
+  void (*pTestSuspendFromCode)();
   void (*pStackOverflowFromCode)(Method*);
   void (*pThrowNullPointerFromCode)();
   void (*pThrowArrayBoundsFromCode)(int32_t, int32_t);
