Move thread flags and state into 32 bits.

We need to ensure that transitions to Runnable are atomic with respect
to a thread modifying the suspend count. Currently this is achieved by
holding the thread_suspend_count_lock_. This change creates a set of bit
flags that summarize that the suspend_count_ is raised, along with other
flags that signify that the managed code should go into a slow path.

The effects of this change are two-fold:
1) transitions from suspended to runnable can CAS the thread state
rather than holding the thread_suspend_count_lock_. This will make JNI
transitions cheaper.
2) the exception/suspend/interpreter poll needed for shadow frames can
be rolled into a single compare of the bit fields against 0.

Change-Id: I589f84e3dca396c3db448bf32d814565acf3d11f
diff --git a/src/compiler/codegen/MethodBitcode.cc b/src/compiler/codegen/MethodBitcode.cc
index c50d74d..682de7a 100644
--- a/src/compiler/codegen/MethodBitcode.cc
+++ b/src/compiler/codegen/MethodBitcode.cc
@@ -2663,21 +2663,8 @@
 
 void cvtMoveException(CompilationUnit* cUnit, llvm::CallInst* callInst)
 {
-  DCHECK_EQ(callInst->getNumArgOperands(), 0U);
-  int exOffset = Thread::ExceptionOffset().Int32Value();
   RegLocation rlDest = getLoc(cUnit, callInst);
-  RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-#if defined(TARGET_X86)
-  newLIR2(cUnit, kX86Mov32RT, rlResult.lowReg, exOffset);
-  newLIR2(cUnit, kX86Mov32TI, exOffset, 0);
-#else
-  int resetReg = oatAllocTemp(cUnit);
-  loadWordDisp(cUnit, rSELF, exOffset, rlResult.lowReg);
-  loadConstant(cUnit, resetReg, 0);
-  storeWordDisp(cUnit, rSELF, exOffset, resetReg);
-  oatFreeTemp(cUnit, resetReg);
-#endif
-  storeValue(cUnit, rlDest, rlResult);
+  genMoveException(cUnit, rlDest);
 }
 
 void cvtSget(CompilationUnit* cUnit, llvm::CallInst* callInst, bool isWide,