ART: Single-frame deopt

Add deoptimization of a single frame. Works by removing the managed
code frame and jumping into the quick-to-interpreter bridge, and
the bridge understanding a stored ShadowFrame.

We need a separate fixup pass. For x86, we leave the return address
on the stack so we don't need to push it there.

Bug: 21611912
Change-Id: I06625685ced8b054244f8685ab50b238a705b9d2
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 403d348..8f6b1ff 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -30,9 +30,11 @@
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
   gprs_[PC] = &pc_;
+  gprs_[R0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = ArmContext::kBadGprBase + SP;
   pc_ = ArmContext::kBadGprBase + PC;
+  arg0_ = 0;
 }
 
 void ArmContext::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 77bb5c8..ea31055 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -45,6 +45,10 @@
     SetGPR(PC, new_pc);
   }
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(R0, new_arg0_value);
+  }
+
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
     return gprs_[reg] != nullptr;
@@ -84,7 +88,7 @@
   uintptr_t* gprs_[kNumberOfCoreRegisters];
   uint32_t* fprs_[kNumberOfSRegisters];
   // Hold values for sp and pc if they are not located within a stack frame.
-  uintptr_t sp_, pc_;
+  uintptr_t sp_, pc_, arg0_;
 };
 
 }  // namespace arm
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index e45d828..dc1cf8a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -437,8 +437,8 @@
     ldr  r14, [r0, #56]   @ (LR from gprs_ 56=4*14)
     add  r0, r0, #12      @ increment r0 to skip gprs_[0..2] 12=4*3
     ldm  r0, {r3-r13}     @ load remaining gprs from argument gprs_
-    mov  r0, #0           @ clear result registers r0 and r1
-    mov  r1, #0
+    ldr  r0, [r0, #-12]   @ load r0 value
+    mov  r1, #0           @ clear result register r1
     bx   r2               @ do long jump
 END art_quick_do_long_jump
 
@@ -1142,7 +1142,7 @@
 
     /*
      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
-     * will long jump to the upcall with a special exception of -1.
+     * will long jump to the interpreter bridge.
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 60becc6..4477631 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -31,10 +31,12 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
-  gprs_[LR] = &pc_;
+  gprs_[kPC] = &pc_;
+  gprs_[X0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = Arm64Context::kBadGprBase + SP;
-  pc_ = Arm64Context::kBadGprBase + LR;
+  pc_ = Arm64Context::kBadGprBase + kPC;
+  arg0_ = 0;
 }
 
 void Arm64Context::FillCalleeSaves(const StackVisitor& fr) {
@@ -58,8 +60,8 @@
 }
 
 void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
-  DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
-  DCHECK_NE(reg, static_cast<uint32_t>(XZR));
+  DCHECK_LT(reg, arraysize(gprs_));
+  // Note: we use kPC == XZR, so do not ensure that reg != XZR.
   DCHECK(IsAccessibleGPR(reg));
   DCHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
   *gprs_[reg] = value;
@@ -124,13 +126,13 @@
 extern "C" NO_RETURN void art_quick_do_long_jump(uint64_t*, uint64_t*);
 
 void Arm64Context::DoLongJump() {
-  uint64_t gprs[kNumberOfXRegisters];
+  uint64_t gprs[arraysize(gprs_)];
   uint64_t fprs[kNumberOfDRegisters];
 
   // The long jump routine called below expects to find the value for SP at index 31.
   DCHECK_EQ(SP, 31);
 
-  for (size_t i = 0; i < kNumberOfXRegisters; ++i) {
+  for (size_t i = 0; i < arraysize(gprs_); ++i) {
     gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i;
   }
   for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index 1c99f3c..11314e0 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -42,20 +42,25 @@
   }
 
   void SetPC(uintptr_t new_lr) OVERRIDE {
-    SetGPR(LR, new_lr);
+    SetGPR(kPC, new_lr);
+  }
+
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(X0, new_arg0_value);
   }
 
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
-    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+    DCHECK_LT(reg, arraysize(gprs_));
     return gprs_[reg] != nullptr;
   }
 
   uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
-    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+    DCHECK_LT(reg, arraysize(gprs_));
     return gprs_[reg];
   }
 
   uintptr_t GetGPR(uint32_t reg) OVERRIDE {
+    // Note: PC isn't an available GPR (outside of internals), so don't allow retrieving the value.
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
     DCHECK(IsAccessibleGPR(reg));
     return *gprs_[reg];
@@ -79,12 +84,15 @@
   void SmashCallerSaves() OVERRIDE;
   NO_RETURN void DoLongJump() OVERRIDE;
 
+  static constexpr size_t kPC = kNumberOfXRegisters;
+
  private:
-  // Pointers to register locations, initialized to null or the specific registers below.
-  uintptr_t* gprs_[kNumberOfXRegisters];
+  // Pointers to register locations, initialized to null or the specific registers below. We need
+  // an additional one for the PC.
+  uintptr_t* gprs_[kNumberOfXRegisters + 1];
   uint64_t * fprs_[kNumberOfDRegisters];
-  // Hold values for sp and pc if they are not located within a stack frame.
-  uintptr_t sp_, pc_;
+  // Hold values for sp, pc and arg0 if they are not located within a stack frame.
+  uintptr_t sp_, pc_, arg0_;
 };
 
 }  // namespace arm64
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 169bc38..6812178 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -941,7 +941,7 @@
     // Load GPRs
     // TODO: lots of those are smashed, could optimize.
     add x0, x0, #30*8
-    ldp x30, x1, [x0], #-16
+    ldp x30, x1, [x0], #-16          // LR & SP
     ldp x28, x29, [x0], #-16
     ldp x26, x27, [x0], #-16
     ldp x24, x25, [x0], #-16
@@ -958,10 +958,12 @@
     ldp x2, x3, [x0], #-16
     mov sp, x1
 
-    // TODO: Is it really OK to use LR for the target PC?
-    mov x0, #0
-    mov x1, #0
-    br  xLR
+    // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
+    ldr x1, [x0, #33*8]
+    // And the value of x0.
+    ldr x0, [x0]
+
+    br  x1
 END art_quick_do_long_jump
 
     /*
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index 9ef761e..9af7c04 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -50,6 +50,9 @@
   // Sets the program counter value.
   virtual void SetPC(uintptr_t new_pc) = 0;
 
+  // Sets the first argument register.
+  virtual void SetArg0(uintptr_t new_arg0_value) = 0;
+
   // Returns whether the given GPR is accessible (read or write).
   virtual bool IsAccessibleGPR(uint32_t reg) = 0;
 
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index bc2bf68..08ab356 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -30,9 +30,11 @@
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
   gprs_[RA] = &ra_;
+  gprs_[A0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = MipsContext::kBadGprBase + SP;
   ra_ = MipsContext::kBadGprBase + RA;
+  arg0_ = 0;
 }
 
 void MipsContext::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index 38cf29a..0affe53 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -78,12 +78,17 @@
   void SmashCallerSaves() OVERRIDE;
   NO_RETURN void DoLongJump() OVERRIDE;
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(A0, new_arg0_value);
+  }
+
  private:
   // Pointers to registers in the stack, initialized to null except for the special cases below.
   uintptr_t* gprs_[kNumberOfCoreRegisters];
   uint32_t* fprs_[kNumberOfFRegisters];
-  // Hold values for sp and ra (return address) if they are not located within a stack frame.
-  uintptr_t sp_, ra_;
+  // Hold values for sp and ra (return address) if they are not located within a stack frame, as
+  // well as the first argument.
+  uintptr_t sp_, ra_, arg0_;
 };
 }  // namespace mips
 }  // namespace art
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index cc6dc7e..2c17f1c 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -30,9 +30,11 @@
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
   gprs_[T9] = &t9_;
+  gprs_[A0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = Mips64Context::kBadGprBase + SP;
   t9_ = Mips64Context::kBadGprBase + T9;
+  arg0_ = 0;
 }
 
 void Mips64Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h
index 26fbcfe..84b1c9b 100644
--- a/runtime/arch/mips64/context_mips64.h
+++ b/runtime/arch/mips64/context_mips64.h
@@ -78,14 +78,20 @@
   void SmashCallerSaves() OVERRIDE;
   NO_RETURN void DoLongJump() OVERRIDE;
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(A0, new_arg0_value);
+  }
+
  private:
   // Pointers to registers in the stack, initialized to null except for the special cases below.
   uintptr_t* gprs_[kNumberOfGpuRegisters];
   uint64_t* fprs_[kNumberOfFpuRegisters];
   // Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the
-  // PC (as ra is required to be valid for single-frame deopt and must not be clobbered).
-  uintptr_t sp_, t9_;
+  // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We
+  // also need the first argument for single-frame deopt.
+  uintptr_t sp_, t9_, arg0_;
 };
+
 }  // namespace mips64
 }  // namespace art
 
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 7096c82..987ad60 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -29,9 +29,11 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[ESP] = &esp_;
+  gprs_[EAX] = &arg0_;
   // Initialize registers with easy to spot debug values.
   esp_ = X86Context::kBadGprBase + ESP;
   eip_ = X86Context::kBadGprBase + kNumberOfCpuRegisters;
+  arg0_ = 0;
 }
 
 void X86Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index c4a11d8..59beb12 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -44,6 +44,10 @@
     eip_ = new_pc;
   }
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(EAX, new_arg0_value);
+  }
+
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg] != nullptr;
@@ -95,10 +99,10 @@
   // Pointers to register locations. Values are initialized to null or the special registers below.
   uintptr_t* gprs_[kNumberOfCpuRegisters];
   uint32_t* fprs_[kNumberOfFloatRegisters];
-  // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat
+  // Hold values for esp, eip and arg0 if they are not located within a stack frame. EIP is somewhat
   // special in that it cannot be encoded normally as a register operand to an instruction (except
   // in 64bit addressing modes).
-  uintptr_t esp_, eip_;
+  uintptr_t esp_, eip_, arg0_;
 };
 }  // namespace x86
 }  // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 029a296..f3b15c9 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1695,7 +1695,7 @@
 
     /*
      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
-     * will long jump to the upcall with a special exception of -1.
+     * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 1fe2ef8..3dc7d71 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -29,9 +29,11 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[RSP] = &rsp_;
+  gprs_[RDI] = &arg0_;
   // Initialize registers with easy to spot debug values.
   rsp_ = X86_64Context::kBadGprBase + RSP;
   rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters;
+  arg0_ = 0;
 }
 
 void X86_64Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
index 30bb9ec..f05b7f0 100644
--- a/runtime/arch/x86_64/context_x86_64.h
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -44,6 +44,10 @@
     rip_ = new_pc;
   }
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(RDI, new_arg0_value);
+  }
+
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg] != nullptr;
@@ -82,10 +86,10 @@
   // Pointers to register locations. Values are initialized to null or the special registers below.
   uintptr_t* gprs_[kNumberOfCpuRegisters];
   uint64_t* fprs_[kNumberOfFloatRegisters];
-  // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat
+  // Hold values for rsp, rip and arg0 if they are not located within a stack frame. RIP is somewhat
   // special in that it cannot be encoded normally as a register operand to an instruction (except
   // in 64bit addressing modes).
-  uintptr_t rsp_, rip_;
+  uintptr_t rsp_, rip_, arg0_;
 };
 }  // namespace x86_64
 }  // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 861f802..2f438a3 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1724,18 +1724,18 @@
      * will long jump to the upcall with a special exception of -1.
      */
 DEFINE_FUNCTION art_quick_deoptimize
-    pushq %rsi                     // Entry point for a jump. Fake that we were called.
-                                   // Use hidden arg.
+    pushq %rsi                         // Entry point for a jump. Fake that we were called.
+                                       // Use hidden arg.
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-                                   // Stack should be aligned now.
-    movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
-    call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
+                                       // Stack should be aligned now.
+    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
+    call SYMBOL(artDeoptimize)         // artDeoptimize(Thread*)
     UNREACHABLE
 END_FUNCTION art_quick_deoptimize
 
     /*
      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
-     * will long jump to the upcall with a special exception of -1.
+     * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME