ART: Single-frame deopt
Add deoptimization of a single frame. This works by removing the managed
code frame and jumping into the quick-to-interpreter bridge, which
understands a stored ShadowFrame.
We need a separate fixup pass. For x86, we leave the return address
on the stack so we don't need to push it there.
Bug: 21611912
Change-Id: I06625685ced8b054244f8685ab50b238a705b9d2
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 403d348..8f6b1ff 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -30,9 +30,11 @@
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[SP] = &sp_;
gprs_[PC] = &pc_;
+ gprs_[R0] = &arg0_;
// Initialize registers with easy to spot debug values.
sp_ = ArmContext::kBadGprBase + SP;
pc_ = ArmContext::kBadGprBase + PC;
+ arg0_ = 0;
}
void ArmContext::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 77bb5c8..ea31055 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -45,6 +45,10 @@
SetGPR(PC, new_pc);
}
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(R0, new_arg0_value);
+ }
+
bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
return gprs_[reg] != nullptr;
@@ -84,7 +88,7 @@
uintptr_t* gprs_[kNumberOfCoreRegisters];
uint32_t* fprs_[kNumberOfSRegisters];
// Hold values for sp and pc if they are not located within a stack frame.
- uintptr_t sp_, pc_;
+ uintptr_t sp_, pc_, arg0_;
};
} // namespace arm
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index e45d828..dc1cf8a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -437,8 +437,8 @@
ldr r14, [r0, #56] @ (LR from gprs_ 56=4*14)
add r0, r0, #12 @ increment r0 to skip gprs_[0..2] 12=4*3
ldm r0, {r3-r13} @ load remaining gprs from argument gprs_
- mov r0, #0 @ clear result registers r0 and r1
- mov r1, #0
+ ldr r0, [r0, #-12] @ load r0 value
+ mov r1, #0 @ clear result register r1
bx r2 @ do long jump
END art_quick_do_long_jump
@@ -1142,7 +1142,7 @@
/*
* Compiled code has requested that we deoptimize into the interpreter. The deoptimization
- * will long jump to the upcall with a special exception of -1.
+ * will long jump to the interpreter bridge.
*/
.extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 60becc6..4477631 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -31,10 +31,12 @@
std::fill_n(gprs_, arraysize(gprs_), nullptr);
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[SP] = &sp_;
- gprs_[LR] = &pc_;
+ gprs_[kPC] = &pc_;
+ gprs_[X0] = &arg0_;
// Initialize registers with easy to spot debug values.
sp_ = Arm64Context::kBadGprBase + SP;
- pc_ = Arm64Context::kBadGprBase + LR;
+ pc_ = Arm64Context::kBadGprBase + kPC;
+ arg0_ = 0;
}
void Arm64Context::FillCalleeSaves(const StackVisitor& fr) {
@@ -58,8 +60,8 @@
}
void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
- DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
- DCHECK_NE(reg, static_cast<uint32_t>(XZR));
+ DCHECK_LT(reg, arraysize(gprs_));
+ // Note: reg may be kPC, the extra gprs_ slot past XZR; we no longer ensure that reg != XZR.
DCHECK(IsAccessibleGPR(reg));
DCHECK_NE(gprs_[reg], &gZero); // Can't overwrite this static value since they are never reset.
*gprs_[reg] = value;
@@ -124,13 +126,13 @@
extern "C" NO_RETURN void art_quick_do_long_jump(uint64_t*, uint64_t*);
void Arm64Context::DoLongJump() {
- uint64_t gprs[kNumberOfXRegisters];
+ uint64_t gprs[arraysize(gprs_)];
uint64_t fprs[kNumberOfDRegisters];
// The long jump routine called below expects to find the value for SP at index 31.
DCHECK_EQ(SP, 31);
- for (size_t i = 0; i < kNumberOfXRegisters; ++i) {
+ for (size_t i = 0; i < arraysize(gprs_); ++i) {
gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i;
}
for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index 1c99f3c..11314e0 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -42,20 +42,25 @@
}
void SetPC(uintptr_t new_lr) OVERRIDE {
- SetGPR(LR, new_lr);
+ SetGPR(kPC, new_lr);
+ }
+
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(X0, new_arg0_value);
}
bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
- DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+ DCHECK_LT(reg, arraysize(gprs_));
return gprs_[reg] != nullptr;
}
uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
- DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+ DCHECK_LT(reg, arraysize(gprs_));
return gprs_[reg];
}
uintptr_t GetGPR(uint32_t reg) OVERRIDE {
+ // Note: PC isn't an available GPR (outside of internals), so don't allow retrieving the value.
DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
DCHECK(IsAccessibleGPR(reg));
return *gprs_[reg];
@@ -79,12 +84,15 @@
void SmashCallerSaves() OVERRIDE;
NO_RETURN void DoLongJump() OVERRIDE;
+ static constexpr size_t kPC = kNumberOfXRegisters;
+
private:
- // Pointers to register locations, initialized to null or the specific registers below.
- uintptr_t* gprs_[kNumberOfXRegisters];
+ // Pointers to register locations, initialized to null or the specific registers below. We need
+ // an additional one for the PC.
+ uintptr_t* gprs_[kNumberOfXRegisters + 1];
uint64_t * fprs_[kNumberOfDRegisters];
- // Hold values for sp and pc if they are not located within a stack frame.
- uintptr_t sp_, pc_;
+ // Hold values for sp, pc and arg0 if they are not located within a stack frame.
+ uintptr_t sp_, pc_, arg0_;
};
} // namespace arm64
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 169bc38..6812178 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -941,7 +941,7 @@
// Load GPRs
// TODO: lots of those are smashed, could optimize.
add x0, x0, #30*8
- ldp x30, x1, [x0], #-16
+ ldp x30, x1, [x0], #-16 // LR & SP
ldp x28, x29, [x0], #-16
ldp x26, x27, [x0], #-16
ldp x24, x25, [x0], #-16
@@ -958,10 +958,12 @@
ldp x2, x3, [x0], #-16
mov sp, x1
- // TODO: Is it really OK to use LR for the target PC?
- mov x0, #0
- mov x1, #0
- br xLR
+ // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
+ ldr x1, [x0, #33*8]
+ // And the value of x0.
+ ldr x0, [x0]
+
+ br x1
END art_quick_do_long_jump
/*
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index 9ef761e..9af7c04 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -50,6 +50,9 @@
// Sets the program counter value.
virtual void SetPC(uintptr_t new_pc) = 0;
+ // Sets the first argument register.
+ virtual void SetArg0(uintptr_t new_arg0_value) = 0;
+
// Returns whether the given GPR is accessible (read or write).
virtual bool IsAccessibleGPR(uint32_t reg) = 0;
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index bc2bf68..08ab356 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -30,9 +30,11 @@
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[SP] = &sp_;
gprs_[RA] = &ra_;
+ gprs_[A0] = &arg0_;
// Initialize registers with easy to spot debug values.
sp_ = MipsContext::kBadGprBase + SP;
ra_ = MipsContext::kBadGprBase + RA;
+ arg0_ = 0;
}
void MipsContext::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index 38cf29a..0affe53 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -78,12 +78,17 @@
void SmashCallerSaves() OVERRIDE;
NO_RETURN void DoLongJump() OVERRIDE;
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(A0, new_arg0_value);
+ }
+
private:
// Pointers to registers in the stack, initialized to null except for the special cases below.
uintptr_t* gprs_[kNumberOfCoreRegisters];
uint32_t* fprs_[kNumberOfFRegisters];
- // Hold values for sp and ra (return address) if they are not located within a stack frame.
- uintptr_t sp_, ra_;
+ // Hold values for sp and ra (return address) if they are not located within a stack frame, as
+ // well as the first argument.
+ uintptr_t sp_, ra_, arg0_;
};
} // namespace mips
} // namespace art
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index cc6dc7e..2c17f1c 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -30,9 +30,11 @@
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[SP] = &sp_;
gprs_[T9] = &t9_;
+ gprs_[A0] = &arg0_;
// Initialize registers with easy to spot debug values.
sp_ = Mips64Context::kBadGprBase + SP;
t9_ = Mips64Context::kBadGprBase + T9;
+ arg0_ = 0;
}
void Mips64Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h
index 26fbcfe..84b1c9b 100644
--- a/runtime/arch/mips64/context_mips64.h
+++ b/runtime/arch/mips64/context_mips64.h
@@ -78,14 +78,20 @@
void SmashCallerSaves() OVERRIDE;
NO_RETURN void DoLongJump() OVERRIDE;
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(A0, new_arg0_value);
+ }
+
private:
// Pointers to registers in the stack, initialized to null except for the special cases below.
uintptr_t* gprs_[kNumberOfGpuRegisters];
uint64_t* fprs_[kNumberOfFpuRegisters];
// Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the
- // PC (as ra is required to be valid for single-frame deopt and must not be clobbered).
- uintptr_t sp_, t9_;
+ // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We
+ // also need the first argument for single-frame deopt.
+ uintptr_t sp_, t9_, arg0_;
};
+
} // namespace mips64
} // namespace art
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 7096c82..987ad60 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -29,9 +29,11 @@
std::fill_n(gprs_, arraysize(gprs_), nullptr);
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[ESP] = &esp_;
+ gprs_[EAX] = &arg0_;
// Initialize registers with easy to spot debug values.
esp_ = X86Context::kBadGprBase + ESP;
eip_ = X86Context::kBadGprBase + kNumberOfCpuRegisters;
+ arg0_ = 0;
}
void X86Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index c4a11d8..59beb12 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -44,6 +44,10 @@
eip_ = new_pc;
}
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(EAX, new_arg0_value);
+ }
+
bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
return gprs_[reg] != nullptr;
@@ -95,10 +99,10 @@
// Pointers to register locations. Values are initialized to null or the special registers below.
uintptr_t* gprs_[kNumberOfCpuRegisters];
uint32_t* fprs_[kNumberOfFloatRegisters];
- // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat
+ // Hold values for esp, eip and arg0 if they are not located within a stack frame. EIP is somewhat
// special in that it cannot be encoded normally as a register operand to an instruction (except
// in 64bit addressing modes).
- uintptr_t esp_, eip_;
+ uintptr_t esp_, eip_, arg0_;
};
} // namespace x86
} // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 029a296..f3b15c9 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1695,7 +1695,7 @@
/*
* Compiled code has requested that we deoptimize into the interpreter. The deoptimization
- * will long jump to the upcall with a special exception of -1.
+ * will long jump to the interpreter bridge.
*/
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 1fe2ef8..3dc7d71 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -29,9 +29,11 @@
std::fill_n(gprs_, arraysize(gprs_), nullptr);
std::fill_n(fprs_, arraysize(fprs_), nullptr);
gprs_[RSP] = &rsp_;
+ gprs_[RDI] = &arg0_;
// Initialize registers with easy to spot debug values.
rsp_ = X86_64Context::kBadGprBase + RSP;
rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters;
+ arg0_ = 0;
}
void X86_64Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
index 30bb9ec..f05b7f0 100644
--- a/runtime/arch/x86_64/context_x86_64.h
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -44,6 +44,10 @@
rip_ = new_pc;
}
+ void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+ SetGPR(RDI, new_arg0_value);
+ }
+
bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
return gprs_[reg] != nullptr;
@@ -82,10 +86,10 @@
// Pointers to register locations. Values are initialized to null or the special registers below.
uintptr_t* gprs_[kNumberOfCpuRegisters];
uint64_t* fprs_[kNumberOfFloatRegisters];
- // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat
+ // Hold values for rsp, rip and arg0 if they are not located within a stack frame. RIP is somewhat
// special in that it cannot be encoded normally as a register operand to an instruction (except
// in 64bit addressing modes).
- uintptr_t rsp_, rip_;
+ uintptr_t rsp_, rip_, arg0_;
};
} // namespace x86_64
} // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 861f802..2f438a3 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1724,18 +1724,18 @@
* will long jump to the upcall with a special exception of -1.
*/
DEFINE_FUNCTION art_quick_deoptimize
- pushq %rsi // Entry point for a jump. Fake that we were called.
- // Use hidden arg.
+ pushq %rsi // Entry point for a jump. Fake that we were called.
+ // Use hidden arg.
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
- // Stack should be aligned now.
- movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread.
- call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
+ // Stack should be aligned now.
+ movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread.
+ call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
UNREACHABLE
END_FUNCTION art_quick_deoptimize
/*
* Compiled code has requested that we deoptimize into the interpreter. The deoptimization
- * will long jump to the upcall with a special exception of -1.
+ * will long jump to the interpreter bridge.
*/
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME