Revert "Revert "Implement on-stack replacement for arm/arm64/x86/x86_64.""

This reverts commit bd89a5c556324062b7d841843b039392e84cfaf4.

Change-Id: I08d190431520baa7fcec8fbdb444519f25ac8d44
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 631b784..b3a2979 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -429,6 +429,56 @@
 END art_quick_invoke_stub_internal
 
     /*
+     * On stack replacement stub.
+     * On entry:
+     *   r0 = stack to copy
+     *   r1 = size of stack
+     *   r2 = pc to call
+     *   r3 = JValue* result
+     *   [sp] = shorty
+     *   [sp + 4] = thread
+     */
+ENTRY art_quick_osr_stub
+    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
+    mov    r11, sp                         @ Save the stack pointer
+    mov    r10, r1                         @ Save size of stack
+    ldr    r9, [r11, #40]                  @ Move managed thread pointer into r9
+    mov    r8, r2                          @ Save the pc to call
+    sub    r7, sp, #12                     @ Reserve space for stack pointer, JValue result, and ArtMethod* slot
+    and    r7, #0xFFFFFFF0                 @ Align stack pointer
+    mov    sp, r7                          @ Update stack pointer
+    str    r11, [sp, #4]                   @ Save old stack pointer
+    str    r3, [sp, #8]                    @ Save JValue result
+    mov    ip, #0
+    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
+    sub    sp, sp, r1                      @ Reserve space for callee stack
+    mov    r2, r1
+    mov    r1, r0
+    mov    r0, sp
+    bl     memcpy                          @ memcpy (dest r0, src r1, bytes r2)
+    bl     .Losr_entry                     @ Call the method
+    ldr    r11, [sp, #4]                   @ Restore saved stack pointer
+    ldr    r10, [sp, #8]                   @ Restire JValue result
+    mov    sp, r11                         @ Restore stack pointer.
+    ldr    r4, [sp, #36]                   @ load shorty
+    ldr    r4, [r4, #0]                    @ load return type
+    cmp    r4, #68                         @ Test if result type char == 'D'.
+    beq    .Losr_fp_result
+    cmp    r4, #70                         @ Test if result type char == 'F'.
+    beq    .Losr_fp_result
+    strd r0, [r10]                         @ Store r0/r1 into result pointer
+    b    .Losr_exit
+.Losr_fp_result:
+    vstr d0, [r10]                         @ Store s0-s1/d0 into result pointer
+.Losr_exit:
+    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+.Losr_entry:
+    sub r10, r10, #4
+    str lr, [sp, r10]                     @ Store link register per the compiler ABI
+    bx r8
+END art_quick_osr_stub
+
+    /*
      * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
      */
 ARM_ENTRY art_quick_do_long_jump
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 9ccabad..e848008 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -915,6 +915,105 @@
 
 
 
+/*  extern"C" void art_quick_osr_stub(void** stack,                x0
+ *                                    size_t stack_size_in_bytes,  x1
+ *                                    const uin8_t* native_pc,     x2
+ *                                    JValue *result,              x3
+ *                                    char   *shorty,              x4
+ *                                    Thread *self)                x5
+ */
+ENTRY art_quick_osr_stub
+SAVE_SIZE=15*8   // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
+    mov x9, sp                             // Save stack pointer.
+    .cfi_register sp,x9
+
+    sub x10, sp, # SAVE_SIZE
+    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
+    mov sp, x10                            // Set new SP.
+
+    str x28, [sp, #112]
+    stp x26, x27, [sp, #96]
+    stp x24, x25, [sp, #80]
+    stp x22, x23, [sp, #64]
+    stp x20, x21, [sp, #48]
+    stp x9, x19, [sp, #32]                // Save old stack pointer and x19.
+    stp x3, x4, [sp, #16]                 // Save result and shorty addresses.
+    stp xFP, xLR, [sp]                    // Store LR & FP.
+    mov xSELF, x5                         // Move thread pointer into SELF register.
+
+    sub sp, sp, #16
+    str xzr, [sp]                         // Store null for ArtMethod* slot
+    // Branch to stub.
+    bl .Losr_entry
+    add sp, sp, #16
+
+    // Restore return value address and shorty address.
+    ldp x3,x4, [sp, #16]
+    ldr x28, [sp, #112]
+    ldp x26, x27, [sp, #96]
+    ldp x24, x25, [sp, #80]
+    ldp x22, x23, [sp, #64]
+    ldp x20, x21, [sp, #48]
+
+    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
+    ldrb w10, [x4]
+
+    // Check the return type and store the correct register into the jvalue in memory.
+
+    // Don't set anything for a void type.
+    cmp w10, #'V'
+    beq .Losr_exit
+
+    // Is it a double?
+    cmp w10, #'D'
+    bne .Lno_double
+    str d0, [x3]
+    b .Losr_exit
+
+.Lno_double:  // Is it a float?
+    cmp w10, #'F'
+    bne .Lno_float
+    str s0, [x3]
+    b .Losr_exit
+
+.Lno_float:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
+    str x0, [x3]
+
+.Losr_exit:  // Finish up.
+    ldp x2, x19, [sp, #32]   // Restore stack pointer and x19.
+    ldp xFP, xLR, [sp]    // Restore old frame pointer and link register.
+    mov sp, x2
+    ret
+
+.Losr_entry:
+    // Update stack pointer for the callee
+    sub sp, sp, x1
+
+    // Update link register slot expected by the callee.
+    sub w1, w1, #8
+    str lr, [sp, x1]
+
+    // Copy arguments into stack frame.
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // X0 - source address
+    // W1 - args length
+    // SP - destination address.
+    // W10 - temporary
+.Losr_loop_entry:
+    cmp w1, #0
+    beq .Losr_loop_exit
+    sub w1, w1, #4
+    ldr w10, [x0, x1]
+    str w10, [sp, x1]
+    b .Losr_loop_entry
+
+.Losr_loop_exit:
+    // Branch to the OSR entry point.
+    br x2
+
+END art_quick_osr_stub
+
     /*
      * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
      */
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index da30331..fbee5d7 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1712,5 +1712,65 @@
     ret
 END_FUNCTION art_quick_read_barrier_for_root_slow
 
+  /*
+     * On stack replacement stub.
+     * On entry:
+     *   [sp] = return address
+     *   [sp + 4] = stack to copy
+     *   [sp + 8] = size of stack
+     *   [sp + 12] = pc to call
+     *   [sp + 16] = JValue* result
+     *   [sp + 20] = shorty
+     *   [sp + 24] = thread
+     */
+DEFINE_FUNCTION art_quick_osr_stub
+    // Save native callee saves.
+    PUSH ebp
+    PUSH ebx
+    PUSH esi
+    PUSH edi
+    mov 4+16(%esp), %esi           // ESI = argument array
+    mov 8+16(%esp), %ecx           // ECX = size of args
+    mov 12+16(%esp), %ebx          // EBX = pc to call
+    mov %esp, %ebp                 // Save stack pointer
+    andl LITERAL(0xFFFFFFF0), %esp // Align stack
+    PUSH ebp                       // Save old stack pointer
+    subl LITERAL(12), %esp         // Align stack
+    movl LITERAL(0), (%esp)        // Store null for ArtMethod* slot
+    call .Losr_entry
+
+    // Restore stack pointer.
+    addl LITERAL(12), %esp
+    POP ebp
+    mov %ebp, %esp
+
+    // Restore callee saves.
+    POP edi
+    POP esi
+    POP ebx
+    POP ebp
+    mov 16(%esp), %ecx            // Get JValue result
+    mov %eax, (%ecx)              // Store the result assuming it is a long, int or Object*
+    mov %edx, 4(%ecx)             // Store the other half of the result
+    mov 20(%esp), %edx            // Get the shorty
+    cmpb LITERAL(68), (%edx)      // Test if result type char == 'D'
+    je .Losr_return_double_quick
+    cmpb LITERAL(70), (%edx)      // Test if result type char == 'F'
+    je .Losr_return_float_quick
+    ret
+.Losr_return_double_quick:
+    movsd %xmm0, (%ecx)           // Store the floating point result
+    ret
+.Losr_return_float_quick:
+    movss %xmm0, (%ecx)           // Store the floating point result
+    ret
+.Losr_entry:
+    subl LITERAL(4), %ecx         // Given stack size contains pushed frame pointer, substract it.
+    subl %ecx, %esp
+    mov %esp, %edi                // EDI = beginning of stack
+    rep movsb                     // while (ecx--) { *edi++ = *esi++ }
+    jmp *%ebx
+END_FUNCTION art_quick_osr_stub
+
     // TODO: implement these!
 UNIMPLEMENTED art_quick_memcmp16
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 883da96..d6e0f1c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1744,3 +1744,62 @@
     RESTORE_FP_CALLEE_SAVE_FRAME
     ret
 END_FUNCTION art_quick_read_barrier_for_root_slow
+
+    /*
+     * On stack replacement stub.
+     * On entry:
+     *   [sp] = return address
+     *   rdi = stack to copy
+     *   rsi = size of stack
+     *   rdx = pc to call
+     *   rcx = JValue* result
+     *   r8 = shorty
+     *   r9 = thread
+     */
+DEFINE_FUNCTION art_quick_osr_stub
+    // Save the non-volatiles.
+    PUSH rbp                      // Save rbp.
+    PUSH rcx                      // Save rcx/result*.
+    PUSH r8                       // Save r8/shorty*.
+
+    // Save callee saves.
+    PUSH rbx
+    PUSH r12
+    PUSH r13
+    PUSH r14
+    PUSH r15
+
+    pushq LITERAL(0)              // Push null for ArtMethod*.
+    movl %esi, %ecx               // rcx := size of stack
+    movq %rdi, %rsi               // rsi := stack to copy
+    call .Losr_entry
+
+    // Restore stack and callee-saves.
+    addq LITERAL(8), %rsp
+    POP r15
+    POP r14
+    POP r13
+    POP r12
+    POP rbx
+    POP r8
+    POP rcx
+    POP rbp
+    cmpb LITERAL(68), (%r8)        // Test if result type char == 'D'.
+    je .Losr_return_double_quick
+    cmpb LITERAL(70), (%r8)        // Test if result type char == 'F'.
+    je .Losr_return_float_quick
+    movq %rax, (%rcx)              // Store the result assuming its a long, int or Object*
+    ret
+.Losr_return_double_quick:
+    movsd %xmm0, (%rcx)            // Store the double floating point result.
+    ret
+.Losr_return_float_quick:
+    movss %xmm0, (%rcx)            // Store the floating point result.
+    ret
+.Losr_entry:
+    subl LITERAL(8), %ecx         // Given stack size contains pushed frame pointer, substract it.
+    subq %rcx, %rsp
+    movq %rsp, %rdi               // rdi := beginning of stack
+    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
+    jmp *%rdx
+END_FUNCTION art_quick_osr_stub