ART: Simplify invoke-polymorphic entrypoints

Moves to pattern used by the interpreter bridge and writes the result
in both the regular return register and the floating point result
register.

Add return value tests to 956-methodhandles.

Test: art/test.py --host -r -t 956
Test: art/test.py --target --32 -r -t 956
Test: art/test.py --target --64 -r -t 956
Change-Id: I7389d04b70b88e149682f6d656ab185e48bcbf66
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 4791fa3..9f2346d 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -516,7 +516,7 @@
         locations->AddTemp(visitor->GetMethodLocation());
         break;
     }
-  } else {
+  } else if (!invoke->IsInvokePolymorphic()) {
     locations->AddTemp(visitor->GetMethodLocation());
   }
 }
@@ -579,7 +579,9 @@
 }
 
 void CodeGenerator::GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke) {
-  MoveConstant(invoke->GetLocations()->GetTemp(0), static_cast<int32_t>(invoke->GetType()));
+  // invoke-polymorphic does not use a temporary to convey any additional information (e.g. a
+  // method index) since it requires several pieces of information from the instruction (registers
+  // A, B, H). Not using the reservation has no effect on the registers used in the runtime call.
   QuickEntrypointEnum entrypoint = kQuickInvokePolymorphic;
   InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr);
 }
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 311e838..2ef30c0 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2686,82 +2686,15 @@
 .extern artInvokePolymorphic
 ENTRY art_quick_invoke_polymorphic
     SETUP_SAVE_REFS_AND_ARGS_FRAME r2
-    mov     r2, rSELF              @ pass Thread::Current
-    mov     r3, sp                 @ pass SP
-    mov     r0, #0                 @ initialize 64-bit JValue as zero.
-    str     r0, [sp, #-4]!
-    .cfi_adjust_cfa_offset 4
-    str     r0, [sp, #-4]!
-    .cfi_adjust_cfa_offset 4
-    mov     r0, sp                 @ pass JValue for return result as first argument.
-    bl      artInvokePolymorphic   @ artInvokePolymorphic(JValue, receiver, Thread*, SP)
-    sub     r0, 'A'                @ return value is descriptor of handle's return type.
-    cmp     r0, 'Z' - 'A'          @ check if value is in bounds of handler table
-    bgt     .Lcleanup_and_return   @ and clean-up if not.
-    adr     r1, .Lhandler_table
-    tbb     [r0, r1]               @ branch to handler for return value based on return type.
-
-.Lstart_of_handlers:
-.Lstore_boolean_result:
-    ldrb    r0, [sp]               @ Copy boolean value to return value of this function.
-    b       .Lcleanup_and_return
-.Lstore_char_result:
-    ldrh    r0, [sp]               @ Copy char value to return value of this function.
-    b       .Lcleanup_and_return
-.Lstore_float_result:
-    vldr    s0, [sp]               @ Copy float value from JValue result to the context restored by
-    vstr    s0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
-    b       .Lcleanup_and_return
-.Lstore_double_result:
-    vldr    d0, [sp]               @ Copy double value from JValue result to the context restored by
-    vstr    d0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
-    b       .Lcleanup_and_return
-.Lstore_long_result:
-    ldr     r1, [sp, #4]           @ Copy the upper bits from JValue result to the context restored by
-    str     r1, [sp, #80]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
-    // Fall-through for lower bits.
-.Lstore_int_result:
-    ldr     r0, [sp]               @ Copy int value to return value of this function.
-    // Fall-through to clean up and return.
-.Lcleanup_and_return:
-    add     sp, #8
-    .cfi_adjust_cfa_offset -8
+    mov     r0, r1                 @ r0 := receiver
+    mov     r1, rSELF              @ r1 := Thread::Current
+    mov     r2, sp                 @ r2 := SP
+    bl      artInvokePolymorphic   @ artInvokePolymorphic(receiver, Thread*, SP)
+    str     r1, [sp, 72]           @ r0:r1 := Result. Copy r1 to context.
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
     REFRESH_MARKING_REGISTER
+    vmov    d0, r0, r1             @ Put result r0:r1 into floating point return register.
     RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
-
-.macro HANDLER_TABLE_OFFSET handler_label
-    .byte (\handler_label - .Lstart_of_handlers) / 2
-.endm
-
-.Lhandler_table:
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
-    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // B (byte)
-    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
-    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
-    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
-    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // I (int)
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
-    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // L (object)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
-    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // S (short)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
-    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
-.purgem HANDLER_TABLE_OFFSET
 END art_quick_invoke_polymorphic
 
 // Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 14d0cc7..5e540dd 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2844,82 +2844,15 @@
 
 .extern artInvokePolymorphic
 ENTRY art_quick_invoke_polymorphic
-    SETUP_SAVE_REFS_AND_ARGS_FRAME                // Save callee saves in case allocation triggers GC.
-    mov     x2, xSELF
-    mov     x3, sp
-    INCREASE_FRAME 16                             // Reserve space for JValue result.
-    str     xzr, [sp, #0]                         // Initialize result to zero.
-    mov     x0, sp                                // Set r0 to point to result.
-    bl      artInvokePolymorphic                  // artInvokePolymorphic(result, receiver, thread, save_area)
-    uxtb    w0, w0                                // Result is the return type descriptor as a char.
-    sub     w0, w0, 'A'                           // Convert to zero based index.
-    cmp     w0, 'Z' - 'A'
-    bhi     .Lcleanup_and_return                  // Clean-up if out-of-bounds.
-    adrp    x1, .Lhandler_table                   // Compute address of handler table.
-    add     x1, x1, :lo12:.Lhandler_table
-    ldrb    w0, [x1, w0, uxtw]                    // Lookup handler offset in handler table.
-    adr     x1, .Lstart_of_handlers
-    add     x0, x1, w0, sxtb #2                   // Convert relative offset to absolute address.
-    br      x0                                    // Branch to handler.
-
-.Lstart_of_handlers:
-.Lstore_boolean_result:
-    ldrb    w0, [sp]
-    b       .Lcleanup_and_return
-.Lstore_char_result:
-    ldrh    w0, [sp]
-    b       .Lcleanup_and_return
-.Lstore_float_result:
-    ldr     s0, [sp]
-    str     s0, [sp, #32]
-    b       .Lcleanup_and_return
-.Lstore_double_result:
-    ldr     d0, [sp]
-    str     d0, [sp, #32]
-    b       .Lcleanup_and_return
-.Lstore_long_result:
-    ldr     x0, [sp]
-    // Fall-through
-.Lcleanup_and_return:
-    DECREASE_FRAME 16
+    SETUP_SAVE_REFS_AND_ARGS_FRAME      // Save callee saves in case allocation triggers GC.
+    mov     x0, x1                      // x0 := receiver
+    mov     x1, xSELF                   // x1 := Thread::Current()
+    mov     x2, sp                      // x2 := SP
+    bl      artInvokePolymorphic        // artInvokePolymorphic(receiver, thread, save_area)
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
     REFRESH_MARKING_REGISTER
-    RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
-
-    .section    .rodata                           // Place handler table in read-only section away from text.
-    .align  2
-.macro HANDLER_TABLE_OFFSET handler_label
-    .byte (\handler_label - .Lstart_of_handlers) / 4
-.endm
-.Lhandler_table:
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // B (byte)
-    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
-    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
-    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // I (int)
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // L (object - references are compressed and only 32-bits)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // S (short)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
-    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
-    .text
-
+    fmov    d0, x0                      // Result is in x0. Copy to floating return register.
+    RETURN_OR_DELIVER_PENDING_EXCEPTION
 END  art_quick_invoke_polymorphic
 
 // Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index c367ea6..c9bdc96 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -3246,58 +3246,26 @@
     BRB_FIELD_EXIT_BREAK
 END art_quick_read_barrier_mark_introspection
 
+    /*
+     * Polymorphic method invocation.
+     * On entry:
+     *   a0 = unused
+     *   a1 = receiver
+     */
 .extern artInvokePolymorphic
 ENTRY art_quick_invoke_polymorphic
     SETUP_SAVE_REFS_AND_ARGS_FRAME
-    move  $a2, rSELF                          # Make $a2 an alias for the current Thread.
-    addiu $a3, $sp, ARG_SLOT_SIZE             # Make $a3 a pointer to the saved frame context.
-    sw    $zero, 20($sp)                      # Initialize JValue result.
-    sw    $zero, 16($sp)
-    la    $t9, artInvokePolymorphic
-    jalr  $t9                                 # artInvokePolymorphic(result, receiver, Thread*, context)
-    addiu $a0, $sp, 16                        # Make $a0 a pointer to the JValue result
-.macro MATCH_RETURN_TYPE c, handler
-    li    $t0, \c
-    beq   $v0, $t0, \handler
-.endm
-    MATCH_RETURN_TYPE 'V', .Lcleanup_and_return
-    MATCH_RETURN_TYPE 'L', .Lstore_int_result
-    MATCH_RETURN_TYPE 'I', .Lstore_int_result
-    MATCH_RETURN_TYPE 'J', .Lstore_long_result
-    MATCH_RETURN_TYPE 'B', .Lstore_int_result
-    MATCH_RETURN_TYPE 'C', .Lstore_char_result
-    MATCH_RETURN_TYPE 'D', .Lstore_double_result
-    MATCH_RETURN_TYPE 'F', .Lstore_float_result
-    MATCH_RETURN_TYPE 'S', .Lstore_int_result
-    MATCH_RETURN_TYPE 'Z', .Lstore_boolean_result
-.purgem MATCH_RETURN_TYPE
-    nop
-    b .Lcleanup_and_return
-    nop
-.Lstore_boolean_result:
-    b .Lcleanup_and_return
-    lbu   $v0, 16($sp)                        # Move byte from JValue result to return value register.
-.Lstore_char_result:
-    b .Lcleanup_and_return
-    lhu   $v0, 16($sp)                        # Move char from JValue result to return value register.
-.Lstore_double_result:
-.Lstore_float_result:
-    CHECK_ALIGNMENT $sp, $t0
-    ldc1  $f0, 16($sp)                        # Move double/float from JValue result to return value register.
-    b .Lcleanup_and_return
-    nop
-.Lstore_long_result:
-    lw    $v1, 20($sp)                        # Move upper bits from JValue result to return value register.
-    // Fall-through for lower bits.
-.Lstore_int_result:
-    lw    $v0, 16($sp)                        # Move lower bits from JValue result to return value register.
-    // Fall-through to clean up and return.
-.Lcleanup_and_return:
-    lw    $t7, THREAD_EXCEPTION_OFFSET(rSELF) # Load Thread::Current()->exception_
+    move    $a0, $a1                            # Make $a0 the receiver.
+    move    $a1, rSELF                          # Make $a1 an alias for the current Thread.
+    la      $t9, artInvokePolymorphic           # Invoke artInvokePolymorphic
+    jalr    $t9                                 # with args (receiver, Thread*, context).
+    addiu   $a2, $sp, ARG_SLOT_SIZE             # Make $a2 a pointer to the saved frame context.
+    lw      $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
-    bnez  $t7, 1f                             # Success if no exception is pending.
-    nop
-    jalr  $zero, $ra
+    bnez    $t7, 1f
+    # don't care if $v0 and/or $v1 are modified, when exception branch taken
+    MTD     $v0, $v1, $f0, $f1                  # move float value to return value
+    jalr    $zero, $ra
     nop
 1:
     DELIVER_PENDING_EXCEPTION
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 1f4f174..1800056 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -3046,59 +3046,26 @@
     BRB_FIELD_EXIT_BREAK
 END art_quick_read_barrier_mark_introspection
 
+    /*
+     * Polymorphic method invocation.
+     * On entry:
+     *   a0 = unused
+     *   a1 = receiver
+     */
 .extern artInvokePolymorphic
 ENTRY art_quick_invoke_polymorphic
     SETUP_SAVE_REFS_AND_ARGS_FRAME
-    move   $a2, rSELF                          # Make $a2 an alias for the current Thread.
-    move   $a3, $sp                            # Make $a3 a pointer to the saved frame context.
-    daddiu $sp, $sp, -8                        # Reserve space for JValue result.
-    .cfi_adjust_cfa_offset 8
-    sd     $zero, 0($sp)                       # Initialize JValue result.
-    jal    artInvokePolymorphic                # artInvokePolymorphic(result, receiver, Thread*, context)
-    move   $a0, $sp                            # Make $a0 a pointer to the JValue result
-.macro MATCH_RETURN_TYPE c, handler
-    li     $t0, \c
-    beq    $v0, $t0, \handler
-.endm
-    MATCH_RETURN_TYPE 'V', .Lcleanup_and_return
-    MATCH_RETURN_TYPE 'L', .Lstore_ref_result
-    MATCH_RETURN_TYPE 'I', .Lstore_long_result
-    MATCH_RETURN_TYPE 'J', .Lstore_long_result
-    MATCH_RETURN_TYPE 'B', .Lstore_long_result
-    MATCH_RETURN_TYPE 'C', .Lstore_char_result
-    MATCH_RETURN_TYPE 'D', .Lstore_double_result
-    MATCH_RETURN_TYPE 'F', .Lstore_float_result
-    MATCH_RETURN_TYPE 'S', .Lstore_long_result
-    MATCH_RETURN_TYPE 'Z', .Lstore_boolean_result
-.purgem MATCH_RETURN_TYPE
-    nop
-    b .Lcleanup_and_return
-    nop
-.Lstore_boolean_result:
-    b      .Lcleanup_and_return
-    lbu    $v0, 0($sp)                         # Move byte from JValue result to return value register.
-.Lstore_char_result:
-    b      .Lcleanup_and_return
-    lhu    $v0, 0($sp)                         # Move char from JValue result to return value register.
-.Lstore_double_result:
-.Lstore_float_result:
-    b      .Lcleanup_and_return
-    l.d    $f0, 0($sp)                         # Move double/float from JValue result to return value register.
-.Lstore_ref_result:
-    b      .Lcleanup_and_return
-    lwu    $v0, 0($sp)                         # Move zero extended lower 32-bits to return value register.
-.Lstore_long_result:
-    ld     $v0, 0($sp)                         # Move long from JValue result to return value register.
-    // Fall-through to clean up and return.
-.Lcleanup_and_return:
-    daddiu $sp, $sp, 8                         # Remove space for JValue result.
-    .cfi_adjust_cfa_offset -8
-    ld     $t0, THREAD_EXCEPTION_OFFSET(rSELF) # Load Thread::Current()->exception_
-    RESTORE_SAVE_REFS_AND_ARGS_FRAME
-    bnez   $t0, 1f                             # Success if no exception is pending.
-    nop
-    jalr   $zero, $ra
-    nop
+    move    $a0, $a1               # Make $a0 the receiver
+    move    $a1, rSELF             # Make $a1 an alias for the current Thread.
+    jal     artInvokePolymorphic   # artInvokePolymorphic(receiver, Thread*, context)
+    move    $a2, $sp               # Make $a2 a pointer to the saved frame context.
+    ld      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
+    daddiu  $sp, $sp, REFS_AND_ARGS_MINUS_REFS_SIZE  # skip a0-a7 and f12-f19
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    bne     $t0, $zero, 1f
+    dmtc1   $v0, $f0               # copy result $v0 to FP return register $f0
+    jalr    $zero, $ra
+    dmtc1   $v1, $f1               # copy $v1 to FP return register $f1
 1:
     DELIVER_PENDING_EXCEPTION
 END art_quick_invoke_polymorphic
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index b89d45f..e392198 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2434,97 +2434,25 @@
 END_FUNCTION art_quick_osr_stub
 
 DEFINE_FUNCTION art_quick_invoke_polymorphic
-    SETUP_SAVE_REFS_AND_ARGS_FRAME  ebx, ebx       // Save frame.
-    mov %esp, %edx                                 // Remember SP.
-    subl LITERAL(16), %esp                         // Make space for JValue result.
-    CFI_ADJUST_CFA_OFFSET(16)
-    movl LITERAL(0), (%esp)                        // Initialize result to zero.
-    movl LITERAL(0), 4(%esp)
-    mov %esp, %eax                                 // Store pointer to JValue result in eax.
-    PUSH edx                                       // pass SP
-    pushl %fs:THREAD_SELF_OFFSET                   // pass Thread::Current()
+                                                   // On entry: EAX := unused, ECX := receiver
+    SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, ebx        // Save frame.
+    mov %esp, %edx                                 // Remember SP
+    sub LITERAL(4), %esp                           // Alignment padding
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH ecx                                       // pass receiver (method handle)
-    PUSH eax                                       // pass JResult
-    call SYMBOL(artInvokePolymorphic)              // artInvokePolymorphic(result, receiver, Thread*, SP)
-    subl LITERAL('A'), %eax                        // Eliminate out of bounds options
-    cmpb LITERAL('Z' - 'A'), %al
-    ja .Lcleanup_and_return
-    movzbl %al, %eax
-    call .Lput_eip_in_ecx
-.Lbranch_start:
-    movl %ecx, %edx
-    add $(.Lhandler_table - .Lbranch_start), %edx  // Make EDX point to handler_table.
-    leal (%edx, %eax, 2), %eax                     // Calculate address of entry in table.
-    movzwl (%eax), %eax                            // Lookup relative branch in table.
-    addl %ecx, %eax                                // Add EIP relative offset.
-    jmp *%eax                                      // Branch to handler.
-
-    // Handlers for different return types.
-.Lstore_boolean_result:
-    movzbl 16(%esp), %eax                          // Copy boolean result to the accumulator.
-    jmp .Lcleanup_and_return
-.Lstore_char_result:
-    movzwl 16(%esp), %eax                          // Copy char result to the accumulator.
-    jmp .Lcleanup_and_return
-.Lstore_float_result:
-    movd 16(%esp), %xmm0                           // Copy float result to the context restored by
-    movd %xmm0, 36(%esp)                           // RESTORE_SAVE_REFS_ONLY_FRAME.
-    jmp .Lcleanup_and_return
-.Lstore_double_result:
-    movsd 16(%esp), %xmm0                          // Copy double result to the context restored by
-    movsd %xmm0, 36(%esp)                          // RESTORE_SAVE_REFS_ONLY_FRAME.
-    jmp .Lcleanup_and_return
-.Lstore_long_result:
-    movl 20(%esp), %edx                            // Copy upper-word of result to the context restored by
-    movl %edx, 72(%esp)                            // RESTORE_SAVE_REFS_ONLY_FRAME.
-    // Fall-through for lower bits.
-.Lstore_int_result:
-    movl 16(%esp), %eax                            // Copy int result to the accumulator.
-    // Fall-through to clean up and return.
-.Lcleanup_and_return:
-    addl LITERAL(32), %esp                         // Pop arguments and stack allocated JValue result.
-    CFI_ADJUST_CFA_OFFSET(-32)
+    push %edx                                      // Push SP
+    CFI_ADJUST_CFA_OFFSET(4)
+    pushl %fs:THREAD_SELF_OFFSET                   // Push Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    push %ecx                                      // Push receiver (method handle)
+    CFI_ADJUST_CFA_OFFSET(4)
+    call SYMBOL(artInvokePolymorphic)              // invoke with (receiver, thread, SP)
+    addl LITERAL(16), %esp                         // Pop arguments.
+    CFI_ADJUST_CFA_OFFSET(-16)
+    mov %eax, 4(%esp)                              // Result is in EAX:EDX. Copy to saved FP state.
+    mov %edx, 8(%esp)
+    mov %edx, 40(%esp)                             // Copy EDX to saved context
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
     RETURN_OR_DELIVER_PENDING_EXCEPTION
-
-.Lput_eip_in_ecx:                                  // Internal function that puts address of
-    movl 0(%esp), %ecx                             // next instruction into ECX when CALL
-    ret
-
-    // Handler table to handlers for given type.
-.Lhandler_table:
-MACRO1(HANDLER_TABLE_ENTRY, handler_label)
-    // NB some tools require 16-bits for relocations. Shouldn't need adjusting.
-    .word RAW_VAR(handler_label) - .Lbranch_start
-END_MACRO
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // A
-    HANDLER_TABLE_ENTRY(.Lstore_int_result)        // B (byte)
-    HANDLER_TABLE_ENTRY(.Lstore_char_result)       // C (char)
-    HANDLER_TABLE_ENTRY(.Lstore_double_result)     // D (double)
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // E
-    HANDLER_TABLE_ENTRY(.Lstore_float_result)      // F (float)
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // G
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // H
-    HANDLER_TABLE_ENTRY(.Lstore_int_result)        // I (int)
-    HANDLER_TABLE_ENTRY(.Lstore_long_result)       // J (long)
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // K
-    HANDLER_TABLE_ENTRY(.Lstore_int_result)        // L (object)
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // M
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // N
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // O
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // P
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // Q
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // R
-    HANDLER_TABLE_ENTRY(.Lstore_int_result)        // S (short)
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // T
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // U
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // V (void)
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // W
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // X
-    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // Y
-    HANDLER_TABLE_ENTRY(.Lstore_boolean_result)    // Z (boolean)
-
 END_FUNCTION art_quick_invoke_polymorphic
 
 // Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index c179033..3f5d4f6 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2418,78 +2418,15 @@
 END_FUNCTION art_quick_osr_stub
 
 DEFINE_FUNCTION art_quick_invoke_polymorphic
+                                                   // On entry: RDI := unused, RSI := receiver
     SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
-    movq %gs:THREAD_SELF_OFFSET, %rdx              // pass Thread
-    movq %rsp, %rcx                                // pass SP
-    subq LITERAL(16), %rsp                         // make space for JValue result
-    CFI_ADJUST_CFA_OFFSET(16)
-    movq LITERAL(0), (%rsp)                        // initialize result
-    movq %rsp, %rdi                                // store pointer to JValue result
-    call SYMBOL(artInvokePolymorphic)              // artInvokePolymorphic(result, receiver, Thread*, SP)
+    movq %rsi, %rdi                                // RDI := receiver
+    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread (self)
+    movq %rsp, %rdx                                // RDX := pass SP
+    call SYMBOL(artInvokePolymorphic)              // invoke with (receiver, self, SP)
                                                    // save the code pointer
-    subq LITERAL('A'), %rax                        // Convert type descriptor character value to a zero based index.
-    cmpb LITERAL('Z' - 'A'), %al                   // Eliminate out of bounds options
-    ja .Lcleanup_and_return
-    movzbq %al, %rax
-    leaq .Lhandler_table(%rip), %rcx               // Get the address of the handler table
-    movslq (%rcx, %rax, 4), %rax                   // Lookup handler offset relative to table
-    addq %rcx, %rax                                // Add table address to yield handler address.
-    jmpq *%rax                                     // Jump to handler.
-
-.align 4
-.Lhandler_table:                                   // Table of type descriptor to handlers.
-MACRO1(HANDLER_TABLE_OFFSET, handle_label)
-    // NB some tools require 32-bits for relocations. Shouldn't need adjusting.
-    .long RAW_VAR(handle_label) - .Lhandler_table
-END_MACRO
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // A
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // B (byte)
-    HANDLER_TABLE_OFFSET(.Lstore_char_result)      // C (char)
-    HANDLER_TABLE_OFFSET(.Lstore_double_result)    // D (double)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // E
-    HANDLER_TABLE_OFFSET(.Lstore_float_result)     // F (float)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // G
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // H
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // I (int)
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // J (long)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // K
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // L (object - references are compressed and only 32-bits)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // M
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // N
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // O
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // P
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // Q
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // R
-    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // S (short)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // T
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // U
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // V (void)
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // W
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // X
-    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // Y
-    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)   // Z (boolean)
-
-.Lstore_boolean_result:
-    movzbq (%rsp), %rax                            // Copy boolean result to the accumulator
-    jmp .Lcleanup_and_return
-.Lstore_char_result:
-    movzwq (%rsp), %rax                            // Copy char result to the accumulator
-    jmp .Lcleanup_and_return
-.Lstore_float_result:
-    movd (%rsp), %xmm0                             // Copy float result to the context restored by
-    movd %xmm0, 32(%rsp)                           // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
-    jmp .Lcleanup_and_return
-.Lstore_double_result:
-    movsd (%rsp), %xmm0                            // Copy double result to the context restored by
-    movsd %xmm0, 32(%rsp)                          // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
-    jmp .Lcleanup_and_return
-.Lstore_long_result:
-    movq (%rsp), %rax                              // Copy long result to the accumulator.
-     // Fall-through
-.Lcleanup_and_return:
-    addq LITERAL(16), %rsp                         // Pop space for JValue result.
-    CFI_ADJUST_CFA_OFFSET(16)
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_invoke_polymorphic
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index af6a936..d2b8a98 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2722,18 +2722,11 @@
                                 reinterpret_cast<uintptr_t>(method));
 }
 
-// Returns shorty type so the caller can determine how to put |result|
-// into expected registers. The shorty type is static so the compiler
-// could call different flavors of this code path depending on the
-// shorty type though this would require different entry points for
-// each type.
-extern "C" uintptr_t artInvokePolymorphic(
-    JValue* result,
-    mirror::Object* raw_receiver,
-    Thread* self,
-    ArtMethod** sp)
+// Returns uint64_t representing raw bits from JValue.
+extern "C" uint64_t artInvokePolymorphic(mirror::Object* raw_receiver, Thread* self, ArtMethod** sp)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
+  DCHECK(raw_receiver != nullptr);
   DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs));
 
   // Start new JNI local reference state
@@ -2766,18 +2759,12 @@
   ArtMethod* resolved_method = linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
       self, inst.VRegB(), caller_method, kVirtual);
 
-  if (UNLIKELY(receiver_handle.IsNull())) {
-    ThrowNullPointerExceptionForMethodAccess(resolved_method, InvokeType::kVirtual);
-    return static_cast<uintptr_t>('V');
-  }
-
   Handle<mirror::MethodType> method_type(
       hs.NewHandle(linker->ResolveMethodType(self, proto_idx, caller_method)));
-
-  // This implies we couldn't resolve one or more types in this method handle.
   if (UNLIKELY(method_type.IsNull())) {
+    // This implies we couldn't resolve one or more types in this method handle.
     CHECK(self->IsExceptionPending());
-    return static_cast<uintptr_t>('V');
+    return 0UL;
   }
 
   DCHECK_EQ(ArtMethod::NumArgRegisters(shorty) + 1u, (uint32_t)inst.VRegA());
@@ -2811,6 +2798,7 @@
   // consecutive order.
   RangeInstructionOperands operands(first_arg + 1, num_vregs - 1);
   Intrinsics intrinsic = static_cast<Intrinsics>(resolved_method->GetIntrinsic());
+  JValue result;
   bool success = false;
   if (resolved_method->GetDeclaringClass() == GetClassRoot<mirror::MethodHandle>(linker)) {
     Handle<mirror::MethodHandle> method_handle(hs.NewHandle(
@@ -2821,7 +2809,7 @@
                                         method_handle,
                                         method_type,
                                         &operands,
-                                        result);
+                                        &result);
     } else {
       DCHECK_EQ(static_cast<uint32_t>(intrinsic),
                 static_cast<uint32_t>(Intrinsics::kMethodHandleInvoke));
@@ -2830,7 +2818,7 @@
                                    method_handle,
                                    method_type,
                                    &operands,
-                                   result);
+                                   &result);
     }
   } else {
     DCHECK_EQ(GetClassRoot<mirror::VarHandle>(linker), resolved_method->GetDeclaringClass());
@@ -2844,7 +2832,7 @@
                                       method_type,
                                       access_mode,
                                       &operands,
-                                      result);
+                                      &result);
   }
 
   DCHECK(success || self->IsExceptionPending());
@@ -2852,7 +2840,7 @@
   // Pop transition record.
   self->PopManagedStackFragment(fragment);
 
-  return static_cast<uintptr_t>(shorty[0]);
+  return result.GetJ();
 }
 
 }  // namespace art
diff --git a/test/956-methodhandles/expected.txt b/test/956-methodhandles/expected.txt
index 6954c22..a8b609b 100644
--- a/test/956-methodhandles/expected.txt
+++ b/test/956-methodhandles/expected.txt
@@ -15,6 +15,7 @@
 Chatty.chatter()
 Chatty.chatter()
 String constructors done.
+testReturnValues done.
 testReferenceReturnValueConversions done.
 testPrimitiveReturnValueConversions done.
 Hi
diff --git a/test/956-methodhandles/src/Main.java b/test/956-methodhandles/src/Main.java
index dee818a..11d6ead 100644
--- a/test/956-methodhandles/src/Main.java
+++ b/test/956-methodhandles/src/Main.java
@@ -102,6 +102,7 @@
     testAsType();
     testConstructors();
     testStringConstructors();
+    testReturnValues();
     testReturnValueConversions();
     testVariableArity();
     testVariableArity_MethodHandles_bind();
@@ -873,6 +874,89 @@
     System.out.println("String constructors done.");
   }
 
+  private static void testReturnValues() throws Throwable {
+    Lookup lookup = MethodHandles.lookup();
+    // Each section calls its handle via invokeExact() and invoke() and checks the returned value.
+    // byte: Byte.byteValue() through a virtual method handle.
+    MethodHandle mhByteValue =
+        lookup.findVirtual(Byte.class, "byteValue", MethodType.methodType(byte.class));
+    assertEquals((byte) -77, (byte) mhByteValue.invokeExact(Byte.valueOf((byte) -77)));
+    assertEquals((byte) -77, (byte) mhByteValue.invoke(Byte.valueOf((byte) -77)));
+
+    // char: static field getter for Character.MAX_SURROGATE.
+    MethodHandle mhCharacterValue =
+        lookup.findStaticGetter(Character.class, "MAX_SURROGATE", char.class);
+    assertEquals(Character.MAX_SURROGATE, (char) mhCharacterValue.invokeExact());
+    assertEquals(Character.MAX_SURROGATE, (char) mhCharacterValue.invoke());
+
+    // double: Math.sin(double) for inputs from -PI, stepping by PI/8 while i <= PI.
+    MethodHandle mhSin =
+        lookup.findStatic(
+            Math.class, "sin", MethodType.methodType(double.class, double.class));
+    for (double i = -Math.PI; i <= Math.PI; i += Math.PI / 8) {
+      assertEquals(Math.sin(i), (double) mhSin.invokeExact(i));
+      assertEquals(Math.sin(i), (double) mhSin.invoke(i));
+    }
+
+    // float: Math.abs(float).
+    MethodHandle mhAbsFloat =
+        lookup.findStatic(
+            Math.class, "abs", MethodType.methodType(float.class, float.class));
+    assertEquals(Math.abs(-3.3e6f), (float) mhAbsFloat.invokeExact(-3.3e6f));
+    assertEquals(Math.abs(-3.3e6f), (float) mhAbsFloat.invoke(-3.3e6f));
+
+    // int: Math.abs(int).
+    MethodHandle mhAbsInt =
+        lookup.findStatic(Math.class, "abs", MethodType.methodType(int.class, int.class));
+    assertEquals(Math.abs(-1000), (int) mhAbsInt.invokeExact(-1000));
+    assertEquals(Math.abs(-1000), (int) mhAbsInt.invoke(-1000));
+
+    // long: Math.max(long, long), including a value whose upper and lower 32 bits differ.
+    MethodHandle mhMaxLong =
+        lookup.findStatic(
+            Math.class,
+            "max",
+            MethodType.methodType(long.class, long.class, long.class));
+    assertEquals(
+        Long.MAX_VALUE, (long) mhMaxLong.invokeExact(Long.MAX_VALUE, Long.MAX_VALUE / 2));
+    assertEquals(Long.MAX_VALUE, (long) mhMaxLong.invoke(Long.MAX_VALUE, Long.MAX_VALUE / 2));
+    assertEquals(0x0123456789abcdefL, (long) mhMaxLong.invokeExact(0x0123456789abcdefL, 0L));
+    assertEquals(0x0123456789abcdefL, (long) mhMaxLong.invoke(0x0123456789abcdefL, 0L));
+
+    // ref: Short.valueOf(short), which returns a boxed Short.
+    MethodHandle mhShortValueOf =
+        lookup.findStatic(
+            Short.class, "valueOf", MethodType.methodType(Short.class, short.class));
+    assertEquals(
+        (short) -7890, ((Short) mhShortValueOf.invokeExact((short) -7890)).shortValue());
+    assertEquals((short) -7890, ((Short) mhShortValueOf.invoke((short) -7890)).shortValue());
+
+    // array: Arrays.copyOf(int[], int), which returns an int[] reference.
+    int [] array = {Integer.MIN_VALUE, -1, 0, +1, Integer.MAX_VALUE};
+    MethodHandle mhCopyOf =
+            lookup.findStatic(
+                Arrays.class, "copyOf", MethodType.methodType(int[].class, int[].class, int.class));
+    assertTrue(Arrays.equals(array, (int[]) mhCopyOf.invokeExact(array, array.length)));
+    assertTrue(Arrays.equals(array, (int[]) mhCopyOf.invoke(array, array.length)));
+
+    // short: Short.shortValue() through a virtual method handle.
+    MethodHandle mhShortValue =
+        lookup.findVirtual(Short.class, "shortValue", MethodType.methodType(short.class));
+    assertEquals((short) 12131, (short) mhShortValue.invokeExact(Short.valueOf((short) 12131)));
+    assertEquals((short) 12131, (short) mhShortValue.invoke(Short.valueOf((short) 12131)));
+
+    // boolean: Boolean.booleanValue() for both true and false.
+    MethodHandle mhBooleanValue =
+        lookup.findVirtual(
+            Boolean.class, "booleanValue", MethodType.methodType(boolean.class));
+    assertEquals(true, (boolean) mhBooleanValue.invokeExact(Boolean.valueOf(true)));
+    assertEquals(true, (boolean) mhBooleanValue.invoke(Boolean.valueOf(true)));
+    assertEquals(false, (boolean) mhBooleanValue.invokeExact(Boolean.valueOf(false)));
+    assertEquals(false, (boolean) mhBooleanValue.invoke(Boolean.valueOf(false)));
+
+    System.out.println("testReturnValues done.");
+  }
+
   private static void testReferenceReturnValueConversions() throws Throwable {
     MethodHandle mh = MethodHandles.lookup().findStatic(
         Float.class, "valueOf", MethodType.methodType(Float.class, String.class));