Fix bugs in the x86 and arm versions of generic JNI.

Also fix the run script of 116-nodex2oat to use the non-prebuild
script for target.

Bug: 17000769

Change-Id: I439fcd710fb8bb408f3288ff8fb34fef23890adb
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 6c63a1a..5ab70ea 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1059,14 +1059,13 @@
     // result sign extension is handled in C code
     // prepare for artQuickGenericJniEndTrampoline call
     // (Thread*, result, result_f)
-    //    r0      r1,r2    r3,stack       <= C calling convention
+    //    r0      r2,r3    stack          <= C calling convention (r1 unused: 64-bit arg takes an even/odd pair)
     //    r11     r0,r1    r0,r1          <= where they are
-    sub sp, sp, #12 // Stack alignment.
+    sub sp, sp, #8 // Stack alignment.
 
-    push {r1}
-    mov r3, r0
-    mov r2, r1
-    mov r1, r0
+    push {r0-r1}
+    mov r3, r1
+    mov r2, r0
     mov r0, r11
 
     blx artQuickGenericJniEndTrampoline
@@ -1083,7 +1082,18 @@
     cbnz r2, .Lexception_in_native
 
     // Tear down the callee-save frame.
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    add  sp, #12                      @ rewind sp
+    // Do not pop r0 and r1, they contain the return value.
+    pop {r2-r3, r5-r8, r10-r11, lr}  @ 9 words of callee saves
+    .cfi_restore r2
+    .cfi_restore r3
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r10
+    .cfi_restore r11
+    .cfi_adjust_cfa_offset -48
 
     bx lr      // ret
 
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index dc4019d..117738a 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1183,10 +1183,10 @@
     // prepare for artQuickGenericJniEndTrampoline call
     // (Thread*, result, result_f)
     //  (esp)    4(esp)  12(esp)    <= C calling convention
-    //  fs:...  eax:edx   xmm0      <= where they are
+    //  fs:...  eax:edx   st(0)    <= where they are (st(0) = x87 top of stack)
 
     subl LITERAL(20), %esp         // Padding & pass float result.
-    movsd %xmm0, (%esp)
+    fstpl (%esp)
     pushl %edx                    // Pass int result.
     pushl %eax
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
@@ -1211,7 +1211,7 @@
     POP ebp  // Restore callee saves
     POP esi
     POP edi
-    // store into fpr, for when it's a fpr return...
+    // Quick expects the return value to be in xmm0.
     movd %eax, %xmm0
     movd %edx, %xmm1
     punpckldq %xmm1, %xmm0