Use implicit null checks inside try blocks.

Make the implicit null check entrypoint save all registers and
use a platform-specific approach to still pass the fault address.
Allow implicit null checks in try blocks.

On Nexus 9, AOSP ToT, the boot.oat size reduction is
  prebuilt multi-part boot image:
    - 32-bit boot.oat: -452KiB (-0.7%)
    - 64-bit boot.oat: -482KiB (-0.7%)
  on-device built single boot image:
    - 32-bit boot.oat: -444KiB (-0.7%)
    - 64-bit boot.oat: -488KiB (-0.7%)

Test: Run ART test suite on host and Nexus 9.
Test: Build aosp_mips64-eng.
Change-Id: I279f3ab57e2e2f338131c5cac45c51b673bdca19
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index befdd48..daa2dff 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -122,13 +122,16 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-
   uint32_t instr_size = GetInstructionSize(ptr);
-  sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
+  uintptr_t gc_map_location = (sc->arm_pc + instr_size) | 1;
+
+  // Push the gc map location to the stack and pass the fault address in LR.
+  sc->arm_sp -= sizeof(uintptr_t);
+  *reinterpret_cast<uintptr_t*>(sc->arm_sp) = gc_map_location;
+  sc->arm_lr = reinterpret_cast<uintptr_t>(info->si_addr);
   sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   // Pass the faulting address as the first argument of
   // art_quick_throw_null_pointer_exception_from_signal.
-  sc->arm_r0 = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index a3f053b..3fc83ba 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -173,6 +173,29 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when core registers are already saved.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp
+                                        @ 14 words of callee saves and args already saved.
+    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
+    .cfi_adjust_cfa_offset 128
+    sub sp, #8                          @ 2 words of space, alignment padding and Method*
+    .cfi_adjust_cfa_offset 8
+    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
+    @ Load kSaveEverything Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
+#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
+#endif
+.endm
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      */
 .macro SETUP_SAVE_EVERYTHING_FRAME rTemp
     push {r0-r12, lr}                   @ 14 words of callee saves and args.
@@ -191,20 +214,7 @@
     .cfi_rel_offset r11, 44
     .cfi_rel_offset ip, 48
     .cfi_rel_offset lr, 52
-    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
-    .cfi_adjust_cfa_offset 128
-    sub sp, #8                          @ 2 words of space, alignment padding and Method*
-    .cfi_adjust_cfa_offset 8
-    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
-    @ Load kSaveEverything Method* into rTemp.
-    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
-    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
-    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
-
-    // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
-#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
-#endif
+    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp
 .endm
 
 .macro RESTORE_SAVE_EVERYTHING_FRAME
@@ -356,7 +366,34 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+    .extern art_quick_throw_null_pointer_exception_from_signal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    // The fault handler pushes the gc map address, i.e. "return address", to the stack
+    // and passes the fault address in LR, so the CFI info is set up accordingly.
+    .cfi_def_cfa_offset __SIZEOF_POINTER__
+    .cfi_rel_offset lr, 0
+    push {r0-r12}                   @ 13 words of callee saves and args; LR already saved.
+    .cfi_adjust_cfa_offset 52
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r4, 16
+    .cfi_rel_offset r5, 20
+    .cfi_rel_offset r6, 24
+    .cfi_rel_offset r7, 28
+    .cfi_rel_offset r8, 32
+    .cfi_rel_offset r9, 36
+    .cfi_rel_offset r10, 40
+    .cfi_rel_offset r11, 44
+    .cfi_rel_offset ip, 48
+
+    @ Save all registers as basis for long jump context.
+    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
+    mov r0, lr                      @ pass the fault address stored in LR by the fault handler.
+    mov r1, r9                      @ pass Thread::Current
+    b   artThrowNullPointerExceptionFromSignal  @ (addr, Thread*)
+END art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 6724d6d..c02be87 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -96,12 +96,12 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
-  sc->regs[30] = sc->pc + 4;      // LR needs to point to gc map location
+  // Push the gc map location to the stack and pass the fault address in LR.
+  sc->sp -= sizeof(uintptr_t);
+  *reinterpret_cast<uintptr_t*>(sc->sp) = sc->pc + 4;
+  sc->regs[30] = reinterpret_cast<uintptr_t>(info->si_addr);
 
   sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-  sc->regs[0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 25aa8ce..ea4669d 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -265,10 +265,10 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
+     * and saving registers x29 and LR is handled elsewhere.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
-    INCREASE_FRAME 512
-
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 512)
 #error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
@@ -310,7 +310,6 @@
     SAVE_TWO_REGS x23, x24, 448
     SAVE_TWO_REGS x25, x26, 464
     SAVE_TWO_REGS x27, x28, 480
-    SAVE_TWO_REGS x29, xLR, 496
 
     // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
@@ -328,6 +327,16 @@
     str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    INCREASE_FRAME 512
+    SAVE_TWO_REGS x29, xLR, 496
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
+.endm
+
 .macro RESTORE_SAVE_EVERYTHING_FRAME
     // Restore FP registers.
     // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
@@ -462,7 +471,21 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+    .extern art_quick_throw_null_pointer_exception_from_signal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    // The fault handler pushes the gc map address, i.e. "return address", to the stack
+    // and passes the fault address in LR, so the CFI info is set up accordingly.
+    .cfi_def_cfa_offset __SIZEOF_POINTER__
+    .cfi_rel_offset lr, 0
+    // Save all registers as basis for long jump context.
+    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
+    SAVE_REG x29, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)  // LR already saved.
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
+    mov x0, lr                        // pass the fault address stored in LR by the fault handler.
+    mov x1, xSELF                     // pass Thread::Current.
+    b   artThrowNullPointerExceptionFromSignal  // (addr, Thread*).
+    brk 0                             // Not reached by fall-through; the branch above is unconditional.
+END art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/runtime/arch/mips/asm_support_mips.S b/runtime/arch/mips/asm_support_mips.S
index 801f708..7955b1d 100644
--- a/runtime/arch/mips/asm_support_mips.S
+++ b/runtime/arch/mips/asm_support_mips.S
@@ -43,7 +43,7 @@
 .endm
 
      // Declare a function called name, doesn't set up $gp.
-.macro ENTRY_NO_GP name
+.macro ENTRY_NO_GP_CUSTOM_CFA name, cfa_offset
     .type \name, %function
     .global \name
     // Cache alignment for function entry.
@@ -51,7 +51,12 @@
 \name:
     .cfi_startproc
      // Ensure we get a sane starting CFA.
-    .cfi_def_cfa $sp,0
+    .cfi_def_cfa $sp, \cfa_offset
+.endm
+
+     // Declare a function called name, doesn't set up $gp.
+.macro ENTRY_NO_GP name
+    ENTRY_NO_GP_CUSTOM_CFA \name, 0
 .endm
 
 .macro END name
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 7969a8f..b6a63ca 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-
+#include "arch/mips/quick_method_frame_info_mips.h"
 #include "fault_handler.h"
 #include <sys/ucontext.h>
 #include "art_method-inl.h"
@@ -82,12 +82,15 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
+  // Decrement $sp by the frame size of the kSaveEverything method and store
+  // the fault address in the padding right after the ArtMethod*.
+  sc->sc_regs[mips::SP] -= mips::MipsCalleeSaveFrameSize(Runtime::kSaveEverything);
+  uintptr_t* padding = reinterpret_cast<uintptr_t*>(sc->sc_regs[mips::SP]) + /* ArtMethod* */ 1;
+  *padding = reinterpret_cast<uintptr_t>(info->si_addr);
+
   sc->sc_regs[mips::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   sc->sc_regs[mips::T9] = sc->sc_pc;          // make sure T9 points to the function
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-  sc->sc_regs[mips::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 4bd1314..71b8ae2 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -279,6 +279,7 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * For use when $sp has already been decremented by FRAME_SIZE_SAVE_EVERYTHING.
      * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp $ra, $f0-$f31;
      *              28(GPR)+ 32(FPR) + 3 words for padding and 1 word for Method*
      * Clobbers $t0 and $t1.
@@ -286,10 +287,7 @@
      * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
      * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
-    addiu  $sp, $sp, -256
-    .cfi_adjust_cfa_offset 256
-
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
      // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 256)
 #error "FRAME_SIZE_SAVE_EVERYTHING(MIPS) size not as expected."
@@ -388,6 +386,22 @@
     .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp $ra, $f0-$f31;
+     *              28(GPR)+ 32(FPR) + 3 words for padding and 1 word for Method*
+     * Clobbers $t0 and $t1.
+     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+     * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    addiu  $sp, $sp, -(FRAME_SIZE_SAVE_EVERYTHING)
+    .cfi_adjust_cfa_offset (FRAME_SIZE_SAVE_EVERYTHING)
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+.endm
+
 .macro RESTORE_SAVE_EVERYTHING_FRAME
     addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
     .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
@@ -708,8 +722,10 @@
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
     .extern artThrowNullPointerExceptionFromSignal
-ENTRY art_quick_throw_null_pointer_exception_from_signal
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ENTRY_NO_GP_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, FRAME_SIZE_SAVE_EVERYTHING
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+    # Retrieve the fault address from the padding where the signal handler stores it.
+    lw   $a0, (ARG_SLOT_SIZE + __SIZEOF_POINTER__)($sp)
     la   $t9, artThrowNullPointerExceptionFromSignal
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
     move $a1, rSELF                 # pass Thread::Current
diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S
index 786e860..6c58fcf 100644
--- a/runtime/arch/mips64/asm_support_mips64.S
+++ b/runtime/arch/mips64/asm_support_mips64.S
@@ -45,8 +45,8 @@
 .L\name\()_gp_set:
 .endm
 
-     // Declare a function called name, doesn't set up $gp.
-.macro ENTRY_NO_GP name
+    // Declare a function called name, doesn't set up $gp.
+.macro ENTRY_NO_GP_CUSTOM_CFA name, cfa_offset
     .type \name, %function
     .global \name
     // Cache alignment for function entry.
@@ -54,7 +54,12 @@
 \name:
     .cfi_startproc
      // Ensure we get a sane starting CFA.
-    .cfi_def_cfa $sp,0
+    .cfi_def_cfa $sp, \cfa_offset
+.endm
+
+    // Declare a function called name, doesn't set up $gp.
+.macro ENTRY_NO_GP name
+    ENTRY_NO_GP_CUSTOM_CFA \name, 0
 .endm
 
 .macro END name
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index 0bbb6e1..e52dc73 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-
+#include "arch/mips64/quick_method_frame_info_mips64.h"
 #include "fault_handler.h"
 #include <sys/ucontext.h>
 #include "art_method-inl.h"
@@ -83,12 +83,15 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
+  // Decrement $sp by the frame size of the kSaveEverything method and store
+  // the fault address in the padding right after the ArtMethod*.
+  sc->sc_regs[mips64::SP] -= mips64::Mips64CalleeSaveFrameSize(Runtime::kSaveEverything);
+  uintptr_t* padding = reinterpret_cast<uintptr_t*>(sc->sc_regs[mips64::SP]) + /* ArtMethod* */ 1;
+  *padding = reinterpret_cast<uintptr_t>(info->si_addr);
+
   sc->sc_regs[mips64::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   sc->sc_regs[mips64::T9] = sc->sc_pc;          // make sure T9 points to the function
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-  sc->sc_regs[mips64::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 0bf2a35..d51d18a 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -316,14 +316,12 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * For use when $sp has already been decremented by FRAME_SIZE_SAVE_EVERYTHING.
      * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra + $s8,
      *              $f0-$f31; 28(GPR)+ 32(FPR) + 1x8 bytes padding + method*
      * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
-    daddiu $sp, $sp, -496
-    .cfi_adjust_cfa_offset 496
-
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
      // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 496)
 #error "FRAME_SIZE_SAVE_EVERYTHING(MIPS64) size not as expected."
@@ -436,6 +434,19 @@
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra + $s8,
+     *              $f0-$f31; 28(GPR)+ 32(FPR) + 1x8 bytes padding + method*
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    daddiu $sp, $sp, -(FRAME_SIZE_SAVE_EVERYTHING)
+    .cfi_adjust_cfa_offset (FRAME_SIZE_SAVE_EVERYTHING)
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+.endm
+
 .macro RESTORE_SAVE_EVERYTHING_FRAME
     // Restore FP registers.
     l.d    $f31, 264($sp)
@@ -818,8 +829,10 @@
      * Call installed by a signal handler to create and deliver a NullPointerException
      */
     .extern artThrowNullPointerExceptionFromSignal
-ENTRY art_quick_throw_null_pointer_exception_from_signal
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ENTRY_NO_GP_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, FRAME_SIZE_SAVE_EVERYTHING
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+    # Retrieve the fault address from the padding where the signal handler stores it.
+    ld   $a0, (__SIZEOF_POINTER__)($sp)
     dla  $t9, artThrowNullPointerExceptionFromSignal
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uinptr_t, Thread*)
     move $a1, rSELF                 # pass Thread::Current
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 3e47209..14b01c5 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -114,7 +114,7 @@
     .balign 16
 END_MACRO
 
-MACRO1(DEFINE_FUNCTION, c_name)
+MACRO2(DEFINE_FUNCTION_CUSTOM_CFA, c_name, cfa_offset)
     FUNCTION_TYPE(SYMBOL(\c_name))
     ASM_HIDDEN CALLVAR(c_name)
     .globl CALLVAR(c_name)
@@ -122,7 +122,11 @@
 CALLVAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
-    CFI_DEF_CFA(esp, 4)
+    CFI_DEF_CFA(esp, RAW_VAR(cfa_offset))
+END_MACRO
+
+MACRO1(DEFINE_FUNCTION, c_name)
+    DEFINE_FUNCTION_CUSTOM_CFA RAW_VAR(c_name), __SIZEOF_POINTER__
 END_MACRO
 
 MACRO1(END_FUNCTION, c_name)
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index c7af249..a4d6bb4 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -325,21 +325,15 @@
   // next instruction (this instruction + instruction size).  The return address
   // is on the stack at the top address of the current frame.
 
-  // Push the return address onto the stack.
+  // Push the return address and fault address onto the stack.
   uintptr_t retaddr = reinterpret_cast<uintptr_t>(pc + instr_size);
-  uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - sizeof(uintptr_t));
-  *next_sp = retaddr;
+  uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - 2 * sizeof(uintptr_t));
+  next_sp[1] = retaddr;
+  next_sp[0] = reinterpret_cast<uintptr_t>(sig->si_addr);
   uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
   uc->CTX_EIP = reinterpret_cast<uintptr_t>(
       art_quick_throw_null_pointer_exception_from_signal);
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-#if defined(__x86_64__)
-  uc->CTX_RDI = reinterpret_cast<uintptr_t>(sig->si_addr);
-#else
-  uc->CTX_EAX = reinterpret_cast<uintptr_t>(sig->si_addr);
-#endif
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 646a80c..0beb2a4 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -224,10 +224,11 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when EDI is already saved.
      */
-MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
-    // Save core registers.
-    PUSH edi
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg)
+    // Save core registers from highest to lowest to agree with core spills bitmap.
+    // EDI, or at least a placeholder for it, is already on the stack.
     PUSH esi
     PUSH ebp
     PUSH ebx
@@ -264,6 +265,15 @@
 #endif
 END_MACRO
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
+    PUSH edi
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg)
+END_MACRO
+
 MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
     // Restore FPRs. Method and padding is still on the stack.
     movsd 16(%esp), %xmm0
@@ -320,7 +330,6 @@
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
-    mov %esp, %ecx
     // Outgoing argument set up
     subl MACRO_LITERAL(8), %esp                // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -354,7 +363,23 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
+    // The fault handler pushed the return address and, below it, the fault address.
+    // Save all registers as basis for long jump context; EDI will replace fault address later.
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED ebx, ebx
+    // Retrieve fault address and save EDI.
+    movl (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%esp), %eax
+    movl %edi, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%esp)
+    CFI_REL_OFFSET(%edi, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
+    // Outgoing argument set up
+    subl MACRO_LITERAL(8), %esp                           // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET                          // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                                              // pass the fault address (arg1)
+    call SYMBOL(artThrowNullPointerExceptionFromSignal)   // (addr, self)
+    UNREACHABLE
+END_FUNCTION art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index 0728f99..af4a6c4 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -110,7 +110,7 @@
 
 // TODO: we might need to use SYMBOL() here to add the underscore prefix
 // for mac builds.
-MACRO1(DEFINE_FUNCTION, c_name)
+MACRO2(DEFINE_FUNCTION_CUSTOM_CFA, c_name, cfa_offset)
     FUNCTION_TYPE(SYMBOL(\c_name))
     ASM_HIDDEN CALLVAR(c_name)
     .globl CALLVAR(c_name)
@@ -118,7 +118,11 @@
 CALLVAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
-    CFI_DEF_CFA(rsp, 8)
+    CFI_DEF_CFA(rsp, RAW_VAR(cfa_offset))
+END_MACRO
+
+MACRO1(DEFINE_FUNCTION, c_name)
+    DEFINE_FUNCTION_CUSTOM_CFA RAW_VAR(c_name), __SIZEOF_POINTER__
 END_MACRO
 
 MACRO1(END_FUNCTION, c_name)
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 5ea58af..089ed75 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -263,14 +263,15 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when R15 is already saved.
      */
-MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED)
 #if defined(__APPLE__)
     int3
     int3
 #else
     // Save core registers from highest to lowest to agree with core spills bitmap.
-    PUSH r15
+    // R15, or at least a placeholder for it, is already on the stack.
     PUSH r14
     PUSH r13
     PUSH r12
@@ -322,6 +323,15 @@
 #endif  // __APPLE__
 END_MACRO
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
+    PUSH r15
+    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
+END_MACRO
+
 MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
     // Restore FPRs. Method and padding is still on the stack.
     movq 16(%rsp), %xmm0
@@ -413,7 +423,19 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
+    // The fault handler pushed the return address and, below it, the fault address.
+    // Save all registers as basis for long jump context; R15 will replace fault address later.
+    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
+    // Retrieve fault address and save R15.
+    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
+    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
+    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
+    // Outgoing argument set up; RDI already contains the fault address.
+    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
+    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
+    UNREACHABLE
+END_FUNCTION art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.