Add implicit checks for x86_64 architecture.

This combines the x86 and x86_64 fault handlers into one.  It also
merges in the related entrypoint changes for x86_64.

Replaces the generic instruction length calculator with one that only
handles the specific instructions we use.

Bug: 16256184

Change-Id: I1e8ab5ad43f46060de9597615b423c89a836035c
Signed-off-by: Chao-ying Fu <chao-ying.fu@intel.com>
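
For reference, a minimal standalone sketch of the restricted length
decoding described above (the function name and test bytes are
illustrative only and are not part of this patch):

#include <cstdint>
#include <cstdio>

// Sketch of the restricted decoder: only a few ModR/M-based opcodes used
// by the implicit checks are recognized; anything else reports 0.
static uint32_t SketchInstructionSize(const uint8_t* pc) {
  uint8_t opcode = *pc++;
  if (opcode != 0x85 && opcode != 0x89 && opcode != 0x8b) {
    return 0;                      // not one of the handled instructions
  }
  uint8_t modrm = *pc;
  uint32_t length = 2;             // opcode + ModR/M
  uint8_t mod = (modrm >> 6) & 3;
  if (mod != 3 && (modrm & 7) == 4) {
    ++length;                      // SIB byte follows ModR/M
  }
  if (mod == 1) {
    length += 1;                   // 8-bit displacement
  } else if (mod == 2) {
    length += 4;                   // 32-bit displacement
  }
  return length;
}

int main() {
  const uint8_t test_eax[] = {0x85, 0x00};         // test eax, [eax]
  const uint8_t mov_disp8[] = {0x8b, 0x40, 0x08};  // mov eax, [eax + 8]
  std::printf("%u %u\n", SketchInstructionSize(test_eax),
              SketchInstructionSize(mov_disp8));   // prints "2 3"
  return 0;
}
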
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 435f280..8b6c9b1 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -31,14 +31,21 @@
 #define CTX_ESP uc_mcontext->__ss.__esp
 #define CTX_EIP uc_mcontext->__ss.__eip
 #define CTX_EAX uc_mcontext->__ss.__eax
+#define CTX_METHOD uc_mcontext->__ss.__eax
+#elif defined(__x86_64__)
+#define CTX_ESP uc_mcontext.gregs[REG_RSP]
+#define CTX_EIP uc_mcontext.gregs[REG_RIP]
+#define CTX_EAX uc_mcontext.gregs[REG_RAX]
+#define CTX_METHOD uc_mcontext.gregs[REG_RDI]
 #else
 #define CTX_ESP uc_mcontext.gregs[REG_ESP]
 #define CTX_EIP uc_mcontext.gregs[REG_EIP]
 #define CTX_EAX uc_mcontext.gregs[REG_EAX]
+#define CTX_METHOD uc_mcontext.gregs[REG_EAX]
 #endif
 
 //
-// X86 specific fault handler functions.
+// X86 (and X86_64) specific fault handler functions.
 //
 
 namespace art {
@@ -47,129 +54,146 @@
 extern "C" void art_quick_throw_stack_overflow_from_signal();
 extern "C" void art_quick_test_suspend();
 
-// From the x86 disassembler...
-enum SegmentPrefix {
-  kCs = 0x2e,
-  kSs = 0x36,
-  kDs = 0x3e,
-  kEs = 0x26,
-  kFs = 0x64,
-  kGs = 0x65,
-};
-
 // Get the size of an instruction in bytes.
-static uint32_t GetInstructionSize(uint8_t* pc) {
-  uint8_t* instruction_start = pc;
-  bool have_prefixes = true;
-  bool two_byte = false;
-
-  // Skip all the prefixes.
-  do {
-    switch (*pc) {
-        // Group 1 - lock and repeat prefixes:
-      case 0xF0:
-      case 0xF2:
-      case 0xF3:
-        // Group 2 - segment override prefixes:
-      case kCs:
-      case kSs:
-      case kDs:
-      case kEs:
-      case kFs:
-      case kGs:
-        // Group 3 - operand size override:
-      case 0x66:
-        // Group 4 - address size override:
-      case 0x67:
-        break;
-      default:
-        have_prefixes = false;
-        break;
-    }
-    if (have_prefixes) {
-      pc++;
-    }
-  } while (have_prefixes);
-
-#if defined(__x86_64__)
-  // Skip REX is present.
-  if (*pc >= 0x40 && *pc <= 0x4F) {
-    ++pc;
-  }
+// Return 0 if the instruction is not handled.
+static uint32_t GetInstructionSize(const uint8_t* pc) {
+#if defined(__x86_64__)
+  const bool x86_64 = true;
+#else
+  const bool x86_64 = false;
 #endif
 
-  // Check for known instructions.
-  uint32_t known_length = 0;
-  switch (*pc) {
-  case 0x83:                // cmp [r + v], b: 4 byte instruction
-    known_length = 4;
-    break;
-  }
+  const uint8_t* startpc = pc;
 
-  if (known_length > 0) {
-    VLOG(signals) << "known instruction with length " << known_length;
-    return known_length;
-  }
-
-  // Unknown instruction, work out length.
-
-  // Work out if we have a ModR/M byte.
   uint8_t opcode = *pc++;
-  if (opcode == 0xf) {
+  uint8_t modrm;
+  bool has_modrm = false;
+  bool two_byte = false;
+  uint32_t displacement_size = 0;
+  uint32_t immediate_size = 0;
+
+  // Prefixes.
+  while (true) {
+    bool prefix_present = false;
+    switch (opcode) {
+      // Group 1
+      case 0xf0:
+      case 0xf2:
+      case 0xf3:
+
+      // Group 2
+      case 0x2e:
+      case 0x36:
+      case 0x3e:
+      case 0x26:
+      case 0x64:
+      case 0x65:
+
+      // Group 3
+      case 0x66:
+
+      // Group 4
+      case 0x67:
+        opcode = *pc++;
+        prefix_present = true;
+        break;
+    }
+    if (!prefix_present) {
+      break;
+    }
+  }
+
+  if (x86_64 && opcode >= 0x40 && opcode <= 0x4f) {
+    opcode = *pc++;
+  }
+
+  if (opcode == 0x0f) {
+    // Two byte opcode
     two_byte = true;
     opcode = *pc++;
   }
 
-  bool has_modrm = false;         // Is ModR/M byte present?
-  uint8_t hi = opcode >> 4;       // Opcode high nybble.
-  uint8_t lo = opcode & 0b1111;   // Opcode low nybble.
+  bool unhandled_instruction = false;
 
-  // From the Intel opcode tables.
   if (two_byte) {
-    has_modrm = true;   // TODO: all of these?
-  } else if (hi < 4) {
-    has_modrm = lo < 4 || (lo >= 8 && lo <= 0xb);
-  } else if (hi == 6) {
-    has_modrm = lo == 3 || lo == 9 || lo == 0xb;
-  } else if (hi == 8) {
-    has_modrm = lo != 0xd;
-  } else if (hi == 0xc) {
-    has_modrm = lo == 1 || lo == 2 || lo == 6 || lo == 7;
-  } else if (hi == 0xd) {
-    has_modrm = lo < 4;
-  } else if (hi == 0xf) {
-    has_modrm = lo == 6 || lo == 7;
-  }
+    switch (opcode) {
+      case 0x10:        // vmovsd/ss
+      case 0x11:        // vmovsd/ss
+      case 0xb6:        // movzx
+      case 0xb7:
+      case 0xbe:        // movsx
+      case 0xbf:
+        modrm = *pc++;
+        has_modrm = true;
+        break;
+      default:
+        unhandled_instruction = true;
+        break;
+    }
+  } else {
+    switch (opcode) {
+      case 0x89:        // mov
+      case 0x8b:
+      case 0x38:        // cmp with memory.
+      case 0x39:
+      case 0x3a:
+      case 0x3b:
+      case 0x3c:
+      case 0x3d:
+      case 0x85:        // test.
+        modrm = *pc++;
+        has_modrm = true;
+        break;
 
-  if (has_modrm) {
-    uint8_t modrm = *pc++;
-    uint8_t mod = (modrm >> 6) & 0b11;
-    uint8_t reg = (modrm >> 3) & 0b111;
-    switch (mod) {
-      case 0:
+      case 0x80:        // group 1, byte immediate.
+      case 0x83:
+        modrm = *pc++;
+        has_modrm = true;
+        immediate_size = 1;
         break;
-      case 1:
-        if (reg == 4) {
-          // SIB + 1 byte displacement.
-          pc += 2;
-        } else {
-          pc += 1;
-        }
+
+      case 0x81:        // group 1, 4 byte immediate.
+        modrm = *pc++;
+        has_modrm = true;
+        immediate_size = 4;
         break;
-      case 2:
-        // SIB + 4 byte displacement.
-        pc += 5;
-        break;
-      case 3:
+
+      default:
+        unhandled_instruction = true;
         break;
     }
   }
 
-  VLOG(signals) << "calculated X86 instruction size is " << (pc - instruction_start);
-  return pc - instruction_start;
+  if (unhandled_instruction) {
+    VLOG(signals) << "Unhandled x86 instruction with opcode " << static_cast<int>(opcode);
+    return 0;
+  }
+
+  if (has_modrm) {
+    uint8_t mod = (modrm >> 6) & 0b11;
+
+    // Check for SIB.
+    if (mod != 0b11 && (modrm & 0b111) == 4) {
+      ++pc;     // SIB
+    }
+
+    switch (mod) {
+      case 0b00: break;
+      case 0b01: displacement_size = 1; break;
+      case 0b10: displacement_size = 4; break;
+      case 0b11:
+        break;
+    }
+  }
+
+  // Skip displacement and immediate.
+  pc += displacement_size + immediate_size;
+
+  VLOG(signals) << "x86 instruction length calculated as " << (pc - startpc);
+  return pc - startpc;
 }
 
-void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
@@ -180,21 +204,30 @@
   }
 
   // In the case of a stack overflow, the stack is not valid and we can't
-  // get the method from the top of the stack.  However it's in EAX.
+  // get the method from the top of the stack.  However, it's in EAX (x86) or RDI (x86_64).
   uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(siginfo->si_addr);
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+#if defined(__x86_64__)
+      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kX86_64));
+#else
       reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kX86));
+#endif
   if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<mirror::ArtMethod*>(uc->CTX_EAX);
+    *out_method = reinterpret_cast<mirror::ArtMethod*>(uc->CTX_METHOD);
   } else {
     // The method is at the top of the stack.
-    *out_method = reinterpret_cast<mirror::ArtMethod*>(reinterpret_cast<uintptr_t*>(*out_sp)[0]);
+    *out_method = (reinterpret_cast<StackReference<mirror::ArtMethod>* >(*out_sp)[0]).AsMirrorPtr();
   }
 
   uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
   VLOG(signals) << HexDump(pc, 32, true, "PC ");
 
   uint32_t instr_size = GetInstructionSize(pc);
+  if (instr_size == 0) {
+    // Unknown instruction, tell caller it's not ours.
+    *out_method = nullptr;
+    return;
+  }
   *out_return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
 }
 
@@ -204,16 +237,21 @@
   uint8_t* sp = reinterpret_cast<uint8_t*>(uc->CTX_ESP);
 
   uint32_t instr_size = GetInstructionSize(pc);
+  if (instr_size == 0) {
+    // Unknown instruction; should not happen for our implicit checks, but bail out safely.
+    return false;
+  }
+
   // We need to arrange for the signal handler to return to the null pointer
   // exception generator.  The return address must be the address of the
   // next instruction (this instruction + instruction size).  The return address
   // is on the stack at the top address of the current frame.
 
   // Push the return address onto the stack.
-  uint32_t retaddr = reinterpret_cast<uint32_t>(pc + instr_size);
-  uint32_t* next_sp = reinterpret_cast<uint32_t*>(sp - 4);
+  uintptr_t retaddr = reinterpret_cast<uintptr_t>(pc + instr_size);
+  uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - sizeof(uintptr_t));
   *next_sp = retaddr;
-  uc->CTX_ESP = reinterpret_cast<uint32_t>(next_sp);
+  uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
   uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
   VLOG(signals) << "Generating null pointer exception";
@@ -221,9 +259,14 @@
 }
 
 // A suspend check is done using the following instruction sequence:
+// (x86)
 // 0xf720f1df:         648B058C000000      mov     eax, fs:[0x8c]  ; suspend_trigger
 // .. some intervening instructions.
 // 0xf720f1e6:                   8500      test    eax, [eax]
+// (x86_64)
+// 0x7f579de45d9e: 65488B0425A8000000      movq    rax, gs:[0xa8]  ; suspend_trigger
+// .. some intervening instructions.
+// 0x7f579de45da7:               8500      test    eax, [eax]
 
 // The offset from fs is Thread::ThreadSuspendTriggerOffset().
 // To check for a suspend check, we examine the instructions that caused
@@ -231,11 +274,20 @@
 bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
   // These are the instructions to check for.  The first one is the mov eax, fs:[xxx]
   // where xxx is the offset of the suspend trigger.
+#if defined(__x86_64__)
+  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<8>().Int32Value();
+#else
   uint32_t trigger = Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
+#endif
 
   VLOG(signals) << "Checking for suspension point";
+#if defined(__x86_64__)
+  uint8_t checkinst1[] = {0x65, 0x48, 0x8b, 0x04, 0x25, static_cast<uint8_t>(trigger & 0xff),
+      static_cast<uint8_t>((trigger >> 8) & 0xff), 0, 0};
+#else
   uint8_t checkinst1[] = {0x64, 0x8b, 0x05, static_cast<uint8_t>(trigger & 0xff),
       static_cast<uint8_t>((trigger >> 8) & 0xff), 0, 0};
+#endif
   uint8_t checkinst2[] = {0x85, 0x00};
 
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
@@ -270,10 +322,10 @@
     // is on the stack at the top address of the current frame.
 
     // Push the return address onto the stack.
-    uint32_t retaddr = reinterpret_cast<uint32_t>(pc + 2);
-    uint32_t* next_sp = reinterpret_cast<uint32_t*>(sp - 4);
+    uintptr_t retaddr = reinterpret_cast<uintptr_t>(pc + 2);
+    uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - sizeof(uintptr_t));
     *next_sp = retaddr;
-    uc->CTX_ESP = reinterpret_cast<uint32_t>(next_sp);
+    uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
     uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_test_suspend);
 
@@ -302,7 +354,11 @@
   VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
 
+#if defined(__x86_64__)
+  uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kX86_64);
+#else
   uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kX86);
+#endif
 
   Thread* self = Thread::Current();
   uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -