Revert "Revert "Fast ART x86 interpreter""

This reverts commit 2d093a1213cc2f85b5e5e02782332657c479eb94.

Disable x86 mterp compilation on Mac host builds (but keep enabled
for all target builds).

Change-Id: Ie355279f166d2964a786646ee53f065b7e0f5ede
diff --git a/runtime/interpreter/mterp/x86/header.S b/runtime/interpreter/mterp/x86/header.S
new file mode 100644
index 0000000..2481785
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/header.S
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing ExecuteXXXImpl() style body (doesn't
+  handle invoke, allows higher-level code to create frame & shadow frame.
+
+  Once that's working, support direct entry code & eliminate shadow frame (and
+  excess locals allocation.
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+x86 ABI general notes:
+
+Caller save set:
+   eax, edx, ecx, st(0)-st(7)
+Callee save set:
+   ebx, esi, edi, ebp
+Return regs:
+   32-bit in eax
+   64-bit in edx:eax (low-order 32 in eax)
+   fp on top of fp stack st(0)
+
+Parameters passed on stack, pushed right-to-left.  On entry to target, first
+parm is at 4(%esp).  Traditional entry code is:
+
+functEntry:
+    push    %ebp             # save old frame pointer
+    mov     %ebp,%esp        # establish new frame pointer
+    sub     FrameSize,%esp   # Allocate storage for spill, locals & outs
+
+Once past the prologue, arguments are referenced at ((argno + 2)*4)(%ebp)
+
+Stack must be 16-byte aligned to support SSE in native code.
+
+If we're not doing variable stack allocation (alloca), the frame pointer can be
+eliminated and all arg references adjusted to be esp relative.
+*/
+
+/*
+Mterp and x86 notes:
+
+Some key interpreter variables will be assigned to registers.
+
+  nick     reg   purpose
+  rPC      esi   interpreted program counter, used for fetching instructions
+  rFP      edi   interpreted frame pointer, used for accessing locals and args
+  rINSTw   bx    first 16-bit code of current instruction
+  rINSTbl  bl    opcode portion of instruction word
+  rINSTbh  bh    high byte of inst word, usually contains src/tgt reg names
+  rIBASE   edx   base of instruction handler table
+  rREFS    ebp   base of object references in shadow frame.
+
+Notes:
+   o High order 16 bits of ebx must be zero on entry to handler
+   o rPC, rFP, rINSTw/rINSTbl valid on handler entry and exit
+   o eax and ecx are scratch, rINSTw/ebx sometimes scratch
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* Frame size must be 16-byte aligned.
+ * Remember about 4 bytes for return address
+ */
+#define FRAME_SIZE     44
+
+/* Frame diagram while executing ExecuteMterpImpl, high to low addresses */
+#define IN_ARG3        (FRAME_SIZE + 16)
+#define IN_ARG2        (FRAME_SIZE + 12)
+#define IN_ARG1        (FRAME_SIZE +  8)
+#define IN_ARG0        (FRAME_SIZE +  4)
+#define CALLER_RP      (FRAME_SIZE +  0)
+/* Spill offsets relative to %esp */
+#define EBP_SPILL      (FRAME_SIZE -  4)
+#define EDI_SPILL      (FRAME_SIZE -  8)
+#define ESI_SPILL      (FRAME_SIZE - 12)
+#define EBX_SPILL      (FRAME_SIZE - 16)
+#define LOCAL0         (FRAME_SIZE - 20)
+#define LOCAL1         (FRAME_SIZE - 24)
+#define LOCAL2         (FRAME_SIZE - 28)
+/* Out Arg offsets, relative to %esp */
+#define OUT_ARG3       ( 12)
+#define OUT_ARG2       (  8)
+#define OUT_ARG1       (  4)
+#define OUT_ARG0       (  0)  /* <- ExecuteMterpImpl esp + 0 */
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rSELF    IN_ARG0(%esp)
+#define rPC      %esi
+#define rFP      %edi
+#define rINST    %ebx
+#define rINSTw   %bx
+#define rINSTbh  %bh
+#define rINSTbl  %bl
+#define rIBASE   %edx
+#define rREFS    %ebp
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+/*
+ *
+ * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For effiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
+
+/*
+ * Refresh handler table.
+ * IBase handles uses the caller save register so we must restore it after each call.
+ * Also it is used as a result of some 64-bit operations (like imul) and we should
+ * restore it in such cases also.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro REFRESH_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
+ * If rSELF is already loaded then we can use it from known reg.
+ */
+.macro REFRESH_IBASE_FROM_SELF _reg
+    movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
+.endm
+
+/*
+ * Refresh rINST.
+ * At enter to handler rINST does not contain the opcode number.
+ * However some utilities require the full value, so this macro
+ * restores the opcode number.
+ */
+.macro REFRESH_INST _opnum
+    movb    rINSTbl, rINSTbh
+    movb    $$\_opnum, rINSTbl
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINSTw.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    movzwl  (rPC), rINST
+.endm
+
+/*
+ * Remove opcode from rINST, compute the address of handler and jump to it.
+ */
+.macro GOTO_NEXT
+    movzx   rINSTbl,%eax
+    movzbl  rINSTbh,rINST
+    shll    $$${handler_size_bits}, %eax
+    addl    rIBASE, %eax
+    jmp     *%eax
+.endm
+
+/*
+ * Advance rPC by instruction count.
+ */
+.macro ADVANCE_PC _count
+    leal    2*\_count(rPC), rPC
+.endm
+
+/*
+ * Advance rPC by instruction count, fetch instruction and jump to handler.
+ */
+.macro ADVANCE_PC_FETCH_AND_GOTO_NEXT _count
+    ADVANCE_PC \_count
+    FETCH_INST
+    GOTO_NEXT
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+#define VREG_ADDRESS(_vreg) (rFP,_vreg,4)
+#define VREG_HIGH_ADDRESS(_vreg) 4(rFP,_vreg,4)
+#define VREG_REF_ADDRESS(_vreg) (rREFS,_vreg,4)
+#define VREG_REF_HIGH_ADDRESS(_vreg) 4(rREFS,_vreg,4)
+
+.macro GET_VREG _reg _vreg
+    movl    (rFP,\_vreg,4), \_reg
+.endm
+
+/* Read wide value to xmm. */
+.macro GET_WIDE_FP_VREG _reg _vreg
+    movq    (rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    $$0, (rREFS,\_vreg,4)
+.endm
+
+/* Write wide value from xmm. xmm is clobbered. */
+.macro SET_WIDE_FP_VREG _reg _vreg
+    movq    \_reg, (rFP,\_vreg,4)
+    pxor    \_reg, \_reg
+    movq    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro SET_VREG_OBJECT _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro GET_VREG_HIGH _reg _vreg
+    movl    4(rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG_HIGH _reg _vreg
+    movl    \_reg, 4(rFP,\_vreg,4)
+    movl    $$0, 4(rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_REF _vreg
+    movl    $$0,  (rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_WIDE_REF _vreg
+    movl    $$0,  (rREFS,\_vreg,4)
+    movl    $$0, 4(rREFS,\_vreg,4)
+.endm