Merge change 4657

* changes:
  Method override tests.
diff --git a/vm/Android.mk b/vm/Android.mk
index 5ef3b0c..7638613 100644
--- a/vm/Android.mk
+++ b/vm/Android.mk
@@ -248,17 +248,19 @@
   LOCAL_SHARED_LIBRARIES += libdl
 endif
 
+MTERP_ARCH_KNOWN := false
+
 ifeq ($(TARGET_ARCH),arm)
   #TARGET_ARCH_VARIANT := armv5te-vfp
+  MTERP_ARCH_KNOWN := true
+  # Select architecture-specific sources (armv4t, armv5te etc.)
   LOCAL_SRC_FILES += \
 		arch/arm/CallOldABI.S \
 		arch/arm/CallEABI.S \
-		arch/arm/HintsEABI.c
-  # Select architecture specific sources (armv4t,armv5te etc)
-  LOCAL_SRC_FILES += \
+		arch/arm/HintsEABI.c \
 		mterp/out/InterpC-$(TARGET_ARCH_VARIANT).c.arm \
 		mterp/out/InterpAsm-$(TARGET_ARCH_VARIANT).S
-  LOCAL_SHARED_LIBRARIES += libdl
+
   ifeq ($(WITH_JIT),true)
     LOCAL_SRC_FILES += \
 		compiler/codegen/armv5te/Codegen.c \
@@ -269,26 +271,27 @@
 		compiler/codegen/armv5te/GlobalOptimizations.c \
 		compiler/template/out/CompilerTemplateAsm-armv5te.S
   endif
-else
-  ifeq ($(TARGET_ARCH),x86)
-    LOCAL_SRC_FILES += \
+endif
+
+ifeq ($(TARGET_ARCH),x86)
+  MTERP_ARCH_KNOWN := true
+  LOCAL_SRC_FILES += \
 		arch/x86/Call386ABI.S \
-		arch/x86/Hints386ABI.c
-    LOCAL_SRC_FILES += \
+		arch/x86/Hints386ABI.c \
 		mterp/out/InterpC-x86.c \
 		mterp/out/InterpAsm-x86.S
-  else
-	# unknown architecture, try to use FFI
-    LOCAL_C_INCLUDES += external/libffi/$(TARGET_OS)-$(TARGET_ARCH)
-    LOCAL_SRC_FILES += \
+endif
+
+ifeq ($(MTERP_ARCH_KNOWN),false)
+  # unknown architecture, try to use FFI
+  LOCAL_C_INCLUDES += external/libffi/$(TARGET_OS)-$(TARGET_ARCH)
+  LOCAL_SHARED_LIBRARIES += libffi
+
+  LOCAL_SRC_FILES += \
 		arch/generic/Call.c \
-		arch/generic/Hints.c
-    LOCAL_SHARED_LIBRARIES += libffi
-	
-    LOCAL_SRC_FILES += \
+		arch/generic/Hints.c \
 		mterp/out/InterpC-allstubs.c \
 		mterp/out/InterpAsm-allstubs.S
-  endif
 endif
 
 
diff --git a/vm/Globals.h b/vm/Globals.h
index 067e913..1d3460e 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h
@@ -713,6 +713,9 @@
 
     /* Flag to dump all compiled code */
     bool printMe;
+
+    /* Flag to count trace execution */
+    bool profile;
 };
 
 extern struct DvmJitGlobals gDvmJit;
diff --git a/vm/Init.c b/vm/Init.c
index 7486ad9..d624c23 100644
--- a/vm/Init.c
+++ b/vm/Init.c
@@ -119,6 +119,7 @@
     dvmFprintf(stderr, "  -Xjitmethod:signture[,signature]* "
                        "(eg Ljava/lang/String\\;replace)\n");
     dvmFprintf(stderr, "  -Xjitverbose\n");
+    dvmFprintf(stderr, "  -Xjitprofile\n");
 #endif
     dvmFprintf(stderr, "\n");
     dvmFprintf(stderr, "Configured with:"
@@ -904,6 +905,8 @@
           gDvmJit.includeSelectedMethod = true;
         } else if (strncmp(argv[i], "-Xjitverbose", 12) == 0) {
           gDvmJit.printMe = true;
+        } else if (strncmp(argv[i], "-Xjitprofile", 12) == 0) {
+          gDvmJit.profile = true;
 #endif
 
         } else if (strncmp(argv[i], "-Xdeadlockpredict:", 18) == 0) {
diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h
index 1eb7c40..8a2028a 100644
--- a/vm/compiler/CompilerIR.h
+++ b/vm/compiler/CompilerIR.h
@@ -75,6 +75,7 @@
     LIR *firstLIRInsn;
     LIR *lastLIRInsn;
     LIR *wordList;
+    LIR *chainCellOffsetLIR;
     GrowableList pcReconstructionList;
     int headerSize;                     // bytes before the first code ptr
     int dataOffset;                     // starting offset of literal pool
@@ -84,6 +85,7 @@
     bool printMe;
     bool allSingleStep;
     bool halveInstCount;
+    bool executionCount;                // Add code to count trace executions
     int numChainingCells[CHAINING_CELL_LAST];
     LIR *firstChainingLIR[CHAINING_CELL_LAST];
     RegisterScoreboard registerScoreboard;      // Track register dependency
diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c
index 5e75d15..c8f3abc 100644
--- a/vm/compiler/Frontend.c
+++ b/vm/compiler/Frontend.c
@@ -206,6 +206,9 @@
     /* Initialize the printMe flag */
     cUnit.printMe = gDvmJit.printMe;
 
+    /* Initialize the profile flag */
+    cUnit.executionCount = gDvmJit.profile;
+
     /* Identify traces that we don't want to compile */
     if (gDvmJit.methodTable) {
         int len = strlen(desc->method->clazz->descriptor) +
diff --git a/vm/compiler/codegen/armv5te/Armv5teLIR.h b/vm/compiler/codegen/armv5te/Armv5teLIR.h
index dcf501b..f0a3f42 100644
--- a/vm/compiler/codegen/armv5te/Armv5teLIR.h
+++ b/vm/compiler/codegen/armv5te/Armv5teLIR.h
@@ -36,8 +36,19 @@
     rFP = 5,
     rGLUE = 6,
     r7 = 7,
+    r8 = 8,
+    r9 = 9,
+    r10 = 10,
+    r11 = 11,
+    r12 = 12,
+    r13 = 13,
+    rlr = 14,
+    rpc = 15
 } NativeRegisterPool;
 
+/* Mask to convert high reg to low for Thumb */
+#define THUMB_REG_MASK 0x7
+
 /* Thumb condition encodings */
 typedef enum Armv5teConditionCode {
     ARM_COND_EQ = 0x0,    /* 0000 */
diff --git a/vm/compiler/codegen/armv5te/Assemble.c b/vm/compiler/codegen/armv5te/Assemble.c
index a59d27f..9b4595d 100644
--- a/vm/compiler/codegen/armv5te/Assemble.c
+++ b/vm/compiler/codegen/armv5te/Assemble.c
@@ -436,7 +436,6 @@
  * before sending them off to the assembler. If out-of-range branch distance is
  * seen rearrange the instructions a bit to correct it.
  */
-#define CHAIN_CELL_OFFSET_SIZE 2
 void dvmCompilerAssembleLIR(CompilationUnit *cUnit)
 {
     LIR *lir;
@@ -469,7 +468,8 @@
 
     /* Add space for chain cell counts & trace description */
     u4 chainCellOffset = offset;
-    Armv5teLIR *chainCellOffsetLIR = (Armv5teLIR *) (cUnit->firstLIRInsn);
+    Armv5teLIR *chainCellOffsetLIR = cUnit->chainCellOffsetLIR;
+    assert(chainCellOffsetLIR);
     assert(chainCellOffset < 0x10000);
     assert(chainCellOffsetLIR->opCode == ARMV5TE_16BIT_DATA &&
            chainCellOffsetLIR->operands[0] == CHAIN_CELL_OFFSET_TAG);
@@ -517,8 +517,8 @@
         return;
     }
 
+
     cUnit->baseAddr = (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
-    cUnit->headerSize = CHAIN_CELL_OFFSET_SIZE;
     gDvmJit.codeCacheByteUsed += offset;
 
     /* Install the code block */
diff --git a/vm/compiler/codegen/armv5te/Codegen.c b/vm/compiler/codegen/armv5te/Codegen.c
index 86faa54..27bdec6 100644
--- a/vm/compiler/codegen/armv5te/Codegen.c
+++ b/vm/compiler/codegen/armv5te/Codegen.c
@@ -2842,11 +2842,36 @@
 
     BasicBlock **blockList = cUnit->blockList;
 
-    /*
-     * Reserve space at the beginning of each translation with fillers
-     * + Chain cell count (2 bytes)
-     */
-    newLIR1(cUnit, ARMV5TE_16BIT_DATA, CHAIN_CELL_OFFSET_TAG);
+    if (cUnit->executionCount) {
+        /*
+         * Reserve 6 bytes at the beginning of the trace
+         *        +----------------------------+
+         *        | execution count (4 bytes)  |
+         *        +----------------------------+
+         *        | chain cell offset (2 bytes)|
+         *        +----------------------------+
+         * ...and then code to increment the execution
+         * count:
+         *       mov   r0, pc       @ move adr of "mov r0,pc" + 4 to r0
+         *       sub   r0, #10      @ back up to addr of executionCount
+         *       ldr   r1, [r0]
+         *       add   r1, #1
+         *       str   r1, [r0]
+         */
+        newLIR1(cUnit, ARMV5TE_16BIT_DATA, 0);
+        newLIR1(cUnit, ARMV5TE_16BIT_DATA, 0);
+        cUnit->chainCellOffsetLIR = newLIR1(cUnit, ARMV5TE_16BIT_DATA, CHAIN_CELL_OFFSET_TAG);
+        cUnit->headerSize = 6;
+        newLIR2(cUnit, ARMV5TE_MOV_RR_HL, r0, rpc & THUMB_REG_MASK);
+        newLIR2(cUnit, ARMV5TE_SUB_RI8, r0, 10);
+        newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, r0, 0);
+        newLIR2(cUnit, ARMV5TE_ADD_RI8, r1, 1);
+        newLIR3(cUnit, ARMV5TE_STR_RRI5, r1, r0, 0);
+    } else {
+         /* Just reserve 2 bytes for the chain cell offset */
+        cUnit->chainCellOffsetLIR = newLIR1(cUnit, ARMV5TE_16BIT_DATA, CHAIN_CELL_OFFSET_TAG);
+        cUnit->headerSize = 2;
+    }
 
     /* Handle the content in each basic block */
     for (i = 0; i < cUnit->numBlocks; i++) {
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
index 031d46d..c64d4d5 100644
--- a/vm/interp/Jit.c
+++ b/vm/interp/Jit.c
@@ -29,6 +29,8 @@
 #include <sys/time.h>
 #include <signal.h>
 #include "compiler/Compiler.h"
+#include "compiler/CompilerUtility.h"
+#include "compiler/CompilerIR.h"
 #include <errno.h>
 
 /*
@@ -148,6 +150,40 @@
 }
 #endif
 
+/* Logs the execution count and origin of a single profiled trace */
+void dvmCompilerDumpTraceProfile(struct JitEntry *p)
+{
+    ChainCellCounts* pCellCounts;
+    char* traceBase;
+    u4* pExecutionCount;
+    u2* pCellOffset;
+    JitTraceDescription *desc;
+    const Method* method;
+
+    /* Check first: no pointer arithmetic on a NULL code address */
+    if (p->codeAddress == NULL) {
+        LOGD("TRACEPROFILE 0x%08x 0 NULL 0 0", 0);
+        return;
+    }
+
+    /*
+     * codeAddress has the low bit set to mark thumb mode and points just
+     * past the 6-byte header (u4 execution count, u2 chain cell offset),
+     * so back up 7 bytes.  See the trace layout diagram in Assemble.c.
+     */
+    traceBase = (char*)p->codeAddress - 7;
+
+    pExecutionCount = (u4*) (traceBase);
+    pCellOffset = (u2*) (traceBase + 4);
+    pCellCounts = (ChainCellCounts*) (traceBase + *pCellOffset);
+    desc = (JitTraceDescription*) ((char*)pCellCounts + sizeof(*pCellCounts));
+    method = desc->method;
+    LOGD("TRACEPROFILE 0x%08x % 10d %s%s [0x%x,%d]", (int)traceBase,
+          *pExecutionCount, method->clazz->descriptor, method->name,
+          desc->trace[0].frag.startOffset,
+          desc->trace[0].frag.numInsts);
+}
+
 /* Dumps debugging & tuning stats to the log */
 void dvmJitStats()
 {
@@ -181,6 +217,13 @@
         LOGD("JIT: Invoke: %d noOpt, %d chainable, %d return",
           gDvmJit.invokeNoOpt, gDvmJit.invokeChain, gDvmJit.returnOp);
 #endif
+       if (gDvmJit.profile) {
+           for (i=0; i < (int) gDvmJit.jitTableSize; i++) {
+              if (gDvmJit.pJitEntryTable[i].dPC != 0) {
+                  dvmCompilerDumpTraceProfile( &gDvmJit.pJitEntryTable[i] );
+              }
+           }
+        }
     }
 }
 
@@ -463,7 +506,7 @@
  * requested
  */
 
-#define PROFILE_STALENESS_THRESHOLD 250000LL
+#define PROFILE_STALENESS_THRESHOLD 100000LL
 bool dvmJitCheckTraceRequest(Thread* self, InterpState* interpState)
 {
     bool res = false;    /* Assume success */
diff --git a/vm/interp/Jit.h b/vm/interp/Jit.h
index 5d748d5..560a4db 100644
--- a/vm/interp/Jit.h
+++ b/vm/interp/Jit.h
@@ -21,7 +21,7 @@
 
 #include "InterpDefs.h"
 
-#define JIT_PROF_SIZE 512
+#define JIT_PROF_SIZE 4096
 
 #define JIT_MAX_TRACE_LEN 100
 
diff --git a/vm/mterp/armv5te/footer.S b/vm/mterp/armv5te/footer.S
index 004ee13..129760d 100644
--- a/vm/mterp/armv5te/footer.S
+++ b/vm/mterp/armv5te/footer.S
@@ -154,11 +154,11 @@
 
 common_updateProfile:
     eor     r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
-    lsl     r3,r3,#23          @ shift out excess 511
-    ldrb    r1,[r0,r3,lsr #23] @ get counter
+    lsl     r3,r3,#20          @ shift out excess 4095
+    ldrb    r1,[r0,r3,lsr #20] @ get counter
     GET_INST_OPCODE(ip)
     subs    r1,r1,#1           @ decrement counter
-    strb    r1,[r0,r3,lsr #23] @ and store it
+    strb    r1,[r0,r3,lsr #20] @ and store it
     GOTO_OPCODE_IFNE(ip)       @ if not threshold, fallthrough otherwise */
 
 /*
@@ -168,7 +168,7 @@
  * jump to it now).
  */
     mov     r1,#255
-    strb    r1,[r0,r3,lsr #23] @ reset counter
+    strb    r1,[r0,r3,lsr #20] @ reset counter
     EXPORT_PC()
     mov     r0,rPC
     bl      dvmJitGetCodeAddr           @ r0<- dvmJitGetCodeAddr(rPC)
diff --git a/vm/mterp/out/InterpAsm-armv4t.S b/vm/mterp/out/InterpAsm-armv4t.S
index a45093e..21fb884 100644
--- a/vm/mterp/out/InterpAsm-armv4t.S
+++ b/vm/mterp/out/InterpAsm-armv4t.S
@@ -201,6 +201,12 @@
  */
 #include "../common/asm-constants.h"
 
+/*
+ * Power of 2 width in bits of the hash table size (9 -> 512, 10 -> 1024).
+ */
+#define JIT_PROF_TAB_WIDTH    12
+#define JIT_PROF_TAB_LSHIFT   (32 - JIT_PROF_TAB_WIDTH)
+#define JIT_PROF_TAB_THRESH_RESET 255
 
 /* File: armv5te/platform.S */
 /*
@@ -9633,11 +9639,11 @@
 
 common_updateProfile:
     eor     r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
-    lsl     r3,r3,#23          @ shift out excess 511
-    ldrb    r1,[r0,r3,lsr #23] @ get counter
+    lsl     r3,r3,#20          @ shift out excess 4095
+    ldrb    r1,[r0,r3,lsr #20] @ get counter
     GET_INST_OPCODE(ip)
     subs    r1,r1,#1           @ decrement counter
-    strb    r1,[r0,r3,lsr #23] @ and store it
+    strb    r1,[r0,r3,lsr #20] @ and store it
     GOTO_OPCODE_IFNE(ip)       @ if not threshold, fallthrough otherwise */
 
 /*
@@ -9647,7 +9653,7 @@
  * jump to it now).
  */
     mov     r1,#255
-    strb    r1,[r0,r3,lsr #23] @ reset counter
+    strb    r1,[r0,r3,lsr #20] @ reset counter
     EXPORT_PC()
     mov     r0,rPC
     bl      dvmJitGetCodeAddr           @ r0<- dvmJitGetCodeAddr(rPC)
diff --git a/vm/mterp/out/InterpAsm-armv5te-vfp.S b/vm/mterp/out/InterpAsm-armv5te-vfp.S
index 93e647a..55a157d 100644
--- a/vm/mterp/out/InterpAsm-armv5te-vfp.S
+++ b/vm/mterp/out/InterpAsm-armv5te-vfp.S
@@ -201,6 +201,12 @@
  */
 #include "../common/asm-constants.h"
 
+/*
+ * Power of 2 width in bits of the hash table size (9 -> 512, 10 -> 1024).
+ */
+#define JIT_PROF_TAB_WIDTH    12
+#define JIT_PROF_TAB_LSHIFT   (32 - JIT_PROF_TAB_WIDTH)
+#define JIT_PROF_TAB_THRESH_RESET 255
 
 /* File: armv5te/platform.S */
 /*
@@ -9147,11 +9153,11 @@
 
 common_updateProfile:
     eor     r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
-    lsl     r3,r3,#23          @ shift out excess 511
-    ldrb    r1,[r0,r3,lsr #23] @ get counter
+    lsl     r3,r3,#20          @ shift out excess 4095
+    ldrb    r1,[r0,r3,lsr #20] @ get counter
     GET_INST_OPCODE(ip)
     subs    r1,r1,#1           @ decrement counter
-    strb    r1,[r0,r3,lsr #23] @ and store it
+    strb    r1,[r0,r3,lsr #20] @ and store it
     GOTO_OPCODE_IFNE(ip)       @ if not threshold, fallthrough otherwise */
 
 /*
@@ -9161,7 +9167,7 @@
  * jump to it now).
  */
     mov     r1,#255
-    strb    r1,[r0,r3,lsr #23] @ reset counter
+    strb    r1,[r0,r3,lsr #20] @ reset counter
     EXPORT_PC()
     mov     r0,rPC
     bl      dvmJitGetCodeAddr           @ r0<- dvmJitGetCodeAddr(rPC)
diff --git a/vm/mterp/out/InterpAsm-armv5te.S b/vm/mterp/out/InterpAsm-armv5te.S
index b377a93..68ae10d 100644
--- a/vm/mterp/out/InterpAsm-armv5te.S
+++ b/vm/mterp/out/InterpAsm-armv5te.S
@@ -201,6 +201,12 @@
  */
 #include "../common/asm-constants.h"
 
+/*
+ * Power of 2 width in bits of the hash table size (9 -> 512, 10 -> 1024).
+ */
+#define JIT_PROF_TAB_WIDTH    12
+#define JIT_PROF_TAB_LSHIFT   (32 - JIT_PROF_TAB_WIDTH)
+#define JIT_PROF_TAB_THRESH_RESET 255
 
 /* File: armv5te/platform.S */
 /*
@@ -9627,11 +9633,11 @@
 
 common_updateProfile:
     eor     r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
-    lsl     r3,r3,#23          @ shift out excess 511
-    ldrb    r1,[r0,r3,lsr #23] @ get counter
+    lsl     r3,r3,#20          @ shift out excess 4095
+    ldrb    r1,[r0,r3,lsr #20] @ get counter
     GET_INST_OPCODE(ip)
     subs    r1,r1,#1           @ decrement counter
-    strb    r1,[r0,r3,lsr #23] @ and store it
+    strb    r1,[r0,r3,lsr #20] @ and store it
     GOTO_OPCODE_IFNE(ip)       @ if not threshold, fallthrough otherwise */
 
 /*
@@ -9641,7 +9647,7 @@
  * jump to it now).
  */
     mov     r1,#255
-    strb    r1,[r0,r3,lsr #23] @ reset counter
+    strb    r1,[r0,r3,lsr #20] @ reset counter
     EXPORT_PC()
     mov     r0,rPC
     bl      dvmJitGetCodeAddr           @ r0<- dvmJitGetCodeAddr(rPC)