Use trampolines for calls to helpers

This is an ARM specific optimization to the compiler
that uses trampoline islands to make calls to runtime
helper functions.  The intention is to reduce the size
of the generated code (by 2 bytes per call) without
affecting performance.

By default this is on when generating an OAT file.  It is
off when compiling to memory.

To switch this off in dex2oat, use the command line option:
--no-helper-trampolines

Enhances disassembler to print the trampoline entry on the
BL instruction like this:

0xb6a850c0: f7ffff9e  bl      -196 (0xb6a85000)  ; pTestSuspend

Bug: 12607709
Change-Id: I9202bdb7cf21252ad807bd48701f1f6ce8e3d0fe
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 35f948e..cecb01b 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -328,6 +328,18 @@
       LIR* const cont_;
     };
 
+    // This holds the data for a call to a trampoline.  An instruction is making a call
+    // to something through a trampoline and this holds the offset into the code containing
+    // the instruction, and which trampoline offset to call.
+    struct TrampolineCall {
+      TrampolineCall(uint32_t code_offset, uint32_t trampoline_offset) : code_offset_(code_offset),
+         trampoline_offset_(trampoline_offset) {
+      }
+
+      uint32_t code_offset_;          // Offset of instruction in method code stream (bytes).
+      uint32_t trampoline_offset_;    // Which trampoline to call.
+    };
+
     virtual ~Mir2Lir() {}
 
     int32_t s4FromSwitchData(const void* switch_data) {
@@ -614,11 +626,11 @@
     virtual void GenConstWide(RegLocation rl_dest, int64_t value);
     virtual void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                        RegLocation rl_src1, RegLocation rl_src2);
+    virtual LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+                    bool use_link = true);
+    virtual RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
 
     // Shared by all targets - implemented in gen_invoke.cc.
-    LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
-                    bool use_link = true);
-    RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
     void CallRuntimeHelperImm(ThreadOffset<4> helper_offset, int arg0, bool safepoint_pc);
     void CallRuntimeHelperReg(ThreadOffset<4> helper_offset, RegStorage arg0, bool safepoint_pc);
     void CallRuntimeHelperRegLocation(ThreadOffset<4> helper_offset, RegLocation arg0,
@@ -1277,6 +1289,7 @@
     LIR* last_lir_insn_;
 
     GrowableArray<LIRSlowPath*> slow_paths_;
+    std::vector<TrampolineCall> trampoline_calls_;
 };  // Class Mir2Lir
 
 }  // namespace art