Use trampolines for calls to helpers
This is an ARM-specific optimization to the compiler
that uses trampoline islands to make calls to runtime
helper functions. The intention is to reduce the size
of the generated code (by 2 bytes per call) without
affecting performance.
By default this is on when generating an OAT file. It is
off when compiling to memory.
To switch this off in dex2oat, use the command-line option:
--no-helper-trampolines
Also enhances the disassembler to print the trampoline entry on
the BL instruction, like this:
0xb6a850c0: f7ffff9e bl -196 (0xb6a85000) ; pTestSuspend
Bug: 12607709
Change-Id: I9202bdb7cf21252ad807bd48701f1f6ce8e3d0fe
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index c9acd66..8c5c6c5 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -496,6 +496,7 @@
kThumb2LdrdPcRel8, // ldrd rt, rt2, pc +-/1024.
kThumb2LdrdI8, // ldrd rt, rt2, [rn +-/1024].
kThumb2StrdI8, // strd rt, rt2, [rn +-/1024].
+ kThumb2BlTramp, // Thumb2 BL to trampoline
kArmLast,
};
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index f77b0a6..151f3c7 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1035,6 +1035,11 @@
kFmtBitBlt, 7, 0,
IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
"strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
+ ENCODING_MAP(kThumb2BlTramp, 0xf000d000,
+ kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1,
+ IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+ "bl", "!0t", 4, kFixupTrampCall),
};
// new_lir replaces orig_lir in the pcrel_fixup list.
@@ -1224,6 +1229,7 @@
while (true) {
offset_adjustment = 0;
AssemblerStatus res = kSuccess; // Assume success
+
generation ^= 1;
// Note: nodes requring possible fixup linked in ascending order.
lir = first_fixup_;
@@ -1577,6 +1583,17 @@
}
break;
}
+ case kFixupTrampCall: {
+ // This is a call to a trampoline. The value for the trampoline call needs
+ // both the offset into the code and the trampoline to call. It will be
+ // added to the list of calls when we actually insert this instruction into
+ // the code_buffer (when we have a stable instruction stream).
+ uint32_t instoffset = lir->offset;
+ // LOG(INFO) << "adding trampoline call: offset: " << instoffset <<
+ // " entrypoint: " << lir->operands[0];
+ trampoline_calls_.push_back(TrampolineCall(instoffset, lir->operands[0]));
+ break;
+ }
default:
LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
}
@@ -1595,6 +1612,7 @@
starting_offset += offset_adjustment;
data_offset_ = (starting_offset + 0x3) & ~0x3;
AssignDataOffsets();
+ trampoline_calls_.clear(); // These are invalid now.
}
}
@@ -1675,5 +1693,4 @@
total_size_ = AssignFillArrayDataOffset(offset);
}
-
} // namespace art
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index d0d0e6b..1b485a3 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -19,6 +19,7 @@
#include "arm_lir.h"
#include "codegen_arm.h"
#include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
namespace art {
@@ -468,4 +469,34 @@
NewLIR1(kThumbBx, rARM_LR);
}
+// Entrypoint calls.
+RegStorage ArmMir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {  // ARM helper-call setup.
+ const CompilerOptions& compiler_options = cu_->compiler_driver->GetCompilerOptions();
+ if (compiler_options.GenerateHelperTrampolines()) {
+ return RegStorage::InvalidReg();  // Trampoline calls need no target register.
+ } else {
+ return LoadHelper(helper_offset);  // Otherwise hand back whatever LoadHelper returns.
+ }
+}
+
+LIR* ArmMir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+ bool use_link) {  // Emits the helper call itself and returns the call LIR.
+ LIR* call_inst = nullptr;
+ if (use_link) {
+ const CompilerOptions& compiler_options = cu_->compiler_driver->GetCompilerOptions();
+ if (compiler_options.GenerateHelperTrampolines()) {
+ call_inst = OpThreadMem(kOpBlx, helper_offset);  // r_tgt is not used on this path.
+ } else {
+ call_inst = OpReg(kOpBlx, r_tgt);  // Call through the caller-supplied target register.
+ FreeTemp(r_tgt);
+ }
+ } else {  // No link: always branches through r_tgt, even when trampolines are enabled.
+ call_inst = OpReg(kOpBx, r_tgt);  // NOTE(review): r_tgt is invalid if setup used trampolines.
+ FreeTemp(r_tgt);
+ }
+ if (safepoint_pc) {
+ MarkSafepointPC(call_inst);
+ }
+ return call_inst;
+}
} // namespace art
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 13fa635..3c0aa03 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -195,6 +195,11 @@
bool InexpensiveConstantLong(int64_t value);
bool InexpensiveConstantDouble(int64_t value);
+ // Entrypoint calls.
+ RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
+ LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset,
+ bool safepoint_pc, bool use_link);
+
private:
void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
ConditionCode ccode);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 70cbdd2..8e6d9a8 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -1111,8 +1111,14 @@
}
LIR* ArmMir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
- LOG(FATAL) << "Unexpected use of OpThreadMem for Arm";
- return NULL;
+ if (op == kOpBlx) {  // Only kOpBlx is accepted; any other op hits LOG(FATAL) below.
+ const uint32_t trampoline = cu_->compiler_driver->AddEntrypointTrampoline(
+ thread_offset.Int32Value());
+ return NewLIR1(kThumb2BlTramp, trampoline);  // operands[0] is read back by kFixupTrampCall.
+ } else {
+ LOG(FATAL) << "Invalid opcode for ARM OpThreadMem on Arm";
+ return NULL;
+ }
}
LIR* ArmMir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 6e6b8f0..b163ef7 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1012,11 +1012,26 @@
vmap_encoder.PushBackUnsigned(0u); // Size is 0.
}
+ // All relocations
+ UniquePtr<FinalRelocations> all_relocs(new FinalRelocations());
+
+ // Build the final relocations for this method.
+ if (trampoline_calls_.size() != 0) {
+ FinalEntrypointRelocationSet* ep_relocs =
+ cu_->compiler_driver->AllocateFinalEntrypointRelocationSet(cu_);
+ for (size_t i = 0 ; i < trampoline_calls_.size(); ++i) {
+ const TrampolineCall& call = trampoline_calls_[i];
+ ep_relocs->Add(call.code_offset_, call.trampoline_offset_);
+ }
+ all_relocs->push_back(ep_relocs);
+ }
+
UniquePtr<std::vector<uint8_t> > cfi_info(ReturnCallFrameInformation());
CompiledMethod* result =
new CompiledMethod(*cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
core_spill_mask_, fp_spill_mask_, encoded_mapping_table_,
- vmap_encoder.GetData(), native_gc_map_, cfi_info.get());
+ vmap_encoder.GetData(), native_gc_map_, cfi_info.get(),
+ all_relocs.release());
return result;
}
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index a3fb420..4e32931 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -17,6 +17,7 @@
#include "dex/compiler_internals.h"
#include "dex/quick/arm/arm_lir.h"
#include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array.h"
#include "mirror/object-inl.h"
@@ -950,12 +951,20 @@
void Compile() {
GenerateTargetLabel();
- RegStorage r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pResolveString));
+ const CompilerOptions& compiler_options =
+ m2l_->cu_->compiler_driver->GetCompilerOptions();
+ if (compiler_options.GenerateHelperTrampolines()) {  // Call without loading a target register.
+ m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);
+ m2l_->CallHelper(RegStorage::InvalidReg(), QUICK_ENTRYPOINT_OFFSET(4, pResolveString),
+ true);  // safepoint_pc. NOTE(review): base CallHelper does OpReg(r_tgt) - confirm targets.
+ } else {
+ RegStorage r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pResolveString));
- m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_); // .eq
- LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);
- m2l_->MarkSafepointPC(call_inst);
- m2l_->FreeTemp(r_tgt);
+ m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);
+ LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);  // Same register-call sequence as before.
+ m2l_->MarkSafepointPC(call_inst);
+ m2l_->FreeTemp(r_tgt);
+ }
m2l_->OpUnconditionalBranch(cont_);
}
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 396a709..fee15d7 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -19,6 +19,7 @@
#include "dex/quick/dex_file_method_inliner.h"
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex_file-inl.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "invoke_type.h"
#include "mirror/array.h"
@@ -62,25 +63,19 @@
/*
* To save scheduling time, helper calls are broken into two parts: generation of
- * the helper target address, and the actual call to the helper. Because x86
- * has a memory call operation, part 1 is a NOP for x86. For other targets,
- * load arguments between the two parts.
+ * the helper target address, and the actual call to the helper.
+ * These functions can be overridden by architecture specific codegen.
*/
RegStorage Mir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
- return (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) ? RegStorage::InvalidReg() : LoadHelper(helper_offset);
+ return LoadHelper(helper_offset);  // Default; the x86 and ARM backends supply their own versions.
}
/* NOTE: if r_tgt is a temp, it will be freed following use */
LIR* Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
bool use_link) {
- LIR* call_inst;
OpKind op = use_link ? kOpBlx : kOpBx;
- if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
- call_inst = OpThreadMem(op, helper_offset);
- } else {
- call_inst = OpReg(op, r_tgt);
- FreeTemp(r_tgt);
- }
+ LIR* call_inst = OpReg(op, r_tgt);
+ FreeTemp(r_tgt);
if (safepoint_pc) {
MarkSafepointPC(call_inst);
}
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 35f948e..cecb01b 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -328,6 +328,18 @@
LIR* const cont_;
};
+ // This holds the data for a call to a trampoline. An instruction is making a call
+ // to something through a trampoline and this holds the offset into the code containing
+ // the instruction, and which trampoline offset to call.
+ struct TrampolineCall {  // Recorded by the kFixupTrampCall fixup; read when building relocations.
+ TrampolineCall(uint32_t code_offset, uint32_t trampoline_offset) : code_offset_(code_offset),
+ trampoline_offset_(trampoline_offset) {
+ }
+
+ uint32_t code_offset_; // Offset of instruction in method code stream (bytes).
+ uint32_t trampoline_offset_; // Which trampoline to call.
+ };
+
virtual ~Mir2Lir() {}
int32_t s4FromSwitchData(const void* switch_data) {
@@ -614,11 +626,11 @@
virtual void GenConstWide(RegLocation rl_dest, int64_t value);
virtual void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
RegLocation rl_src1, RegLocation rl_src2);
+ virtual LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+ bool use_link = true);
+ virtual RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
// Shared by all targets - implemented in gen_invoke.cc.
- LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
- bool use_link = true);
- RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
void CallRuntimeHelperImm(ThreadOffset<4> helper_offset, int arg0, bool safepoint_pc);
void CallRuntimeHelperReg(ThreadOffset<4> helper_offset, RegStorage arg0, bool safepoint_pc);
void CallRuntimeHelperRegLocation(ThreadOffset<4> helper_offset, RegLocation arg0,
@@ -1277,6 +1289,7 @@
LIR* last_lir_insn_;
GrowableArray<LIRSlowPath*> slow_paths_;
+ std::vector<TrampolineCall> trampoline_calls_;
}; // Class Mir2Lir
} // namespace art
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 729b30d..2bd2caa 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -278,4 +278,18 @@
NewLIR0(kX86Ret);
}
+RegStorage X86Mir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
+ return RegStorage::InvalidReg();  // x86 CallHelper goes through OpThreadMem; no register needed.
+}
+
+LIR* X86Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+ bool use_link) {  // r_tgt is unused on x86.
+ LIR* call_inst = OpThreadMem(use_link ? kOpBlx : kOpBx, helper_offset);  // Link selects the op.
+ if (safepoint_pc) {
+ MarkSafepointPC(call_inst);
+ }
+ return call_inst;
+}
+
+
} // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index af2a140..e913d1d 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -328,6 +328,11 @@
*/
std::vector<uint8_t>* ReturnCallFrameInformation();
+ // Entrypoint calls.
+ RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
+ LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset,
+ bool safepoint_pc, bool use_link);
+
private:
void EmitPrefix(const X86EncodingMap* entry);
void EmitOpcode(const X86EncodingMap* entry);