Fixed layout for dex caches in boot image.
Define a fixed layout for dex cache arrays (type, method,
string and field arrays) for dex caches in the boot image.
This gives those arrays fixed offsets from the boot image
code and allows PC-relative addressing of their elements.
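As an illustration, here is a minimal sketch (in C++) of how such a fixed
layout can assign offsets; the names DexCounts and FixedDexCacheArraysLayout
are hypothetical stand-ins, and the actual DexCacheArraysLayout introduced in
utils/dex_cache_arrays_layout-inl.h may differ in element sizes and alignment:

  #include <cstddef>
  #include <cstdint>

  struct DexCounts {  // Hypothetical per-dex-file item counts.
    uint32_t num_types;
    uint32_t num_methods;
    uint32_t num_strings;
    uint32_t num_fields;
  };

  class FixedDexCacheArraysLayout {
   public:
    explicit FixedDexCacheArraysLayout(const DexCounts& c, size_t pointer_size = 8u)
        : types_offset_(0u),
          methods_offset_(types_offset_ + c.num_types * pointer_size),
          strings_offset_(methods_offset_ + c.num_strings * pointer_size),
          fields_offset_(strings_offset_ + c.num_strings * pointer_size),
          size_(fields_offset_ + c.num_fields * pointer_size),
          pointer_size_(pointer_size) {}

    // Offset of a resolved method entry within the fixed region. Because the
    // region sits at a known distance from the compiled code, the element can
    // be addressed PC-relatively instead of going through the DexCache object.
    size_t MethodOffset(uint32_t method_idx) const {
      return methods_offset_ + method_idx * pointer_size_;
    }
    size_t Size() const { return size_; }

   private:
    size_t types_offset_;
    size_t methods_offset_;
    size_t strings_offset_;
    size_t fields_offset_;
    size_t size_;
    size_t pointer_size_;
  };

This loosely mirrors what the code below relies on through
dex_cache_arrays_layout_.Valid() and dex_cache_arrays_layout_.MethodOffset().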
Use the PC-relative load on arm64 for relevant instructions,
i.e. invoke-static, invoke-direct, const-string,
const-class, check-cast and instance-of. This reduces the
arm64 boot.oat on Nexus 9 by 1.1MiB.
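The pattern emitted below is an adrp that materializes the 4KiB page of the
target element, followed by a load that supplies the low 12 bits. A rough
sketch of that arithmetic (not the actual assembler or OatWriter code;
SplitPcRelLoad is a hypothetical helper):

  #include <cstdint>

  struct PcRelLoadImms {
    int64_t adrp_page_delta;  // Signed 4KiB-page delta encoded in the adrp.
    uint32_t ldr_low12;       // Low 12 bits of the target; the encoder scales
                              // this by the access size for the ldr immediate.
  };

  inline PcRelLoadImms SplitPcRelLoad(uint64_t adrp_pc, uint64_t target) {
    constexpr uint64_t kPageMask = ~UINT64_C(0xfff);
    PcRelLoadImms imms;
    imms.adrp_page_delta =
        static_cast<int64_t>((target & kPageMask) - (adrp_pc & kPageMask)) >> 12;
    imms.ldr_low12 = static_cast<uint32_t>(target & 0xfffu);
    return imms;
  }

With the dex cache arrays at a fixed offset from the boot image code, both
values are known when the oat file is written, which is what the
LinkerPatch::DexCacheArrayPatch entries added below record.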
This CL provides the infrastructure and demonstrates on arm64
the gains we can achieve with a fixed dex cache arrays layout.
To fully use this for boot images, we need to implement
PC-relative addressing for the other
architectures. To achieve similar gains for apps, we need
to move the dex cache arrays to a .bss section of the oat
file. These changes will be implemented in subsequent CLs.
(Also remove some compiler_driver.h dependencies to reduce
incremental build times.)
Change-Id: Ib1859fa4452d01d983fd92ae22b611f45a85d69b
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index d15412a..f6fa938 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -236,6 +236,7 @@
kA64Add4rrro, // add [00001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
kA64Add4RRre, // add [00001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
kA64Adr2xd, // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0].
+ kA64Adrp2xd, // adrp [1] immlo[30-29] [10000] immhi[23-5] rd[4-0].
kA64And3Rrl, // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
kA64And4rrro, // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
kA64Asr3rrd, // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 329bb1e..a59deb5 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -131,6 +131,10 @@
kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
"adr", "!0x, #!1d", kFixupAdr),
+ ENCODING_MAP(kA64Adrp2xd, NO_VARIANTS(0x90000000),
+ kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
+ "adrp", "!0x, #!1d", kFixupLabel),
ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000),
kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 823cb60..3316945 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -23,10 +23,12 @@
#include "dex/mir_graph.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "gc/accounting/card_table.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/art_method.h"
#include "mirror/object_array-inl.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
namespace art {
@@ -438,13 +440,13 @@
* Bit of a hack here - in the absence of a real scheduling pass,
* emit the next instruction in static & direct invoke sequences.
*/
-static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
- int state, const MethodReference& target_method,
- uint32_t unused_idx,
- uintptr_t direct_code, uintptr_t direct_method,
- InvokeType type) {
+int Arm64Mir2Lir::Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
+ int state, const MethodReference& target_method,
+ uint32_t unused_idx,
+ uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type) {
UNUSED(info, unused_idx);
- Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
+ Arm64Mir2Lir* cg = static_cast<Arm64Mir2Lir*>(cu->cg.get());
if (direct_code != 0 && direct_method != 0) {
switch (state) {
case 0: // Get the current Method* [sets kArg0]
@@ -465,17 +467,24 @@
return -1;
}
} else {
+ bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad();
RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
switch (state) {
case 0: // Get the current Method* [sets kArg0]
// TUNING: we can save a reg copy if Method* has been promoted.
- cg->LoadCurrMethodDirect(arg0_ref);
- break;
+ if (!use_pc_rel) {
+ cg->LoadCurrMethodDirect(arg0_ref);
+ break;
+ }
+ ++state;
+ FALLTHROUGH_INTENDED;
case 1: // Get method->dex_cache_resolved_methods_
- cg->LoadRefDisp(arg0_ref,
- mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
- arg0_ref,
- kNotVolatile);
+ if (!use_pc_rel) {
+ cg->LoadRefDisp(arg0_ref,
+ mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+ arg0_ref,
+ kNotVolatile);
+ }
// Set up direct code if known.
if (direct_code != 0) {
if (direct_code != static_cast<uintptr_t>(-1)) {
@@ -487,14 +496,23 @@
cg->LoadCodeAddress(target_method, type, kInvokeTgt);
}
}
- break;
+ if (!use_pc_rel || direct_code != 0) {
+ break;
+ }
+ ++state;
+ FALLTHROUGH_INTENDED;
case 2: // Grab target method*
CHECK_EQ(cu->dex_file, target_method.dex_file);
- cg->LoadRefDisp(arg0_ref,
- mirror::ObjectArray<mirror::Object>::OffsetOfElement(
- target_method.dex_method_index).Int32Value(),
- arg0_ref,
- kNotVolatile);
+ if (!use_pc_rel) {
+ cg->LoadRefDisp(arg0_ref,
+ mirror::ObjectArray<mirror::Object>::OffsetOfElement(
+ target_method.dex_method_index).Int32Value(),
+ arg0_ref,
+ kNotVolatile);
+ } else {
+ size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index);
+ cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref);
+ }
break;
case 3: // Grab the code from the method*
if (direct_code == 0) {
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 54fd46d..8184f02 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -78,6 +78,9 @@
/// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
+ bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE;
+ void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE;
+
LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
int offset, int check_value, LIR* target, LIR** compare) OVERRIDE;
@@ -393,9 +396,16 @@
void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2, bool is_div, int flags);
+ static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
+ int state, const MethodReference& target_method,
+ uint32_t unused_idx,
+ uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type);
+
static const A64EncodingMap EncodingMap[kA64Last];
ArenaVector<LIR*> call_method_insns_;
+ ArenaVector<LIR*> dex_cache_access_insns_;
int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
};
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 2372ccc..e9b9b5d 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -18,6 +18,7 @@
#include "codegen_arm64.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
#include "arch/instruction_set_features.h"
#include "arm64_lir.h"
#include "base/logging.h"
@@ -943,6 +944,28 @@
lir->target = target;
}
+bool Arm64Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
+ if (cu_->compiler_driver->GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()
+ ->NeedFixCortexA53_843419()) {
+ // TODO: Implement link-time workaround in OatWriter so that we can use ADRP on Cortex-A53.
+ return false;
+ }
+ return dex_cache_arrays_layout_.Valid();
+}
+
+void Arm64Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset,
+ RegStorage r_dest) {
+ LIR* adrp = NewLIR2(kA64Adrp2xd, r_dest.GetReg(), 0);
+ adrp->operands[2] = WrapPointer(dex_file);
+ adrp->operands[3] = offset;
+ adrp->operands[4] = WrapPointer(adrp);
+ dex_cache_access_insns_.push_back(adrp);
+ LIR* ldr = LoadBaseDisp(r_dest, 0, r_dest, kReference, kNotVolatile);
+ ldr->operands[4] = adrp->operands[4];
+ ldr->flags.fixup = kFixupLabel;
+ dex_cache_access_insns_.push_back(ldr);
+}
+
LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
UNUSED(r_base, count);
LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 09a34bf..c5c0dc5 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -606,7 +606,8 @@
Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
: Mir2Lir(cu, mir_graph, arena),
- call_method_insns_(arena->Adapter()) {
+ call_method_insns_(arena->Adapter()),
+ dex_cache_access_insns_(arena->Adapter()) {
// Sanity check - make sure encoding map lines up.
for (int i = 0; i < kA64Last; i++) {
DCHECK_EQ(UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode), i)
@@ -846,8 +847,9 @@
}
void Arm64Mir2Lir::InstallLiteralPools() {
+ patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size());
+
// PC-relative calls to methods.
- patches_.reserve(call_method_insns_.size());
for (LIR* p : call_method_insns_) {
DCHECK_EQ(p->opcode, kA64Bl1t);
uint32_t target_method_idx = p->operands[1];
@@ -856,6 +858,18 @@
target_dex_file, target_method_idx));
}
+ // PC-relative references to dex cache arrays.
+ for (LIR* p : dex_cache_access_insns_) {
+ DCHECK(p->opcode == kA64Adrp2xd || p->opcode == kA64Ldr3rXD);
+ const LIR* adrp = UnwrapPointer<LIR>(p->operands[4]);
+ DCHECK_EQ(adrp->opcode, kA64Adrp2xd);
+ const DexFile* dex_file = UnwrapPointer<DexFile>(adrp->operands[2]);
+ uint32_t offset = adrp->operands[3];
+ DCHECK(!p->flags.is_nop);
+ DCHECK(!adrp->flags.is_nop);
+ patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset, dex_file, adrp->offset, offset));
+ }
+
// And do the normal processing.
Mir2Lir::InstallLiteralPools();
}