Revert "Revert "Enable Load Store Elimination for ARM and ARM64""
This patch refactors the implementation of the LoadStoreElimination
optimisation pass. Please note that this pass was disabled and not
functional for any of the backends.
The current implementation tracks aliases and handles DalvikRegs as well
as Heap memory regions. It has been tested and it is known to optimise
out the following:
* Load - Load
* Store - Load
* Store - Store
* Load Literals
Change-Id: I3aadb12a787164146a95bc314e85fa73ad91e12b
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 462be54..1d7cdab 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -214,7 +214,7 @@
"csneg", "!0r, !1r, !2r, !3c", kFixupNone),
ENCODING_MAP(kA64Dmb1B, NO_VARIANTS(0xd50330bf),
kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_UNARY_OP,
+ kFmtUnused, -1, -1, IS_UNARY_OP | IS_VOLATILE,
"dmb", "#!0B", kFixupNone),
ENCODING_MAP(WIDE(kA64Eor3Rrl), SF_VARIANTS(0x52000000),
kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
@@ -274,7 +274,7 @@
"fmin", "!0f, !1f, !2f", kFixupNone),
ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000),
kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
"fmov", "!0f, !1f", kFixupNone),
ENCODING_MAP(FWIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000),
kFmtRegF, 4, 0, kFmtBitBlt, 20, 13, kFmtUnused, -1, -1,
@@ -318,7 +318,7 @@
"fsub", "!0f, !1f, !2f", kFixupNone),
ENCODING_MAP(kA64Ldrb3wXd, NO_VARIANTS(0x39400000),
kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
"ldrb", "!0w, [!1X, #!2d]", kFixupNone),
ENCODING_MAP(kA64Ldrb3wXx, NO_VARIANTS(0x38606800),
kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -326,7 +326,7 @@
"ldrb", "!0w, [!1X, !2x]", kFixupNone),
ENCODING_MAP(WIDE(kA64Ldrsb3rXd), CUSTOM_VARIANTS(0x39c00000, 0x39800000),
kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
"ldrsb", "!0r, [!1X, #!2d]", kFixupNone),
ENCODING_MAP(WIDE(kA64Ldrsb3rXx), CUSTOM_VARIANTS(0x38e06800, 0x38a06800),
kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -334,19 +334,19 @@
"ldrsb", "!0r, [!1X, !2x]", kFixupNone),
ENCODING_MAP(kA64Ldrh3wXF, NO_VARIANTS(0x79400000),
kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
"ldrh", "!0w, [!1X, #!2F]", kFixupNone),
ENCODING_MAP(kA64Ldrh4wXxd, NO_VARIANTS(0x78606800),
kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
- kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+ kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
"ldrh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
ENCODING_MAP(WIDE(kA64Ldrsh3rXF), CUSTOM_VARIANTS(0x79c00000, 0x79800000),
kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
"ldrsh", "!0r, [!1X, #!2F]", kFixupNone),
ENCODING_MAP(WIDE(kA64Ldrsh4rXxd), CUSTOM_VARIANTS(0x78e06800, 0x78906800),
kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
- kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+ kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
"ldrsh", "!0r, [!1X, !2x, lsl #!3d]", kFixupNone),
ENCODING_MAP(FWIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000),
kFmtRegF, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
@@ -360,11 +360,11 @@
"ldr", "!0r, !1p", kFixupLoad),
ENCODING_MAP(FWIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000),
kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
"ldr", "!0f, [!1X, #!2D]", kFixupNone),
ENCODING_MAP(WIDE(kA64Ldr3rXD), SIZE_VARIANTS(0xb9400000),
kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
"ldr", "!0r, [!1X, #!2D]", kFixupNone),
ENCODING_MAP(FWIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800),
kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -380,11 +380,11 @@
"ldr", "!0r, [!1X], #!2d", kFixupNone),
ENCODING_MAP(WIDE(kA64Ldp4ffXD), CUSTOM_VARIANTS(0x2d400000, 0x6d400000),
kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
- kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD_OFF,
"ldp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000),
kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
- kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD_OFF,
"ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000),
kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
@@ -400,11 +400,11 @@
"ldur", "!0r, [!1X, #!2d]", kFixupNone),
ENCODING_MAP(WIDE(kA64Ldxr2rX), SIZE_VARIANTS(0x885f7c00),
kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOADX,
"ldxr", "!0r, [!1X]", kFixupNone),
ENCODING_MAP(WIDE(kA64Ldaxr2rX), SIZE_VARIANTS(0x885ffc00),
kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOADX,
"ldaxr", "!0r, [!1X]", kFixupNone),
ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000),
kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
@@ -432,7 +432,7 @@
"movz", "!0r, #!1d!2M", kFixupNone),
ENCODING_MAP(WIDE(kA64Mov2rr), SF_VARIANTS(0x2a0003e0),
kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
"mov", "!0r, !1r", kFixupNone),
ENCODING_MAP(WIDE(kA64Mvn2rr), SF_VARIANTS(0x2a2003e0),
kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
@@ -508,11 +508,11 @@
"smulh", "!0x, !1x, !2x", kFixupNone),
ENCODING_MAP(WIDE(kA64Stp4ffXD), CUSTOM_VARIANTS(0x2d000000, 0x6d000000),
kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
- kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
"stp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000),
kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
- kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
"stp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000),
kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
@@ -524,7 +524,7 @@
"stp", "!0r, !1r, [!2X, #!3D]!!", kFixupNone),
ENCODING_MAP(FWIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000),
kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
"str", "!0f, [!1X, #!2D]", kFixupNone),
ENCODING_MAP(FWIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800),
kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -532,7 +532,7 @@
"str", "!0f, [!1X, !2x!3G]", kFixupNone),
ENCODING_MAP(WIDE(kA64Str3rXD), SIZE_VARIANTS(0xb9000000),
kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
"str", "!0r, [!1X, #!2D]", kFixupNone),
ENCODING_MAP(WIDE(kA64Str4rXxG), SIZE_VARIANTS(0xb8206800),
kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -540,7 +540,7 @@
"str", "!0r, [!1X, !2x!3G]", kFixupNone),
ENCODING_MAP(kA64Strb3wXd, NO_VARIANTS(0x39000000),
kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
"strb", "!0w, [!1X, #!2d]", kFixupNone),
ENCODING_MAP(kA64Strb3wXx, NO_VARIANTS(0x38206800),
kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -548,7 +548,7 @@
"strb", "!0w, [!1X, !2x]", kFixupNone),
ENCODING_MAP(kA64Strh3wXF, NO_VARIANTS(0x79000000),
kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
"strh", "!0w, [!1X, #!2F]", kFixupNone),
ENCODING_MAP(kA64Strh4wXxd, NO_VARIANTS(0x78206800),
kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -568,11 +568,11 @@
"stur", "!0r, [!1X, #!2d]", kFixupNone),
ENCODING_MAP(WIDE(kA64Stxr3wrX), SIZE_VARIANTS(0x88007c00),
kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STOREX,
"stxr", "!0w, !1r, [!2X]", kFixupNone),
ENCODING_MAP(WIDE(kA64Stlxr3wrX), SIZE_VARIANTS(0x8800fc00),
kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STOREX,
"stlxr", "!0w, !1r, [!2X]", kFixupNone),
ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000),
kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index ac36519..fd2f541 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -297,6 +297,7 @@
bool WideFPRsAreAliases() OVERRIDE {
return true; // 64b architecture.
}
+ size_t GetInstructionOffset(LIR* lir);
LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
@@ -381,6 +382,7 @@
RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
bool is_div, bool check_zero);
RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
+ size_t GetLoadStoreSize(LIR* lir);
};
} // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index f9f85f4..360acd5 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -788,6 +788,7 @@
}
LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
}
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index f6c140f..221dbfa 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -87,6 +87,26 @@
return (bit7 | bit6 | bit5_to_0);
}
+size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
+ bool opcode_is_wide = IS_WIDE(lir->opcode);
+ ArmOpcode opcode = UNWIDE(lir->opcode);
+ DCHECK(!IsPseudoLirOp(opcode));
+ const ArmEncodingMap *encoder = &EncodingMap[opcode];
+ uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
+ return (bits >> 30);
+}
+
+size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
+ size_t offset = lir->operands[2];
+ uint64_t check_flags = GetTargetInstFlags(lir->opcode);
+ DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
+ if (check_flags & SCALED_OFFSET_X0) {
+ DCHECK(check_flags & IS_TERTIARY_OP);
+ offset = offset * (1 << GetLoadStoreSize(lir));
+ }
+ return offset;
+}
+
LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
DCHECK(r_dest.IsSingle());
if (value == 0) {