Revert "Revert "Enable Load Store Elimination for ARM and ARM64""

This patch refactors the implementation of the LoadStoreElimination
optimisation pass. Please note that, before this change, the pass was
disabled and not functional on any of the backends.

The refactored implementation tracks aliases and handles DalvikRegs as
well as Heap memory regions. It has been tested and is known to optimise
out the following:
  * Load - Load
  * Store - Load
  * Store - Store
  * Load Literals

Change-Id: I3aadb12a787164146a95bc314e85fa73ad91e12b
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 462be54..1d7cdab 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -214,7 +214,7 @@
                  "csneg", "!0r, !1r, !2r, !3c", kFixupNone),
     ENCODING_MAP(kA64Dmb1B, NO_VARIANTS(0xd50330bf),
                  kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_VOLATILE,
                  "dmb", "#!0B", kFixupNone),
     ENCODING_MAP(WIDE(kA64Eor3Rrl), SF_VARIANTS(0x52000000),
                  kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
@@ -274,7 +274,7 @@
                  "fmin", "!0f, !1f, !2f", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000),
                  kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
                  "fmov", "!0f, !1f", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000),
                  kFmtRegF, 4, 0, kFmtBitBlt, 20, 13, kFmtUnused, -1, -1,
@@ -318,7 +318,7 @@
                  "fsub", "!0f, !1f, !2f", kFixupNone),
     ENCODING_MAP(kA64Ldrb3wXd, NO_VARIANTS(0x39400000),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrb", "!0w, [!1X, #!2d]", kFixupNone),
     ENCODING_MAP(kA64Ldrb3wXx, NO_VARIANTS(0x38606800),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -326,7 +326,7 @@
                  "ldrb", "!0w, [!1X, !2x]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldrsb3rXd), CUSTOM_VARIANTS(0x39c00000, 0x39800000),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrsb", "!0r, [!1X, #!2d]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldrsb3rXx), CUSTOM_VARIANTS(0x38e06800, 0x38a06800),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -334,19 +334,19 @@
                  "ldrsb", "!0r, [!1X, !2x]", kFixupNone),
     ENCODING_MAP(kA64Ldrh3wXF, NO_VARIANTS(0x79400000),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrh", "!0w, [!1X, #!2F]", kFixupNone),
     ENCODING_MAP(kA64Ldrh4wXxd, NO_VARIANTS(0x78606800),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
                  "ldrh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldrsh3rXF), CUSTOM_VARIANTS(0x79c00000, 0x79800000),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrsh", "!0r, [!1X, #!2F]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldrsh4rXxd), CUSTOM_VARIANTS(0x78e06800, 0x78906800),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
                  "ldrsh", "!0r, [!1X, !2x, lsl #!3d]", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000),
                  kFmtRegF, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
@@ -360,11 +360,11 @@
                  "ldr", "!0r, !1p", kFixupLoad),
     ENCODING_MAP(FWIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000),
                  kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldr", "!0f, [!1X, #!2D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldr3rXD), SIZE_VARIANTS(0xb9400000),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldr", "!0r, [!1X, #!2D]", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800),
                  kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -380,11 +380,11 @@
                  "ldr", "!0r, [!1X], #!2d", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldp4ffXD), CUSTOM_VARIANTS(0x2d400000, 0x6d400000),
                  kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD_OFF,
                  "ldp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD_OFF,
                  "ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
@@ -400,11 +400,11 @@
                  "ldur", "!0r, [!1X, #!2d]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldxr2rX), SIZE_VARIANTS(0x885f7c00),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOADX,
                  "ldxr", "!0r, [!1X]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldaxr2rX), SIZE_VARIANTS(0x885ffc00),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOADX,
                  "ldaxr", "!0r, [!1X]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000),
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
@@ -432,7 +432,7 @@
                  "movz", "!0r, #!1d!2M", kFixupNone),
     ENCODING_MAP(WIDE(kA64Mov2rr), SF_VARIANTS(0x2a0003e0),
                  kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
                  "mov", "!0r, !1r", kFixupNone),
     ENCODING_MAP(WIDE(kA64Mvn2rr), SF_VARIANTS(0x2a2003e0),
                  kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
@@ -508,11 +508,11 @@
                  "smulh", "!0x, !1x, !2x", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stp4ffXD), CUSTOM_VARIANTS(0x2d000000, 0x6d000000),
                  kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
                  "stp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
                  "stp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
@@ -524,7 +524,7 @@
                  "stp", "!0r, !1r, [!2X, #!3D]!!", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000),
                  kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "str", "!0f, [!1X, #!2D]", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800),
                  kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -532,7 +532,7 @@
                  "str", "!0f, [!1X, !2x!3G]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Str3rXD), SIZE_VARIANTS(0xb9000000),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "str", "!0r, [!1X, #!2D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Str4rXxG), SIZE_VARIANTS(0xb8206800),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -540,7 +540,7 @@
                  "str", "!0r, [!1X, !2x!3G]", kFixupNone),
     ENCODING_MAP(kA64Strb3wXd, NO_VARIANTS(0x39000000),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "strb", "!0w, [!1X, #!2d]", kFixupNone),
     ENCODING_MAP(kA64Strb3wXx, NO_VARIANTS(0x38206800),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -548,7 +548,7 @@
                  "strb", "!0w, [!1X, !2x]", kFixupNone),
     ENCODING_MAP(kA64Strh3wXF, NO_VARIANTS(0x79000000),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "strh", "!0w, [!1X, #!2F]", kFixupNone),
     ENCODING_MAP(kA64Strh4wXxd, NO_VARIANTS(0x78206800),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -568,11 +568,11 @@
                  "stur", "!0r, [!1X, #!2d]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stxr3wrX), SIZE_VARIANTS(0x88007c00),
                  kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STOREX,
                  "stxr", "!0w, !1r, [!2X]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stlxr3wrX), SIZE_VARIANTS(0x8800fc00),
                  kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STOREX,
                  "stlxr", "!0w, !1r, [!2X]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000),
                  kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index ac36519..fd2f541 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -297,6 +297,7 @@
     bool WideFPRsAreAliases() OVERRIDE {
       return true;  // 64b architecture.
     }
+    size_t GetInstructionOffset(LIR* lir);
 
     LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
 
@@ -381,6 +382,7 @@
     RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                           bool is_div, bool check_zero);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
+    size_t GetLoadStoreSize(LIR* lir);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index f9f85f4..360acd5 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -788,6 +788,7 @@
 }
 
 LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
 }
 
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index f6c140f..221dbfa 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -87,6 +87,26 @@
   return (bit7 | bit6 | bit5_to_0);
 }
 
+// Returns the top two bits of the w/x encoding skeleton for lir's opcode (x when IS_WIDE).
+size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
+  const ArmOpcode base_opcode = UNWIDE(lir->opcode);
+  DCHECK(!IsPseudoLirOp(base_opcode));
+  const ArmEncodingMap& entry = EncodingMap[base_opcode];
+  const uint32_t skeleton = IS_WIDE(lir->opcode) ? entry.xskeleton : entry.wskeleton;
+  return skeleton >> 30;
+}
+
+// Returns operand 2 of a load/store LIR as its offset; when SCALED_OFFSET_X0 is set the
+// value is multiplied by 2^GetLoadStoreSize(lir) (tertiary ops only, per the DCHECK).
+size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
+  const size_t raw_offset = lir->operands[2];
+  const uint64_t flags = GetTargetInstFlags(lir->opcode);
+  DCHECK((flags & (IS_LOAD | IS_STORE)) != 0);
+  if ((flags & SCALED_OFFSET_X0) == 0) return raw_offset;
+  DCHECK(flags & IS_TERTIARY_OP);
+  return raw_offset << GetLoadStoreSize(lir);
+}
+
 LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
   DCHECK(r_dest.IsSingle());
   if (value == 0) {