ART: Quick compiler: More size checks, add TargetReg variants

Add TargetReg variants for requesting specific register usage, e.g., wide
and ref. Add more register size checks.
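
For illustration only, a sketch of how callers are expected to use the new
variants (variable names are hypothetical, not part of this change):

    // Hypothetical caller: request kArg0 in a specific width instead of
    // relying on the default TargetReg(kArg0).
    RegStorage ref_reg    = TargetRefReg(kArg0);      // 64-bit view, for object references
    RegStorage wide_reg   = TargetReg(kArg0, true);   // 64-bit view, for wide (long/double) values
    RegStorage narrow_reg = TargetReg(kArg0, false);  // 32-bit view of the same register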

With code adapted from https://android-review.googlesource.com/#/c/98605/.

Change-Id: I852d3be509d4dcd242c7283da702a2a76357278d
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index b0865f1..ea7f439 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -179,6 +179,8 @@
 // RegisterLocation templates return values (following the hard-float calling convention).
 const RegLocation arm_loc_c_return =
     {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_w0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_ref =
+    {kLocPhysReg, 0, 0, 0, 0, 0, 1, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG};
 const RegLocation arm_loc_c_return_wide =
     {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG};
 const RegLocation arm_loc_c_return_float =
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index cfdf926..3e0b3cf 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -132,7 +132,7 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  LoadBaseIndexed(table_base, As64BitReg(key_reg), As64BitReg(disp_reg), 2, k32);
+  LoadBaseIndexed(table_base, As64BitReg(key_reg), disp_reg, 2, k32);
 
   // Get base branch address.
   RegStorage branch_reg = AllocTempWide();
@@ -195,7 +195,7 @@
   // TUNING: How much performance do we get when we inline this?
   // Since we've already flushed all registers.
   FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rs_w0);
+  LoadValueDirectFixed(rl_src, rs_x0);  // = TargetRefReg(kArg0)
   LockCallTemps();  // Prepare for explicit register usage
   LIR* null_check_branch = nullptr;
   if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
@@ -243,7 +243,7 @@
   // TUNING: How much performance do we get when we inline this?
   // Since we've already flushed all registers.
   FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rs_w0);  // Get obj
+  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
   LIR* null_check_branch = nullptr;
   if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
@@ -291,12 +291,12 @@
  */
 void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
   RegStorage reg_card_base = AllocTempWide();
-  RegStorage reg_card_no = AllocTemp();
+  RegStorage reg_card_no = AllocTempWide();  // Needs to be wide, as the address is a reference (64-bit).
   LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
   LoadWordDisp(rs_xSELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"?
-  StoreBaseIndexed(reg_card_base, As64BitReg(reg_card_no), As32BitReg(reg_card_base),
+  StoreBaseIndexed(reg_card_base, reg_card_no, As32BitReg(reg_card_base),
                    0, kUnsignedByte);
   LIR* target = NewLIR0(kPseudoTargetLabel);
   branch_over->target = target;
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index a9340a5..f71713f 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -24,13 +24,8 @@
 
 namespace art {
 
-class Arm64Mir2Lir : public Mir2Lir {
+class Arm64Mir2Lir FINAL : public Mir2Lir {
  protected:
-  // If we detect a size error, FATAL out.
-  static constexpr bool kFailOnSizeError = false && kIsDebugBuild;
-  // If we detect a size error, report to LOG.
-  static constexpr bool kReportSizeError = false && kIsDebugBuild;
-
   // TODO: consolidate 64-bit target support.
   class InToRegStorageMapper {
    public:
@@ -102,7 +97,19 @@
                            int offset, int check_value, LIR* target) OVERRIDE;
 
     // Required for target - register utilities.
-    RegStorage TargetReg(SpecialTargetRegister reg);
+    RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
+    RegStorage TargetReg(SpecialTargetRegister symbolic_reg, bool is_wide) OVERRIDE {
+      RegStorage reg = TargetReg(symbolic_reg);
+      if (is_wide) {
+        return (reg.Is64Bit()) ? reg : As64BitReg(reg);
+      } else {
+        return (reg.Is32Bit()) ? reg : As32BitReg(reg);
+      }
+    }
+    RegStorage TargetRefReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
+      RegStorage reg = TargetReg(symbolic_reg);
+      return (reg.Is64Bit() ? reg : As64BitReg(reg));
+    }
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 56fb2dd..18a4e8f 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -181,6 +181,8 @@
 
   if (LIKELY(dest_is_fp == src_is_fp)) {
     if (LIKELY(!dest_is_fp)) {
+      DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());
+
       // Core/core copy.
       // Copies involving the sp register require a different instruction.
       opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
@@ -210,14 +212,14 @@
       if (r_dest.IsDouble()) {
         opcode = kA64Fmov2Sx;
       } else {
-        DCHECK(r_src.IsSingle());
+        r_src = Check32BitReg(r_src);
         opcode = kA64Fmov2sw;
       }
     } else {
       if (r_src.IsDouble()) {
         opcode = kA64Fmov2xS;
       } else {
-        DCHECK(r_dest.Is32Bit());
+        r_dest = Check32BitReg(r_dest);
         opcode = kA64Fmov2ws;
       }
     }
@@ -655,7 +657,7 @@
 
   rl_src = LoadValue(rl_src, kCoreReg);
   rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 31);
+  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0, 31);
   StoreValueWide(rl_dest, rl_result);
 }
 
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 6105837..dcb0050 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -88,7 +88,7 @@
 }
 
 RegLocation Arm64Mir2Lir::LocCReturnRef() {
-  return arm_loc_c_return;
+  return arm_loc_c_return_ref;
 }
 
 RegLocation Arm64Mir2Lir::LocCReturnWide() {
@@ -1097,7 +1097,7 @@
 
       // Instead of allocating a new temp, simply reuse one of the registers being used
       // for argument passing.
-      RegStorage temp = TargetReg(kArg3);
+      RegStorage temp = TargetReg(kArg3, false);
 
       // Now load the argument VR and store to the outs.
       Load32Disp(TargetReg(kSp), current_src_offset, temp);
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index e248410..8561091 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -891,9 +891,8 @@
   LIR* load;
   int expected_scale = 0;
   ArmOpcode opcode = kA64Brk1d;
-  DCHECK(r_base.Is64Bit());
-  // TODO: need a cleaner handling of index registers here and throughout.
-  r_index = Check32BitReg(r_index);
+  r_base = Check64BitReg(r_base);
+  r_index = Check64BitReg(r_index);
 
   if (r_dest.IsFloat()) {
     if (r_dest.IsDouble()) {
@@ -928,17 +927,21 @@
       expected_scale = 2;
       break;
     case kUnsignedHalf:
+      r_dest = Check32BitReg(r_dest);
       opcode = kA64Ldrh4wXxd;
       expected_scale = 1;
       break;
     case kSignedHalf:
+      r_dest = Check32BitReg(r_dest);
       opcode = kA64Ldrsh4rXxd;
       expected_scale = 1;
       break;
     case kUnsignedByte:
+      r_dest = Check32BitReg(r_dest);
       opcode = kA64Ldrb3wXx;
       break;
     case kSignedByte:
+      r_dest = Check32BitReg(r_dest);
       opcode = kA64Ldrsb3rXx;
       break;
     default:
@@ -968,9 +971,8 @@
   LIR* store;
   int expected_scale = 0;
   ArmOpcode opcode = kA64Brk1d;
-  DCHECK(r_base.Is64Bit());
-  // TODO: need a cleaner handling of index registers here and throughout.
-  r_index = Check32BitReg(r_index);
+  r_base = Check64BitReg(r_base);
+  r_index = Check64BitReg(r_index);
 
   if (r_src.IsFloat()) {
     if (r_src.IsDouble()) {
@@ -1006,11 +1008,13 @@
       break;
     case kUnsignedHalf:
     case kSignedHalf:
+      r_src = Check32BitReg(r_src);
       opcode = kA64Strh4wXxd;
       expected_scale = 1;
       break;
     case kUnsignedByte:
     case kSignedByte:
+      r_src = Check32BitReg(r_src);
       opcode = kA64Strb3wXx;
       break;
     default: