Optimize easy multiply and easy div remainder.

Update OpRegRegShift and OpRegRegRegShift to use RegStorage parameters.
Add special cases for *0 and *1. Add more easy multiply special cases for
Arm.
Reuse easy multiply in SmallLiteralDivRem() to support remainder cases.

Change-Id: Icd76a993d3ac8d4988e9653c19eab4efca14fad0
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 46db466..964c2fb 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -425,10 +425,6 @@
   if (pattern == DivideNone) {
     return false;
   }
-  // Tuning: add rem patterns
-  if (!is_div) {
-    return false;
-  }
 
   RegStorage r_magic = AllocTemp();
   LoadConstant(r_magic, magic_table[lit].magic);
@@ -439,23 +435,136 @@
   NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
   switch (pattern) {
     case Divide3:
-      OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi.GetReg(),
-               rl_src.reg.GetReg(), EncodeShift(kArmAsr, 31));
+      OpRegRegRegShift(kOpSub, rl_result.reg, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31));
       break;
     case Divide5:
       OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
-      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
-               EncodeShift(kArmAsr, magic_table[lit].shift));
+      OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi,
+                       EncodeShift(kArmAsr, magic_table[lit].shift));
       break;
     case Divide7:
       OpRegReg(kOpAdd, r_hi, rl_src.reg);
       OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
-      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
-               EncodeShift(kArmAsr, magic_table[lit].shift));
+      OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi,
+                       EncodeShift(kArmAsr, magic_table[lit].shift));
       break;
     default:
       LOG(FATAL) << "Unexpected pattern: " << pattern;
   }
+
+  if (!is_div) {
+    RegStorage tmp1 = r_lo;
+    EasyMultiplyOp ops[2];
+
+    bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops);
+    DCHECK_NE(canEasyMultiply, false);
+
+    GenEasyMultiplyTwoOps(tmp1, rl_result.reg, ops);
+    OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1);
+  }
+
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+// Try to convert *lit to 1 RegRegRegShift/RegRegShift form.
+bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) {
+  if (IsPowerOfTwo(lit)) {
+    op->op = kOpLsl;
+    op->shift = LowestSetBit(lit);
+    return true;
+  }
+
+  if (IsPowerOfTwo(lit - 1)) {
+    op->op = kOpAdd;
+    op->shift = LowestSetBit(lit - 1);
+    return true;
+  }
+
+  if (IsPowerOfTwo(lit + 1)) {
+    op->op = kOpRsub;
+    op->shift = LowestSetBit(lit + 1);
+    return true;
+  }
+
+  op->op = kOpInvalid;
+  return false;
+}
+
+// Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms.
+bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) {
+  GetEasyMultiplyOp(lit, &ops[0]);
+  if (GetEasyMultiplyOp(lit, &ops[0])) {
+    ops[1].op = kOpInvalid;
+    return true;
+  }
+
+  int lit1 = lit;
+  uint32_t shift = LowestSetBit(lit1);
+  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
+    ops[1].op = kOpLsl;
+    ops[1].shift = shift;
+    return true;
+  }
+
+  lit1 = lit - 1;
+  shift = LowestSetBit(lit1);
+  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
+    ops[1].op = kOpAdd;
+    ops[1].shift = shift;
+    return true;
+  }
+
+  lit1 = lit + 1;
+  shift = LowestSetBit(lit1);
+  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
+    ops[1].op = kOpRsub;
+    ops[1].shift = shift;
+    return true;
+  }
+
+  return false;
+}
+
+void ArmMir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) {
+  // dest = ( src << shift1) + [ src | -src | 0 ]
+  // dest = (dest << shift2) + [ src | -src | 0 ]
+  for (int i = 0; i < 2; i++) {
+    RegStorage r_src2;
+    if (i == 0) {
+      r_src2 = r_src;
+    } else {
+      r_src2 = r_dest;
+    }
+    switch (ops[i].op) {
+    case kOpLsl:
+      OpRegRegImm(kOpLsl, r_dest, r_src2, ops[i].shift);
+      break;
+    case kOpAdd:
+      OpRegRegRegShift(kOpAdd, r_dest, r_src, r_src2, EncodeShift(kArmLsl, ops[i].shift));
+      break;
+    case kOpRsub:
+      OpRegRegRegShift(kOpRsub, r_dest, r_src, r_src2, EncodeShift(kArmLsl, ops[i].shift));
+      break;
+    default:
+      DCHECK_NE(i, 0);
+      DCHECK_EQ(ops[i].op, kOpInvalid);
+      break;
+    }
+  }
+}
+
+bool ArmMir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
+  EasyMultiplyOp ops[2];
+
+  if (!GetEasyMultiplyTwoOps(lit, ops)) {
+    return false;
+  }
+
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops);
   StoreValue(rl_dest, rl_result);
   return true;
 }
@@ -752,7 +861,7 @@
 void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                                RegLocation rl_result, int lit,
                                                int first_bit, int second_bit) {
-  OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
+  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
                    EncodeShift(kArmLsl, second_bit - first_bit));
   if (first_bit != 0) {
     OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
@@ -898,8 +1007,7 @@
       NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
       NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(),
               rl_src1.reg.GetLowReg());
-      OpRegRegRegShift(kOpAdd, res_hi.GetReg(), res_hi.GetReg(), tmp1.GetReg(),
-                       EncodeShift(kArmLsl, 1));
+      OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
     } else {
       NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg());
       if (reg_status == 2) {
@@ -1009,8 +1117,7 @@
     } else {
       // No special indexed operation, lea + load w/ displacement
       reg_ptr = AllocTemp();
-      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
-                       EncodeShift(kArmLsl, scale));
+      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
       FreeTemp(rl_index.reg.GetReg());
     }
     rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -1117,8 +1224,7 @@
       rl_src = LoadValue(rl_src, reg_class);
     }
     if (!constant_index) {
-      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
-                       EncodeShift(kArmLsl, scale));
+      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
     }
     if (needs_range_check) {
       if (constant_index) {
@@ -1183,7 +1289,7 @@
         LoadConstant(rl_result.reg.GetLow(), 0);
       } else {
         OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(), rl_src.reg.GetLowReg(),
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(),
                          EncodeShift(kArmLsr, 32 - shift_amount));
         OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
       }
@@ -1199,7 +1305,7 @@
       } else {
         RegStorage t_reg = AllocTemp();
         OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
                          EncodeShift(kArmLsl, 32 - shift_amount));
         FreeTemp(t_reg);
         OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
@@ -1216,7 +1322,7 @@
       } else {
         RegStorage t_reg = AllocTemp();
         OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
                          EncodeShift(kArmLsl, 32 - shift_amount));
         FreeTemp(t_reg);
         OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);