Compiler: Take advantage of constant propagation

The common frontend tracks constants via a constant propagation pass.
When converting from MIR to GBC (for Portable) or LIR (for Quick),
recognize constant arguments and select more efficient codegen forms.

Note: we still have to flush constants to their associated vregs to
support deoptimization.  There's quite a bit of possible code size
gain if we were to eliminate unnecessary stores or enhance the vmap
table to explicitly represent the ranges over which Dalvik vregs
are constant.

Also some minor code refactoring related to array operations.  There
are sufficient architectural differences to make it worthwhile to
have target-dependent aget/aput generators.  On Arm, this is mostly
beneficial to floating point array loads and stores.

This CL yields a ~0.8% decrease in code size over the framework,
and a nice pop to a few of the standard point benchmarks
(linpack: ~10%, cm: ~11%, scimark: ~13% - no significant change to
the others)

Change-Id: I2337e1aa0622b34a34c3775f8b7dbf5e6969da3e
diff --git a/src/compiler/codegen/arm/codegen_arm.h b/src/compiler/codegen/arm/codegen_arm.h
index f085a19..ca39e5a 100644
--- a/src/compiler/codegen/arm/codegen_arm.h
+++ b/src/compiler/codegen/arm/codegen_arm.h
@@ -39,7 +39,6 @@
     virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
     virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
                                        int val_lo, int val_hi);
-    virtual void LoadPair(CompilationUnit* cu, int base, int low_reg, int high_reg);
     virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
                                OpSize size);
     virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -90,6 +89,12 @@
     virtual bool IsUnconditionalBranch(LIR* lir);
 
     // Required for target - Dalvik-level generators.
+    virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+                                RegLocation rl_index, RegLocation rl_src, int scale);
+    virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_dest, int scale);
+    virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale);
     virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2);
     virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -191,6 +196,7 @@
     static int EncodeShift(int code, int amount);
     static int ModifiedImmediate(uint32_t value);
     static ArmConditionCode ArmConditionEncoding(ConditionCode code);
+    bool InexpensiveConstant(int reg, int value);
 };
 
 }  // namespace art
diff --git a/src/compiler/codegen/arm/int_arm.cc b/src/compiler/codegen/arm/int_arm.cc
index 0a6abd2..e86f379 100644
--- a/src/compiler/codegen/arm/int_arm.cc
+++ b/src/compiler/codegen/arm/int_arm.cc
@@ -558,4 +558,204 @@
   return false;
 }
 
+/*
+ * Generate array load
+ */
+void ArmCodegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                          RegLocation rl_index, RegLocation rl_dest, int scale)
+{
+  RegisterClass reg_class = oat_reg_class_by_size(size);
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset;
+  RegLocation rl_result;
+  rl_array = LoadValue(cu, rl_array, kCoreReg);
+  rl_index = LoadValue(cu, rl_index, kCoreReg);
+
+  if (rl_dest.wide) {
+    data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  /* null object? */
+  GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  int reg_len = INVALID_REG;
+  if (needs_range_check) {
+    reg_len = AllocTemp(cu);
+    /* Get len */
+    LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
+  }
+  if (rl_dest.wide || rl_dest.fp) {
+    // No special indexed operation, lea + load w/ displacement
+    int reg_ptr = AllocTemp(cu);
+    OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
+                     EncodeShift(kArmLsl, scale));
+    FreeTemp(cu, rl_index.low_reg);
+    rl_result = EvalLoc(cu, rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      // TODO: change kCondCS to a more meaningful name, is the sense of
+      // carry-set/clear flipped?
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    if (rl_dest.wide) {
+      LoadBaseDispWide(cu, reg_ptr, data_offset, rl_result.low_reg, rl_result.high_reg, INVALID_SREG);
+      FreeTemp(cu, reg_ptr);
+      StoreValueWide(cu, rl_dest, rl_result);
+    } else {
+      LoadBaseDisp(cu, reg_ptr, data_offset, rl_result.low_reg, size, INVALID_SREG);
+      FreeTemp(cu, reg_ptr);
+      StoreValue(cu, rl_dest, rl_result);
+    }
+  } else {
+    // Offset base, then use indexed load
+    int reg_ptr = AllocTemp(cu);
+    OpRegRegImm(cu, kOpAdd, reg_ptr, rl_array.low_reg, data_offset);
+    FreeTemp(cu, rl_array.low_reg);
+    rl_result = EvalLoc(cu, rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      // TODO: change kCondCS to a more meaningful name, is the sense of
+      // carry-set/clear flipped?
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    LoadBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_result.low_reg, scale, size);
+    FreeTemp(cu, reg_ptr);
+    StoreValue(cu, rl_dest, rl_result);
+  }
+}
+
+/*
+ * Generate array store
+ *
+ */
+void ArmCodegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+                          RegLocation rl_index, RegLocation rl_src, int scale)
+{
+  RegisterClass reg_class = oat_reg_class_by_size(size);
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset;
+
+  if (size == kLong || size == kDouble) {
+    data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  rl_array = LoadValue(cu, rl_array, kCoreReg);
+  rl_index = LoadValue(cu, rl_index, kCoreReg);
+  int reg_ptr = INVALID_REG;
+  if (IsTemp(cu, rl_array.low_reg)) {
+    Clobber(cu, rl_array.low_reg);
+    reg_ptr = rl_array.low_reg;
+  } else {
+    reg_ptr = AllocTemp(cu);
+  }
+
+  /* null object? */
+  GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  int reg_len = INVALID_REG;
+  if (needs_range_check) {
+    reg_len = AllocTemp(cu);
+    //NOTE: max live temps(4) here.
+    /* Get len */
+    LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
+  }
+  /* at this point, reg_ptr points to array, 2 live temps */
+  if (rl_src.wide || rl_src.fp) {
+    if (rl_src.wide) {
+      rl_src = LoadValueWide(cu, rl_src, reg_class);
+    } else {
+      rl_src = LoadValue(cu, rl_src, reg_class);
+    }
+    OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
+                     EncodeShift(kArmLsl, scale));
+    if (needs_range_check) {
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    if (rl_src.wide) {
+      StoreBaseDispWide(cu, reg_ptr, data_offset, rl_src.low_reg, rl_src.high_reg);
+    } else {
+      StoreBaseDisp(cu, reg_ptr, data_offset, rl_src.low_reg, size);
+    }
+  } else {
+    /* reg_ptr -> array data */
+    OpRegRegImm(cu, kOpAdd, reg_ptr, rl_array.low_reg, data_offset);
+    rl_src = LoadValue(cu, rl_src, reg_class);
+    if (needs_range_check) {
+      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      FreeTemp(cu, reg_len);
+    }
+    StoreBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_src.low_reg,
+                     scale, size);
+  }
+  FreeTemp(cu, reg_ptr);
+}
+
+/*
+ * Generate array store
+ *
+ */
+void ArmCodegen::GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale)
+{
+  int len_offset = Array::LengthOffset().Int32Value();
+  int data_offset = Array::DataOffset(sizeof(Object*)).Int32Value();
+
+  FlushAllRegs(cu);  // Use explicit registers
+  LockCallTemps(cu);
+
+  int r_value = TargetReg(kArg0);  // Register holding value
+  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
+  int r_array = TargetReg(kArg2);  // Register holding array
+  int r_index = TargetReg(kArg3);  // Register holding index into array
+
+  LoadValueDirectFixed(cu, rl_array, r_array);  // Grab array
+  LoadValueDirectFixed(cu, rl_src, r_value);  // Grab value
+  LoadValueDirectFixed(cu, rl_index, r_index);  // Grab index
+
+  GenNullCheck(cu, rl_array.s_reg_low, r_array, opt_flags);  // NPE?
+
+  // Store of null?
+  LIR* null_value_check = OpCmpImmBranch(cu, kCondEq, r_value, 0, NULL);
+
+  // Get the array's class.
+  LoadWordDisp(cu, r_array, Object::ClassOffset().Int32Value(), r_array_class);
+  CallRuntimeHelperRegReg(cu, ENTRYPOINT_OFFSET(pCanPutArrayElementFromCode), r_value,
+                          r_array_class, true);
+  // Redo LoadValues in case they didn't survive the call.
+  LoadValueDirectFixed(cu, rl_array, r_array);  // Reload array
+  LoadValueDirectFixed(cu, rl_index, r_index);  // Reload index
+  LoadValueDirectFixed(cu, rl_src, r_value);  // Reload value
+  r_array_class = INVALID_REG;
+
+  // Branch here if value to be stored == null
+  LIR* target = NewLIR0(cu, kPseudoTargetLabel);
+  null_value_check->target = target;
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  int reg_len = INVALID_REG;
+  if (needs_range_check) {
+    reg_len = TargetReg(kArg1);
+    LoadWordDisp(cu, r_array, len_offset, reg_len);  // Get len
+  }
+  /* r_ptr -> array data */
+  int r_ptr = AllocTemp(cu);
+  OpRegRegImm(cu, kOpAdd, r_ptr, r_array, data_offset);
+  if (needs_range_check) {
+    GenRegRegCheck(cu, kCondCs, r_index, reg_len, kThrowArrayBounds);
+  }
+  StoreBaseIndexed(cu, r_ptr, r_index, r_value, scale, kWord);
+  FreeTemp(cu, r_ptr);
+  FreeTemp(cu, r_index);
+  MarkGCCard(cu, r_value, r_array);
+}
+
 }  // namespace art
diff --git a/src/compiler/codegen/arm/utility_arm.cc b/src/compiler/codegen/arm/utility_arm.cc
index 7f37bea..5c25eee 100644
--- a/src/compiler/codegen/arm/utility_arm.cc
+++ b/src/compiler/codegen/arm/utility_arm.cc
@@ -126,6 +126,21 @@
    return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
 }
 
+bool ArmCodegen::InexpensiveConstant(int reg, int value)
+{
+  bool res = false;
+  if (ARM_FPREG(reg)) {
+    res = (EncodeImmSingle(value) >= 0);
+  } else {
+    if (ARM_LOWREG(reg) && (value >= 0) && (IsUint(8, value))) {
+      res = true;
+    } else {
+      res = (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+    }
+  }
+  return res;
+}
+
 /*
  * Load a immediate using a shortcut if possible; otherwise
  * grab from the per-translation literal pool.
@@ -1011,11 +1026,6 @@
   return StoreBaseDispBody(cu, rBase, displacement, r_src_lo, r_src_hi, kLong);
 }
 
-void ArmCodegen::LoadPair(CompilationUnit* cu, int base, int low_reg, int high_reg)
-{
-  LoadBaseDispWide(cu, base, 0, low_reg, high_reg, INVALID_SREG);
-}
-
 LIR* ArmCodegen::OpFpRegCopy(CompilationUnit* cu, int r_dest, int r_src)
 {
   int opcode;