Compiler: Take advantage of constant propagation
The common frontend tracks constants via a constant propagation pass.
When converting from MIR to GBC (for Portable) or LIR (for Quick),
recognize constant arguments and select more efficient codegen forms.
Note: we still have to flush constants to their associated vregs to
support deoptimization. There's quite a bit of possible code size
gain if we were to eliminate unnecessary stores or enhance the vmap
table to explicitly represent the ranges over which Dalvik vregs
are constant.
Also includes some minor code refactoring related to array operations. There
are sufficient architectural differences to make it worthwhile to
have target-dependent aget/aput generators. On Arm, this is mostly
beneficial to floating point array loads and stores.
This CL yields a ~0.8% decrease in code size over the framework,
and a noticeable improvement on a few of the standard point benchmarks
(linpack: ~10%, cm: ~11%, scimark: ~13% - no significant change to
the others)
Change-Id: I2337e1aa0622b34a34c3775f8b7dbf5e6969da3e
diff --git a/src/compiler/codegen/x86/codegen_x86.h b/src/compiler/codegen/x86/codegen_x86.h
index 4ef186a..f467e83 100644
--- a/src/compiler/codegen/x86/codegen_x86.h
+++ b/src/compiler/codegen/x86/codegen_x86.h
@@ -40,7 +40,6 @@
virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
int val_lo, int val_hi);
- virtual void LoadPair(CompilationUnit* cu, int base, int low_reg, int high_reg);
virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
OpSize size);
virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -91,6 +90,12 @@
virtual bool IsUnconditionalBranch(LIR* lir);
// Required for target - Dalvik-level generators.
+ virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+ RegLocation rl_index, RegLocation rl_src, int scale);
+ virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+ RegLocation rl_index, RegLocation rl_dest, int scale);
+ virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+ RegLocation rl_index, RegLocation rl_src, int scale);
virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2);
virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -182,6 +187,7 @@
void SpillCoreRegs(CompilationUnit* cu);
void UnSpillCoreRegs(CompilationUnit* cu);
static const X86EncodingMap EncodingMap[kX86Last];
+ bool InexpensiveConstant(int reg, int value);
};
} // namespace art
diff --git a/src/compiler/codegen/x86/int_x86.cc b/src/compiler/codegen/x86/int_x86.cc
index bd3a7fa..0f1fc53 100644
--- a/src/compiler/codegen/x86/int_x86.cc
+++ b/src/compiler/codegen/x86/int_x86.cc
@@ -439,4 +439,148 @@
NewLIR2(cu, opcode, r_dest, thread_offset);
}
+/*
+ * Generate array load: null-check rl_array, bounds-check rl_index, load the element to rl_dest.
+ */
+void X86Codegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+ RegLocation rl_index, RegLocation rl_dest, int scale)
+{
+ RegisterClass reg_class = oat_reg_class_by_size(size);
+ int len_offset = Array::LengthOffset().Int32Value();
+ int data_offset;
+ RegLocation rl_result;
+ rl_array = LoadValue(cu, rl_array, kCoreReg);
+ rl_index = LoadValue(cu, rl_index, kCoreReg);
+ // Element data offset is queried with the 64-bit element size for kLong/kDouble, else 32-bit.
+ if (size == kLong || size == kDouble) {
+ data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
+ } else {
+ data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
+ }
+
+ /* null object? */
+ GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
+ // The bounds check is emitted unless the MIR carries MIR_IGNORE_RANGE_CHECK.
+ if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
+ /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
+ GenRegMemCheck(cu, kCondUge, rl_index.low_reg, rl_array.low_reg,
+ len_offset, kThrowArrayBounds);
+ }
+ if ((size == kLong) || (size == kDouble)) {
+ int reg_addr = AllocTemp(cu);  // Temp that receives the element address from OpLea below.
+ OpLea(cu, reg_addr, rl_array.low_reg, rl_index.low_reg, scale, data_offset);
+ FreeTemp(cu, rl_array.low_reg);  // NOTE(review): presumably frees regs for the result pair.
+ FreeTemp(cu, rl_index.low_reg);
+ rl_result = EvalLoc(cu, rl_dest, reg_class, true);
+ LoadBaseIndexedDisp(cu, reg_addr, INVALID_REG, 0, 0, rl_result.low_reg,
+ rl_result.high_reg, size, INVALID_SREG);  // No index/scale/disp: reg_addr is the address.
+ StoreValueWide(cu, rl_dest, rl_result);
+ } else {
+ rl_result = EvalLoc(cu, rl_dest, reg_class, true);
+ // Narrow case: load straight from [array + index << scale + data_offset], no address temp.
+ LoadBaseIndexedDisp(cu, rl_array.low_reg, rl_index.low_reg, scale,
+ data_offset, rl_result.low_reg, INVALID_REG, size,
+ INVALID_SREG);
+
+ StoreValue(cu, rl_dest, rl_result);
+ }
+}
+
+/*
+ * Generate array store: null-check rl_array, bounds-check rl_index, then store rl_src into
+ * the element at [array + (index << scale) + data_offset]. Object stores use GenArrayObjPut.
+ */
+void X86Codegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
+ RegLocation rl_index, RegLocation rl_src, int scale)
+{
+ RegisterClass reg_class = oat_reg_class_by_size(size);
+ int len_offset = Array::LengthOffset().Int32Value();
+ int data_offset;
+ // Element data offset is queried with the 64-bit element size for kLong/kDouble, else 32-bit.
+ if (size == kLong || size == kDouble) {
+ data_offset = Array::DataOffset(sizeof(int64_t)).Int32Value();
+ } else {
+ data_offset = Array::DataOffset(sizeof(int32_t)).Int32Value();
+ }
+
+ rl_array = LoadValue(cu, rl_array, kCoreReg);
+ rl_index = LoadValue(cu, rl_index, kCoreReg);
+
+ /* null object? */
+ GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
+ // The bounds check is emitted unless the MIR carries MIR_IGNORE_RANGE_CHECK.
+ if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
+ /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
+ GenRegMemCheck(cu, kCondUge, rl_index.low_reg, rl_array.low_reg, len_offset, kThrowArrayBounds);
+ }
+ if ((size == kLong) || (size == kDouble)) {
+ rl_src = LoadValueWide(cu, rl_src, reg_class);  // Wide source: low_reg/high_reg pair.
+ } else {
+ rl_src = LoadValue(cu, rl_src, reg_class);
+ }
+ // If the src reg can't be byte accessed (register number >= 4), move it to a temp first.
+ if ((size == kSignedByte || size == kUnsignedByte) && rl_src.low_reg >= 4) {
+ int temp = AllocTemp(cu);  // NOTE(review): assumes AllocTemp yields a byte-accessible reg.
+ OpRegCopy(cu, temp, rl_src.low_reg);
+ StoreBaseIndexedDisp(cu, rl_array.low_reg, rl_index.low_reg, scale, data_offset, temp,
+ INVALID_REG, size, INVALID_SREG);
+ } else {
+ StoreBaseIndexedDisp(cu, rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg,
+ rl_src.high_reg, size, INVALID_SREG);
+ }
+}
+
+/*
+ * Generate array store of an object reference, using fixed argument registers.
+ * Non-null values go through pCanPutArrayElementFromCode; the GC card is marked after the store.
+ */
+void X86Codegen::GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
+ RegLocation rl_index, RegLocation rl_src, int scale)
+{
+ int len_offset = Array::LengthOffset().Int32Value();
+ int data_offset = Array::DataOffset(sizeof(Object*)).Int32Value();
+
+ FlushAllRegs(cu); // Use explicit registers
+ LockCallTemps(cu);
+
+ int r_value = TargetReg(kArg0); // Register holding value
+ int r_array_class = TargetReg(kArg1); // Register holding array's Class
+ int r_array = TargetReg(kArg2); // Register holding array
+ int r_index = TargetReg(kArg3); // Register holding index into array
+
+ LoadValueDirectFixed(cu, rl_array, r_array); // Grab array
+ LoadValueDirectFixed(cu, rl_src, r_value); // Grab value
+ LoadValueDirectFixed(cu, rl_index, r_index); // Grab index
+
+ GenNullCheck(cu, rl_array.s_reg_low, r_array, opt_flags); // NPE?
+
+ // Store of null? If so, branch past the runtime helper call (target is patched in below).
+ LIR* null_value_check = OpCmpImmBranch(cu, kCondEq, r_value, 0, NULL);
+
+ // Get the array's class and pass it, with the value, to the runtime helper.
+ LoadWordDisp(cu, r_array, Object::ClassOffset().Int32Value(), r_array_class);
+ CallRuntimeHelperRegReg(cu, ENTRYPOINT_OFFSET(pCanPutArrayElementFromCode), r_value,
+ r_array_class, true);
+ // Redo LoadValues in case they didn't survive the call.
+ LoadValueDirectFixed(cu, rl_array, r_array); // Reload array
+ LoadValueDirectFixed(cu, rl_index, r_index); // Reload index
+ LoadValueDirectFixed(cu, rl_src, r_value); // Reload value
+ r_array_class = INVALID_REG;  // Class register is not used past this point.
+
+ // Branch here if value to be stored == null
+ LIR* target = NewLIR0(cu, kPseudoTargetLabel);
+ null_value_check->target = target;
+
+ // make an extra temp available for card mark below (kArg1 held the array's class above)
+ FreeTemp(cu, TargetReg(kArg1));
+ if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
+ /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
+ GenRegMemCheck(cu, kCondUge, r_index, r_array, len_offset, kThrowArrayBounds);
+ }
+ StoreBaseIndexedDisp(cu, r_array, r_index, scale,
+ data_offset, r_value, INVALID_REG, kWord, INVALID_SREG);
+ FreeTemp(cu, r_index);  // Index is not needed after the store.
+ MarkGCCard(cu, r_value, r_array);
+}
+
} // namespace art
diff --git a/src/compiler/codegen/x86/utility_x86.cc b/src/compiler/codegen/x86/utility_x86.cc
index bdbc547..ce55b4b 100644
--- a/src/compiler/codegen/x86/utility_x86.cc
+++ b/src/compiler/codegen/x86/utility_x86.cc
@@ -50,6 +50,11 @@
return res;
}
+bool X86Codegen::InexpensiveConstant(int reg, int value)
+{
+ return true;  // Always true on x86; reg and value are ignored. NOTE(review): presumably any immediate is cheap to materialize here - confirm.
+}
+
/*
* Load a immediate using a shortcut if possible; otherwise
* grab from the per-translation literal pool. If target is
@@ -559,9 +564,4 @@
r_src_lo, r_src_hi, kLong, INVALID_SREG);
}
-void X86Codegen::LoadPair(CompilationUnit *cu, int base, int low_reg, int high_reg)
-{
- LoadBaseDispWide(cu, base, 0, low_reg, high_reg, INVALID_SREG);
-}
-
} // namespace art