Add "select" detection to common frontend
dx produces a somewhat ugly code pattern for selects:
foo = (condition) ? true : false;
There is no select Dex opcode, so this turns into:
IF_EQ v0, L1
CONST_4 V2, #0
L2:
<rejoin>
.
.
L1:
CONST_4 V2, #1
GOTO L2
... or ...
foo = (condition) ? bar1 : bar2;
IF_EQ v0, L1
MOVE V2, V3
L2:
<rejoin>
.
.
L1:
MOVE V2, V4
GOTO L2
Not only do we end up with excessive branching (and, unless we do
something special, really poor code layout), but the compilers
generally drop down a suspend check on backwards branches - which is
completely unnecessary in the "GOTO L2" case above. There are ~2100
instances of the simplest variants of this pattern in the framework.
With this new optimization, boot.oat size is reduced by 90K bytes
and one of our standard benchmarks got an 8% pop.
This CL adds a select detection operation to the common frontend's
BasicBlock optimization pass, and introduces a new extended MIR
opcode: kMirOpSelect.
Change-Id: I06249956ba21afb0ed5cdd35019ac87cd063a17b
diff --git a/src/compiler/codegen/arm/codegen_arm.h b/src/compiler/codegen/arm/codegen_arm.h
index 9342620..17b8357 100644
--- a/src/compiler/codegen/arm/codegen_arm.h
+++ b/src/compiler/codegen/arm/codegen_arm.h
@@ -140,6 +140,7 @@
virtual void GenFusedFPCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir, bool gt_bias,
bool is_double);
virtual void GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir);
+ virtual void GenSelect(CompilationUnit* cu, BasicBlock* bb, MIR* mir);
virtual void GenMemBarrier(CompilationUnit* cu, MemBarrierKind barrier_kind);
virtual void GenMonitorEnter(CompilationUnit* cu, int opt_flags, RegLocation rl_src);
virtual void GenMonitorExit(CompilationUnit* cu, int opt_flags, RegLocation rl_src);
diff --git a/src/compiler/codegen/arm/int_arm.cc b/src/compiler/codegen/arm/int_arm.cc
index fbc48d4..1447aec 100644
--- a/src/compiler/codegen/arm/int_arm.cc
+++ b/src/compiler/codegen/arm/int_arm.cc
@@ -182,6 +182,57 @@
OpCmpImmBranch(cu, ccode, low_reg, val_lo, taken);
}
+void ArmCodegen::GenSelect(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
+{
+ RegLocation rl_result;
+ RegLocation rl_src = GetSrc(cu, mir, 0);
+ RegLocation rl_dest = GetDest(cu, mir);
+ rl_src = LoadValue(cu, rl_src, kCoreReg);
+ if (mir->ssa_rep->num_uses == 1) {
+ // CONST case
+ int true_val = mir->dalvikInsn.vB;
+ int false_val = mir->dalvikInsn.vC;
+ rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
+ if ((true_val == 1) && (false_val == 0)) {
+ OpRegRegImm(cu, kOpRsub, rl_result.low_reg, rl_src.low_reg, 1);
+ OpIT(cu, kCondCc, "");
+ LoadConstant(cu, rl_result.low_reg, 0);
+ GenBarrier(cu); // Add a scheduling barrier to keep the IT shadow intact
+ } else if (InexpensiveConstantInt(true_val) && InexpensiveConstantInt(false_val)) {
+ OpRegImm(cu, kOpCmp, rl_src.low_reg, 0);
+ OpIT(cu, kCondEq, "E");
+ LoadConstant(cu, rl_result.low_reg, true_val);
+ LoadConstant(cu, rl_result.low_reg, false_val);
+ GenBarrier(cu); // Add a scheduling barrier to keep the IT shadow intact
+ } else {
+ // Unlikely case - could be tuned.
+ int t_reg1 = AllocTemp(cu);
+ int t_reg2 = AllocTemp(cu);
+ LoadConstant(cu, t_reg1, true_val);
+ LoadConstant(cu, t_reg2, false_val);
+ OpRegImm(cu, kOpCmp, rl_src.low_reg, 0);
+ OpIT(cu, kCondEq, "E");
+ OpRegCopy(cu, rl_result.low_reg, t_reg1);
+ OpRegCopy(cu, rl_result.low_reg, t_reg2);
+ GenBarrier(cu); // Add a scheduling barrier to keep the IT shadow intact
+ }
+ } else {
+ // MOVE case
+ RegLocation rl_true = cu->reg_location[mir->ssa_rep->uses[1]];
+ RegLocation rl_false = cu->reg_location[mir->ssa_rep->uses[2]];
+ rl_true = LoadValue(cu, rl_true, kCoreReg);
+ rl_false = LoadValue(cu, rl_false, kCoreReg);
+ rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
+ OpRegImm(cu, kOpCmp, rl_src.low_reg, 0);
+ OpIT(cu, kCondEq, "E");
+ LIR* l1 = OpRegCopy(cu, rl_result.low_reg, rl_true.low_reg);
+ l1->flags.is_nop = false; // Make sure this instruction isn't optimized away
+ LIR* l2 = OpRegCopy(cu, rl_result.low_reg, rl_false.low_reg);
+ l2->flags.is_nop = false; // Make sure this instruction isn't optimized away
+ GenBarrier(cu); // Add a scheduling barrier to keep the IT shadow intact
+ }
+ StoreValue(cu, rl_dest, rl_result);
+}
void ArmCodegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
{
diff --git a/src/compiler/codegen/codegen.h b/src/compiler/codegen/codegen.h
index 372e842..4085a41 100644
--- a/src/compiler/codegen/codegen.h
+++ b/src/compiler/codegen/codegen.h
@@ -335,6 +335,7 @@
virtual void GenFusedFPCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir, bool gt_bias,
bool is_double) = 0;
virtual void GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir) = 0;
+ virtual void GenSelect(CompilationUnit* cu, BasicBlock* bb, MIR* mir) = 0;
virtual void GenMemBarrier(CompilationUnit* cu, MemBarrierKind barrier_kind) = 0;
virtual void GenMonitorEnter(CompilationUnit* cu, int opt_flags, RegLocation rl_src) = 0;
virtual void GenMonitorExit(CompilationUnit* cu, int opt_flags, RegLocation rl_src) = 0;
diff --git a/src/compiler/codegen/mips/codegen_mips.h b/src/compiler/codegen/mips/codegen_mips.h
index eec7b08..10a3f77 100644
--- a/src/compiler/codegen/mips/codegen_mips.h
+++ b/src/compiler/codegen/mips/codegen_mips.h
@@ -141,6 +141,7 @@
virtual void GenFusedFPCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir, bool gt_bias,
bool is_double);
virtual void GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir);
+ virtual void GenSelect(CompilationUnit* cu, BasicBlock* bb, MIR* mir);
virtual void GenMemBarrier(CompilationUnit* cu, MemBarrierKind barrier_kind);
virtual void GenMonitorEnter(CompilationUnit* cu, int opt_flags, RegLocation rl_src);
virtual void GenMonitorExit(CompilationUnit* cu, int opt_flags, RegLocation rl_src);
diff --git a/src/compiler/codegen/mips/int_mips.cc b/src/compiler/codegen/mips/int_mips.cc
index 113183c..8e71ca6 100644
--- a/src/compiler/codegen/mips/int_mips.cc
+++ b/src/compiler/codegen/mips/int_mips.cc
@@ -215,6 +215,11 @@
}
}
+void MipsCodegen::GenSelect(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
+{
+ UNIMPLEMENTED(FATAL) << "Need codegen for select";
+}
+
void MipsCodegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
{
UNIMPLEMENTED(FATAL) << "Need codegen for fused long cmp branch";
diff --git a/src/compiler/codegen/mir_to_lir.cc b/src/compiler/codegen/mir_to_lir.cc
index 23c3fe7..1e777d9 100644
--- a/src/compiler/codegen/mir_to_lir.cc
+++ b/src/compiler/codegen/mir_to_lir.cc
@@ -689,6 +689,9 @@
case kMirOpFusedCmpLong:
cg->GenFusedLongCmpBranch(cu, bb, mir);
break;
+ case kMirOpSelect:
+ cg->GenSelect(cu, bb, mir);
+ break;
default:
break;
}
diff --git a/src/compiler/codegen/x86/codegen_x86.h b/src/compiler/codegen/x86/codegen_x86.h
index 15a4662..a004232 100644
--- a/src/compiler/codegen/x86/codegen_x86.h
+++ b/src/compiler/codegen/x86/codegen_x86.h
@@ -141,6 +141,7 @@
virtual void GenFusedFPCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir, bool gt_bias,
bool is_double);
virtual void GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir);
+ virtual void GenSelect(CompilationUnit* cu, BasicBlock* bb, MIR* mir);
virtual void GenMemBarrier(CompilationUnit* cu, MemBarrierKind barrier_kind);
virtual void GenMonitorEnter(CompilationUnit* cu, int opt_flags, RegLocation rl_src);
virtual void GenMonitorExit(CompilationUnit* cu, int opt_flags, RegLocation rl_src);
diff --git a/src/compiler/codegen/x86/int_x86.cc b/src/compiler/codegen/x86/int_x86.cc
index b2292fb..09e5eb3 100644
--- a/src/compiler/codegen/x86/int_x86.cc
+++ b/src/compiler/codegen/x86/int_x86.cc
@@ -168,6 +168,11 @@
}
}
+void X86Codegen::GenSelect(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
+{
+ UNIMPLEMENTED(FATAL) << "Need codegen for GenSelect";
+}
+
void X86Codegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir) {
LIR* label_list = cu->block_label_list;
LIR* taken = &label_list[bb->taken->id];