Add conditional move support to x86 and allow GenMinMax to use it

X86 supports conditional moves, which are useful for reducing branchiness.
This patch adds support to the x86 backend for generating conditional
reg-to-reg operations. Assembler (encoder) support was added for the cmov
instruction.
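
For reference, CMOVcc is encoded as the two-byte opcode 0F 40+cc followed by a
ModR/M byte with mod = 3 for register-direct operands, which is what the new
kX86Cmov32RRC entry and EmitRegRegCond emit. A hypothetical example, taking EAX
as the destination and ECX as the source:

    0F 4C C1    cmovl eax, ecx    ; cc = 0xC (signed less-than), ModR/M = 11 000 001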

The x86 version of GenMinMax, used for generating the inlined versions of
Math.min/Math.max, has been updated to make use of the conditional move support.
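
For illustration only (the register assignment below is hypothetical), with the
two arguments in EAX and ECX and the result in EDX, the inlined Math.min used to
need a compare, a conditional branch and an unconditional branch; with
conditional moves it lowers to a branch-free sequence along the lines of:

    mov   edx, eax    ; tentatively pick the first argument as the result
    cmp   eax, ecx    ; compare the two arguments
    cmovg edx, ecx    ; if the first is greater, the second is the minimum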

Change-Id: I92c5428e40aa8ff88bd3071619957ac3130efae7
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index c24f0e3..1dcff65 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -177,6 +177,8 @@
 
   { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
 
+  { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c !0r,!1r" },
+
 #define SHIFT_ENCODING_MAP(opname, modrm_opcode) \
 { kX86 ## opname ## 8RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8RI", "!0r,!1d" }, \
 { kX86 ## opname ## 8MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8MI", "[!0r+!1d],!2d" }, \
@@ -449,6 +451,8 @@
       return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
     case kArrayCond:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
+    case kRegRegCond:  // lir operands - 0: reg, 1: reg, 2: cond
+      return ComputeSize(entry, 0, 0, false);
     case kJcc:
       if (lir->opcode == kX86Jcc8) {
         return 2;  // opcode + rel8
@@ -860,6 +864,30 @@
   DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
 }
 
+void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition) {
+  // Generate the prefix and opcode without the condition.
+  EmitPrefixAndOpcode(entry);
+
+  // Now add the condition. The last byte of the opcode is the one that receives it.
+  DCHECK_LE(condition, 0xF);
+  code_buffer_.back() += condition;
+
+  // Not expecting to have to encode immediate or do anything special for ModR/M since there are two registers.
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+
+  // Check that registers requested for encoding are sane.
+  DCHECK_LT(reg1, 8);
+  DCHECK_LT(reg2, 8);
+
+  // For register to register encoding, the mod is 3.
+  const uint8_t mod = (3 << 6);
+
+  // Encode the ModR/M byte now.
+  const uint8_t modrm = mod | (reg1 << 3) | reg2;
+  code_buffer_.push_back(modrm);
+}
+
 void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) {
   if (entry->opcode == kX86Jmp8) {
     DCHECK(IS_SIMM8(rel));
@@ -1178,6 +1206,9 @@
       case kRegCond:  // lir operands - 0: reg, 1: condition
         EmitRegCond(entry, lir->operands[0], lir->operands[1]);
         break;
+      case kRegRegCond:  // lir operands - 0: reg, 1: reg, 2: condition
+        EmitRegRegCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
       case kJmp:  // lir operands - 0: rel
         EmitJmp(entry, lir->operands[0]);
         break;
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 22c4452..e6621f3 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -153,6 +153,7 @@
     LIR* OpRegImm(OpKind op, int r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset);
     LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+    LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
     LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
     LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
     LIR* OpTestSuspend(LIR* target);
@@ -201,6 +202,16 @@
     void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
     void EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl);
     void EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition);
+
+    /**
+     * @brief Used for encoding conditional register to register operation.
+     * @param entry The entry in the encoding map for the opcode.
+     * @param reg1 The first physical register.
+     * @param reg2 The second physical register.
+     * @param condition The condition code for operation.
+     */
+    void EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition);
+
     void EmitJmp(const X86EncodingMap* entry, int rel);
     void EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc);
     void EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 75eddd6..11ccd4b 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -130,7 +130,7 @@
     return OpFpRegCopy(r_dest, r_src);
   LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR,
                     r_dest, r_src);
-  if (r_dest == r_src) {
+  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
     res->flags.is_nop = true;
   }
   return res;
@@ -296,20 +296,39 @@
 
 bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
   DCHECK_EQ(cu_->instruction_set, kX86);
+
+  // Get the two arguments to the invoke and place them in GP registers.
   RegLocation rl_src1 = info->args[0];
   RegLocation rl_src2 = info->args[1];
   rl_src1 = LoadValue(rl_src1, kCoreReg);
   rl_src2 = LoadValue(rl_src2, kCoreReg);
+
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegReg(kOpCmp, rl_src1.low_reg, rl_src2.low_reg);
-  DCHECK_EQ(cu_->instruction_set, kX86);
-  LIR* branch = NewLIR2(kX86Jcc8, 0, is_min ? kX86CondG : kX86CondL);
-  OpRegReg(kOpMov, rl_result.low_reg, rl_src1.low_reg);
-  LIR* branch2 = NewLIR1(kX86Jmp8, 0);
-  branch->target = NewLIR0(kPseudoTargetLabel);
-  OpRegReg(kOpMov, rl_result.low_reg, rl_src2.low_reg);
-  branch2->target = NewLIR0(kPseudoTargetLabel);
+
+  /*
+   * If the result register is the same as the second element, then we need to be careful.
+   * The reason is that the first copy will inadvertently clobber the second element with
+   * the first one, yielding the wrong result. Thus we do a swap in that case.
+   */
+  if (rl_result.low_reg == rl_src2.low_reg) {
+    std::swap(rl_src1, rl_src2);
+  }
+
+  // Pick the first integer as min/max.
+  OpRegCopy(rl_result.low_reg, rl_src1.low_reg);
+
+  // If the integers are both in the same register, then there is nothing else to do
+  // because they are equal and we have already moved one into the result.
+  if (rl_src1.low_reg != rl_src2.low_reg) {
+    // It is possible we didn't pick correctly so do the actual comparison now.
+    OpRegReg(kOpCmp, rl_src1.low_reg, rl_src2.low_reg);
+
+    // Conditionally move the other integer into the destination register.
+    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
+    OpCondRegReg(kOpCmov, condition_code, rl_result.low_reg, rl_src2.low_reg);
+  }
+
   StoreValue(rl_dest, rl_result);
   return true;
 }
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 6ec7ebb..f683aff 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -203,6 +203,12 @@
     return NewLIR2(opcode, r_dest_src1, r_src2);
 }
 
+LIR* X86Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
+  // The only conditional reg-to-reg operation supported is Cmov.
+  DCHECK_EQ(op, kOpCmov);
+  return NewLIR3(kX86Cmov32RRC, r_dest, r_src, X86ConditionEncoding(cc));
+}
+
 LIR* X86Mir2Lir::OpRegMem(OpKind op, int r_dest, int rBase,
               int offset) {
   X86OpCode opcode = kX86Nop;
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index a2d5c3e..f38a16d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -279,6 +279,9 @@
   kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT,
   kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI,
   kX86Lea32RA,
+  // RRC - Register Register ConditionCode - cond_opcode reg1, reg2
+  //             - lir operands - 0: reg1, 1: reg2, 2: CC
+  kX86Cmov32RRC,
   // RC - Register CL - opcode reg, CL
   //          - lir operands - 0: reg, 1: CL
   // MC - Memory CL   - opcode [base + disp], CL
@@ -398,6 +401,7 @@
   kShiftRegCl, kShiftMemCl, kShiftArrayCl,     // Shift opcode with register CL.
   kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
   kRegCond, kMemCond, kArrayCond,          // R, M, A instruction kinds following by a condition.
+  kRegRegCond,                             // RR instruction kind followed by a condition.
   kJmp, kJcc, kCall,                       // Branch instruction kinds.
   kPcRel,                                  // Operation with displacement that is PC relative
   kMacro,                                  // An instruction composing multiple others