Add byte swap instructions for ARM and x86.

Change-Id: I03fdd61ffc811ae521141f532b3e04dda566c77d
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 92d58d5..2047f30 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -246,6 +246,8 @@
   UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
 #undef UNARY_ENCODING_MAP
 
+  { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0, { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" },
+
 #define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \
 { kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE01,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
 { kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
@@ -371,6 +373,8 @@
       return lir->operands[0];  // length of nop is sole operand
     case kNullary:
       return 1;  // 1 byte of opcode
+    case kRegOpcode:  // lir operands - 0: reg
+      return ComputeSize(entry, 0, 0, false) - 1;  // substract 1 for modrm
     case kReg:  // lir operands - 0: reg
       return ComputeSize(entry, 0, 0, false);
     case kMem:  // lir operands - 0: base, 1: disp
@@ -514,6 +518,33 @@
   }
 }
 
+void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) {
+  if (entry->skeleton.prefix1 != 0) {
+    code_buffer_.push_back(entry->skeleton.prefix1);
+    if (entry->skeleton.prefix2 != 0) {
+      code_buffer_.push_back(entry->skeleton.prefix2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.prefix2);
+  }
+  code_buffer_.push_back(entry->skeleton.opcode);
+  if (entry->skeleton.opcode == 0x0F) {
+    code_buffer_.push_back(entry->skeleton.extra_opcode1);
+    // There's no 3-byte instruction with +rd
+    DCHECK_NE(0x38, entry->skeleton.extra_opcode1);
+    DCHECK_NE(0x3A, entry->skeleton.extra_opcode1);
+    DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  } else {
+    DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+    DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  }
+  DCHECK(!X86_FPREG(reg));
+  DCHECK_LT(reg, 8);
+  code_buffer_.back() += reg;
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
 void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) {
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
@@ -1303,6 +1334,9 @@
         DCHECK_EQ(0, entry->skeleton.ax_opcode);
         DCHECK_EQ(0, entry->skeleton.immediate_bytes);
         break;
+      case kRegOpcode:  // lir operands - 0: reg
+        EmitOpRegOpcode(entry, lir->operands[0]);
+        break;
       case kReg:  // lir operands - 0: reg
         EmitOpReg(entry, lir->operands[0]);
         break;
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index b1d95ff..b28d7ef 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -170,6 +170,7 @@
 
   private:
     void EmitDisp(int base, int disp);
+    void EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg);
     void EmitOpReg(const X86EncodingMap* entry, uint8_t reg);
     void EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp);
     void EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg);
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index c519bfe..6ec7ebb 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -117,6 +117,7 @@
   switch (op) {
     case kOpNeg: opcode = kX86Neg32R; break;
     case kOpNot: opcode = kX86Not32R; break;
+    case kOpRev: opcode = kX86Bswap32R; break;
     case kOpBlx: opcode = kX86CallR; break;
     default:
       LOG(FATAL) << "Bad case in OpReg " << op;
@@ -161,6 +162,13 @@
       case kOpNeg:
         OpRegCopy(r_dest_src1, r_src2);
         return OpReg(kOpNeg, r_dest_src1);
+      case kOpRev:
+        OpRegCopy(r_dest_src1, r_src2);
+        return OpReg(kOpRev, r_dest_src1);
+      case kOpRevsh:
+        OpRegCopy(r_dest_src1, r_src2);
+        OpReg(kOpRev, r_dest_src1);
+        return OpRegImm(kOpAsr, r_dest_src1, 16);
         // X86 binary opcodes
       case kOpSub: opcode = kX86Sub32RR; break;
       case kOpSbc: opcode = kX86Sbb32RR; break;
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index f1b91ca..3518131 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -313,6 +313,7 @@
   UnaryOpcode(kX86Imul, DaR, DaM, DaA),
   UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
   UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
+  kX86Bswap32R,
 #undef UnaryOpcode
 #define Binary0fOpCode(opcode) \
   opcode ## RR, opcode ## RM, opcode ## RA
@@ -381,6 +382,7 @@
   kData,                                   // Special case for raw data.
   kNop,                                    // Special case for variable length nop.
   kNullary,                                // Opcode that takes no arguments.
+  kRegOpcode,                              // Shorter form of R instruction kind (opcode+rd)
   kReg, kMem, kArray,                      // R, M and A instruction kinds.
   kMemReg, kArrayReg, kThreadReg,          // MR, AR and TR instruction kinds.
   kRegReg, kRegMem, kRegArray, kRegThread,  // RR, RM, RA and RT instruction kinds.