Fixes to enable TrackLiveTemps optimization on x86.

- Created a new kRegRegStore instruction class for Movdrx, in which the
  source comes first and the destination second.

- Reverted neg_float and neg_double implementation to prevent confusion
  of register types when optimizations are performed.

- Swapped order of loads for wide values to prevent base pointer from
  being clobbered when the base pointer equals the low destination reg.

- Implemented opRegCopyWide for a general-purpose register source and a
  floating-point register destination, and vice versa.

- Added more opcode coverage to x86 disassembler.

Change-Id: I4e58eec91742cc51333003fa5a678ba5b23eb575
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index d60d9de..9721038 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -475,9 +475,15 @@
     if (!pair) {
       load = newLIR3(cUnit, opcode, rDest, rBase, displacement + LOWORD_OFFSET);
     } else {
-      load = newLIR3(cUnit, opcode, rDest, rBase, displacement + LOWORD_OFFSET);
-      load2 = newLIR3(cUnit, opcode, rDestHi, rBase,
-                      displacement + HIWORD_OFFSET);
+      if (rBase == rDest) {
+        load2 = newLIR3(cUnit, opcode, rDestHi, rBase,
+                        displacement + HIWORD_OFFSET);
+        load = newLIR3(cUnit, opcode, rDest, rBase, displacement + LOWORD_OFFSET);
+      } else {
+        load = newLIR3(cUnit, opcode, rDest, rBase, displacement + LOWORD_OFFSET);
+        load2 = newLIR3(cUnit, opcode, rDestHi, rBase,
+                        displacement + HIWORD_OFFSET);
+      }
     }
     if (rBase == rSP) {
       annotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0))
@@ -492,10 +498,17 @@
       load = newLIR5(cUnit, opcode, rDest, rBase, rIndex, scale,
                      displacement + LOWORD_OFFSET);
     } else {
-      load = newLIR5(cUnit, opcode, rDest, rBase, rIndex, scale,
-                     displacement + LOWORD_OFFSET);
-      load2 = newLIR5(cUnit, opcode, rDestHi, rBase, rIndex, scale,
-                      displacement + HIWORD_OFFSET);
+      if (rBase == rDest) {
+        load2 = newLIR5(cUnit, opcode, rDestHi, rBase, rIndex, scale,
+                        displacement + HIWORD_OFFSET);
+        load = newLIR5(cUnit, opcode, rDest, rBase, rIndex, scale,
+                       displacement + LOWORD_OFFSET);
+      } else {
+        load = newLIR5(cUnit, opcode, rDest, rBase, rIndex, scale,
+                       displacement + LOWORD_OFFSET);
+        load2 = newLIR5(cUnit, opcode, rDestHi, rBase, rIndex, scale,
+                        displacement + HIWORD_OFFSET);
+      }
     }
   }
 
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index b0b6ba8..adad05b 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -352,13 +352,18 @@
     if (srcFP) {
       opRegCopy(cUnit, S2D(destLo, destHi), S2D(srcLo, srcHi));
     } else {
-      UNIMPLEMENTED(WARNING);
-      newLIR0(cUnit, kX86Bkpt);
+      // TODO: Prevent this from happening in the code. The result is often
+      // unused or could have been loaded more easily from memory.
+      newLIR2(cUnit, kX86MovdxrRR, destLo, srcLo);
+      newLIR2(cUnit, kX86MovdxrRR, destHi, srcHi);
+      newLIR2(cUnit, kX86PsllqRI, destHi, 32);
+      newLIR2(cUnit, kX86OrpsRR, destLo, destHi);
     }
   } else {
     if (srcFP) {
-      UNIMPLEMENTED(WARNING);
-      newLIR0(cUnit, kX86Bkpt);
+      newLIR2(cUnit, kX86MovdrxRR, destLo, srcLo);
+      newLIR2(cUnit, kX86PsrlqRI, srcLo, 32);
+      newLIR2(cUnit, kX86MovdrxRR, destHi, srcLo);
     } else {
       // Handle overlap
       if (srcHi == destLo) {