Implement register allocator for floating point registers.

Also:
- Fix incorrect emission of the REX prefix in the x86_64 assembler.
- Fix movaps code generation in the x86_64 assembler.

Change-Id: Ib6dcf6e7c4a9c43368cfc46b02ba50f69ae69cbe
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 408e13e..d5cd490 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -473,8 +473,7 @@
       case Location::kRegister : {
         int id = location.reg();
         stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id);
-        if (current->GetType() == Primitive::kPrimDouble
-            || current->GetType() == Primitive::kPrimLong) {
+        if (current->GetType() == Primitive::kPrimLong) {
           stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id);
           ++i;
           DCHECK_LT(i, environment_size);
@@ -482,52 +481,55 @@
         break;
       }
 
+      case Location::kFpuRegister : {
+        int id = location.reg();
+        stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id);
+        if (current->GetType() == Primitive::kPrimDouble) {
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id);
+          ++i;
+          DCHECK_LT(i, environment_size);
+        }
+        break;
+      }
+
       default:
         LOG(FATAL) << "Unexpected kind " << location.GetKind();
     }
   }
 }
 
-size_t CodeGenerator::GetStackOffsetOfSavedRegister(size_t index) {
-  return first_register_slot_in_slow_path_ + index * GetWordSize();
-}
-
 void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) {
   RegisterSet* register_set = locations->GetLiveRegisters();
-  uint32_t count = 0;
+  size_t stack_offset = first_register_slot_in_slow_path_;
   for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
     if (register_set->ContainsCoreRegister(i)) {
-      size_t stack_offset = GetStackOffsetOfSavedRegister(count);
-      ++count;
-      SaveCoreRegister(Location::StackSlot(stack_offset), i);
       // If the register holds an object, update the stack mask.
       if (locations->RegisterContainsObject(i)) {
         locations->SetStackBit(stack_offset / kVRegSize);
       }
+      stack_offset += SaveCoreRegister(stack_offset, i);
     }
   }
 
   for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (register_set->ContainsFloatingPointRegister(i)) {
-      LOG(FATAL) << "Unimplemented";
+      stack_offset += SaveFloatingPointRegister(stack_offset, i);
     }
   }
 }
 
 void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) {
   RegisterSet* register_set = locations->GetLiveRegisters();
-  uint32_t count = 0;
+  size_t stack_offset = first_register_slot_in_slow_path_;
   for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
     if (register_set->ContainsCoreRegister(i)) {
-      size_t stack_offset = GetStackOffsetOfSavedRegister(count);
-      ++count;
-      RestoreCoreRegister(Location::StackSlot(stack_offset), i);
+      stack_offset += RestoreCoreRegister(stack_offset, i);
     }
   }
 
   for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (register_set->ContainsFloatingPointRegister(i)) {
-      LOG(FATAL) << "Unimplemented";
+      stack_offset += RestoreFloatingPointRegister(stack_offset, i);
     }
   }
 }