MIPS64: Save 128-bit vector registers along SuspendCheckSlowPath

The slow path needs to save the full 128 bits of each vector register.
This is done only for vector registers that are live, so the overhead is small.

Test: mma test-art-host-gtest
Test: ./testrunner.py --optimizing --target in QEMU (MIPS)
Change-Id: I0f792e9c98011be3e24d5fad35a8244faafcb9a0
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5fb8755..232241c 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1289,6 +1289,11 @@
                           SP,
                           source.GetStackIndex());
       }
+    } else if (source.IsSIMDStackSlot()) {
+      __ LoadFpuFromOffset(kLoadQuadword,
+                           destination.AsFpuRegister<FpuRegister>(),
+                           SP,
+                           source.GetStackIndex());
     } else if (source.IsConstant()) {
       // Move to GPR/FPR from constant
       GpuRegister gpr = AT;
@@ -1329,12 +1334,17 @@
       }
     } else if (source.IsFpuRegister()) {
       if (destination.IsFpuRegister()) {
-        // Move to FPR from FPR
-        if (dst_type == Primitive::kPrimFloat) {
-          __ MovS(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
+        if (GetGraph()->HasSIMD()) {
+          __ MoveV(VectorRegisterFrom(destination),
+                   VectorRegisterFrom(source));
         } else {
-          DCHECK_EQ(dst_type, Primitive::kPrimDouble);
-          __ MovD(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
+          // Move to FPR from FPR
+          if (dst_type == Primitive::kPrimFloat) {
+            __ MovS(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
+          } else {
+            DCHECK_EQ(dst_type, Primitive::kPrimDouble);
+            __ MovD(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
+          }
         }
       } else {
         DCHECK(destination.IsRegister());
@@ -1345,6 +1355,23 @@
         }
       }
     }
+  } else if (destination.IsSIMDStackSlot()) {
+    if (source.IsFpuRegister()) {
+      __ StoreFpuToOffset(kStoreQuadword,
+                          source.AsFpuRegister<FpuRegister>(),
+                          SP,
+                          destination.GetStackIndex());
+    } else {
+      DCHECK(source.IsSIMDStackSlot());
+      __ LoadFpuFromOffset(kLoadQuadword,
+                           FTMP,
+                           SP,
+                           source.GetStackIndex());
+      __ StoreFpuToOffset(kStoreQuadword,
+                          FTMP,
+                          SP,
+                          destination.GetStackIndex());
+    }
   } else {  // The destination is not a register. It must be a stack slot.
     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
     if (source.IsRegister() || source.IsFpuRegister()) {