[optimizing compiler] Add REM_FLOAT and REM_DOUBLE

- for arm, x86, x86_64 backends
- reinstated fmod quick entry points for x86. This is a partial revert
of bd3682eada753de52975ae2b4a712bd87dc139a6 which added inline assembly
for floting point rem on x86. Note that Quick still uses the inline
version.
- fix rem tests for longs

Change-Id: I73be19a9f2f2bcf3f718d9ca636e67bdd72b5440
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 48d6c80..a121542 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -29,6 +29,10 @@
 extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 
+// fmod entrypointes.
+extern "C" double art_quick_fmod(double, double);
+extern "C" float art_quick_fmodf(float, float);
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
   // Interpreter
@@ -105,9 +109,9 @@
   // points->pCmpgFloat = NULL;  // Not needed on x86.
   // points->pCmplDouble = NULL;  // Not needed on x86.
   // points->pCmplFloat = NULL;  // Not needed on x86.
-  // qpoints->pFmod = NULL;  // Not needed on x86.
+  qpoints->pFmod = art_quick_fmod;
   // qpoints->pL2d = NULL;  // Not needed on x86.
-  // qpoints->pFmodf = NULL;  // Not needed on x86.
+  qpoints->pFmodf = art_quick_fmodf;
   // qpoints->pL2f = NULL;  // Not needed on x86.
   // points->pD2iz = NULL;  // Not needed on x86.
   // points->pF2iz = NULL;  // Not needed on x86.
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 1ce01c4..0bfa1ce 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -795,6 +795,35 @@
 
 NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
 
+DEFINE_FUNCTION art_quick_fmod
+    subl LITERAL(12), %esp        // alignment padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    PUSH ebx                      // pass arg4 b.hi
+    PUSH edx                      // pass arg3 b.lo
+    PUSH ecx                      // pass arg2 a.hi
+    PUSH eax                      // pass arg1 a.lo
+    SETUP_GOT_NOSAVE ebx          // clobbers EBX
+    call PLT_SYMBOL(fmod)         // (jdouble a, jdouble b)
+    fstpl (%esp)                  // pop return value off fp stack
+    movsd (%esp), %xmm0           // place into %xmm0
+    addl LITERAL(28), %esp        // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-28)
+    ret
+END_FUNCTION art_quick_fmod
+
+DEFINE_FUNCTION art_quick_fmodf
+    PUSH eax                      // alignment padding
+    PUSH ecx                      // pass arg2 b
+    PUSH eax                      // pass arg1 a
+    SETUP_GOT_NOSAVE ebx          // clobbers EBX
+    call PLT_SYMBOL(fmodf)        // (jfloat a, jfloat b)
+    fstps (%esp)                  // pop return value off fp stack
+    movss (%esp), %xmm0           // place into %xmm0
+    addl LITERAL(12), %esp        // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-12)
+    ret
+END_FUNCTION art_quick_fmodf
+
 DEFINE_FUNCTION art_quick_d2l
     PUSH eax                      // alignment padding
     PUSH ecx                      // pass arg2 a.hi
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index a2766f7..2cfcfed 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -110,9 +110,9 @@
   // points->pCmpgFloat = NULL;  // Not needed on x86.
   // points->pCmplDouble = NULL;  // Not needed on x86.
   // points->pCmplFloat = NULL;  // Not needed on x86.
-  // qpoints->pFmod = NULL;  // Not needed on x86.
+  qpoints->pFmod = fmod;
   // qpoints->pL2d = NULL;  // Not needed on x86.
-  // qpoints->pFmodf = NULL;  // Not needed on x86.
+  qpoints->pFmodf = fmodf;
   // qpoints->pL2f = NULL;  // Not needed on x86.
   // points->pD2iz = NULL;  // Not needed on x86.
   // points->pF2iz = NULL;  // Not needed on x86.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index a80e7d2..7f85ab7 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1101,6 +1101,8 @@
 UNIMPLEMENTED art_quick_lshl
 UNIMPLEMENTED art_quick_lshr
 UNIMPLEMENTED art_quick_lushr
+UNIMPLEMENTED art_quick_fmod
+UNIMPLEMENTED art_quick_fmodf
 
 THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_EAX_ZERO
 THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_EAX_ZERO