AArch64: Add few more inline functions

This patch adds inlining support for the following functions:
* Math.max/min(long, long)
* Math.max/min(float, float)
* Math.max/min(double, double)
* Integer.reverse(int)
* Long.reverse(long)

Change-Id: Ia2b1619fd052358b3a0d23e5fcbfdb823d2029b9
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index ea7f439..5077d11 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -259,6 +259,8 @@
   kA64Fcvt2Ss,       // fcvt   [0001111000100010110000] rn[9-5] rd[4-0].
   kA64Fcvt2sS,       // fcvt   [0001111001100010010000] rn[9-5] rd[4-0].
   kA64Fdiv3fff,      // fdiv[000111100s1] rm[20-16] [000110] rn[9-5] rd[4-0].
+  kA64Fmax3fff,      // fmax[000111100s1] rm[20-16] [010010] rn[9-5] rd[4-0].
+  kA64Fmin3fff,      // fmin[000111100s1] rm[20-16] [010110] rn[9-5] rd[4-0].
   kA64Fmov2ff,       // fmov[000111100s100000010000] rn[9-5] rd[4-0].
   kA64Fmov2fI,       // fmov[000111100s1] imm_8[20-13] [10000000] rd[4-0].
   kA64Fmov2sw,       // fmov[0001111000100111000000] rn[9-5] rd[4-0].
@@ -306,6 +308,7 @@
   kA64Orr3Rrl,       // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
   kA64Orr4rrro,      // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
   kA64Ret,           // ret [11010110010111110000001111000000].
+  kA64Rbit2rr,       // rbit [s101101011000000000000] rn[9-5] rd[4-0].
   kA64Rev2rr,        // rev [s10110101100000000001x] rn[9-5] rd[4-0].
   kA64Rev162rr,      // rev16[s101101011000000000001] rn[9-5] rd[4-0].
   kA64Ror3rrr,       // ror [s0011010110] rm[20-16] [001011] rn[9-5] rd[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 2ac34cd..284593b 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -260,6 +260,14 @@
                  kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
                  "fdiv", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmax3fff), FLOAT_VARIANTS(0x1e204800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fmax", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmin3fff), FLOAT_VARIANTS(0x1e205800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fmin", "!0f, !1f, !2f", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000),
                  kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
@@ -450,6 +458,10 @@
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
                  "ret", "", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Rbit2rr), SF_VARIANTS(0x5ac00000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "rbit", "!0r, !1r", kFixupNone),
     ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00),
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 8692c6c..b070c8a 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -176,8 +176,10 @@
     void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                   RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+    bool GenInlinedReverseBits(CallInfo* info, OpSize size);
     bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
-    bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
+    bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
+    bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double);
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 9814cb4..0f9de5b 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -333,4 +333,19 @@
   return true;
 }
 
+bool Arm64Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
+  DCHECK_EQ(cu_->instruction_set, kArm64);
+  int op = (is_min) ? kA64Fmin3fff : kA64Fmax3fff;
+  ArmOpcode wide = (is_double) ? FWIDE(0) : FUNWIDE(0);
+  RegLocation rl_src1 = info->args[0];
+  RegLocation rl_src2 = (is_double) ? info->args[2] : info->args[1];
+  rl_src1 = (is_double) ? LoadValueWide(rl_src1, kFPReg) : LoadValue(rl_src1, kFPReg);
+  rl_src2 = (is_double) ? LoadValueWide(rl_src2, kFPReg) : LoadValue(rl_src2, kFPReg);
+  RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR3(op | wide, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  (is_double) ?  StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
+  return true;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 51c8723..bab5499 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -434,18 +434,18 @@
   return true;
 }
 
-bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
   DCHECK_EQ(cu_->instruction_set, kArm64);
   RegLocation rl_src1 = info->args[0];
-  RegLocation rl_src2 = info->args[1];
-  rl_src1 = LoadValue(rl_src1, kCoreReg);
-  rl_src2 = LoadValue(rl_src2, kCoreReg);
-  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
+  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
+  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
+  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-  NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
-          rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
-  StoreValue(rl_dest, rl_result);
+  NewLIR4((is_long) ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc, rl_result.reg.GetReg(),
+          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
+  (is_long) ?  StoreValueWide(rl_dest, rl_result) :StoreValue(rl_dest, rl_result);
   return true;
 }
 
@@ -1108,4 +1108,15 @@
   }
 }
 
+bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
+  ArmOpcode wide = (size == k64) ? WIDE(0) : UNWIDE(0);
+  RegLocation rl_src_i = info->args[0];
+  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
+  NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
+  (size == k64) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
+  return true;
+}
+
 }  // namespace art