MIPS32: improvements in code generation (mostly 64-bit ALU ops)

Specifically:
- Use the delay slot in InvokeRuntime() for direct entry points
- Use kNoOutputOverlap wherever possible
- Improve and/or/xor/add/sub with 64-bit integer constants
- Improve 64-bit shifts by a constant amount on R2+
- More efficient load/store of 64-bit constants (especially 0 and +0.0)

Change-Id: I86d2217c8b5b8e2a9371effc2ce38b9eec62782b
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index afca8ad..0dc307c 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -400,6 +400,20 @@
   EmitR(0, rs, rt, rd, 0, 0x07);
 }
 
+void MipsAssembler::Ext(Register rd, Register rt, int pos, int size) {  // R2+
+  CHECK(IsUint<5>(pos)) << pos;
+  CHECK(0 < size && size <= 32) << size;
+  CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size;  // Field ends by bit 31.
+  EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00);  // Encodes size - 1, not size.
+}
+
+void MipsAssembler::Ins(Register rd, Register rt, int pos, int size) {  // R2+
+  CHECK(IsUint<5>(pos)) << pos;
+  CHECK(0 < size && size <= 32) << size;
+  CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size;  // Field ends by bit 31.
+  EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04);  // Last bit index.
+}
+
 void MipsAssembler::Lb(Register rt, Register rs, uint16_t imm16) {
   EmitI(0x20, rs, rt, imm16);
 }
@@ -1121,8 +1135,14 @@
 }
 
 void MipsAssembler::LoadConst64(Register reg_hi, Register reg_lo, int64_t value) {
-  LoadConst32(reg_lo, Low32Bits(value));
-  LoadConst32(reg_hi, High32Bits(value));
+  uint32_t low = Low32Bits(value);
+  uint32_t high = High32Bits(value);
+  LoadConst32(reg_lo, low);  // reg_lo is always loaded first; reg_hi may be copied from it.
+  if (high != low) {
+    LoadConst32(reg_hi, high);
+  } else {
+    Move(reg_hi, reg_lo);  // Equal halves: copy reg_lo rather than loading the value again.
+  }
 }
 
 void MipsAssembler::StoreConst32ToOffset(int32_t value,
@@ -1136,7 +1156,11 @@
     base = AT;
     offset = 0;
   }
-  LoadConst32(temp, value);
+  if (value == 0) {
+    temp = ZERO;
+  } else {
+    LoadConst32(temp, value);
+  }
   Sw(temp, base, offset);
 }
 
@@ -1152,22 +1176,48 @@
     base = AT;
     offset = 0;
   }
-  LoadConst32(temp, Low32Bits(value));
-  Sw(temp, base, offset);
-  LoadConst32(temp, High32Bits(value));
-  Sw(temp, base, offset + kMipsWordSize);
+  uint32_t low = Low32Bits(value);
+  uint32_t high = High32Bits(value);
+  if (low == 0) {
+    Sw(ZERO, base, offset);
+  } else {
+    LoadConst32(temp, low);
+    Sw(temp, base, offset);
+  }
+  if (high == 0) {
+    Sw(ZERO, base, offset + kMipsWordSize);
+  } else {
+    if (high != low) {
+      LoadConst32(temp, high);
+    }
+    Sw(temp, base, offset + kMipsWordSize);
+  }
 }
 
 void MipsAssembler::LoadSConst32(FRegister r, int32_t value, Register temp) {
-  LoadConst32(temp, value);
+  if (value == 0) {
+    temp = ZERO;  // Zero value: pass ZERO to Mtc1 and skip the LoadConst32.
+  } else {
+    LoadConst32(temp, value);
+  }
   Mtc1(temp, r);
 }
 
 void MipsAssembler::LoadDConst64(FRegister rd, int64_t value, Register temp) {
-  LoadConst32(temp, Low32Bits(value));
-  Mtc1(temp, rd);
-  LoadConst32(temp, High32Bits(value));
-  Mthc1(temp, rd);
+  uint32_t low = Low32Bits(value);
+  uint32_t high = High32Bits(value);
+  if (low == 0) {  // Low half goes through Mtc1; a zero half uses ZERO and skips temp.
+    Mtc1(ZERO, rd);
+  } else {
+    LoadConst32(temp, low);
+    Mtc1(temp, rd);
+  }
+  if (high == 0) {  // High half goes through Mthc1, with the same ZERO shortcut.
+    Mthc1(ZERO, rd);
+  } else {
+    LoadConst32(temp, high);  // Loaded unconditionally here, even when high == low.
+    Mthc1(temp, rd);
+  }
 }
 
 void MipsAssembler::Addiu32(Register rt, Register rs, int32_t value, Register temp) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index f569aa8..066e7b0 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -156,6 +156,8 @@
   void Srlv(Register rd, Register rt, Register rs);
   void Rotrv(Register rd, Register rt, Register rs);  // R2+
   void Srav(Register rd, Register rt, Register rs);
+  void Ext(Register rd, Register rt, int pos, int size);  // R2+
+  void Ins(Register rd, Register rt, int pos, int size);  // R2+
 
   void Lb(Register rt, Register rs, uint16_t imm16);
   void Lh(Register rt, Register rs, uint16_t imm16);
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 6f8b3e8..4361843 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -367,6 +367,44 @@
   DriverStr(RepeatRRR(&mips::MipsAssembler::Srav, "srav ${reg1}, ${reg2}, ${reg3}"), "Srav");
 }
 
+TEST_F(AssemblerMIPSTest, Ins) {  // Emits Ins for every register pair and valid (pos, size).
+  std::vector<mips::Register*> regs = GetRegisters();
+  WarnOnCombinations(regs.size() * regs.size() * 33 * 16);  // 33 * 16 == 528 (pos, size) pairs.
+  std::string expected;
+  for (mips::Register* reg1 : regs) {
+    for (mips::Register* reg2 : regs) {
+      for (int32_t pos = 0; pos < 32; pos++) {
+        for (int32_t size = 1; pos + size <= 32; size++) {  // Keep the field within 32 bits.
+          __ Ins(*reg1, *reg2, pos, size);
+          std::ostringstream instr;  // Builds the matching expected assembly line.
+          instr << "ins $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+          expected += instr.str();
+        }
+      }
+    }
+  }
+  DriverStr(expected, "Ins");
+}
+
+TEST_F(AssemblerMIPSTest, Ext) {  // Emits Ext for every register pair and valid (pos, size).
+  std::vector<mips::Register*> regs = GetRegisters();
+  WarnOnCombinations(regs.size() * regs.size() * 33 * 16);  // 33 * 16 == 528 (pos, size) pairs.
+  std::string expected;
+  for (mips::Register* reg1 : regs) {
+    for (mips::Register* reg2 : regs) {
+      for (int32_t pos = 0; pos < 32; pos++) {
+        for (int32_t size = 1; pos + size <= 32; size++) {  // Keep the field within 32 bits.
+          __ Ext(*reg1, *reg2, pos, size);
+          std::ostringstream instr;  // Builds the matching expected assembly line.
+          instr << "ext $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+          expected += instr.str();
+        }
+      }
+    }
+  }
+  DriverStr(expected, "Ext");
+}
+
 TEST_F(AssemblerMIPSTest, Lb) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lb, -16, "lb ${reg1}, {imm}(${reg2})"), "Lb");
 }