MIPS32: Fill branch delay slots

Test: booted MIPS32 in QEMU
Test: test-art-host-gtest
Test: test-art-target-gtest
Test: test-art-target-run-test-optimizing on CI20

Change-Id: I727e80753395ab99fff004cb5d2e0a06409150d7
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 50a8dc2..708bc3d 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -2009,14 +2009,17 @@
 }
 
 TEST_F(AssemblerMIPSTest, Beq) {
+  __ SetReorder(false);
   BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq");
 }
 
 TEST_F(AssemblerMIPSTest, Bne) {
+  __ SetReorder(false);
   BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne");
 }
 
 TEST_F(AssemblerMIPSTest, Beqz) {
+  __ SetReorder(false);
   mips::MipsLabel label;
   __ Beqz(mips::A0, &label);
   constexpr size_t kAdduCount1 = 63;
@@ -2043,6 +2046,7 @@
 }
 
 TEST_F(AssemblerMIPSTest, Bnez) {
+  __ SetReorder(false);
   mips::MipsLabel label;
   __ Bnez(mips::A0, &label);
   constexpr size_t kAdduCount1 = 63;
@@ -2069,22 +2073,27 @@
 }
 
 TEST_F(AssemblerMIPSTest, Bltz) {
+  __ SetReorder(false);
   BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz");
 }
 
 TEST_F(AssemblerMIPSTest, Bgez) {
+  __ SetReorder(false);
   BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez");
 }
 
 TEST_F(AssemblerMIPSTest, Blez) {
+  __ SetReorder(false);
   BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez");
 }
 
 TEST_F(AssemblerMIPSTest, Bgtz) {
+  __ SetReorder(false);
   BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz");
 }
 
 TEST_F(AssemblerMIPSTest, Blt) {
+  __ SetReorder(false);
   mips::MipsLabel label;
   __ Blt(mips::A0, mips::A1, &label);
   constexpr size_t kAdduCount1 = 63;
@@ -2113,6 +2122,7 @@
 }
 
 TEST_F(AssemblerMIPSTest, Bge) {
+  __ SetReorder(false);
   mips::MipsLabel label;
   __ Bge(mips::A0, mips::A1, &label);
   constexpr size_t kAdduCount1 = 63;
@@ -2141,6 +2151,7 @@
 }
 
 TEST_F(AssemblerMIPSTest, Bltu) {
+  __ SetReorder(false);
   mips::MipsLabel label;
   __ Bltu(mips::A0, mips::A1, &label);
   constexpr size_t kAdduCount1 = 63;
@@ -2169,6 +2180,7 @@
 }
 
 TEST_F(AssemblerMIPSTest, Bgeu) {
+  __ SetReorder(false);
   mips::MipsLabel label;
   __ Bgeu(mips::A0, mips::A1, &label);
   constexpr size_t kAdduCount1 = 63;
@@ -2197,6 +2209,7 @@
 }
 
 TEST_F(AssemblerMIPSTest, Bc1f) {
+  __ SetReorder(false);
   mips::MipsLabel label;
   __ Bc1f(0, &label);
   constexpr size_t kAdduCount1 = 63;
@@ -2223,6 +2236,7 @@
 }
 
 TEST_F(AssemblerMIPSTest, Bc1t) {
+  __ SetReorder(false);
   mips::MipsLabel label;
   __ Bc1t(0, &label);
   constexpr size_t kAdduCount1 = 63;
@@ -2331,6 +2345,410 @@
   DriverStr(expected, "LoadNearestFarLiteral");
 }
 
+TEST_F(AssemblerMIPSTest, ImpossibleReordering) {
+  mips::MipsLabel label1, label2;
+  __ SetReorder(true);
+
+  __ B(&label1);  // No preceding or target instruction for the delay slot.
+
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ Bind(&label1);
+  __ B(&label1);  // The preceding label prevents moving Addu into the delay slot.
+  __ B(&label1);  // No preceding or target instruction for the delay slot.
+
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ Beqz(mips::T0, &label1);  // T0 dependency.
+
+  __ Or(mips::T1, mips::T2, mips::T3);
+  __ Bne(mips::T2, mips::T1, &label1);  // T1 dependency.
+
+  __ And(mips::T0, mips::T1, mips::T2);
+  __ Blt(mips::T1, mips::T0, &label1);  // T0 dependency.
+
+  __ Xor(mips::AT, mips::T0, mips::T1);
+  __ Bge(mips::T1, mips::T0, &label1);  // AT dependency.
+
+  __ Subu(mips::T0, mips::T1, mips::AT);
+  __ Bltu(mips::T1, mips::T0, &label1);  // AT dependency.
+
+  __ ColtS(1, mips::F2, mips::F4);
+  __ Bc1t(1, &label1);  // cc1 dependency.
+
+  __ Move(mips::T0, mips::RA);
+  __ Bal(&label1);  // RA dependency.
+
+  __ Lw(mips::RA, mips::T0, 0);
+  __ Bal(&label1);  // RA dependency.
+
+  __ LlR2(mips::T9, mips::T0, 0);
+  __ Jalr(mips::T9);  // T9 dependency.
+
+  __ Sw(mips::RA, mips::T0, 0);
+  __ Jalr(mips::T9);  // RA dependency.
+
+  __ Lw(mips::T1, mips::T0, 0);
+  __ Jalr(mips::T1, mips::T9);  // T1 dependency.
+
+  __ ScR2(mips::T9, mips::T0, 0);
+  __ Jr(mips::T9);  // T9 dependency.
+
+  __ Bind(&label2);
+
+  __ Bnez(mips::T0, &label2);  // No preceding instruction for the delay slot.
+
+  __ Bgeu(mips::T1, mips::T0, &label2);  // No preceding instruction for the delay slot.
+
+  __ Bc1f(2, &label2);  // No preceding instruction for the delay slot.
+
+  __ Bal(&label2);  // No preceding instruction for the delay slot.
+
+  __ Jalr(mips::T9);  // No preceding instruction for the delay slot.
+
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ CodePosition();  // Drops the delay slot candidate (the last instruction).
+  __ Beq(mips::T1, mips::T2, &label2);  // No preceding or target instruction for the delay slot.
+
+  std::string expected =
+      ".set noreorder\n"
+      "b 1f\n"
+      "nop\n"
+
+      "addu $t0, $t1, $t2\n"
+      "1:\n"
+      "b 1b\n"
+      "nop\n"
+      "b 1b\n"
+      "nop\n"
+
+      "addu $t0, $t1, $t2\n"
+      "beq $zero, $t0, 1b\n"
+      "nop\n"
+
+      "or $t1, $t2, $t3\n"
+      "bne $t2, $t1, 1b\n"
+      "nop\n"
+
+      "and $t0, $t1, $t2\n"
+      "slt $at, $t1, $t0\n"
+      "bne $zero, $at, 1b\n"
+      "nop\n"
+
+      "xor $at, $t0, $t1\n"
+      "slt $at, $t1, $t0\n"
+      "beq $zero, $at, 1b\n"
+      "nop\n"
+
+      "subu $t0, $t1, $at\n"
+      "sltu $at, $t1, $t0\n"
+      "bne $zero, $at, 1b\n"
+      "nop\n"
+
+      "c.olt.s $fcc1, $f2, $f4\n"
+      "bc1t $fcc1, 1b\n"
+      "nop\n"
+
+      "or $t0, $ra, $zero\n"
+      "bal 1b\n"
+      "nop\n"
+
+      "lw $ra, 0($t0)\n"
+      "bal 1b\n"
+      "nop\n"
+
+      "ll $t9, 0($t0)\n"
+      "jalr $t9\n"
+      "nop\n"
+
+      "sw $ra, 0($t0)\n"
+      "jalr $t9\n"
+      "nop\n"
+
+      "lw $t1, 0($t0)\n"
+      "jalr $t1, $t9\n"
+      "nop\n"
+
+      "sc $t9, 0($t0)\n"
+      "jalr $zero, $t9\n"
+      "nop\n"
+
+      "2:\n"
+
+      "bne $zero, $t0, 2b\n"
+      "nop\n"
+
+      "sltu $at, $t1, $t0\n"
+      "beq $zero, $at, 2b\n"
+      "nop\n"
+
+      "bc1f $fcc2, 2b\n"
+      "nop\n"
+
+      "bal 2b\n"
+      "nop\n"
+
+      "jalr $t9\n"
+      "nop\n"
+
+      "addu $t0, $t1, $t2\n"
+      "beq $t1, $t2, 2b\n"
+      "nop\n";
+  DriverStr(expected, "ImpossibleReordering");
+}
+
+TEST_F(AssemblerMIPSTest, Reordering) {
+  mips::MipsLabel label1, label2;
+  __ SetReorder(true);
+
+  __ Bind(&label1);
+  __ Bind(&label2);
+
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ Beqz(mips::T1, &label1);
+
+  __ Or(mips::T1, mips::T2, mips::T3);
+  __ Bne(mips::T2, mips::T3, &label1);
+
+  __ And(mips::T0, mips::T1, mips::T2);
+  __ Blt(mips::T1, mips::T2, &label1);
+
+  __ Xor(mips::T2, mips::T0, mips::T1);
+  __ Bge(mips::T1, mips::T0, &label1);
+
+  __ Subu(mips::T2, mips::T1, mips::T0);
+  __ Bltu(mips::T1, mips::T0, &label1);
+
+  __ ColtS(0, mips::F2, mips::F4);
+  __ Bc1t(1, &label1);
+
+  __ Move(mips::T0, mips::T1);
+  __ Bal(&label1);
+
+  __ LlR2(mips::T1, mips::T0, 0);
+  __ Jalr(mips::T9);
+
+  __ ScR2(mips::T1, mips::T0, 0);
+  __ Jr(mips::T9);
+
+  std::string expected =
+      ".set noreorder\n"
+      "1:\n"
+
+      "beq $zero, $t1, 1b\n"
+      "addu $t0, $t1, $t2\n"
+
+      "bne $t2, $t3, 1b\n"
+      "or $t1, $t2, $t3\n"
+
+      "slt $at, $t1, $t2\n"
+      "bne $zero, $at, 1b\n"
+      "and $t0, $t1, $t2\n"
+
+      "slt $at, $t1, $t0\n"
+      "beq $zero, $at, 1b\n"
+      "xor $t2, $t0, $t1\n"
+
+      "sltu $at, $t1, $t0\n"
+      "bne $zero, $at, 1b\n"
+      "subu $t2, $t1, $t0\n"
+
+      "bc1t $fcc1, 1b\n"
+      "c.olt.s $fcc0, $f2, $f4\n"
+
+      "bal 1b\n"
+      "or $t0, $t1, $zero\n"
+
+      "jalr $t9\n"
+      "ll $t1, 0($t0)\n"
+
+      "jalr $zero, $t9\n"
+      "sc $t1, 0($t0)\n";
+  DriverStr(expected, "Reordering");
+}
+
+TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) {
+  mips::MipsLabel label1, label2, label3, label4, label5, label6;
+  __ SetReorder(true);
+
+  __ B(&label1);
+  __ Bind(&label1);
+  __ Addu(mips::T0, mips::T1, mips::T2);
+
+  __ Bind(&label2);
+  __ Xor(mips::T0, mips::T1, mips::T2);
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ Bind(&label3);  // Prevents reordering ADDU above with B below.
+  __ B(&label2);
+
+  __ B(&label4);
+  __ Bind(&label4);
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ CodePosition();  // Prevents absorbing ADDU above.
+
+  __ B(&label5);
+  __ Bind(&label5);
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ Bind(&label6);
+  __ CodePosition();  // Even across Bind(), CodePosition() prevents absorbing the ADDU above.
+
+  std::string expected =
+      ".set noreorder\n"
+      "b 1f\n"
+      "addu $t0, $t1, $t2\n"
+      "addu $t0, $t1, $t2\n"
+      "1:\n"
+
+      "xor $t0, $t1, $t2\n"
+      "2:\n"
+      "addu $t0, $t1, $t2\n"
+      "b 2b\n"
+      "xor $t0, $t1, $t2\n"
+
+      "b 4f\n"
+      "nop\n"
+      "4:\n"
+      "addu $t0, $t1, $t2\n"
+
+      "b 5f\n"
+      "nop\n"
+      "5:\n"
+      "addu $t0, $t1, $t2\n";
+  DriverStr(expected, "AbsorbTargetInstruction");
+}
+
+TEST_F(AssemblerMIPSTest, SetReorder) {
+  mips::MipsLabel label1, label2, label3, label4, label5, label6;
+
+  __ SetReorder(true);
+  __ Bind(&label1);
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ B(&label1);
+  __ B(&label5);
+  __ B(&label6);
+
+  __ SetReorder(false);
+  __ Bind(&label2);
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ B(&label2);
+  __ B(&label5);
+  __ B(&label6);
+
+  __ SetReorder(true);
+  __ Bind(&label3);
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ B(&label3);
+  __ B(&label5);
+  __ B(&label6);
+
+  __ SetReorder(false);
+  __ Bind(&label4);
+  __ Addu(mips::T0, mips::T1, mips::T2);
+  __ B(&label4);
+  __ B(&label5);
+  __ B(&label6);
+
+  __ SetReorder(true);
+  __ Bind(&label5);
+  __ Subu(mips::T0, mips::T1, mips::T2);
+
+  __ SetReorder(false);
+  __ Bind(&label6);
+  __ Xor(mips::T0, mips::T1, mips::T2);
+
+  std::string expected =
+      ".set noreorder\n"
+      "1:\n"
+      "b 1b\n"
+      "addu $t0, $t1, $t2\n"
+      "b 55f\n"
+      "subu $t0, $t1, $t2\n"
+      "b 6f\n"
+      "nop\n"
+
+      "2:\n"
+      "addu $t0, $t1, $t2\n"
+      "b 2b\n"
+      "nop\n"
+      "b 5f\n"
+      "nop\n"
+      "b 6f\n"
+      "nop\n"
+
+      "3:\n"
+      "b 3b\n"
+      "addu $t0, $t1, $t2\n"
+      "b 55f\n"
+      "subu $t0, $t1, $t2\n"
+      "b 6f\n"
+      "nop\n"
+
+      "4:\n"
+      "addu $t0, $t1, $t2\n"
+      "b 4b\n"
+      "nop\n"
+      "b 5f\n"
+      "nop\n"
+      "b 6f\n"
+      "nop\n"
+
+      "5:\n"
+      "subu $t0, $t1, $t2\n"
+      "55:\n"
+      "6:\n"
+      "xor $t0, $t1, $t2\n";
+  DriverStr(expected, "SetReorder");
+}
+
+TEST_F(AssemblerMIPSTest, LongBranchReorder) {
+  mips::MipsLabel label;
+  __ SetReorder(true);
+  __ Subu(mips::T0, mips::T1, mips::T2);
+  __ B(&label);
+  constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Subu(mips::T0, mips::T1, mips::T2);
+  __ B(&label);
+
+  // Account for 5 extra instructions: ori, addu, lw, jalr, addiu.
+  uint32_t offset_forward = (kAdduCount1 + 5) * sizeof(uint32_t);
+  // Account for 5 extra instructions: subu, addiu, sw, nal, lui.
+  uint32_t offset_back = -(kAdduCount1 + 5) * sizeof(uint32_t);
+
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "subu $t0, $t1, $t2\n"
+      "addiu $sp, $sp, -4\n"
+      "sw $ra, 0($sp)\n"
+      "bltzal $zero, .+4\n"
+      "lui $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+      "ori $at, $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+      "addu $at, $at, $ra\n"
+      "lw $ra, 0($sp)\n"
+      "jalr $zero, $at\n"
+      "addiu $sp, $sp, 4\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "subu $t0, $t1, $t2\n"
+      "addiu $sp, $sp, -4\n"
+      "sw $ra, 0($sp)\n"
+      "bltzal $zero, .+4\n"
+      "lui $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "ori $at, $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "addu $at, $at, $ra\n"
+      "lw $ra, 0($sp)\n"
+      "jalr $zero, $at\n"
+      "addiu $sp, $sp, 4\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBranchReorder");
+}
+
 #undef __
 
 }  // namespace art