MIPS32R2: Enable table-based switch in presence of irreducible loops

Test: test-art-host-gtest
Test: booted MIPS32R2 in QEMU
Test: testrunner.py --target --optimizing --32
Test: repeat all of the above with suppressed generation
      of HMipsPackedSwitch

Change-Id: Ic8a27d88cd2d7eebaf5826ce8fd1a5607a024844
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index b83e3f5..e85645b 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -3353,8 +3353,6 @@
   CHECK_NE(dest_reg, ZERO);
   if (is_r6) {
     CHECK_EQ(base_reg, ZERO);
-  } else {
-    CHECK_NE(base_reg, ZERO);
   }
   InitializeType(label_or_literal_type, is_r6);
 }
@@ -3646,15 +3644,29 @@
     case Branch::kFarLabel:
     case Branch::kLiteral:
     case Branch::kFarLiteral:
-      return GetLabelLocation(&pc_rel_base_label_);
+      if (branch->GetRightRegister() != ZERO) {
+        return GetLabelLocation(&pc_rel_base_label_);
+      }
+      // For those label/literal loads which come with their own NAL instruction
+      // and don't depend on `pc_rel_base_label_` we can simply use the location
+      // of the "branch" (the NAL precedes the "branch" immediately). The location
+      // is close enough for the user of the returned location, PromoteIfNeeded(),
+      // to not miss needed promotion to a far load.
+      // (GetOffsetSizeNeeded() provides a little leeway by means of kMaxBranchSize,
+      // which is larger than all composite branches and label/literal loads: it's
+      // OK to promote a bit earlier than strictly necessary, it makes things
+      // simpler.)
+      FALLTHROUGH_INTENDED;
     default:
       return branch->GetLocation();
   }
 }
 
 uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) {
-  // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 labels/literals or
-  // `this->GetLocation()` for everything else.
+  // `location` comes from GetBranchLocationOrPcRelBase() and is either the location
+  // of the PC-relative branch or (for some R2 label and literal loads) the location
+  // of `pc_rel_base_label_`. The PC-relative offset of the branch/load is relative
+  // to this location.
   // If the branch is still unresolved or already long, nothing to do.
   if (IsLong() || !IsResolved()) {
     return 0;
@@ -3695,7 +3707,15 @@
     case Branch::kFarLabel:
     case Branch::kLiteral:
     case Branch::kFarLiteral:
-      return GetLabelLocation(&pc_rel_base_label_);
+      if (branch->GetRightRegister() == ZERO) {
+        // These loads don't use `pc_rel_base_label_` and instead rely on their own
+        // NAL instruction (it immediately precedes the "branch"). Therefore the
+        // effective PC-relative base register is RA and it corresponds to the 2nd
+        // instruction after the NAL.
+        return branch->GetLocation() + sizeof(uint32_t);
+      } else {
+        return GetLabelLocation(&pc_rel_base_label_);
+      }
     default:
       return branch->GetOffsetLocation() +
           Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t);
@@ -3703,9 +3723,10 @@
 }
 
 uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const {
-  // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 labels/literals or
-  // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)`
-  // for everything else.
+  // `location` comes from GetBranchOrPcRelBaseForEncoding() and is either a location
+  // within/near the PC-relative branch or (for some R2 label and literal loads) the
+  // location of `pc_rel_base_label_`. The PC-relative offset of the branch/load is
+  // relative to this location.
   CHECK(IsResolved());
   uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
   // Calculate the byte distance between instructions and also account for
@@ -4001,6 +4022,12 @@
 void MipsAssembler::LoadLabelAddress(Register dest_reg, Register base_reg, MipsLabel* label) {
   // Label address loads are treated as pseudo branches since they require very similar handling.
   DCHECK(!label->IsBound());
+  // If `pc_rel_base_label_` isn't bound or none of registers contains its address, we
+  // may generate an individual NAL instruction to simulate PC-relative addressing on R2
+  // by specifying `base_reg` of `ZERO`. Check for it.
+  if (base_reg == ZERO && !IsR6()) {
+    Nal();
+  }
   branches_.emplace_back(IsR6(), buffer_.Size(), dest_reg, base_reg, Branch::kLabel);
   FinalizeLabeledBranch(label);
 }
@@ -4016,6 +4043,12 @@
   DCHECK_EQ(literal->GetSize(), 4u);
   MipsLabel* label = literal->GetLabel();
   DCHECK(!label->IsBound());
+  // If `pc_rel_base_label_` isn't bound or none of registers contains its address, we
+  // may generate an individual NAL instruction to simulate PC-relative addressing on R2
+  // by specifying `base_reg` of `ZERO`. Check for it.
+  if (base_reg == ZERO && !IsR6()) {
+    Nal();
+  }
   branches_.emplace_back(IsR6(), buffer_.Size(), dest_reg, base_reg, Branch::kLiteral);
   FinalizeLabeledBranch(label);
 }
@@ -4203,6 +4236,13 @@
   }
 }
 
+static inline Register GetR2PcRelBaseRegister(Register reg) {
+  // LoadLabelAddress() and LoadLiteral() generate individual NAL
+  // instructions on R2 when the specified base register is ZERO
+  // and so the effective PC-relative base register is RA, not ZERO.
+  return (reg == ZERO) ? RA : reg;
+}
+
 // Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
 void MipsAssembler::EmitBranch(uint32_t branch_id) {
   CHECK_EQ(overwriting_, true);
@@ -4293,13 +4333,13 @@
     case Branch::kLabel:
       DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Addiu(lhs, rhs, offset);
+      Addiu(lhs, GetR2PcRelBaseRegister(rhs), offset);
       break;
     // R2 near literal.
     case Branch::kLiteral:
       DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Lw(lhs, rhs, offset);
+      Lw(lhs, GetR2PcRelBaseRegister(rhs), offset);
       break;
 
     // R2 long branches.
@@ -4378,7 +4418,7 @@
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Lui(AT, High16Bits(offset));
       Ori(AT, AT, Low16Bits(offset));
-      Addu(lhs, AT, rhs);
+      Addu(lhs, AT, GetR2PcRelBaseRegister(rhs));
       break;
     // R2 far literal.
     case Branch::kFarLiteral:
@@ -4386,7 +4426,7 @@
       offset += (offset & 0x8000) << 1;  // Account for sign extension in lw.
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Lui(AT, High16Bits(offset));
-      Addu(AT, AT, rhs);
+      Addu(AT, AT, GetR2PcRelBaseRegister(rhs));
       Lw(lhs, AT, Low16Bits(offset));
       break;
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 57b3edd..1c5b442 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -1061,16 +1061,36 @@
     return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
   }
 
-  // Load label address using the base register (for R2 only) or using PC-relative loads
-  // (for R6 only; base_reg must be ZERO). To be used with data labels in the literal /
-  // jump table area only and not with regular code labels.
+  // Load label address using PC-relative addressing.
+  // To be used with data labels in the literal / jump table area only and not
+  // with regular code labels.
+  //
+  // For R6 base_reg must be ZERO.
+  //
+  // On R2 there are two possible uses w.r.t. base_reg:
+  //
+  // - base_reg = ZERO:
+  //   The NAL instruction will be generated as part of the load and it will
+  //   clobber the RA register.
+  //
+  // - base_reg != ZERO:
+  //   The RA-clobbering NAL instruction won't be generated as part of the load.
+  //   The label pc_rel_base_label_ must be bound (with BindPcRelBaseLabel())
+  //   and base_reg must hold the address of the label. Example:
+  //     __ Nal();
+  //     __ Move(S3, RA);
+  //     __ BindPcRelBaseLabel();  // S3 holds the address of pc_rel_base_label_.
+  //     __ LoadLabelAddress(A0, S3, label1);
+  //     __ LoadLabelAddress(A1, S3, label2);
+  //     __ LoadLiteral(V0, S3, literal1);
+  //     __ LoadLiteral(V1, S3, literal2);
   void LoadLabelAddress(Register dest_reg, Register base_reg, MipsLabel* label);
 
   // Create a new literal with the given data.
   Literal* NewLiteral(size_t size, const uint8_t* data);
 
-  // Load literal using the base register (for R2 only) or using PC-relative loads
-  // (for R6 only; base_reg must be ZERO).
+  // Load literal using PC-relative addressing.
+  // See the above comments for LoadLabelAddress() on the value of base_reg.
   void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal);
 
   // Create a jump table for the given labels that will be emitted when finalizing.
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index eed83a5..9397be4 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -2913,6 +2913,46 @@
   DriverStr(expected, "LoadNearestFarLabelAddress");
 }
 
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddressUsingNal) {
+  mips::MipsLabel label;
+  __ LoadLabelAddress(mips::V0, mips::ZERO, &label);
+  constexpr size_t kAddiuCount = 0x1FDE;
+  for (size_t i = 0; i != kAddiuCount; ++i) {
+    __ Addiu(mips::A0, mips::A1, 0);
+  }
+  __ Bind(&label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bltzal $zero, .+4\n"
+      "addiu $v0, $ra, %lo(2f - 1f)\n"
+      "1:\n" +
+      RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+      "2:\n";
+  DriverStr(expected, "LoadFarthestNearLabelAddressUsingNal");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddressUsingNal) {
+  mips::MipsLabel label;
+  __ LoadLabelAddress(mips::V0, mips::ZERO, &label);
+  constexpr size_t kAdduCount = 0x1FDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bltzal $zero, .+4\n"
+      "lui $at, %hi(2f - 1f)\n"
+      "1:\n"
+      "ori $at, $at, %lo(2f - 1b)\n"
+      "addu $v0, $at, $ra\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n";
+  DriverStr(expected, "LoadNearestFarLabelAddressUsingNal");
+}
+
 TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) {
   mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
   __ BindPcRelBaseLabel();
@@ -2951,6 +2991,46 @@
   DriverStr(expected, "LoadNearestFarLiteral");
 }
 
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteralUsingNal) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips::V0, mips::ZERO, literal);
+  constexpr size_t kAddiuCount = 0x1FDE;
+  for (size_t i = 0; i != kAddiuCount; ++i) {
+    __ Addiu(mips::A0, mips::A1, 0);
+  }
+
+  std::string expected =
+      ".set noreorder\n"
+      "bltzal $zero, .+4\n"
+      "lw $v0, %lo(2f - 1f)($ra)\n"
+      "1:\n" +
+      RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteralUsingNal");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLiteralUsingNal) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips::V0, mips::ZERO, literal);
+  constexpr size_t kAdduCount = 0x1FDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      ".set noreorder\n"
+      "bltzal $zero, .+4\n"
+      "lui $at, %hi(2f - 1f)\n"
+      "1:\n"
+      "addu $at, $at, $ra\n"
+      "lw $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteralUsingNal");
+}
+
 #undef __
 
 }  // namespace art