MIPS32: Implement table-based packed switch

Test: booted MIPS32R2 in QEMU
Test: test-art-target-run-test-optimizing (MIPS32R2) on CI20
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target-run-test-optimizing (MIPS32R6) in QEMU
Test: test-art-host-gtest

Change-Id: I2e1a65ff1ba9406b84351ba7998f853b1ce4aef9
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index f07f8a0..306538c 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -5824,13 +5824,11 @@
   locations->SetInAt(0, Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
-  int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
-  LocationSummary* locations = switch_instr->GetLocations();
-  Register value_reg = locations->InAt(0).AsRegister<Register>();
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
-
+void InstructionCodeGeneratorMIPS::GenPackedSwitchWithCompares(Register value_reg,
+                                                               int32_t lower_bound,
+                                                               uint32_t num_entries,
+                                                               HBasicBlock* switch_block,
+                                                               HBasicBlock* default_block) {
   // Create a set of compare/jumps.
   Register temp_reg = TMP;
   __ Addiu32(temp_reg, value_reg, -lower_bound);
@@ -5839,7 +5837,7 @@
   // this case, index >= num_entries must be true. So that we can save one branch instruction.
   __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
 
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
   // Jump to successors[0] if value == lower_bound.
   __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
   int32_t last_index = 0;
@@ -5857,11 +5855,107 @@
   }
 
   // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
     __ B(codegen_->GetLabelOf(default_block));
   }
 }
 
+void InstructionCodeGeneratorMIPS::GenTableBasedPackedSwitch(Register value_reg,
+                                                             Register constant_area,
+                                                             int32_t lower_bound,
+                                                             uint32_t num_entries,
+                                                             HBasicBlock* switch_block,
+                                                             HBasicBlock* default_block) {
+  // Create a jump table.
+  std::vector<MipsLabel*> labels(num_entries);
+  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+  for (uint32_t i = 0; i < num_entries; i++) {
+    labels[i] = codegen_->GetLabelOf(successors[i]);
+  }
+  JumpTable* table = __ CreateJumpTable(std::move(labels));
+
+  // Is the value in range?
+  __ Addiu32(TMP, value_reg, -lower_bound);
+  if (IsInt<16>(static_cast<int32_t>(num_entries))) {
+    __ Sltiu(AT, TMP, num_entries);
+    __ Beqz(AT, codegen_->GetLabelOf(default_block));
+  } else {
+    __ LoadConst32(AT, num_entries);
+    __ Bgeu(TMP, AT, codegen_->GetLabelOf(default_block));
+  }
+
+  // We are in the range of the table.
+  // Load the target address from the jump table, indexing by the value.
+  __ LoadLabelAddress(AT, constant_area, table->GetLabel());
+  __ Sll(TMP, TMP, 2);
+  __ Addu(TMP, TMP, AT);
+  __ Lw(TMP, TMP, 0);
+  // Compute the absolute target address by adding the table start address
+  // (the table contains offsets to targets relative to its start).
+  __ Addu(TMP, TMP, AT);
+  // And jump.
+  __ Jr(TMP);
+  __ NopIfNoReordering();
+}
+
+void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+  HBasicBlock* switch_block = switch_instr->GetBlock();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  if (codegen_->GetInstructionSetFeatures().IsR6() &&
+      num_entries > kPackedSwitchJumpTableThreshold) {
+    // R6 uses PC-relative addressing to access the jump table.
+    // R2, OTOH, requires an HMipsComputeBaseMethodAddress input to access
+    // the jump table and it is implemented by changing HPackedSwitch to
+    // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress.
+    // See VisitMipsPackedSwitch() for the table-based implementation on R2.
+    GenTableBasedPackedSwitch(value_reg,
+                              ZERO,
+                              lower_bound,
+                              num_entries,
+                              switch_block,
+                              default_block);
+  } else {
+    GenPackedSwitchWithCompares(value_reg,
+                                lower_bound,
+                                num_entries,
+                                switch_block,
+                                default_block);
+  }
+}
+
+void LocationsBuilderMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  // Constant area pointer (HMipsComputeBaseMethodAddress).
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+  Register constant_area = locations->InAt(1).AsRegister<Register>();
+  HBasicBlock* switch_block = switch_instr->GetBlock();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // This is an R2-only path. HPackedSwitch has been changed to
+  // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress
+  // required to address the jump table relative to PC.
+  GenTableBasedPackedSwitch(value_reg,
+                            constant_area,
+                            lower_bound,
+                            num_entries,
+                            switch_block,
+                            default_block);
+}
+
 void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress(
     HMipsComputeBaseMethodAddress* insn) {
   LocationSummary* locations =
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 0039981..956a466 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -218,6 +218,14 @@
 
   MipsAssembler* GetAssembler() const { return assembler_; }
 
+  // Compare-and-jump packed switch generates approx. 3 + 2.5 * N 32-bit
+  // instructions for N cases.
+  // Table-based packed switch generates approx. 11 32-bit instructions
+  // and N 32-bit data words for N cases.
+  // At N = 6 they come out as 18 and 17 32-bit words respectively.
+  // We switch to the table-based method starting with 7 cases.
+  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+
  private:
   void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
   void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -262,6 +270,17 @@
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
   auto GetImplicitNullChecker(HInstruction* instruction);
+  void GenPackedSwitchWithCompares(Register value_reg,
+                                   int32_t lower_bound,
+                                   uint32_t num_entries,
+                                   HBasicBlock* switch_block,
+                                   HBasicBlock* default_block);
+  void GenTableBasedPackedSwitch(Register value_reg,
+                                 Register constant_area,
+                                 int32_t lower_bound,
+                                 uint32_t num_entries,
+                                 HBasicBlock* switch_block,
+                                 HBasicBlock* default_block);
 
   MipsAssembler* const assembler_;
   CodeGeneratorMIPS* const codegen_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 149a71d..97b7916 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1311,7 +1311,8 @@
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                           \
   M(MipsComputeBaseMethodAddress, Instruction)                          \
-  M(MipsDexCacheArraysBase, Instruction)
+  M(MipsDexCacheArraysBase, Instruction)                                \
+  M(MipsPackedSwitch, Instruction)
 #endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M)
diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h
index de77245..36431c1 100644
--- a/compiler/optimizing/nodes_mips.h
+++ b/compiler/optimizing/nodes_mips.h
@@ -66,6 +66,41 @@
   DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase);
 };
 
+// Mips version of HPackedSwitch that holds a pointer to the base method address.
+class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> {
+ public:
+  HMipsPackedSwitch(int32_t start_value,
+                    int32_t num_entries,
+                    HInstruction* input,
+                    HMipsComputeBaseMethodAddress* method_base,
+                    uint32_t dex_pc)
+    : HTemplateInstruction(SideEffects::None(), dex_pc),
+      start_value_(start_value),
+      num_entries_(num_entries) {
+    SetRawInputAt(0, input);
+    SetRawInputAt(1, method_base);
+  }
+
+  bool IsControlFlow() const OVERRIDE { return true; }
+
+  int32_t GetStartValue() const { return start_value_; }
+
+  int32_t GetNumEntries() const { return num_entries_; }
+
+  HBasicBlock* GetDefaultBlock() const {
+    // Last entry is the default block.
+    return GetBlock()->GetSuccessors()[num_entries_];
+  }
+
+  DECLARE_INSTRUCTION(MipsPackedSwitch);
+
+ private:
+  const int32_t start_value_;
+  const int32_t num_entries_;
+
+  DISALLOW_COPY_AND_ASSIGN(HMipsPackedSwitch);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index c6d297d..6006e6c 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -92,6 +92,25 @@
     }
   }
 
+  void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+    if (switch_insn->GetNumEntries() <=
+        InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) {
+      return;
+    }
+    // We need to replace the HPackedSwitch with a HMipsPackedSwitch in order to
+    // address the constant area.
+    InitializePCRelativeBasePointer();
+    HGraph* graph = GetGraph();
+    HBasicBlock* block = switch_insn->GetBlock();
+    HMipsPackedSwitch* mips_switch = new (graph->GetArena()) HMipsPackedSwitch(
+        switch_insn->GetStartValue(),
+        switch_insn->GetNumEntries(),
+        switch_insn->InputAt(0),
+        base_,
+        switch_insn->GetDexPc());
+    block->ReplaceAndRemoveInstructionWith(switch_insn, mips_switch);
+  }
+
   void HandleInvoke(HInvoke* invoke) {
     // If this is an invoke-static/-direct with PC-relative dex cache array
     // addressing, we need the PC-relative address base.