Introduce a number of MSA instructions for MIPS32

Added a number of MSA (The MIPS SIMD Architecture) instructions.
Added assembler tests for each instruction.

Test: mma test-art-host-gtest

Change-Id: I1d499309fc08923484f64d1883b9c3f95eadd3be
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 463daeb..e4c9940 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -36,6 +36,7 @@
 namespace art {
 namespace mips {
 
+static constexpr size_t kMipsHalfwordSize = 2;
 static constexpr size_t kMipsWordSize = 4;
 static constexpr size_t kMipsDoublewordSize = 8;
 
@@ -194,6 +195,7 @@
         last_position_adjustment_(0),
         last_old_position_(0),
         last_branch_id_(0),
+        has_msa_(instruction_set_features != nullptr ? instruction_set_features->HasMsa() : false),
         isa_features_(instruction_set_features) {
     cfi().DelayEmittingAdvancePCs();
   }
@@ -464,6 +466,149 @@
   void Clear(Register rd);
   void Not(Register rd, Register rs);
 
+  // MSA instructions.
+  void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void Ffint_sW(VectorRegister wd, VectorRegister ws);
+  void Ffint_sD(VectorRegister wd, VectorRegister ws);
+  void Ftint_sW(VectorRegister wd, VectorRegister ws);
+  void Ftint_sD(VectorRegister wd, VectorRegister ws);
+
+  void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  // Immediate shift instructions, where shamtN denotes shift amount (must be between 0 and 2^N-1).
+  void SlliB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SlliH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SlliW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SlliD(VectorRegister wd, VectorRegister ws, int shamt6);
+  void SraiB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SraiH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SraiW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SraiD(VectorRegister wd, VectorRegister ws, int shamt6);
+  void SrliB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SrliH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SrliW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SrliD(VectorRegister wd, VectorRegister ws, int shamt6);
+
+  void MoveV(VectorRegister wd, VectorRegister ws);
+  void SplatiB(VectorRegister wd, VectorRegister ws, int n4);
+  void SplatiH(VectorRegister wd, VectorRegister ws, int n3);
+  void SplatiW(VectorRegister wd, VectorRegister ws, int n2);
+  void SplatiD(VectorRegister wd, VectorRegister ws, int n1);
+  void FillB(VectorRegister wd, Register rs);
+  void FillH(VectorRegister wd, Register rs);
+  void FillW(VectorRegister wd, Register rs);
+
+  void LdiB(VectorRegister wd, int imm8);
+  void LdiH(VectorRegister wd, int imm10);
+  void LdiW(VectorRegister wd, int imm10);
+  void LdiD(VectorRegister wd, int imm10);
+  void LdB(VectorRegister wd, Register rs, int offset);
+  void LdH(VectorRegister wd, Register rs, int offset);
+  void LdW(VectorRegister wd, Register rs, int offset);
+  void LdD(VectorRegister wd, Register rs, int offset);
+  void StB(VectorRegister wd, Register rs, int offset);
+  void StH(VectorRegister wd, Register rs, int offset);
+  void StW(VectorRegister wd, Register rs, int offset);
+  void StD(VectorRegister wd, Register rs, int offset);
+
+  void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
   // Higher level composite instructions.
   void LoadConst32(Register rd, int32_t value);
   void LoadConst64(Register reg_hi, Register reg_lo, int64_t value);
@@ -1282,6 +1427,30 @@
   uint32_t EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm);
   void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
   void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);
+  uint32_t EmitMsa3R(int operation,
+                     int df,
+                     VectorRegister wt,
+                     VectorRegister ws,
+                     VectorRegister wd,
+                     int minor_opcode);
+  uint32_t EmitMsaBIT(int operation,
+                      int df_m,
+                      VectorRegister ws,
+                      VectorRegister wd,
+                      int minor_opcode);
+  uint32_t EmitMsaELM(int operation,
+                      int df_n,
+                      VectorRegister ws,
+                      VectorRegister wd,
+                      int minor_opcode);
+  uint32_t EmitMsaMI10(int s10, Register rs, VectorRegister wd, int minor_opcode, int df);
+  uint32_t EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode);
+  uint32_t EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
+  uint32_t EmitMsa2RF(int operation,
+                      int df,
+                      VectorRegister ws,
+                      VectorRegister wd,
+                      int minor_opcode);
 
   void Buncond(MipsLabel* label);
   void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
@@ -1332,6 +1501,10 @@
   // Emits exception block.
   void EmitExceptionPoll(MipsExceptionSlowPath* exception);
 
+  bool HasMsa() const {
+    return has_msa_;
+  }
+
   bool IsR6() const {
     if (isa_features_ != nullptr) {
       return isa_features_->IsR6();
@@ -1386,6 +1559,8 @@
   uint32_t last_old_position_;
   uint32_t last_branch_id_;
 
+  const bool has_msa_;
+
   const MipsInstructionSetFeatures* isa_features_;
 
   DISALLOW_COPY_AND_ASSIGN(MipsAssembler);