ARM assembler support for VCNT and VPADDL.

Test: Gtest assembler_thumb2_test.
Change-Id: I8a0e47da746e1c67650cb68196a9f661deed7383
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 274d0de..a571d14 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -671,6 +671,9 @@
   virtual void vcmpdz(DRegister dd, Condition cond = AL) = 0;
   virtual void vmstat(Condition cond = AL) = 0;  // VMRS APSR_nzcv, FPSCR
 
+  virtual void vcntd(DRegister dd, DRegister dm) = 0;
+  virtual void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) = 0;
+
   virtual void vpushs(SRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpushd(DRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpops(SRegister reg, int nregs, Condition cond = AL) = 0;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 0a227b2..6f7119d 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1264,6 +1264,31 @@
   Emit(encoding);
 }
 
+void Arm32Assembler::vcntd(DRegister dd, DRegister dm) {
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B10 | B8) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit(encoding);
+}
+
+void Arm32Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) {
+  CHECK(size == 8 || size == 16 || size == 32) << size;
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B9) |
+    (is_unsigned ? B7 : 0) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit(encoding);
+}
+
 
 void Arm32Assembler::svc(uint32_t imm24) {
   CHECK(IsUint<24>(imm24)) << imm24;
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index bc6020e..8726ac8 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -205,6 +205,9 @@
   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
 
+  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
+  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
+
   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index e570e22..b214062 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -899,4 +899,43 @@
   T3Helper(&arm::Arm32Assembler::revsh, true, "revsh{cond} {reg1}, {reg2}", "revsh");
 }
 
+TEST_F(AssemblerArm32Test, vcnt) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd,
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D32, the M bit and D bit should be 1.
+  GetAssembler()->vcntd(arm::D0, arm::D1);
+  GetAssembler()->vcntd(arm::D19, arm::D20);
+  GetAssembler()->vcntd(arm::D0, arm::D9);
+  GetAssembler()->vcntd(arm::D16, arm::D20);
+
+  std::string expected =
+      "vcnt.8 d0, d1\n"
+      "vcnt.8 d19, d20\n"
+      "vcnt.8 d0, d9\n"
+      "vcnt.8 d16, d20\n";
+
+  DriverStr(expected, "vcnt");
+}
+
+TEST_F(AssemblerArm32Test, vpaddl) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd,
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D32, the M bit and D bit should be 1.
+  // Different data types (signed and unsigned) are also tested.
+  GetAssembler()->vpaddld(arm::D0, arm::D0, 8, true);
+  GetAssembler()->vpaddld(arm::D20, arm::D20, 8, false);
+  GetAssembler()->vpaddld(arm::D0, arm::D20, 16, false);
+  GetAssembler()->vpaddld(arm::D20, arm::D0, 32, true);
+
+  std::string expected =
+      "vpaddl.u8 d0, d0\n"
+      "vpaddl.s8 d20, d20\n"
+      "vpaddl.s16 d0, d20\n"
+      "vpaddl.u32 d20, d0\n";
+
+  DriverStr(expected, "vpaddl");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 546dd65..a72ea41 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -3117,6 +3117,30 @@
   Emit32(encoding);
 }
 
+void Thumb2Assembler::vcntd(DRegister dd, DRegister dm) {
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B10 | B8) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit32(encoding);
+}
+
+void Thumb2Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) {
+  CHECK(size == 8 || size == 16 || size == 32) << size;
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B9) |
+    (is_unsigned ? B7 : 0) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit32(encoding);
+}
 
 void Thumb2Assembler::svc(uint32_t imm8) {
   CHECK(IsUint<8>(imm8)) << imm8;
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index ce310a4..2ca74fc 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -250,6 +250,9 @@
   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
 
+  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
+  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
+
   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index b5cafcb..7f1dc49 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -1380,4 +1380,43 @@
   DriverStr(expected, "revsh");
 }
 
+TEST_F(AssemblerThumb2Test, vcnt) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd,
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D32, the M bit and D bit should be 1.
+  __ vcntd(arm::D0, arm::D1);
+  __ vcntd(arm::D19, arm::D20);
+  __ vcntd(arm::D0, arm::D9);
+  __ vcntd(arm::D16, arm::D20);
+
+  std::string expected =
+      "vcnt.8 d0, d1\n"
+      "vcnt.8 d19, d20\n"
+      "vcnt.8 d0, d9\n"
+      "vcnt.8 d16, d20\n";
+
+  DriverStr(expected, "vcnt");
+}
+
+TEST_F(AssemblerThumb2Test, vpaddl) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd,
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D32, the M bit and D bit should be 1.
+  // Different data types (signed and unsigned) are also tested.
+  __ vpaddld(arm::D0, arm::D0, 8, true);
+  __ vpaddld(arm::D20, arm::D20, 8, false);
+  __ vpaddld(arm::D0, arm::D20, 16, false);
+  __ vpaddld(arm::D20, arm::D0, 32, true);
+
+  std::string expected =
+      "vpaddl.u8 d0, d0\n"
+      "vpaddl.s8 d20, d20\n"
+      "vpaddl.s16 d0, d20\n"
+      "vpaddl.u32 d20, d0\n";
+
+  DriverStr(expected, "vpaddl");
+}
+
 }  // namespace art