ART: Add HADDPS/HADDPD/SHUFPS/SHUFPD instruction generation
This patch adds the assembler encoding-map entries and LIR opcodes needed to
generate the HADDPS, HADDPD, SHUFPS, and SHUFPD instructions on x86.
Change-Id: Ida105d3e57be231a5331564c1a9bc298cf176ce6
Signed-off-by: Olivier Come <olivier.come@intel.com>
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 3f362f2..6d2b6fa 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -400,6 +400,8 @@
EXT_0F_ENCODING_MAP(Pxor, 0x66, 0xEF, REG_DEF0_USE0),
EXT_0F_ENCODING2_MAP(Phaddw, 0x66, 0x38, 0x01, REG_DEF0_USE0),
EXT_0F_ENCODING2_MAP(Phaddd, 0x66, 0x38, 0x02, REG_DEF0_USE0),
+ EXT_0F_ENCODING_MAP(Haddpd, 0x66, 0x7C, REG_DEF0_USE0),
+ EXT_0F_ENCODING_MAP(Haddps, 0xF2, 0x7C, REG_DEF0_USE0),
{ kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" },
{ kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" },
@@ -408,6 +410,9 @@
{ kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" },
{ kX86PshufdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuffRRI", "!0r,!1r,!2d" },
+ { kX86ShufpsRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x00, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "kX86ShufpsRRI", "!0r,!1r,!2d" },
+ { kX86ShufpdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "kX86ShufpdRRI", "!0r,!1r,!2d" },
+
{ kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1, false }, "PsrawRI", "!0r,!1d" },
{ kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1, false }, "PsradRI", "!0r,!1d" },
{ kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1, false }, "PsrlwRI", "!0r,!1d" },
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 17c44bc..b719a12 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -557,11 +557,15 @@
Binary0fOpCode(kX86Pxor), // parallel XOR 128 bits x 1
Binary0fOpCode(kX86Phaddw), // parallel horizontal addition 16 bits x 8
Binary0fOpCode(kX86Phaddd), // parallel horizontal addition 32 bits x 4
+ Binary0fOpCode(kX86Haddpd), // parallel FP horizontal addition 64 bits x 2
+ Binary0fOpCode(kX86Haddps), // parallel FP horizontal addition 32 bits x 4
kX86PextrbRRI, // Extract 8 bits from XMM into GPR
kX86PextrwRRI, // Extract 16 bits from XMM into GPR
kX86PextrdRRI, // Extract 32 bits from XMM into GPR
kX86PshuflwRRI, // Shuffle 16 bits in lower 64 bits of XMM.
kX86PshufdRRI, // Shuffle 32 bits in XMM.
+ kX86ShufpsRRI, // FP Shuffle 32 bits in XMM.
+ kX86ShufpdRRI, // FP Shuffle 64 bits in XMM.
kX86PsrawRI, // signed right shift of floating point registers 16 bits x 8
kX86PsradRI, // signed right shift of floating point registers 32 bits x 4
kX86PsrlwRI, // logical right shift of floating point registers 16 bits x 8