Reduce x86 sequence for GP pair to XMM
Add support for punpckldq, which is useful for interleaving the
low-order 32-bit values from two XMM registers.
This new instruction is now used for transfers from GP pairs
to XMM, replacing the previous two-instruction psllq + orps
sequence with a single instruction in order to reduce path length.
Change-Id: I70d9b69449dfcfb9a94a628deb74a7cffe96bac7
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 5e1c4d1..35bdb0f 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -288,6 +288,7 @@
EXT_0F_ENCODING_MAP(Subss, 0xF3, 0x5C, REG_DEF0),
EXT_0F_ENCODING_MAP(Divsd, 0xF2, 0x5E, REG_DEF0),
EXT_0F_ENCODING_MAP(Divss, 0xF3, 0x5E, REG_DEF0),
+ EXT_0F_ENCODING_MAP(Punpckldq, 0x66, 0x62, REG_DEF0),
{ kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
{ kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index a2c215c..fcfd885 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -346,8 +346,7 @@
if (val_hi != 0) {
r_dest_hi = AllocTempDouble();
LoadConstantNoClobber(r_dest_hi, val_hi);
- NewLIR2(kX86PsllqRI, r_dest_hi, 32);
- NewLIR2(kX86OrpsRR, r_dest_lo, r_dest_hi);
+ NewLIR2(kX86PunpckldqRR, r_dest_lo, r_dest_hi);
FreeTemp(r_dest_hi);
}
}
@@ -594,8 +593,7 @@
NewLIR2(kX86MovdxrRR, fp_reg, low_reg);
int tmp_reg = AllocTempDouble();
NewLIR2(kX86MovdxrRR, tmp_reg, high_reg);
- NewLIR2(kX86PsllqRI, tmp_reg, 32);
- NewLIR2(kX86OrpsRR, fp_reg, tmp_reg);
+ NewLIR2(kX86PunpckldqRR, fp_reg, tmp_reg);
FreeTemp(tmp_reg);
}
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index d7f61fc..e091a84 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -350,6 +350,7 @@
Binary0fOpCode(kX86Subss), // float subtract
Binary0fOpCode(kX86Divsd), // double divide
Binary0fOpCode(kX86Divss), // float divide
+ Binary0fOpCode(kX86Punpckldq), // Interleave low-order double words
kX86PsrlqRI, // right shift of floating point registers
kX86PsllqRI, // left shift of floating point registers
kX86SqrtsdRR, // sqrt of floating point register