Specializing x86 range argument copying
The ARM implementation of range argument copying was already specialized
for some cases, while all other architectures fell back to generating a
call to memcpy. This patch updates the x86 implementation so that it no
longer calls memcpy and instead generates inline loads and stores,
preferring to move data in 128-bit chunks.
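
A minimal sketch of how the copy loop could drive the new
OpMovRegMem/OpMovMemReg helpers introduced below (the actual call-site
change is not shown in these hunks; temp_xmm, the offset bookkeeping and
the loop bound are illustrative assumptions):

  // Copy four VRs (16 bytes) at a time through a temporary XMM register.
  while (regs_left_to_pass_via_stack >= 4) {
    // Use an aligned 128-bit move only when the offset is known to be
    // 16-byte aligned; otherwise fall back to the unaligned form.
    bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
    bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
    OpMovRegMem(temp_xmm, rX86_SP, current_src_offset,
                src_is_16b_aligned ? kMovA128FP : kMovU128FP);
    OpMovMemReg(rX86_SP, current_dest_offset, temp_xmm,
                dest_is_16b_aligned ? kMovA128FP : kMovU128FP);
    current_src_offset += 16;
    current_dest_offset += 16;
    regs_left_to_pass_via_stack -= 4;
  }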
Change-Id: Ic891e5609a4b0e81a47c29cc5a9b301bd10a1933
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 3058b0c..ae53ddb 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -297,6 +297,24 @@
{ kX86SqrtsdRR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0xF2, 0, 0x0F, 0x51, 0, 0, 0, 0 }, "SqrtsdRR", "!0r,!1r" },
{ kX86FstpdM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
+ EXT_0F_ENCODING_MAP(Movups, 0x0, 0x10, REG_DEF0),
+ { kX86MovupsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
+ { kX86MovupsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+ EXT_0F_ENCODING_MAP(Movaps, 0x0, 0x28, REG_DEF0),
+ { kX86MovapsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsMR", "[!0r+!1d],!2r" },
+ { kX86MovapsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+ { kX86MovlpsRM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRM", "!0r,[!1r+!2d]" },
+ { kX86MovlpsRA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+ { kX86MovlpsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsMR", "[!0r+!1d],!2r" },
+ { kX86MovlpsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+ { kX86MovhpsRM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRM", "!0r,[!1r+!2d]" },
+ { kX86MovhpsRA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+ { kX86MovhpsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsMR", "[!0r+!1d],!2r" },
+ { kX86MovhpsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
EXT_0F_ENCODING_MAP(Movdxr, 0x66, 0x6E, REG_DEF0),
{ kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
{ kX86MovdrxMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" },
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 6896504..4c1c171 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -207,6 +207,8 @@
LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
LIR* OpRegMem(OpKind op, int r_dest, RegLocation value);
LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+ LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type);
+ LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type);
LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 18c5ca8..e2744d0 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -211,6 +211,110 @@
return NewLIR2(opcode, r_dest_src1, r_src2);
}
+LIR* X86Mir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
+ DCHECK(!(X86_FPREG(r_base)));
+
+ X86OpCode opcode = kX86Nop;
+ switch (move_type) {
+ case kMov8GP:
+ CHECK(!X86_FPREG(r_dest));
+ opcode = kX86Mov8RM;
+ break;
+ case kMov16GP:
+ CHECK(!X86_FPREG(r_dest));
+ opcode = kX86Mov16RM;
+ break;
+ case kMov32GP:
+ CHECK(!X86_FPREG(r_dest));
+ opcode = kX86Mov32RM;
+ break;
+ case kMov32FP:
+ CHECK(X86_FPREG(r_dest));
+ opcode = kX86MovssRM;
+ break;
+ case kMov64FP:
+ CHECK(X86_FPREG(r_dest));
+ opcode = kX86MovsdRM;
+ break;
+ case kMovU128FP:
+ CHECK(X86_FPREG(r_dest));
+ opcode = kX86MovupsRM;
+ break;
+ case kMovA128FP:
+ CHECK(X86_FPREG(r_dest));
+ opcode = kX86MovapsRM;
+ break;
+ case kMovLo128FP:
+ CHECK(X86_FPREG(r_dest));
+ opcode = kX86MovlpsRM;
+ break;
+ case kMovHi128FP:
+ CHECK(X86_FPREG(r_dest));
+ opcode = kX86MovhpsRM;
+ break;
+ case kMov64GP:
+ case kMovLo64FP:
+ case kMovHi64FP:
+ default:
+ LOG(FATAL) << "Bad case in OpMovRegMem";
+ break;
+ }
+
+ return NewLIR3(opcode, r_dest, r_base, offset);
+}
+
+LIR* X86Mir2Lir::OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) {
+ DCHECK(!(X86_FPREG(r_base)));
+
+ X86OpCode opcode = kX86Nop;
+ switch (move_type) {
+ case kMov8GP:
+ CHECK(!X86_FPREG(r_src));
+ opcode = kX86Mov8MR;
+ break;
+ case kMov16GP:
+ CHECK(!X86_FPREG(r_src));
+ opcode = kX86Mov16MR;
+ break;
+ case kMov32GP:
+ CHECK(!X86_FPREG(r_src));
+ opcode = kX86Mov32MR;
+ break;
+ case kMov32FP:
+ CHECK(X86_FPREG(r_src));
+ opcode = kX86MovssMR;
+ break;
+ case kMov64FP:
+ CHECK(X86_FPREG(r_src));
+ opcode = kX86MovsdMR;
+ break;
+ case kMovU128FP:
+ CHECK(X86_FPREG(r_src));
+ opcode = kX86MovupsMR;
+ break;
+ case kMovA128FP:
+ CHECK(X86_FPREG(r_src));
+ opcode = kX86MovapsMR;
+ break;
+ case kMovLo128FP:
+ CHECK(X86_FPREG(r_src));
+ opcode = kX86MovlpsMR;
+ break;
+ case kMovHi128FP:
+ CHECK(X86_FPREG(r_src));
+ opcode = kX86MovhpsMR;
+ break;
+ case kMov64GP:
+ case kMovLo64FP:
+ case kMovHi64FP:
+ default:
+ LOG(FATAL) << "Bad case in OpMovMemReg";
+ break;
+ }
+
+ return NewLIR3(opcode, r_base, offset, r_src);
+}
+
LIR* X86Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
// The only conditional reg to reg operation supported is Cmov
DCHECK_EQ(op, kOpCmov);
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 7f35d06..6962ff7 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -357,6 +357,14 @@
kX86PsllqRI, // left shift of floating point registers
kX86SqrtsdRR, // sqrt of floating point register
kX86FstpdM, // Store and pop top x87 fp stack
+ Binary0fOpCode(kX86Movups), // load unaligned packed single FP values from xmm2/m128 to xmm1
+ kX86MovupsMR, kX86MovupsAR, // store unaligned packed single FP values from xmm1 to m128
+ Binary0fOpCode(kX86Movaps), // load aligned packed single FP values from xmm2/m128 to xmm1
+ kX86MovapsMR, kX86MovapsAR, // store aligned packed single FP values from xmm1 to m128
+ kX86MovlpsRM, kX86MovlpsRA, // load packed single FP values from m64 to low quadword of xmm
+ kX86MovlpsMR, kX86MovlpsAR, // store packed single FP values from low quadword of xmm to m64
+ kX86MovhpsRM, kX86MovhpsRA, // load packed single FP values from m64 to high quadword of xmm
+ kX86MovhpsMR, kX86MovhpsAR, // store packed single FP values from high quadword of xmm to m64
Binary0fOpCode(kX86Movdxr), // move into xmm from gpr
kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR, // move into reg from xmm
kX86Set8R, kX86Set8M, kX86Set8A, // set byte depending on condition operand
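
A remainder smaller than 16 bytes could, for example, be moved through the
low quadword of an XMM register using the new Movlps encodings (a sketch
only; the variable names are placeholders and not part of this patch):

  // Move an 8-byte tail via the low 64 bits of the temporary XMM register
  // (kMovLo128FP maps to kX86MovlpsRM for the load and kX86MovlpsMR for
  // the store).
  if (bytes_left_to_copy >= 8) {
    OpMovRegMem(temp_xmm, rX86_SP, current_src_offset, kMovLo128FP);
    OpMovMemReg(rX86_SP, current_dest_offset, temp_xmm, kMovLo128FP);
    current_src_offset += 8;
    current_dest_offset += 8;
    bytes_left_to_copy -= 8;
  }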