Blame - src/compiler/codegen/GenInvoke.cc - platform_art

blob: 4698868ad29d3f7f37258d5a8a6b1146a0a316ae [file] [log] [blame]

buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2012 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	namespace art {
				18
				19	/*
				20	* This source file contains "gen" codegen routines that should
				21	* be applicable to most targets. Only mid-level support utilities
				22	* and "op" calls may be used here.
				23	*/
				24
				25
				26	/*
				27	* x86 targets will likely be different enough to need their own
				28	* invoke gen routines.
				29	*/
				30	#if defined(TARGET_ARM) || defined (TARGET_MIPS)
				31	typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,
				32	uint32_t methodIdx);
				33	/*
				34	* If there are any ins passed in registers that have not been promoted
				35	* to a callee-save register, flush them to the frame. Perform initial
				36	* assignment of promoted arguments.
				37	*/
				38	void flushIns(CompilationUnit* cUnit)
				39	{
				40	if (cUnit->numIns == 0)
				41	return;
				42	int firstArgReg = rARG1;
				43	int lastArgReg = rARG3;
				44	int startVReg = cUnit->numDalvikRegisters - cUnit->numIns;
				45	/*
				46	* Arguments passed in registers should be flushed
				47	* to their backing locations in the frame for now.
				48	* Also, we need to do initial assignment for promoted
				49	* arguments. NOTE: an older version of dx had an issue
				50	* in which it would reuse static method argument registers.
				51	* This could result in the same Dalvik virtual register
				52	* being promoted to both core and fp regs. In those
				53	* cases, copy argument to both. This will be uncommon
				54	* enough that it isn't worth attempting to optimize.
				55	*/
				56	for (int i = 0; i < cUnit->numIns; i++) {
				57	PromotionMap vMap = cUnit->promotionMap[startVReg + i];
				58	if (i <= (lastArgReg - firstArgReg)) {
				59	// If arriving in register
				60	if (vMap.coreLocation == kLocPhysReg) {
buzbee	82488f5	2012-03-02 08:20:26 -0800	[diff] [blame]	61	opRegCopy(cUnit, vMap.coreReg, firstArgReg + i);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	62	}
				63	if (vMap.fpLocation == kLocPhysReg) {
buzbee	82488f5	2012-03-02 08:20:26 -0800	[diff] [blame]	64	opRegCopy(cUnit, vMap.fpReg, firstArgReg + i);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	65	}
				66	// Also put a copy in memory in case we're partially promoted
				67	storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
				68	firstArgReg + i, kWord);
				69	} else {
				70	// If arriving in frame & promoted
				71	if (vMap.coreLocation == kLocPhysReg) {
				72	loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
				73	vMap.coreReg);
				74	}
				75	if (vMap.fpLocation == kLocPhysReg) {
				76	loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
				77	vMap.fpReg);
				78	}
				79	}
				80	}
				81	}
				82
				83	/*
				84	* Bit of a hack here - in lieu of a real scheduling pass,
				85	* emit the next instruction in static & direct invoke sequences.
				86	*/
				87	int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
				88	int state, uint32_t dexIdx, uint32_t unused)
				89	{
				90	switch(state) {
				91	case 0: // Get the current Method* [sets rARG0]
				92	loadCurrMethodDirect(cUnit, rARG0);
				93	break;
				94	case 1: // Get method->dex_cache_resolved_methods_
				95	loadWordDisp(cUnit, rARG0,
				96	Method::DexCacheResolvedMethodsOffset().Int32Value(),
				97	rARG0);
				98	break;
				99	case 2: // Grab target method*
				100	loadWordDisp(cUnit, rARG0,
				101	Array::DataOffset(sizeof(Object*)).Int32Value() + dexIdx * 4,
				102	rARG0);
				103	break;
				104	case 3: // Grab the code from the method*
				105	loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	106	rINVOKE_TGT);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	107	break;
				108	default:
				109	return -1;
				110	}
				111	return state + 1;
				112	}
				113
				114	/*
				115	* Bit of a hack here - in lieu of a real scheduling pass,
				116	* emit the next instruction in a virtual invoke sequence.
				117	* We can use rLR as a temp prior to target address loading
				118	* Note also that we'll load the first argument ("this") into
				119	* rARG1 here rather than the standard loadArgRegs.
				120	*/
				121	int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
				122	int state, uint32_t dexIdx, uint32_t methodIdx)
				123	{
				124	RegLocation rlArg;
				125	/*
				126	* This is the fast path in which the target virtual method is
				127	* fully resolved at compile time.
				128	*/
				129	switch(state) {
				130	case 0: // Get "this" [set rARG1]
				131	rlArg = oatGetSrc(cUnit, mir, 0);
				132	loadValueDirectFixed(cUnit, rlArg, rARG1);
				133	break;
				134	case 1: // Is "this" null? [use rARG1]
				135	genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	136	// get this->klass_ [use rARG1, set rINVOKE_TGT]
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	137	loadWordDisp(cUnit, rARG1, Object::ClassOffset().Int32Value(),
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	138	rINVOKE_TGT);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	139	break;
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	140	case 2: // Get this->klass_->vtable [use rINVOKE_TGT, set rINVOKE_TGT]
				141	loadWordDisp(cUnit, rINVOKE_TGT, Class::VTableOffset().Int32Value(),
				142	rINVOKE_TGT);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	143	break;
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	144	case 3: // Get target method [use rINVOKE_TGT, set rARG0]
				145	loadWordDisp(cUnit, rINVOKE_TGT, (methodIdx * 4) +
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	146	Array::DataOffset(sizeof(Object*)).Int32Value(),
				147	rARG0);
				148	break;
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	149	case 4: // Get the compiled code address [uses rARG0, sets rINVOKE_TGT]
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	150	loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	151	rINVOKE_TGT);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	152	break;
				153	default:
				154	return -1;
				155	}
				156	return state + 1;
				157	}
				158
				159	/*
				160	* Interleave launch code for INVOKE_SUPER. See comments
				161	* for nextVCallInsn.
				162	*/
				163	int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
				164	int state, uint32_t dexIdx, uint32_t methodIdx)
				165	{
				166	/*
				167	* This is the fast path in which the target virtual method is
				168	* fully resolved at compile time. Note also that this path assumes
				169	* that the check to verify that the target method index falls
				170	* within the size of the super's vtable has been done at compile-time.
				171	*/
				172	RegLocation rlArg;
				173	switch(state) {
				174	case 0: // Get current Method* [set rARG0]
				175	loadCurrMethodDirect(cUnit, rARG0);
				176	// Load "this" [set rARG1]
				177	rlArg = oatGetSrc(cUnit, mir, 0);
				178	loadValueDirectFixed(cUnit, rlArg, rARG1);
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	179	// Get method->declaring_class_ [use rARG0, set rINVOKE_TGT]
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	180	loadWordDisp(cUnit, rARG0,
				181	Method::DeclaringClassOffset().Int32Value(),
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	182	rINVOKE_TGT);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	183	// Is "this" null? [use rARG1]
				184	genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
				185	break;
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	186	case 1: // method->declaring_class_->super_class [use/set rINVOKE_TGT]
				187	loadWordDisp(cUnit, rINVOKE_TGT,
				188	Class::SuperClassOffset().Int32Value(), rINVOKE_TGT);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	189	break;
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	190	case 2: // Get ...->super_class_->vtable [u/s rINVOKE_TGT]
				191	loadWordDisp(cUnit, rINVOKE_TGT,
				192	Class::VTableOffset().Int32Value(), rINVOKE_TGT);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	193	break;
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	194	case 3: // Get target method [use rINVOKE_TGT, set rARG0]
				195	loadWordDisp(cUnit, rINVOKE_TGT, (methodIdx * 4) +
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	196	Array::DataOffset(sizeof(Object*)).Int32Value(),
				197	rARG0);
				198	break;
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	199	case 4: // target compiled code address [uses rARG0, sets rINVOKE_TGT]
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	200	loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	201	rINVOKE_TGT);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	202	break;
				203	default:
				204	return -1;
				205	}
				206	return state + 1;
				207	}
				208
				209	int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline,
				210	int state, uint32_t dexIdx, uint32_t methodIdx)
				211	{
				212	/*
				213	* This handles the case in which the base method is not fully
				214	* resolved at compile time, we bail to a runtime helper.
				215	*/
				216	if (state == 0) {
				217	// Load trampoline target
buzbee	0398c42	2012-03-02 15:22:47 -0800	[diff] [blame^]	218	loadWordDisp(cUnit, rSELF, trampoline, rINVOKE_TGT);
buzbee	31a4a6f	2012-02-28 15:36:15 -0800	[diff] [blame]	219	// Load rARG0 with method index
				220	loadConstant(cUnit, rARG0, dexIdx);
				221	return 1;
				222	}
				223	return -1;
				224	}
				225
				226	int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir,
				227	int state, uint32_t dexIdx, uint32_t methodIdx)
				228	{
				229	int trampoline = OFFSETOF_MEMBER(Thread, pInvokeStaticTrampolineWithAccessCheck);
				230	return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
				231	}
				232
				233	int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
				234	uint32_t dexIdx, uint32_t methodIdx)
				235	{
				236	int trampoline = OFFSETOF_MEMBER(Thread, pInvokeDirectTrampolineWithAccessCheck);
				237	return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
				238	}
				239
				240	int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
				241	uint32_t dexIdx, uint32_t methodIdx)
				242	{
				243	int trampoline = OFFSETOF_MEMBER(Thread, pInvokeSuperTrampolineWithAccessCheck);
				244	return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
				245	}
				246
				247	int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
				248	uint32_t dexIdx, uint32_t methodIdx)
				249	{
				250	int trampoline = OFFSETOF_MEMBER(Thread, pInvokeVirtualTrampolineWithAccessCheck);
				251	return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
				252	}
				253
				254	/*
				255	* All invoke-interface calls bounce off of art_invoke_interface_trampoline,
				256	* which will locate the target and continue on via a tail call.
				257	*/
				258	int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir, int state,
				259	uint32_t dexIdx, uint32_t unused)
				260	{
				261	int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline);
				262	return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
				263	}
				264
				265	int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, MIR* mir,
				266	int state, uint32_t dexIdx,
				267	uint32_t unused)
				268	{
				269	int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampolineWithAccessCheck);
				270	return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
				271	}
				272
				273	int loadArgRegs(CompilationUnit* cUnit, MIR* mir, DecodedInstruction* dInsn,
				274	int callState, NextCallInsn nextCallInsn, uint32_t dexIdx,
				275	uint32_t methodIdx, bool skipThis)
				276	{
				277	int nextReg = rARG1;
				278	int nextArg = 0;
				279	if (skipThis) {
				280	nextReg++;
				281	nextArg++;
				282	}
				283	for (; (nextReg <= rARG3) && (nextArg < mir->ssaRep->numUses); nextReg++) {
				284	RegLocation rlArg = oatGetRawSrc(cUnit, mir, nextArg++);
				285	rlArg = oatUpdateRawLoc(cUnit, rlArg);
				286	if (rlArg.wide && (nextReg <= rARG2)) {
				287	loadValueDirectWideFixed(cUnit, rlArg, nextReg, nextReg + 1);
				288	nextReg++;
				289	nextArg++;
				290	} else {
				291	rlArg.wide = false;
				292	loadValueDirectFixed(cUnit, rlArg, nextReg);
				293	}
				294	callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
				295	}
				296	return callState;
				297	}
				298
				299	/*
				300	* Load up to 5 arguments, the first three of which will be in
				301	* rARG1 .. rARG3. On entry rARG0 contains the current method pointer,
				302	* and as part of the load sequence, it must be replaced with
				303	* the target method pointer. Note, this may also be called
				304	* for "range" variants if the number of arguments is 5 or fewer.
				305	*/
				306	int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir,
				307	DecodedInstruction* dInsn, int callState,
				308	LIR** pcrLabel, NextCallInsn nextCallInsn,
				309	uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
				310	{
				311	RegLocation rlArg;
				312
				313	/* If no arguments, just return */
				314	if (dInsn->vA == 0)
				315	return callState;
				316
				317	callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
				318
				319	DCHECK_LE(dInsn->vA, 5U);
				320	if (dInsn->vA > 3) {
				321	uint32_t nextUse = 3;
				322	//Detect special case of wide arg spanning arg3/arg4
				323	RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0);
				324	RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1);
				325	RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2);
				326	if (((!rlUse0.wide && !rlUse1.wide) \|\| rlUse0.wide) &&
				327	rlUse2.wide) {
				328	int reg;
				329	// Wide spans, we need the 2nd half of uses[2].
				330	rlArg = oatUpdateLocWide(cUnit, rlUse2);
				331	if (rlArg.location == kLocPhysReg) {
				332	reg = rlArg.highReg;
				333	} else {
				334	// rARG2 & rARG3 can safely be used here
				335	reg = rARG3;
				336	loadWordDisp(cUnit, rSP,
				337	oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
				338	callState = nextCallInsn(cUnit, mir, callState, dexIdx,
				339	methodIdx);
				340	}
				341	storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord);
				342	storeBaseDisp(cUnit, rSP, 16 /* (3+1)*4 */, reg, kWord);
				343	callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
				344	nextUse++;
				345	}
				346	// Loop through the rest
				347	while (nextUse < dInsn->vA) {
				348	int lowReg;
				349	int highReg;
				350	rlArg = oatGetRawSrc(cUnit, mir, nextUse);
				351	rlArg = oatUpdateRawLoc(cUnit, rlArg);
				352	if (rlArg.location == kLocPhysReg) {
				353	lowReg = rlArg.lowReg;
				354	highReg = rlArg.highReg;
				355	} else {
				356	lowReg = rARG2;
				357	highReg = rARG3;
				358	if (rlArg.wide) {
				359	loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg);
				360	} else {
				361	loadValueDirectFixed(cUnit, rlArg, lowReg);
				362	}
				363	callState = nextCallInsn(cUnit, mir, callState, dexIdx,
				364	methodIdx);
				365	}
				366	int outsOffset = (nextUse + 1) * 4;
				367	if (rlArg.wide) {
				368	storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg);
				369	nextUse += 2;
				370	} else {
				371	storeWordDisp(cUnit, rSP, outsOffset, lowReg);
				372	nextUse++;
				373	}
				374	callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
				375	}
				376	}
				377
				378	callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
				379	dexIdx, methodIdx, skipThis);
				380
				381	if (pcrLabel) {
				382	*pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
				383	}
				384	return callState;
				385	}
				386
				387	/*
				388	* May have 0+ arguments (also used for jumbo). Note that
				389	* source virtual registers may be in physical registers, so may
				390	* need to be flushed to home location before copying. This
				391	* applies to arg3 and above (see below).
				392	*
				393	* Two general strategies:
				394	* If < 20 arguments
				395	* Pass args 3-18 using vldm/vstm block copy
				396	* Pass arg0, arg1 & arg2 in rARG1-rARG3
				397	* If 20+ arguments
				398	* Pass args arg19+ using memcpy block copy
				399	* Pass arg0, arg1 & arg2 in rARG1-rARG3
				400	*
				401	*/
				402	int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir,
				403	DecodedInstruction* dInsn, int callState,
				404	LIR** pcrLabel, NextCallInsn nextCallInsn,
				405	uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
				406	{
				407	int firstArg = dInsn->vC;
				408	int numArgs = dInsn->vA;
				409
				410	// If we can treat it as non-range (Jumbo ops will use range form)
				411	if (numArgs <= 5)
				412	return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel,
				413	nextCallInsn, dexIdx, methodIdx,
				414	skipThis);
				415	/*
				416	* Make sure range list doesn't span the break between the normal
				417	* Dalvik vRegs and the ins.
				418	*/
				419	int highestArg = oatGetSrc(cUnit, mir, numArgs-1).sRegLow;
				420	int boundaryReg = cUnit->numDalvikRegisters - cUnit->numIns;
				421	if ((firstArg < boundaryReg) && (highestArg >= boundaryReg)) {
				422	LOG(FATAL) << "Argument list spanned locals & args";
				423	}
				424
				425	/*
				426	* First load the non-register arguments. Both forms expect all
				427	* of the source arguments to be in their home frame location, so
				428	* scan the sReg names and flush any that have been promoted to
				429	* frame backing storage.
				430	*/
				431	// Scan the rest of the args - if in physReg flush to memory
				432	for (int nextArg = 0; nextArg < numArgs;) {
				433	RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg);
				434	if (loc.wide) {
				435	loc = oatUpdateLocWide(cUnit, loc);
				436	if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {
				437	storeBaseDispWide(cUnit, rSP,
				438	oatSRegOffset(cUnit, loc.sRegLow),
				439	loc.lowReg, loc.highReg);
				440	}
				441	nextArg += 2;
				442	} else {
				443	loc = oatUpdateLoc(cUnit, loc);
				444	if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
				445	storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
				446	loc.lowReg, kWord);
				447	}
				448	nextArg++;
				449	}
				450	}
				451
				452	int startOffset = oatSRegOffset(cUnit,
				453	cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow);
				454	int outsOffset = 4 /* Method* / + (3 4);
				455	#if defined(TARGET_MIPS)
				456	// Generate memcpy
				457	opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
				458	opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
				459	int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
				460	loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
				461	callRuntimeHelper(cUnit, rTgt);
				462	// Restore Method*
				463	loadCurrMethodDirect(cUnit, rARG0);
				464	#else
				465	if (numArgs >= 20) {
				466	// Generate memcpy
				467	opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
				468	opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
				469	int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
				470	loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
				471	callRuntimeHelper(cUnit, rTgt);
				472	// Restore Method*
				473	loadCurrMethodDirect(cUnit, rARG0);
				474	} else {
				475	// Use vldm/vstm pair using rARG3 as a temp
				476	int regsLeft = std::min(numArgs - 3, 16);
				477	callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
				478	opRegRegImm(cUnit, kOpAdd, rARG3, rSP, startOffset);
				479	LIR* ld = newLIR3(cUnit, kThumb2Vldms, rARG3, fr0, regsLeft);
				480	//TUNING: loosen barrier
				481	ld->defMask = ENCODE_ALL;
				482	setMemRefType(ld, true /* isLoad */, kDalvikReg);
				483	callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
				484	opRegRegImm(cUnit, kOpAdd, rARG3, rSP, 4 /* Method* */ + (3 * 4));
				485	callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
				486	LIR* st = newLIR3(cUnit, kThumb2Vstms, rARG3, fr0, regsLeft);
				487	setMemRefType(st, false /* isLoad */, kDalvikReg);
				488	st->defMask = ENCODE_ALL;
				489	callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
				490	}
				491	#endif
				492
				493	callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
				494	dexIdx, methodIdx, skipThis);
				495
				496	callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
				497	if (pcrLabel) {
				498	*pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
				499	}
				500	return callState;
				501	}
				502
				503	#endif // TARGET_ARM || TARGET_MIPS
				504
				505
				506	} // namespace art