buzbee | a7678db | 2012-03-05 15:35:46 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2012 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | namespace art { |
| 18 | |
/*
 * This source file contains "gen" codegen routines that should
 * be applicable to most targets.  Only mid-level support utilities
 * and "op" calls may be used here.
 */
| 24 | |
| 25 | |
/*
 * x86 targets will likely be different enough to need their own
 * invoke gen routines.
 */
/*
 * Common signature for the "emit the next instruction of an invoke
 * sequence" helpers below.  Each call is expected to emit one step of the
 * launch sequence for the given state; the disabled reference
 * implementations in this file return state + 1 to continue, or -1 once
 * the sequence is complete.
 */
typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,
                            uint32_t methodIdx);
| 32 | /* |
| 33 | * If there are any ins passed in registers that have not been promoted |
Ian Rogers | b5d09b2 | 2012-03-06 22:14:17 -0800 | [diff] [blame] | 34 | * to a callee-save register, flush them to the frame. Perform initial |
buzbee | a7678db | 2012-03-05 15:35:46 -0800 | [diff] [blame] | 35 | * assignment of promoted arguments. |
| 36 | */ |
| 37 | void flushIns(CompilationUnit* cUnit) |
| 38 | { |
buzbee | a7678db | 2012-03-05 15:35:46 -0800 | [diff] [blame] | 39 | if (cUnit->numIns == 0) |
| 40 | return; |
buzbee | a7678db | 2012-03-05 15:35:46 -0800 | [diff] [blame] | 41 | int startVReg = cUnit->numDalvikRegisters - cUnit->numIns; |
| 42 | /* |
| 43 | * Arguments passed in registers should be flushed |
| 44 | * to their backing locations in the frame for now. |
| 45 | * Also, we need to do initial assignment for promoted |
| 46 | * arguments. NOTE: an older version of dx had an issue |
| 47 | * in which it would reuse static method argument registers. |
| 48 | * This could result in the same Dalvik virtual register |
| 49 | * being promoted to both core and fp regs. In those |
| 50 | * cases, copy argument to both. This will be uncommon |
| 51 | * enough that it isn't worth attempting to optimize. |
| 52 | */ |
| 53 | for (int i = 0; i < cUnit->numIns; i++) { |
| 54 | PromotionMap vMap = cUnit->promotionMap[startVReg + i]; |
Ian Rogers | b5d09b2 | 2012-03-06 22:14:17 -0800 | [diff] [blame] | 55 | if (i == 0 || i == 1) { |
buzbee | a7678db | 2012-03-05 15:35:46 -0800 | [diff] [blame] | 56 | // If arriving in register |
| 57 | if (vMap.coreLocation == kLocPhysReg) { |
Ian Rogers | b5d09b2 | 2012-03-06 22:14:17 -0800 | [diff] [blame] | 58 | opRegCopy(cUnit, vMap.coreReg, i == 0 ? rARG1 : rARG2); |
buzbee | a7678db | 2012-03-05 15:35:46 -0800 | [diff] [blame] | 59 | } |
| 60 | if (vMap.fpLocation == kLocPhysReg) { |
Ian Rogers | b5d09b2 | 2012-03-06 22:14:17 -0800 | [diff] [blame] | 61 | opRegCopy(cUnit, vMap.fpReg, i == 0 ? rARG1 : rARG2); |
buzbee | a7678db | 2012-03-05 15:35:46 -0800 | [diff] [blame] | 62 | } |
| 63 | // Also put a copy in memory in case we're partially promoted |
| 64 | storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), |
Ian Rogers | b5d09b2 | 2012-03-06 22:14:17 -0800 | [diff] [blame] | 65 | i == 0 ? rARG1 : rARG2, kWord); |
buzbee | a7678db | 2012-03-05 15:35:46 -0800 | [diff] [blame] | 66 | } else { |
| 67 | // If arriving in frame & promoted |
| 68 | if (vMap.coreLocation == kLocPhysReg) { |
| 69 | loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), |
| 70 | vMap.coreReg); |
| 71 | } |
| 72 | if (vMap.fpLocation == kLocPhysReg) { |
| 73 | loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), |
| 74 | vMap.fpReg); |
| 75 | } |
| 76 | } |
| 77 | } |
buzbee | a7678db | 2012-03-05 15:35:46 -0800 | [diff] [blame] | 78 | } |
| 79 | |
| 80 | /* |
| 81 | * Bit of a hack here - in leiu of a real scheduling pass, |
| 82 | * emit the next instruction in static & direct invoke sequences. |
| 83 | */ |
int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
                   int state, uint32_t dexIdx, uint32_t unused)
{
    // Not implemented for this target yet: warn and emit nothing.
    UNIMPLEMENTED(WARNING) << "nextSDCallInsn";
    return 0;
#if 0
    // Disabled reference sequence for a resolved static/direct invoke:
    // each call emits one step and returns the next state (-1 when done).
    switch(state) {
    case 0: // Get the current Method* [sets rARG0]
        loadCurrMethodDirect(cUnit, rARG0);
        break;
    case 1: // Get method->dex_cache_resolved_methods_
        loadWordDisp(cUnit, rARG0,
                     Method::DexCacheResolvedMethodsOffset().Int32Value(),
                     rARG0);
        break;
    case 2: // Grab target method* (4-byte entries in the dex cache array)
        loadWordDisp(cUnit, rARG0,
                     Array::DataOffset(sizeof(Object*)).Int32Value() + dexIdx * 4,
                     rARG0);
        break;
    case 3: // Grab the code from the method*
        loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
                     rINVOKE_TGT);
        break;
    default:
        return -1;
    }
    return state + 1;
#endif
}
| 114 | |
/*
 * Bit of a hack here - in lieu of a real scheduling pass,
 * emit the next instruction in a virtual invoke sequence.
 * We can use rLR as a temp prior to target address loading.
 * Note also that we'll load the first argument ("this") into
 * rARG1 here rather than the standard loadArgRegs.
 */
int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
                  int state, uint32_t dexIdx, uint32_t methodIdx)
{
    // Not implemented for this target yet: warn and emit nothing.
    UNIMPLEMENTED(WARNING) << "nextVCallInsn";
    return 0;
#if 0
    RegLocation rlArg;
    /*
     * Disabled reference sequence.  This is the fast path in which the
     * target virtual method is fully resolved at compile time.
     */
    switch(state) {
    case 0: // Get "this" [set rARG1]
        rlArg = oatGetSrc(cUnit, mir, 0);
        loadValueDirectFixed(cUnit, rlArg, rARG1);
        break;
    case 1: // Is "this" null? [use rARG1]
        genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
        // get this->klass_ [use rARG1, set rINVOKE_TGT]
        loadWordDisp(cUnit, rARG1, Object::ClassOffset().Int32Value(),
                     rINVOKE_TGT);
        break;
    case 2: // Get this->klass_->vtable [use rINVOKE_TGT, set rINVOKE_TGT]
        loadWordDisp(cUnit, rINVOKE_TGT, Class::VTableOffset().Int32Value(),
                     rINVOKE_TGT);
        break;
    case 3: // Get target method [use rINVOKE_TGT, set rARG0]
        // vtable entries are 4 bytes wide.
        loadWordDisp(cUnit, rINVOKE_TGT, (methodIdx * 4) +
                     Array::DataOffset(sizeof(Object*)).Int32Value(),
                     rARG0);
        break;
    case 4: // Get the compiled code address [uses rARG0, sets rINVOKE_TGT]
        loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
                     rINVOKE_TGT);
        break;
    default:
        return -1;
    }
    return state + 1;
#endif
}
| 163 | |
/*
 * Interleave launch code for INVOKE_SUPER.  See comments
 * for nextVCallInsn.
 */
int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
                      int state, uint32_t dexIdx, uint32_t methodIdx)
{
    // Not implemented for this target yet: warn and emit nothing.
    UNIMPLEMENTED(WARNING) << "nextSuperCallInsn";
    return 0;
#if 0
    /*
     * Disabled reference sequence.  This is the fast path in which the
     * target virtual method is fully resolved at compile time.  Note also
     * that this path assumes that the check to verify that the target
     * method index falls within the size of the super's vtable has been
     * done at compile-time.
     */
    RegLocation rlArg;
    switch(state) {
    case 0: // Get current Method* [set rARG0]
        loadCurrMethodDirect(cUnit, rARG0);
        // Load "this" [set rARG1]
        rlArg = oatGetSrc(cUnit, mir, 0);
        loadValueDirectFixed(cUnit, rlArg, rARG1);
        // Get method->declaring_class_ [use rARG0, set rINVOKE_TGT]
        loadWordDisp(cUnit, rARG0,
                     Method::DeclaringClassOffset().Int32Value(),
                     rINVOKE_TGT);
        // Is "this" null? [use rARG1]
        genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
        break;
    case 1: // method->declaring_class_->super_class [use/set rINVOKE_TGT]
        loadWordDisp(cUnit, rINVOKE_TGT,
                     Class::SuperClassOffset().Int32Value(), rINVOKE_TGT);
        break;
    case 2: // Get ...->super_class_->vtable [use/set rINVOKE_TGT]
        loadWordDisp(cUnit, rINVOKE_TGT,
                     Class::VTableOffset().Int32Value(), rINVOKE_TGT);
        break;
    case 3: // Get target method [use rINVOKE_TGT, set rARG0]
        // vtable entries are 4 bytes wide.
        loadWordDisp(cUnit, rINVOKE_TGT, (methodIdx * 4) +
                     Array::DataOffset(sizeof(Object*)).Int32Value(),
                     rARG0);
        break;
    case 4: // target compiled code address [uses rARG0, sets rINVOKE_TGT]
        loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
                     rINVOKE_TGT);
        break;
    default:
        return -1;
    }
    return state + 1;
#endif
}
| 217 | |
/*
 * Shared slow-path helper for all the *InsnSP wrappers below: jump to the
 * runtime trampoline at the given Thread offset with the method index in
 * rARG0.  Only state 0 does any work; the sequence has a single step.
 */
int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline,
                     int state, uint32_t dexIdx, uint32_t methodIdx)
{
    // Not implemented for this target yet: warn and emit nothing.
    UNIMPLEMENTED(WARNING) << "nextInvokeInsnSP";
    return 0;
#if 0
    /*
     * This handles the case in which the base method is not fully
     * resolved at compile time, we bail to a runtime helper.
     */
    if (state == 0) {
        // Load trampoline target
        loadWordDisp(cUnit, rSELF, trampoline, rINVOKE_TGT);
        // Load rARG0 with method index
        loadConstant(cUnit, rARG0, dexIdx);
        return 1;
    }
    return -1;
#endif
}
| 238 | |
| 239 | int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir, |
| 240 | int state, uint32_t dexIdx, uint32_t methodIdx) |
| 241 | { |
| 242 | int trampoline = OFFSETOF_MEMBER(Thread, pInvokeStaticTrampolineWithAccessCheck); |
| 243 | return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); |
| 244 | } |
| 245 | |
| 246 | int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state, |
| 247 | uint32_t dexIdx, uint32_t methodIdx) |
| 248 | { |
| 249 | int trampoline = OFFSETOF_MEMBER(Thread, pInvokeDirectTrampolineWithAccessCheck); |
| 250 | return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); |
| 251 | } |
| 252 | |
| 253 | int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state, |
| 254 | uint32_t dexIdx, uint32_t methodIdx) |
| 255 | { |
| 256 | int trampoline = OFFSETOF_MEMBER(Thread, pInvokeSuperTrampolineWithAccessCheck); |
| 257 | return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); |
| 258 | } |
| 259 | |
| 260 | int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state, |
| 261 | uint32_t dexIdx, uint32_t methodIdx) |
| 262 | { |
| 263 | int trampoline = OFFSETOF_MEMBER(Thread, pInvokeVirtualTrampolineWithAccessCheck); |
| 264 | return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); |
| 265 | } |
| 266 | |
| 267 | /* |
| 268 | * All invoke-interface calls bounce off of art_invoke_interface_trampoline, |
| 269 | * which will locate the target and continue on via a tail call. |
| 270 | */ |
| 271 | int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir, int state, |
| 272 | uint32_t dexIdx, uint32_t unused) |
| 273 | { |
| 274 | int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline); |
| 275 | return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); |
| 276 | } |
| 277 | |
| 278 | int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, MIR* mir, |
| 279 | int state, uint32_t dexIdx, |
| 280 | uint32_t unused) |
| 281 | { |
| 282 | int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampolineWithAccessCheck); |
| 283 | return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); |
| 284 | } |
| 285 | |
/*
 * Load the register-carried arguments (rARG1..rARG3), interleaving one
 * step of the invoke launch sequence after each load.  When skipThis is
 * set, "this" has already been placed in rARG1 and is skipped here.
 */
int loadArgRegs(CompilationUnit* cUnit, MIR* mir, DecodedInstruction* dInsn,
                int callState, NextCallInsn nextCallInsn, uint32_t dexIdx,
                uint32_t methodIdx, bool skipThis)
{
    // Not implemented for this target yet: warn and emit nothing.
    UNIMPLEMENTED(WARNING) << "loadArgRegs";
    return 0;
#if 0
    // Disabled reference implementation.
    int nextReg = rARG1;
    int nextArg = 0;
    if (skipThis) {
        nextReg++;
        nextArg++;
    }
    for (; (nextReg <= rARG3) && (nextArg < mir->ssaRep->numUses); nextReg++) {
        RegLocation rlArg = oatGetRawSrc(cUnit, mir, nextArg++);
        rlArg = oatUpdateRawLoc(cUnit, rlArg);
        // A wide arg consumes a register pair, but only if both halves fit.
        if (rlArg.wide && (nextReg <= rARG2)) {
            loadValueDirectWideFixed(cUnit, rlArg, nextReg, nextReg + 1);
            nextReg++;
            nextArg++;
        } else {
            // Force narrow load; the high half (if any) stays in the frame.
            rlArg.wide = false;
            loadValueDirectFixed(cUnit, rlArg, nextReg);
        }
        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
    }
    return callState;
#endif
}
| 315 | |
| 316 | /* |
| 317 | * Load up to 5 arguments, the first three of which will be in |
| 318 | * rARG1 .. rARG3. On entry rARG0 contains the current method pointer, |
| 319 | * and as part of the load sequence, it must be replaced with |
| 320 | * the target method pointer. Note, this may also be called |
| 321 | * for "range" variants if the number of arguments is 5 or fewer. |
| 322 | */ |
int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir,
                         DecodedInstruction* dInsn, int callState,
                         LIR** pcrLabel, NextCallInsn nextCallInsn,
                         uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
{
    // Not implemented for this target yet: warn and emit nothing.
    UNIMPLEMENTED(WARNING) << "genDalvikArgsNoRange";
    return 0;
#if 0
    // Disabled reference implementation.
    RegLocation rlArg;

    /* If no arguments, just return */
    if (dInsn->vA == 0)
        return callState;

    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);

    DCHECK_LE(dInsn->vA, 5U);
    // Args 4 and 5 don't fit in registers; spill them to the outs area first.
    if (dInsn->vA > 3) {
        uint32_t nextUse = 3;
        // Detect special case of wide arg spanning arg3/arg4
        RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0);
        RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1);
        RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2);
        // True when uses[2] is wide and starts at slot 2 or 3, i.e. its
        // high half lands in the spilled region.
        if (((!rlUse0.wide && !rlUse1.wide) || rlUse0.wide) &&
            rlUse2.wide) {
            int reg;
            // Wide spans, we need the 2nd half of uses[2].
            rlArg = oatUpdateLocWide(cUnit, rlUse2);
            if (rlArg.location == kLocPhysReg) {
                reg = rlArg.highReg;
            } else {
                // rARG2 & rARG3 can safely be used here
                reg = rARG3;
                loadWordDisp(cUnit, rSP,
                             oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
                callState = nextCallInsn(cUnit, mir, callState, dexIdx,
                                         methodIdx);
            }
            // NOTE(review): with nextUse == 3 these two stores hit the same
            // outs offset (16); the second looks redundant - confirm before
            // re-enabling this code.
            storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord);
            storeBaseDisp(cUnit, rSP, 16 /* (3+1)*4 */, reg, kWord);
            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
            nextUse++;
        }
        // Loop through the rest
        while (nextUse < dInsn->vA) {
            int lowReg;
            int highReg;
            rlArg = oatGetRawSrc(cUnit, mir, nextUse);
            rlArg = oatUpdateRawLoc(cUnit, rlArg);
            if (rlArg.location == kLocPhysReg) {
                // Already live in registers; store straight from there.
                lowReg = rlArg.lowReg;
                highReg = rlArg.highReg;
            } else {
                // Stage through rARG2/rARG3 (free at this point).
                lowReg = rARG2;
                highReg = rARG3;
                if (rlArg.wide) {
                    loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg);
                } else {
                    loadValueDirectFixed(cUnit, rlArg, lowReg);
                }
                callState = nextCallInsn(cUnit, mir, callState, dexIdx,
                                         methodIdx);
            }
            // Outs slot 0 holds the Method*, hence the +1.
            int outsOffset = (nextUse + 1) * 4;
            if (rlArg.wide) {
                storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg);
                nextUse += 2;
            } else {
                storeWordDisp(cUnit, rSP, outsOffset, lowReg);
                nextUse++;
            }
            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
        }
    }

    // Now fill the register args (rARG1..rARG3).
    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
                            dexIdx, methodIdx, skipThis);

    if (pcrLabel) {
        // Null-check "this" (in rARG1) for instance invokes.
        *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
    }
    return callState;
#endif
}
| 407 | |
| 408 | /* |
| 409 | * May have 0+ arguments (also used for jumbo). Note that |
| 410 | * source virtual registers may be in physical registers, so may |
| 411 | * need to be flushed to home location before copying. This |
| 412 | * applies to arg3 and above (see below). |
| 413 | * |
| 414 | * Two general strategies: |
| 415 | * If < 20 arguments |
| 416 | * Pass args 3-18 using vldm/vstm block copy |
| 417 | * Pass arg0, arg1 & arg2 in rARG1-rARG3 |
| 418 | * If 20+ arguments |
| 419 | * Pass args arg19+ using memcpy block copy |
| 420 | * Pass arg0, arg1 & arg2 in rARG1-rARG3 |
| 421 | * |
| 422 | */ |
int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir,
                       DecodedInstruction* dInsn, int callState,
                       LIR** pcrLabel, NextCallInsn nextCallInsn,
                       uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
{
    // Not implemented for this target yet: warn and emit nothing.
    UNIMPLEMENTED(WARNING) << "genDalvikArgsRange";
    return 0;
#if 0
    // Disabled reference implementation.
    int firstArg = dInsn->vC;
    int numArgs = dInsn->vA;

    // If we can treat it as non-range (Jumbo ops will use range form)
    if (numArgs <= 5)
        return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel,
                                    nextCallInsn, dexIdx, methodIdx,
                                    skipThis);
    /*
     * Make sure range list doesn't span the break between normal
     * Dalvik vRegs and the ins.
     */
    int highestArg = oatGetSrc(cUnit, mir, numArgs-1).sRegLow;
    int boundaryReg = cUnit->numDalvikRegisters - cUnit->numIns;
    if ((firstArg < boundaryReg) && (highestArg >= boundaryReg)) {
        LOG(FATAL) << "Argument list spanned locals & args";
    }

    /*
     * First load the non-register arguments.  Both forms expect all
     * of the source arguments to be in their home frame location, so
     * scan the sReg names and flush any that have been promoted to
     * frame backing storage.
     */
    // Scan the rest of the args - if in physReg flush to memory
    for (int nextArg = 0; nextArg < numArgs;) {
        RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg);
        if (loc.wide) {
            loc = oatUpdateLocWide(cUnit, loc);
            // Args 0-2 travel in registers; only flush from arg 3 onward
            // (a wide pair starting at 2 straddles, hence >= 2 here).
            if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {
                storeBaseDispWide(cUnit, rSP,
                                  oatSRegOffset(cUnit, loc.sRegLow),
                                  loc.lowReg, loc.highReg);
            }
            nextArg += 2;
        } else {
            loc = oatUpdateLoc(cUnit, loc);
            if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
                storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
                              loc.lowReg, kWord);
            }
            nextArg++;
        }
    }

    // Frame offset of the first non-register arg (arg 3), and its
    // destination in the outs area (slot 0 is the Method*).
    int startOffset = oatSRegOffset(cUnit,
                                    cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow);
    int outsOffset = 4 /* Method* */ + (3 * 4);
#if defined(TARGET_MIPS)
    // MIPS has no vldm/vstm equivalent: always block-copy via memcpy.
    // Generate memcpy
    opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
    opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
    int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
    loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
    callRuntimeHelper(cUnit, rTgt);
    // Restore Method* (clobbered by the helper call).
    loadCurrMethodDirect(cUnit, rARG0);
#else
    if (numArgs >= 20) {
        // Generate memcpy
        opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
        opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
        int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
        loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
        callRuntimeHelper(cUnit, rTgt);
        // Restore Method* (clobbered by the helper call).
        loadCurrMethodDirect(cUnit, rARG0);
    } else {
        // Use vldm/vstm pair using rARG3 as a temp
        // (vldm/vstm can move at most 16 registers per instruction).
        int regsLeft = std::min(numArgs - 3, 16);
        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
        opRegRegImm(cUnit, kOpAdd, rARG3, rSP, startOffset);
        LIR* ld = newLIR3(cUnit, kThumb2Vldms, rARG3, fr0, regsLeft);
        // TUNING: loosen barrier
        ld->defMask = ENCODE_ALL;
        setMemRefType(ld, true /* isLoad */, kDalvikReg);
        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
        opRegRegImm(cUnit, kOpAdd, rARG3, rSP, 4 /* Method* */ + (3 * 4));
        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
        LIR* st = newLIR3(cUnit, kThumb2Vstms, rARG3, fr0, regsLeft);
        setMemRefType(st, false /* isLoad */, kDalvikReg);
        st->defMask = ENCODE_ALL;
        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
    }
#endif

    // Now fill the register args (rARG1..rARG3).
    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
                            dexIdx, methodIdx, skipThis);

    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
    if (pcrLabel) {
        // Null-check "this" (in rARG1) for instance invokes.
        *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
    }
    return callState;
#endif
}
| 527 | |
| 528 | } // namespace art |