blob: ab4f6f9d280b3dc466081bde4348e67e00e773b0 [file] [log] [blame]
Mark Mendell09ed1a32015-03-25 08:30:06 -04001/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "intrinsics_x86.h"
18
Andreas Gampe21030dd2015-05-07 14:46:15 -070019#include <limits>
20
Mark Mendellfb8d2792015-03-31 22:16:59 -040021#include "arch/x86/instruction_set_features_x86.h"
Mathieu Chartiere401d142015-04-22 13:56:20 -070022#include "art_method.h"
Mark Mendelld5897672015-08-12 21:16:41 -040023#include "base/bit_utils.h"
Mark Mendell09ed1a32015-03-25 08:30:06 -040024#include "code_generator_x86.h"
25#include "entrypoints/quick/quick_entrypoints.h"
26#include "intrinsics.h"
Andreas Gampe85b62f22015-09-09 13:15:38 -070027#include "intrinsics_utils.h"
Mark Mendell09ed1a32015-03-25 08:30:06 -040028#include "mirror/array-inl.h"
Mark Mendell09ed1a32015-03-25 08:30:06 -040029#include "mirror/string.h"
30#include "thread.h"
31#include "utils/x86/assembler_x86.h"
32#include "utils/x86/constants_x86.h"
33
34namespace art {
35
36namespace x86 {
37
38static constexpr int kDoubleNaNHigh = 0x7FF80000;
39static constexpr int kDoubleNaNLow = 0x00000000;
Mark P Mendell2f10a5f2016-01-25 14:47:50 +000040static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
41static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
Mark Mendell09ed1a32015-03-25 08:30:06 -040042
Mark Mendellfb8d2792015-03-31 22:16:59 -040043IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
Mark P Mendell2f10a5f2016-01-25 14:47:50 +000044 : arena_(codegen->GetGraph()->GetArena()),
45 codegen_(codegen) {
Mark Mendellfb8d2792015-03-31 22:16:59 -040046}
47
48
Mark Mendell09ed1a32015-03-25 08:30:06 -040049X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
Roland Levillainb488b782015-10-22 11:38:49 +010050 return down_cast<X86Assembler*>(codegen_->GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -040051}
52
53ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
54 return codegen_->GetGraph()->GetArena();
55}
56
57bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
58 Dispatch(invoke);
59 LocationSummary* res = invoke->GetLocations();
Roland Levillain0d5a2812015-11-13 10:07:31 +000060 if (res == nullptr) {
61 return false;
62 }
63 if (kEmitCompilerReadBarrier && res->CanCall()) {
64 // Generating an intrinsic for this HInvoke may produce an
65 // IntrinsicSlowPathX86 slow path. Currently this approach
66 // does not work when using read barriers, as the emitted
67 // calling sequence will make use of another slow path
68 // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect,
69 // ReadBarrierSlowPathX86 for HInvokeVirtual). So we bail
70 // out in this case.
71 //
72 // TODO: Find a way to have intrinsics work with read barriers.
73 invoke->SetLocations(nullptr);
74 return false;
75 }
76 return res->Intrinsified();
Mark Mendell09ed1a32015-03-25 08:30:06 -040077}
78
Roland Levillainec525fc2015-04-28 15:50:20 +010079static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
Roland Levillain2d27c8e2015-04-28 15:48:45 +010080 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
Roland Levillainec525fc2015-04-28 15:50:20 +010081 IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
Mark Mendell09ed1a32015-03-25 08:30:06 -040082}
83
Andreas Gampe85b62f22015-09-09 13:15:38 -070084using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
Mark Mendell09ed1a32015-03-25 08:30:06 -040085
Mark Mendell09ed1a32015-03-25 08:30:06 -040086#define __ assembler->
87
88static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
89 LocationSummary* locations = new (arena) LocationSummary(invoke,
90 LocationSummary::kNoCall,
91 kIntrinsified);
92 locations->SetInAt(0, Location::RequiresFpuRegister());
93 locations->SetOut(Location::RequiresRegister());
94 if (is64bit) {
95 locations->AddTemp(Location::RequiresFpuRegister());
96 }
97}
98
99static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
100 LocationSummary* locations = new (arena) LocationSummary(invoke,
101 LocationSummary::kNoCall,
102 kIntrinsified);
103 locations->SetInAt(0, Location::RequiresRegister());
104 locations->SetOut(Location::RequiresFpuRegister());
105 if (is64bit) {
106 locations->AddTemp(Location::RequiresFpuRegister());
107 locations->AddTemp(Location::RequiresFpuRegister());
108 }
109}
110
111static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
112 Location input = locations->InAt(0);
113 Location output = locations->Out();
114 if (is64bit) {
115 // Need to use the temporary.
116 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
117 __ movsd(temp, input.AsFpuRegister<XmmRegister>());
118 __ movd(output.AsRegisterPairLow<Register>(), temp);
119 __ psrlq(temp, Immediate(32));
120 __ movd(output.AsRegisterPairHigh<Register>(), temp);
121 } else {
122 __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
123 }
124}
125
126static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
127 Location input = locations->InAt(0);
128 Location output = locations->Out();
129 if (is64bit) {
130 // Need to use the temporary.
131 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
132 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
133 __ movd(temp1, input.AsRegisterPairLow<Register>());
134 __ movd(temp2, input.AsRegisterPairHigh<Register>());
135 __ punpckldq(temp1, temp2);
136 __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
137 } else {
138 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
139 }
140}
141
142void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000143 CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -0400144}
145void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000146 CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -0400147}
148
149void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000150 MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -0400151}
152void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000153 MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -0400154}
155
156void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000157 CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -0400158}
159void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000160 CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -0400161}
162
163void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000164 MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -0400165}
166void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000167 MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -0400168}
169
170static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
171 LocationSummary* locations = new (arena) LocationSummary(invoke,
172 LocationSummary::kNoCall,
173 kIntrinsified);
174 locations->SetInAt(0, Location::RequiresRegister());
175 locations->SetOut(Location::SameAsFirstInput());
176}
177
178static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
179 LocationSummary* locations = new (arena) LocationSummary(invoke,
180 LocationSummary::kNoCall,
181 kIntrinsified);
182 locations->SetInAt(0, Location::RequiresRegister());
183 locations->SetOut(Location::RequiresRegister());
184}
185
186static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
187 LocationSummary* locations = new (arena) LocationSummary(invoke,
188 LocationSummary::kNoCall,
189 kIntrinsified);
190 locations->SetInAt(0, Location::RequiresRegister());
191 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
192}
193
194static void GenReverseBytes(LocationSummary* locations,
195 Primitive::Type size,
196 X86Assembler* assembler) {
197 Register out = locations->Out().AsRegister<Register>();
198
199 switch (size) {
200 case Primitive::kPrimShort:
201 // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
202 __ bswapl(out);
203 __ sarl(out, Immediate(16));
204 break;
205 case Primitive::kPrimInt:
206 __ bswapl(out);
207 break;
208 default:
209 LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
210 UNREACHABLE();
211 }
212}
213
214void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
215 CreateIntToIntLocations(arena_, invoke);
216}
217
218void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
219 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
220}
221
Mark Mendell58d25fd2015-04-03 14:52:31 -0400222void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
223 CreateLongToLongLocations(arena_, invoke);
224}
225
226void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
227 LocationSummary* locations = invoke->GetLocations();
228 Location input = locations->InAt(0);
229 Register input_lo = input.AsRegisterPairLow<Register>();
230 Register input_hi = input.AsRegisterPairHigh<Register>();
231 Location output = locations->Out();
232 Register output_lo = output.AsRegisterPairLow<Register>();
233 Register output_hi = output.AsRegisterPairHigh<Register>();
234
235 X86Assembler* assembler = GetAssembler();
236 // Assign the inputs to the outputs, mixing low/high.
237 __ movl(output_lo, input_hi);
238 __ movl(output_hi, input_lo);
239 __ bswapl(output_lo);
240 __ bswapl(output_hi);
241}
242
Mark Mendell09ed1a32015-03-25 08:30:06 -0400243void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
244 CreateIntToIntLocations(arena_, invoke);
245}
246
247void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
248 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
249}
250
251
252// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
253// need is 64b.
254
255static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
256 // TODO: Enable memory operations when the assembler supports them.
257 LocationSummary* locations = new (arena) LocationSummary(invoke,
258 LocationSummary::kNoCall,
259 kIntrinsified);
260 locations->SetInAt(0, Location::RequiresFpuRegister());
Mark Mendell09ed1a32015-03-25 08:30:06 -0400261 locations->SetOut(Location::SameAsFirstInput());
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000262 HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
263 DCHECK(static_or_direct != nullptr);
264 if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
265 // We need addressibility for the constant area.
266 locations->SetInAt(1, Location::RequiresRegister());
267 // We need a temporary to hold the constant.
268 locations->AddTemp(Location::RequiresFpuRegister());
269 }
Mark Mendell09ed1a32015-03-25 08:30:06 -0400270}
271
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000272static void MathAbsFP(LocationSummary* locations,
273 bool is64bit,
274 X86Assembler* assembler,
275 CodeGeneratorX86* codegen) {
Mark Mendell09ed1a32015-03-25 08:30:06 -0400276 Location output = locations->Out();
277
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000278 DCHECK(output.IsFpuRegister());
279 if (locations->InAt(1).IsValid()) {
280 DCHECK(locations->InAt(1).IsRegister());
281 // We also have a constant area pointer.
282 Register constant_area = locations->InAt(1).AsRegister<Register>();
283 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
284 if (is64bit) {
285 __ movsd(temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF), constant_area));
286 __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
287 } else {
288 __ movss(temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF), constant_area));
289 __ andps(output.AsFpuRegister<XmmRegister>(), temp);
290 }
291 } else {
Mark Mendell09ed1a32015-03-25 08:30:06 -0400292 // Create the right constant on an aligned stack.
293 if (is64bit) {
294 __ subl(ESP, Immediate(8));
295 __ pushl(Immediate(0x7FFFFFFF));
296 __ pushl(Immediate(0xFFFFFFFF));
297 __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
298 } else {
299 __ subl(ESP, Immediate(12));
300 __ pushl(Immediate(0x7FFFFFFF));
301 __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
302 }
303 __ addl(ESP, Immediate(16));
Mark Mendell09ed1a32015-03-25 08:30:06 -0400304 }
305}
306
307void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
308 CreateFloatToFloat(arena_, invoke);
309}
310
311void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000312 MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -0400313}
314
315void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
316 CreateFloatToFloat(arena_, invoke);
317}
318
319void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000320 MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -0400321}
322
323static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
324 LocationSummary* locations = new (arena) LocationSummary(invoke,
325 LocationSummary::kNoCall,
326 kIntrinsified);
327 locations->SetInAt(0, Location::RegisterLocation(EAX));
328 locations->SetOut(Location::SameAsFirstInput());
329 locations->AddTemp(Location::RegisterLocation(EDX));
330}
331
332static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
333 Location output = locations->Out();
334 Register out = output.AsRegister<Register>();
335 DCHECK_EQ(out, EAX);
336 Register temp = locations->GetTemp(0).AsRegister<Register>();
337 DCHECK_EQ(temp, EDX);
338
339 // Sign extend EAX into EDX.
340 __ cdq();
341
342 // XOR EAX with sign.
343 __ xorl(EAX, EDX);
344
345 // Subtract out sign to correct.
346 __ subl(EAX, EDX);
347
348 // The result is in EAX.
349}
350
351static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
352 LocationSummary* locations = new (arena) LocationSummary(invoke,
353 LocationSummary::kNoCall,
354 kIntrinsified);
355 locations->SetInAt(0, Location::RequiresRegister());
356 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
357 locations->AddTemp(Location::RequiresRegister());
358}
359
360static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
361 Location input = locations->InAt(0);
362 Register input_lo = input.AsRegisterPairLow<Register>();
363 Register input_hi = input.AsRegisterPairHigh<Register>();
364 Location output = locations->Out();
365 Register output_lo = output.AsRegisterPairLow<Register>();
366 Register output_hi = output.AsRegisterPairHigh<Register>();
367 Register temp = locations->GetTemp(0).AsRegister<Register>();
368
369 // Compute the sign into the temporary.
370 __ movl(temp, input_hi);
371 __ sarl(temp, Immediate(31));
372
373 // Store the sign into the output.
374 __ movl(output_lo, temp);
375 __ movl(output_hi, temp);
376
377 // XOR the input to the output.
378 __ xorl(output_lo, input_lo);
379 __ xorl(output_hi, input_hi);
380
381 // Subtract the sign.
382 __ subl(output_lo, temp);
383 __ sbbl(output_hi, temp);
384}
385
386void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
387 CreateAbsIntLocation(arena_, invoke);
388}
389
390void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
391 GenAbsInteger(invoke->GetLocations(), GetAssembler());
392}
393
394void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
395 CreateAbsLongLocation(arena_, invoke);
396}
397
398void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
399 GenAbsLong(invoke->GetLocations(), GetAssembler());
400}
401
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000402static void GenMinMaxFP(LocationSummary* locations,
403 bool is_min,
404 bool is_double,
405 X86Assembler* assembler,
406 CodeGeneratorX86* codegen) {
Mark Mendell09ed1a32015-03-25 08:30:06 -0400407 Location op1_loc = locations->InAt(0);
408 Location op2_loc = locations->InAt(1);
409 Location out_loc = locations->Out();
410 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
411
412 // Shortcut for same input locations.
413 if (op1_loc.Equals(op2_loc)) {
414 DCHECK(out_loc.Equals(op1_loc));
415 return;
416 }
417
418 // (out := op1)
419 // out <=? op2
420 // if Nan jmp Nan_label
421 // if out is min jmp done
422 // if op2 is min jmp op2_label
423 // handle -0/+0
424 // jmp done
425 // Nan_label:
426 // out := NaN
427 // op2_label:
428 // out := op2
429 // done:
430 //
431 // This removes one jmp, but needs to copy one input (op1) to out.
432 //
433 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
434
435 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
436
Mark Mendell0c9497d2015-08-21 09:30:05 -0400437 NearLabel nan, done, op2_label;
Mark Mendell09ed1a32015-03-25 08:30:06 -0400438 if (is_double) {
439 __ ucomisd(out, op2);
440 } else {
441 __ ucomiss(out, op2);
442 }
443
444 __ j(Condition::kParityEven, &nan);
445
446 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
447 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
448
449 // Handle 0.0/-0.0.
450 if (is_min) {
451 if (is_double) {
452 __ orpd(out, op2);
453 } else {
454 __ orps(out, op2);
455 }
456 } else {
457 if (is_double) {
458 __ andpd(out, op2);
459 } else {
460 __ andps(out, op2);
461 }
462 }
463 __ jmp(&done);
464
465 // NaN handling.
466 __ Bind(&nan);
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000467 // Do we have a constant area pointer?
468 if (locations->InAt(2).IsValid()) {
469 DCHECK(locations->InAt(2).IsRegister());
470 Register constant_area = locations->InAt(2).AsRegister<Register>();
471 if (is_double) {
472 __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, constant_area));
473 } else {
474 __ movss(out, codegen->LiteralInt32Address(kFloatNaN, constant_area));
475 }
Mark Mendell09ed1a32015-03-25 08:30:06 -0400476 } else {
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000477 if (is_double) {
478 __ pushl(Immediate(kDoubleNaNHigh));
479 __ pushl(Immediate(kDoubleNaNLow));
480 __ movsd(out, Address(ESP, 0));
481 __ addl(ESP, Immediate(8));
482 } else {
483 __ pushl(Immediate(kFloatNaN));
484 __ movss(out, Address(ESP, 0));
485 __ addl(ESP, Immediate(4));
486 }
Mark Mendell09ed1a32015-03-25 08:30:06 -0400487 }
488 __ jmp(&done);
489
490 // out := op2;
491 __ Bind(&op2_label);
492 if (is_double) {
493 __ movsd(out, op2);
494 } else {
495 __ movss(out, op2);
496 }
497
498 // Done.
499 __ Bind(&done);
500}
501
502static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
503 LocationSummary* locations = new (arena) LocationSummary(invoke,
504 LocationSummary::kNoCall,
505 kIntrinsified);
506 locations->SetInAt(0, Location::RequiresFpuRegister());
507 locations->SetInAt(1, Location::RequiresFpuRegister());
508 // The following is sub-optimal, but all we can do for now. It would be fine to also accept
509 // the second input to be the output (we can simply swap inputs).
510 locations->SetOut(Location::SameAsFirstInput());
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000511 HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
512 DCHECK(static_or_direct != nullptr);
513 if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
514 locations->SetInAt(2, Location::RequiresRegister());
515 }
Mark Mendell09ed1a32015-03-25 08:30:06 -0400516}
517
518void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
519 CreateFPFPToFPLocations(arena_, invoke);
520}
521
522void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000523 GenMinMaxFP(invoke->GetLocations(),
524 /* is_min */ true,
525 /* is_double */ true,
526 GetAssembler(),
527 codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -0400528}
529
530void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
531 CreateFPFPToFPLocations(arena_, invoke);
532}
533
534void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000535 GenMinMaxFP(invoke->GetLocations(),
536 /* is_min */ true,
537 /* is_double */ false,
538 GetAssembler(),
539 codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -0400540}
541
542void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
543 CreateFPFPToFPLocations(arena_, invoke);
544}
545
546void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000547 GenMinMaxFP(invoke->GetLocations(),
548 /* is_min */ false,
549 /* is_double */ true,
550 GetAssembler(),
551 codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -0400552}
553
554void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
555 CreateFPFPToFPLocations(arena_, invoke);
556}
557
558void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
Mark P Mendell2f10a5f2016-01-25 14:47:50 +0000559 GenMinMaxFP(invoke->GetLocations(),
560 /* is_min */ false,
561 /* is_double */ false,
562 GetAssembler(),
563 codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -0400564}
565
566static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
567 X86Assembler* assembler) {
568 Location op1_loc = locations->InAt(0);
569 Location op2_loc = locations->InAt(1);
570
571 // Shortcut for same input locations.
572 if (op1_loc.Equals(op2_loc)) {
573 // Can return immediately, as op1_loc == out_loc.
574 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
575 // a copy here.
576 DCHECK(locations->Out().Equals(op1_loc));
577 return;
578 }
579
580 if (is_long) {
581 // Need to perform a subtract to get the sign right.
582 // op1 is already in the same location as the output.
583 Location output = locations->Out();
584 Register output_lo = output.AsRegisterPairLow<Register>();
585 Register output_hi = output.AsRegisterPairHigh<Register>();
586
587 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
588 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
589
590 // Spare register to compute the subtraction to set condition code.
591 Register temp = locations->GetTemp(0).AsRegister<Register>();
592
593 // Subtract off op2_low.
594 __ movl(temp, output_lo);
595 __ subl(temp, op2_lo);
596
597 // Now use the same tempo and the borrow to finish the subtraction of op2_hi.
598 __ movl(temp, output_hi);
599 __ sbbl(temp, op2_hi);
600
601 // Now the condition code is correct.
602 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
603 __ cmovl(cond, output_lo, op2_lo);
604 __ cmovl(cond, output_hi, op2_hi);
605 } else {
606 Register out = locations->Out().AsRegister<Register>();
607 Register op2 = op2_loc.AsRegister<Register>();
608
609 // (out := op1)
610 // out <=? op2
611 // if out is min jmp done
612 // out := op2
613 // done:
614
615 __ cmpl(out, op2);
616 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
617 __ cmovl(cond, out, op2);
618 }
619}
620
621static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
622 LocationSummary* locations = new (arena) LocationSummary(invoke,
623 LocationSummary::kNoCall,
624 kIntrinsified);
625 locations->SetInAt(0, Location::RequiresRegister());
626 locations->SetInAt(1, Location::RequiresRegister());
627 locations->SetOut(Location::SameAsFirstInput());
628}
629
630static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
631 LocationSummary* locations = new (arena) LocationSummary(invoke,
632 LocationSummary::kNoCall,
633 kIntrinsified);
634 locations->SetInAt(0, Location::RequiresRegister());
635 locations->SetInAt(1, Location::RequiresRegister());
636 locations->SetOut(Location::SameAsFirstInput());
637 // Register to use to perform a long subtract to set cc.
638 locations->AddTemp(Location::RequiresRegister());
639}
640
641void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
642 CreateIntIntToIntLocations(arena_, invoke);
643}
644
645void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000646 GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -0400647}
648
649void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
650 CreateLongLongToLongLocations(arena_, invoke);
651}
652
653void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000654 GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -0400655}
656
657void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
658 CreateIntIntToIntLocations(arena_, invoke);
659}
660
661void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000662 GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -0400663}
664
665void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
666 CreateLongLongToLongLocations(arena_, invoke);
667}
668
669void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +0000670 GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -0400671}
672
673static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
674 LocationSummary* locations = new (arena) LocationSummary(invoke,
675 LocationSummary::kNoCall,
676 kIntrinsified);
677 locations->SetInAt(0, Location::RequiresFpuRegister());
678 locations->SetOut(Location::RequiresFpuRegister());
679}
680
681void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
682 CreateFPToFPLocations(arena_, invoke);
683}
684
685void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
686 LocationSummary* locations = invoke->GetLocations();
687 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
688 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
689
690 GetAssembler()->sqrtsd(out, in);
691}
692
Mark Mendellfb8d2792015-03-31 22:16:59 -0400693static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
Roland Levillainec525fc2015-04-28 15:50:20 +0100694 MoveArguments(invoke, codegen);
Mark Mendellfb8d2792015-03-31 22:16:59 -0400695
696 DCHECK(invoke->IsInvokeStaticOrDirect());
Nicolas Geoffray94015b92015-06-04 18:21:04 +0100697 codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
698 Location::RegisterLocation(EAX));
Mingyao Yange90db122015-04-03 17:56:54 -0700699 codegen->RecordPcInfo(invoke, invoke->GetDexPc());
Mark Mendellfb8d2792015-03-31 22:16:59 -0400700
701 // Copy the result back to the expected output.
702 Location out = invoke->GetLocations()->Out();
703 if (out.IsValid()) {
704 DCHECK(out.IsRegister());
Andreas Gampe85b62f22015-09-09 13:15:38 -0700705 codegen->MoveFromReturnRegister(out, invoke->GetType());
Mark Mendellfb8d2792015-03-31 22:16:59 -0400706 }
707}
708
709static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
710 HInvoke* invoke,
711 CodeGeneratorX86* codegen) {
712 // Do we have instruction support?
713 if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
714 CreateFPToFPLocations(arena, invoke);
715 return;
716 }
717
718 // We have to fall back to a call to the intrinsic.
719 LocationSummary* locations = new (arena) LocationSummary(invoke,
720 LocationSummary::kCall);
721 InvokeRuntimeCallingConvention calling_convention;
722 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
723 locations->SetOut(Location::FpuRegisterLocation(XMM0));
724 // Needs to be EAX for the invoke.
725 locations->AddTemp(Location::RegisterLocation(EAX));
726}
727
728static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
729 HInvoke* invoke,
730 X86Assembler* assembler,
731 int round_mode) {
732 LocationSummary* locations = invoke->GetLocations();
733 if (locations->WillCall()) {
734 InvokeOutOfLineIntrinsic(codegen, invoke);
735 } else {
736 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
737 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
738 __ roundsd(out, in, Immediate(round_mode));
739 }
740}
741
742void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
743 CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
744}
745
746void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
747 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
748}
749
750void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
751 CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
752}
753
754void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
755 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
756}
757
758void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
759 CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
760}
761
762void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
763 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
764}
765
766// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
767// as it needs 64 bit instructions.
768void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
Andreas Gampee6d0d8d2015-12-28 09:54:29 -0800769 // See intrinsics.h.
770 if (!kRoundIsPlusPointFive) {
771 return;
772 }
773
Mark Mendellfb8d2792015-03-31 22:16:59 -0400774 // Do we have instruction support?
775 if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
776 LocationSummary* locations = new (arena_) LocationSummary(invoke,
777 LocationSummary::kNoCall,
778 kIntrinsified);
779 locations->SetInAt(0, Location::RequiresFpuRegister());
Nicolas Geoffrayd9b92402015-04-21 10:02:22 +0100780 locations->SetOut(Location::RequiresRegister());
Mark Mendellfb8d2792015-03-31 22:16:59 -0400781 locations->AddTemp(Location::RequiresFpuRegister());
782 locations->AddTemp(Location::RequiresFpuRegister());
783 return;
784 }
785
786 // We have to fall back to a call to the intrinsic.
787 LocationSummary* locations = new (arena_) LocationSummary(invoke,
788 LocationSummary::kCall);
789 InvokeRuntimeCallingConvention calling_convention;
790 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
791 locations->SetOut(Location::RegisterLocation(EAX));
792 // Needs to be EAX for the invoke.
793 locations->AddTemp(Location::RegisterLocation(EAX));
794}
795
796void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
797 LocationSummary* locations = invoke->GetLocations();
798 if (locations->WillCall()) {
799 InvokeOutOfLineIntrinsic(codegen_, invoke);
800 return;
801 }
802
803 // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
804 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
805 Register out = locations->Out().AsRegister<Register>();
806 XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
807 XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
Mark Mendell0c9497d2015-08-21 09:30:05 -0400808 NearLabel done, nan;
Mark Mendellfb8d2792015-03-31 22:16:59 -0400809 X86Assembler* assembler = GetAssembler();
810
811 // Generate 0.5 into inPlusPointFive.
812 __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
813 __ movd(inPlusPointFive, out);
814
815 // Add in the input.
816 __ addss(inPlusPointFive, in);
817
818 // And truncate to an integer.
819 __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
820
821 __ movl(out, Immediate(kPrimIntMax));
822 // maxInt = int-to-float(out)
823 __ cvtsi2ss(maxInt, out);
824
825 // if inPlusPointFive >= maxInt goto done
826 __ comiss(inPlusPointFive, maxInt);
827 __ j(kAboveEqual, &done);
828
829 // if input == NaN goto nan
830 __ j(kUnordered, &nan);
831
832 // output = float-to-int-truncate(input)
833 __ cvttss2si(out, inPlusPointFive);
834 __ jmp(&done);
835 __ Bind(&nan);
836
837 // output = 0
838 __ xorl(out, out);
839 __ Bind(&done);
840}
841
Mark Mendella4f12202015-08-06 15:23:34 -0400842static void CreateFPToFPCallLocations(ArenaAllocator* arena,
843 HInvoke* invoke) {
844 LocationSummary* locations = new (arena) LocationSummary(invoke,
845 LocationSummary::kCall,
846 kIntrinsified);
847 InvokeRuntimeCallingConvention calling_convention;
848 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
849 locations->SetOut(Location::FpuRegisterLocation(XMM0));
850}
851
852static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
853 LocationSummary* locations = invoke->GetLocations();
854 DCHECK(locations->WillCall());
855 DCHECK(invoke->IsInvokeStaticOrDirect());
856 X86Assembler* assembler = codegen->GetAssembler();
857
858 // We need some place to pass the parameters.
859 __ subl(ESP, Immediate(16));
860 __ cfi().AdjustCFAOffset(16);
861
862 // Pass the parameters at the bottom of the stack.
863 __ movsd(Address(ESP, 0), XMM0);
864
865 // If we have a second parameter, pass it next.
866 if (invoke->GetNumberOfArguments() == 2) {
867 __ movsd(Address(ESP, 8), XMM1);
868 }
869
870 // Now do the actual call.
871 __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(entry)));
872
873 // Extract the return value from the FP stack.
874 __ fstpl(Address(ESP, 0));
875 __ movsd(XMM0, Address(ESP, 0));
876
877 // And clean up the stack.
878 __ addl(ESP, Immediate(16));
879 __ cfi().AdjustCFAOffset(-16);
880
881 codegen->RecordPcInfo(invoke, invoke->GetDexPc());
882}
883
884void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
885 CreateFPToFPCallLocations(arena_, invoke);
886}
887
888void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
889 GenFPToFPCall(invoke, codegen_, kQuickCos);
890}
891
892void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
893 CreateFPToFPCallLocations(arena_, invoke);
894}
895
896void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
897 GenFPToFPCall(invoke, codegen_, kQuickSin);
898}
899
900void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
901 CreateFPToFPCallLocations(arena_, invoke);
902}
903
904void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
905 GenFPToFPCall(invoke, codegen_, kQuickAcos);
906}
907
908void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
909 CreateFPToFPCallLocations(arena_, invoke);
910}
911
912void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
913 GenFPToFPCall(invoke, codegen_, kQuickAsin);
914}
915
916void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
917 CreateFPToFPCallLocations(arena_, invoke);
918}
919
920void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
921 GenFPToFPCall(invoke, codegen_, kQuickAtan);
922}
923
924void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
925 CreateFPToFPCallLocations(arena_, invoke);
926}
927
928void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
929 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
930}
931
932void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
933 CreateFPToFPCallLocations(arena_, invoke);
934}
935
936void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
937 GenFPToFPCall(invoke, codegen_, kQuickCosh);
938}
939
940void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
941 CreateFPToFPCallLocations(arena_, invoke);
942}
943
944void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
945 GenFPToFPCall(invoke, codegen_, kQuickExp);
946}
947
948void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
949 CreateFPToFPCallLocations(arena_, invoke);
950}
951
952void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
953 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
954}
955
956void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
957 CreateFPToFPCallLocations(arena_, invoke);
958}
959
960void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
961 GenFPToFPCall(invoke, codegen_, kQuickLog);
962}
963
964void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
965 CreateFPToFPCallLocations(arena_, invoke);
966}
967
968void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
969 GenFPToFPCall(invoke, codegen_, kQuickLog10);
970}
971
972void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
973 CreateFPToFPCallLocations(arena_, invoke);
974}
975
976void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
977 GenFPToFPCall(invoke, codegen_, kQuickSinh);
978}
979
980void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
981 CreateFPToFPCallLocations(arena_, invoke);
982}
983
984void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
985 GenFPToFPCall(invoke, codegen_, kQuickTan);
986}
987
988void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
989 CreateFPToFPCallLocations(arena_, invoke);
990}
991
992void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
993 GenFPToFPCall(invoke, codegen_, kQuickTanh);
994}
995
996static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
997 HInvoke* invoke) {
998 LocationSummary* locations = new (arena) LocationSummary(invoke,
999 LocationSummary::kCall,
1000 kIntrinsified);
1001 InvokeRuntimeCallingConvention calling_convention;
1002 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
1003 locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
1004 locations->SetOut(Location::FpuRegisterLocation(XMM0));
1005}
1006
1007void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
1008 CreateFPFPToFPCallLocations(arena_, invoke);
1009}
1010
1011void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
1012 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
1013}
1014
1015void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
1016 CreateFPFPToFPCallLocations(arena_, invoke);
1017}
1018
1019void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
1020 GenFPToFPCall(invoke, codegen_, kQuickHypot);
1021}
1022
1023void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
1024 CreateFPFPToFPCallLocations(arena_, invoke);
1025}
1026
1027void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
1028 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
1029}
1030
Mark Mendell09ed1a32015-03-25 08:30:06 -04001031void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
1032 // The inputs plus one temp.
1033 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1034 LocationSummary::kCallOnSlowPath,
1035 kIntrinsified);
1036 locations->SetInAt(0, Location::RequiresRegister());
1037 locations->SetInAt(1, Location::RequiresRegister());
1038 locations->SetOut(Location::SameAsFirstInput());
Mark Mendell09ed1a32015-03-25 08:30:06 -04001039}
1040
1041void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
1042 LocationSummary* locations = invoke->GetLocations();
1043
Mark Mendell6bc53a92015-07-01 14:26:52 -04001044 // Location of reference to data array.
Mark Mendell09ed1a32015-03-25 08:30:06 -04001045 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
Mark Mendell6bc53a92015-07-01 14:26:52 -04001046 // Location of count.
Mark Mendell09ed1a32015-03-25 08:30:06 -04001047 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
Mark Mendell09ed1a32015-03-25 08:30:06 -04001048
1049 Register obj = locations->InAt(0).AsRegister<Register>();
1050 Register idx = locations->InAt(1).AsRegister<Register>();
1051 Register out = locations->Out().AsRegister<Register>();
Mark Mendell09ed1a32015-03-25 08:30:06 -04001052
1053 // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
1054 // the cost.
1055 // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
1056 // we will not optimize the code for constants (which would save a register).
1057
Andreas Gampe85b62f22015-09-09 13:15:38 -07001058 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001059 codegen_->AddSlowPath(slow_path);
1060
1061 X86Assembler* assembler = GetAssembler();
1062
1063 __ cmpl(idx, Address(obj, count_offset));
1064 codegen_->MaybeRecordImplicitNullCheck(invoke);
1065 __ j(kAboveEqual, slow_path->GetEntryLabel());
1066
Jeff Hao848f70a2014-01-15 13:49:50 -08001067 // out = out[2*idx].
1068 __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
Mark Mendell09ed1a32015-03-25 08:30:06 -04001069
1070 __ Bind(slow_path->GetExitLabel());
1071}
1072
Mark Mendell6bc53a92015-07-01 14:26:52 -04001073void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
1074 // We need at least two of the positions or length to be an integer constant,
1075 // or else we won't have enough free registers.
1076 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1077 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1078 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1079
1080 int num_constants =
1081 ((src_pos != nullptr) ? 1 : 0)
1082 + ((dest_pos != nullptr) ? 1 : 0)
1083 + ((length != nullptr) ? 1 : 0);
1084
1085 if (num_constants < 2) {
1086 // Not enough free registers.
1087 return;
1088 }
1089
1090 // As long as we are checking, we might as well check to see if the src and dest
1091 // positions are >= 0.
1092 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
1093 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
1094 // We will have to fail anyways.
1095 return;
1096 }
1097
1098 // And since we are already checking, check the length too.
1099 if (length != nullptr) {
1100 int32_t len = length->GetValue();
1101 if (len < 0) {
1102 // Just call as normal.
1103 return;
1104 }
1105 }
1106
1107 // Okay, it is safe to generate inline code.
1108 LocationSummary* locations =
1109 new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
1110 // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
1111 locations->SetInAt(0, Location::RequiresRegister());
1112 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1113 locations->SetInAt(2, Location::RequiresRegister());
1114 locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
1115 locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
1116
1117 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1118 locations->AddTemp(Location::RegisterLocation(ESI));
1119 locations->AddTemp(Location::RegisterLocation(EDI));
1120 locations->AddTemp(Location::RegisterLocation(ECX));
1121}
1122
1123static void CheckPosition(X86Assembler* assembler,
1124 Location pos,
1125 Register input,
1126 Register length,
Andreas Gampe85b62f22015-09-09 13:15:38 -07001127 SlowPathCode* slow_path,
Mark Mendell6bc53a92015-07-01 14:26:52 -04001128 Register input_len,
1129 Register temp) {
1130 // Where is the length in the String?
1131 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1132
1133 if (pos.IsConstant()) {
1134 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
1135 if (pos_const == 0) {
1136 // Check that length(input) >= length.
1137 __ cmpl(Address(input, length_offset), length);
1138 __ j(kLess, slow_path->GetEntryLabel());
1139 } else {
1140 // Check that length(input) >= pos.
1141 __ movl(input_len, Address(input, length_offset));
1142 __ cmpl(input_len, Immediate(pos_const));
1143 __ j(kLess, slow_path->GetEntryLabel());
1144
1145 // Check that (length(input) - pos) >= length.
1146 __ leal(temp, Address(input_len, -pos_const));
1147 __ cmpl(temp, length);
1148 __ j(kLess, slow_path->GetEntryLabel());
1149 }
1150 } else {
1151 // Check that pos >= 0.
1152 Register pos_reg = pos.AsRegister<Register>();
1153 __ testl(pos_reg, pos_reg);
1154 __ j(kLess, slow_path->GetEntryLabel());
1155
1156 // Check that pos <= length(input).
1157 __ cmpl(Address(input, length_offset), pos_reg);
1158 __ j(kLess, slow_path->GetEntryLabel());
1159
1160 // Check that (length(input) - pos) >= length.
1161 __ movl(temp, Address(input, length_offset));
1162 __ subl(temp, pos_reg);
1163 __ cmpl(temp, length);
1164 __ j(kLess, slow_path->GetEntryLabel());
1165 }
1166}
1167
1168void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
1169 X86Assembler* assembler = GetAssembler();
1170 LocationSummary* locations = invoke->GetLocations();
1171
1172 Register src = locations->InAt(0).AsRegister<Register>();
1173 Location srcPos = locations->InAt(1);
1174 Register dest = locations->InAt(2).AsRegister<Register>();
1175 Location destPos = locations->InAt(3);
1176 Location length = locations->InAt(4);
1177
1178 // Temporaries that we need for MOVSW.
1179 Register src_base = locations->GetTemp(0).AsRegister<Register>();
1180 DCHECK_EQ(src_base, ESI);
1181 Register dest_base = locations->GetTemp(1).AsRegister<Register>();
1182 DCHECK_EQ(dest_base, EDI);
1183 Register count = locations->GetTemp(2).AsRegister<Register>();
1184 DCHECK_EQ(count, ECX);
1185
Andreas Gampe85b62f22015-09-09 13:15:38 -07001186 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Mark Mendell6bc53a92015-07-01 14:26:52 -04001187 codegen_->AddSlowPath(slow_path);
1188
1189 // Bail out if the source and destination are the same (to handle overlap).
1190 __ cmpl(src, dest);
1191 __ j(kEqual, slow_path->GetEntryLabel());
1192
1193 // Bail out if the source is null.
1194 __ testl(src, src);
1195 __ j(kEqual, slow_path->GetEntryLabel());
1196
1197 // Bail out if the destination is null.
1198 __ testl(dest, dest);
1199 __ j(kEqual, slow_path->GetEntryLabel());
1200
1201 // If the length is negative, bail out.
1202 // We have already checked in the LocationsBuilder for the constant case.
1203 if (!length.IsConstant()) {
1204 __ cmpl(length.AsRegister<Register>(), length.AsRegister<Register>());
1205 __ j(kLess, slow_path->GetEntryLabel());
1206 }
1207
1208 // We need the count in ECX.
1209 if (length.IsConstant()) {
1210 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1211 } else {
1212 __ movl(count, length.AsRegister<Register>());
1213 }
1214
1215 // Validity checks: source.
1216 CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);
1217
1218 // Validity checks: dest.
1219 CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);
1220
1221 // Okay, everything checks out. Finally time to do the copy.
1222 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1223 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1224 DCHECK_EQ(char_size, 2u);
1225
1226 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
1227
1228 if (srcPos.IsConstant()) {
1229 int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
1230 __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
1231 } else {
1232 __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
1233 ScaleFactor::TIMES_2, data_offset));
1234 }
1235 if (destPos.IsConstant()) {
1236 int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
1237
1238 __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
1239 } else {
1240 __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
1241 ScaleFactor::TIMES_2, data_offset));
1242 }
1243
1244 // Do the move.
1245 __ rep_movsw();
1246
1247 __ Bind(slow_path->GetExitLabel());
1248}
1249
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001250void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
1251 // The inputs plus one temp.
1252 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1253 LocationSummary::kCall,
1254 kIntrinsified);
1255 InvokeRuntimeCallingConvention calling_convention;
1256 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1257 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1258 locations->SetOut(Location::RegisterLocation(EAX));
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001259}
1260
1261void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1262 X86Assembler* assembler = GetAssembler();
1263 LocationSummary* locations = invoke->GetLocations();
1264
Nicolas Geoffray512e04d2015-03-27 17:21:24 +00001265 // Note that the null check must have been done earlier.
Calin Juravle641547a2015-04-21 22:08:51 +01001266 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001267
1268 Register argument = locations->InAt(1).AsRegister<Register>();
1269 __ testl(argument, argument);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001270 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001271 codegen_->AddSlowPath(slow_path);
1272 __ j(kEqual, slow_path->GetEntryLabel());
1273
1274 __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
1275 __ Bind(slow_path->GetExitLabel());
1276}
1277
Agi Csakid7138c82015-08-13 17:46:44 -07001278void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1279 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1280 LocationSummary::kNoCall,
1281 kIntrinsified);
1282 locations->SetInAt(0, Location::RequiresRegister());
1283 locations->SetInAt(1, Location::RequiresRegister());
1284
1285 // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
1286 locations->AddTemp(Location::RegisterLocation(ECX));
1287 locations->AddTemp(Location::RegisterLocation(EDI));
1288
1289 // Set output, ESI needed for repe_cmpsl instruction anyways.
1290 locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1291}
1292
1293void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1294 X86Assembler* assembler = GetAssembler();
1295 LocationSummary* locations = invoke->GetLocations();
1296
1297 Register str = locations->InAt(0).AsRegister<Register>();
1298 Register arg = locations->InAt(1).AsRegister<Register>();
1299 Register ecx = locations->GetTemp(0).AsRegister<Register>();
1300 Register edi = locations->GetTemp(1).AsRegister<Register>();
1301 Register esi = locations->Out().AsRegister<Register>();
1302
Mark Mendell0c9497d2015-08-21 09:30:05 -04001303 NearLabel end, return_true, return_false;
Agi Csakid7138c82015-08-13 17:46:44 -07001304
1305 // Get offsets of count, value, and class fields within a string object.
1306 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1307 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1308 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1309
1310 // Note that the null check must have been done earlier.
1311 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1312
Nicolas Geoffraya83a54d2015-10-02 17:30:26 +01001313 StringEqualsOptimizations optimizations(invoke);
1314 if (!optimizations.GetArgumentNotNull()) {
1315 // Check if input is null, return false if it is.
1316 __ testl(arg, arg);
1317 __ j(kEqual, &return_false);
1318 }
Agi Csakid7138c82015-08-13 17:46:44 -07001319
1320 // Instanceof check for the argument by comparing class fields.
1321 // All string objects must have the same type since String cannot be subclassed.
1322 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1323 // If the argument is a string object, its class field must be equal to receiver's class field.
Nicolas Geoffraya83a54d2015-10-02 17:30:26 +01001324 if (!optimizations.GetArgumentIsString()) {
1325 __ movl(ecx, Address(str, class_offset));
1326 __ cmpl(ecx, Address(arg, class_offset));
1327 __ j(kNotEqual, &return_false);
1328 }
Agi Csakid7138c82015-08-13 17:46:44 -07001329
1330 // Reference equality check, return true if same reference.
1331 __ cmpl(str, arg);
1332 __ j(kEqual, &return_true);
1333
1334 // Load length of receiver string.
1335 __ movl(ecx, Address(str, count_offset));
1336 // Check if lengths are equal, return false if they're not.
1337 __ cmpl(ecx, Address(arg, count_offset));
1338 __ j(kNotEqual, &return_false);
1339 // Return true if both strings are empty.
Mark Mendell0c9497d2015-08-21 09:30:05 -04001340 __ jecxz(&return_true);
Agi Csakid7138c82015-08-13 17:46:44 -07001341
1342 // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1343 __ leal(esi, Address(str, value_offset));
1344 __ leal(edi, Address(arg, value_offset));
1345
1346 // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths.
1347 __ addl(ecx, Immediate(1));
1348 __ shrl(ecx, Immediate(1));
1349
1350 // Assertions that must hold in order to compare strings 2 characters at a time.
1351 DCHECK_ALIGNED(value_offset, 4);
1352 static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1353
1354 // Loop to compare strings two characters at a time starting at the beginning of the string.
1355 __ repe_cmpsl();
1356 // If strings are not equal, zero flag will be cleared.
1357 __ j(kNotEqual, &return_false);
1358
1359 // Return true and exit the function.
1360 // If loop does not result in returning false, we return true.
1361 __ Bind(&return_true);
1362 __ movl(esi, Immediate(1));
1363 __ jmp(&end);
1364
1365 // Return false and exit the function.
1366 __ Bind(&return_false);
1367 __ xorl(esi, esi);
1368 __ Bind(&end);
1369}
1370
Andreas Gampe21030dd2015-05-07 14:46:15 -07001371static void CreateStringIndexOfLocations(HInvoke* invoke,
1372 ArenaAllocator* allocator,
1373 bool start_at_zero) {
1374 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1375 LocationSummary::kCallOnSlowPath,
1376 kIntrinsified);
1377 // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1378 locations->SetInAt(0, Location::RegisterLocation(EDI));
1379 // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1380 // allocator to do that, anyways. We can still do the constant check by checking the parameter
1381 // of the instruction explicitly.
1382 // Note: This works as we don't clobber EAX anywhere.
1383 locations->SetInAt(1, Location::RegisterLocation(EAX));
1384 if (!start_at_zero) {
1385 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1386 }
1387 // As we clobber EDI during execution anyway, also use it as the output.
1388 locations->SetOut(Location::SameAsFirstInput());
1389
1390 // repne scasw uses ECX as the counter.
1391 locations->AddTemp(Location::RegisterLocation(ECX));
1392 // Need another temporary to be able to compute the result.
1393 locations->AddTemp(Location::RequiresRegister());
1394}
1395
1396static void GenerateStringIndexOf(HInvoke* invoke,
1397 X86Assembler* assembler,
1398 CodeGeneratorX86* codegen,
1399 ArenaAllocator* allocator,
1400 bool start_at_zero) {
1401 LocationSummary* locations = invoke->GetLocations();
1402
1403 // Note that the null check must have been done earlier.
1404 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1405
1406 Register string_obj = locations->InAt(0).AsRegister<Register>();
1407 Register search_value = locations->InAt(1).AsRegister<Register>();
1408 Register counter = locations->GetTemp(0).AsRegister<Register>();
1409 Register string_length = locations->GetTemp(1).AsRegister<Register>();
1410 Register out = locations->Out().AsRegister<Register>();
1411
1412 // Check our assumptions for registers.
1413 DCHECK_EQ(string_obj, EDI);
1414 DCHECK_EQ(search_value, EAX);
1415 DCHECK_EQ(counter, ECX);
1416 DCHECK_EQ(out, EDI);
1417
1418 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1419 // or directly dispatch if we have a constant.
Andreas Gampe85b62f22015-09-09 13:15:38 -07001420 SlowPathCode* slow_path = nullptr;
Andreas Gampe21030dd2015-05-07 14:46:15 -07001421 if (invoke->InputAt(1)->IsIntConstant()) {
1422 if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
1423 std::numeric_limits<uint16_t>::max()) {
1424 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1425 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1426 slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1427 codegen->AddSlowPath(slow_path);
1428 __ jmp(slow_path->GetEntryLabel());
1429 __ Bind(slow_path->GetExitLabel());
1430 return;
1431 }
1432 } else {
1433 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1434 slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1435 codegen->AddSlowPath(slow_path);
1436 __ j(kAbove, slow_path->GetEntryLabel());
1437 }
1438
1439 // From here down, we know that we are looking for a char that fits in 16 bits.
1440 // Location of reference to data array within the String object.
1441 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1442 // Location of count within the String object.
1443 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1444
1445 // Load string length, i.e., the count field of the string.
1446 __ movl(string_length, Address(string_obj, count_offset));
1447
1448 // Do a zero-length check.
1449 // TODO: Support jecxz.
Mark Mendell0c9497d2015-08-21 09:30:05 -04001450 NearLabel not_found_label;
Andreas Gampe21030dd2015-05-07 14:46:15 -07001451 __ testl(string_length, string_length);
1452 __ j(kEqual, &not_found_label);
1453
1454 if (start_at_zero) {
1455 // Number of chars to scan is the same as the string length.
1456 __ movl(counter, string_length);
1457
1458 // Move to the start of the string.
1459 __ addl(string_obj, Immediate(value_offset));
1460 } else {
1461 Register start_index = locations->InAt(2).AsRegister<Register>();
1462
1463 // Do a start_index check.
1464 __ cmpl(start_index, string_length);
1465 __ j(kGreaterEqual, &not_found_label);
1466
1467 // Ensure we have a start index >= 0.
1468 __ xorl(counter, counter);
1469 __ cmpl(start_index, Immediate(0));
1470 __ cmovl(kGreater, counter, start_index);
1471
1472 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1473 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1474
1475 // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1476 // compare.
1477 __ negl(counter);
1478 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1479 }
1480
1481 // Everything is set up for repne scasw:
1482 // * Comparison address in EDI.
1483 // * Counter in ECX.
1484 __ repne_scasw();
1485
1486 // Did we find a match?
1487 __ j(kNotEqual, &not_found_label);
1488
1489 // Yes, we matched. Compute the index of the result.
1490 __ subl(string_length, counter);
1491 __ leal(out, Address(string_length, -1));
1492
Mark Mendell0c9497d2015-08-21 09:30:05 -04001493 NearLabel done;
Andreas Gampe21030dd2015-05-07 14:46:15 -07001494 __ jmp(&done);
1495
1496 // Failed to match; return -1.
1497 __ Bind(&not_found_label);
1498 __ movl(out, Immediate(-1));
1499
1500 // And join up at the end.
1501 __ Bind(&done);
1502 if (slow_path != nullptr) {
1503 __ Bind(slow_path->GetExitLabel());
1504 }
1505}
1506
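// Illustrative sketch only: the clamping and index arithmetic around
// REPNE SCASW above, as plain C++ (hypothetical helper; assumes <algorithm>
// for std::max, and that search_value already fits in 16 bits, as the
// slow-path dispatch above guarantees).
static inline int32_t StringIndexOfForReference(const uint16_t* value,
                                                int32_t length,
                                                uint16_t search_value,
                                                int32_t start_index) {
  if (start_index >= length) {
    return -1;                           // start_index check
  }
  int32_t i = std::max(start_index, 0);  // xorl/cmpl/cmovl clamp
  for (int32_t remaining = length - i; remaining != 0; --remaining, ++i) {
    if (value[i] == search_value) {
      // repne scasw stops with length - i - 1 left in ECX, so the
      // subl/leal pair computes length - ECX - 1 == i.
      return i;
    }
  }
  return -1;                             // not_found_label
}
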
1507void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001508 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001509}
1510
1511void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001512 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001513}
1514
1515void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001516 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001517}
1518
1519void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001520 GenerateStringIndexOf(
1521 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001522}
1523
Jeff Hao848f70a2014-01-15 13:49:50 -08001524void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1525 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1526 LocationSummary::kCall,
1527 kIntrinsified);
1528 InvokeRuntimeCallingConvention calling_convention;
1529 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1530 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1531 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1532 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1533 locations->SetOut(Location::RegisterLocation(EAX));
Jeff Hao848f70a2014-01-15 13:49:50 -08001534}
1535
1536void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1537 X86Assembler* assembler = GetAssembler();
1538 LocationSummary* locations = invoke->GetLocations();
1539
1540 Register byte_array = locations->InAt(0).AsRegister<Register>();
1541 __ testl(byte_array, byte_array);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001542 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08001543 codegen_->AddSlowPath(slow_path);
1544 __ j(kEqual, slow_path->GetEntryLabel());
1545
1546 __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
1547 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1548 __ Bind(slow_path->GetExitLabel());
1549}
1550
1551void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1552 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1553 LocationSummary::kCall,
1554 kIntrinsified);
1555 InvokeRuntimeCallingConvention calling_convention;
1556 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1557 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1558 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1559 locations->SetOut(Location::RegisterLocation(EAX));
1560}
1561
1562void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1563 X86Assembler* assembler = GetAssembler();
1564
1565 __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars)));
1566 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1567}
1568
1569void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1570 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1571 LocationSummary::kCall,
1572 kIntrinsified);
1573 InvokeRuntimeCallingConvention calling_convention;
1574 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1575 locations->SetOut(Location::RegisterLocation(EAX));
Jeff Hao848f70a2014-01-15 13:49:50 -08001576}
1577
1578void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1579 X86Assembler* assembler = GetAssembler();
1580 LocationSummary* locations = invoke->GetLocations();
1581
1582 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1583 __ testl(string_to_copy, string_to_copy);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001584 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08001585 codegen_->AddSlowPath(slow_path);
1586 __ j(kEqual, slow_path->GetEntryLabel());
1587
1588 __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString)));
1589 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1590 __ Bind(slow_path->GetExitLabel());
1591}
1592
Mark Mendell8f8926a2015-08-17 11:39:06 -04001593void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1594 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1595 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1596 LocationSummary::kNoCall,
1597 kIntrinsified);
1598 locations->SetInAt(0, Location::RequiresRegister());
1599 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1600 // Place srcEnd in ECX to save a move below.
1601 locations->SetInAt(2, Location::RegisterLocation(ECX));
1602 locations->SetInAt(3, Location::RequiresRegister());
1603 locations->SetInAt(4, Location::RequiresRegister());
1604
1605 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1606 // We don't have enough registers to also grab ECX, so we handle that below.
1607 locations->AddTemp(Location::RegisterLocation(ESI));
1608 locations->AddTemp(Location::RegisterLocation(EDI));
1609}
1610
1611void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1612 X86Assembler* assembler = GetAssembler();
1613 LocationSummary* locations = invoke->GetLocations();
1614
1615 size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1616 // Location of data in char array buffer.
1617 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1618 // Location of char array data in string.
1619 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1620
1621 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1622 Register obj = locations->InAt(0).AsRegister<Register>();
1623 Location srcBegin = locations->InAt(1);
1624 int srcBegin_value =
1625 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1626 Register srcEnd = locations->InAt(2).AsRegister<Register>();
1627 Register dst = locations->InAt(3).AsRegister<Register>();
1628 Register dstBegin = locations->InAt(4).AsRegister<Register>();
1629
1630 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1631 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1632 DCHECK_EQ(char_size, 2u);
1633
1634 // Compute the address of the destination buffer.
1635 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1636
1637 // Compute the address of the source string.
1638 if (srcBegin.IsConstant()) {
1639 // Compute the address of the source string by adding the number of chars from
1640 // the source beginning to the value offset of a string.
1641 __ leal(ESI, Address(obj, srcBegin_value * char_size + value_offset));
1642 } else {
1643 __ leal(ESI, Address(obj, srcBegin.AsRegister<Register>(),
1644 ScaleFactor::TIMES_2, value_offset));
1645 }
1646
1647 // Compute the number of chars (words) to move.
1648 // Now is the time to save ECX, since we don't know if it will be used later.
1649 __ pushl(ECX);
1650 int stack_adjust = kX86WordSize;
1651 __ cfi().AdjustCFAOffset(stack_adjust);
1652 DCHECK_EQ(srcEnd, ECX);
1653 if (srcBegin.IsConstant()) {
1654 if (srcBegin_value != 0) {
1655 __ subl(ECX, Immediate(srcBegin_value));
1656 }
1657 } else {
1658 DCHECK(srcBegin.IsRegister());
1659 __ subl(ECX, srcBegin.AsRegister<Register>());
1660 }
1661
1662 // Do the move.
1663 __ rep_movsw();
1664
1665 // And restore ECX.
1666 __ popl(ECX);
1667 __ cfi().AdjustCFAOffset(-stack_adjust);
1668}
1669
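// Illustrative sketch only: the REP MOVSW copy above boils down to a bounds-
// unchecked memcpy of 16-bit chars (hypothetical helper; assumes <cstring>;
// the real intrinsic computes the two addresses with leal as shown above).
static inline void GetCharsNoCheckForReference(const uint16_t* value,
                                               int32_t src_begin,
                                               int32_t src_end,
                                               uint16_t* dst,
                                               int32_t dst_begin) {
  // ESI = value + src_begin, EDI = dst + dst_begin, ECX = src_end - src_begin.
  std::memcpy(dst + dst_begin, value + src_begin,
              (src_end - src_begin) * sizeof(uint16_t));
}
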
Mark Mendell09ed1a32015-03-25 08:30:06 -04001670static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1671 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1672 Location out_loc = locations->Out();
1673 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1674 // to avoid a SIGBUS.
1675 switch (size) {
1676 case Primitive::kPrimByte:
1677 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1678 break;
1679 case Primitive::kPrimShort:
1680 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1681 break;
1682 case Primitive::kPrimInt:
1683 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1684 break;
1685 case Primitive::kPrimLong:
1686 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1687 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1688 break;
1689 default:
1690 LOG(FATAL) << "Type not recognized for peek: " << size;
1691 UNREACHABLE();
1692 }
1693}
1694
1695void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1696 CreateLongToIntLocations(arena_, invoke);
1697}
1698
1699void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1700 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1701}
1702
1703void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1704 CreateLongToIntLocations(arena_, invoke);
1705}
1706
1707void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1708 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1709}
1710
1711void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1712 CreateLongToLongLocations(arena_, invoke);
1713}
1714
1715void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1716 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1717}
1718
1719void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1720 CreateLongToIntLocations(arena_, invoke);
1721}
1722
1723void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1724 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1725}
1726
1727static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
1728 HInvoke* invoke) {
1729 LocationSummary* locations = new (arena) LocationSummary(invoke,
1730 LocationSummary::kNoCall,
1731 kIntrinsified);
1732 locations->SetInAt(0, Location::RequiresRegister());
Roland Levillain4c0eb422015-04-24 16:43:49 +01001733 HInstruction* value = invoke->InputAt(1);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001734 if (size == Primitive::kPrimByte) {
1735 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1736 } else {
1737 locations->SetInAt(1, Location::RegisterOrConstant(value));
1738 }
1739}
1740
1741static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1742 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1743 Location value_loc = locations->InAt(1);
1744 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1745 // to avoid a SIGBUS.
1746 switch (size) {
1747 case Primitive::kPrimByte:
1748 if (value_loc.IsConstant()) {
1749 __ movb(Address(address, 0),
1750 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1751 } else {
1752 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1753 }
1754 break;
1755 case Primitive::kPrimShort:
1756 if (value_loc.IsConstant()) {
1757 __ movw(Address(address, 0),
1758 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1759 } else {
1760 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1761 }
1762 break;
1763 case Primitive::kPrimInt:
1764 if (value_loc.IsConstant()) {
1765 __ movl(Address(address, 0),
1766 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1767 } else {
1768 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1769 }
1770 break;
1771 case Primitive::kPrimLong:
1772 if (value_loc.IsConstant()) {
1773 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1774 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1775 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1776 } else {
1777 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1778 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1779 }
1780 break;
1781 default:
1782 LOG(FATAL) << "Type not recognized for poke: " << size;
1783 UNREACHABLE();
1784 }
1785}
1786
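// Illustrative sketch only: the contract GenPeek and GenPoke rely on. x86
// tolerates unaligned scalar loads and stores, so any width can be a plain
// move; only the 64-bit case needs two 32-bit moves, and is therefore not
// atomic. A hypothetical host-side equivalent (assumes <cstring>):
static inline int64_t PeekLongForReference(uintptr_t address) {
  int64_t result;
  // Like the movl pair above: an unaligned, non-atomic 8-byte read.
  std::memcpy(&result, reinterpret_cast<const void*>(address), sizeof(result));
  return result;
}
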
1787void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1788 CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
1789}
1790
1791void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1792 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1793}
1794
1795void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1796 CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
1797}
1798
1799void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1800 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1801}
1802
1803void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1804 CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
1805}
1806
1807void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1808 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1809}
1810
1811void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1812 CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
1813}
1814
1815void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1816 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1817}
1818
1819void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1820 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1821 LocationSummary::kNoCall,
1822 kIntrinsified);
1823 locations->SetOut(Location::RequiresRegister());
1824}
1825
1826void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1827 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
1828 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
1829}
1830
Roland Levillain0d5a2812015-11-13 10:07:31 +00001831static void GenUnsafeGet(HInvoke* invoke,
1832 Primitive::Type type,
1833 bool is_volatile,
1834 CodeGeneratorX86* codegen) {
1835 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1836 LocationSummary* locations = invoke->GetLocations();
1837 Location base_loc = locations->InAt(1);
1838 Register base = base_loc.AsRegister<Register>();
1839 Location offset_loc = locations->InAt(2);
1840 Register offset = offset_loc.AsRegisterPairLow<Register>();
1841 Location output_loc = locations->Out();
Mark Mendell09ed1a32015-03-25 08:30:06 -04001842
1843 switch (type) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001844 case Primitive::kPrimInt: {
Roland Levillain0d5a2812015-11-13 10:07:31 +00001845 Register output = output_loc.AsRegister<Register>();
1846 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
Roland Levillain7c1559a2015-12-15 10:55:36 +00001847 break;
1848 }
1849
1850 case Primitive::kPrimNot: {
1851 Register output = output_loc.AsRegister<Register>();
1852 if (kEmitCompilerReadBarrier) {
1853 if (kUseBakerReadBarrier) {
1854 Location temp = locations->GetTemp(0);
1855 codegen->GenerateArrayLoadWithBakerReadBarrier(
1856 invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
1857 } else {
1858 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1859 codegen->GenerateReadBarrierSlow(
1860 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1861 }
1862 } else {
1863 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1864 __ MaybeUnpoisonHeapReference(output);
Roland Levillain4d027112015-07-01 15:41:14 +01001865 }
Mark Mendell09ed1a32015-03-25 08:30:06 -04001866 break;
Roland Levillain4d027112015-07-01 15:41:14 +01001867 }
Mark Mendell09ed1a32015-03-25 08:30:06 -04001868
1869 case Primitive::kPrimLong: {
Roland Levillain0d5a2812015-11-13 10:07:31 +00001870 Register output_lo = output_loc.AsRegisterPairLow<Register>();
1871 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
Mark Mendell09ed1a32015-03-25 08:30:06 -04001872 if (is_volatile) {
1873 // Need to use a XMM to read atomically.
1874 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1875 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1876 __ movd(output_lo, temp);
1877 __ psrlq(temp, Immediate(32));
1878 __ movd(output_hi, temp);
1879 } else {
1880 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1881 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1882 }
1883 }
1884 break;
1885
1886 default:
1887 LOG(FATAL) << "Unsupported op size " << type;
1888 UNREACHABLE();
1889 }
1890}
1891
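// Illustrative sketch only: why the volatile long read above goes through an
// XMM register. Two separate movl loads could observe a torn value if another
// thread stores the field concurrently; a single 8-byte vector load cannot.
// Hypothetical host-side equivalent (assumes SSE2 and <emmintrin.h>, which
// this file does not otherwise include):
static inline int64_t VolatileLoad64ForReference(const void* addr) {
  __m128i v = _mm_loadl_epi64(static_cast<const __m128i*>(addr));  // movsd-style 8-byte load
  uint32_t lo = static_cast<uint32_t>(_mm_cvtsi128_si32(v));       // movd output_lo, xmm
  v = _mm_srli_epi64(v, 32);                                       // psrlq $32, xmm
  uint32_t hi = static_cast<uint32_t>(_mm_cvtsi128_si32(v));       // movd output_hi, xmm
  return static_cast<int64_t>((static_cast<uint64_t>(hi) << 32) | lo);
}
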
Roland Levillain7c1559a2015-12-15 10:55:36 +00001892static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
1893 HInvoke* invoke,
1894 Primitive::Type type,
1895 bool is_volatile) {
Roland Levillain0d5a2812015-11-13 10:07:31 +00001896 bool can_call = kEmitCompilerReadBarrier &&
1897 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1898 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001899 LocationSummary* locations = new (arena) LocationSummary(invoke,
Roland Levillain0d5a2812015-11-13 10:07:31 +00001900 can_call ?
1901 LocationSummary::kCallOnSlowPath :
1902 LocationSummary::kNoCall,
Mark Mendell09ed1a32015-03-25 08:30:06 -04001903 kIntrinsified);
1904 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1905 locations->SetInAt(1, Location::RequiresRegister());
1906 locations->SetInAt(2, Location::RequiresRegister());
Roland Levillain7c1559a2015-12-15 10:55:36 +00001907 if (type == Primitive::kPrimLong) {
Mark Mendell09ed1a32015-03-25 08:30:06 -04001908 if (is_volatile) {
1909 // Need to use XMM to read volatile.
1910 locations->AddTemp(Location::RequiresFpuRegister());
1911 locations->SetOut(Location::RequiresRegister());
1912 } else {
1913 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1914 }
1915 } else {
1916 locations->SetOut(Location::RequiresRegister());
1917 }
Roland Levillain7c1559a2015-12-15 10:55:36 +00001918 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1919 // We need a temporary register for the read barrier marking slow
1920 // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
1921 locations->AddTemp(Location::RequiresRegister());
1922 }
Mark Mendell09ed1a32015-03-25 08:30:06 -04001923}
1924
1925void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001926 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001927}
1928void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001929 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001930}
1931void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001932 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001933}
1934void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001935 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001936}
1937void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001938 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001939}
1940void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001941 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001942}
1943
1944
1945void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001946 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001947}
1948void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001949 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001950}
1951void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001952 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001953}
1954void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001955 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001956}
1957void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001958 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001959}
1960void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001961 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001962}
1963
1964
1965static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
1966 Primitive::Type type,
1967 HInvoke* invoke,
1968 bool is_volatile) {
1969 LocationSummary* locations = new (arena) LocationSummary(invoke,
1970 LocationSummary::kNoCall,
1971 kIntrinsified);
1972 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1973 locations->SetInAt(1, Location::RequiresRegister());
1974 locations->SetInAt(2, Location::RequiresRegister());
1975 locations->SetInAt(3, Location::RequiresRegister());
1976 if (type == Primitive::kPrimNot) {
1977 // Need temp registers for card-marking.
Roland Levillain4d027112015-07-01 15:41:14 +01001978 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
Mark Mendell09ed1a32015-03-25 08:30:06 -04001979 // Ensure the value is in a byte register.
1980 locations->AddTemp(Location::RegisterLocation(ECX));
1981 } else if (type == Primitive::kPrimLong && is_volatile) {
1982 locations->AddTemp(Location::RequiresFpuRegister());
1983 locations->AddTemp(Location::RequiresFpuRegister());
1984 }
1985}
1986
1987void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001988 CreateIntIntIntIntToVoidPlusTempsLocations(
1989 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001990}
1991void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001992 CreateIntIntIntIntToVoidPlusTempsLocations(
1993 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001994}
1995void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001996 CreateIntIntIntIntToVoidPlusTempsLocations(
1997 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001998}
1999void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002000 CreateIntIntIntIntToVoidPlusTempsLocations(
2001 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002002}
2003void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002004 CreateIntIntIntIntToVoidPlusTempsLocations(
2005 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002006}
2007void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002008 CreateIntIntIntIntToVoidPlusTempsLocations(
2009 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002010}
2011void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002012 CreateIntIntIntIntToVoidPlusTempsLocations(
2013 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002014}
2015void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002016 CreateIntIntIntIntToVoidPlusTempsLocations(
2017 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002018}
2019void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002020 CreateIntIntIntIntToVoidPlusTempsLocations(
2021 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002022}
2023
2024// The Ordered variants need no special handling: they only require an AnyStore barrier, which
2025// the x86 memory model already provides.
2026static void GenUnsafePut(LocationSummary* locations,
2027 Primitive::Type type,
2028 bool is_volatile,
2029 CodeGeneratorX86* codegen) {
Roland Levillainb488b782015-10-22 11:38:49 +01002030 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -04002031 Register base = locations->InAt(1).AsRegister<Register>();
2032 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2033 Location value_loc = locations->InAt(3);
2034
2035 if (type == Primitive::kPrimLong) {
2036 Register value_lo = value_loc.AsRegisterPairLow<Register>();
2037 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2038 if (is_volatile) {
2039 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2040 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2041 __ movd(temp1, value_lo);
2042 __ movd(temp2, value_hi);
2043 __ punpckldq(temp1, temp2);
2044 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2045 } else {
2046 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2047 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2048 }
Roland Levillain4d027112015-07-01 15:41:14 +01002049 } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
2050 Register temp = locations->GetTemp(0).AsRegister<Register>();
2051 __ movl(temp, value_loc.AsRegister<Register>());
2052 __ PoisonHeapReference(temp);
2053 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002054 } else {
2055 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2056 }
2057
2058 if (is_volatile) {
Mark P Mendell17077d82015-12-16 19:15:59 +00002059 codegen->MemoryFence();
Mark Mendell09ed1a32015-03-25 08:30:06 -04002060 }
2061
2062 if (type == Primitive::kPrimNot) {
Nicolas Geoffray07276db2015-05-18 14:22:09 +01002063 bool value_can_be_null = true; // TODO: Worth finding out this information?
Mark Mendell09ed1a32015-03-25 08:30:06 -04002064 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2065 locations->GetTemp(1).AsRegister<Register>(),
2066 base,
Nicolas Geoffray07276db2015-05-18 14:22:09 +01002067 value_loc.AsRegister<Register>(),
2068 value_can_be_null);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002069 }
2070}
2071
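// Illustrative sketch only: the matching volatile long store. movd + movd +
// punpckldq pack the two 32-bit halves into one XMM register so that the
// store is a single atomic 8-byte access (hypothetical helper, same SSE2
// assumption as the load sketch above):
static inline void VolatileStore64ForReference(void* addr, uint32_t lo, uint32_t hi) {
  __m128i vlo = _mm_cvtsi32_si128(static_cast<int32_t>(lo));  // movd temp1, value_lo
  __m128i vhi = _mm_cvtsi32_si128(static_cast<int32_t>(hi));  // movd temp2, value_hi
  __m128i packed = _mm_unpacklo_epi32(vlo, vhi);              // punpckldq temp1, temp2
  _mm_storel_epi64(static_cast<__m128i*>(addr), packed);      // movsd-style 8-byte store
}
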
2072void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002073 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002074}
2075void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002076 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002077}
2078void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002079 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002080}
2081void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002082 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002083}
2084void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002085 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002086}
2087void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002088 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002089}
2090void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002091 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002092}
2093void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002094 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002095}
2096void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002097 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002098}
2099
Mark Mendell58d25fd2015-04-03 14:52:31 -04002100static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
2101 HInvoke* invoke) {
2102 LocationSummary* locations = new (arena) LocationSummary(invoke,
2103 LocationSummary::kNoCall,
2104 kIntrinsified);
2105 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2106 locations->SetInAt(1, Location::RequiresRegister());
2107 // Offset is a long, but in 32 bit mode, we only need the low word.
2108 // TODO: Can we update the invoke here to remove the TypeConvert to Long?
2109 locations->SetInAt(2, Location::RequiresRegister());
2110 // Expected value must be in EAX or EDX:EAX.
2111 // For long, new value must be in ECX:EBX.
2112 if (type == Primitive::kPrimLong) {
2113 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2114 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2115 } else {
2116 locations->SetInAt(3, Location::RegisterLocation(EAX));
2117 locations->SetInAt(4, Location::RequiresRegister());
2118 }
2119
2120 // Force a byte register for the output.
2121 locations->SetOut(Location::RegisterLocation(EAX));
2122 if (type == Primitive::kPrimNot) {
2123 // Need temp registers for card-marking.
Roland Levillainb488b782015-10-22 11:38:49 +01002124 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
Mark Mendell58d25fd2015-04-03 14:52:31 -04002125 // Need a byte register for marking.
2126 locations->AddTemp(Location::RegisterLocation(ECX));
2127 }
2128}
2129
2130void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2131 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
2132}
2133
2134void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2135 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
2136}
2137
2138void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
Roland Levillain391b8662015-12-18 11:43:38 +00002139 // The UnsafeCASObject intrinsic is missing a read barrier, and
2140 // therefore sometimes does not work as expected (b/25883050).
2141 // Turn it off temporarily as a quick fix, until the read barrier is
2142 // implemented.
2143 //
2144 // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
2145 // this intrinsic.
2146 if (kEmitCompilerReadBarrier) {
2147 return;
2148 }
2149
Mark Mendell58d25fd2015-04-03 14:52:31 -04002150 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
2151}
2152
2153static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
Roland Levillainb488b782015-10-22 11:38:49 +01002154 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
Mark Mendell58d25fd2015-04-03 14:52:31 -04002155 LocationSummary* locations = invoke->GetLocations();
2156
2157 Register base = locations->InAt(1).AsRegister<Register>();
2158 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2159 Location out = locations->Out();
2160 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2161
Roland Levillainb488b782015-10-22 11:38:49 +01002162 if (type == Primitive::kPrimNot) {
Roland Levillain4d027112015-07-01 15:41:14 +01002163 Register expected = locations->InAt(3).AsRegister<Register>();
Roland Levillainb488b782015-10-22 11:38:49 +01002164 // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
Roland Levillain4d027112015-07-01 15:41:14 +01002165 DCHECK_EQ(expected, EAX);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002166 Register value = locations->InAt(4).AsRegister<Register>();
Roland Levillain4d027112015-07-01 15:41:14 +01002167
Roland Levillainb488b782015-10-22 11:38:49 +01002168 // Mark card for object assuming new value is stored.
2169 bool value_can_be_null = true; // TODO: Worth finding out this information?
2170 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2171 locations->GetTemp(1).AsRegister<Register>(),
2172 base,
2173 value,
2174 value_can_be_null);
2175
2176 bool base_equals_value = (base == value);
2177 if (kPoisonHeapReferences) {
2178 if (base_equals_value) {
2179 // If `base` and `value` are the same register location, move
2180 // `value` to a temporary register. This way, poisoning
2181 // `value` won't invalidate `base`.
2182 value = locations->GetTemp(0).AsRegister<Register>();
2183 __ movl(value, base);
Roland Levillain4d027112015-07-01 15:41:14 +01002184 }
Roland Levillainb488b782015-10-22 11:38:49 +01002185
2186 // Check that the register allocator did not assign the location
2187 // of `expected` (EAX) to `value` nor to `base`, so that heap
2188 // poisoning (when enabled) works as intended below.
2189 // - If `value` were equal to `expected`, both references would
2190 // be poisoned twice, meaning they would not be poisoned at
2191 // all, as heap poisoning uses address negation.
2192 // - If `base` were equal to `expected`, poisoning `expected`
2193 // would invalidate `base`.
2194 DCHECK_NE(value, expected);
2195 DCHECK_NE(base, expected);
2196
2197 __ PoisonHeapReference(expected);
2198 __ PoisonHeapReference(value);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002199 }
2200
Roland Levillain391b8662015-12-18 11:43:38 +00002201 // TODO: Add a read barrier for the reference stored in the object
2202 // before attempting the CAS, similar to the one in the
2203 // art::Unsafe_compareAndSwapObject JNI implementation.
2204 //
2205 // Note that this code is not (yet) used when read barriers are
2206 // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject).
2207 DCHECK(!kEmitCompilerReadBarrier);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002208 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002209
Roland Levillain0d5a2812015-11-13 10:07:31 +00002210 // LOCK CMPXCHG has full barrier semantics, and we don't need
Roland Levillainb488b782015-10-22 11:38:49 +01002211 // scheduling barriers at this time.
Mark Mendell58d25fd2015-04-03 14:52:31 -04002212
Roland Levillainb488b782015-10-22 11:38:49 +01002213 // Convert ZF into the boolean result.
2214 __ setb(kZero, out.AsRegister<Register>());
2215 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
Roland Levillain4d027112015-07-01 15:41:14 +01002216
Roland Levillain391b8662015-12-18 11:43:38 +00002217 // If heap poisoning is enabled, we need to unpoison the values
2218 // that were poisoned earlier.
Roland Levillainb488b782015-10-22 11:38:49 +01002219 if (kPoisonHeapReferences) {
2220 if (base_equals_value) {
2221 // `value` has been moved to a temporary register, no need to
2222 // unpoison it.
2223 } else {
2224 // Ensure `value` is different from `out`, so that unpoisoning
2225 // the former does not invalidate the latter.
2226 DCHECK_NE(value, out.AsRegister<Register>());
2227 __ UnpoisonHeapReference(value);
2228 }
2229 // Do not unpoison the reference contained in register
2230 // `expected`, as it is the same as register `out` (EAX).
2231 }
2232 } else {
2233 if (type == Primitive::kPrimInt) {
2234 // Ensure the expected value is in EAX (required by the CMPXCHG
2235 // instruction).
2236 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
2237 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0),
2238 locations->InAt(4).AsRegister<Register>());
2239 } else if (type == Primitive::kPrimLong) {
2240 // Ensure the expected value is in EAX:EDX and that the new
2241 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2242 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2243 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2244 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2245 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
2246 __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
2247 } else {
2248 LOG(FATAL) << "Unexpected CAS type " << type;
2249 }
2250
Roland Levillain0d5a2812015-11-13 10:07:31 +00002251 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2252 // don't need scheduling barriers at this time.
Roland Levillainb488b782015-10-22 11:38:49 +01002253
2254 // Convert ZF into the boolean result.
2255 __ setb(kZero, out.AsRegister<Register>());
2256 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
Roland Levillain4d027112015-07-01 15:41:14 +01002257 }
Mark Mendell58d25fd2015-04-03 14:52:31 -04002258}
2259
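// Illustrative sketch only: the Java-visible contract the CMPXCHG sequences
// above implement, expressed with std::atomic (hypothetical helper; assumes
// <atomic> and ignores card marking and heap poisoning):
static inline bool CompareAndSwap32ForReference(std::atomic<int32_t>* addr,
                                                int32_t expected,
                                                int32_t new_value) {
  // LOCK CMPXCHG compares *addr with EAX (expected); on a match it stores
  // new_value and sets ZF. setb(kZero) + movzxb then turn ZF into 0 or 1.
  return addr->compare_exchange_strong(expected, new_value);
}
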
2260void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2261 GenCAS(Primitive::kPrimInt, invoke, codegen_);
2262}
2263
2264void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2265 GenCAS(Primitive::kPrimLong, invoke, codegen_);
2266}
2267
2268void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2269 GenCAS(Primitive::kPrimNot, invoke, codegen_);
2270}
2271
2272void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2273 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2274 LocationSummary::kNoCall,
2275 kIntrinsified);
2276 locations->SetInAt(0, Location::RequiresRegister());
2277 locations->SetOut(Location::SameAsFirstInput());
2278 locations->AddTemp(Location::RequiresRegister());
2279}
2280
2281static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2282 X86Assembler* assembler) {
2283 Immediate imm_shift(shift);
2284 Immediate imm_mask(mask);
2285 __ movl(temp, reg);
2286 __ shrl(reg, imm_shift);
2287 __ andl(temp, imm_mask);
2288 __ andl(reg, imm_mask);
2289 __ shll(temp, imm_shift);
2290 __ orl(reg, temp);
2291}
2292
2293void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
Aart Bika19616e2016-02-01 18:57:58 -08002294 X86Assembler* assembler = GetAssembler();
Mark Mendell58d25fd2015-04-03 14:52:31 -04002295 LocationSummary* locations = invoke->GetLocations();
2296
2297 Register reg = locations->InAt(0).AsRegister<Register>();
2298 Register temp = locations->GetTemp(0).AsRegister<Register>();
2299
2300 /*
2301 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2302 * swapping bits to reverse the bits of a number x. Using bswap saves instructions
2303 * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2304 * x = bswap x
2305 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2306 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2307 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2308 */
2309 __ bswapl(reg);
2310 SwapBits(reg, temp, 1, 0x55555555, assembler);
2311 SwapBits(reg, temp, 2, 0x33333333, assembler);
2312 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2313}
2314
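// Illustrative sketch only: the bswap plus three SwapBits rounds above, as
// plain C++ (hypothetical helper; assumes GCC/Clang's __builtin_bswap32 for
// the bswapl step):
static inline uint32_t ReverseBits32ForReference(uint32_t x) {
  x = __builtin_bswap32(x);                                 // bswapl: reverse byte order
  x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);  // swap adjacent bits
  x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);  // swap bit pairs
  x = ((x & 0x0F0F0F0Fu) << 4) | ((x >> 4) & 0x0F0F0F0Fu);  // swap nibbles
  return x;
}
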
2315void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2316 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2317 LocationSummary::kNoCall,
2318 kIntrinsified);
2319 locations->SetInAt(0, Location::RequiresRegister());
2320 locations->SetOut(Location::SameAsFirstInput());
2321 locations->AddTemp(Location::RequiresRegister());
2322}
2323
2324void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
Aart Bika19616e2016-02-01 18:57:58 -08002325 X86Assembler* assembler = GetAssembler();
Mark Mendell58d25fd2015-04-03 14:52:31 -04002326 LocationSummary* locations = invoke->GetLocations();
2327
2328 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2329 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2330 Register temp = locations->GetTemp(0).AsRegister<Register>();
2331
2332 // We want to swap high/low, then bswap each one, and then do the same
2333 // as a 32 bit reverse.
2334 // Exchange high and low.
2335 __ movl(temp, reg_low);
2336 __ movl(reg_low, reg_high);
2337 __ movl(reg_high, temp);
2338
2339 // bit-reverse low
2340 __ bswapl(reg_low);
2341 SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2342 SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2343 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2344
2345 // bit-reverse high
2346 __ bswapl(reg_high);
2347 SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2348 SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2349 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2350}
2351
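// Illustrative sketch only: the 64-bit reversal composes the 32-bit routine.
// Reversing all 64 bits sends the (bit-reversed) low half to the high half
// and vice versa, which is exactly the register swap above (hypothetical
// helper, reusing the 32-bit sketch):
static inline uint64_t ReverseBits64ForReference(uint64_t x) {
  uint32_t lo = static_cast<uint32_t>(x);
  uint32_t hi = static_cast<uint32_t>(x >> 32);
  return (static_cast<uint64_t>(ReverseBits32ForReference(lo)) << 32) |
         ReverseBits32ForReference(hi);
}
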
Aart Bikc39dac12016-01-21 08:59:48 -08002352static void CreateBitCountLocations(
2353 ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2354 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2355 // Do nothing if there is no popcnt support. This results in generating
2356 // a call for the intrinsic rather than direct code.
2357 return;
2358 }
2359 LocationSummary* locations = new (arena) LocationSummary(invoke,
2360 LocationSummary::kNoCall,
2361 kIntrinsified);
2362 if (is_long) {
Aart Bikc39dac12016-01-21 08:59:48 -08002363 locations->AddTemp(Location::RequiresRegister());
Aart Bikc39dac12016-01-21 08:59:48 -08002364 }
Aart Bik2a946072016-01-21 12:49:00 -08002365 locations->SetInAt(0, Location::Any());
Aart Bikc39dac12016-01-21 08:59:48 -08002366 locations->SetOut(Location::RequiresRegister());
2367}
2368
Aart Bika19616e2016-02-01 18:57:58 -08002369static void GenBitCount(X86Assembler* assembler,
2370 CodeGeneratorX86* codegen,
2371 HInvoke* invoke, bool is_long) {
Aart Bikc39dac12016-01-21 08:59:48 -08002372 LocationSummary* locations = invoke->GetLocations();
2373 Location src = locations->InAt(0);
2374 Register out = locations->Out().AsRegister<Register>();
2375
2376 if (invoke->InputAt(0)->IsConstant()) {
2377 // Evaluate this at compile time.
2378 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2379 value = is_long
2380 ? POPCOUNT(static_cast<uint64_t>(value))
2381 : POPCOUNT(static_cast<uint32_t>(value));
Aart Bika19616e2016-02-01 18:57:58 -08002382 codegen->Load32BitValue(out, value);
Aart Bikc39dac12016-01-21 08:59:48 -08002383 return;
2384 }
2385
2386 // Handle the non-constant cases.
2387 if (!is_long) {
2388 if (src.IsRegister()) {
2389 __ popcntl(out, src.AsRegister<Register>());
2390 } else {
2391 DCHECK(src.IsStackSlot());
2392 __ popcntl(out, Address(ESP, src.GetStackIndex()));
2393 }
Aart Bik2a946072016-01-21 12:49:00 -08002394 } else {
2395 // The 64-bit case needs to worry about two parts.
2396 Register temp = locations->GetTemp(0).AsRegister<Register>();
2397 if (src.IsRegisterPair()) {
2398 __ popcntl(temp, src.AsRegisterPairLow<Register>());
2399 __ popcntl(out, src.AsRegisterPairHigh<Register>());
2400 } else {
2401 DCHECK(src.IsDoubleStackSlot());
2402 __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2403 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2404 }
2405 __ addl(out, temp);
Aart Bikc39dac12016-01-21 08:59:48 -08002406 }
Aart Bikc39dac12016-01-21 08:59:48 -08002407}
2408
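// Illustrative sketch only: the 64-bit population count above is just the sum
// of two 32-bit POPCNTs (hypothetical helper; assumes __builtin_popcount,
// which lowers to popcntl when the CPU supports it):
static inline int32_t PopCount64ForReference(uint64_t x) {
  return __builtin_popcount(static_cast<uint32_t>(x)) +       // popcntl temp, lo
         __builtin_popcount(static_cast<uint32_t>(x >> 32));  // popcntl out, hi; addl out, temp
}
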
2409void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2410 CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
2411}
2412
2413void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
Aart Bika19616e2016-02-01 18:57:58 -08002414 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
Aart Bikc39dac12016-01-21 08:59:48 -08002415}
2416
2417void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2418 CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
2419}
2420
2421void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
Aart Bika19616e2016-02-01 18:57:58 -08002422 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
Aart Bikc39dac12016-01-21 08:59:48 -08002423}
2424
Mark Mendelld5897672015-08-12 21:16:41 -04002425static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
2426 LocationSummary* locations = new (arena) LocationSummary(invoke,
2427 LocationSummary::kNoCall,
2428 kIntrinsified);
2429 if (is_long) {
2430 locations->SetInAt(0, Location::RequiresRegister());
2431 } else {
2432 locations->SetInAt(0, Location::Any());
2433 }
2434 locations->SetOut(Location::RequiresRegister());
2435}
2436
Aart Bika19616e2016-02-01 18:57:58 -08002437static void GenLeadingZeros(X86Assembler* assembler,
2438 CodeGeneratorX86* codegen,
2439 HInvoke* invoke, bool is_long) {
Mark Mendelld5897672015-08-12 21:16:41 -04002440 LocationSummary* locations = invoke->GetLocations();
2441 Location src = locations->InAt(0);
2442 Register out = locations->Out().AsRegister<Register>();
2443
2444 if (invoke->InputAt(0)->IsConstant()) {
2445 // Evaluate this at compile time.
2446 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2447 if (value == 0) {
2448 value = is_long ? 64 : 32;
2449 } else {
2450 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2451 }
Aart Bika19616e2016-02-01 18:57:58 -08002452 codegen->Load32BitValue(out, value);
Mark Mendelld5897672015-08-12 21:16:41 -04002453 return;
2454 }
2455
2456 // Handle the non-constant cases.
2457 if (!is_long) {
2458 if (src.IsRegister()) {
2459 __ bsrl(out, src.AsRegister<Register>());
2460 } else {
2461 DCHECK(src.IsStackSlot());
2462 __ bsrl(out, Address(ESP, src.GetStackIndex()));
2463 }
2464
2465 // BSR sets ZF if the input was zero; in that case the output is undefined.
Mark Mendell0c9497d2015-08-21 09:30:05 -04002466 NearLabel all_zeroes, done;
Mark Mendelld5897672015-08-12 21:16:41 -04002467 __ j(kEqual, &all_zeroes);
2468
2469 // Correct the result from BSR to get the final CLZ result.
2470 __ xorl(out, Immediate(31));
2471 __ jmp(&done);
2472
2473 // Fix the zero case with the expected result.
2474 __ Bind(&all_zeroes);
2475 __ movl(out, Immediate(32));
2476
2477 __ Bind(&done);
2478 return;
2479 }
2480
2481 // The 64-bit case needs to worry about both parts of the register.
2482 DCHECK(src.IsRegisterPair());
2483 Register src_lo = src.AsRegisterPairLow<Register>();
2484 Register src_hi = src.AsRegisterPairHigh<Register>();
Mark Mendell0c9497d2015-08-21 09:30:05 -04002485 NearLabel handle_low, done, all_zeroes;
Mark Mendelld5897672015-08-12 21:16:41 -04002486
2487 // Is the high word zero?
2488 __ testl(src_hi, src_hi);
2489 __ j(kEqual, &handle_low);
2490
2491 // High word is not zero. We know that the BSR result is defined in this case.
2492 __ bsrl(out, src_hi);
2493
2494 // Correct the result from BSR to get the final CLZ result.
2495 __ xorl(out, Immediate(31));
2496 __ jmp(&done);
2497
2498 // High word was zero. We have to compute the low word count and add 32.
2499 __ Bind(&handle_low);
2500 __ bsrl(out, src_lo);
2501 __ j(kEqual, &all_zeroes);
2502
2503 // We had a valid result. Use an XOR to both correct the result and add 32.
2504 __ xorl(out, Immediate(63));
2505 __ jmp(&done);
2506
2507 // All zero case.
2508 __ Bind(&all_zeroes);
2509 __ movl(out, Immediate(64));
2510
2511 __ Bind(&done);
2512}
2513
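// Illustrative sketch only: the BSR-to-CLZ arithmetic above. BSR yields the
// index of the highest set bit, so clz(x) == 31 - bsr(x) == bsr(x) ^ 31 for a
// non-zero 32-bit value, and xorl with 63 folds in the extra 32 when the high
// word is zero (hypothetical helper; __builtin_clz is undefined for zero,
// hence the guards):
static inline int32_t NumberOfLeadingZeros64ForReference(uint64_t x) {
  uint32_t hi = static_cast<uint32_t>(x >> 32);
  uint32_t lo = static_cast<uint32_t>(x);
  if (hi != 0) {
    return __builtin_clz(hi);       // bsrl out, hi; xorl $31, out
  }
  if (lo != 0) {
    return 32 + __builtin_clz(lo);  // bsrl out, lo; xorl $63, out
  }
  return 64;                        // all-zeroes case
}
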
2514void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2515 CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false);
2516}
2517
2518void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
Aart Bika19616e2016-02-01 18:57:58 -08002519 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
Mark Mendelld5897672015-08-12 21:16:41 -04002520}
2521
2522void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2523 CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true);
2524}
2525
2526void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
Aart Bika19616e2016-02-01 18:57:58 -08002527 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
Mark Mendelld5897672015-08-12 21:16:41 -04002528}
2529
Mark Mendell2d554792015-09-15 21:45:18 -04002530static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
2531 LocationSummary* locations = new (arena) LocationSummary(invoke,
2532 LocationSummary::kNoCall,
2533 kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86Assembler* assembler,
                             CodeGeneratorX86* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = is_long ? 64 : 32;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
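    // The result is in [0, 64], so it always fits in the 32-bit output register.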
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ bsfl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ bsfl(out, Address(ESP, src.GetStackIndex()));
    }

    // BSF sets ZF if the input was zero, in which case the output is undefined.
    NearLabel done;
    __ j(kNotEqual, &done);

    // Fix the zero case with the expected result.
    __ movl(out, Immediate(32));

    __ Bind(&done);
    return;
  }

  // The 64-bit case needs to look at both halves of the register pair.
  DCHECK(src.IsRegisterPair());
  Register src_lo = src.AsRegisterPairLow<Register>();
  Register src_hi = src.AsRegisterPairHigh<Register>();
  NearLabel done, all_zeroes;

  // If the low word is zero, then ZF will be set. If not, we have the answer.
  __ bsfl(out, src_lo);
  __ j(kNotEqual, &done);

  // Low word was zero. We have to compute the high word count and add 32.
  __ bsfl(out, src_hi);
  __ j(kEqual, &all_zeroes);

  // We had a valid result. Add 32 to account for the low word being zero.
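  // (e.g. src == 0x0000000800000000: BSF(src_hi) == 3, and 3 + 32 == 35 == CTZ(src).)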
  __ addl(out, Immediate(32));
  __ jmp(&done);

  // All zero case.
  __ Bind(&all_zeroes);
  __ movl(out, Immediate(64));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

// Unimplemented intrinsics.
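// An empty locations builder produces no LocationSummary, so TryDispatch reports
// these invokes as not intrinsified and they take the regular invoke path.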

#define UNIMPLEMENTED_INTRINSIC(Name) \
void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
} \
void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}

UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)

UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)

UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)

// Handled as HIR instructions.
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateRight)
UNIMPLEMENTED_INTRINSIC(IntegerCompare)
UNIMPLEMENTED_INTRINSIC(LongCompare)
UNIMPLEMENTED_INTRINSIC(IntegerSignum)
UNIMPLEMENTED_INTRINSIC(LongSignum)

#undef UNIMPLEMENTED_INTRINSIC

#undef __

}  // namespace x86
}  // namespace art