/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int kFloatNaN = 0x7FC00000;

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}

X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->

// TODO: target as memory.
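// Results come back from a call in EAX (int-sized values), EAX:EDX (longs) or XMM0
// (floats/doubles); this copies them into the location the invoke expects its result in.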
static void MoveFromReturnRegister(Location target,
    Primitive::Type type,
    CodeGeneratorX86* codegen) {
  if (!target.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      Register target_reg = target.AsRegister<Register>();
      if (target_reg != EAX) {
        __ movl(target_reg, EAX);
      }
      break;
    }
    case Primitive::kPrimLong: {
      Register target_reg_lo = target.AsRegisterPairLow<Register>();
      Register target_reg_hi = target.AsRegisterPairHigh<Register>();
      if (target_reg_lo != EAX) {
        __ movl(target_reg_lo, EAX);
      }
      if (target_reg_hi != EDX) {
        __ movl(target_reg_hi, EDX);
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << target;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
      if (target_reg != XMM0) {
        __ movsd(target_reg, XMM0);
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
      if (target_reg != XMM0) {
        __ movss(target_reg, XMM0);
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
  if (invoke->InputCount() == 0) {
    return;
  }

  LocationSummary* locations = invoke->GetLocations();
  InvokeDexCallingConventionVisitor calling_convention_visitor;

  // We're moving potentially two or more locations to locations that could overlap, so we need
  // a parallel move resolver.
  HParallelMove parallel_move(arena);

  for (size_t i = 0; i < invoke->InputCount(); i++) {
    HInstruction* input = invoke->InputAt(i);
    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
    Location actual_loc = locations->InAt(i);

    parallel_move.AddMove(actual_loc, cc_loc, nullptr);
  }

  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
 public:
  explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp)
      : invoke_(invoke) {
    // The temporary register has to be EAX for x86 invokes.
    DCHECK_EQ(temp, EAX);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
    Primitive::Type size,
    X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
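      // bswap reverses all four bytes, leaving the original low half in the upper
      // 16 bits; the arithmetic shift moves it back down and sign-extends the result.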
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
}

static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // Create the right constant on an aligned stack.
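    // The constant is a sign-clearing mask (0x7FFFFFFFFFFFFFFF for doubles,
    // 0x7FFFFFFF for floats); 16 bytes are reserved in both cases so that the
    // andpd/andps memory operand below is 16-byte aligned.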
    if (is64bit) {
      __ subl(ESP, Immediate(8));
      __ pushl(Immediate(0x7FFFFFFF));
      __ pushl(Immediate(0xFFFFFFFF));
      __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    } else {
      __ subl(ESP, Immediate(12));
      __ pushl(Immediate(0x7FFFFFFF));
      __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    }
    __ addl(ESP, Immediate(16));
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
    // Once assembler support is available, in-memory operations look like this:
    //   if (is64bit) {
    //     DCHECK(output.IsDoubleStackSlot());
    //     __ andl(Address(Register(RSP), output.GetHighStackIndex(kX86WordSize)),
    //             Immediate(0x7FFFFFFF));
    //   } else {
    //     DCHECK(output.IsStackSlot());
    //     // Can use and with a literal directly.
    //     __ andl(Address(Register(RSP), output.GetStackIndex()), Immediate(0x7FFFFFFF));
    //   }
  }
}

void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(EAX));
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RegisterLocation(EDX));
}

static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
  Location output = locations->Out();
  Register out = output.AsRegister<Register>();
  DCHECK_EQ(out, EAX);
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(temp, EDX);

  // Sign extend EAX into EDX.
  __ cdq();

  // XOR EAX with sign.
  __ xorl(EAX, EDX);

  // Subtract out sign to correct.
  __ subl(EAX, EDX);

  // The result is in EAX.
}

static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // Compute the sign into the temporary.
  __ movl(temp, input_hi);
  __ sarl(temp, Immediate(31));

  // Store the sign into the output.
  __ movl(output_lo, temp);
  __ movl(output_hi, temp);

  // XOR the input to the output.
  __ xorl(output_lo, input_lo);
  __ xorl(output_hi, input_hi);

  // Subtract the sign.
  __ subl(output_lo, temp);
  __ sbbl(output_hi, temp);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
  CreateAbsIntLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
  CreateAbsLongLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsLong(invoke->GetLocations(), GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
    X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
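  // When the operands compare equal, the interesting case is +0.0 vs -0.0: for min,
  // OR-ing the sign bits yields -0.0 if either operand is -0.0; for max, AND-ing
  // them yields +0.0 unless both operands are -0.0.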
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ pushl(Immediate(kDoubleNaNHigh));
    __ pushl(Immediate(kDoubleNaNLow));
    __ movsd(out, Address(ESP, 0));
    __ addl(ESP, Immediate(8));
  } else {
    __ pushl(Immediate(kFloatNaN));
    __ movss(out, Address(ESP, 0));
    __ addl(ESP, Immediate(4));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
    X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  if (is_long) {
    // Need to perform a subtract to get the sign right.
    // op1 is already in the same location as the output.
    Location output = locations->Out();
    Register output_lo = output.AsRegisterPairLow<Register>();
    Register output_hi = output.AsRegisterPairHigh<Register>();

    Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();

    // Spare register to compute the subtraction to set condition code.
    Register temp = locations->GetTemp(0).AsRegister<Register>();

    // Subtract off op2_low.
    __ movl(temp, output_lo);
    __ subl(temp, op2_lo);

    // Now use the same temp and the borrow to finish the subtraction of op2_hi.
    __ movl(temp, output_hi);
    __ sbbl(temp, op2_hi);

    // Now the condition code is correct.
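    // The subtraction result itself is discarded; cmov overwrites the output (op1)
    // with op2 when the condition holds: out >= op2 for min, out < op2 for max.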
    Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    __ cmovl(cond, output_lo, op2_lo);
    __ cmovl(cond, output_hi, op2_hi);
  } else {
    Register out = locations->Out().AsRegister<Register>();
    Register op2 = op2_loc.AsRegister<Register>();

    //  (out := op1)
    //  out <=? op2
    //  if out is min jmp done
    //  out := op2
    // done:

    __ cmpl(out, op2);
    Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    __ cmovl(cond, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Register to use to perform a long subtract to set cc.
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
    HInvoke* invoke,
    CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
    HInvoke* invoke,
    X86Assembler* assembler,
    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
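    // SSE4.1 roundsd rounding-mode immediates: 0 = round to nearest (even),
    // 1 = round toward negative infinity (floor), 2 = round toward positive infinity (ceil).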
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
// as it needs 64 bit instructions.
void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
  if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
        LocationSummary::kNoCall,
        kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    // The result of Math.round(float) is an int, so the output must be a core register.
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
      LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86Assembler* assembler = GetAssembler();

  // Generate 0.5 into inPlusPointFive.
  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
  __ movd(inPlusPointFive, out);

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And truncate to an integer.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  __ movl(out, Immediate(kPrimIntMax));
  // maxInt = int-to-float(out)
  __ cvtsi2ss(maxInt, out);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, maxInt);
  __ j(kAboveEqual, &done);
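  // Note: out already holds kPrimIntMax here, which is the saturated result required
  // when the rounded value does not fit in an int.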

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);
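  // Math.round(NaN) is specified to return 0.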

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
      LocationSummary::kCallOnSlowPath,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array
  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_
  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  Register obj = locations->InAt(0).AsRegister<Register>();
  Register idx = locations->InAt(1).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();
  Location temp_loc = locations->GetTemp(0);
  Register temp = temp_loc.AsRegister<Register>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke, temp);
  codegen_->AddSlowPath(slow_path);

  X86Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // Get the actual element.
  __ movl(temp, idx);                          // temp := idx.
  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
  __ movl(out, Address(obj, value_offset));    // out := obj.array.
  // out = out[2*temp].
  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
      LocationSummary::kCall,
      kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheck());

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ testl(argument, argument);
  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
      invoke, locations->GetTemp(0).AsRegister<Register>());
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
  Register address = locations->InAt(0).AsRegisterPairLow<Register>();
  Location out_loc = locations->Out();
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
      __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateLongToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateLongToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateLongToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
    HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  HInstruction* value = invoke->InputAt(1);
  if (size == Primitive::kPrimByte) {
    locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
  } else {
    locations->SetInAt(1, Location::RegisterOrConstant(value));
  }
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
  Register address = locations->InAt(0).AsRegisterPairLow<Register>();
  Location value_loc = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value_loc.IsConstant()) {
        __ movb(Address(address, 0),
            Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value_loc.IsConstant()) {
        __ movw(Address(address, 0),
            Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ movw(Address(address, 0), value_loc.AsRegister<Register>());
      }
      break;
    case Primitive::kPrimInt:
      if (value_loc.IsConstant()) {
        __ movl(Address(address, 0),
            Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ movl(Address(address, 0), value_loc.AsRegister<Register>());
      }
      break;
    case Primitive::kPrimLong:
      if (value_loc.IsConstant()) {
        int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
        __ movl(Address(address, 0), Immediate(Low32Bits(value)));
        __ movl(Address(address, 4), Immediate(High32Bits(value)));
      } else {
        __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
        __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
  GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
    bool is_volatile, X86Assembler* assembler) {
  Register base = locations->InAt(1).AsRegister<Register>();
  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
  Location output = locations->Out();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(output.AsRegister<Register>(), Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    case Primitive::kPrimLong: {
        Register output_lo = output.AsRegisterPairLow<Register>();
        Register output_hi = output.AsRegisterPairHigh<Register>();
        if (is_volatile) {
          // Need to use a XMM to read atomically.
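          // A single 8-byte SSE load gives the atomicity required for a volatile
          // 64-bit read on 32-bit x86; two separate 32-bit loads could tear.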
          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
          __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
          __ movd(output_lo, temp);
          __ psrlq(temp, Immediate(32));
          __ movd(output_hi, temp);
        } else {
          __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
          __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
        }
      }
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
    bool is_long, bool is_volatile) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  if (is_long) {
    if (is_volatile) {
      // Need to use XMM to read volatile.
      locations->AddTemp(Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
    } else {
      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
    }
  } else {
    locations->SetOut(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, true, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, true, true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
}

void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}

static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
    Primitive::Type type,
    HInvoke* invoke,
    bool is_volatile) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    // Ensure the value is in a byte register.
    locations->AddTemp(Location::RegisterLocation(ECX));
  } else if (type == Primitive::kPrimLong && is_volatile) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, true);
}

// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
static void GenUnsafePut(LocationSummary* locations,
    Primitive::Type type,
    bool is_volatile,
    CodeGeneratorX86* codegen) {
  X86Assembler* assembler = reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
  Register base = locations->InAt(1).AsRegister<Register>();
  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
  Location value_loc = locations->InAt(3);

  if (type == Primitive::kPrimLong) {
    Register value_lo = value_loc.AsRegisterPairLow<Register>();
    Register value_hi = value_loc.AsRegisterPairHigh<Register>();
    if (is_volatile) {
      XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
      __ movd(temp1, value_lo);
      __ movd(temp2, value_hi);
      __ punpckldq(temp1, temp2);
      __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
    } else {
      __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
      __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
    }
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
  }

  if (is_volatile) {
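    // On x86 only StoreLoad reordering is possible, so a volatile store just needs
    // this trailing mfence to order it before any subsequent volatile loads.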
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
        locations->GetTemp(1).AsRegister<Register>(),
        base,
        value_loc.AsRegister<Register>());
  }
}

void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name) \
void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
} \
void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}

UNIMPLEMENTED_INTRINSIC(IntegerReverse)
UNIMPLEMENTED_INTRINSIC(LongReverse)
UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86
}  // namespace art