/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
  if (invoke->InputCount() == 0) {
    return;
  }

  LocationSummary* locations = invoke->GetLocations();
  InvokeDexCallingConventionVisitor calling_convention_visitor;

  // We're moving potentially two or more locations to locations that could overlap, so we need
  // a parallel move resolver.
  HParallelMove parallel_move(arena);

  for (size_t i = 0; i < invoke->InputCount(); i++) {
    HInstruction* input = invoke->InputAt(i);
    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
    Location actual_loc = locations->InAt(i);

    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
  }

  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

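// Note: the movd/movq moves below copy the raw IEEE-754 bit pattern between an XMM register and
// a general-purpose register without any numeric conversion, which is exactly the contract of
// Double.doubleToRawLongBits()/Float.floatToRawIntBits() and their inverses.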
static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
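      // A sketch of why bswap + sar works here: bswapl reverses all four bytes, leaving the
      // byte-swapped short in the upper half, and the arithmetic shift brings it back down
      // while sign-extending. E.g. 0x00001234 -> bswapl -> 0x34120000 -> sarl 16 -> 0x00003412.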
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with constant area using pand if we can guarantee
    // that the literal is aligned on a 16 byte boundary. This will avoid a
    // temporary.
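    // The masks below clear only the IEEE-754 sign bit, which is all abs() needs: for example,
    // andps with 0x7FFFFFFF turns -1.0f (bit pattern 0xBF800000) into 1.0f (0x3F800000), while
    // NaN payloads pass through unchanged.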
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
//  Once assembler support is available, in-memory operations look like this:
//  if (is64bit) {
//    DCHECK(output.IsDoubleStackSlot());
//    // No 64b and with literal.
//    __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//    __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//  } else {
//    DCHECK(output.IsStackSlot());
//    // Can use and with a literal directly.
//    __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//  }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

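  // Branchless abs: mask = x >> 31 (or 63) is 0 for non-negative x and all ones for negative x,
  // so (x + mask) ^ mask leaves non-negative values untouched and negates negative ones.
  // Worked example for x = -5: mask = -1, x + mask = -6, and -6 ^ -1 == 5.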
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
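  // The operands compared equal here. OR-ing the raw bit patterns yields -0.0 whenever either
  // input is -0.0 (the right answer for min), AND-ing yields +0.0 unless both are -0.0 (the
  // right answer for max), and both are no-ops for other equal values.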
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input as the output (we could simply swap the inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
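  // round_mode is the SSE4.1 rounding-control immediate: 0 selects round-to-nearest (rint),
  // 1 rounds toward negative infinity (floor), and 2 rounds toward positive infinity (ceil).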
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Generate 0.5 into inPlusPointFive.
  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
  __ movd(inPlusPointFive, out, false);

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor to an integral value (roundss with immediate 1 rounds toward negative infinity).
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  __ movl(out, Immediate(kPrimIntMax));
  // maxInt = int-to-float(out)
  __ cvtsi2ss(maxInt, out);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, maxInt);
  __ j(kAboveEqual, &done);
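  // Note: out still holds kPrimIntMax here, so taking this branch returns Integer.MAX_VALUE,
  // which is what Math.round(float) specifies for results at or above that bound.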

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Generate 0.5 into inPlusPointFive.
  __ movq(out, Immediate(bit_cast<int64_t, double>(0.5)));
  __ movd(inPlusPointFive, out, true);

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And floor to an integral value (roundsd with immediate 1 rounds toward negative infinity).
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  __ movq(out, Immediate(kPrimLongMax));
  // maxLong = long-to-double(out)
  __ cvtsi2sd(maxLong, out, true);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, maxLong);
  __ j(kAboveEqual, &done);
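  // Note: out still holds kPrimLongMax here, so taking this branch returns Long.MAX_VALUE,
  // which is what Math.round(double) specifies for results at or above that bound.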

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorq(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array.
  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within the array.
  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  Location temp_loc = locations->GetTemp(0);
  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so, unlike Quick,
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // Get the actual element.
  __ movl(temp, idx);                          // temp := idx.
  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
  __ movl(out, Address(obj, value_offset));    // out := obj.array.
  // out = out[2*temp].
  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movb(Address(address, 0), value);
      break;
    case Primitive::kPrimShort:
      __ movw(Address(address, 0), value);
      break;
    case Primitive::kPrimInt:
      __ movl(Address(address, 0), value);
      break;
    case Primitive::kPrimLong:
      __ movq(Address(address, 0), value);
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

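  // The Unsafe offset is already a byte offset, hence the scale factor of 1 below. No extra
  // code is needed for the volatile case: an x86-64 load already has the acquire semantics a
  // volatile read requires, which is why is_volatile is unused.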
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    case Primitive::kPrimLong:
      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}


void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

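  // A volatile store additionally needs a StoreLoad barrier: x86-TSO only allows a store to be
  // reordered with a later load, and mfence closes exactly that window.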
  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // The expected value must be in EAX/RAX.
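  // (lock cmpxchg implicitly compares RAX with the memory operand and, on failure, loads the
  // current memory value back into RAX.)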
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value);
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
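  // A sketch of one round on a single byte, with mask 0x55 and shift 1:
  // x = 0b01100001 -> ((x & 0x55) << 1) | ((x >> 1) & 0x55)
  //   = (0b01000001 << 1) | (0b00110000 & 0x55) = 0b10000010 | 0b00010000 = 0b10010010,
  // i.e. every adjacent pair of bits has been swapped.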
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
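  // Unlike the 32-bit variant, the 64-bit mask cannot be an instruction immediate (x86-64
  // and/or accept at most a sign-extended 32-bit immediate), so it is materialized in
  // temp_mask first.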
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a long number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name) \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
} \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}

UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art