/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
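  // Dispatch() runs the locations-builder visitor for this particular invoke. Intrinsics that
  // are supported create a LocationSummary marked as intrinsified, which is what we test for.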
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
  if (invoke->InputCount() == 0) {
    return;
  }

  LocationSummary* locations = invoke->GetLocations();
  InvokeDexCallingConventionVisitor calling_convention_visitor;

  // We're moving potentially two or more locations to locations that could overlap, so we need
  // a parallel move resolver.
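  // A parallel move treats all of these moves as happening at the same time, so the resolver can
  // break dependency cycles (for example two arguments that need to swap registers) by using a
  // temporary instead of silently clobbering one of the inputs.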
  HParallelMove parallel_move(arena);

  for (size_t i = 0; i < invoke->InputCount(); i++) {
    HInstruction* input = invoke->InputAt(i);
    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
    Location actual_loc = locations->InAt(i);

    parallel_move.AddMove(actual_loc, cc_loc, nullptr);
  }

  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    codegen->SaveLiveRegisters(invoke_->GetLocations());

    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    codegen->RestoreLiveRegisters(invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
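      // Sketch of the effect: a short is sign-extended in the 32-bit register, so for 0x00001234
      // bswapl gives 0x34120000 and the arithmetic shift right by 16 leaves 0x00003412, i.e. the
      // byte-swapped short, again sign-extended.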
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
  locations->AddTemp(Location::RequiresFpuRegister());  // FP version of above.
}

static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();

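  // Note: fabs just clears the IEEE-754 sign bit, so we AND with a mask that has every bit set
  // except the sign bit (0x7FFFFFFFFFFFFFFF / 0x7FFFFFFF). For example the double -3.5
  // (0xC00C000000000000) becomes 0x400C000000000000, which is 3.5.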
  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();

    if (is64bit) {
      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ movd(xmm_temp, cpu_temp);
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
      __ movd(xmm_temp, cpu_temp);
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
    // Once assembler support is available, in-memory operations look like this:
    // if (is64bit) {
    //   DCHECK(output.IsDoubleStackSlot());
    //   // No 64b and with literal.
    //   __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
    //   __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
    // } else {
    //   DCHECK(output.IsStackSlot());
    //   // Can use and with a literal directly.
    //   __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
    // }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

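  // Branchless abs: mask = x >> 31 (or 63) is 0 for non-negative x and all-ones for negative x,
  // and (x + mask) ^ mask is then |x|. For example x = -5: mask = -1, -5 + -1 = -6, and
  // -6 ^ -1 = 5.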
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
                        X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  // (out := op1)
  // out <=? op2
  // if NaN jmp Nan_label
  // if out is min jmp done
  // if op2 is min jmp op2_label
  // handle -0/+0
  // jmp done
  // Nan_label:
  // out := NaN
  // op2_label:
  // out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
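  // Note: if we get here the operands compared equal, which for distinct bit patterns can only be
  // +0.0 vs -0.0. OR-ing the values keeps the sign bit, so min yields -0.0; AND-ing clears it, so
  // max yields +0.0. For any other pair of equal values the OR/AND is a no-op.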
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
  // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
  if (is_double) {
    __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
  } else {
    __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
  }
  __ movd(out, cpu_temp, is_double);
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());  // Immediate constant.
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  // (out := op1)
  // out <=? op2
  // if out is min jmp done
  // out := op2
  // done:

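  // out already holds op1. For min we overwrite it with op2 when op1 > op2 (kGreater); for max
  // when op1 < op2 (kLess). cmov keeps this branch-free.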
  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array
  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_
  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  Location temp_loc = locations->GetTemp(0);
  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register; unlike Quick, we do not
  //       optimize the code for a constant index (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // Get the actual element.
  __ movl(temp, idx);                          // temp := idx.
  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
  __ movl(out, Address(obj, value_offset));    // out := obj.array.
  // out = out[2*temp].
  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
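  // In this String layout the character data lives in a separate char[] (value_offset) starting
  // at a logical offset (offset_offset), so the char is read from array + data_offset +
  // 2 * (offset + idx); movzxw zero-extends the 16-bit char into out.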

  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movb(Address(address, 0), value);
      break;
    case Primitive::kPrimShort:
      __ movw(Address(address, 0), value);
      break;
    case Primitive::kPrimInt:
      __ movl(Address(address, 0), value);
      break;
    case Primitive::kPrimLong:
      __ movq(Address(address, 0), value);
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
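  // The current Thread* is reachable through the gs segment on x86-64; PeerOffset is the offset
  // of the managed java.lang.Thread peer inside that Thread object, so one gs-relative load
  // suffices.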
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

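  // is_volatile is unused: an ordinary x86-64 load already provides the acquire ordering a
  // volatile read needs, so no fence has to be emitted here.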
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    case Primitive::kPrimLong:
      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}


void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
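// Volatile stores additionally need a StoreLoad barrier, which is why GenUnsafePut emits an
// mfence after the store when is_volatile is set.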
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}                                                                                       \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
}

UNIMPLEMENTED_INTRINSIC(IntegerReverse)
UNIMPLEMENTED_INTRINSIC(LongReverse)
UNIMPLEMENTED_INTRINSIC(MathFloor)
UNIMPLEMENTED_INTRINSIC(MathCeil)
UNIMPLEMENTED_INTRINSIC(MathRint)
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
UNIMPLEMENTED_INTRINSIC(StringIsEmpty)  // Might not want to do these two anyways, inlining should
UNIMPLEMENTED_INTRINSIC(StringLength)   // be good enough here.
UNIMPLEMENTED_INTRINSIC(StringCompareTo)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art