/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
  if (invoke->GetNumberOfArguments() == 0) {
    // No argument to move.
    return;
  }

  LocationSummary* locations = invoke->GetLocations();
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;

  // We're moving potentially two or more locations to locations that could overlap, so we need
  // a parallel move resolver.
  HParallelMove parallel_move(arena);

  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
    HInstruction* input = invoke->InputAt(i);
    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
    Location actual_loc = locations->InAt(i);

    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
  }

  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}

// Slow path for the fallback (calling managed code to handle the intrinsic) in an intrinsified
// call. It copies the arguments into the positions expected by a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slow-path call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8-bit registers. This is straight from Quick.
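      // bswapl reverses all four bytes of the 32-bit register; the arithmetic shift then moves
      // the two interesting bytes back into the low half and sign-extends them, as required for
      // a Java short.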
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate
//       we need is 64 bits wide.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with the constant area using pand if we can guarantee
    //       that the literal is aligned on a 16-byte boundary. This will avoid a
    //       temporary.
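    // abs() only needs the sign bit cleared, so mask with 0x7FFF... to leave the exponent and
    // mantissa bits untouched.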
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
//  Once assembler support is available, in-memory operations look like this:
//  if (is64bit) {
//    DCHECK(output.IsDoubleStackSlot());
//    // There is no 64-bit `and` with a literal.
//    __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//    __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//  } else {
//    DCHECK(output.IsStackSlot());
//    // Can use and with a literal directly.
//    __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//  }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

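  // Branchless two's-complement abs: mask is 0 for non-negative inputs and all ones for
  // negative ones, so (x + mask) ^ mask yields x for x >= 0 and -x otherwise.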
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if NaN jmp nan
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // nan:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
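  // For min, or-ing the operands makes -0.0 win (sign bit set); for max, and-ing them makes
  // +0.0 win (sign bit cleared), matching Java's Math.min/max semantics for signed zeros.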
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

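  // cmov keeps out (== op1) when it is already the min/max and overwrites it with op2
  // otherwise, avoiding a branch.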
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
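  // roundsd immediate rounding modes used by the callers below:
  // 0 = round to nearest even (rint), 1 = round toward -inf (floor), 2 = round toward +inf (ceil).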
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor it to an integer.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And floor it to an integer.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorq(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so, unlike Quick, we
  //       will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

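  // Bounds check: the unsigned comparison against the length also sends negative indices
  // (which wrap around to large unsigned values) to the slow path.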
  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimInt:
      if (value.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimLong:
      if (value.IsConstant()) {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        DCHECK(IsInt<32>(v));
        int32_t v_32 = v;
        __ movq(Address(address, 0), Immediate(v_32));
      } else {
        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
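  // ART keeps the current Thread* in the gs segment register on x86-64; the java.lang.Thread
  // peer object is loaded gs-relative from PeerOffset.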
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    case Primitive::kPrimLong:
      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}


void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                        Primitive::Type type,
                                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// Ordered stores need no special handling here: they only require an AnyStore barrier, which
// the x86 memory model already provides.
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
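    // x86-64 only reorders stores with later loads, so a volatile store just needs the
    // StoreLoad barrier that mfence provides.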
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // expected value must be in EAX/RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
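  // lock cmpxchg implicitly compares against and updates RAX, which is why the expected value
  // was pinned to RAX by the location builder above.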
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value);
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
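  // Swaps adjacent groups of `shift` bits selected by `mask`:
  // reg = ((reg >> shift) & mask) | ((reg & mask) << shift).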
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
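  // 64-bit variant of SwapBits. The mask is materialized in a temporary register because
  // x86-64 logic instructions cannot encode a 64-bit immediate.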
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a long number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name)                                                    \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}                                                                                        \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
}

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art