/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(
          invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86_64"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
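      // bswapl reverses all four bytes of the 32-bit register, which leaves the two
      // significant bytes of the short in the upper half; the arithmetic shift moves them
      // back down while sign-extending, as a Java short requires.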
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with constant area using pand if we can guarantee
    // that the literal is aligned on a 16 byte boundary. This will avoid a
    // temporary.
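    // Absolute value only needs to clear the IEEE-754 sign bit: bit 63 of the double
    // (mask 0x7FFFFFFFFFFFFFFF) or bit 31 of the float (mask 0x7FFFFFFF).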
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
//  Once assembler support is available, in-memory operations look like this:
//    if (is64bit) {
//      DCHECK(output.IsDoubleStackSlot());
//      // No 64b and with literal.
//      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//      __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//    } else {
//      DCHECK(output.IsStackSlot());
//      // Can use and with a literal directly.
//      __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

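  // Branchless absolute value: mask = x >> 31 (or 63) is all ones for a negative input and
  // zero otherwise, so (x + mask) ^ mask yields -x for negative x and x otherwise. The most
  // negative value maps to itself, matching the Math.abs contract.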
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  // (out := op1)
  // out <=? op2
  // if NaN jmp NaN_label
  // if out is min jmp done
  // if op2 is min jmp op2_label
  // handle -0/+0
  // jmp done
  // NaN_label:
  // out := NaN
  // op2_label:
  // out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
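  // If we fall through to here, the ucomis* above saw equal operands: either +0.0 and -0.0 or
  // bitwise-identical values. OR-ing the sign bits yields -0.0, the correct minimum, while
  // AND-ing them yields +0.0, the correct maximum; identical values are left unchanged.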
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  // (out := op1)
  // out <=? op2
  // if out is min jmp done
  // out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

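  // For min, copy op2 into out only when out is strictly greater; for max, only when out is
  // strictly less. Otherwise out already holds the desired value (op1).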
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
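    // round_mode is the SSE4.1 rounding-control immediate: 0 rounds to nearest (even),
    // 1 rounds down (floor), 2 rounds up (ceil), matching Math.rint/floor/ceil below.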
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And round it down (floor) to an integral value.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And round it down (floor) to an integral value.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
  codegen_->Load64BitValue(out, kPrimLongMax);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so, unlike Quick,
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

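  // Quick runtime entrypoints live in the current Thread, which on x86-64 is addressed through
  // the gs segment register, hence the gs-prefixed absolute call.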
  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyway.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that anyway. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyway, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCodeX86_64* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  Label not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0.
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update ecx, the work counter: it will be string.length - start_index.
    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  // Everything is set up for repne scasw:
  //   * Comparison address in RDI.
  //   * Counter in ECX.
  __ repne_scasw();

  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched. Compute the index of the result.
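  // repne scasw also decremented RCX for the matching char, so string_length - counter is the
  // number of chars consumed and the index of the match is one less than that.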
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  Label done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, true);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, false);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimInt:
      if (value.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimLong:
      if (value.IsConstant()) {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        DCHECK(IsInt<32>(v));
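        // A 64-bit store only encodes a sign-extended 32-bit immediate, which the
        // RegisterOrInt32LongConstant location constraint above guarantees.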
        int32_t v_32 = v;
        __ movq(Address(address, 0), Immediate(v_32));
      } else {
        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
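  // The current thread's Java peer is read from the Thread object, which is addressed off the
  // gs segment base on x86-64; a 32-bit load suffices since ART heap references fit in 32 bits.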
1247 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
1248}
1249
Andreas Gampe878d58c2015-01-15 23:24:00 -08001250static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001251 bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
1252 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
1253 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
1254 CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
1255
Andreas Gampe878d58c2015-01-15 23:24:00 -08001256 switch (type) {
1257 case Primitive::kPrimInt:
1258 case Primitive::kPrimNot:
1259 __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
Roland Levillain4d027112015-07-01 15:41:14 +01001260 if (type == Primitive::kPrimNot) {
1261 __ MaybeUnpoisonHeapReference(trg);
1262 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001263 break;
1264
1265 case Primitive::kPrimLong:
1266 __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
1267 break;
1268
1269 default:
1270 LOG(FATAL) << "Unsupported op size " << type;
1271 UNREACHABLE();
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001272 }
1273}
1274
1275static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
1276 LocationSummary* locations = new (arena) LocationSummary(invoke,
1277 LocationSummary::kNoCall,
1278 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001279 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001280 locations->SetInAt(1, Location::RequiresRegister());
1281 locations->SetInAt(2, Location::RequiresRegister());
Andreas Gampe878d58c2015-01-15 23:24:00 -08001282 locations->SetOut(Location::RequiresRegister());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001283}
1284
1285void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1286 CreateIntIntIntToIntLocations(arena_, invoke);
1287}
1288void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1289 CreateIntIntIntToIntLocations(arena_, invoke);
1290}
1291void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1292 CreateIntIntIntToIntLocations(arena_, invoke);
1293}
1294void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1295 CreateIntIntIntToIntLocations(arena_, invoke);
1296}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001297void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1298 CreateIntIntIntToIntLocations(arena_, invoke);
1299}
1300void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1301 CreateIntIntIntToIntLocations(arena_, invoke);
1302}
1303
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001304
1305void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001306 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001307}
1308void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001309 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001310}
1311void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001312 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001313}
1314void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001315 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001316}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001317void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1318 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
1319}
1320void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1321 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
1322}

static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
    locations->AddTemp(Location::RequiresRegister());
  }
}
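
// The two temps requested above feed codegen->MarkGCCard() in GenUnsafePut below. Card-marking
// records in the garbage collector's card table that the stored-into object may now hold a
// reference the GC has to revisit; the first temp is also reused as scratch space when
// heap-reference poisoning is enabled (see the kPoisonHeapReferences path below).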

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We don't need a special code path for ordered puts: ordering only requires an AnyStore
// barrier, which the x86 memory model already provides for every store.
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
    __ movl(temp, value);
    __ PoisonHeapReference(temp);
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value,
                        value_can_be_null);
  }
}
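
// A rough sketch of the sequence emitted above for an UnsafePutObjectVolatile, assuming heap
// poisoning is disabled (instruction spelling is illustrative, not the exact assembler output):
//
//   movl [base + offset], value               ; x86-64 stores already have release semantics
//   mfence                                    ; only for *Volatile variants (store->load ordering)
//   <MarkGCCard(temp0, temp1, base, value)>   ; keep the GC card table in sync with the reference
//
// The *Ordered variants compile exactly like plain puts here, per the comment above GenUnsafePut.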

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // The expected value must be in EAX/RAX: LOCK CMPXCHG uses it implicitly.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}
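
// Why input 3 is pinned to RAX: CMPXCHG implicitly compares its memory operand against
// EAX/RAX and, when the comparison fails, writes the current memory value back into EAX/RAX.
// A sketch of the instruction's behaviour (pseudo-code only):
//
//   if ([mem] == RAX) { ZF = 1; [mem] = value; } else { ZF = 0; RAX = [mem]; }
//
// Fixing the expected value in RAX lets GenCAS below use LockCmpxchgl/LockCmpxchgq directly.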

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark the card for the object, assuming the new value will be stored.
      bool value_can_be_null = true;  // TODO: Worth finding out this information?
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value,
                          value_can_be_null);

      if (kPoisonHeapReferences) {
        __ PoisonHeapReference(expected);
        __ PoisonHeapReference(value);
      }
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // LOCK CMPXCHG has full barrier semantics, so we don't need additional scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    __ UnpoisonHeapReference(value);
    __ UnpoisonHeapReference(expected);
  }
}
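
// Condensed sketch of what GenCAS emits for the int/object case (heap poisoning elided;
// mnemonics are illustrative):
//
//   lock cmpxchgl [base + offset], value   ; the expected value is implicitly in EAX
//   setz  out                              ; ZF set  =>  the swap happened
//   movzx out, out_byte                    ; widen the flag byte to a 0/1 integer result
//
// The LOCK prefix makes the compare-and-swap atomic and acts as a full memory barrier, which is
// why no explicit fences are emitted around it.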

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}
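
// SwapBits(reg, temp, shift, mask, assembler) exchanges adjacent bit groups of width `shift`
// selected by `mask`, i.e. it computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift).
// For example, shift == 1 with mask == 0x55555555 swaps every even bit with its odd neighbour:
// 0b0110 becomes 0b1001. Three rounds (widths 1, 2 and 4) after a bswap reverse all 32 bits.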
static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
   * swapping bits to reverse the bits of the number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of bit swapping:
   *   x = bswap x
   *   x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   *   x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   *   x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}
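
// Worked example of the sequence above for input 0x00000001 (values in hex):
//
//   bswapl:                 0x00000001 -> 0x01000000
//   SwapBits(1, 0x5555...): 0x01000000 -> 0x02000000   (swap neighbouring bits)
//   SwapBits(2, 0x3333...): 0x02000000 -> 0x08000000   (swap 2-bit pairs)
//   SwapBits(4, 0x0f0f...): 0x08000000 -> 0x80000000   (swap nibbles)
//
// which matches Integer.reverse(1) == 0x80000000.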

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
   * swapping bits to reverse the bits of the long number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of bit swapping:
   *   x = bswap x
   *   x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   *   x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   *   x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   * The 64-bit masks do not fit in an immediate operand, hence the extra temp register
   * used by SwapBits64.
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name) \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
} \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
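
// The bodies generated by this macro are deliberately empty: since they never build an
// intrinsified LocationSummary, the compiler keeps treating these invokes as ordinary calls,
// so the methods listed below simply take the regular (non-intrinsic) path until an x86-64
// implementation is written.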

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

#undef UNIMPLEMENTED_INTRINSIC

#undef __

}  // namespace x86_64
}  // namespace art