/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
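// Shorthand used throughout the ART code generators: "__" expands to the
// x86-64 assembler of the code generator in scope, so the emitted
// instructions below read like an assembly listing.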

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(
          invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86_64"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
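  // A raw bit transfer from the XMM register file to a general-purpose
  // register; the trailing flag selects the 64-bit (movq) form used for
  // doubles, so this directly implements Double.doubleToRawLongBits.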
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
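      // E.g., 0x00001234 becomes 0x34120000 after bswapl; the arithmetic
      // shift then yields the sign-extended 16-bit result 0x3412.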
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary. This will avoid a
  // temporary.
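  // Clearing the IEEE-754 sign bit leaves the magnitude unchanged, so ANDing
  // with 0x7FFF... implements abs for every input, including NaN, infinity
  // and -0.0.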
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

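  // Branchless abs: mask = x >> 31 (or 63) is 0 for non-negative x and -1 for
  // negative x, and (x + mask) ^ mask negates only in the latter case.
  // E.g., x = -5: mask = -1, x + mask = -6, and -6 ^ -1 = 5.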
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if NaN jmp NaN_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // NaN_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
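  // At this point the operands compared equal, which cannot distinguish -0.0
  // from +0.0. OR keeps a sign bit if either operand has one (so min picks
  // -0.0); AND keeps it only if both do (so max picks +0.0).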
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

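  // out already holds op1, so only overwrite it with op2 when op2 wins the
  // comparison (op2 < op1 for min, op2 > op1 for max).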
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations =
        new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor it to an integer (roundss mode 1 = round toward negative infinity).
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

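  // out already holds the saturated result, so the kAboveEqual branch below
  // can jump straight to done for values at or beyond kPrimIntMax.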
  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(inPlusPointFive)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And floor it to an integer (roundsd mode 1 = round toward negative infinity).
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
  codegen_->Load64BitValue(out, kPrimLongMax);

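  // As above, out is preloaded with the saturated value (kPrimLongMax) for
  // the kAboveEqual case.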
  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(inPlusPointFive)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

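  // Note: out is the same register as obj (the output is SameAsFirstInput),
  // so the load below indexes off the String reference itself.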
  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

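  // Runtime entrypoints such as pStringCompareTo live in the Thread object,
  // which x86-64 ART addresses through the gs segment register.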
  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCodeX86_64* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
            std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  Label not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0.
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update ecx, the work counter: it will be string.length - start_index.
    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  // Everything is set up for repne scasw:
  //   * Comparison address in RDI.
  //   * Counter in ECX.
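  // repne scasw compares AX with the word at [RDI], advancing RDI by two and
  // decrementing RCX on each iteration, and stops early when a match sets ZF.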
969 __ repne_scasw();
970
971 // Did we find a match?
972 __ j(kNotEqual, &not_found_label);
973
974 // Yes, we matched. Compute the index of the result.
975 __ subl(string_length, counter);
976 __ leal(out, Address(string_length, -1));
977
978 Label done;
979 __ jmp(&done);
980
981 // Failed to match; return -1.
982 __ Bind(&not_found_label);
983 __ movl(out, Immediate(-1));
984
985 // And join up at the end.
986 __ Bind(&done);
987 if (slow_path != nullptr) {
988 __ Bind(slow_path->GetExitLabel());
989 }
990}
991
992void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
993 CreateStringIndexOfLocations(invoke, arena_, true);
994}
995
996void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
997 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
998}
999
1000void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1001 CreateStringIndexOfLocations(invoke, arena_, false);
1002}
1003
1004void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1005 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
1006}
1007
Jeff Hao848f70a2014-01-15 13:49:50 -08001008void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1009 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1010 LocationSummary::kCall,
1011 kIntrinsified);
1012 InvokeRuntimeCallingConvention calling_convention;
1013 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1014 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1015 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1016 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1017 locations->SetOut(Location::RegisterLocation(RAX));
1018}
1019
1020void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1021 X86_64Assembler* assembler = GetAssembler();
1022 LocationSummary* locations = invoke->GetLocations();
1023
1024 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1025 __ testl(byte_array, byte_array);
1026 SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1027 codegen_->AddSlowPath(slow_path);
1028 __ j(kEqual, slow_path->GetEntryLabel());
1029
1030 __ gs()->call(Address::Absolute(
1031 QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
1032 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1033 __ Bind(slow_path->GetExitLabel());
1034}
1035
1036void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1037 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1038 LocationSummary::kCall,
1039 kIntrinsified);
1040 InvokeRuntimeCallingConvention calling_convention;
1041 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1042 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1043 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1044 locations->SetOut(Location::RegisterLocation(RAX));
1045}
1046
1047void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1048 X86_64Assembler* assembler = GetAssembler();
1049
1050 __ gs()->call(Address::Absolute(
1051 QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
1052 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1053}
1054
1055void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1056 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1057 LocationSummary::kCall,
1058 kIntrinsified);
1059 InvokeRuntimeCallingConvention calling_convention;
1060 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1061 locations->SetOut(Location::RegisterLocation(RAX));
1062}
1063
1064void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1065 X86_64Assembler* assembler = GetAssembler();
1066 LocationSummary* locations = invoke->GetLocations();
1067
1068 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1069 __ testl(string_to_copy, string_to_copy);
1070 SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1071 codegen_->AddSlowPath(slow_path);
1072 __ j(kEqual, slow_path->GetEntryLabel());
1073
1074 __ gs()->call(Address::Absolute(
1075 QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
1076 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1077 __ Bind(slow_path->GetExitLabel());
1078}
1079
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001080static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1081 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1082 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
1083 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1084 // to avoid a SIGBUS.
1085 switch (size) {
1086 case Primitive::kPrimByte:
1087 __ movsxb(out, Address(address, 0));
1088 break;
1089 case Primitive::kPrimShort:
1090 __ movsxw(out, Address(address, 0));
1091 break;
1092 case Primitive::kPrimInt:
1093 __ movl(out, Address(address, 0));
1094 break;
1095 case Primitive::kPrimLong:
1096 __ movq(out, Address(address, 0));
1097 break;
1098 default:
1099 LOG(FATAL) << "Type not recognized for peek: " << size;
1100 UNREACHABLE();
1101 }
1102}
1103
1104void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1105 CreateIntToIntLocations(arena_, invoke);
1106}
1107
1108void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1109 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1110}
1111
1112void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1113 CreateIntToIntLocations(arena_, invoke);
1114}
1115
1116void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1117 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1118}
1119
1120void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1121 CreateIntToIntLocations(arena_, invoke);
1122}
1123
1124void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1125 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1126}
1127
1128void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1129 CreateIntToIntLocations(arena_, invoke);
1130}
1131
1132void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1133 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1134}
1135
1136static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
1137 LocationSummary* locations = new (arena) LocationSummary(invoke,
1138 LocationSummary::kNoCall,
1139 kIntrinsified);
1140 locations->SetInAt(0, Location::RequiresRegister());
Mark Mendell40741f32015-04-20 22:10:34 -04001141 locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001142}
1143
1144static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1145 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
Mark Mendell40741f32015-04-20 22:10:34 -04001146 Location value = locations->InAt(1);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001147 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1148 // to avoid a SIGBUS.
1149 switch (size) {
1150 case Primitive::kPrimByte:
Mark Mendell40741f32015-04-20 22:10:34 -04001151 if (value.IsConstant()) {
1152 __ movb(Address(address, 0),
1153 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1154 } else {
1155 __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1156 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001157 break;
1158 case Primitive::kPrimShort:
Mark Mendell40741f32015-04-20 22:10:34 -04001159 if (value.IsConstant()) {
1160 __ movw(Address(address, 0),
1161 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1162 } else {
1163 __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1164 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001165 break;
1166 case Primitive::kPrimInt:
Mark Mendell40741f32015-04-20 22:10:34 -04001167 if (value.IsConstant()) {
1168 __ movl(Address(address, 0),
1169 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1170 } else {
1171 __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1172 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001173 break;
1174 case Primitive::kPrimLong:
Mark Mendell40741f32015-04-20 22:10:34 -04001175 if (value.IsConstant()) {
1176 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1177 DCHECK(IsInt<32>(v));
1178 int32_t v_32 = v;
1179 __ movq(Address(address, 0), Immediate(v_32));
1180 } else {
1181 __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1182 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001183 break;
1184 default:
1185 LOG(FATAL) << "Type not recognized for poke: " << size;
1186 UNREACHABLE();
1187 }
1188}
1189
1190void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1191 CreateIntIntToVoidLocations(arena_, invoke);
1192}
1193
1194void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1195 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1196}
1197
1198void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1199 CreateIntIntToVoidLocations(arena_, invoke);
1200}
1201
1202void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1203 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1204}
1205
1206void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1207 CreateIntIntToVoidLocations(arena_, invoke);
1208}
1209
1210void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1211 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1212}
1213
1214void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1215 CreateIntIntToVoidLocations(arena_, invoke);
1216}
1217
1218void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1219 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1220}
1221
1222void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1223 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1224 LocationSummary::kNoCall,
1225 kIntrinsified);
1226 locations->SetOut(Location::RequiresRegister());
1227}
1228
1229void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1230 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
1231 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
1232}
1233
Andreas Gampe878d58c2015-01-15 23:24:00 -08001234static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001235 bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
1236 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
1237 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
1238 CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
1239
Andreas Gampe878d58c2015-01-15 23:24:00 -08001240 switch (type) {
1241 case Primitive::kPrimInt:
1242 case Primitive::kPrimNot:
1243 __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
Roland Levillain4d027112015-07-01 15:41:14 +01001244 if (type == Primitive::kPrimNot) {
1245 __ MaybeUnpoisonHeapReference(trg);
1246 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001247 break;
1248
1249 case Primitive::kPrimLong:
1250 __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
1251 break;
1252
1253 default:
1254 LOG(FATAL) << "Unsupported op size " << type;
1255 UNREACHABLE();
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001256 }
1257}
1258
1259static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
1260 LocationSummary* locations = new (arena) LocationSummary(invoke,
1261 LocationSummary::kNoCall,
1262 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001263 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001264 locations->SetInAt(1, Location::RequiresRegister());
1265 locations->SetInAt(2, Location::RequiresRegister());
Andreas Gampe878d58c2015-01-15 23:24:00 -08001266 locations->SetOut(Location::RequiresRegister());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001267}
1268
1269void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1270 CreateIntIntIntToIntLocations(arena_, invoke);
1271}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}


void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// Ordered puts need no special handling here: they only require an AnyStore barrier, which the
// x86 memory model already provides for plain stores.
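// Volatile puts additionally need a StoreLoad barrier after the store, which is why the code
// below emits an mfence only when is_volatile is set.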
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
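    // Poison a copy of the reference in |temp|: |value| itself must stay unpoisoned because it is
    // passed to the card-marking code below.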
    CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
    __ movl(temp, value);
    __ PoisonHeapReference(temp);
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value,
                        value_can_be_null);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // The expected value must be in EAX/RAX, since cmpxchg implicitly compares against the
  // accumulator.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

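  // lock cmpxchg compares RAX (the expected value) with the memory operand; if they are equal it
  // stores |value| and sets ZF, otherwise it loads the current memory value into RAX and clears ZF.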
  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      bool value_can_be_null = true;  // TODO: Worth finding out this information?
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value,
                          value_can_be_null);

      if (kPoisonHeapReferences) {
        __ PoisonHeapReference(expected);
        __ PoisonHeapReference(value);
      }
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    __ UnpoisonHeapReference(value);
    __ UnpoisonHeapReference(expected);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

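// Swaps the bit groups selected by |mask| with the groups |shift| bits above them, i.e. computes
// reg = ((reg >> shift) & mask) | ((reg & mask) << shift).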
static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first, and then use 3 rounds of
   * bit swapping to reverse the bits of the number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
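  // The three rounds below swap adjacent bits, then bit pairs, then nibbles, i.e. they reverse the
  // bit order within each byte; combined with the byte reversal done by bswap this reverses all
  // 32 bits. Worked illustration (not generated code):
  //   0x12345678 -> bswap -> 0x78563412 -> rounds -> 0x1E6A2C48 == Integer.reverse(0x12345678).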
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

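// Same bit-group swap as SwapBits, but for 64-bit operands: the 64-bit masks cannot be encoded as
// immediates of andq, so each mask is first materialized in |temp_mask|.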
static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first, and then use 3 rounds of
   * bit swapping to reverse the bits of the long number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
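  // As in the 32-bit case, bswap reverses byte order and the three rounds below reverse the bit
  // order within each of the eight bytes, so together they reverse all 64 bits.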
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name) \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
} \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)

#undef UNIMPLEMENTED_INTRINSIC

#undef __

}  // namespace x86_64
}  // namespace art