/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
  : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(
          invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86_64"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
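      // Note: bswapl reverses all four bytes of the register, leaving the two bytes of
      // interest in the upper half; the arithmetic shift moves them back down and
      // sign-extends, as required for a (signed) short result.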
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary. This will avoid a
  // temporary.
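  // abs() only needs to clear the IEEE-754 sign bit, so AND the value with a mask that keeps
  // every bit except the topmost one (0x7FFF... for doubles, 0x7FFFFFFF for floats).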
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

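  // Branch-free two's-complement abs: mask = value >> (width - 1) is all ones for negative
  // inputs and zero otherwise, so (value + mask) ^ mask yields -value for negative inputs and
  // leaves non-negative inputs unchanged.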
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

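  // ucomiss/ucomisd set the parity flag on an unordered compare, i.e. when at least one
  // operand is NaN, so kParityEven is the NaN case.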
  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
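  // The operands compared equal here, which for zero may mean -0.0 and +0.0. OR-ing the raw
  // bits keeps a sign bit if either input has one (min must return -0.0), while AND-ing clears
  // it unless both are negative (max must return +0.0). Identical bit patterns are unaffected.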
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

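  // out already holds op1; conditionally replace it with op2: for min, take op2 when
  // op1 > op2 (kGreater), for max when op1 < op2 (kLess).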
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And truncate to an integer.
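  // Note: round mode 1 rounds toward negative infinity (floor), which matches Math.round's
  // floor(x + 0.5) definition.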
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

  // if inPlusPointFive >= maxInt goto done
  __ movl(out, Immediate(kPrimIntMax));
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And truncate to an integer.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
  codegen_->Load64BitValue(out, kPrimLongMax);

  // if inPlusPointFive >= maxLong goto done
  __ movq(out, Immediate(kPrimLongMax));
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
  locations->AddTemp(Location::RegisterLocation(RCX));
  locations->AddTemp(Location::RegisterLocation(RDI));

  // Set output, RSI needed for repe_cmpsq instruction anyways.
  locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();

  Label end;
  Label return_true;
  Label return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Check if input is null, return false if it is.
  __ testl(arg, arg);
  __ j(kEqual, &return_false);

  // Instanceof check for the argument by comparing class fields.
  // All string objects must have the same type since String cannot be subclassed.
  // Receiver must be a string object, so its class field is equal to all strings' class fields.
  // If the argument is a string object, its class field must be equal to receiver's class field.
  __ movl(rcx, Address(str, class_offset));
  __ cmpl(rcx, Address(arg, class_offset));
  __ j(kNotEqual, &return_false);

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length of receiver string.
  __ movl(rcx, Address(str, count_offset));
  // Check if lengths are equal, return false if they're not.
  __ cmpl(rcx, Address(arg, count_offset));
  __ j(kNotEqual, &return_false);
  // Return true if both strings are empty.
  __ testl(rcx, rcx);
  __ j(kEqual, &return_true);

  // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
  __ leal(rsi, Address(str, value_offset));
  __ leal(rdi, Address(arg, value_offset));

  // Divide string length by 4 and adjust for lengths not divisible by 4.
  __ addl(rcx, Immediate(3));
  __ shrl(rcx, Immediate(2));
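  // repe_cmpsq compares eight bytes (four UTF-16 characters) per iteration, so the character
  // count is rounded up to the next multiple of four; reading past the logical end is safe
  // because the value data is 8-byte aligned and zero padded (see the assertions below).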

  // Assertions that must hold in order to compare strings 4 characters at a time.
  DCHECK_ALIGNED(value_offset, 8);
  static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");

  // Loop to compare strings four characters at a time starting at the beginning of the string.
  __ repe_cmpsq();
  // If strings are not equal, zero flag will be cleared.
  __ j(kNotEqual, &return_false);

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ movl(rsi, Immediate(1));
  __ jmp(&end);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ xorl(rsi, rsi);
  __ Bind(&end);
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCodeX86_64* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  Label not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0;
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update ecx, the work counter: it's gonna be string.length - start_index.
    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  // Everything is set up for repne scasw:
  //   * Comparison address in RDI.
  //   * Counter in ECX.
  __ repne_scasw();

  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched. Compute the index of the result.
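  // repne scasw decremented RCX for every character scanned, including the match itself, so
  // string_length - remaining gives index + 1; subtract one to get the index.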
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  Label done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, true);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, false);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimInt:
      if (value.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimLong:
      if (value.IsConstant()) {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
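        // There is no 64-bit-immediate store on x86-64; the locations builder only allows long
        // constants that fit in a sign-extended int32, which is what is checked here.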
        DCHECK(IsInt<32>(v));
        int32_t v_32 = v;
        __ movq(Address(address, 0), Immediate(v_32));
      } else {
        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
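  // On x86-64 the current Thread* is reachable through the GS segment register, so the peer
  // java.lang.Thread object is loaded GS-relative from the thread's peer field.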
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      if (type == Primitive::kPrimNot) {
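        // With heap reference poisoning enabled, references are stored in a poisoned (negated)
        // form and must be unpoisoned after the load; this is a no-op otherwise.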
        __ MaybeUnpoisonHeapReference(trg);
      }
      break;

    case Primitive::kPrimLong:
      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}


void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
Roland Levillain4d027112015-07-01 15:41:14 +01001413 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001414 locations->AddTemp(Location::RequiresRegister());
1415 }
1416}
1417
1418void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
1419 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
1420}
1421void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
1422 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
1423}
1424void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
1425 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
1426}
1427void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
1428 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
1429}
1430void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1431 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
1432}
1433void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1434 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
1435}
1436void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
1437 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
1438}
1439void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1440 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
1441}
1442void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1443 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
1444}
1445
// Ordered stores need no extra code here: they only require an AnyStore barrier, which the
// x86 memory model already provides.
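// A volatile store does need a StoreLoad barrier after the write, which is why GenUnsafePut
// emits an mfence when is_volatile is set.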
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
    __ movl(temp, value);
    __ PoisonHeapReference(temp);
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value,
                        value_can_be_null);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // expected value must be in EAX/RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

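// The CAS sequence is built around LOCK CMPXCHG: the expected value is pinned to RAX (see
// CreateIntIntIntIntIntToInt above), the instruction stores the new value only if the memory
// operand equals RAX, and success is reported through ZF, which setcc/movzx below turn into
// the boolean result.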
static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      bool value_can_be_null = true;  // TODO: Worth finding out this information?
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value,
                          value_can_be_null);

      if (kPoisonHeapReferences) {
        __ PoisonHeapReference(expected);
        __ PoisonHeapReference(value);
      }
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    __ UnpoisonHeapReference(value);
    __ UnpoisonHeapReference(expected);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

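// SwapBits exchanges each bit group selected by mask with its neighbour shift positions
// higher, i.e. it computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), using
// temp as scratch. Three rounds with shifts 1, 2 and 4 finish the bit reversal after bswap.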
static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse the byte order first, then use 3 rounds of bit
   * swapping to reverse the bits of the number x. Using bswap saves instructions compared
   * to the generic luni implementation, which needs 5 rounds of bit swapping.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

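// SwapBits64 mirrors SwapBits for 64-bit values; the mask is materialized in temp_mask first
// because x86-64 AND instructions cannot take a full 64-bit immediate operand.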
static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse the byte order first, then use 3 rounds of bit
   * swapping to reverse the bits of the long number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

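// numberOfLeadingZeros is implemented with BSR, which yields the index of the highest set bit.
// For a non-zero input CLZ == (width - 1) - index, and because width - 1 is all ones this
// subtraction is the same as XOR with (width - 1), which is what the xorl below computes.
// A zero input leaves BSR's destination undefined (with ZF set), so that case takes a
// separate path.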
static void GenLeadingZeros(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    if (value == 0) {
      __ xorl(out, out);
    } else {
      __ movl(out, Immediate(value));
    }
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsrq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsrl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSR sets ZF if the input was zero, and the output is undefined.
  Label is_zero, done;
  __ j(kEqual, &is_zero);

  // Correct the result from BSR to get the CLZ result.
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
  GenLeadingZeros(assembler, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
  GenLeadingZeros(assembler, invoke, /* is_long */ true);
}

// Unimplemented intrinsics.
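// The macro below expands to empty Visit methods for these intrinsics, so their invokes are
// never marked as intrinsified and the compiler falls back to the regular invoke path for them.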

#define UNIMPLEMENTED_INTRINSIC(Name) \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
} \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

#undef UNIMPLEMENTED_INTRINSIC

#undef __

}  // namespace x86_64
}  // namespace art