/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

#define __ assembler->
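// With the macro above, "__ movl(...)" expands to "assembler->movl(...)", so the
// code generators below read like assembly listings.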

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
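      // bswapl reverses all four bytes, leaving the two interesting bytes in the
      // upper half of the register; the arithmetic shift moves them back down and
      // sign-extends, which is what a (signed) short result requires.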
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary.  This will avoid a
  // temporary.
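  // The literals below have every bit set except the sign bit, so ANDing the
  // value with them clears the sign and leaves the magnitude, i.e. |x|.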
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

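  // Branchless abs: the arithmetic shift smears the sign bit, so mask is zero for
  // non-negative inputs and all ones for negative ones; (out + mask) ^ mask then
  // negates negative inputs and leaves non-negative inputs unchanged.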
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
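  // If we fall through to here the operands compared equal and ordered; the only
  // case where the result still matters is +0.0 vs -0.0. OR keeps a set sign bit,
  // so min(-0.0, +0.0) yields -0.0; AND clears it, so max(-0.0, +0.0) yields +0.0.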
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

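  // out already holds op1; replace it with op2 only when op2 is the smaller
  // (respectively larger) value, using a conditional move instead of a branch.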
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
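    // round_mode is the SSE4.1 ROUNDSD immediate: 0 rounds to nearest (rint),
    // 1 rounds toward negative infinity (floor), 2 rounds toward positive
    // infinity (ceil).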
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
  __ j(kBelow, &skip_incr);
  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load32BitValue(out, kPrimIntMax);
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5)
  //    result = result + 1.0f;
  __ movsd(t2, in);
  __ roundsd(t1, in, Immediate(1));
  __ subsd(t2, t1);
  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
  __ j(kBelow, &skip_incr);
  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load64BitValue(out, kPrimLongMax);
  __ cvtsi2sd(t2, out, /* is64bit */ true);
  __ comisd(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttsd2si(out, t1, /* is64bit */ true);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                      HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86_64Assembler* assembler = codegen->GetAssembler();

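  // The runtime math entrypoint is reached through the thread-local entrypoint
  // table, which is addressed via the GS segment register on x86-64.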
  __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64PointerSize>(entry), true));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be > 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
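  // REP MOVSW implicitly takes its source pointer in RSI, its destination
  // pointer in RDI and its element count in RCX, hence the fixed assignments.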
904 locations->AddTemp(Location::RegisterLocation(RSI));
905 locations->AddTemp(Location::RegisterLocation(RDI));
906 locations->AddTemp(Location::RegisterLocation(RCX));
907}
908
909static void CheckPosition(X86_64Assembler* assembler,
910 Location pos,
911 CpuRegister input,
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +0100912 Location length,
Andreas Gampe85b62f22015-09-09 13:15:38 -0700913 SlowPathCode* slow_path,
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +0100914 CpuRegister temp,
915 bool length_is_input_length = false) {
916 // Where is the length in the Array?
Mark Mendell6bc53a92015-07-01 14:26:52 -0400917 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
918
919 if (pos.IsConstant()) {
920 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
921 if (pos_const == 0) {
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +0100922 if (!length_is_input_length) {
923 // Check that length(input) >= length.
924 if (length.IsConstant()) {
925 __ cmpl(Address(input, length_offset),
926 Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
927 } else {
928 __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
929 }
930 __ j(kLess, slow_path->GetEntryLabel());
931 }
Mark Mendell6bc53a92015-07-01 14:26:52 -0400932 } else {
933 // Check that length(input) >= pos.
Nicolas Geoffrayfea1abd2016-07-06 12:09:12 +0100934 __ movl(temp, Address(input, length_offset));
935 __ subl(temp, Immediate(pos_const));
Mark Mendell6bc53a92015-07-01 14:26:52 -0400936 __ j(kLess, slow_path->GetEntryLabel());
937
938 // Check that (length(input) - pos) >= length.
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +0100939 if (length.IsConstant()) {
940 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
941 } else {
942 __ cmpl(temp, length.AsRegister<CpuRegister>());
943 }
Mark Mendell6bc53a92015-07-01 14:26:52 -0400944 __ j(kLess, slow_path->GetEntryLabel());
945 }
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +0100946 } else if (length_is_input_length) {
947 // The only way the copy can succeed is if pos is zero.
948 CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
949 __ testl(pos_reg, pos_reg);
950 __ j(kNotEqual, slow_path->GetEntryLabel());
Mark Mendell6bc53a92015-07-01 14:26:52 -0400951 } else {
952 // Check that pos >= 0.
953 CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
954 __ testl(pos_reg, pos_reg);
955 __ j(kLess, slow_path->GetEntryLabel());
956
957 // Check that pos <= length(input).
958 __ cmpl(Address(input, length_offset), pos_reg);
959 __ j(kLess, slow_path->GetEntryLabel());
960
961 // Check that (length(input) - pos) >= length.
962 __ movl(temp, Address(input, length_offset));
963 __ subl(temp, pos_reg);
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +0100964 if (length.IsConstant()) {
965 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
966 } else {
967 __ cmpl(temp, length.AsRegister<CpuRegister>());
968 }
Mark Mendell6bc53a92015-07-01 14:26:52 -0400969 __ j(kLess, slow_path->GetEntryLabel());
970 }
971}
972
973void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
974 X86_64Assembler* assembler = GetAssembler();
975 LocationSummary* locations = invoke->GetLocations();
976
977 CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +0100978 Location src_pos = locations->InAt(1);
Mark Mendell6bc53a92015-07-01 14:26:52 -0400979 CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +0100980 Location dest_pos = locations->InAt(3);
Mark Mendell6bc53a92015-07-01 14:26:52 -0400981 Location length = locations->InAt(4);
982
983 // Temporaries that we need for MOVSW.
984 CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
985 DCHECK_EQ(src_base.AsRegister(), RSI);
986 CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
987 DCHECK_EQ(dest_base.AsRegister(), RDI);
988 CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
989 DCHECK_EQ(count.AsRegister(), RCX);
990
Andreas Gampe85b62f22015-09-09 13:15:38 -0700991 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
Mark Mendell6bc53a92015-07-01 14:26:52 -0400992 codegen_->AddSlowPath(slow_path);
993
994 // Bail out if the source and destination are the same.
995 __ cmpl(src, dest);
996 __ j(kEqual, slow_path->GetEntryLabel());
997
998 // Bail out if the source is null.
999 __ testl(src, src);
1000 __ j(kEqual, slow_path->GetEntryLabel());
1001
1002 // Bail out if the destination is null.
1003 __ testl(dest, dest);
1004 __ j(kEqual, slow_path->GetEntryLabel());
1005
1006 // If the length is negative, bail out.
1007 // We have already checked in the LocationsBuilder for the constant case.
1008 if (!length.IsConstant()) {
1009 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1010 __ j(kLess, slow_path->GetEntryLabel());
1011 }
1012
Nicolas Geoffrayfea1abd2016-07-06 12:09:12 +01001013 // Validity checks: source. Use src_base as a temporary register.
1014 CheckPosition(assembler, src_pos, src, length, slow_path, src_base);
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001015
Nicolas Geoffrayfea1abd2016-07-06 12:09:12 +01001016 // Validity checks: dest. Use src_base as a temporary register.
1017 CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001018
Mark Mendell6bc53a92015-07-01 14:26:52 -04001019 // We need the count in RCX.
1020 if (length.IsConstant()) {
1021 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1022 } else {
1023 __ movl(count, length.AsRegister<CpuRegister>());
1024 }
1025
Mark Mendell6bc53a92015-07-01 14:26:52 -04001026 // Okay, everything checks out. Finally time to do the copy.
1027 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1028 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1029 DCHECK_EQ(char_size, 2u);
1030
1031 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
1032
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001033 if (src_pos.IsConstant()) {
1034 int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
1035 __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
Mark Mendell6bc53a92015-07-01 14:26:52 -04001036 } else {
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001037 __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
Mark Mendell6bc53a92015-07-01 14:26:52 -04001038 ScaleFactor::TIMES_2, data_offset));
1039 }
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001040 if (dest_pos.IsConstant()) {
1041 int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1042 __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
Mark Mendell6bc53a92015-07-01 14:26:52 -04001043 } else {
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001044 __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
Mark Mendell6bc53a92015-07-01 14:26:52 -04001045 ScaleFactor::TIMES_2, data_offset));
1046 }
1047
1048 // Do the move.
1049 __ rep_movsw();
1050
1051 __ Bind(slow_path->GetExitLabel());
1052}
1053
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001054
1055void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
Roland Levillain3d312422016-06-23 13:53:42 +01001056 // TODO(rpl): Implement read barriers in the SystemArrayCopy
1057 // intrinsic and re-enable it (b/29516905).
1058 if (kEmitCompilerReadBarrier) {
1059 return;
1060 }
1061
Nicolas Geoffray5bd05a52015-10-13 09:48:30 +01001062 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001063}
1064
1065void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
Roland Levillain3d312422016-06-23 13:53:42 +01001066 // TODO(rpl): Implement read barriers in the SystemArrayCopy
1067 // intrinsic and re-enable it (b/29516905).
1068 DCHECK(!kEmitCompilerReadBarrier);
1069
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001070 X86_64Assembler* assembler = GetAssembler();
1071 LocationSummary* locations = invoke->GetLocations();
1072
1073 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1074 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1075 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1076 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1077
1078 CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
1079 Location src_pos = locations->InAt(1);
1080 CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
1081 Location dest_pos = locations->InAt(3);
1082 Location length = locations->InAt(4);
1083 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
1084 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
1085 CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
1086
1087 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1088 codegen_->AddSlowPath(slow_path);
1089
Roland Levillainebea3d22016-04-12 15:42:57 +01001090 NearLabel conditions_on_positions_validated;
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001091 SystemArrayCopyOptimizations optimizations(invoke);
1092
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001093 // If source and destination are the same, we go to slow path if we need to do
1094 // forward copying.
1095 if (src_pos.IsConstant()) {
1096 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1097 if (dest_pos.IsConstant()) {
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01001098 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1099 if (optimizations.GetDestinationIsSource()) {
1100 // Checked when building locations.
1101 DCHECK_GE(src_pos_constant, dest_pos_constant);
1102 } else if (src_pos_constant < dest_pos_constant) {
1103 __ cmpl(src, dest);
1104 __ j(kEqual, slow_path->GetEntryLabel());
1105 }
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001106 } else {
1107 if (!optimizations.GetDestinationIsSource()) {
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01001108 __ cmpl(src, dest);
Roland Levillainebea3d22016-04-12 15:42:57 +01001109 __ j(kNotEqual, &conditions_on_positions_validated);
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001110 }
1111 __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
1112 __ j(kGreater, slow_path->GetEntryLabel());
1113 }
1114 } else {
1115 if (!optimizations.GetDestinationIsSource()) {
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01001116 __ cmpl(src, dest);
Roland Levillainebea3d22016-04-12 15:42:57 +01001117 __ j(kNotEqual, &conditions_on_positions_validated);
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001118 }
1119 if (dest_pos.IsConstant()) {
1120 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1121 __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
1122 __ j(kLess, slow_path->GetEntryLabel());
1123 } else {
1124 __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
1125 __ j(kLess, slow_path->GetEntryLabel());
1126 }
1127 }
1128
Roland Levillainebea3d22016-04-12 15:42:57 +01001129 __ Bind(&conditions_on_positions_validated);
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001130
1131 if (!optimizations.GetSourceIsNotNull()) {
1132 // Bail out if the source is null.
1133 __ testl(src, src);
1134 __ j(kEqual, slow_path->GetEntryLabel());
1135 }
1136
1137 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1138 // Bail out if the destination is null.
1139 __ testl(dest, dest);
1140 __ j(kEqual, slow_path->GetEntryLabel());
1141 }
1142
1143 // If the length is negative, bail out.
1144 // We have already checked in the LocationsBuilder for the constant case.
1145 if (!length.IsConstant() &&
1146 !optimizations.GetCountIsSourceLength() &&
1147 !optimizations.GetCountIsDestinationLength()) {
1148 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1149 __ j(kLess, slow_path->GetEntryLabel());
1150 }
1151
1152 // Validity checks: source.
1153 CheckPosition(assembler,
1154 src_pos,
1155 src,
1156 length,
1157 slow_path,
1158 temp1,
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001159 optimizations.GetCountIsSourceLength());
1160
1161 // Validity checks: dest.
1162 CheckPosition(assembler,
1163 dest_pos,
1164 dest,
1165 length,
1166 slow_path,
1167 temp1,
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001168 optimizations.GetCountIsDestinationLength());
1169
1170 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1171 // Check whether all elements of the source array are assignable to the component
1172 // type of the destination array. We do two checks: the classes are the same,
1173 // or the destination is Object[]. If none of these checks succeed, we go to the
1174 // slow path.
1175 __ movl(temp1, Address(dest, class_offset));
1176 __ movl(temp2, Address(src, class_offset));
1177 bool did_unpoison = false;
1178 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1179 !optimizations.GetSourceIsNonPrimitiveArray()) {
Roland Levillainebea3d22016-04-12 15:42:57 +01001180 // One or two of the references need to be unpoisoned. Unpoison them
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001181 // both to make the identity check valid.
1182 __ MaybeUnpoisonHeapReference(temp1);
1183 __ MaybeUnpoisonHeapReference(temp2);
1184 did_unpoison = true;
1185 }
1186
1187 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1188 // Bail out if the destination is not a non primitive array.
Roland Levillainebea3d22016-04-12 15:42:57 +01001189 // /* HeapReference<Class> */ TMP = temp1->component_type_
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001190 __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1191 __ testl(CpuRegister(TMP), CpuRegister(TMP));
1192 __ j(kEqual, slow_path->GetEntryLabel());
1193 __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1194 __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1195 __ j(kNotEqual, slow_path->GetEntryLabel());
1196 }
1197
1198 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1199 // Bail out if the source is not a non primitive array.
Roland Levillainebea3d22016-04-12 15:42:57 +01001200 // /* HeapReference<Class> */ TMP = temp2->component_type_
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001201 __ movl(CpuRegister(TMP), Address(temp2, component_offset));
1202 __ testl(CpuRegister(TMP), CpuRegister(TMP));
1203 __ j(kEqual, slow_path->GetEntryLabel());
1204 __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1205 __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1206 __ j(kNotEqual, slow_path->GetEntryLabel());
1207 }
1208
1209 __ cmpl(temp1, temp2);
1210
1211 if (optimizations.GetDestinationIsTypedObjectArray()) {
1212 NearLabel do_copy;
1213 __ j(kEqual, &do_copy);
1214 if (!did_unpoison) {
1215 __ MaybeUnpoisonHeapReference(temp1);
1216 }
Roland Levillainebea3d22016-04-12 15:42:57 +01001217 // /* HeapReference<Class> */ temp1 = temp1->component_type_
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001218 __ movl(temp1, Address(temp1, component_offset));
1219 __ MaybeUnpoisonHeapReference(temp1);
Roland Levillainebea3d22016-04-12 15:42:57 +01001220 // /* HeapReference<Class> */ temp1 = temp1->super_class_
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001221 __ movl(temp1, Address(temp1, super_offset));
1222 // No need to unpoison the result, we're comparing against null.
1223 __ testl(temp1, temp1);
1224 __ j(kNotEqual, slow_path->GetEntryLabel());
1225 __ Bind(&do_copy);
1226 } else {
1227 __ j(kNotEqual, slow_path->GetEntryLabel());
1228 }
1229 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1230 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1231 // Bail out if the source is not a non primitive array.
Roland Levillainebea3d22016-04-12 15:42:57 +01001232 // /* HeapReference<Class> */ temp1 = src->klass_
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001233 __ movl(temp1, Address(src, class_offset));
1234 __ MaybeUnpoisonHeapReference(temp1);
Roland Levillainebea3d22016-04-12 15:42:57 +01001235 // /* HeapReference<Class> */ TMP = temp1->component_type_
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001236 __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1237 __ testl(CpuRegister(TMP), CpuRegister(TMP));
1238 __ j(kEqual, slow_path->GetEntryLabel());
1239 __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1240 __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1241 __ j(kNotEqual, slow_path->GetEntryLabel());
1242 }
1243
1244 // Compute base source address, base destination address, and end source address.
1245
Nicolas Geoffrayfea1abd2016-07-06 12:09:12 +01001246 int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001247 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
1248 if (src_pos.IsConstant()) {
1249 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1250 __ leal(temp1, Address(src, element_size * constant + offset));
1251 } else {
1252 __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
1253 }
1254
1255 if (dest_pos.IsConstant()) {
1256 int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1257 __ leal(temp2, Address(dest, element_size * constant + offset));
1258 } else {
1259 __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
1260 }
1261
1262 if (length.IsConstant()) {
1263 int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
1264 __ leal(temp3, Address(temp1, element_size * constant));
1265 } else {
1266 __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
1267 }
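  // For illustration (the data offset value below is an assumption for this sketch): with
  // element_size == 4 and a data offset of, say, 12, the lea computations above give
  //   temp1 = src  + 12 + 4 * src_pos    // address of the first source element
  //   temp2 = dest + 12 + 4 * dest_pos   // address of the first destination element
  //   temp3 = temp1 + 4 * length         // one past the last source element
  // so the copy loop below simply advances temp1/temp2 until temp1 reaches temp3.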
1268
1269 // Iterate over the arrays and do a raw copy of the objects. We don't need to
Nicolas Geoffrayfea1abd2016-07-06 12:09:12 +01001270 // poison/unpoison the references: the raw copy below transfers the stored (possibly poisoned) values unchanged.
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001271 NearLabel loop, done;
1272 __ cmpl(temp1, temp3);
1273 __ j(kEqual, &done);
1274 __ Bind(&loop);
1275 __ movl(CpuRegister(TMP), Address(temp1, 0));
1276 __ movl(Address(temp2, 0), CpuRegister(TMP));
1277 __ addl(temp1, Immediate(element_size));
1278 __ addl(temp2, Immediate(element_size));
1279 __ cmpl(temp1, temp3);
1280 __ j(kNotEqual, &loop);
1281 __ Bind(&done);
1282
1283 // We only need one card marking on the destination array.
1284 codegen_->MarkGCCard(temp1,
1285 temp2,
1286 dest,
1287 CpuRegister(kNoRegister),
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001288 /* value_can_be_null */ false);
Nicolas Geoffrayee3cf072015-10-06 11:45:02 +01001289
1290 __ Bind(slow_path->GetExitLabel());
1291}
1292
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001293void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1294 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu54ff4822016-07-07 18:03:19 +01001295 LocationSummary::kCallOnMainOnly,
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001296 kIntrinsified);
1297 InvokeRuntimeCallingConvention calling_convention;
1298 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1299 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1300 locations->SetOut(Location::RegisterLocation(RAX));
1301}
1302
1303void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1304 X86_64Assembler* assembler = GetAssembler();
1305 LocationSummary* locations = invoke->GetLocations();
1306
Nicolas Geoffray512e04d2015-03-27 17:21:24 +00001307 // Note that the null check must have been done earlier.
Calin Juravle641547a2015-04-21 22:08:51 +01001308 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001309
1310 CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1311 __ testl(argument, argument);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001312 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001313 codegen_->AddSlowPath(slow_path);
1314 __ j(kEqual, slow_path->GetEntryLabel());
1315
Andreas Gampe542451c2016-07-26 09:02:02 -07001316 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pStringCompareTo),
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001317 /* no_rip */ true));
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001318 __ Bind(slow_path->GetExitLabel());
1319}
1320
Agi Csakif8cfb202015-08-13 17:54:54 -07001321void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1322 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1323 LocationSummary::kNoCall,
1324 kIntrinsified);
1325 locations->SetInAt(0, Location::RequiresRegister());
1326 locations->SetInAt(1, Location::RequiresRegister());
1327
1328 // Request temporary registers: RCX and RDI are needed for the repe_cmpsq instruction.
1329 locations->AddTemp(Location::RegisterLocation(RCX));
1330 locations->AddTemp(Location::RegisterLocation(RDI));
1331
1332 // Set the output register: RSI is needed for the repe_cmpsq instruction anyway.
1333 locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1334}
1335
1336void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1337 X86_64Assembler* assembler = GetAssembler();
1338 LocationSummary* locations = invoke->GetLocations();
1339
1340 CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1341 CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1342 CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1343 CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1344 CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1345
Mark Mendell0c9497d2015-08-21 09:30:05 -04001346 NearLabel end, return_true, return_false;
Agi Csakif8cfb202015-08-13 17:54:54 -07001347
1348 // Get offsets of count, value, and class fields within a string object.
1349 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1350 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1351 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1352
1353 // Note that the null check must have been done earlier.
1354 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1355
Vladimir Marko53b52002016-05-24 19:30:45 +01001356 StringEqualsOptimizations optimizations(invoke);
1357 if (!optimizations.GetArgumentNotNull()) {
1358 // Check if input is null, return false if it is.
1359 __ testl(arg, arg);
1360 __ j(kEqual, &return_false);
1361 }
Agi Csakif8cfb202015-08-13 17:54:54 -07001362
Vladimir Marko53b52002016-05-24 19:30:45 +01001363 if (!optimizations.GetArgumentIsString()) {
1364 // Instanceof check for the argument by comparing class fields.
1365 // All string objects must have the same type since String cannot be subclassed.
1366 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1367 // If the argument is a string object, its class field must be equal to receiver's class field.
1368 __ movl(rcx, Address(str, class_offset));
1369 __ cmpl(rcx, Address(arg, class_offset));
1370 __ j(kNotEqual, &return_false);
1371 }
Agi Csakif8cfb202015-08-13 17:54:54 -07001372
1373 // Reference equality check, return true if same reference.
1374 __ cmpl(str, arg);
1375 __ j(kEqual, &return_true);
1376
1377 // Load length of receiver string.
1378 __ movl(rcx, Address(str, count_offset));
1379 // Check if lengths are equal, return false if they're not.
1380 __ cmpl(rcx, Address(arg, count_offset));
1381 __ j(kNotEqual, &return_false);
1382 // Return true if both strings are empty.
Mark Mendell0c9497d2015-08-21 09:30:05 -04001383 __ jrcxz(&return_true);
Agi Csakif8cfb202015-08-13 17:54:54 -07001384
1385 // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1386 __ leal(rsi, Address(str, value_offset));
1387 __ leal(rdi, Address(arg, value_offset));
1388
1389 // Divide string length by 4 and adjust for lengths not divisible by 4.
1390 __ addl(rcx, Immediate(3));
1391 __ shrl(rcx, Immediate(2));
1392
1393 // Assertions that must hold in order to compare strings 4 characters at a time.
1394 DCHECK_ALIGNED(value_offset, 8);
1395 static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
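  // For example, a length of 5 chars gives (5 + 3) >> 2 == 2 quadword comparisons, i.e.
  // 8 chars worth of memory. The up-to-3 chars read past the end of each string are zero
  // padding (the assertions above guarantee the 8-byte alignment this relies on), so they
  // compare equal in both strings and cannot affect the result.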
1396
1397 // Loop to compare strings four characters at a time starting at the beginning of the string.
1398 __ repe_cmpsq();
1399 // If strings are not equal, zero flag will be cleared.
1400 __ j(kNotEqual, &return_false);
1401
1402 // Return true and exit the function.
1403 // If loop does not result in returning false, we return true.
1404 __ Bind(&return_true);
1405 __ movl(rsi, Immediate(1));
1406 __ jmp(&end);
1407
1408 // Return false and exit the function.
1409 __ Bind(&return_false);
1410 __ xorl(rsi, rsi);
1411 __ Bind(&end);
1412}
1413
Andreas Gampe21030dd2015-05-07 14:46:15 -07001414static void CreateStringIndexOfLocations(HInvoke* invoke,
1415 ArenaAllocator* allocator,
1416 bool start_at_zero) {
1417 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1418 LocationSummary::kCallOnSlowPath,
1419 kIntrinsified);
1420 // The data needs to be in RDI for scasw, so request that the string be placed there.
1421 locations->SetInAt(0, Location::RegisterLocation(RDI));
1422 // If we look for a constant char, we'll still have to copy it into RAX. So just request the
1423 // allocator to do that anyway. We can still do the constant check by checking the parameter
1424 // of the instruction explicitly.
1425 // Note: This works as we don't clobber RAX anywhere.
1426 locations->SetInAt(1, Location::RegisterLocation(RAX));
1427 if (!start_at_zero) {
1428 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1429 }
1430 // As we clobber RDI during execution anyways, also use it as the output.
1431 locations->SetOut(Location::SameAsFirstInput());
1432
1433 // repne scasw uses RCX as the counter.
1434 locations->AddTemp(Location::RegisterLocation(RCX));
1435 // Need another temporary to be able to compute the result.
1436 locations->AddTemp(Location::RequiresRegister());
1437}
1438
1439static void GenerateStringIndexOf(HInvoke* invoke,
1440 X86_64Assembler* assembler,
1441 CodeGeneratorX86_64* codegen,
1442 ArenaAllocator* allocator,
1443 bool start_at_zero) {
1444 LocationSummary* locations = invoke->GetLocations();
1445
1446 // Note that the null check must have been done earlier.
1447 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1448
1449 CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1450 CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1451 CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1452 CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1453 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1454
1455 // Check our assumptions for registers.
1456 DCHECK_EQ(string_obj.AsRegister(), RDI);
1457 DCHECK_EQ(search_value.AsRegister(), RAX);
1458 DCHECK_EQ(counter.AsRegister(), RCX);
1459 DCHECK_EQ(out.AsRegister(), RDI);
1460
1461 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
Vladimir Markofb6c90a2016-05-06 15:52:12 +01001462 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
Andreas Gampe85b62f22015-09-09 13:15:38 -07001463 SlowPathCode* slow_path = nullptr;
Vladimir Markofb6c90a2016-05-06 15:52:12 +01001464 HInstruction* code_point = invoke->InputAt(1);
1465 if (code_point->IsIntConstant()) {
Vladimir Markoda051082016-05-17 16:10:20 +01001466 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
Andreas Gampe21030dd2015-05-07 14:46:15 -07001467 std::numeric_limits<uint16_t>::max()) {
1468 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1469 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1470 slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
1471 codegen->AddSlowPath(slow_path);
1472 __ jmp(slow_path->GetEntryLabel());
1473 __ Bind(slow_path->GetExitLabel());
1474 return;
1475 }
Vladimir Markofb6c90a2016-05-06 15:52:12 +01001476 } else if (code_point->GetType() != Primitive::kPrimChar) {
Andreas Gampe21030dd2015-05-07 14:46:15 -07001477 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1478 slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
1479 codegen->AddSlowPath(slow_path);
1480 __ j(kAbove, slow_path->GetEntryLabel());
1481 }
1482
1483 // From here down, we know that we are looking for a char that fits in 16 bits.
1484 // Location of reference to data array within the String object.
1485 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1486 // Location of count within the String object.
1487 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1488
1489 // Load string length, i.e., the count field of the string.
1490 __ movl(string_length, Address(string_obj, count_offset));
1491
1492 // Do a length check.
1493 // TODO: Support jecxz.
Mark Mendell0c9497d2015-08-21 09:30:05 -04001494 NearLabel not_found_label;
Andreas Gampe21030dd2015-05-07 14:46:15 -07001495 __ testl(string_length, string_length);
1496 __ j(kEqual, &not_found_label);
1497
1498 if (start_at_zero) {
1499 // Number of chars to scan is the same as the string length.
1500 __ movl(counter, string_length);
1501
1502 // Move to the start of the string.
1503 __ addq(string_obj, Immediate(value_offset));
1504 } else {
1505 CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1506
1507 // Do a start_index check.
1508 __ cmpl(start_index, string_length);
1509 __ j(kGreaterEqual, &not_found_label);
1510
1511 // Ensure we have a start index >= 0 (clamp a negative start_index to zero).
1512 __ xorl(counter, counter);
1513 __ cmpl(start_index, Immediate(0));
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001514 __ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough.
Andreas Gampe21030dd2015-05-07 14:46:15 -07001515
1516 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1517 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1518
1519 // Now update the work counter (RCX): it will be string.length - start_index.
1520 __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
1521 __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1522 }
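  // As a sketch of the net effect of this block (register names as above):
  //   counter    == string_length - max(start_index, 0)   // chars left to scan
  //   string_obj == &value[max(start_index, 0)]           // first char to scan
  // which is exactly the state the start_at_zero path reaches with start_index == 0.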
1523
1524 // Everything is set up for repne scasw:
1525 // * Comparison address in RDI.
1526 // * Counter in ECX.
1527 __ repne_scasw();
1528
1529 // Did we find a match?
1530 __ j(kNotEqual, &not_found_label);
1531
1532 // Yes, we matched. Compute the index of the result.
1533 __ subl(string_length, counter);
1534 __ leal(out, Address(string_length, -1));
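  // After repne scasw, RCX holds the number of chars remaining past the matching char,
  // so the match index is string_length - counter - 1. This holds for both the zero and
  // the non-zero start_index paths, since the starting offset is already folded into the
  // counter computed above.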
1535
Mark Mendell0c9497d2015-08-21 09:30:05 -04001536 NearLabel done;
Andreas Gampe21030dd2015-05-07 14:46:15 -07001537 __ jmp(&done);
1538
1539 // Failed to match; return -1.
1540 __ Bind(&not_found_label);
1541 __ movl(out, Immediate(-1));
1542
1543 // And join up at the end.
1544 __ Bind(&done);
1545 if (slow_path != nullptr) {
1546 __ Bind(slow_path->GetExitLabel());
1547 }
1548}
1549
1550void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001551 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001552}
1553
1554void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001555 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001556}
1557
1558void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001559 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001560}
1561
1562void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001563 GenerateStringIndexOf(
1564 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001565}
1566
Jeff Hao848f70a2014-01-15 13:49:50 -08001567void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1568 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu54ff4822016-07-07 18:03:19 +01001569 LocationSummary::kCallOnMainOnly,
Jeff Hao848f70a2014-01-15 13:49:50 -08001570 kIntrinsified);
1571 InvokeRuntimeCallingConvention calling_convention;
1572 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1573 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1574 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1575 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1576 locations->SetOut(Location::RegisterLocation(RAX));
1577}
1578
1579void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1580 X86_64Assembler* assembler = GetAssembler();
1581 LocationSummary* locations = invoke->GetLocations();
1582
1583 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1584 __ testl(byte_array, byte_array);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001585 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08001586 codegen_->AddSlowPath(slow_path);
1587 __ j(kEqual, slow_path->GetEntryLabel());
1588
Andreas Gampe542451c2016-07-26 09:02:02 -07001589 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
1590 pAllocStringFromBytes),
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001591 /* no_rip */ true));
Roland Levillainf969a202016-03-09 16:14:00 +00001592 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
Jeff Hao848f70a2014-01-15 13:49:50 -08001593 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1594 __ Bind(slow_path->GetExitLabel());
1595}
1596
1597void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1598 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu54ff4822016-07-07 18:03:19 +01001599 LocationSummary::kCallOnMainOnly,
Jeff Hao848f70a2014-01-15 13:49:50 -08001600 kIntrinsified);
1601 InvokeRuntimeCallingConvention calling_convention;
1602 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1603 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1604 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1605 locations->SetOut(Location::RegisterLocation(RAX));
1606}
1607
1608void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1609 X86_64Assembler* assembler = GetAssembler();
1610
Roland Levillaincc3839c2016-02-29 16:23:48 +00001611 // No need to emit code checking whether `locations->InAt(2)` is a null
1612 // pointer, as callers of the native method
1613 //
1614 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1615 //
1616 // all include a null check on `data` before calling that method.
Andreas Gampe542451c2016-07-26 09:02:02 -07001617 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
1618 pAllocStringFromChars),
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001619 /* no_rip */ true));
Roland Levillainf969a202016-03-09 16:14:00 +00001620 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
Jeff Hao848f70a2014-01-15 13:49:50 -08001621 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1622}
1623
1624void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1625 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu54ff4822016-07-07 18:03:19 +01001626 LocationSummary::kCallOnMainOnly,
Jeff Hao848f70a2014-01-15 13:49:50 -08001627 kIntrinsified);
1628 InvokeRuntimeCallingConvention calling_convention;
1629 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1630 locations->SetOut(Location::RegisterLocation(RAX));
1631}
1632
1633void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1634 X86_64Assembler* assembler = GetAssembler();
1635 LocationSummary* locations = invoke->GetLocations();
1636
1637 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1638 __ testl(string_to_copy, string_to_copy);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001639 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08001640 codegen_->AddSlowPath(slow_path);
1641 __ j(kEqual, slow_path->GetEntryLabel());
1642
Andreas Gampe542451c2016-07-26 09:02:02 -07001643 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
1644 pAllocStringFromString),
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001645 /* no_rip */ true));
Roland Levillainf969a202016-03-09 16:14:00 +00001646 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
Jeff Hao848f70a2014-01-15 13:49:50 -08001647 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1648 __ Bind(slow_path->GetExitLabel());
1649}
1650
Mark Mendell8f8926a2015-08-17 11:39:06 -04001651void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1652 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1653 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1654 LocationSummary::kNoCall,
1655 kIntrinsified);
1656 locations->SetInAt(0, Location::RequiresRegister());
1657 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1658 locations->SetInAt(2, Location::RequiresRegister());
1659 locations->SetInAt(3, Location::RequiresRegister());
1660 locations->SetInAt(4, Location::RequiresRegister());
1661
1662 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1663 locations->AddTemp(Location::RegisterLocation(RSI));
1664 locations->AddTemp(Location::RegisterLocation(RDI));
1665 locations->AddTemp(Location::RegisterLocation(RCX));
1666}
1667
1668void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1669 X86_64Assembler* assembler = GetAssembler();
1670 LocationSummary* locations = invoke->GetLocations();
1671
1672 size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1673 // Location of data in char array buffer.
1674 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1675 // Location of char array data in string.
1676 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1677
1678 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1679 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1680 Location srcBegin = locations->InAt(1);
1681 int srcBegin_value =
1682 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1683 CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1684 CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1685 CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1686
1687 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1688 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1689 DCHECK_EQ(char_size, 2u);
1690
1691 // Compute the address of the destination buffer.
1692 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1693
1694 // Compute the address of the source string.
1695 if (srcBegin.IsConstant()) {
1696 // Compute the address of the source string by adding the number of chars from
1697 // the source beginning to the value offset of a string.
1698 __ leaq(CpuRegister(RSI), Address(obj, srcBegin_value * char_size + value_offset));
1699 } else {
1700 __ leaq(CpuRegister(RSI), Address(obj, srcBegin.AsRegister<CpuRegister>(),
1701 ScaleFactor::TIMES_2, value_offset));
1702 }
1703
1704 // Compute the number of chars (words) to move.
1705 __ movl(CpuRegister(RCX), srcEnd);
1706 if (srcBegin.IsConstant()) {
1707 if (srcBegin_value != 0) {
1708 __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1709 }
1710 } else {
1711 DCHECK(srcBegin.IsRegister());
1712 __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1713 }
1714
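  // At this point (a summary, not an extra check): RSI points at value[srcBegin], RDI
  // points at dst[dstBegin], and RCX holds srcEnd - srcBegin, so REP MOVSW copies exactly
  // the requested range of 16-bit chars.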
1715 // Do the move.
1716 __ rep_movsw();
1717}
1718
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001719static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1720 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1721 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
1722 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1723 // to avoid a SIGBUS.
1724 switch (size) {
1725 case Primitive::kPrimByte:
1726 __ movsxb(out, Address(address, 0));
1727 break;
1728 case Primitive::kPrimShort:
1729 __ movsxw(out, Address(address, 0));
1730 break;
1731 case Primitive::kPrimInt:
1732 __ movl(out, Address(address, 0));
1733 break;
1734 case Primitive::kPrimLong:
1735 __ movq(out, Address(address, 0));
1736 break;
1737 default:
1738 LOG(FATAL) << "Type not recognized for peek: " << size;
1739 UNREACHABLE();
1740 }
1741}
1742
1743void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1744 CreateIntToIntLocations(arena_, invoke);
1745}
1746
1747void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1748 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1749}
1750
1751void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1752 CreateIntToIntLocations(arena_, invoke);
1753}
1754
1755void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1756 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1757}
1758
1759void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1760 CreateIntToIntLocations(arena_, invoke);
1761}
1762
1763void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1764 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1765}
1766
1767void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1768 CreateIntToIntLocations(arena_, invoke);
1769}
1770
1771void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1772 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1773}
1774
1775static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
1776 LocationSummary* locations = new (arena) LocationSummary(invoke,
1777 LocationSummary::kNoCall,
1778 kIntrinsified);
1779 locations->SetInAt(0, Location::RequiresRegister());
Mark Mendellea5af682015-10-22 17:35:49 -04001780 locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001781}
1782
1783static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1784 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
Mark Mendell40741f32015-04-20 22:10:34 -04001785 Location value = locations->InAt(1);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001786 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1787 // to avoid a SIGBUS.
1788 switch (size) {
1789 case Primitive::kPrimByte:
Mark Mendell40741f32015-04-20 22:10:34 -04001790 if (value.IsConstant()) {
1791 __ movb(Address(address, 0),
1792 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1793 } else {
1794 __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1795 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001796 break;
1797 case Primitive::kPrimShort:
Mark Mendell40741f32015-04-20 22:10:34 -04001798 if (value.IsConstant()) {
1799 __ movw(Address(address, 0),
1800 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1801 } else {
1802 __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1803 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001804 break;
1805 case Primitive::kPrimInt:
Mark Mendell40741f32015-04-20 22:10:34 -04001806 if (value.IsConstant()) {
1807 __ movl(Address(address, 0),
1808 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1809 } else {
1810 __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1811 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001812 break;
1813 case Primitive::kPrimLong:
Mark Mendell40741f32015-04-20 22:10:34 -04001814 if (value.IsConstant()) {
1815 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
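        // movq only accepts a sign-extended 32-bit immediate, so only constants that fit in
        // int32 can take this path; larger constants were given a register by
        // CreateIntIntToVoidLocations (Location::RegisterOrInt32Constant) above.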
1816 DCHECK(IsInt<32>(v));
1817 int32_t v_32 = v;
1818 __ movq(Address(address, 0), Immediate(v_32));
1819 } else {
1820 __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1821 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001822 break;
1823 default:
1824 LOG(FATAL) << "Type not recognized for poke: " << size;
1825 UNREACHABLE();
1826 }
1827}
1828
1829void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1830 CreateIntIntToVoidLocations(arena_, invoke);
1831}
1832
1833void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1834 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1835}
1836
1837void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1838 CreateIntIntToVoidLocations(arena_, invoke);
1839}
1840
1841void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1842 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1843}
1844
1845void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1846 CreateIntIntToVoidLocations(arena_, invoke);
1847}
1848
1849void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1850 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1851}
1852
1853void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1854 CreateIntIntToVoidLocations(arena_, invoke);
1855}
1856
1857void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1858 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1859}
1860
1861void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1862 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1863 LocationSummary::kNoCall,
1864 kIntrinsified);
1865 locations->SetOut(Location::RequiresRegister());
1866}
1867
1868void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1869 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
Andreas Gampe542451c2016-07-26 09:02:02 -07001870 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001871 /* no_rip */ true));
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001872}
1873
Roland Levillain0d5a2812015-11-13 10:07:31 +00001874static void GenUnsafeGet(HInvoke* invoke,
1875 Primitive::Type type,
1876 bool is_volatile ATTRIBUTE_UNUSED,
1877 CodeGeneratorX86_64* codegen) {
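  // is_volatile is intentionally unused here: on x86-64 an ordinary load already provides
  // the acquire ordering a volatile get requires, so no fence instruction has to be emitted.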
1878 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1879 LocationSummary* locations = invoke->GetLocations();
1880 Location base_loc = locations->InAt(1);
1881 CpuRegister base = base_loc.AsRegister<CpuRegister>();
1882 Location offset_loc = locations->InAt(2);
1883 CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1884 Location output_loc = locations->Out();
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001885 CpuRegister output = output_loc.AsRegister<CpuRegister>();
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001886
Andreas Gampe878d58c2015-01-15 23:24:00 -08001887 switch (type) {
1888 case Primitive::kPrimInt:
Roland Levillain0d5a2812015-11-13 10:07:31 +00001889 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001890 break;
1891
1892 case Primitive::kPrimNot: {
1893 if (kEmitCompilerReadBarrier) {
1894 if (kUseBakerReadBarrier) {
1895 Location temp = locations->GetTemp(0);
Sang, Chunlei0fcd2b82016-04-05 17:12:59 +08001896 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1897 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1898 invoke, output_loc, base, src, temp, /* needs_null_check */ false);
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001899 } else {
1900 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1901 codegen->GenerateReadBarrierSlow(
1902 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1903 }
1904 } else {
1905 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1906 __ MaybeUnpoisonHeapReference(output);
Roland Levillain4d027112015-07-01 15:41:14 +01001907 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001908 break;
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001909 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001910
1911 case Primitive::kPrimLong:
Roland Levillain0d5a2812015-11-13 10:07:31 +00001912 __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
Andreas Gampe878d58c2015-01-15 23:24:00 -08001913 break;
1914
1915 default:
1916 LOG(FATAL) << "Unsupported op size " << type;
1917 UNREACHABLE();
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001918 }
1919}
1920
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001921static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
1922 HInvoke* invoke,
1923 Primitive::Type type) {
Roland Levillain0d5a2812015-11-13 10:07:31 +00001924 bool can_call = kEmitCompilerReadBarrier &&
1925 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1926 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001927 LocationSummary* locations = new (arena) LocationSummary(invoke,
Roland Levillain0d5a2812015-11-13 10:07:31 +00001928 can_call ?
1929 LocationSummary::kCallOnSlowPath :
1930 LocationSummary::kNoCall,
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001931 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001932 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001933 locations->SetInAt(1, Location::RequiresRegister());
1934 locations->SetInAt(2, Location::RequiresRegister());
Roland Levillain3d312422016-06-23 13:53:42 +01001935 locations->SetOut(Location::RequiresRegister(),
1936 can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001937 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1938 // We need a temporary register for the read barrier marking slow
Sang, Chunlei0fcd2b82016-04-05 17:12:59 +08001939 // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier.
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001940 locations->AddTemp(Location::RequiresRegister());
1941 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001942}
1943
1944void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001945 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001946}
1947void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001948 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001949}
1950void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001951 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001952}
1953void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001954 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001955}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001956void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001957 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001958}
1959void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
Roland Levillain1e7f8db2015-12-15 10:54:19 +00001960 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001961}
1962
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001963
1964void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001965 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001966}
1967void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001968 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001969}
1970void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001971 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001972}
1973void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001974 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001975}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001976void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001977 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001978}
1979void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001980 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001981}
1982
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001983
1984static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
1985 Primitive::Type type,
1986 HInvoke* invoke) {
1987 LocationSummary* locations = new (arena) LocationSummary(invoke,
1988 LocationSummary::kNoCall,
1989 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001990 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001991 locations->SetInAt(1, Location::RequiresRegister());
1992 locations->SetInAt(2, Location::RequiresRegister());
1993 locations->SetInAt(3, Location::RequiresRegister());
1994 if (type == Primitive::kPrimNot) {
1995 // Need temp registers for card-marking.
Roland Levillain4d027112015-07-01 15:41:14 +01001996 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001997 locations->AddTemp(Location::RequiresRegister());
1998 }
1999}
2000
2001void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2002 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2003}
2004void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2005 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2006}
2007void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2008 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2009}
2010void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2011 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2012}
2013void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2014 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2015}
2016void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2017 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2018}
2019void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2020 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2021}
2022void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2023 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2024}
2025void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2026 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2027}
2028
2029// Ordered puts need no special handling here: they only require an AnyStore barrier, which the
2030// x86 memory model already provides.
2031static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
2032 CodeGeneratorX86_64* codegen) {
Roland Levillainb488b782015-10-22 11:38:49 +01002033 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002034 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2035 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2036 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2037
2038 if (type == Primitive::kPrimLong) {
2039 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
Roland Levillain4d027112015-07-01 15:41:14 +01002040 } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
2041 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2042 __ movl(temp, value);
2043 __ PoisonHeapReference(temp);
2044 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002045 } else {
2046 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2047 }
2048
2049 if (is_volatile) {
Mark P Mendell17077d82015-12-16 19:15:59 +00002050 codegen->MemoryFence();
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002051 }
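  // The fence above supplies the trailing StoreLoad barrier a volatile store needs
  // (typically MFENCE or an equivalent locked instruction); the barriers required before
  // the store are no-ops under the x86-64 memory model.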
2052
2053 if (type == Primitive::kPrimNot) {
Nicolas Geoffray07276db2015-05-18 14:22:09 +01002054 bool value_can_be_null = true; // TODO: Worth finding out this information?
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002055 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2056 locations->GetTemp(1).AsRegister<CpuRegister>(),
2057 base,
Nicolas Geoffray07276db2015-05-18 14:22:09 +01002058 value,
2059 value_can_be_null);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002060 }
2061}
2062
2063void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002064 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002065}
2066void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002067 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002068}
2069void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002070 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002071}
2072void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002073 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002074}
2075void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002076 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002077}
2078void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002079 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002080}
2081void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002082 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002083}
2084void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002085 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002086}
2087void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002088 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08002089}
2090
Mark Mendell58d25fd2015-04-03 14:52:31 -04002091static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
2092 HInvoke* invoke) {
2093 LocationSummary* locations = new (arena) LocationSummary(invoke,
2094 LocationSummary::kNoCall,
2095 kIntrinsified);
2096 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2097 locations->SetInAt(1, Location::RequiresRegister());
2098 locations->SetInAt(2, Location::RequiresRegister());
2099 // The expected value must be in EAX/RAX, as required by the CMPXCHG instruction.
2100 locations->SetInAt(3, Location::RegisterLocation(RAX));
2101 locations->SetInAt(4, Location::RequiresRegister());
2102
2103 locations->SetOut(Location::RequiresRegister());
2104 if (type == Primitive::kPrimNot) {
2105 // Need temp registers for card-marking.
Roland Levillainb488b782015-10-22 11:38:49 +01002106 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
Mark Mendell58d25fd2015-04-03 14:52:31 -04002107 locations->AddTemp(Location::RequiresRegister());
2108 }
2109}
2110
2111void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2112 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
2113}
2114
2115void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2116 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
2117}
2118
2119void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
Roland Levillain391b8662015-12-18 11:43:38 +00002120 // The UnsafeCASObject intrinsic is missing a read barrier, and
2121 // therefore sometimes does not work as expected (b/25883050).
2122 // Turn it off temporarily as a quick fix, until the read barrier is
Roland Levillain3d312422016-06-23 13:53:42 +01002123 // implemented (see TODO in GenCAS).
Roland Levillain391b8662015-12-18 11:43:38 +00002124 //
Roland Levillain3d312422016-06-23 13:53:42 +01002125 // TODO(rpl): Implement read barrier support in GenCAS and re-enable
Roland Levillain391b8662015-12-18 11:43:38 +00002126 // this intrinsic.
2127 if (kEmitCompilerReadBarrier) {
2128 return;
2129 }
2130
Mark Mendell58d25fd2015-04-03 14:52:31 -04002131 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
2132}
2133
2134static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
Roland Levillainb488b782015-10-22 11:38:49 +01002135 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
Mark Mendell58d25fd2015-04-03 14:52:31 -04002136 LocationSummary* locations = invoke->GetLocations();
2137
2138 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2139 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2140 CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
Roland Levillainb488b782015-10-22 11:38:49 +01002141 // Ensure `expected` is in RAX (required by the CMPXCHG instruction).
Mark Mendell58d25fd2015-04-03 14:52:31 -04002142 DCHECK_EQ(expected.AsRegister(), RAX);
2143 CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
2144 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2145
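  // Reminder of the LOCK CMPXCHG contract used below: it compares RAX with the memory
  // operand; on a match it stores `value` and sets ZF, otherwise it loads the current
  // memory value into RAX and clears ZF. That is why `expected` is pinned to RAX and why
  // ZF can be turned into the boolean result with setcc.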
Roland Levillainb488b782015-10-22 11:38:49 +01002146 if (type == Primitive::kPrimNot) {
2147 // Mark card for object assuming new value is stored.
2148 bool value_can_be_null = true; // TODO: Worth finding out this information?
2149 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2150 locations->GetTemp(1).AsRegister<CpuRegister>(),
2151 base,
2152 value,
2153 value_can_be_null);
Roland Levillain4d027112015-07-01 15:41:14 +01002154
Roland Levillainb488b782015-10-22 11:38:49 +01002155 bool base_equals_value = (base.AsRegister() == value.AsRegister());
2156 Register value_reg = value.AsRegister();
2157 if (kPoisonHeapReferences) {
2158 if (base_equals_value) {
2159 // If `base` and `value` are the same register location, move
2160 // `value_reg` to a temporary register. This way, poisoning
2161 // `value_reg` won't invalidate `base`.
2162 value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
2163 __ movl(CpuRegister(value_reg), base);
Roland Levillain4d027112015-07-01 15:41:14 +01002164 }
Roland Levillainb488b782015-10-22 11:38:49 +01002165
2166 // Check that the register allocator did not assign the location
2167 // of `expected` (RAX) to `value` nor to `base`, so that heap
2168 // poisoning (when enabled) works as intended below.
2169 // - If `value` were equal to `expected`, both references would
2170 // be poisoned twice, meaning they would not be poisoned at
2171 // all, as heap poisoning uses address negation.
2172 // - If `base` were equal to `expected`, poisoning `expected`
2173 // would invalidate `base`.
2174 DCHECK_NE(value_reg, expected.AsRegister());
2175 DCHECK_NE(base.AsRegister(), expected.AsRegister());
2176
2177 __ PoisonHeapReference(expected);
2178 __ PoisonHeapReference(CpuRegister(value_reg));
Mark Mendell58d25fd2015-04-03 14:52:31 -04002179 }
2180
Roland Levillain391b8662015-12-18 11:43:38 +00002181 // TODO: Add a read barrier for the reference stored in the object
2182 // before attempting the CAS, similar to the one in the
2183 // art::Unsafe_compareAndSwapObject JNI implementation.
2184 //
2185 // Note that this code is not (yet) used when read barriers are
2186 // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject).
2187 DCHECK(!kEmitCompilerReadBarrier);
Roland Levillainb488b782015-10-22 11:38:49 +01002188 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
Mark Mendell58d25fd2015-04-03 14:52:31 -04002189
Roland Levillain0d5a2812015-11-13 10:07:31 +00002190 // LOCK CMPXCHG has full barrier semantics, and we don't need
Roland Levillainb488b782015-10-22 11:38:49 +01002191 // scheduling barriers at this time.
Mark Mendell58d25fd2015-04-03 14:52:31 -04002192
Roland Levillainb488b782015-10-22 11:38:49 +01002193 // Convert ZF into the boolean result.
2194 __ setcc(kZero, out);
2195 __ movzxb(out, out);
Roland Levillain4d027112015-07-01 15:41:14 +01002196
Roland Levillain391b8662015-12-18 11:43:38 +00002197 // If heap poisoning is enabled, we need to unpoison the values
2198 // that were poisoned earlier.
Roland Levillainb488b782015-10-22 11:38:49 +01002199 if (kPoisonHeapReferences) {
2200 if (base_equals_value) {
2201 // `value_reg` has been moved to a temporary register, no need
2202 // to unpoison it.
2203 } else {
2204 // Ensure `value` is different from `out`, so that unpoisoning
2205 // the former does not invalidate the latter.
2206 DCHECK_NE(value_reg, out.AsRegister());
2207 __ UnpoisonHeapReference(CpuRegister(value_reg));
2208 }
2209 // Ensure `expected` is different from `out`, so that unpoisoning
2210 // the former does not invalidate the latter.
2211 DCHECK_NE(expected.AsRegister(), out.AsRegister());
2212 __ UnpoisonHeapReference(expected);
2213 }
2214 } else {
2215 if (type == Primitive::kPrimInt) {
2216 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
2217 } else if (type == Primitive::kPrimLong) {
2218 __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
2219 } else {
2220 LOG(FATAL) << "Unexpected CAS type " << type;
2221 }
2222
Roland Levillain0d5a2812015-11-13 10:07:31 +00002223 // LOCK CMPXCHG has full barrier semantics, and we don't need
Roland Levillainb488b782015-10-22 11:38:49 +01002224 // scheduling barriers at this time.
2225
2226 // Convert ZF into the boolean result.
2227 __ setcc(kZero, out);
2228 __ movzxb(out, out);
Roland Levillain4d027112015-07-01 15:41:14 +01002229 }
Mark Mendell58d25fd2015-04-03 14:52:31 -04002230}
2231
2232void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2233 GenCAS(Primitive::kPrimInt, invoke, codegen_);
2234}
2235
2236void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2237 GenCAS(Primitive::kPrimLong, invoke, codegen_);
2238}
2239
2240void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
Roland Levillain3d312422016-06-23 13:53:42 +01002241 // The UnsafeCASObject intrinsic is missing a read barrier, and
2242 // therefore sometimes does not work as expected (b/25883050).
2243 // Turn it off temporarily as a quick fix, until the read barrier is
2244 // implemented (see TODO in GenCAS).
2245 //
2246 // TODO(rpl): Implement read barrier support in GenCAS and re-enable
2247 // this intrinsic.
2248 DCHECK(!kEmitCompilerReadBarrier);
2249
Mark Mendell58d25fd2015-04-03 14:52:31 -04002250 GenCAS(Primitive::kPrimNot, invoke, codegen_);
2251}
2252
2253void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2254 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2255 LocationSummary::kNoCall,
2256 kIntrinsified);
2257 locations->SetInAt(0, Location::RequiresRegister());
2258 locations->SetOut(Location::SameAsFirstInput());
2259 locations->AddTemp(Location::RequiresRegister());
2260}
2261
2262static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2263 X86_64Assembler* assembler) {
2264 Immediate imm_shift(shift);
2265 Immediate imm_mask(mask);
2266 __ movl(temp, reg);
2267 __ shrl(reg, imm_shift);
2268 __ andl(temp, imm_mask);
2269 __ andl(reg, imm_mask);
2270 __ shll(temp, imm_shift);
2271 __ orl(reg, temp);
2272}
2273
2274void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
Aart Bikc5d47542016-01-27 17:00:35 -08002275 X86_64Assembler* assembler = GetAssembler();
Mark Mendell58d25fd2015-04-03 14:52:31 -04002276 LocationSummary* locations = invoke->GetLocations();
2277
2278 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2279 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2280
2281 /*
2282 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2283 * swapping bits to reverse bits in a number x. Using bswap to save instructions
2284 * compared to generic luni implementation which has 5 rounds of swapping bits.
2285 * x = bswap x
2286 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2287 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2288 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2289 */
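  // Worked example: reversing 0x00000001.
  //   bswap            -> 0x01000000 (bit 0 has moved to bit 24)
  //   swap 1-bit pairs -> bit 25
  //   swap 2-bit pairs -> bit 27
  //   swap nibbles     -> bit 31, i.e. 0x80000000, the fully reversed value.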
2290 __ bswapl(reg);
2291 SwapBits(reg, temp, 1, 0x55555555, assembler);
2292 SwapBits(reg, temp, 2, 0x33333333, assembler);
2293 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2294}
2295
2296void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2297 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2298 LocationSummary::kNoCall,
2299 kIntrinsified);
2300 locations->SetInAt(0, Location::RequiresRegister());
2301 locations->SetOut(Location::SameAsFirstInput());
2302 locations->AddTemp(Location::RequiresRegister());
2303 locations->AddTemp(Location::RequiresRegister());
2304}
2305
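// Unlike the 32-bit SwapBits above, the 64-bit masks cannot be encoded as immediates
// (x86-64 logical instructions only take sign-extended 32-bit immediates), so each mask
// is first materialized in a scratch register.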
2306static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2307 int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2308 Immediate imm_shift(shift);
2309 __ movq(temp_mask, Immediate(mask));
2310 __ movq(temp, reg);
2311 __ shrq(reg, imm_shift);
2312 __ andq(temp, temp_mask);
2313 __ andq(reg, temp_mask);
2314 __ shlq(temp, imm_shift);
2315 __ orq(reg, temp);
2316}
2317
2318void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
Aart Bikc5d47542016-01-27 17:00:35 -08002319 X86_64Assembler* assembler = GetAssembler();
Mark Mendell58d25fd2015-04-03 14:52:31 -04002320 LocationSummary* locations = invoke->GetLocations();
2321
2322 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2323 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2324 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2325
2326 /*
2327 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2328 * swapping bits to reverse bits in a long number x. Using bswap to save instructions
2329 * compared to generic luni implementation which has 5 rounds of swapping bits.
2330 * x = bswap x
2331 * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2332 * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2333 * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2334 */
2335 __ bswapq(reg);
2336 SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2337 SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2338 SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
2339}

static void CreateBitCountLocations(
    ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
    // Do nothing if there is no popcnt support. This results in generating
    // a call for the intrinsic rather than direct code.
    return;
  }
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}
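// Note: the input uses Location::Any() because popcnt can take either a register or a memory
// source operand; GenBitCount below handles the register, stack-slot, and constant cases.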

static void GenBitCount(X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen,
                        HInvoke* invoke,
                        bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    int32_t result = is_long
        ? POPCOUNT(static_cast<uint64_t>(value))
        : POPCOUNT(static_cast<uint32_t>(value));
    codegen->Load32BitValue(out, result);
    return;
  }

  if (src.IsRegister()) {
    if (is_long) {
      __ popcntq(out, src.AsRegister<CpuRegister>());
    } else {
      __ popcntl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(is_high ? Location::RegisterLocation(RCX)  // needs CL
                             : Location::RequiresRegister());   // any will do
}

static void GenOneBit(X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen,
                      HInvoke* invoke,
                      bool is_high, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      __ xorl(out, out);  // Clears upper bits too.
      return;
    }
    // Nonzero value.
    if (is_high) {
      value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
                      : 31 - CLZ(static_cast<uint32_t>(value));
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value))
                      : CTZ(static_cast<uint32_t>(value));
    }
    if (is_long) {
      codegen->Load64BitValue(out, 1ULL << value);
    } else {
      codegen->Load32BitValue(out, 1 << value);
    }
    return;
  }

  // Handle the non-constant cases.
  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
  if (is_high) {
    // Use architectural support: basically 1 << bsr.
    if (src.IsRegister()) {
      if (is_long) {
        __ bsrq(tmp, src.AsRegister<CpuRegister>());
      } else {
        __ bsrl(tmp, src.AsRegister<CpuRegister>());
      }
    } else if (is_long) {
      DCHECK(src.IsDoubleStackSlot());
      __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    } else {
      DCHECK(src.IsStackSlot());
      __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    }
    // BSR sets ZF if the input was zero.
    NearLabel is_zero, done;
    __ j(kEqual, &is_zero);
    __ movl(out, Immediate(1));  // Clears upper bits too.
    if (is_long) {
      __ shlq(out, tmp);
    } else {
      __ shll(out, tmp);
    }
    __ jmp(&done);
    __ Bind(&is_zero);
    __ xorl(out, out);  // Clears upper bits too.
    __ Bind(&done);
  } else {
    // Copy input into temporary.
    if (src.IsRegister()) {
      if (is_long) {
        __ movq(tmp, src.AsRegister<CpuRegister>());
      } else {
        __ movl(tmp, src.AsRegister<CpuRegister>());
      }
    } else if (is_long) {
      DCHECK(src.IsDoubleStackSlot());
      __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    } else {
      DCHECK(src.IsStackSlot());
      __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    }
    // Do the bit twiddling: basically out = tmp & -tmp.
    if (is_long) {
      __ movq(out, tmp);
      __ negq(tmp);
      __ andq(out, tmp);
    } else {
      __ movl(out, tmp);
      __ negl(tmp);
      __ andl(out, tmp);
    }
  }
}
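// Note: a rough C-level sketch of the two non-constant paths above (illustrative only):
//   highestOneBit(x):  x == 0 ? 0 : 1 << bsr(x)   // bsr = index of the highest set bit
//   lowestOneBit(x):   x & -x                     // two's complement isolates the lowest set bit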

void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true);
}

static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenLeadingZeros(X86_64Assembler* assembler,
                            CodeGeneratorX86_64* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsrq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsrl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSR sets ZF if the input was zero, and the output is undefined.
  NearLabel is_zero, done;
  __ j(kEqual, &is_zero);

  // Correct the result from BSR to get the CLZ result.
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}
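// Note on the correction above: for a nonzero n-bit input, clz(x) = (n - 1) - bsr(x).
// Since bsr(x) lies in [0, n - 1] and n - 1 (31 or 63) is all-ones in binary, the
// subtraction is the same as an XOR with n - 1, which is what the xorl above relies on.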

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86_64Assembler* assembler,
                             CodeGeneratorX86_64* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsfq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsfl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSF sets ZF if the input was zero, and the output is undefined.
  NearLabel done;
  __ j(kNotEqual, &done);

  // Fix the zero case with the expected result.
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}
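// Note: no post-correction is needed above for nonzero inputs because, unlike BSR, BSF
// already returns the index of the lowest set bit, which equals the trailing-zero count;
// only the zero case has to be patched to 32 or 64.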

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
  if (kEmitCompilerReadBarrier) {
    // Do not intrinsify this call with the read barrier configuration.
    return;
  }
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
  DCHECK(!kEmitCompilerReadBarrier);
  LocationSummary* locations = invoke->GetLocations();
  X86_64Assembler* assembler = GetAssembler();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Load ArtMethod first.
  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(invoke_direct != nullptr);
  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
      invoke_direct, locations->GetTemp(0));
  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();

  // Now get declaring class.
  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));

  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
  DCHECK_NE(slow_path_flag_offset, 0u);
  DCHECK_NE(disable_flag_offset, 0u);
  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);

  // Check static flags that prevent us from using the intrinsic.
  if (slow_path_flag_offset == disable_flag_offset + 1) {
    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  }

  // Fast path.
  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ MaybeUnpoisonHeapReference(out);
  __ Bind(slow_path->GetExitLabel());
}
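// Note on the flag check above: when the two byte-sized flags happen to be adjacent
// (slow_path_flag_offset == disable_flag_offset + 1), a single 16-bit cmpw against zero
// tests both bytes at once; otherwise each flag is tested with its own cmpb.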

UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)

// 1.8.
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86_64)

#undef __

}  // namespace x86_64
}  // namespace art