/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

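// Bit patterns of the canonical quiet NaNs used by the FP min/max intrinsics
// below: sign bit clear, all exponent bits set, top mantissa bit set.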
static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

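// For the 64-bit cases below, the value has to cross between an XMM register
// and a 32-bit register pair. movd only transfers the low 32 bits, so the
// FP-to-int direction shifts the high half down with psrlq before a second
// movd, while the int-to-FP direction rebuilds the 64-bit value by
// interleaving the two halves with punpckldq.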
static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
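      // bswapl reverses all four bytes, leaving the reversed short in the
      // upper half; the arithmetic shift moves it back down and sign-extends.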
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
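  // On x86-32 there is no PC-relative addressing, so an invoke that reads the
  // constant area carries an extra "special" input holding a materialized
  // base method address.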
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    // We need addressability for the constant area.
    locations->SetInAt(1, Location::RequiresRegister());
    // We need a temporary to hold the constant.
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86Assembler* assembler,
                      CodeGeneratorX86* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    DCHECK(locations->InAt(1).IsRegister());
    // We also have a constant area pointer.
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    if (is64bit) {
      __ movsd(temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF), constant_area));
      __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
    } else {
      __ movss(temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF), constant_area));
      __ andps(output.AsFpuRegister<XmmRegister>(), temp);
    }
  } else {
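    // No constant area pointer: materialize the sign-bit mask (all bits set
    // except the IEEE sign bit) on the stack instead. The full 16-byte
    // adjustment relies on the frame keeping ESP 16-byte aligned, which
    // andpd/andps require for their 128-bit memory operands.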
    // Create the right constant on an aligned stack.
    if (is64bit) {
      __ subl(ESP, Immediate(8));
      __ pushl(Immediate(0x7FFFFFFF));
      __ pushl(Immediate(0xFFFFFFFF));
      __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    } else {
      __ subl(ESP, Immediate(12));
      __ pushl(Immediate(0x7FFFFFFF));
      __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    }
    __ addl(ESP, Immediate(16));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(EAX));
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RegisterLocation(EDX));
}

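// Branchless abs via the identity abs(x) = (x ^ s) - s with s = x >> 31:
// e.g. for x = -5, s = 0xFFFFFFFF, so (-5 ^ s) = 4 and 4 - (-1) = 5, while
// for non-negative x the xor and subtract are no-ops.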
static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
  Location output = locations->Out();
  Register out = output.AsRegister<Register>();
  DCHECK_EQ(out, EAX);
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(temp, EDX);

  // Sign extend EAX into EDX.
  __ cdq();

  // XOR EAX with sign.
  __ xorl(EAX, EDX);

  // Subtract out sign to correct.
  __ subl(EAX, EDX);

  // The result is in EAX.
}

static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddTemp(Location::RequiresRegister());
}

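// The same xor/subtract identity applied to a 64-bit register pair: sbbl
// propagates the borrow from the low word into the high word.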
static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // Compute the sign into the temporary.
  __ movl(temp, input_hi);
  __ sarl(temp, Immediate(31));

  // Store the sign into the output.
  __ movl(output_lo, temp);
  __ movl(output_hi, temp);

  // XOR the input to the output.
  __ xorl(output_lo, input_lo);
  __ xorl(output_hi, input_hi);

  // Subtract the sign.
  __ subl(output_lo, temp);
  __ sbbl(output_hi, temp);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
  CreateAbsIntLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
  CreateAbsLongLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsLong(invoke->GetLocations(), GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86Assembler* assembler,
                        CodeGeneratorX86* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  // (out := op1)
  // out <=? op2
  // if NaN jmp NaN_label
  // if out is min jmp done
  // if op2 is min jmp op2_label
  // handle -0/+0
  // jmp done
  // NaN_label:
  // out := NaN
  // op2_label:
  // out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slow path?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
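  // Reaching here means the operands compared equal. For identical values the
  // OR/AND below is a no-op; the case that matters is +0.0 vs -0.0, whose bit
  // patterns differ only in the sign bit: OR keeps the sign and yields -0.0
  // (the correct min), AND clears it and yields +0.0 (the correct max).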
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  // Do we have a constant area pointer?
  if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
    DCHECK(locations->InAt(2).IsRegister());
    Register constant_area = locations->InAt(2).AsRegister<Register>();
    if (is_double) {
      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, constant_area));
    } else {
      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, constant_area));
    }
  } else {
    if (is_double) {
      __ pushl(Immediate(kDoubleNaNHigh));
      __ pushl(Immediate(kDoubleNaNLow));
      __ movsd(out, Address(ESP, 0));
      __ addl(ESP, Immediate(8));
    } else {
      __ pushl(Immediate(kFloatNaN));
      __ movss(out, Address(ESP, 0));
      __ addl(ESP, Immediate(4));
    }
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    locations->SetInAt(2, Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ true,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ true,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ false,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ false,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  if (is_long) {
    // Need to perform a subtract to get the sign right.
    // op1 is already in the same location as the output.
    Location output = locations->Out();
    Register output_lo = output.AsRegisterPairLow<Register>();
    Register output_hi = output.AsRegisterPairHigh<Register>();

    Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();

    // Spare register to compute the subtraction to set condition code.
    Register temp = locations->GetTemp(0).AsRegister<Register>();

    // Subtract off op2_low.
    __ movl(temp, output_lo);
    __ subl(temp, op2_lo);

    // Now use the same temp and the borrow to finish the subtraction of op2_hi.
    __ movl(temp, output_hi);
    __ sbbl(temp, op2_hi);

    // Now the condition code is correct.
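    // The output still holds op1. For min we keep op1 only when op1 < op2, so
    // the cmovs replace it with op2 when op1 >= op2; max is the mirror case.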
    Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    __ cmovl(cond, output_lo, op2_lo);
    __ cmovl(cond, output_hi, op2_hi);
  } else {
    Register out = locations->Out().AsRegister<Register>();
    Register op2 = op2_loc.AsRegister<Register>();

    // (out := op1)
    // out <=? op2
    // if out is min jmp done
    // out := op2
    // done:

    __ cmpl(out, op2);
    Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    __ cmovl(cond, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Register to use to perform a long subtract to set cc.
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
                                      Location::RegisterLocation(EAX));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
                                    HInvoke* invoke,
                                    X86Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

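// The roundsd immediate encodes the SSE4.1 rounding mode: 0 rounds to nearest
// (even), 1 rounds toward negative infinity (floor) and 2 rounds toward
// positive infinity (ceil).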
void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
  if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    DCHECK(static_or_direct != nullptr);
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    if (static_or_direct->HasSpecialInput() &&
        invoke->InputAt(
            static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
      locations->SetInAt(1, Location::RequiresRegister());
    }
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {  // TODO: can we reach this?
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  NearLabel skip_incr, done;
  X86Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
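  // This is round-half-up: floor(in) is bumped whenever the discarded
  // fraction is at least 0.5f, matching Math.round for negative inputs too,
  // e.g. in = -2.5f gives floor = -3.0f, fraction 0.5f, result -2.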
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    // Direct constant area available.
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
    __ j(kBelow, &skip_incr);
    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
    __ Bind(&skip_incr);
  } else {
    // No constant area: go through stack.
    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
    __ comiss(t2, Address(ESP, 4));
    __ j(kBelow, &skip_incr);
    __ addss(t1, Address(ESP, 0));
    __ Bind(&skip_incr);
    __ addl(ESP, Immediate(8));
  }

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  __ movl(out, Immediate(kPrimIntMax));
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                      HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  __ fs()->call(Address::Absolute(GetThreadOffset<kX86PointerSize>(entry)));

  // Extract the return value from the FP stack.
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);

  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

static void CheckPosition(X86Assembler* assembler,
                          Location pos,
                          Register input,
                          Location length,
                          SlowPathCode* slow_path,
                          Register temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<Register>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<Register>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location srcPos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location destPos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  Register src_base = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(src_base, ESI);
  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
  DCHECK_EQ(dest_base, EDI);
  Register count = locations->GetTemp(2).AsRegister<Register>();
  DCHECK_EQ(count, ECX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same (to handle overlap).
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // We need the count in ECX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<Register>());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (srcPos.IsConstant()) {
    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (destPos.IsConstant()) {
    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
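  // rep movsw copies ECX 16-bit units from [ESI] to [EDI], advancing both
  // pointers as it goes; this is why the temporaries were pinned to
  // ESI/EDI/ECX above.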
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pStringCompareTo)));
  __ Bind(slow_path->GetExitLabel());
}

Agi Csakid7138c82015-08-13 17:46:44 -07001251void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1252 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1253 LocationSummary::kNoCall,
1254 kIntrinsified);
1255 locations->SetInAt(0, Location::RequiresRegister());
1256 locations->SetInAt(1, Location::RequiresRegister());
1257
1258 // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
1259 locations->AddTemp(Location::RegisterLocation(ECX));
1260 locations->AddTemp(Location::RegisterLocation(EDI));
1261
1262 // Set output, ESI needed for repe_cmpsl instruction anyways.
1263 locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1264}
1265
1266void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1267 X86Assembler* assembler = GetAssembler();
1268 LocationSummary* locations = invoke->GetLocations();
1269
1270 Register str = locations->InAt(0).AsRegister<Register>();
1271 Register arg = locations->InAt(1).AsRegister<Register>();
1272 Register ecx = locations->GetTemp(0).AsRegister<Register>();
1273 Register edi = locations->GetTemp(1).AsRegister<Register>();
1274 Register esi = locations->Out().AsRegister<Register>();
1275
Mark Mendell0c9497d2015-08-21 09:30:05 -04001276 NearLabel end, return_true, return_false;
Agi Csakid7138c82015-08-13 17:46:44 -07001277
1278 // Get offsets of count, value, and class fields within a string object.
1279 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1280 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1281 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1282
1283 // Note that the null check must have been done earlier.
1284 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1285
Nicolas Geoffraya83a54d2015-10-02 17:30:26 +01001286 StringEqualsOptimizations optimizations(invoke);
1287 if (!optimizations.GetArgumentNotNull()) {
1288 // Check if input is null, return false if it is.
1289 __ testl(arg, arg);
1290 __ j(kEqual, &return_false);
1291 }
Agi Csakid7138c82015-08-13 17:46:44 -07001292
Nicolas Geoffraya83a54d2015-10-02 17:30:26 +01001293 if (!optimizations.GetArgumentIsString()) {
Vladimir Marko53b52002016-05-24 19:30:45 +01001294 // Instanceof check for the argument by comparing class fields.
1295 // All string objects must have the same type since String cannot be subclassed.
1296 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1297 // If the argument is a string object, its class field must be equal to receiver's class field.
Nicolas Geoffraya83a54d2015-10-02 17:30:26 +01001298 __ movl(ecx, Address(str, class_offset));
1299 __ cmpl(ecx, Address(arg, class_offset));
1300 __ j(kNotEqual, &return_false);
1301 }
Agi Csakid7138c82015-08-13 17:46:44 -07001302
1303 // Reference equality check, return true if same reference.
1304 __ cmpl(str, arg);
1305 __ j(kEqual, &return_true);
1306
1307 // Load length of receiver string.
1308 __ movl(ecx, Address(str, count_offset));
1309 // Check if lengths are equal, return false if they're not.
1310 __ cmpl(ecx, Address(arg, count_offset));
1311 __ j(kNotEqual, &return_false);
1312 // Return true if both strings are empty.
Mark Mendell0c9497d2015-08-21 09:30:05 -04001313 __ jecxz(&return_true);
Agi Csakid7138c82015-08-13 17:46:44 -07001314
1315 // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1316 __ leal(esi, Address(str, value_offset));
1317 __ leal(edi, Address(arg, value_offset));
1318
1319 // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths.
1320 __ addl(ecx, Immediate(1));
1321 __ shrl(ecx, Immediate(1));
1322
1323 // Assertions that must hold in order to compare strings 2 characters at a time.
1324 DCHECK_ALIGNED(value_offset, 4);
1325 static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
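// Added illustration (not in the original source): for a 5-char string,
// ECX becomes (5 + 1) >> 1 = 3 dwords; the sixth char slot is the zero
// padding guaranteed by the alignment asserted above, so both strings
// compare equal in that padding and the dword loop stays correct.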
1326
1327 // Loop to compare strings two characters at a time starting at the beginning of the string.
1328 __ repe_cmpsl();
1329 // If strings are not equal, zero flag will be cleared.
1330 __ j(kNotEqual, &return_false);
1331
1332 // Return true and exit the function.
1333 // If the loop fell through without branching to return_false, the strings are equal.
1334 __ Bind(&return_true);
1335 __ movl(esi, Immediate(1));
1336 __ jmp(&end);
1337
1338 // Return false and exit the function.
1339 __ Bind(&return_false);
1340 __ xorl(esi, esi);
1341 __ Bind(&end);
1342}
1343
Andreas Gampe21030dd2015-05-07 14:46:15 -07001344static void CreateStringIndexOfLocations(HInvoke* invoke,
1345 ArenaAllocator* allocator,
1346 bool start_at_zero) {
1347 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1348 LocationSummary::kCallOnSlowPath,
1349 kIntrinsified);
1350 // The data needs to be in EDI for scasw, so request that the string is placed there anyway.
1351 locations->SetInAt(0, Location::RegisterLocation(EDI));
1352 // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1353 // allocator to do that anyway. We can still do the constant check by checking the parameter
1354 // of the instruction explicitly.
1355 // Note: This works as we don't clobber EAX anywhere.
1356 locations->SetInAt(1, Location::RegisterLocation(EAX));
1357 if (!start_at_zero) {
1358 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1359 }
1360 // As we clobber EDI during execution anyway, also use it as the output.
1361 locations->SetOut(Location::SameAsFirstInput());
1362
1363 // repne scasw uses ECX as the counter.
1364 locations->AddTemp(Location::RegisterLocation(ECX));
1365 // Need another temporary to be able to compute the result.
1366 locations->AddTemp(Location::RequiresRegister());
1367}
1368
1369static void GenerateStringIndexOf(HInvoke* invoke,
1370 X86Assembler* assembler,
1371 CodeGeneratorX86* codegen,
1372 ArenaAllocator* allocator,
1373 bool start_at_zero) {
1374 LocationSummary* locations = invoke->GetLocations();
1375
1376 // Note that the null check must have been done earlier.
1377 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1378
1379 Register string_obj = locations->InAt(0).AsRegister<Register>();
1380 Register search_value = locations->InAt(1).AsRegister<Register>();
1381 Register counter = locations->GetTemp(0).AsRegister<Register>();
1382 Register string_length = locations->GetTemp(1).AsRegister<Register>();
1383 Register out = locations->Out().AsRegister<Register>();
1384
1385 // Check our assumptions for registers.
1386 DCHECK_EQ(string_obj, EDI);
1387 DCHECK_EQ(search_value, EAX);
1388 DCHECK_EQ(counter, ECX);
1389 DCHECK_EQ(out, EDI);
1390
1391 // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know statically,
Vladimir Markofb6c90a2016-05-06 15:52:12 +01001392 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
Andreas Gampe85b62f22015-09-09 13:15:38 -07001393 SlowPathCode* slow_path = nullptr;
Vladimir Markofb6c90a2016-05-06 15:52:12 +01001394 HInstruction* code_point = invoke->InputAt(1);
1395 if (code_point->IsIntConstant()) {
Vladimir Markoda051082016-05-17 16:10:20 +01001396 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
Andreas Gampe21030dd2015-05-07 14:46:15 -07001397 std::numeric_limits<uint16_t>::max()) {
1398 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1399 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1400 slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1401 codegen->AddSlowPath(slow_path);
1402 __ jmp(slow_path->GetEntryLabel());
1403 __ Bind(slow_path->GetExitLabel());
1404 return;
1405 }
Vladimir Markofb6c90a2016-05-06 15:52:12 +01001406 } else if (code_point->GetType() != Primitive::kPrimChar) {
Andreas Gampe21030dd2015-05-07 14:46:15 -07001407 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1408 slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1409 codegen->AddSlowPath(slow_path);
1410 __ j(kAbove, slow_path->GetEntryLabel());
1411 }
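// Added explanatory note: repne scasw compares 16-bit units only, so a
// supplementary code point such as U+10400 can never match a single char
// slot; those values are routed to the slow path above instead.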
1412
1413 // From here down, we know that we are looking for a char that fits in 16 bits.
1414 // Location of reference to data array within the String object.
1415 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1416 // Location of count within the String object.
1417 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1418
1419 // Load string length, i.e., the count field of the string.
1420 __ movl(string_length, Address(string_obj, count_offset));
1421
1422 // Do a zero-length check.
1423 // TODO: Support jecxz.
Mark Mendell0c9497d2015-08-21 09:30:05 -04001424 NearLabel not_found_label;
Andreas Gampe21030dd2015-05-07 14:46:15 -07001425 __ testl(string_length, string_length);
1426 __ j(kEqual, &not_found_label);
1427
1428 if (start_at_zero) {
1429 // Number of chars to scan is the same as the string length.
1430 __ movl(counter, string_length);
1431
1432 // Move to the start of the string.
1433 __ addl(string_obj, Immediate(value_offset));
1434 } else {
1435 Register start_index = locations->InAt(2).AsRegister<Register>();
1436
1437 // Do a start_index check.
1438 __ cmpl(start_index, string_length);
1439 __ j(kGreaterEqual, &not_found_label);
1440
1441 // Ensure we have a start index >= 0.
1442 __ xorl(counter, counter);
1443 __ cmpl(start_index, Immediate(0));
1444 __ cmovl(kGreater, counter, start_index);
1445
1446 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1447 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1448
1449 // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1450 // compare.
1451 __ negl(counter);
1452 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1453 }
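// Worked example (values assumed for illustration): with string_length = 10
// and start_index = 4, counter is clamped to 4; negl/leal then leave
// ECX = 10 + (-4) = 6 chars still to scan.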
1454
1455 // Everything is set up for repne scasw:
1456 // * Comparison address in EDI.
1457 // * Counter in ECX.
1458 __ repne_scasw();
1459
1460 // Did we find a match?
1461 __ j(kNotEqual, &not_found_label);
1462
1463 // Yes, we matched. Compute the index of the result.
1464 __ subl(string_length, counter);
1465 __ leal(out, Address(string_length, -1));
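// Illustrative trace (added note): searching for 'c' in "abc" starts with
// ECX = 3; repne scasw stops on the match with ECX = 0, so the index is
// 3 - 0 - 1 = 2.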
1466
Mark Mendell0c9497d2015-08-21 09:30:05 -04001467 NearLabel done;
Andreas Gampe21030dd2015-05-07 14:46:15 -07001468 __ jmp(&done);
1469
1470 // Failed to match; return -1.
1471 __ Bind(&not_found_label);
1472 __ movl(out, Immediate(-1));
1473
1474 // And join up at the end.
1475 __ Bind(&done);
1476 if (slow_path != nullptr) {
1477 __ Bind(slow_path->GetExitLabel());
1478 }
1479}
1480
1481void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001482 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001483}
1484
1485void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001486 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001487}
1488
1489void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001490 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001491}
1492
1493void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001494 GenerateStringIndexOf(
1495 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001496}
1497
Jeff Hao848f70a2014-01-15 13:49:50 -08001498void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1499 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu806f0122016-03-09 11:10:16 +00001500 LocationSummary::kCallOnMainAndSlowPath,
Jeff Hao848f70a2014-01-15 13:49:50 -08001501 kIntrinsified);
1502 InvokeRuntimeCallingConvention calling_convention;
1503 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1504 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1505 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1506 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1507 locations->SetOut(Location::RegisterLocation(EAX));
Jeff Hao848f70a2014-01-15 13:49:50 -08001508}
1509
1510void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1511 X86Assembler* assembler = GetAssembler();
1512 LocationSummary* locations = invoke->GetLocations();
1513
1514 Register byte_array = locations->InAt(0).AsRegister<Register>();
1515 __ testl(byte_array, byte_array);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001516 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08001517 codegen_->AddSlowPath(slow_path);
1518 __ j(kEqual, slow_path->GetEntryLabel());
1519
Andreas Gampe542451c2016-07-26 09:02:02 -07001520 __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromBytes)));
Roland Levillainf969a202016-03-09 16:14:00 +00001521 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
Jeff Hao848f70a2014-01-15 13:49:50 -08001522 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1523 __ Bind(slow_path->GetExitLabel());
1524}
1525
1526void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1527 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu54ff4822016-07-07 18:03:19 +01001528 LocationSummary::kCallOnMainOnly,
Jeff Hao848f70a2014-01-15 13:49:50 -08001529 kIntrinsified);
1530 InvokeRuntimeCallingConvention calling_convention;
1531 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1532 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1533 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1534 locations->SetOut(Location::RegisterLocation(EAX));
1535}
1536
1537void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1538 X86Assembler* assembler = GetAssembler();
1539
Roland Levillaincc3839c2016-02-29 16:23:48 +00001540 // No need to emit code checking whether `locations->InAt(2)` is a null
1541 // pointer, as callers of the native method
1542 //
1543 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1544 //
1545 // all include a null check on `data` before calling that method.
Andreas Gampe542451c2016-07-26 09:02:02 -07001546 __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromChars)));
Roland Levillainf969a202016-03-09 16:14:00 +00001547 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
Jeff Hao848f70a2014-01-15 13:49:50 -08001548 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1549}
1550
1551void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1552 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu806f0122016-03-09 11:10:16 +00001553 LocationSummary::kCallOnMainAndSlowPath,
Jeff Hao848f70a2014-01-15 13:49:50 -08001554 kIntrinsified);
1555 InvokeRuntimeCallingConvention calling_convention;
1556 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1557 locations->SetOut(Location::RegisterLocation(EAX));
Jeff Hao848f70a2014-01-15 13:49:50 -08001558}
1559
1560void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1561 X86Assembler* assembler = GetAssembler();
1562 LocationSummary* locations = invoke->GetLocations();
1563
1564 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1565 __ testl(string_to_copy, string_to_copy);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001566 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08001567 codegen_->AddSlowPath(slow_path);
1568 __ j(kEqual, slow_path->GetEntryLabel());
1569
Andreas Gampe542451c2016-07-26 09:02:02 -07001570 __ fs()->call(
1571 Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromString)));
Roland Levillainf969a202016-03-09 16:14:00 +00001572 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
Jeff Hao848f70a2014-01-15 13:49:50 -08001573 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1574 __ Bind(slow_path->GetExitLabel());
1575}
1576
Mark Mendell8f8926a2015-08-17 11:39:06 -04001577void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1578 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1579 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1580 LocationSummary::kNoCall,
1581 kIntrinsified);
1582 locations->SetInAt(0, Location::RequiresRegister());
1583 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1584 // Place srcEnd in ECX to save a move below.
1585 locations->SetInAt(2, Location::RegisterLocation(ECX));
1586 locations->SetInAt(3, Location::RequiresRegister());
1587 locations->SetInAt(4, Location::RequiresRegister());
1588
1589 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1590 // We don't have enough registers to also reserve ECX, so it is saved and restored below.
1591 locations->AddTemp(Location::RegisterLocation(ESI));
1592 locations->AddTemp(Location::RegisterLocation(EDI));
1593}
1594
1595void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1596 X86Assembler* assembler = GetAssembler();
1597 LocationSummary* locations = invoke->GetLocations();
1598
1599 size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1600 // Location of data in char array buffer.
1601 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1602 // Location of char array data in string.
1603 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1604
1605 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1606 Register obj = locations->InAt(0).AsRegister<Register>();
1607 Location srcBegin = locations->InAt(1);
1608 int srcBegin_value =
1609 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1610 Register srcEnd = locations->InAt(2).AsRegister<Register>();
1611 Register dst = locations->InAt(3).AsRegister<Register>();
1612 Register dstBegin = locations->InAt(4).AsRegister<Register>();
1613
1614 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1615 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1616 DCHECK_EQ(char_size, 2u);
1617
1618 // Compute the address of the destination buffer.
1619 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1620
1621 // Compute the address of the source string.
1622 if (srcBegin.IsConstant()) {
1623 // Compute the address of the source data by scaling srcBegin by the char size
1624 // and adding the string's value offset.
1625 __ leal(ESI, Address(obj, srcBegin_value * char_size + value_offset));
1626 } else {
1627 __ leal(ESI, Address(obj, srcBegin.AsRegister<Register>(),
1628 ScaleFactor::TIMES_2, value_offset));
1629 }
1630
1631 // Compute the number of chars (words) to move.
1632 // Now is the time to save ECX, since we don't know if it will be used later.
1633 __ pushl(ECX);
1634 int stack_adjust = kX86WordSize;
1635 __ cfi().AdjustCFAOffset(stack_adjust);
1636 DCHECK_EQ(srcEnd, ECX);
1637 if (srcBegin.IsConstant()) {
1638 if (srcBegin_value != 0) {
1639 __ subl(ECX, Immediate(srcBegin_value));
1640 }
1641 } else {
1642 DCHECK(srcBegin.IsRegister());
1643 __ subl(ECX, srcBegin.AsRegister<Register>());
1644 }
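// Worked example (assumed values): srcBegin = 2 and srcEnd = 7 leave ECX = 5,
// so the rep_movsw below copies 5 chars (10 bytes) from ESI to EDI.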
1645
1646 // Do the move.
1647 __ rep_movsw();
1648
1649 // And restore ECX.
1650 __ popl(ECX);
1651 __ cfi().AdjustCFAOffset(-stack_adjust);
1652}
1653
Mark Mendell09ed1a32015-03-25 08:30:06 -04001654static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1655 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1656 Location out_loc = locations->Out();
1657 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1658 // to avoid a SIGBUS.
1659 switch (size) {
1660 case Primitive::kPrimByte:
1661 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1662 break;
1663 case Primitive::kPrimShort:
1664 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1665 break;
1666 case Primitive::kPrimInt:
1667 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1668 break;
1669 case Primitive::kPrimLong:
1670 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1671 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1672 break;
1673 default:
1674 LOG(FATAL) << "Type not recognized for peek: " << size;
1675 UNREACHABLE();
1676 }
1677}
1678
1679void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1680 CreateLongToIntLocations(arena_, invoke);
1681}
1682
1683void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1684 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1685}
1686
1687void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1688 CreateLongToIntLocations(arena_, invoke);
1689}
1690
1691void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1692 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1693}
1694
1695void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1696 CreateLongToLongLocations(arena_, invoke);
1697}
1698
1699void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1700 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1701}
1702
1703void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1704 CreateLongToIntLocations(arena_, invoke);
1705}
1706
1707void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1708 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1709}
1710
1711static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
1712 HInvoke* invoke) {
1713 LocationSummary* locations = new (arena) LocationSummary(invoke,
1714 LocationSummary::kNoCall,
1715 kIntrinsified);
1716 locations->SetInAt(0, Location::RequiresRegister());
Roland Levillain4c0eb422015-04-24 16:43:49 +01001717 HInstruction* value = invoke->InputAt(1);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001718 if (size == Primitive::kPrimByte) {
1719 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1720 } else {
1721 locations->SetInAt(1, Location::RegisterOrConstant(value));
1722 }
1723}
1724
1725static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1726 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1727 Location value_loc = locations->InAt(1);
1728 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1729 // to avoid a SIGBUS.
1730 switch (size) {
1731 case Primitive::kPrimByte:
1732 if (value_loc.IsConstant()) {
1733 __ movb(Address(address, 0),
1734 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1735 } else {
1736 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1737 }
1738 break;
1739 case Primitive::kPrimShort:
1740 if (value_loc.IsConstant()) {
1741 __ movw(Address(address, 0),
1742 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1743 } else {
1744 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1745 }
1746 break;
1747 case Primitive::kPrimInt:
1748 if (value_loc.IsConstant()) {
1749 __ movl(Address(address, 0),
1750 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1751 } else {
1752 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1753 }
1754 break;
1755 case Primitive::kPrimLong:
1756 if (value_loc.IsConstant()) {
1757 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1758 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1759 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1760 } else {
1761 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1762 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1763 }
1764 break;
1765 default:
1766 LOG(FATAL) << "Type not recognized for poke: " << size;
1767 UNREACHABLE();
1768 }
1769}
1770
1771void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1772 CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
1773}
1774
1775void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1776 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1777}
1778
1779void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1780 CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
1781}
1782
1783void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1784 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1785}
1786
1787void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1788 CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
1789}
1790
1791void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1792 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1793}
1794
1795void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1796 CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
1797}
1798
1799void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1800 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1801}
1802
1803void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1804 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1805 LocationSummary::kNoCall,
1806 kIntrinsified);
1807 locations->SetOut(Location::RequiresRegister());
1808}
1809
1810void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1811 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
Andreas Gampe542451c2016-07-26 09:02:02 -07001812 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
Mark Mendell09ed1a32015-03-25 08:30:06 -04001813}
1814
Roland Levillain0d5a2812015-11-13 10:07:31 +00001815static void GenUnsafeGet(HInvoke* invoke,
1816 Primitive::Type type,
1817 bool is_volatile,
1818 CodeGeneratorX86* codegen) {
1819 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1820 LocationSummary* locations = invoke->GetLocations();
1821 Location base_loc = locations->InAt(1);
1822 Register base = base_loc.AsRegister<Register>();
1823 Location offset_loc = locations->InAt(2);
1824 Register offset = offset_loc.AsRegisterPairLow<Register>();
1825 Location output_loc = locations->Out();
Mark Mendell09ed1a32015-03-25 08:30:06 -04001826
1827 switch (type) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001828 case Primitive::kPrimInt: {
Roland Levillain0d5a2812015-11-13 10:07:31 +00001829 Register output = output_loc.AsRegister<Register>();
1830 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
Roland Levillain7c1559a2015-12-15 10:55:36 +00001831 break;
1832 }
1833
1834 case Primitive::kPrimNot: {
1835 Register output = output_loc.AsRegister<Register>();
1836 if (kEmitCompilerReadBarrier) {
1837 if (kUseBakerReadBarrier) {
1838 Location temp = locations->GetTemp(0);
Sang, Chunlei0fcd2b82016-04-05 17:12:59 +08001839 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1840 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1841 invoke, output_loc, base, src, temp, /* needs_null_check */ false);
Roland Levillain7c1559a2015-12-15 10:55:36 +00001842 } else {
1843 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1844 codegen->GenerateReadBarrierSlow(
1845 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1846 }
1847 } else {
1848 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1849 __ MaybeUnpoisonHeapReference(output);
Roland Levillain4d027112015-07-01 15:41:14 +01001850 }
Mark Mendell09ed1a32015-03-25 08:30:06 -04001851 break;
Roland Levillain4d027112015-07-01 15:41:14 +01001852 }
Mark Mendell09ed1a32015-03-25 08:30:06 -04001853
1854 case Primitive::kPrimLong: {
Roland Levillain0d5a2812015-11-13 10:07:31 +00001855 Register output_lo = output_loc.AsRegisterPairLow<Register>();
1856 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
Mark Mendell09ed1a32015-03-25 08:30:06 -04001857 if (is_volatile) {
1858 // Need to use an XMM register to read atomically.
1859 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1860 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1861 __ movd(output_lo, temp);
1862 __ psrlq(temp, Immediate(32));
1863 __ movd(output_hi, temp);
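// Added note, our reading of the sequence above: movsd issues one 64-bit
// load, which is atomic for an 8-byte-aligned field (the usual alignment of
// a volatile long), and movd/psrlq split the halves without touching memory
// again.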
1864 } else {
1865 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1866 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1867 }
1868 }
1869 break;
1870
1871 default:
1872 LOG(FATAL) << "Unsupported op size " << type;
1873 UNREACHABLE();
1874 }
1875}
1876
Roland Levillain7c1559a2015-12-15 10:55:36 +00001877static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
1878 HInvoke* invoke,
1879 Primitive::Type type,
1880 bool is_volatile) {
Roland Levillain0d5a2812015-11-13 10:07:31 +00001881 bool can_call = kEmitCompilerReadBarrier &&
1882 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1883 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001884 LocationSummary* locations = new (arena) LocationSummary(invoke,
Roland Levillain0d5a2812015-11-13 10:07:31 +00001885 can_call ?
1886 LocationSummary::kCallOnSlowPath :
1887 LocationSummary::kNoCall,
Mark Mendell09ed1a32015-03-25 08:30:06 -04001888 kIntrinsified);
1889 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1890 locations->SetInAt(1, Location::RequiresRegister());
1891 locations->SetInAt(2, Location::RequiresRegister());
Roland Levillain7c1559a2015-12-15 10:55:36 +00001892 if (type == Primitive::kPrimLong) {
Mark Mendell09ed1a32015-03-25 08:30:06 -04001893 if (is_volatile) {
1894 // Need to use an XMM register to read the volatile long.
1895 locations->AddTemp(Location::RequiresFpuRegister());
Roland Levillain3d312422016-06-23 13:53:42 +01001896 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001897 } else {
1898 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1899 }
1900 } else {
Roland Levillain3d312422016-06-23 13:53:42 +01001901 locations->SetOut(Location::RequiresRegister(),
1902 can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001903 }
Roland Levillain7c1559a2015-12-15 10:55:36 +00001904 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1905 // We need a temporary register for the read barrier marking slow
Sang, Chunlei0fcd2b82016-04-05 17:12:59 +08001906 // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier.
Roland Levillain7c1559a2015-12-15 10:55:36 +00001907 locations->AddTemp(Location::RequiresRegister());
1908 }
Mark Mendell09ed1a32015-03-25 08:30:06 -04001909}
1910
1911void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001912 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001913}
1914void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001915 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001916}
1917void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001918 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001919}
1920void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001921 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001922}
1923void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001924 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001925}
1926void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00001927 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001928}
1929
1930
1931void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001932 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001933}
1934void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001935 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001936}
1937void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001938 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001939}
1940void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001941 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001942}
1943void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001944 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001945}
1946void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001947 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001948}
1949
1950
1951static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
1952 Primitive::Type type,
1953 HInvoke* invoke,
1954 bool is_volatile) {
1955 LocationSummary* locations = new (arena) LocationSummary(invoke,
1956 LocationSummary::kNoCall,
1957 kIntrinsified);
1958 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1959 locations->SetInAt(1, Location::RequiresRegister());
1960 locations->SetInAt(2, Location::RequiresRegister());
1961 locations->SetInAt(3, Location::RequiresRegister());
1962 if (type == Primitive::kPrimNot) {
1963 // Need temp registers for card-marking.
Roland Levillain4d027112015-07-01 15:41:14 +01001964 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
Mark Mendell09ed1a32015-03-25 08:30:06 -04001965 // Ensure the value is in a byte register.
1966 locations->AddTemp(Location::RegisterLocation(ECX));
1967 } else if (type == Primitive::kPrimLong && is_volatile) {
1968 locations->AddTemp(Location::RequiresFpuRegister());
1969 locations->AddTemp(Location::RequiresFpuRegister());
1970 }
1971}
1972
1973void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001974 CreateIntIntIntIntToVoidPlusTempsLocations(
1975 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001976}
1977void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001978 CreateIntIntIntIntToVoidPlusTempsLocations(
1979 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001980}
1981void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001982 CreateIntIntIntIntToVoidPlusTempsLocations(
1983 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001984}
1985void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001986 CreateIntIntIntIntToVoidPlusTempsLocations(
1987 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001988}
1989void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001990 CreateIntIntIntIntToVoidPlusTempsLocations(
1991 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001992}
1993void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001994 CreateIntIntIntIntToVoidPlusTempsLocations(
1995 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001996}
1997void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001998 CreateIntIntIntIntToVoidPlusTempsLocations(
1999 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002000}
2001void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002002 CreateIntIntIntIntToVoidPlusTempsLocations(
2003 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002004}
2005void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002006 CreateIntIntIntIntToVoidPlusTempsLocations(
2007 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002008}
2009
2010 // We don't need special handling for ordered: it only requires an AnyStore barrier, which the
2011 // x86 memory model already provides.
2012static void GenUnsafePut(LocationSummary* locations,
2013 Primitive::Type type,
2014 bool is_volatile,
2015 CodeGeneratorX86* codegen) {
Roland Levillainb488b782015-10-22 11:38:49 +01002016 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -04002017 Register base = locations->InAt(1).AsRegister<Register>();
2018 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2019 Location value_loc = locations->InAt(3);
2020
2021 if (type == Primitive::kPrimLong) {
2022 Register value_lo = value_loc.AsRegisterPairLow<Register>();
2023 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2024 if (is_volatile) {
2025 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2026 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2027 __ movd(temp1, value_lo);
2028 __ movd(temp2, value_hi);
2029 __ punpckldq(temp1, temp2);
2030 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
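// Added commentary: punpckldq interleaves the two 32-bit halves into one XMM
// register, so the movsd above is a single 64-bit store rather than two movl
// stores that another thread could observe half-written.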
2031 } else {
2032 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2033 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2034 }
Roland Levillain4d027112015-07-01 15:41:14 +01002035 } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
2036 Register temp = locations->GetTemp(0).AsRegister<Register>();
2037 __ movl(temp, value_loc.AsRegister<Register>());
2038 __ PoisonHeapReference(temp);
2039 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002040 } else {
2041 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2042 }
2043
2044 if (is_volatile) {
Mark P Mendell17077d82015-12-16 19:15:59 +00002045 codegen->MemoryFence();
Mark Mendell09ed1a32015-03-25 08:30:06 -04002046 }
2047
2048 if (type == Primitive::kPrimNot) {
Nicolas Geoffray07276db2015-05-18 14:22:09 +01002049 bool value_can_be_null = true; // TODO: Worth finding out this information?
Mark Mendell09ed1a32015-03-25 08:30:06 -04002050 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2051 locations->GetTemp(1).AsRegister<Register>(),
2052 base,
Nicolas Geoffray07276db2015-05-18 14:22:09 +01002053 value_loc.AsRegister<Register>(),
2054 value_can_be_null);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002055 }
2056}
2057
2058void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002059 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002060}
2061void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002062 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002063}
2064void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002065 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002066}
2067void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002068 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002069}
2070void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002071 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002072}
2073void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002074 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002075}
2076void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002077 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002078}
2079void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002080 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002081}
2082void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002083 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002084}
2085
Mark Mendell58d25fd2015-04-03 14:52:31 -04002086static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
2087 HInvoke* invoke) {
2088 LocationSummary* locations = new (arena) LocationSummary(invoke,
2089 LocationSummary::kNoCall,
2090 kIntrinsified);
2091 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2092 locations->SetInAt(1, Location::RequiresRegister());
2093 // Offset is a long, but in 32-bit mode we only need the low word.
2094 // TODO: Can we update the invoke here to remove a TypeConversion to long?
2095 locations->SetInAt(2, Location::RequiresRegister());
2096 // Expected value must be in EAX or EDX:EAX.
2097 // For long, new value must be in ECX:EBX.
2098 if (type == Primitive::kPrimLong) {
2099 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2100 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2101 } else {
2102 locations->SetInAt(3, Location::RegisterLocation(EAX));
2103 locations->SetInAt(4, Location::RequiresRegister());
2104 }
2105
2106 // Force a byte register for the output.
2107 locations->SetOut(Location::RegisterLocation(EAX));
2108 if (type == Primitive::kPrimNot) {
2109 // Need temp registers for card-marking.
Roland Levillainb488b782015-10-22 11:38:49 +01002110 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
Mark Mendell58d25fd2015-04-03 14:52:31 -04002111 // Need a byte register for marking.
2112 locations->AddTemp(Location::RegisterLocation(ECX));
2113 }
2114}
2115
2116void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2117 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
2118}
2119
2120void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2121 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
2122}
2123
2124void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
Roland Levillain391b8662015-12-18 11:43:38 +00002125 // The UnsafeCASObject intrinsic is missing a read barrier, and
2126 // therefore sometimes does not work as expected (b/25883050).
2127 // Turn it off temporarily as a quick fix, until the read barrier is
Roland Levillain3d312422016-06-23 13:53:42 +01002128 // implemented (see TODO in GenCAS).
Roland Levillain391b8662015-12-18 11:43:38 +00002129 //
Roland Levillain3d312422016-06-23 13:53:42 +01002130 // TODO(rpl): Implement read barrier support in GenCAS and re-enable
Roland Levillain391b8662015-12-18 11:43:38 +00002131 // this intrinsic.
2132 if (kEmitCompilerReadBarrier) {
2133 return;
2134 }
2135
Mark Mendell58d25fd2015-04-03 14:52:31 -04002136 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
2137}
2138
2139static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
Roland Levillainb488b782015-10-22 11:38:49 +01002140 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
Mark Mendell58d25fd2015-04-03 14:52:31 -04002141 LocationSummary* locations = invoke->GetLocations();
2142
2143 Register base = locations->InAt(1).AsRegister<Register>();
2144 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2145 Location out = locations->Out();
2146 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2147
Roland Levillainb488b782015-10-22 11:38:49 +01002148 if (type == Primitive::kPrimNot) {
Roland Levillain4d027112015-07-01 15:41:14 +01002149 Register expected = locations->InAt(3).AsRegister<Register>();
Roland Levillainb488b782015-10-22 11:38:49 +01002150 // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
Roland Levillain4d027112015-07-01 15:41:14 +01002151 DCHECK_EQ(expected, EAX);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002152 Register value = locations->InAt(4).AsRegister<Register>();
Roland Levillain4d027112015-07-01 15:41:14 +01002153
Roland Levillainb488b782015-10-22 11:38:49 +01002154 // Mark card for object assuming new value is stored.
2155 bool value_can_be_null = true; // TODO: Worth finding out this information?
2156 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2157 locations->GetTemp(1).AsRegister<Register>(),
2158 base,
2159 value,
2160 value_can_be_null);
2161
2162 bool base_equals_value = (base == value);
2163 if (kPoisonHeapReferences) {
2164 if (base_equals_value) {
2165 // If `base` and `value` are the same register location, move
2166 // `value` to a temporary register. This way, poisoning
2167 // `value` won't invalidate `base`.
2168 value = locations->GetTemp(0).AsRegister<Register>();
2169 __ movl(value, base);
Roland Levillain4d027112015-07-01 15:41:14 +01002170 }
Roland Levillainb488b782015-10-22 11:38:49 +01002171
2172 // Check that the register allocator did not assign the location
2173 // of `expected` (EAX) to `value` nor to `base`, so that heap
2174 // poisoning (when enabled) works as intended below.
2175 // - If `value` were equal to `expected`, both references would
2176 // be poisoned twice, meaning they would not be poisoned at
2177 // all, as heap poisoning uses address negation.
2178 // - If `base` were equal to `expected`, poisoning `expected`
2179 // would invalidate `base`.
2180 DCHECK_NE(value, expected);
2181 DCHECK_NE(base, expected);
2182
2183 __ PoisonHeapReference(expected);
2184 __ PoisonHeapReference(value);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002185 }
2186
Roland Levillain391b8662015-12-18 11:43:38 +00002187 // TODO: Add a read barrier for the reference stored in the object
2188 // before attempting the CAS, similar to the one in the
2189 // art::Unsafe_compareAndSwapObject JNI implementation.
2190 //
2191 // Note that this code is not (yet) used when read barriers are
2192 // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject).
2193 DCHECK(!kEmitCompilerReadBarrier);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002194 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002195
Roland Levillain0d5a2812015-11-13 10:07:31 +00002196 // LOCK CMPXCHG has full barrier semantics, and we don't need
Roland Levillainb488b782015-10-22 11:38:49 +01002197 // scheduling barriers at this time.
Mark Mendell58d25fd2015-04-03 14:52:31 -04002198
Roland Levillainb488b782015-10-22 11:38:49 +01002199 // Convert ZF into the boolean result.
2200 __ setb(kZero, out.AsRegister<Register>());
2201 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
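// Added note: LOCK CMPXCHG sets ZF exactly when the compare-and-swap
// succeeded, so setb(kZero) materializes the boolean result in the low byte
// and movzxb zero-extends it into the full EAX output register.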
Roland Levillain4d027112015-07-01 15:41:14 +01002202
Roland Levillain391b8662015-12-18 11:43:38 +00002203 // If heap poisoning is enabled, we need to unpoison the values
2204 // that were poisoned earlier.
Roland Levillainb488b782015-10-22 11:38:49 +01002205 if (kPoisonHeapReferences) {
2206 if (base_equals_value) {
2207 // `value` has been moved to a temporary register, no need to
2208 // unpoison it.
2209 } else {
2210 // Ensure `value` is different from `out`, so that unpoisoning
2211 // the former does not invalidate the latter.
2212 DCHECK_NE(value, out.AsRegister<Register>());
2213 __ UnpoisonHeapReference(value);
2214 }
2215 // Do not unpoison the reference contained in register
2216 // `expected`, as it is the same as register `out` (EAX).
2217 }
2218 } else {
2219 if (type == Primitive::kPrimInt) {
2220 // Ensure the expected value is in EAX (required by the CMPXCHG
2221 // instruction).
2222 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
2223 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0),
2224 locations->InAt(4).AsRegister<Register>());
2225 } else if (type == Primitive::kPrimLong) {
2226 // Ensure the expected value is in EAX:EDX and that the new
2227 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2228 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2229 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2230 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2231 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
2232 __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
2233 } else {
2234 LOG(FATAL) << "Unexpected CAS type " << type;
2235 }
2236
Roland Levillain0d5a2812015-11-13 10:07:31 +00002237 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2238 // don't need scheduling barriers at this time.
Roland Levillainb488b782015-10-22 11:38:49 +01002239
2240 // Convert ZF into the boolean result.
2241 __ setb(kZero, out.AsRegister<Register>());
2242 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
Roland Levillain4d027112015-07-01 15:41:14 +01002243 }
Mark Mendell58d25fd2015-04-03 14:52:31 -04002244}
2245
2246void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2247 GenCAS(Primitive::kPrimInt, invoke, codegen_);
2248}
2249
2250void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2251 GenCAS(Primitive::kPrimLong, invoke, codegen_);
2252}
2253
2254void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
Roland Levillain3d312422016-06-23 13:53:42 +01002255 // The UnsafeCASObject intrinsic is missing a read barrier, and
2256 // therefore sometimes does not work as expected (b/25883050).
2257 // Turn it off temporarily as a quick fix, until the read barrier is
2258 // implemented (see TODO in GenCAS).
2259 //
2260 // TODO(rpl): Implement read barrier support in GenCAS and re-enable
2261 // this intrinsic.
2262 DCHECK(!kEmitCompilerReadBarrier);
2263
Mark Mendell58d25fd2015-04-03 14:52:31 -04002264 GenCAS(Primitive::kPrimNot, invoke, codegen_);
2265}
2266
2267void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2268 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2269 LocationSummary::kNoCall,
2270 kIntrinsified);
2271 locations->SetInAt(0, Location::RequiresRegister());
2272 locations->SetOut(Location::SameAsFirstInput());
2273 locations->AddTemp(Location::RequiresRegister());
2274}
2275
2276static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2277 X86Assembler* assembler) {
2278 Immediate imm_shift(shift);
2279 Immediate imm_mask(mask);
2280 __ movl(temp, reg);
2281 __ shrl(reg, imm_shift);
2282 __ andl(temp, imm_mask);
2283 __ andl(reg, imm_mask);
2284 __ shll(temp, imm_shift);
2285 __ orl(reg, temp);
2286}
2287
2288void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
Aart Bika19616e2016-02-01 18:57:58 -08002289 X86Assembler* assembler = GetAssembler();
Mark Mendell58d25fd2015-04-03 14:52:31 -04002290 LocationSummary* locations = invoke->GetLocations();
2291
2292 Register reg = locations->InAt(0).AsRegister<Register>();
2293 Register temp = locations->GetTemp(0).AsRegister<Register>();
2294
2295 /*
2296 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2297 * swapping bits to reverse bits in a number x. Using bswap saves instructions
2298 * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
2299 * x = bswap x
2300 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2301 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2302 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2303 */
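// Hedged walk-through (added, not part of the original comment): reversing
// 0x00000001 proceeds as bswap -> 0x01000000, round 1 -> 0x02000000,
// round 2 -> 0x08000000, round 3 -> 0x80000000, the bit-reversed value.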
2304 __ bswapl(reg);
2305 SwapBits(reg, temp, 1, 0x55555555, assembler);
2306 SwapBits(reg, temp, 2, 0x33333333, assembler);
2307 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2308}
2309
2310void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2311 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2312 LocationSummary::kNoCall,
2313 kIntrinsified);
2314 locations->SetInAt(0, Location::RequiresRegister());
2315 locations->SetOut(Location::SameAsFirstInput());
2316 locations->AddTemp(Location::RequiresRegister());
2317}
2318
2319void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
Aart Bika19616e2016-02-01 18:57:58 -08002320 X86Assembler* assembler = GetAssembler();
Mark Mendell58d25fd2015-04-03 14:52:31 -04002321 LocationSummary* locations = invoke->GetLocations();
2322
2323 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2324 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2325 Register temp = locations->GetTemp(0).AsRegister<Register>();
2326
2327 // We want to swap high/low, then bswap each one, and then do the same
2328 // as a 32-bit reverse.
2329 // Exchange high and low.
2330 __ movl(temp, reg_low);
2331 __ movl(reg_low, reg_high);
2332 __ movl(reg_high, temp);
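// Added note: a 64-bit bit reversal also swaps the two 32-bit halves, e.g.
// reverse(0x0000000000000001) = 0x8000000000000000, hence the exchange above
// before each half is bit-reversed in place.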
2333
2334 // bit-reverse low
2335 __ bswapl(reg_low);
2336 SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2337 SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2338 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2339
2340 // bit-reverse high
2341 __ bswapl(reg_high);
2342 SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2343 SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2344 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2345}
2346
Aart Bikc39dac12016-01-21 08:59:48 -08002347static void CreateBitCountLocations(
2348 ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2349 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2350 // Do nothing if there is no popcnt support. This results in generating
2351 // a call for the intrinsic rather than direct code.
2352 return;
2353 }
2354 LocationSummary* locations = new (arena) LocationSummary(invoke,
2355 LocationSummary::kNoCall,
2356 kIntrinsified);
2357 if (is_long) {
Aart Bikc39dac12016-01-21 08:59:48 -08002358 locations->AddTemp(Location::RequiresRegister());
Aart Bikc39dac12016-01-21 08:59:48 -08002359 }
Aart Bik2a946072016-01-21 12:49:00 -08002360 locations->SetInAt(0, Location::Any());
Aart Bikc39dac12016-01-21 08:59:48 -08002361 locations->SetOut(Location::RequiresRegister());
2362}
2363
Aart Bika19616e2016-02-01 18:57:58 -08002364static void GenBitCount(X86Assembler* assembler,
2365 CodeGeneratorX86* codegen,
2366 HInvoke* invoke, bool is_long) {
Aart Bikc39dac12016-01-21 08:59:48 -08002367 LocationSummary* locations = invoke->GetLocations();
2368 Location src = locations->InAt(0);
2369 Register out = locations->Out().AsRegister<Register>();
2370
2371 if (invoke->InputAt(0)->IsConstant()) {
2372 // Evaluate this at compile time.
2373 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
Roland Levillainfa3912e2016-04-01 18:21:55 +01002374 int32_t result = is_long
Aart Bikc39dac12016-01-21 08:59:48 -08002375 ? POPCOUNT(static_cast<uint64_t>(value))
2376 : POPCOUNT(static_cast<uint32_t>(value));
Roland Levillainfa3912e2016-04-01 18:21:55 +01002377 codegen->Load32BitValue(out, result);
Aart Bikc39dac12016-01-21 08:59:48 -08002378 return;
2379 }
2380
2381 // Handle the non-constant cases.
2382 if (!is_long) {
2383 if (src.IsRegister()) {
2384 __ popcntl(out, src.AsRegister<Register>());
2385 } else {
2386 DCHECK(src.IsStackSlot());
2387 __ popcntl(out, Address(ESP, src.GetStackIndex()));
2388 }
Aart Bik2a946072016-01-21 12:49:00 -08002389 } else {
2390 // The 64-bit case needs to worry about two parts.
2391 Register temp = locations->GetTemp(0).AsRegister<Register>();
2392 if (src.IsRegisterPair()) {
2393 __ popcntl(temp, src.AsRegisterPairLow<Register>());
2394 __ popcntl(out, src.AsRegisterPairHigh<Register>());
2395 } else {
2396 DCHECK(src.IsDoubleStackSlot());
2397 __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2398 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2399 }
2400 __ addl(out, temp);
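// Added sanity check (illustrative): for 0x0000000300000001, the high word
// contributes popcnt = 2 and the low word popcnt = 1, so addl leaves
// Long.bitCount = 3 in the output register.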
Aart Bikc39dac12016-01-21 08:59:48 -08002401 }
Aart Bikc39dac12016-01-21 08:59:48 -08002402}
2403
2404void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2405 CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
2406}
2407
2408void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
Aart Bika19616e2016-02-01 18:57:58 -08002409 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
Aart Bikc39dac12016-01-21 08:59:48 -08002410}
2411
2412void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2413 CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
2414}
2415
2416void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
Aart Bika19616e2016-02-01 18:57:58 -08002417 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
Aart Bikc39dac12016-01-21 08:59:48 -08002418}
2419
Mark Mendelld5897672015-08-12 21:16:41 -04002420static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
2421 LocationSummary* locations = new (arena) LocationSummary(invoke,
2422 LocationSummary::kNoCall,
2423 kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister());
}

static void GenLeadingZeros(X86Assembler* assembler,
                            CodeGeneratorX86* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = is_long ? 64 : 32;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ bsrl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ bsrl(out, Address(ESP, src.GetStackIndex()));
    }

    // BSR sets ZF if the input was zero; in that case its output is undefined.
    NearLabel all_zeroes, done;
    __ j(kEqual, &all_zeroes);

    // Correct the result from BSR to get the final CLZ result.
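    // BSR returns the index of the highest set bit (0..31), so CLZ is
    // 31 - index. Since the index never exceeds 31, 31 - index == 31 ^ index,
    // which lets a single XOR do the subtraction. E.g. for input 0x00010000,
    // BSR yields 16 and 31 ^ 16 == 15, the expected leading-zero count.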
    __ xorl(out, Immediate(31));
    __ jmp(&done);

    // Fix the zero case with the expected result.
    __ Bind(&all_zeroes);
    __ movl(out, Immediate(32));

    __ Bind(&done);
    return;
  }

  // The 64-bit case needs to worry about both halves of the register pair.
  DCHECK(src.IsRegisterPair());
  Register src_lo = src.AsRegisterPairLow<Register>();
  Register src_hi = src.AsRegisterPairHigh<Register>();
  NearLabel handle_low, done, all_zeroes;

  // Is the high word zero?
  __ testl(src_hi, src_hi);
  __ j(kEqual, &handle_low);

  // High word is not zero. We know that the BSR result is defined in this case.
  __ bsrl(out, src_hi);

  // Correct the result from BSR to get the final CLZ result.
  __ xorl(out, Immediate(31));
  __ jmp(&done);

  // High word was zero. We have to compute the low word count and add 32.
  __ Bind(&handle_low);
  __ bsrl(out, src_lo);
  __ j(kEqual, &all_zeroes);

  // We had a valid result. Use an XOR to both correct the result and add 32.
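  // For a bit index i in 0..31, 63 ^ i == 63 - i == 32 + (31 - i), so a single
  // XOR both converts the BSR index into a leading-zero count and adds the 32
  // zeros contributed by the all-zero high word.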
  __ xorl(out, Immediate(63));
  __ jmp(&done);

  // All zero case.
  __ Bind(&all_zeroes);
  __ movl(out, Immediate(64));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86Assembler* assembler,
                             CodeGeneratorX86* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = is_long ? 64 : 32;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ bsfl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ bsfl(out, Address(ESP, src.GetStackIndex()));
    }

    // BSF sets ZF if the input was zero; in that case its output is undefined.
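    // Unlike BSR, BSF needs no correction: the index of the lowest set bit is
    // exactly the trailing-zero count for any nonzero input, so only the
    // all-zeroes case has to be patched up below.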
    NearLabel done;
    __ j(kNotEqual, &done);

    // Fix the zero case with the expected result.
    __ movl(out, Immediate(32));

    __ Bind(&done);
    return;
  }

  // The 64-bit case needs to worry about both halves of the register pair.
  DCHECK(src.IsRegisterPair());
  Register src_lo = src.AsRegisterPairLow<Register>();
  Register src_hi = src.AsRegisterPairHigh<Register>();
  NearLabel done, all_zeroes;

  // If the low word is zero, then ZF will be set. If not, we have the answer.
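  // The trailing-zero count of a 64-bit value is ctz(lo) when the low word is
  // nonzero, and 32 + ctz(hi) otherwise; e.g. 0x0000000100000000 has a zero
  // low word and ctz(hi) == 0, giving 32.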
  __ bsfl(out, src_lo);
  __ j(kNotEqual, &done);

  // Low word was zero. We have to compute the high word count and add 32.
  __ bsfl(out, src_hi);
  __ j(kEqual, &all_zeroes);

  // We had a valid result. Add 32 to account for the low word being zero.
  __ addl(out, Immediate(32));
  __ jmp(&done);

  // All zero case.
  __ Bind(&all_zeroes);
  __ movl(out, Immediate(64));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
  if (kEmitCompilerReadBarrier) {
    // Do not intrinsify this call with the read barrier configuration.
    return;
  }
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
  DCHECK(!kEmitCompilerReadBarrier);
  LocationSummary* locations = invoke->GetLocations();
  X86Assembler* assembler = GetAssembler();

  Register obj = locations->InAt(0).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Load ArtMethod first.
  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(invoke_direct != nullptr);
  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
      invoke_direct, locations->GetTemp(0));
  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
  Register temp = temp_loc.AsRegister<Register>();

  // Now get declaring class.
  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));

  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
  DCHECK_NE(slow_path_flag_offset, 0u);
  DCHECK_NE(disable_flag_offset, 0u);
  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);

  // Check the static flags that prevent us from using the intrinsic.
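  // When the two byte-sized flags sit at consecutive offsets, a single 16-bit
  // compare against zero tests both of them in one memory access; otherwise
  // each flag byte has to be checked individually.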
  if (slow_path_flag_offset == disable_flag_offset + 1) {
    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  }

  // Fast path.
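  // The load below also serves as the null check on obj: a fault on a null
  // receiver is turned into a NullPointerException through the implicit null
  // check recorded right after the instruction.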
  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ MaybeUnpoisonHeapReference(out);
  __ Bind(slow_path->GetExitLabel());
}

static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
  return instruction->InputAt(input0) == instruction->InputAt(input1);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
  // TODO(rpl): Implement read barriers in the SystemArrayCopy
  // intrinsic and re-enable it (b/29516905).
  if (kEmitCompilerReadBarrier) {
    return;
  }

  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
  if (invoke->GetLocations() != nullptr) {
    // Need a byte register for marking.
    invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));

    static constexpr size_t kSrc = 0;
    static constexpr size_t kSrcPos = 1;
    static constexpr size_t kDest = 2;
    static constexpr size_t kDestPos = 3;
    static constexpr size_t kLength = 4;

    if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
        !invoke->InputAt(kDestPos)->IsIntConstant() &&
        !invoke->InputAt(kLength)->IsIntConstant()) {
      if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
          !IsSameInput(invoke, kSrcPos, kLength) &&
          !IsSameInput(invoke, kDestPos, kLength) &&
          !IsSameInput(invoke, kSrc, kDest)) {
        // Not enough registers, make the length also take a stack slot.
        invoke->GetLocations()->SetInAt(kLength, Location::Any());
      }
    }
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
  // TODO(rpl): Implement read barriers in the SystemArrayCopy
  // intrinsic and re-enable it (b/29516905).
  DCHECK(!kEmitCompilerReadBarrier);

  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location src_pos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);
  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
  Register temp2 = locations->GetTemp(1).AsRegister<Register>();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  // If source and destination are the same, we go to the slow path whenever a
  // forward copy would be unsafe, i.e. whenever src_pos < dest_pos.
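  // Example: copying {a, b, c} within itself from position 0 to position 1
  // with a forward element-by-element copy would produce {a, a, a} instead of
  // the required {a, a, b}; such overlapping copies are left to the slow path,
  // which has memmove semantics.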
  if (src_pos.IsConstant()) {
    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      if (optimizations.GetDestinationIsSource()) {
        // Checked when building locations.
        DCHECK_GE(src_pos_constant, dest_pos_constant);
      } else if (src_pos_constant < dest_pos_constant) {
        __ cmpl(src, dest);
        __ j(kEqual, slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
      __ j(kGreater, slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ cmpl(src, dest);
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
      __ j(kLess, slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
      __ j(kLess, slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, slow_path->GetEntryLabel());
  }

  Register temp3 = locations->GetTemp(2).AsRegister<Register>();
  if (length.IsStackSlot()) {
    __ movl(temp3, Address(ESP, length.GetStackIndex()));
    length = Location::RegisterLocation(temp3);
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler,
                src_pos,
                src,
                length,
                slow_path,
                temp1,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler,
                dest_pos,
                dest,
                length,
                slow_path,
                temp1,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.
    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      // /* HeapReference<Class> */ temp1 = src->klass_
      __ movl(temp1, Address(src, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // Bail out if the source is not a non-primitive array.
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      __ movl(temp1, Address(temp1, component_offset));
      __ testl(temp1, temp1);
      __ j(kEqual, slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(temp1);
      __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, slow_path->GetEntryLabel());
    }

    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // Bail out if the destination is not a non-primitive array.
      // /* HeapReference<Class> */ temp2 = temp1->component_type_
      __ movl(temp2, Address(temp1, component_offset));
      __ testl(temp2, temp2);
      __ j(kEqual, slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(temp2);
      __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, slow_path->GetEntryLabel());
      // Re-poison the heap reference to make the compare instruction below
      // compare two poisoned references.
      __ PoisonHeapReference(temp1);
    } else {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
    }

    // Note: if poisoning is on, we are here comparing two poisoned references.
    __ cmpl(temp1, Address(src, class_offset));

    if (optimizations.GetDestinationIsTypedObjectArray()) {
      NearLabel do_copy;
      __ j(kEqual, &do_copy);
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      __ movl(temp1, Address(temp1, component_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      __ cmpl(Address(temp1, super_offset), Immediate(0));
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ Bind(&do_copy);
    } else {
      __ j(kNotEqual, slow_path->GetEntryLabel());
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non-primitive array.
    // /* HeapReference<Class> */ temp1 = src->klass_
    __ movl(temp1, Address(src, class_offset));
    __ MaybeUnpoisonHeapReference(temp1);
    // /* HeapReference<Class> */ temp1 = temp1->component_type_
    __ movl(temp1, Address(temp1, component_offset));
    __ testl(temp1, temp1);
    __ j(kEqual, slow_path->GetEntryLabel());
    __ MaybeUnpoisonHeapReference(temp1);
    __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  }

  // Compute base source address, base destination address, and end source address.
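  // Each element is a 4-byte heap reference, so the element at index i lives
  // at array + data_offset + 4 * i. leal materializes that address arithmetic
  // in a single instruction without touching the flags.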
  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
  DCHECK_EQ(element_size, 4);
  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
  if (src_pos.IsConstant()) {
    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(temp1, Address(src, element_size * constant + offset));
  } else {
    __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
  }

  if (dest_pos.IsConstant()) {
    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(temp2, Address(dest, element_size * constant + offset));
  } else {
    __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
  }

  if (length.IsConstant()) {
    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
    __ leal(temp3, Address(temp1, element_size * constant));
  } else {
    __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
  }

  // Iterate over the arrays and do a raw copy of the objects. We don't need to
  // poison/unpoison.
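  // x86 has no memory-to-memory mov; moving each reference through the stack
  // with a push/pop pair avoids dedicating yet another temporary to the
  // transfer. The cfi() adjustments keep the unwind information in sync while
  // ESP is temporarily displaced.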
  NearLabel loop, done;
  __ cmpl(temp1, temp3);
  __ j(kEqual, &done);
  __ Bind(&loop);
  __ pushl(Address(temp1, 0));
  __ cfi().AdjustCFAOffset(4);
  __ popl(Address(temp2, 0));
  __ cfi().AdjustCFAOffset(-4);
  __ addl(temp1, Immediate(element_size));
  __ addl(temp2, Immediate(element_size));
  __ cmpl(temp1, temp3);
  __ j(kNotEqual, &loop);
  __ Bind(&done);

  // We only need one card marking on the destination array.
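  // Marking once suffices because the GC rescans the whole dirty card covering
  // dest for the references the loop just stored. No value register is
  // available here, so value_can_be_null is false and the card is marked
  // unconditionally rather than being skipped for null stores.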
  codegen_->MarkGCCard(temp1,
                       temp2,
                       dest,
                       Register(kNoRegister),
                       /* value_can_be_null */ false);

  __ Bind(slow_path->GetExitLabel());
}

UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)

// 1.8: Unsafe methods introduced in Java 8.
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art