/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int kFloatNaN = 0x7FC00000;

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->

// TODO: target as memory.
static void MoveFromReturnRegister(Location target,
                                   Primitive::Type type,
                                   CodeGeneratorX86* codegen) {
  if (!target.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      Register target_reg = target.AsRegister<Register>();
      if (target_reg != EAX) {
        __ movl(target_reg, EAX);
      }
      break;
    }
    case Primitive::kPrimLong: {
      Register target_reg_lo = target.AsRegisterPairLow<Register>();
      Register target_reg_hi = target.AsRegisterPairHigh<Register>();
      if (target_reg_lo != EAX) {
        __ movl(target_reg_lo, EAX);
      }
      if (target_reg_hi != EDX) {
        __ movl(target_reg_hi, EDX);
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << target;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
      if (target_reg != XMM0) {
        __ movsd(target_reg, XMM0);
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
      if (target_reg != XMM0) {
        __ movss(target_reg, XMM0);
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
  if (invoke->InputCount() == 0) {
    // No argument to move.
    return;
  }

  LocationSummary* locations = invoke->GetLocations();
  InvokeDexCallingConventionVisitor calling_convention_visitor;

  // We're moving potentially two or more locations to locations that could overlap, so we need
  // a parallel move resolver.
  HParallelMove parallel_move(arena);

  for (size_t i = 0; i < invoke->InputCount(); i++) {
    HInstruction* input = invoke->InputAt(i);
    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
    Location actual_loc = locations->InAt(i);

    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
  }

  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slow-path call, they must be
//       restored!
class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
 public:
  explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp)
      : invoke_(invoke) {
    // The temporary register has to be EAX for x86 invokes.
    DCHECK_EQ(temp, EAX);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}
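
// For the 64-bit cases above, the raw bits travel through an XMM register in two
// 32-bit halves. Example: Double.doubleToRawLongBits(2.0), whose raw pattern is
// 0x4000000000000000:
//   movd  out_lo, temp   // out_lo = 0x00000000 (bits 31..0)
//   psrlq temp, 32       // logical right shift brings bits 63..32 into the low word
//   movd  out_hi, temp   // out_hi = 0x40000000 (bits 63..32)
// MoveIntToFP reverses this with movd/movd/punpckldq, which interleaves the two
// low doublewords to rebuild the 64-bit pattern.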

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}
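
// Worked example for the short path: for an input short 0x1234 (register value
// 0x00001234), bswapl produces 0x34120000, and the arithmetic shift right by 16
// yields 0x00003412, sign-extending the byte-reversed short into the full register.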

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
}

static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // Create the right constant on an aligned stack.
    if (is64bit) {
      __ subl(ESP, Immediate(8));
      __ pushl(Immediate(0x7FFFFFFF));
      __ pushl(Immediate(0xFFFFFFFF));
      __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    } else {
      __ subl(ESP, Immediate(12));
      __ pushl(Immediate(0x7FFFFFFF));
      __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    }
    __ addl(ESP, Immediate(16));
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
//  Once assembler support is available, in-memory operations look like this:
//    if (is64bit) {
//      DCHECK(output.IsDoubleStackSlot());
//      __ andl(Address(Register(RSP), output.GetHighStackIndex(kX86WordSize)),
//              Immediate(0x7FFFFFFF));
//    } else {
//      DCHECK(output.IsStackSlot());
//      // Can use and with a literal directly.
//      __ andl(Address(Register(RSP), output.GetStackIndex()), Immediate(0x7FFFFFFF));
//    }
  }
}
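
// The pushes above assemble the sign-bit masks on the stack: 0x7FFFFFFFFFFFFFFF
// for doubles and 0x7FFFFFFF for floats. Example: abs(-2.0) ANDs the raw bits
// 0xC000000000000000 with 0x7FFFFFFFFFFFFFFF, giving 0x4000000000000000 == 2.0;
// everything except the sign bit passes through unchanged.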

void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(EAX));
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RegisterLocation(EDX));
}

static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
  Location output = locations->Out();
  Register out = output.AsRegister<Register>();
  DCHECK_EQ(out, EAX);
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(temp, EDX);

  // Sign extend EAX into EDX.
  __ cdq();

  // XOR EAX with sign.
  __ xorl(EAX, EDX);

  // Subtract out sign to correct.
  __ subl(EAX, EDX);

  // The result is in EAX.
}
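
// This is the classic branch-free abs: after cdq, EDX is 0 for non-negative inputs
// (making the xor and sub no-ops) and 0xFFFFFFFF for negative ones, so
// (x ^ sign) - sign negates x. Example for EAX = -5 (0xFFFFFFFB):
//   cdq             // EDX = 0xFFFFFFFF
//   xorl EAX, EDX   // EAX = 0x00000004
//   subl EAX, EDX   // EAX = 4 - (-1) = 5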

static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // Compute the sign into the temporary.
  __ movl(temp, input_hi);
  __ sarl(temp, Immediate(31));

  // Store the sign into the output.
  __ movl(output_lo, temp);
  __ movl(output_hi, temp);

  // XOR the input to the output.
  __ xorl(output_lo, input_lo);
  __ xorl(output_hi, input_hi);

  // Subtract the sign.
  __ subl(output_lo, temp);
  __ sbbl(output_hi, temp);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
  CreateAbsIntLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
  CreateAbsLongLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsLong(invoke->GetLocations(), GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
                        X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if NaN jmp NaN_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // NaN_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ pushl(Immediate(kDoubleNaNHigh));
    __ pushl(Immediate(kDoubleNaNLow));
    __ movsd(out, Address(ESP, 0));
    __ addl(ESP, Immediate(8));
  } else {
    __ pushl(Immediate(kFloatNaN));
    __ movss(out, Address(ESP, 0));
    __ addl(ESP, Immediate(4));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}
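
// The or/and in the 0.0/-0.0 block exploits the sign bit: ucomiss/ucomisd report
// +0.0 and -0.0 as equal, yet min(+0.0, -0.0) must be -0.0 and max(+0.0, -0.0)
// must be +0.0. ORing the raw bit patterns (0x00000000 | 0x80000000) produces
// -0.0 for min; ANDing them produces +0.0 for max.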

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  if (is_long) {
    // Need to perform a subtract to get the sign right.
    // op1 is already in the same location as the output.
    Location output = locations->Out();
    Register output_lo = output.AsRegisterPairLow<Register>();
    Register output_hi = output.AsRegisterPairHigh<Register>();

    Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();

    // Spare register to compute the subtraction to set condition code.
    Register temp = locations->GetTemp(0).AsRegister<Register>();

    // Subtract off op2_low.
    __ movl(temp, output_lo);
    __ subl(temp, op2_lo);

    // Now use the same temp and the borrow to finish the subtraction of op2_hi.
    __ movl(temp, output_hi);
    __ sbbl(temp, op2_hi);

    // Now the condition code is correct.
    Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    __ cmovl(cond, output_lo, op2_lo);
    __ cmovl(cond, output_hi, op2_hi);
  } else {
    Register out = locations->Out().AsRegister<Register>();
    Register op2 = op2_loc.AsRegister<Register>();

    //  (out := op1)
    //  out <=? op2
    //  if out is min jmp done
    //  out := op2
    // done:

    __ cmpl(out, op2);
    Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    __ cmovl(cond, out, op2);
  }
}
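
// For the long case, only the flags of the full 64-bit subtraction out - op2 are
// needed, so both halves of the difference are discarded into the temp. Example for
// min(out = 1, op2 = 2): 1 - 2 leaves SF != OF (signed less-than), so kGreaterEqual
// does not hold, both cmovl are skipped, and out keeps the smaller value 1.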

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Register to use to perform a long subtract to set cc.
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
                                    HInvoke* invoke,
                                    X86Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}
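
// The round_mode immediates passed above select the SSE4.1 rounding control:
// 0 rounds to nearest even (rint), 1 rounds toward negative infinity (floor),
// and 2 rounds toward positive infinity (ceil).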

// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
// as it needs 64 bit instructions.
void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
  if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86Assembler* assembler = GetAssembler();

  // Generate 0.5 into inPlusPointFive.
  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
  __ movd(inPlusPointFive, out);

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor it to an integral value (roundss with mode 1 rounds toward -inf).
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  __ movl(out, Immediate(kPrimIntMax));
  // maxInt = int-to-float(out)
  __ cvtsi2ss(maxInt, out);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, maxInt);
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}
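
// A C++ sketch of the semantics the instruction sequence above implements
// (illustrative only; kPrimIntMax is Integer.MAX_VALUE):
//
//   int32_t RoundFloat(float in) {
//     if (std::isnan(in)) return 0;                      // NaN rounds to 0.
//     float biased = std::floor(in + 0.5f);              // addss + roundss(1).
//     if (biased >= static_cast<float>(kPrimIntMax)) {   // comiss + jae.
//       return kPrimIntMax;                              // out already holds the max.
//     }
//     return static_cast<int32_t>(biased);               // cvttss2si.
//   }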

void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  Register obj = locations->InAt(0).AsRegister<Register>();
  Register idx = locations->InAt(1).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
      invoke, locations->GetTemp(0).AsRegister<Register>());
  codegen_->AddSlowPath(slow_path);

  X86Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ testl(argument, argument);
  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
      invoke, locations->GetTemp(0).AsRegister<Register>());
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register byte_array = locations->InAt(0).AsRegister<Register>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
      invoke, locations->GetTemp(0).AsRegister<Register>());
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();

  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars)));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register string_to_copy = locations->InAt(0).AsRegister<Register>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
      invoke, locations->GetTemp(0).AsRegister<Register>());
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString)));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
  Register address = locations->InAt(0).AsRegisterPairLow<Register>();
  Location out_loc = locations->Out();
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
      __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateLongToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateLongToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateLongToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
                                         HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  HInstruction* value = invoke->InputAt(1);
  if (size == Primitive::kPrimByte) {
    locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
  } else {
    locations->SetInAt(1, Location::RegisterOrConstant(value));
  }
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
  Register address = locations->InAt(0).AsRegisterPairLow<Register>();
  Location value_loc = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value_loc.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value_loc.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ movw(Address(address, 0), value_loc.AsRegister<Register>());
      }
      break;
    case Primitive::kPrimInt:
      if (value_loc.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ movl(Address(address, 0), value_loc.AsRegister<Register>());
      }
      break;
    case Primitive::kPrimLong:
      if (value_loc.IsConstant()) {
        int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
        __ movl(Address(address, 0), Immediate(Low32Bits(value)));
        __ movl(Address(address, 4), Immediate(High32Bits(value)));
      } else {
        __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
        __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}
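
// Note that both the long peek above and the long poke here split the access into
// two 32-bit moves, so a 64-bit peek/poke is not a single atomic memory operation.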

void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
  GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile, X86Assembler* assembler) {
  Register base = locations->InAt(1).AsRegister<Register>();
  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
  Location output = locations->Out();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(output.AsRegister<Register>(), Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    case Primitive::kPrimLong: {
        Register output_lo = output.AsRegisterPairLow<Register>();
        Register output_hi = output.AsRegisterPairHigh<Register>();
        if (is_volatile) {
          // Need to use a XMM to read atomically.
          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
          __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
          __ movd(output_lo, temp);
          __ psrlq(temp, Immediate(32));
          __ movd(output_hi, temp);
        } else {
          __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
          __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
        }
      }
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}
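
// On the volatile long path, a pair of plain movl loads could observe a torn value
// if another thread updates the field between the two loads. The single 8-byte movsd
// load is what makes the read atomic; the movd/psrlq/movd sequence afterwards only
// unpacks the XMM register into the output pair.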

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
                                          bool is_long, bool is_volatile) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  if (is_long) {
    if (is_volatile) {
      // Need to use XMM to read volatile.
      locations->AddTemp(Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
    } else {
      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
    }
  } else {
    locations->SetOut(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, true, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, true, true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
}


void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke,
                                                       bool is_volatile) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    // Ensure the value is in a byte register.
    locations->AddTemp(Location::RegisterLocation(ECX));
  } else if (type == Primitive::kPrimLong && is_volatile) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, true);
}

// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
static void GenUnsafePut(LocationSummary* locations,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorX86* codegen) {
  X86Assembler* assembler = reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
  Register base = locations->InAt(1).AsRegister<Register>();
  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
  Location value_loc = locations->InAt(3);

  if (type == Primitive::kPrimLong) {
    Register value_lo = value_loc.AsRegisterPairLow<Register>();
    Register value_hi = value_loc.AsRegisterPairHigh<Register>();
    if (is_volatile) {
      XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
      __ movd(temp1, value_lo);
      __ movd(temp2, value_hi);
      __ punpckldq(temp1, temp2);
      __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
    } else {
      __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
      __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
    }
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
                        locations->GetTemp(1).AsRegister<Register>(),
                        base,
                        value_loc.AsRegister<Register>());
  }
}

void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}
1420
static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  // The offset is a long, but in 32-bit mode we only need the low word.
  // Can we update the invoke here to remove a TypeConvert to Long?
  locations->SetInAt(2, Location::RequiresRegister());
  // cmpxchg expects the old value in EAX (EDX:EAX for cmpxchg8b) and, for the
  // long variant, the new value in ECX:EBX.
  if (type == Primitive::kPrimLong) {
    locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
    locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
  } else {
    locations->SetInAt(3, Location::RegisterLocation(EAX));
    locations->SetInAt(4, Location::RequiresRegister());
  }

  // Force the output into EAX: setb below needs a byte-addressable register,
  // and cmpxchg leaves the old value there anyway.
  locations->SetOut(Location::RegisterLocation(EAX));
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    // Need a byte register for marking.
    locations->AddTemp(Location::RegisterLocation(ECX));
  }
}

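// The fixed registers above mirror the implicit operands of the hardware CAS:
// cmpxchgl compares EAX with the memory operand and stores the source register
// on a match; cmpxchg8b does the same for 64 bits. A rough sketch of the long
// case (illustrative only, not a literal listing of the generated code):
//
//   lock cmpxchg8b [base + offset]   ; if (mem == EDX:EAX) { mem = ECX:EBX; ZF = 1 }
//                                    ; else               { EDX:EAX = mem;  ZF = 0 }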
void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
  X86Assembler* assembler =
      reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  Register base = locations->InAt(1).AsRegister<Register>();
  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
  Location out = locations->Out();
  DCHECK_EQ(out.AsRegister<Register>(), EAX);

  if (type == Primitive::kPrimLong) {
    DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
    DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
    DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
    DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
    __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
  } else {
    // Integer or object.
    DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
    Register value = locations->InAt(4).AsRegister<Register>();
    if (type == Primitive::kPrimNot) {
      // Mark the card conservatively, assuming the new value will be stored.
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
                          locations->GetTemp(1).AsRegister<Register>(),
                          base,
                          value);
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // A locked cmpxchg has full barrier semantics, so we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setb(kZero, out.AsRegister<Register>());
  __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
}

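// The tail of GenCAS materializes the success flag: after a locked cmpxchg,
// ZF is set iff the expected value matched memory. A sketch of the emitted
// sequence for the int case (illustrative AT&T syntax, not a literal listing):
//
//   lock cmpxchgl %value, (%base,%offset,1)  // compare EAX with memory, swap on match
//   setz   %al                               // AL = ZF
//   movzbl %al, %eax                         // zero-extend to a 0/1 int result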
void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
                     X86Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

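// SwapBits computes, in C terms (illustrative sketch of the sequence above):
//
//   uint32_t t = reg;
//   reg = ((reg >> shift) & mask) | ((t & mask) << shift);
//
// i.e. it exchanges each mask-selected group of bits with its neighbor
// `shift` positions away.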
void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
  X86Assembler* assembler =
      reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  Register reg = locations->InAt(0).AsRegister<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  /*
   * Use one bswap instruction to reverse the byte order first, then use three
   * rounds of bit swapping to reverse the bits within each byte. Using bswap
   * saves instructions compared to the generic libcore (luni) implementation,
   * which needs five rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

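// Worked example for the sequence above (illustrative): reversing 0x12345678
// first byte-swaps to 0x78563412, then the three SwapBits rounds reverse the
// bits inside each byte, yielding 0x1E6A2C48 == Integer.reverse(0x12345678).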
void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
  X86Assembler* assembler =
      reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
  Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // Reversing 64 bits maps bit i to bit (63 - i), so the reversed low word
  // becomes the high word and vice versa. Exchange the two words first, then
  // reverse each one as in the 32-bit case.
  __ movl(temp, reg_low);
  __ movl(reg_low, reg_high);
  __ movl(reg_high, temp);

  // Bit-reverse the (new) low word.
  __ bswapl(reg_low);
  SwapBits(reg_low, temp, 1, 0x55555555, assembler);
  SwapBits(reg_low, temp, 2, 0x33333333, assembler);
  SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);

  // Bit-reverse the (new) high word.
  __ bswapl(reg_high);
  SwapBits(reg_high, temp, 1, 0x55555555, assembler);
  SwapBits(reg_high, temp, 2, 0x33333333, assembler);
  SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
}

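// Plain C sketch of the 64-bit reversal above (illustrative; reverse32 stands
// for the 32-bit sequence in VisitIntegerReverse and is not a real helper):
//
//   uint32_t lo = (uint32_t)value;
//   uint32_t hi = (uint32_t)(value >> 32);
//   uint64_t result = ((uint64_t)reverse32(lo) << 32) | reverse32(hi);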
// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name) \
void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
} \
void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}

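// These empty visitors leave the invoke without an intrinsified
// LocationSummary, so TryDispatch() returns false and the compiler falls back
// to the regular invoke path for these methods.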
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86
}  // namespace art