/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "lock_word.h"
#include "mirror/array-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

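// Shorthand: `__ Insn(...)` emits through the VIXL32 macro assembler of the local `assembler`.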
#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARM calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
    }
    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    if (dest_pos.IsConstant()) {
      int32_t constant = Int32ConstantFrom(dest_pos);
      __ Add(dst_curr_addr, dest, element_size * constant + offset);
    } else {
      __ Add(dst_curr_addr,
             dest,
             Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
      __ Add(dst_curr_addr, dst_curr_addr, offset);
    }

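    // Copy the remaining references one at a time: each loaded reference is passed through the
    // read barrier mark entry point before being stored to the destination.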
    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    Primitive::Type type,
                                    ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

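  // For a long input, count leading zeros in the high word first; if the high word is non-zero
  // that is the answer, otherwise the result is 32 plus the leading-zero count of the low word.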
  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    __ Clz(out, in_reg_hi);
    __ Cbnz(in_reg_hi, &end);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    __ Bind(&end);
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     Primitive::Type type,
                                     ArmVIXLAssembler* assembler) {
  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  vixl32::Register out = RegisterFrom(locations->Out());

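  // RBIT reverses the bit order, so a CLZ of the reversed value yields the trailing-zero count.
  // For a long input, start with the low word and fall back to 32 + CTZ(high word) when it is zero.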
  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ Cbnz(in_reg_lo, &end);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    __ Bind(&end);
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));

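  // Branchless abs: mask = in >> 31 (all ones for a negative input, zero otherwise), so
  // (in + mask) ^ mask negates a negative value and leaves a non-negative one unchanged.
  // The 64-bit variant does the same with an ADDS/ADC pair to propagate the carry.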
  if (is64bit) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Register out_reg_lo = LowRegisterFrom(output);
    vixl32::Register out_reg_hi = HighRegisterFrom(output);

    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";

    __ Asr(mask, in_reg_hi, 31);
    __ Adds(out_reg_lo, in_reg_lo, mask);
    __ Adc(out_reg_hi, in_reg_hi, mask);
    __ Eor(out_reg_lo, mask, out_reg_lo);
    __ Eor(out_reg_hi, mask, out_reg_hi);
  } else {
    vixl32::Register in_reg = RegisterFrom(in);
    vixl32::Register out_reg = RegisterFrom(output);

    __ Asr(mask, in_reg, 31);
    __ Add(out_reg, in_reg, mask);
    __ Eor(out_reg, mask, out_reg);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}


void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  vixl32::Register op1 = InputRegisterAt(invoke, 0);
  vixl32::Register op2 = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  __ Cmp(op1, op2);

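  // Conditionally select op1 or op2 into `out` with an IT block; the accurate-assembler scope
  // keeps VIXL from emitting anything else (e.g. literal pools) between the IT and its two MOVs.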
  {
    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
                               3 * kMaxInstructionSizeInBytes,
                               CodeBufferCheckScope::kMaximumSize);

    __ ite(is_min ? lt : gt);
    __ mov(is_min ? lt : gt, out, op1);
    __ mov(is_min ? ge : le, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, addr);
  } else {
    __ Ldr(lo, addr);
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), addr);
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
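  // The managed java.lang.Thread object is the peer stored in the runtime Thread, which is
  // reachable through the dedicated thread register (tr).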
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case Primitive::kPrimInt: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case Primitive::kPrimNot: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case Primitive::kPrimLong: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
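      // For volatile 64-bit reads on cores without single-copy atomic LDRD, use LDREXD, which
      // loads the doubleword atomically.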
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
        __ Ldrexd(trg_lo, trg_hi, MemOperand(base, offset));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
                                          HInvoke* invoke,
                                          Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
                                     const ArmInstructionSetFeatures& features,
                                     Primitive::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == Primitive::kPrimLong) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == Primitive::kPrimNot) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
}

static void GenUnsafePut(LocationSummary* locations,
                         Primitive::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
  vixl32::Register value;

  if (is_volatile || is_ordered) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimLong) {
    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
    value = value_lo;
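    // Without single-copy atomic LDRD/STRD, emit a LDREXD/STREXD loop so the 64-bit store is
    // performed atomically; the loop retries until the exclusive store succeeds.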
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
      const vixl32::Register temp_reg = temps.Acquire();

      __ Add(temp_reg, base, offset);
      vixl32::Label loop_head;
      __ Bind(&loop_head);
      __ Ldrexd(temp_lo, temp_hi, temp_reg);
      __ Strexd(temp_lo, value_lo, value_hi, temp_reg);
      __ Cmp(temp_lo, 0);
      __ B(ne, &loop_head);
    } else {
      __ Strd(value_lo, value_hi, MemOperand(base, offset));
    }
  } else {
    value = RegisterFrom(locations->InAt(3));
    vixl32::Register source = value;
    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
      __ Mov(temp, value);
      assembler->PoisonHeapReference(temp);
      source = temp;
    }
    __ Str(source, MemOperand(base, offset));
  }

  if (is_volatile) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimNot) {
    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
                                                HInvoke* invoke,
                                                Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // If heap poisoning is enabled, we don't want the unpoisoning
  // operations to potentially clobber the output. Likewise when
  // emitting a (Baker) read barrier, which may call.
  Location::OutputOverlap overlaps =
      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
          ? Location::kOutputOverlap
          : Location::kNoOutputOverlap;
  locations->SetOut(Location::RequiresRegister(), overlaps);

  // Temporary registers used in CAS. In the object case
  // (UnsafeCASObject intrinsic), these are also used for
  // card-marking, and possibly for (Baker) read barrier.
  locations->AddTemp(Location::RequiresRegister());  // Pointer.
  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
}

static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
  DCHECK_NE(type, Primitive::kPrimLong);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location out_loc = locations->Out();
  vixl32::Register out = OutputRegister(invoke);           // Boolean result.

  vixl32::Register base = InputRegisterAt(invoke, 1);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);   // Offset (discard high 4B).
  vixl32::Register expected = InputRegisterAt(invoke, 3);  // Expected.
  vixl32::Register value = InputRegisterAt(invoke, 4);     // Value.

  Location tmp_ptr_loc = locations->GetTemp(0);
  vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);        // Pointer to actual memory.
  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));  // Value in memory.

  if (type == Primitive::kPrimNot) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->GenerateReferenceLoadWithBakerReadBarrier(
          invoke,
          out_loc,  // Unused, used only as a "temporary" within the read barrier.
          base,
          /* offset */ 0u,
          /* index */ offset_loc,
          ScaleFactor::TIMES_1,
          tmp_ptr_loc,
          /* needs_null_check */ false,
          /* always_update_field */ true,
          &tmp);
    }
  }

  // Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of an DMB ISHST one, as the
  // latter allows a preceding load to be delayed past the STXR
  // instruction below.
  __ Dmb(vixl32::ISH);

  __ Add(tmp_ptr, base, offset);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->PoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not poison `value`, as it is the same register as
      // `expected`, which has just been poisoned.
    } else {
      codegen->GetAssembler()->PoisonHeapReference(value);
    }
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = (tmp == 0);

  vixl32::Label loop_head;
  __ Bind(&loop_head);

  __ Ldrex(tmp, tmp_ptr);

  __ Subs(tmp, tmp, expected);

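  // If the loaded value matched `expected` (tmp == 0), attempt the exclusive store; STREX writes
  // 0 to `tmp` on success and 1 on failure, in which case the whole sequence is retried.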
  {
    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
                               3 * kMaxInstructionSizeInBytes,
                               CodeBufferCheckScope::kMaximumSize);

    __ itt(eq);
    __ strex(eq, tmp, value, tmp_ptr);
    __ cmp(eq, tmp, 1);
  }

  __ B(eq, &loop_head);

  __ Dmb(vixl32::ISH);

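  // out = 1 - tmp. If the subtraction borrows (tmp > 1 unsigned), the IT block below clears
  // `out`, so `out` ends up as (tmp == 0), i.e. true exactly when the CAS succeeded.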
  __ Rsbs(out, tmp, 1);

  {
    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
                               2 * kMaxInstructionSizeInBytes,
                               CodeBufferCheckScope::kMaximumSize);

    __ it(cc);
    __ mov(cc, out, 0);
  }

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->UnpoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not unpoison `value`, as it is the same register as
      // `expected`, which has just been unpoisoned.
    } else {
      codegen->GetAssembler()->UnpoisonHeapReference(value);
    }
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, Primitive::kPrimNot, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            invoke->InputAt(1)->CanBeNull()
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need a temporary register for the String compression feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register str = InputRegisterAt(invoke, 0);
  vixl32::Register arg = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
  vixl32::Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = RegisterFrom(locations->GetTemp(3));
  }

  vixl32::Label loop;
  vixl32::Label find_char_diff;
  vixl32::Label end;
  vixl32::Label different_compression;

  // Get offsets of count and value fields within a string object.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Take slow path and throw if input can be and is null.
  SlowPathCodeARMVIXL* slow_path = nullptr;
  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
  if (can_slow_path) {
    slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
    codegen_->AddSlowPath(slow_path);
    __ Cbz(arg, slow_path->GetEntryLabel());
  }

  // Reference equality check, return 0 if same reference.
  __ Subs(out, str, arg);
  __ B(eq, &end);

  if (mirror::kUseStringCompression) {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp3, MemOperand(str, count_offset));
    __ Ldr(temp2, MemOperand(arg, count_offset));
    // Extract lengths from the `count` fields.
    __ Lsr(temp0, temp3, 1u);
    __ Lsr(temp1, temp2, 1u);
  } else {
    // Load lengths of this and argument strings.
    __ Ldr(temp0, MemOperand(str, count_offset));
    __ Ldr(temp1, MemOperand(arg, count_offset));
  }
  // out = length diff.
  __ Subs(out, temp0, temp1);
  // temp0 = min(len(str), len(arg)).

  {
    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
                               2 * kMaxInstructionSizeInBytes,
                               CodeBufferCheckScope::kMaximumSize);

    __ it(gt);
    __ mov(gt, temp0, temp1);
  }

  // Shorter string is empty?
  __ Cbz(temp0, &end);

  if (mirror::kUseStringCompression) {
    // Check that both strings use the same compression style; only then can this comparison
    // loop be used.
    __ Eors(temp2, temp2, temp3);
    __ Lsrs(temp2, temp2, 1u);
    __ B(cs, &different_compression);
    // For string compression, calculate the number of bytes to compare (not chars).
    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
    __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.

    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
                               2 * kMaxInstructionSizeInBytes,
                               CodeBufferCheckScope::kMaximumSize);

    __ it(ne);
    __ add(ne, temp0, temp0, temp0);
  }

  // Store offset of string value in preparation for comparison loop.
  __ Mov(temp1, value_offset);

  // Assertions that must hold in order to compare multiple characters at a time.
  CHECK_ALIGNED(value_offset, 8);
  static_assert(IsAligned<8>(kObjectAlignment),
                "String data must be 8-byte aligned for unrolled CompareTo loop.");

  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());

  vixl32::Label find_char_diff_2nd_cmp;
  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
  __ Bind(&loop);
  vixl32::Register temp_reg = temps.Acquire();
  __ Ldr(temp_reg, MemOperand(str, temp1));
  __ Ldr(temp2, MemOperand(arg, temp1));
  __ Cmp(temp_reg, temp2);
  __ B(ne, &find_char_diff);
  __ Add(temp1, temp1, char_size * 2);

  __ Ldr(temp_reg, MemOperand(str, temp1));
  __ Ldr(temp2, MemOperand(arg, temp1));
  __ Cmp(temp_reg, temp2);
  __ B(ne, &find_char_diff_2nd_cmp);
  __ Add(temp1, temp1, char_size * 2);
  // With string compression, we have compared 8 bytes, otherwise 4 chars.
  __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
  __ B(hi, &loop);
  __ B(&end);

  __ Bind(&find_char_diff_2nd_cmp);
  if (mirror::kUseStringCompression) {
    __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
    __ B(ls, &end);  // Was the second comparison fully beyond the end?
  } else {
    // Without string compression, we can start treating temp0 as signed
    // and rely on the signed comparison below.
    __ Sub(temp0, temp0, 2);
  }

  // Find the single character difference.
  __ Bind(&find_char_diff);
  // Get the bit position of the first character that differs.
  __ Eor(temp1, temp2, temp_reg);
  __ Rbit(temp1, temp1);
  __ Clz(temp1, temp1);

  // temp0 = number of characters remaining to compare.
  // (Without string compression, it could be < 1 if a difference is found by the second CMP
  // in the comparison loop, and after the end of the shorter string data).

  // Without string compression (temp1 >> 4) = character where difference occurs between the last
  // two words compared, in the interval [0,1].
  // (0 for low half-word different, 1 for high half-word different).
  // With string compression, (temp1 >> 3) = byte where the difference occurs,
  // in the interval [0,3].

  // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
  // the remaining string data, so just return length diff (out).
  // The comparison is unsigned for string compression, otherwise signed.
  __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
  __ B((mirror::kUseStringCompression ? ls : le), &end);

  // Extract the characters and calculate the difference.
  if (mirror::kUseStringCompression) {
    // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
    // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
    // The compression flag is now in the highest bit of temp3, so let's play some tricks.
    __ orr(temp3, temp3, 0xffu << 23);  // uncompressed ? 0xff800000u : 0x7ff80000u
    __ bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
    __ Asr(temp3, temp3, 7u);  // uncompressed ? 0xffff0000u : 0xff0000u.
    __ Lsr(temp2, temp2, temp1);  // Extract second character.
    __ Lsr(temp3, temp3, 16u);  // uncompressed ? 0xffffu : 0xffu
    __ Lsr(out, temp_reg, temp1);  // Extract first character.
    __ and_(temp2, temp2, temp3);
    __ and_(out, out, temp3);
  } else {
    __ bic(temp1, temp1, 0xf);
    __ Lsr(temp2, temp2, temp1);
    __ Lsr(out, temp_reg, temp1);
    __ movt(temp2, 0);
    __ movt(out, 0);
  }

  __ Sub(out, out, temp2);
  temps.Release(temp_reg);

  if (mirror::kUseStringCompression) {
    __ B(&end);
    __ Bind(&different_compression);

    // Comparison for different compression style.
    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
    DCHECK_EQ(c_char_size, 1u);

    // We want to free up temp3, currently holding `str.count`, for comparison.
    // So, we move it to the bottom bit of the iteration count `temp0` which we then
    // need to treat as unsigned. Start by freeing the bit with an ADD and continue
    // further down by a LSRS+SBC which will flip the meaning of the flag but allow
    // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
    __ add(temp0, temp0, temp0);  // Unlike LSL, this ADD is always 16-bit.
    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
    __ mov(temp1, str);
    __ mov(temp2, arg);
    __ Lsrs(temp3, temp3, 1u);  // Continue the move of the compression flag.
    {
      AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
                                 3 * kMaxInstructionSizeInBytes,
                                 CodeBufferCheckScope::kMaximumSize);
      __ itt(cs);  // Interleave with selection of temp1 and temp2.
      __ mov(cs, temp1, arg);  // Preserves flags.
      __ mov(cs, temp2, str);  // Preserves flags.
    }
    __ sbc(temp0, temp0, 0);  // Complete the move of the compression flag.

    // Adjust temp1 and temp2 from string pointers to data pointers.
    __ add(temp1, temp1, value_offset);
    __ add(temp2, temp2, value_offset);

    vixl32::Label different_compression_loop;
    vixl32::Label different_compression_diff;

    // Main loop for different compression.
    temp_reg = temps.Acquire();
    __ Bind(&different_compression_loop);
    __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
    __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
    __ cmp(temp_reg, temp3);
    __ B(ne, &different_compression_diff);
    __ Subs(temp0, temp0, 2);
    __ B(hi, &different_compression_loop);
    __ B(&end);

    // Calculate the difference.
    __ Bind(&different_compression_diff);
    __ Sub(out, temp_reg, temp3);
    temps.Release(temp_reg);
    // Flip the difference if the `arg` is compressed.
    // `temp0` contains inverted `str` compression flag, i.e. the same as `arg` compression flag.
1355 __ Lsrs(temp0, temp0, 1u);
1356 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1357 "Expecting 0=compressed, 1=uncompressed");
1358
1359 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1360 2 * kMaxInstructionSizeInBytes,
1361 CodeBufferCheckScope::kMaximumSize);
1362 __ it(cc);
1363 __ rsb(cc, out, out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001364 }
1365
1366 __ Bind(&end);
1367
1368 if (can_slow_path) {
1369 __ Bind(slow_path->GetExitLabel());
1370 }
1371}
1372
1373void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1374 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1375 LocationSummary::kNoCall,
1376 kIntrinsified);
1377 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1378 locations->SetInAt(0, Location::RequiresRegister());
1379 locations->SetInAt(1, Location::RequiresRegister());
1380 // Temporary registers to store lengths of strings and for calculations.
 1381 // Using the cbz instruction requires a low register, so explicitly set a temp to be R0.
1382 locations->AddTemp(LocationFrom(r0));
1383 locations->AddTemp(Location::RequiresRegister());
1384 locations->AddTemp(Location::RequiresRegister());
1385
1386 locations->SetOut(Location::RequiresRegister());
1387}
1388
1389void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1390 ArmVIXLAssembler* assembler = GetAssembler();
1391 LocationSummary* locations = invoke->GetLocations();
1392
1393 vixl32::Register str = InputRegisterAt(invoke, 0);
1394 vixl32::Register arg = InputRegisterAt(invoke, 1);
1395 vixl32::Register out = OutputRegister(invoke);
1396
1397 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1398 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1399 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1400
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001401 vixl32::Label loop;
Anton Kirilov5ec62182016-10-13 20:16:02 +01001402 vixl32::Label end;
1403 vixl32::Label return_true;
1404 vixl32::Label return_false;
1405
1406 // Get offsets of count, value, and class fields within a string object.
1407 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1408 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1409 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1410
1411 // Note that the null check must have been done earlier.
1412 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1413
1414 StringEqualsOptimizations optimizations(invoke);
1415 if (!optimizations.GetArgumentNotNull()) {
1416 // Check if input is null, return false if it is.
1417 __ Cbz(arg, &return_false);
1418 }
1419
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001420 // Reference equality check, return true if same reference.
1421 __ Cmp(str, arg);
1422 __ B(eq, &return_true);
1423
Anton Kirilov5ec62182016-10-13 20:16:02 +01001424 if (!optimizations.GetArgumentIsString()) {
1425 // Instanceof check for the argument by comparing class fields.
1426 // All string objects must have the same type since String cannot be subclassed.
1427 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1428 // If the argument is a string object, its class field must be equal to receiver's class field.
1429 __ Ldr(temp, MemOperand(str, class_offset));
1430 __ Ldr(temp1, MemOperand(arg, class_offset));
1431 __ Cmp(temp, temp1);
1432 __ B(ne, &return_false);
1433 }
1434
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001435 // Load `count` fields of this and argument strings.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001436 __ Ldr(temp, MemOperand(str, count_offset));
1437 __ Ldr(temp1, MemOperand(arg, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001438 // Check if `count` fields are equal, return false if they're not.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001439 // Also compare the compression style; if it differs, return false.
1440 __ Cmp(temp, temp1);
1441 __ B(ne, &return_false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001442 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1443 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1444 "Expecting 0=compressed, 1=uncompressed");
Anton Kirilov5ec62182016-10-13 20:16:02 +01001445 __ Cbz(temp, &return_true);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001446
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001447 // Assertions that must hold in order to compare strings 4 bytes at a time.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001448 DCHECK_ALIGNED(value_offset, 4);
1449 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1450
1451 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001452 // For string compression, calculate the number of bytes to compare (not chars).
1453 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1454 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
1455 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1456 2 * kMaxInstructionSizeInBytes,
1457 CodeBufferCheckScope::kMaximumSize);
1458 __ it(cs); // If uncompressed,
1459 __ add(cs, temp, temp, temp); // double the byte count.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001460 }
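// For example, with both strings 3 characters long the `count` field is (3 << 1) | 1 == 7
// when uncompressed: LSRS leaves 3 with the carry set and the conditional ADD doubles it
// to 6 bytes. When compressed the field is 6, the carry is clear and 3 bytes are compared.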
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001461
1462 // Store offset of string value in preparation for comparison loop.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001463 __ Mov(temp1, value_offset);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001464
1465 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1466 // Ok to do this because strings are zero-padded to kObjectAlignment.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001467 __ Bind(&loop);
1468 __ Ldr(out, MemOperand(str, temp1));
1469 __ Ldr(temp2, MemOperand(arg, temp1));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001470 __ Add(temp1, temp1, sizeof(uint32_t));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001471 __ Cmp(out, temp2);
1472 __ B(ne, &return_false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001473 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1474 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1475 __ B(hi, &loop);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001476
1477 // Return true and exit the function.
1478 // If loop does not result in returning false, we return true.
1479 __ Bind(&return_true);
1480 __ Mov(out, 1);
1481 __ B(&end);
1482
1483 // Return false and exit the function.
1484 __ Bind(&return_false);
1485 __ Mov(out, 0);
1486 __ Bind(&end);
1487}
1488
1489static void GenerateVisitStringIndexOf(HInvoke* invoke,
1490 ArmVIXLAssembler* assembler,
1491 CodeGeneratorARMVIXL* codegen,
1492 ArenaAllocator* allocator,
1493 bool start_at_zero) {
1494 LocationSummary* locations = invoke->GetLocations();
1495
1496 // Note that the null check must have been done earlier.
1497 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1498
 1499 // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know statically,
 1500 // dispatch directly for a large constant, or omit the slow path for a small constant or a char.
1501 SlowPathCodeARMVIXL* slow_path = nullptr;
1502 HInstruction* code_point = invoke->InputAt(1);
1503 if (code_point->IsIntConstant()) {
1504 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1505 std::numeric_limits<uint16_t>::max()) {
1506 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1507 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1508 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1509 codegen->AddSlowPath(slow_path);
1510 __ B(slow_path->GetEntryLabel());
1511 __ Bind(slow_path->GetExitLabel());
1512 return;
1513 }
1514 } else if (code_point->GetType() != Primitive::kPrimChar) {
1515 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
 1516 // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1517 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1518 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1519 codegen->AddSlowPath(slow_path);
1520 __ B(hs, slow_path->GetEntryLabel());
1521 }
1522
1523 if (start_at_zero) {
1524 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1525 DCHECK(tmp_reg.Is(r2));
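// The entrypoint takes the string, the code point and the start index in the first
// three argument registers, so the temp reserved by the locations builder for the
// start index is expected to be r2.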
1526 // Start-index = 0.
1527 __ Mov(tmp_reg, 0);
1528 }
1529
1530 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1531 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1532
1533 if (slow_path != nullptr) {
1534 __ Bind(slow_path->GetExitLabel());
1535 }
1536}
1537
1538void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1539 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1540 LocationSummary::kCallOnMainAndSlowPath,
1541 kIntrinsified);
1542 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1543 // best to align the inputs accordingly.
1544 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1545 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1546 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1547 locations->SetOut(LocationFrom(r0));
1548
1549 // Need to send start-index=0.
1550 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1551}
1552
1553void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1554 GenerateVisitStringIndexOf(
1555 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1556}
1557
1558void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1559 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1560 LocationSummary::kCallOnMainAndSlowPath,
1561 kIntrinsified);
1562 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1563 // best to align the inputs accordingly.
1564 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1565 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1566 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1567 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1568 locations->SetOut(LocationFrom(r0));
1569}
1570
1571void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1572 GenerateVisitStringIndexOf(
1573 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1574}
1575
1576void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1577 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1578 LocationSummary::kCallOnMainAndSlowPath,
1579 kIntrinsified);
1580 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1581 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1582 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1583 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1584 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1585 locations->SetOut(LocationFrom(r0));
1586}
1587
1588void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1589 ArmVIXLAssembler* assembler = GetAssembler();
1590 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1591 __ Cmp(byte_array, 0);
1592 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1593 codegen_->AddSlowPath(slow_path);
1594 __ B(eq, slow_path->GetEntryLabel());
1595
1596 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1597 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1598 __ Bind(slow_path->GetExitLabel());
1599}
1600
1601void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1602 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1603 LocationSummary::kCallOnMainOnly,
1604 kIntrinsified);
1605 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1606 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1607 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1608 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1609 locations->SetOut(LocationFrom(r0));
1610}
1611
1612void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1613 // No need to emit code checking whether `locations->InAt(2)` is a null
1614 // pointer, as callers of the native method
1615 //
1616 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1617 //
1618 // all include a null check on `data` before calling that method.
1619 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1620 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1621}
1622
1623void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1624 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1625 LocationSummary::kCallOnMainAndSlowPath,
1626 kIntrinsified);
1627 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1628 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1629 locations->SetOut(LocationFrom(r0));
1630}
1631
1632void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1633 ArmVIXLAssembler* assembler = GetAssembler();
1634 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1635 __ Cmp(string_to_copy, 0);
1636 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1637 codegen_->AddSlowPath(slow_path);
1638 __ B(eq, slow_path->GetEntryLabel());
1639
1640 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1641 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1642
1643 __ Bind(slow_path->GetExitLabel());
1644}
1645
1646void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1647 // The only read barrier implementation supporting the
1648 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1649 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1650 return;
1651 }
1652
1653 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1654 LocationSummary* locations = invoke->GetLocations();
1655 if (locations == nullptr) {
1656 return;
1657 }
1658
1659 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1660 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1661 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1662
1663 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1664 locations->SetInAt(1, Location::RequiresRegister());
1665 }
1666 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1667 locations->SetInAt(3, Location::RequiresRegister());
1668 }
1669 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1670 locations->SetInAt(4, Location::RequiresRegister());
1671 }
1672 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1673 // Temporary register IP cannot be used in
1674 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1675 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1676 // temporary register from the register allocator.
1677 locations->AddTemp(Location::RequiresRegister());
1678 }
1679}
1680
1681static void CheckPosition(ArmVIXLAssembler* assembler,
1682 Location pos,
1683 vixl32::Register input,
1684 Location length,
1685 SlowPathCodeARMVIXL* slow_path,
1686 vixl32::Register temp,
1687 bool length_is_input_length = false) {
1688 // Where is the length in the Array?
1689 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1690
1691 if (pos.IsConstant()) {
1692 int32_t pos_const = Int32ConstantFrom(pos);
1693 if (pos_const == 0) {
1694 if (!length_is_input_length) {
1695 // Check that length(input) >= length.
1696 __ Ldr(temp, MemOperand(input, length_offset));
1697 if (length.IsConstant()) {
1698 __ Cmp(temp, Int32ConstantFrom(length));
1699 } else {
1700 __ Cmp(temp, RegisterFrom(length));
1701 }
1702 __ B(lt, slow_path->GetEntryLabel());
1703 }
1704 } else {
1705 // Check that length(input) >= pos.
1706 __ Ldr(temp, MemOperand(input, length_offset));
1707 __ Subs(temp, temp, pos_const);
1708 __ B(lt, slow_path->GetEntryLabel());
1709
1710 // Check that (length(input) - pos) >= length.
1711 if (length.IsConstant()) {
1712 __ Cmp(temp, Int32ConstantFrom(length));
1713 } else {
1714 __ Cmp(temp, RegisterFrom(length));
1715 }
1716 __ B(lt, slow_path->GetEntryLabel());
1717 }
1718 } else if (length_is_input_length) {
1719 // The only way the copy can succeed is if pos is zero.
1720 vixl32::Register pos_reg = RegisterFrom(pos);
1721 __ Cbnz(pos_reg, slow_path->GetEntryLabel());
1722 } else {
1723 // Check that pos >= 0.
1724 vixl32::Register pos_reg = RegisterFrom(pos);
1725 __ Cmp(pos_reg, 0);
1726 __ B(lt, slow_path->GetEntryLabel());
1727
1728 // Check that pos <= length(input).
1729 __ Ldr(temp, MemOperand(input, length_offset));
1730 __ Subs(temp, temp, pos_reg);
1731 __ B(lt, slow_path->GetEntryLabel());
1732
1733 // Check that (length(input) - pos) >= length.
1734 if (length.IsConstant()) {
1735 __ Cmp(temp, Int32ConstantFrom(length));
1736 } else {
1737 __ Cmp(temp, RegisterFrom(length));
1738 }
1739 __ B(lt, slow_path->GetEntryLabel());
1740 }
1741}
1742
1743void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1744 // The only read barrier implementation supporting the
1745 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1746 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1747
1748 ArmVIXLAssembler* assembler = GetAssembler();
1749 LocationSummary* locations = invoke->GetLocations();
1750
1751 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1752 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1753 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1754 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1755 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1756
1757 vixl32::Register src = InputRegisterAt(invoke, 0);
1758 Location src_pos = locations->InAt(1);
1759 vixl32::Register dest = InputRegisterAt(invoke, 2);
1760 Location dest_pos = locations->InAt(3);
1761 Location length = locations->InAt(4);
1762 Location temp1_loc = locations->GetTemp(0);
1763 vixl32::Register temp1 = RegisterFrom(temp1_loc);
1764 Location temp2_loc = locations->GetTemp(1);
1765 vixl32::Register temp2 = RegisterFrom(temp2_loc);
1766 Location temp3_loc = locations->GetTemp(2);
1767 vixl32::Register temp3 = RegisterFrom(temp3_loc);
1768
1769 SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1770 codegen_->AddSlowPath(intrinsic_slow_path);
1771
1772 vixl32::Label conditions_on_positions_validated;
1773 SystemArrayCopyOptimizations optimizations(invoke);
1774
1775 // If source and destination are the same, we go to slow path if we need to do
1776 // forward copying.
1777 if (src_pos.IsConstant()) {
1778 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1779 if (dest_pos.IsConstant()) {
1780 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1781 if (optimizations.GetDestinationIsSource()) {
1782 // Checked when building locations.
1783 DCHECK_GE(src_pos_constant, dest_pos_constant);
1784 } else if (src_pos_constant < dest_pos_constant) {
1785 __ Cmp(src, dest);
1786 __ B(eq, intrinsic_slow_path->GetEntryLabel());
1787 }
1788
1789 // Checked when building locations.
1790 DCHECK(!optimizations.GetDestinationIsSource()
1791 || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1792 } else {
1793 if (!optimizations.GetDestinationIsSource()) {
1794 __ Cmp(src, dest);
1795 __ B(ne, &conditions_on_positions_validated);
1796 }
1797 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1798 __ B(gt, intrinsic_slow_path->GetEntryLabel());
1799 }
1800 } else {
1801 if (!optimizations.GetDestinationIsSource()) {
1802 __ Cmp(src, dest);
1803 __ B(ne, &conditions_on_positions_validated);
1804 }
1805 if (dest_pos.IsConstant()) {
1806 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1807 __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1808 } else {
1809 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1810 }
1811 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1812 }
1813
1814 __ Bind(&conditions_on_positions_validated);
1815
1816 if (!optimizations.GetSourceIsNotNull()) {
1817 // Bail out if the source is null.
1818 __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
1819 }
1820
1821 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1822 // Bail out if the destination is null.
1823 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
1824 }
1825
1826 // If the length is negative, bail out.
1827 // We have already checked in the LocationsBuilder for the constant case.
1828 if (!length.IsConstant() &&
1829 !optimizations.GetCountIsSourceLength() &&
1830 !optimizations.GetCountIsDestinationLength()) {
1831 __ Cmp(RegisterFrom(length), 0);
1832 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1833 }
1834
1835 // Validity checks: source.
1836 CheckPosition(assembler,
1837 src_pos,
1838 src,
1839 length,
1840 intrinsic_slow_path,
1841 temp1,
1842 optimizations.GetCountIsSourceLength());
1843
1844 // Validity checks: dest.
1845 CheckPosition(assembler,
1846 dest_pos,
1847 dest,
1848 length,
1849 intrinsic_slow_path,
1850 temp1,
1851 optimizations.GetCountIsDestinationLength());
1852
1853 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1854 // Check whether all elements of the source array are assignable to the component
1855 // type of the destination array. We do two checks: the classes are the same,
1856 // or the destination is Object[]. If none of these checks succeed, we go to the
1857 // slow path.
1858
1859 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1860 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1861 // /* HeapReference<Class> */ temp1 = src->klass_
1862 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1863 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1864 // Bail out if the source is not a non primitive array.
1865 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1866 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1867 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1868 __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
1869 // If heap poisoning is enabled, `temp1` has been unpoisoned
 1870 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1871 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1872 __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
1873 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1874 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
1875 }
1876
1877 // /* HeapReference<Class> */ temp1 = dest->klass_
1878 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1879 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
1880
1881 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1882 // Bail out if the destination is not a non primitive array.
1883 //
1884 // Register `temp1` is not trashed by the read barrier emitted
1885 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1886 // method produces a call to a ReadBarrierMarkRegX entry point,
1887 // which saves all potentially live registers, including
 1888 // temporaries such as `temp1`.
1889 // /* HeapReference<Class> */ temp2 = temp1->component_type_
1890 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1891 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
1892 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
1893 // If heap poisoning is enabled, `temp2` has been unpoisoned
 1894 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1895 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
1896 __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
1897 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1898 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
1899 }
1900
1901 // For the same reason given earlier, `temp1` is not trashed by the
1902 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1903 // /* HeapReference<Class> */ temp2 = src->klass_
1904 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1905 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
1906 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
1907 __ Cmp(temp1, temp2);
1908
1909 if (optimizations.GetDestinationIsTypedObjectArray()) {
1910 vixl32::Label do_copy;
1911 __ B(eq, &do_copy);
1912 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1913 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1914 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1915 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1916 // We do not need to emit a read barrier for the following
1917 // heap reference load, as `temp1` is only used in a
1918 // comparison with null below, and this reference is not
1919 // kept afterwards.
1920 __ Ldr(temp1, MemOperand(temp1, super_offset));
1921 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
1922 __ Bind(&do_copy);
1923 } else {
1924 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1925 }
1926 } else {
1927 // Non read barrier code.
1928
1929 // /* HeapReference<Class> */ temp1 = dest->klass_
1930 __ Ldr(temp1, MemOperand(dest, class_offset));
1931 // /* HeapReference<Class> */ temp2 = src->klass_
1932 __ Ldr(temp2, MemOperand(src, class_offset));
1933 bool did_unpoison = false;
1934 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1935 !optimizations.GetSourceIsNonPrimitiveArray()) {
1936 // One or two of the references need to be unpoisoned. Unpoison them
1937 // both to make the identity check valid.
1938 assembler->MaybeUnpoisonHeapReference(temp1);
1939 assembler->MaybeUnpoisonHeapReference(temp2);
1940 did_unpoison = true;
1941 }
1942
1943 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1944 // Bail out if the destination is not a non primitive array.
1945 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1946 __ Ldr(temp3, MemOperand(temp1, component_offset));
1947 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
1948 assembler->MaybeUnpoisonHeapReference(temp3);
1949 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1950 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1951 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1952 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
1953 }
1954
1955 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1956 // Bail out if the source is not a non primitive array.
1957 // /* HeapReference<Class> */ temp3 = temp2->component_type_
1958 __ Ldr(temp3, MemOperand(temp2, component_offset));
1959 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
1960 assembler->MaybeUnpoisonHeapReference(temp3);
1961 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1962 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1963 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1964 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
1965 }
1966
1967 __ Cmp(temp1, temp2);
1968
1969 if (optimizations.GetDestinationIsTypedObjectArray()) {
1970 vixl32::Label do_copy;
1971 __ B(eq, &do_copy);
1972 if (!did_unpoison) {
1973 assembler->MaybeUnpoisonHeapReference(temp1);
1974 }
1975 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1976 __ Ldr(temp1, MemOperand(temp1, component_offset));
1977 assembler->MaybeUnpoisonHeapReference(temp1);
1978 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1979 __ Ldr(temp1, MemOperand(temp1, super_offset));
1980 // No need to unpoison the result, we're comparing against null.
1981 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
1982 __ Bind(&do_copy);
1983 } else {
1984 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1985 }
1986 }
1987 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1988 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1989 // Bail out if the source is not a non primitive array.
1990 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1991 // /* HeapReference<Class> */ temp1 = src->klass_
1992 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1993 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1994 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1995 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1996 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1997 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
1998 // If heap poisoning is enabled, `temp3` has been unpoisoned
 1999 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2000 } else {
2001 // /* HeapReference<Class> */ temp1 = src->klass_
2002 __ Ldr(temp1, MemOperand(src, class_offset));
2003 assembler->MaybeUnpoisonHeapReference(temp1);
2004 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2005 __ Ldr(temp3, MemOperand(temp1, component_offset));
2006 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2007 assembler->MaybeUnpoisonHeapReference(temp3);
2008 }
2009 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2010 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2011 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2012 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2013 }
2014
2015 int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
2016 uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
2017 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
2018
2019 // Compute the base source address in `temp1`.
2020 if (src_pos.IsConstant()) {
2021 int32_t constant = Int32ConstantFrom(src_pos);
2022 __ Add(temp1, src, element_size * constant + offset);
2023 } else {
2024 __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
2025 __ Add(temp1, temp1, offset);
2026 }
2027
2028 // Compute the end source address in `temp3`.
2029 if (length.IsConstant()) {
2030 int32_t constant = Int32ConstantFrom(length);
2031 __ Add(temp3, temp1, element_size * constant);
2032 } else {
2033 __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
2034 }
2035
2036 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2037 // The base destination address is computed later, as `temp2` is
2038 // used for intermediate computations.
2039
2040 // SystemArrayCopy implementation for Baker read barriers (see
2041 // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
2042 //
2043 // if (src_ptr != end_ptr) {
 2044 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2045 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
Roland Levillain4bbca2a2016-11-03 18:09:18 +00002046 // bool is_gray = (rb_state == ReadBarrier::GrayState());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002047 // if (is_gray) {
2048 // // Slow-path copy.
2049 // do {
2050 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2051 // } while (src_ptr != end_ptr)
2052 // } else {
2053 // // Fast-path copy.
2054 // do {
2055 // *dest_ptr++ = *src_ptr++;
2056 // } while (src_ptr != end_ptr)
2057 // }
2058 // }
2059
2060 vixl32::Label loop, done;
2061
2062 // Don't enter copy loop if `length == 0`.
2063 __ Cmp(temp1, temp3);
2064 __ B(eq, &done);
2065
2066 // /* int32_t */ monitor = src->monitor_
2067 __ Ldr(temp2, MemOperand(src, monitor_offset));
2068 // /* LockWord */ lock_word = LockWord(monitor)
2069 static_assert(sizeof(LockWord) == sizeof(int32_t),
2070 "art::LockWord and int32_t have different sizes.");
2071
2072 // Introduce a dependency on the lock_word including the rb_state,
2073 // which shall prevent load-load reordering without using
2074 // a memory barrier (which would be more expensive).
2075 // `src` is unchanged by this operation, but its value now depends
2076 // on `temp2`.
2077 __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
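// A logical shift right by 32 always produces 0, so the ADD leaves `src` unchanged
// while still creating the register dependency on `temp2` described above.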
2078
2079 // Slow path used to copy array when `src` is gray.
2080 SlowPathCodeARMVIXL* read_barrier_slow_path =
2081 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2082 codegen_->AddSlowPath(read_barrier_slow_path);
2083
2084 // Given the numeric representation, it's enough to check the low bit of the
2085 // rb_state. We do that by shifting the bit out of the lock word with LSRS
 2086 // which can be a 16-bit instruction, unlike the TST immediate.
Roland Levillain4bbca2a2016-11-03 18:09:18 +00002087 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2088 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
Anton Kirilov5ec62182016-10-13 20:16:02 +01002089 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2090 // Carry flag is the last bit shifted out by LSRS.
2091 __ B(cs, read_barrier_slow_path->GetEntryLabel());
2092
2093 // Fast-path copy.
2094
2095 // Compute the base destination address in `temp2`.
2096 if (dest_pos.IsConstant()) {
2097 int32_t constant = Int32ConstantFrom(dest_pos);
2098 __ Add(temp2, dest, element_size * constant + offset);
2099 } else {
2100 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2101 __ Add(temp2, temp2, offset);
2102 }
2103
2104 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2105 // poison/unpoison.
2106 __ Bind(&loop);
2107
2108 {
2109 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2110 const vixl32::Register temp_reg = temps.Acquire();
2111
2112 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2113 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2114 }
2115
2116 __ Cmp(temp1, temp3);
2117 __ B(ne, &loop);
2118
2119 __ Bind(read_barrier_slow_path->GetExitLabel());
2120 __ Bind(&done);
2121 } else {
2122 // Non read barrier code.
2123
2124 // Compute the base destination address in `temp2`.
2125 if (dest_pos.IsConstant()) {
2126 int32_t constant = Int32ConstantFrom(dest_pos);
2127 __ Add(temp2, dest, element_size * constant + offset);
2128 } else {
2129 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2130 __ Add(temp2, temp2, offset);
2131 }
2132
2133 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2134 // poison/unpoison.
2135 vixl32::Label loop, done;
2136 __ Cmp(temp1, temp3);
2137 __ B(eq, &done);
2138 __ Bind(&loop);
2139
2140 {
2141 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2142 const vixl32::Register temp_reg = temps.Acquire();
2143
2144 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2145 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2146 }
2147
2148 __ Cmp(temp1, temp3);
2149 __ B(ne, &loop);
2150 __ Bind(&done);
2151 }
2152
2153 // We only need one card marking on the destination array.
2154 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
2155
2156 __ Bind(intrinsic_slow_path->GetExitLabel());
2157}
2158
2159static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2160 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2161 // the code generator. Furthermore, the register allocator creates fixed live intervals
2162 // for all caller-saved registers because we are doing a function call. As a result, if
2163 // the input and output locations are unallocated, the register allocator runs out of
2164 // registers and fails; however, a debuggable graph is not the common case.
2165 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2166 return;
2167 }
2168
2169 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2170 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2171 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2172
2173 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2174 LocationSummary::kCallOnMainOnly,
2175 kIntrinsified);
2176 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2177
2178 locations->SetInAt(0, Location::RequiresFpuRegister());
2179 locations->SetOut(Location::RequiresFpuRegister());
2180 // Native code uses the soft float ABI.
2181 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2182 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2183}
2184
2185static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2186 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2187 // the code generator. Furthermore, the register allocator creates fixed live intervals
2188 // for all caller-saved registers because we are doing a function call. As a result, if
2189 // the input and output locations are unallocated, the register allocator runs out of
2190 // registers and fails; however, a debuggable graph is not the common case.
2191 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2192 return;
2193 }
2194
2195 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2196 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2197 DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
2198 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2199
2200 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2201 LocationSummary::kCallOnMainOnly,
2202 kIntrinsified);
2203 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2204
2205 locations->SetInAt(0, Location::RequiresFpuRegister());
2206 locations->SetInAt(1, Location::RequiresFpuRegister());
2207 locations->SetOut(Location::RequiresFpuRegister());
2208 // Native code uses the soft float ABI.
2209 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2210 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2211 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2212 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2213}
2214
2215static void GenFPToFPCall(HInvoke* invoke,
2216 ArmVIXLAssembler* assembler,
2217 CodeGeneratorARMVIXL* codegen,
2218 QuickEntrypointEnum entry) {
2219 LocationSummary* const locations = invoke->GetLocations();
2220
2221 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2222 DCHECK(locations->WillCall() && locations->Intrinsified());
2223
2224 // Native code uses the soft float ABI.
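// The VMOV below splits the double argument into the core register pair that the call
// expects, and the VMOV after the call reassembles the double result from the same pair.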
2225 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2226 RegisterFrom(locations->GetTemp(1)),
2227 InputDRegisterAt(invoke, 0));
2228 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2229 __ Vmov(OutputDRegister(invoke),
2230 RegisterFrom(locations->GetTemp(0)),
2231 RegisterFrom(locations->GetTemp(1)));
2232}
2233
2234static void GenFPFPToFPCall(HInvoke* invoke,
2235 ArmVIXLAssembler* assembler,
2236 CodeGeneratorARMVIXL* codegen,
2237 QuickEntrypointEnum entry) {
2238 LocationSummary* const locations = invoke->GetLocations();
2239
2240 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2241 DCHECK(locations->WillCall() && locations->Intrinsified());
2242
2243 // Native code uses the soft float ABI.
2244 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2245 RegisterFrom(locations->GetTemp(1)),
2246 InputDRegisterAt(invoke, 0));
2247 __ Vmov(RegisterFrom(locations->GetTemp(2)),
2248 RegisterFrom(locations->GetTemp(3)),
2249 InputDRegisterAt(invoke, 1));
2250 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2251 __ Vmov(OutputDRegister(invoke),
2252 RegisterFrom(locations->GetTemp(0)),
2253 RegisterFrom(locations->GetTemp(1)));
2254}
2255
2256void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2257 CreateFPToFPCallLocations(arena_, invoke);
2258}
2259
2260void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2261 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2262}
2263
2264void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2265 CreateFPToFPCallLocations(arena_, invoke);
2266}
2267
2268void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2269 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2270}
2271
2272void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2273 CreateFPToFPCallLocations(arena_, invoke);
2274}
2275
2276void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2277 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2278}
2279
2280void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2281 CreateFPToFPCallLocations(arena_, invoke);
2282}
2283
2284void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2285 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2286}
2287
2288void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2289 CreateFPToFPCallLocations(arena_, invoke);
2290}
2291
2292void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2293 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2294}
2295
2296void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2297 CreateFPToFPCallLocations(arena_, invoke);
2298}
2299
2300void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2301 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2302}
2303
2304void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2305 CreateFPToFPCallLocations(arena_, invoke);
2306}
2307
2308void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2309 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2310}
2311
2312void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2313 CreateFPToFPCallLocations(arena_, invoke);
2314}
2315
2316void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2317 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2318}
2319
2320void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2321 CreateFPToFPCallLocations(arena_, invoke);
2322}
2323
2324void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2325 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2326}
2327
2328void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2329 CreateFPToFPCallLocations(arena_, invoke);
2330}
2331
2332void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2333 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2334}
2335
2336void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2337 CreateFPToFPCallLocations(arena_, invoke);
2338}
2339
2340void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2341 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2342}
2343
2344void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2345 CreateFPToFPCallLocations(arena_, invoke);
2346}
2347
2348void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2349 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2350}
2351
2352void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2353 CreateFPToFPCallLocations(arena_, invoke);
2354}
2355
2356void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2357 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2358}
2359
2360void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2361 CreateFPToFPCallLocations(arena_, invoke);
2362}
2363
2364void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2365 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2366}
2367
2368void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2369 CreateFPFPToFPCallLocations(arena_, invoke);
2370}
2371
2372void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2373 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2374}
2375
2376void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2377 CreateFPFPToFPCallLocations(arena_, invoke);
2378}
2379
2380void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2381 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2382}
2383
2384void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2385 CreateFPFPToFPCallLocations(arena_, invoke);
2386}
2387
2388void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2389 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2390}
2391
2392void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2393 CreateIntToIntLocations(arena_, invoke);
2394}
2395
2396void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2397 ArmVIXLAssembler* assembler = GetAssembler();
2398 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2399}
2400
2401void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2402 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2403 LocationSummary::kNoCall,
2404 kIntrinsified);
2405 locations->SetInAt(0, Location::RequiresRegister());
2406 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2407}
2408
2409void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2410 ArmVIXLAssembler* assembler = GetAssembler();
2411 LocationSummary* locations = invoke->GetLocations();
2412
2413 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2414 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2415 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2416 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2417
2418 __ Rbit(out_reg_lo, in_reg_hi);
2419 __ Rbit(out_reg_hi, in_reg_lo);
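// Reversing all 64 bits is equivalent to bit-reversing each 32-bit half and swapping
// the halves, hence the crossed low/high operands.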
2420}
2421
2422void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2423 CreateIntToIntLocations(arena_, invoke);
2424}
2425
2426void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2427 ArmVIXLAssembler* assembler = GetAssembler();
2428 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2429}
2430
2431void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2432 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2433 LocationSummary::kNoCall,
2434 kIntrinsified);
2435 locations->SetInAt(0, Location::RequiresRegister());
2436 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2437}
2438
2439void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2440 ArmVIXLAssembler* assembler = GetAssembler();
2441 LocationSummary* locations = invoke->GetLocations();
2442
2443 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2444 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2445 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2446 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2447
2448 __ Rev(out_reg_lo, in_reg_hi);
2449 __ Rev(out_reg_hi, in_reg_lo);
2450}
2451
2452void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2453 CreateIntToIntLocations(arena_, invoke);
2454}
2455
2456void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2457 ArmVIXLAssembler* assembler = GetAssembler();
2458 __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2459}
2460
2461static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
2462 DCHECK(Primitive::IsIntOrLongType(type)) << type;
2463 DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
2464 DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
2465
2466 bool is_long = type == Primitive::kPrimLong;
2467 LocationSummary* locations = instr->GetLocations();
2468 Location in = locations->InAt(0);
2469 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2470 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2471 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2472 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2473 vixl32::Register out_r = OutputRegister(instr);
2474
2475 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
 2476 // According to the Cortex-A57 and Cortex-A72 optimization guides, compared to transferring to a full D-reg,
 2477 // transferring data from a core reg to the upper or lower half of a VFP D-reg incurs extra latency.
 2478 // That's why for the integer bit count we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
2479 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
2480 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
2481 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
2482 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
2483 if (is_long) {
2484 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
2485 }
2486 __ Vmov(out_r, tmp_s);
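// For example, a long input 0x0000000300000007 yields per-byte counts {3,0,0,0,2,0,0,0}
// after VCNT; the VPADDL chain folds these into 5 in the low lane, which the final VMOV
// returns as the bit count.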
2487}
2488
2489void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2490 CreateIntToIntLocations(arena_, invoke);
2491 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2492}
2493
2494void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2495 GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
2496}
2497
2498void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2499 VisitIntegerBitCount(invoke);
2500}
2501
2502void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2503 GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
2504}
2505
2506void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2507 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2508 LocationSummary::kNoCall,
2509 kIntrinsified);
2510 locations->SetInAt(0, Location::RequiresRegister());
2511 locations->SetInAt(1, Location::RequiresRegister());
2512 locations->SetInAt(2, Location::RequiresRegister());
2513 locations->SetInAt(3, Location::RequiresRegister());
2514 locations->SetInAt(4, Location::RequiresRegister());
2515
2516 // Temporary registers to store lengths of strings and for calculations.
2517 locations->AddTemp(Location::RequiresRegister());
2518 locations->AddTemp(Location::RequiresRegister());
2519 locations->AddTemp(Location::RequiresRegister());
2520}
2521
2522void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2523 ArmVIXLAssembler* assembler = GetAssembler();
2524 LocationSummary* locations = invoke->GetLocations();
2525
2526 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2527 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2528 DCHECK_EQ(char_size, 2u);
2529
2530 // Location of data in char array buffer.
2531 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2532
2533 // Location of char array data in string.
2534 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2535
2536 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
 2537 // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2538 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2539 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2540 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2541 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2542 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2543
2544 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2545 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2546 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2547
2548 vixl32::Label done, compressed_string_loop;
 2549 // Compute the pointer to the dst position the chars will be copied into.
2550 __ Add(dst_ptr, dstObj, data_offset);
2551 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2552
2553 __ Subs(num_chr, srcEnd, srcBegin);
2554 // Early out for valid zero-length retrievals.
2555 __ B(eq, &done);
2556
2557 // src range to copy.
2558 __ Add(src_ptr, srcObj, value_offset);
2559
2560 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2561 vixl32::Register temp;
2562 vixl32::Label compressed_string_preloop;
2563 if (mirror::kUseStringCompression) {
2564 // Location of count in string.
2565 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2566 temp = temps.Acquire();
2567 // String's length.
2568 __ Ldr(temp, MemOperand(srcObj, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002569 __ Tst(temp, 1);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002570 temps.Release(temp);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002571 __ B(eq, &compressed_string_preloop);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002572 }
2573 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2574
2575 // Do the copy.
2576 vixl32::Label loop, remainder;
2577
2578 temp = temps.Acquire();
2579 // Save repairing the value of num_chr on the < 4 character path.
2580 __ Subs(temp, num_chr, 4);
2581 __ B(lt, &remainder);
2582
2583 // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2584 __ Mov(num_chr, temp);
2585
 2586 // Main loop, used for longer fetches, loads and stores 4x16-bit characters at a time.
 2587 // (LDRD/STRD fault on unaligned addresses, and it's not worth inlining extra
 2588 // alignment-handling code everywhere this intrinsic applies.)
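// Each iteration copies 4 characters (8 bytes) using two 32-bit LDR/STR pairs: the first
// pair copies the upper 4 bytes of the chunk at offset 4, the second copies the lower
// 4 bytes and post-increments both pointers by 8.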
2589 __ Bind(&loop);
2590 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2591 __ Subs(num_chr, num_chr, 4);
2592 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2593 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2594 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2595 temps.Release(temp);
2596 __ B(ge, &loop);
2597
2598 __ Adds(num_chr, num_chr, 4);
2599 __ B(eq, &done);
2600
 2601 // Loop for the < 4 character case and remainder handling. Loads and stores one
2602 // 16-bit Java character at a time.
2603 __ Bind(&remainder);
2604 temp = temps.Acquire();
2605 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2606 __ Subs(num_chr, num_chr, 1);
2607 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2608 temps.Release(temp);
2609 __ B(gt, &remainder);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002610
2611 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002612 __ B(&done);
2613
Anton Kirilov5ec62182016-10-13 20:16:02 +01002614 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
2615 DCHECK_EQ(c_char_size, 1u);
2616 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2617 __ Bind(&compressed_string_preloop);
2618 __ Add(src_ptr, src_ptr, srcBegin);
2619 __ Bind(&compressed_string_loop);
2620 temp = temps.Acquire();
2621 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2622 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2623 temps.Release(temp);
2624 __ Subs(num_chr, num_chr, 1);
2625 __ B(gt, &compressed_string_loop);
2626 }
2627
2628 __ Bind(&done);
2629}
2630
2631void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2632 CreateFPToIntLocations(arena_, invoke);
2633}
2634
2635void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2636 ArmVIXLAssembler* const assembler = GetAssembler();
2637 const vixl32::Register out = OutputRegister(invoke);
2638 // Shifting left by 1 bit makes the value encodable as an immediate operand;
2639 // we don't care about the sign bit anyway.
2640 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
2641
2642 __ Vmov(out, InputSRegisterAt(invoke, 0));
2643 // We don't care about the sign bit, so shift left.
2644 __ Lsl(out, out, 1);
2645 __ Eor(out, out, infinity);
2646 // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
2647 __ Clz(out, out);
2648 // Any number less than 32 logically shifted right by 5 bits results in 0;
2649 // the same operation on 32 yields 1.
2650 __ Lsr(out, out, 5);
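// For example, +/-Inf (0x7f800000 or 0xff800000) shifted left becomes 0xff000000, the EOR
// gives 0, CLZ returns 32 and the final shift produces 1; any finite value or NaN leaves
// a non-zero result, so CLZ is at most 31 and the shift yields 0.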
2651}
2652
2653void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2654 CreateFPToIntLocations(arena_, invoke);
2655}
2656
2657void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2658 ArmVIXLAssembler* const assembler = GetAssembler();
2659 const vixl32::Register out = OutputRegister(invoke);
2660 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2661 const vixl32::Register temp = temps.Acquire();
2662 // The highest 32 bits of double precision positive infinity separated into
2663 // two constants encodable as immediate operands.
2664 constexpr uint32_t infinity_high = 0x7f000000U;
2665 constexpr uint32_t infinity_high2 = 0x00f00000U;
2666
2667 static_assert((infinity_high | infinity_high2) ==
2668 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2669 "The constants do not add up to the high 32 bits of double "
2670 "precision positive infinity.");
2671 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2672 __ Eor(out, out, infinity_high);
2673 __ Eor(out, out, infinity_high2);
2674 // We don't care about the sign bit, so shift left.
2675 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2676 // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
2677 __ Clz(out, out);
2678 // Any number less than 32 logically shifted right by 5 bits results in 0;
2679 // the same operation on 32 yields 1.
2680 __ Lsr(out, out, 5);
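// As in the float case, only +/-Inf (high word 0x7ff00000 or 0xfff00000 with a zero low
// word) makes the ORR result 0, so CLZ returns 32 and the shift yields 1; everything else
// produces a non-zero value and hence 0.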
2681}
2682
2683UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
2684UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
2685UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
2686UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat)
2687UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong)
2688UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong)
2689UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathCeil) // Could be done by changing rounding mode, maybe?
2690UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFloor) // Could be done by changing rounding mode, maybe?
2691UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint)
2692UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
2693UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe?
2694UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
2695UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
2696UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
2697UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
2698UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
2699UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
2700UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
2701
Aart Bikff7d89c2016-11-07 08:49:28 -08002702UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf)
 2703UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter)
2704
Anton Kirilov5ec62182016-10-13 20:16:02 +01002705// 1.8.
2706UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
2707UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
2708UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
2709UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
2710UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
2711
2712UNREACHABLE_INTRINSICS(ARMVIXL)
2713
2714#undef __
2715
2716} // namespace arm
2717} // namespace art