/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <iostream>
#include <type_traits>

#include "assembler_arm_vixl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

namespace art {
namespace arm {

#ifdef ___
#error "ARM Assembler macro already defined."
#else
#define ___   vixl_masm_.
#endif

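// The thread register (TR) holds a pointer to the current Thread in compiled managed code.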
extern const vixl32::Register tr(TR);

void ArmVIXLAssembler::FinalizeCode() {
  vixl_masm_.FinalizeCode();
}

size_t ArmVIXLAssembler::CodeSize() const {
  return vixl_masm_.GetSizeOfCodeGenerated();
}

const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const {
  return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
}

void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) {
  // Copy the instructions from the buffer.
  MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
  region.CopyFrom(0, from);
}

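// Heap reference poisoning stores references in negated form; negation is its own
// inverse, so the same RSB instruction both poisons and unpoisons a reference.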
void ArmVIXLAssembler::PoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::UnpoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    PoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    UnpoisonHeapReference(reg);
  }
}

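// Load a 32-bit immediate, preferring a single MVN when only the bitwise-inverted
// value fits an ARM modified immediate.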
void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
  // TODO(VIXL): Implement this optimization in VIXL.
  if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
    ___ Mvn(rd, ~value);
  } else {
    ___ Mov(rd, value);
  }
}

bool ArmVIXLAssembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
  return vixl_masm_.IsModifiedImmediate(immediate);
}

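// Returns whether `immediate` can be encoded in the immediate field of `opcode`;
// ADD/SUB additionally accept a plain 12-bit immediate when the condition flags
// are not updated.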
bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc) {
  switch (opcode) {
    case ADD:
    case SUB:
      // Less than (or equal to) 12 bits can be done if we don't need to set condition codes.
      if (IsUint<12>(immediate) && set_cc != kCcSet) {
        return true;
      }
      return ShifterOperandCanAlwaysHold(immediate);

    case MOV:
      // TODO: Support less than or equal to 12 bits.
      return ShifterOperandCanAlwaysHold(immediate);

    case MVN:
    default:
      return ShifterOperandCanAlwaysHold(immediate);
  }
}

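// Try to split `offset` into a part encodable in the load/store immediate field
// (`allowed_offset_bits`) and a remainder that fits a single ADD on the base register.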
bool ArmVIXLAssembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                                               int32_t offset,
                                               /*out*/ int32_t* add_to_base,
                                               /*out*/ int32_t* offset_for_load_store) {
  int32_t other_bits = offset & ~allowed_offset_bits;
  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
    *add_to_base = offset & ~allowed_offset_bits;
    *offset_for_load_store = offset & allowed_offset_bits;
    return true;
  }
  return false;
}

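// Materialize an out-of-range offset: compute an adjusted base in `temp` and return
// the (possibly zero) offset left over for the load/store itself.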
int32_t ArmVIXLAssembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                                vixl32::Register temp,
                                                vixl32::Register base,
                                                int32_t offset) {
  DCHECK_NE(offset & ~allowed_offset_bits, 0);
  int32_t add_to_base, offset_for_load;
  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
    ___ Add(temp, base, add_to_base);
    return offset_for_load;
  } else {
    ___ Mov(temp, offset);
    ___ Add(temp, temp, base);
    return 0;
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kLoadSWord:
    case kLoadDWord:
    case kLoadWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kStoreSWord:
    case kStoreDWord:
    case kStoreWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      return IsAbsoluteUint<12>(offset);
    case kLoadSWord:
    case kLoadDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kLoadWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      return IsAbsoluteUint<12>(offset);
    case kStoreSWord:
    case kStoreDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kStoreWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// Implementation note: this method must emit at most one instruction when
// Address::CanHoldStoreOffsetThumb.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::StoreToOffset(StoreOperandType type,
                                     vixl32::Register reg,
                                     vixl32::Register base,
                                     int32_t offset) {
  vixl32::Register tmp_reg;
  UseScratchRegisterScope temps(&vixl_masm_);

  if (!CanHoldStoreOffsetThumb(type, offset)) {
    CHECK_NE(base.GetCode(), kIpCode);
    if ((reg.GetCode() != kIpCode) &&
        ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
      tmp_reg = temps.Acquire();
    } else {
      // Be careful not to use ip twice (for `reg` (or `reg` + 1 in
      // the case of a word-pair store) and `base`) to build the
      // Address object used by the store instruction(s) below.
      // Instead, save R5 on the stack (or R6 if R5 is already used by
      // `base`), use it as a secondary temporary register, and restore
      // it after the store instruction has been emitted.
      tmp_reg = (base.GetCode() != 5) ? r5 : r6;
      ___ Push(tmp_reg);
      if (base.GetCode() == kSpCode) {
        offset += kRegisterSize;
      }
    }
    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
    // and in the "unsplittable" path get rid of the "add" by using the store indexed instead.
    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset);
    base = tmp_reg;
  }
  DCHECK(CanHoldStoreOffsetThumb(type, offset));
  switch (type) {
    case kStoreByte:
      ___ Strb(reg, MemOperand(base, offset));
      break;
    case kStoreHalfword:
      ___ Strh(reg, MemOperand(base, offset));
      break;
    case kStoreWord:
      ___ Str(reg, MemOperand(base, offset));
      break;
    case kStoreWordPair:
      ___ Strd(reg, vixl32::Register(reg.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
  if ((tmp_reg.IsValid()) && (tmp_reg.GetCode() != kIpCode)) {
    CHECK(tmp_reg.Is(r5) || tmp_reg.Is(r6)) << tmp_reg;
    ___ Pop(tmp_reg);
  }
}

// Implementation note: this method must emit at most one instruction when
// Address::CanHoldLoadOffsetThumb.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::LoadFromOffset(LoadOperandType type,
                                      vixl32::Register dest,
                                      vixl32::Register base,
                                      int32_t offset) {
  if (!CanHoldLoadOffsetThumb(type, offset)) {
    CHECK(!base.Is(ip));
    // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
    DCHECK_NE(offset & ~allowed_offset_bits, 0);
    int32_t add_to_base, offset_for_load;
    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
      // Use reg for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
      AddConstant(dest, base, add_to_base);
      base = dest;
      offset = offset_for_load;
    } else {
      UseScratchRegisterScope temps(&vixl_masm_);
      vixl32::Register temp = (dest.Is(base)) ? temps.Acquire() : dest;
      LoadImmediate(temp, offset);
      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
      // Use reg for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
      ___ Add(dest, dest, (dest.Is(base)) ? temp : base);
      base = dest;
      offset = 0;
    }
  }

  DCHECK(CanHoldLoadOffsetThumb(type, offset));
  switch (type) {
    case kLoadSignedByte:
      ___ Ldrsb(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedByte:
      ___ Ldrb(dest, MemOperand(base, offset));
      break;
    case kLoadSignedHalfword:
      ___ Ldrsh(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedHalfword:
      ___ Ldrh(dest, MemOperand(base, offset));
      break;
    case kLoadWord:
      CHECK(!dest.IsSP());
      ___ Ldr(dest, MemOperand(base, offset));
      break;
    case kLoadWordPair:
      ___ Ldrd(dest, vixl32::Register(dest.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

void ArmVIXLAssembler::StoreSToOffset(vixl32::SRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::StoreDToOffset(vixl32::DRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadSFromOffset(vixl32::SRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadDFromOffset(vixl32::DRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

// Prefer Str to Add/Stm in ArmVIXLAssembler::StoreRegisterList and
// ArmVIXLAssembler::LoadRegisterList where this generates less code (size).
static constexpr int kRegListThreshold = 4;

void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        DCHECK_EQ(regs & (1u << base.GetCode()), 0u);
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Stm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) {
  AddConstant(rd, rd, value);
}

// TODO(VIXL): think about using adds which updates flags where possible.
void ArmVIXLAssembler::AddConstant(vixl32::Register rd,
                                   vixl32::Register rn,
                                   int32_t value) {
  DCHECK(vixl_masm_.OutsideITBlock());
  // TODO(VIXL): implement this optimization in VIXL.
  if (value == 0) {
    if (!rd.Is(rn)) {
      ___ Mov(rd, rn);
    }
    return;
  }
  ___ Add(rd, rn, value);
}

// Inside an IT block we must use the assembler; macro assembler instructions are not permitted.
void ArmVIXLAssembler::AddConstantInIt(vixl32::Register rd,
                                       vixl32::Register rn,
                                       int32_t value,
                                       vixl32::Condition cond) {
  DCHECK(vixl_masm_.InITBlock());
  if (value == 0) {
    ___ mov(cond, rd, rn);
  } else {
    ___ add(cond, rd, rn, value);
  }
}

void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn,
                                                   vixl32::Label* label,
                                                   bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    // In T32, Cbz/Cbnz instructions have the following limitations:
    // - There are only 7 bits (i:imm5:0) to encode the branch target address (cannot be a far target).
    // - Only low registers (i.e. R0 .. R7) can be encoded.
    // - Only forward branches (unbound labels) are supported.
    Cbz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(eq, label, is_far_target);
}

void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
                                                      vixl32::Label* label,
                                                      bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    Cbnz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(ne, label, is_far_target);
}

void ArmVIXLMacroAssembler::B(vixl32::Label* label) {
  if (!label->IsBound()) {
    // Try to use 16-bit T2 encoding of B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(al, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  MacroAssembler::B(label);
}

void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) {
  if (!label->IsBound() && !is_far_target) {
    // Try to use 16-bit T2 encoding of B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(cond, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  // To further reduce the Bcc encoding size and use the 16-bit T1 encoding,
  // callers can provide a hint to this function, i.e. is_far_target=false.
  // By default this function uses 'EncodingSizeType::Best', which generates a 32-bit T3 encoding.
  MacroAssembler::B(cond, label);
}

}  // namespace arm
}  // namespace art