Opt Compiler: ARM64: Enable callee-saved registers as defined by AAPCS64.

For now we keep kQuickSuspendRegister (x19) blocked, since Quick and the
runtime use it as a suspend counter register.
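
For illustration, with hypothetical spills of {x19, x20, lr} for the core
registers, {d8} for the fp registers and frame_size = 96, the new frame
entry lays the stack out as:

    sp[88] : lr       (frame_size - 8)
    sp[80] : x20
    sp[72] : x19      (frame_size - GetCoreSpillSize())
    sp[64] : d8       (frame_size - FrameEntrySpillSize())
    ...    : reserved frame space
    sp[0]  : current method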

Change-Id: I090d386670e81e7924e4aa9a3864ef30d0580a30
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3bc23fe..7588a29 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -402,15 +402,15 @@
                     kNumberOfAllocatableRegisters,
                     kNumberOfAllocatableFPRegisters,
                     kNumberOfAllocatableRegisterPairs,
-                    (1 << LR),
-                    0,
+                    callee_saved_core_registers.list(),
+                    callee_saved_fp_registers.list(),
                     compiler_options),
       block_labels_(nullptr),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this) {
   // Save the link register (containing the return address) to mimic Quick.
-  AddAllocatedRegister(Location::RegisterLocation(LR));
+  AddAllocatedRegister(LocationFrom(lr));
 }
 
 #undef __
@@ -448,26 +448,28 @@
     UseScratchRegisterScope temps(GetVIXLAssembler());
     Register temp = temps.AcquireX();
     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
-    __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
+    __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
     __ Ldr(wzr, MemOperand(temp, 0));
     RecordPcInfo(nullptr, 0);
   }
 
   int frame_size = GetFrameSize();
   __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
-  __ PokeCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize());
+  __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+  __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
 
   // Stack layout:
-  // sp[frame_size - 8]        : lr.
-  // ...                       : other preserved registers.
-  // sp[frame_size - regs_size]: first preserved register.
-  // ...                       : reserved frame space.
-  // sp[0]                     : current method.
+  //      sp[frame_size - 8]        : lr.
+  //      ...                       : other preserved core registers.
+  //      ...                       : preserved fp registers.
+  //      ...                       : reserved frame space.
+  //      sp[0]                     : current method.
 }
 
 void CodeGeneratorARM64::GenerateFrameExit() {
   int frame_size = GetFrameSize();
-  __ PeekCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize());
+  __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+  __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
   __ Drop(frame_size);
 }
 
@@ -555,26 +557,39 @@
   __ Bind(&done);
 }
 
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
-  // Block reserved registers:
-  //   ip0 (VIXL temporary)
-  //   ip1 (VIXL temporary)
-  //   tr
-  //   lr
-  // sp is not part of the allocatable registers, so we don't need to block it.
-  // TODO: Avoid blocking callee-saved registers, and instead preserve them
-  // where necessary.
+void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+  // Blocked core registers:
+  //      lr        : Runtime reserved.
+  //      tr        : Runtime reserved.
+  //      x19       : Runtime reserved (kQuickSuspendRegister).
+  //                  TODO: Unblock this when the runtime stops using it.
+  //      ip1       : VIXL core temp.
+  //      ip0       : VIXL core temp.
+  //
+  // Blocked fp registers:
+  //      d31       : VIXL fp temp.
   CPURegList reserved_core_registers = vixl_reserved_core_registers;
   reserved_core_registers.Combine(runtime_reserved_core_registers);
-  reserved_core_registers.Combine(quick_callee_saved_registers);
   while (!reserved_core_registers.IsEmpty()) {
     blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
   }
+
   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
-  reserved_fp_registers.Combine(CPURegList::GetCalleeSavedFP());
-  while (!reserved_core_registers.IsEmpty()) {
+  while (!reserved_fp_registers.IsEmpty()) {
     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
   }
+
+  if (is_baseline) {
+    CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
+    while (!reserved_core_baseline_registers.IsEmpty()) {
+      blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
+    }
+
+    CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
+    while (!reserved_fp_baseline_registers.IsEmpty()) {
+      blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+    }
+  }
 }
 
 Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 9a99dcc..2e937e2 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -50,14 +50,25 @@
 
 const vixl::Register tr = vixl::x18;                        // Thread Register
 static const vixl::Register kArtMethodRegister = vixl::w0;  // Method register on invoke.
+const vixl::Register kQuickSuspendRegister = vixl::x19;
 
 const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
 const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
-const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
-const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister,
-                                                    vixl::kXRegSize,
-                                                    kArm64CalleeSaveRefSpills);
 
+// TODO: Remove kQuickSuspendRegister from the reserved registers list when
+// the runtime stops using it as a suspend counter.
+const vixl::CPURegList runtime_reserved_core_registers(tr, kQuickSuspendRegister, vixl::lr);
+
+// Callee-saved registers defined by AAPCS64 (x19-x29), plus lr (x30) so that
+// the return address is saved and restored with the frame.
+const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister,
+                                                   vixl::kXRegSize,
+                                                   vixl::x19.code(),
+                                                   vixl::x30.code());
+const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister,
+                                                 vixl::kDRegSize,
+                                                 vixl::d8.code(),
+                                                 vixl::d15.code());
 Location ARM64ReturnLocation(Primitive::Type return_type);
 
 class SlowPathCodeARM64 : public SlowPathCode {
@@ -191,10 +201,14 @@
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
 
-  static const vixl::CPURegList& GetFramePreservedRegisters() {
-    static const vixl::CPURegList frame_preserved_regs =
-        vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit());
-    return frame_preserved_regs;
+  vixl::CPURegList GetFramePreservedCoreRegisters() const {
+    return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
+                            core_spill_mask_);
+  }
+
+  vixl::CPURegList GetFramePreservedFPRegisters() const {
+    return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
+                            fpu_spill_mask_);
   }
 
   void Bind(HBasicBlock* block) OVERRIDE;