Introduce more compact ReadBarrierMark slow-paths.

Replace entry point ReadBarrierMark with 32
ReadBarrierMarkRegX entry points, using register
number X as input and output (instead of the standard
runtime calling convention) to save two moves in Baker's
read barrier mark slow-path code.

Test: ART host and target (ARM, ARM64) tests.
Bug: 29506760
Bug: 12687968
Change-Id: I73cfb82831cf040b8b018e984163c865cc44ed87
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index bf0f647..c3188b6 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -30,6 +30,42 @@
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regX uses an non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg14(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg15(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg16(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg17(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg18(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg19(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg20(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg21(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg23(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg24(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg25(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg26(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg27(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg28(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*);
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
 
@@ -86,7 +122,38 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = artReadBarrierMark;
+  qpoints->pReadBarrierMarkReg00 = artReadBarrierMark;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
+  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
+  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
+  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
+  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
+  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
+  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
+  qpoints->pReadBarrierMarkReg16 = art_quick_read_barrier_mark_reg16;
+  qpoints->pReadBarrierMarkReg17 = art_quick_read_barrier_mark_reg17;
+  qpoints->pReadBarrierMarkReg18 = art_quick_read_barrier_mark_reg18;
+  qpoints->pReadBarrierMarkReg19 = art_quick_read_barrier_mark_reg19;
+  qpoints->pReadBarrierMarkReg20 = art_quick_read_barrier_mark_reg20;
+  qpoints->pReadBarrierMarkReg21 = art_quick_read_barrier_mark_reg21;
+  qpoints->pReadBarrierMarkReg22 = art_quick_read_barrier_mark_reg22;
+  qpoints->pReadBarrierMarkReg23 = art_quick_read_barrier_mark_reg23;
+  qpoints->pReadBarrierMarkReg24 = art_quick_read_barrier_mark_reg24;
+  qpoints->pReadBarrierMarkReg25 = art_quick_read_barrier_mark_reg25;
+  qpoints->pReadBarrierMarkReg26 = art_quick_read_barrier_mark_reg26;
+  qpoints->pReadBarrierMarkReg27 = art_quick_read_barrier_mark_reg27;
+  qpoints->pReadBarrierMarkReg28 = art_quick_read_barrier_mark_reg28;
+  qpoints->pReadBarrierMarkReg29 = art_quick_read_barrier_mark_reg29;
+  qpoints->pReadBarrierMarkReg30 = nullptr;  // Cannot use register 30 (LR) to pass arguments.
+  qpoints->pReadBarrierMarkReg31 = nullptr;  // Cannot use register 31 (SP/XZR) to pass arguments.
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 10ee63f..e9ad1f4 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2214,3 +2214,59 @@
     asr   x0, x0, #1
     ret
 END art_quick_indexof
+
+    /*
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through register
+     * `reg`, thus following a non-standard runtime calling convention:
+     * - `reg` is used to pass the (sole) argument of this function
+     *   (instead of W0);
+     * - `reg` is used to return the result of this function (instead of W0);
+     * - W0 is treated like a normal (non-argument) caller-save register;
+     * - everything else is the same as in the standard runtime calling
+     *   convention (e.g. same callee-save registers).
+     */
+.macro READ_BARRIER_MARK_REG name, reg
+ENTRY \name
+    str   xLR, [sp, #-16]!              // Save return address and add padding (16B align stack).
+    .cfi_adjust_cfa_offset 16
+    .cfi_rel_offset x30, 0
+    mov   w0, \reg                      // Pass arg1 - obj from `reg`
+    bl    artReadBarrierMark            // artReadBarrierMark(obj)
+    mov   \reg, w0                      // Return result into `reg`
+    ldr   xLR, [sp], #16                // Restore return address and remove padding.
+    .cfi_restore x30
+    .cfi_adjust_cfa_offset -16
+    ret
+END \name
+.endm
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29