ARM: Add new String.compareTo intrinsic.
Benchmarked on a Nexus 6P on the big cores, the little cores, and all
cores. The new intrinsic is faster than pStringCompareTo for compare
lengths in [1, 512], so the runtime call is no longer needed.
Change-Id: If853b592dfc5e561ea3389b51729f37a2c89c18e
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index f0e9ac5..4c68862 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -97,7 +97,8 @@
// Intrinsics
qpoints->pIndexOf = art_quick_indexof;
- qpoints->pStringCompareTo = art_quick_string_compareto;
+ // The ARM StringCompareTo intrinsic does not call the runtime.
+ qpoints->pStringCompareTo = nullptr;
qpoints->pMemcpy = memcpy;
// Read barrier.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 321b9d2..1bba4f9 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1679,145 +1679,6 @@
pop {r4, r10-r11, pc}
END art_quick_indexof
- /*
- * String's compareTo.
- *
- * Requires rARG0/rARG1 to have been previously checked for null. Will
- * return negative if this's string is < comp, 0 if they are the
- * same and positive if >.
- *
- * On entry:
- * r0: this object pointer
- * r1: comp object pointer
- *
- */
- .extern __memcmp16
-ENTRY art_quick_string_compareto
- mov r2, r0 @ this to r2, opening up r0 for return value
- sub r0, r2, r1 @ Same?
- cbnz r0,1f
- bx lr
-1: @ Same strings, return.
-
- push {r4, r7-r12, lr} @ 8 words - keep alignment
- .cfi_adjust_cfa_offset 32
- .cfi_rel_offset r4, 0
- .cfi_rel_offset r7, 4
- .cfi_rel_offset r8, 8
- .cfi_rel_offset r9, 12
- .cfi_rel_offset r10, 16
- .cfi_rel_offset r11, 20
- .cfi_rel_offset r12, 24
- .cfi_rel_offset lr, 28
-
- ldr r7, [r2, #MIRROR_STRING_COUNT_OFFSET]
- ldr r10, [r1, #MIRROR_STRING_COUNT_OFFSET]
- add r2, #MIRROR_STRING_VALUE_OFFSET
- add r1, #MIRROR_STRING_VALUE_OFFSET
-
- /*
- * At this point, we have:
- * value: r2/r1
- * offset: r4/r9
- * count: r7/r10
- * We're going to compute
- * r11 <- countDiff
- * r10 <- minCount
- */
- subs r11, r7, r10
- it ls
- movls r10, r7
-
- /*
- * Note: data pointers point to previous element so we can use pre-index
- * mode with base writeback.
- */
- subs r2, #2 @ offset to contents[-1]
- subs r1, #2 @ offset to contents[-1]
-
- /*
- * At this point we have:
- * r2: *this string data
- * r1: *comp string data
- * r10: iteration count for comparison
- * r11: value to return if the first part of the string is equal
- * r0: reserved for result
- * r3, r4, r7, r8, r9, r12 available for loading string data
- */
-
- subs r10, #2
- blt .Ldo_remainder2
-
- /*
- * Unroll the first two checks so we can quickly catch early mismatch
- * on long strings (but preserve incoming alignment)
- */
-
- ldrh r3, [r2, #2]!
- ldrh r4, [r1, #2]!
- ldrh r7, [r2, #2]!
- ldrh r8, [r1, #2]!
- subs r0, r3, r4
- it eq
- subseq r0, r7, r8
- bne .Ldone
- cmp r10, #28
- bgt .Ldo_memcmp16
- subs r10, #3
- blt .Ldo_remainder
-
-.Lloopback_triple:
- ldrh r3, [r2, #2]!
- ldrh r4, [r1, #2]!
- ldrh r7, [r2, #2]!
- ldrh r8, [r1, #2]!
- ldrh r9, [r2, #2]!
- ldrh r12,[r1, #2]!
- subs r0, r3, r4
- it eq
- subseq r0, r7, r8
- it eq
- subseq r0, r9, r12
- bne .Ldone
- subs r10, #3
- bge .Lloopback_triple
-
-.Ldo_remainder:
- adds r10, #3
- beq .Lreturn_diff
-
-.Lloopback_single:
- ldrh r3, [r2, #2]!
- ldrh r4, [r1, #2]!
- subs r0, r3, r4
- bne .Ldone
- subs r10, #1
- bne .Lloopback_single
-
-.Lreturn_diff:
- mov r0, r11
- pop {r4, r7-r12, pc}
-
-.Ldo_remainder2:
- adds r10, #2
- bne .Lloopback_single
- mov r0, r11
- pop {r4, r7-r12, pc}
-
- /* Long string case */
-.Ldo_memcmp16:
- mov r7, r11
- add r0, r2, #2
- add r1, r1, #2
- mov r2, r10
- bl __memcmp16
- cmp r0, #0
- it eq
- moveq r0, r7
-.Ldone:
- pop {r4, r7-r12, pc}
-END art_quick_string_compareto
-
/* Assembly routines used to handle ABI differences. */
/* double fmod(double a, double b) */
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 02629e8..a7d6d6f 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1205,9 +1205,9 @@
TEST_F(StubTest, StringCompareTo) {
- // There is no StringCompareTo runtime entrypoint for __aarch64__.
-#if defined(__i386__) || defined(__arm__) || \
- defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__))
+ // There is no StringCompareTo runtime entrypoint for __arm__ or __aarch64__.
+#if defined(__i386__) || defined(__mips__) || \
+ (defined(__x86_64__) && !defined(__APPLE__))
// TODO: Check the "Unresolved" allocation stubs
Thread* self = Thread::Current();