Fix string compression, add tests.
Fix String.compareTo() for interpreter; memcmp() does not
return the required result (only the right sign).
Fix x86-64 stub where the assembler silently accepted and
generated bad code for out-of-range JECXZ.
Add extensive tests for String.equals(), String.compareTo()
and String.indexOf().
Bug: 31040547
Test: Run ART test suite including interpreter tests on host and Nexus 9.
Test: Ditto with string compression enabled.
Change-Id: I21b7a74da8a577c8fbaf8d9225f048550236d414
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 54e52e5..c3321e1 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2117,7 +2117,7 @@
mov %r8d, %ecx
cmovg %r9d, %ecx
/* Going into loop to compare each character */
- jecxz .Lstring_compareto_keep_length // check loop counter (if 0 then stop)
+ jecxz .Lstring_compareto_keep_length1 // check loop counter (if 0 then stop)
.Lstring_compareto_loop_comparison_this_compressed:
movzbl (%edi), %r8d // move *(this_cur_char) byte to long
movzwl (%esi), %r9d // move *(that_cur_char) word to long
@@ -2126,6 +2126,7 @@
subl %r9d, %r8d
loope .Lstring_compareto_loop_comparison_this_compressed
cmovne %r8d, %eax // return eax = *(this_cur_char) - *(that_cur_char)
+.Lstring_compareto_keep_length1:
ret
.Lstring_compareto_that_is_compressed:
andl LITERAL(0x7FFFFFFF), %r9d
@@ -2134,7 +2135,7 @@
mov %r8d, %ecx
cmovg %r9d, %ecx
/* Comparison this (8-bit) and that (16-bit) */
- jecxz .Lstring_compareto_keep_length // check loop counter (if 0, don't compare)
+ jecxz .Lstring_compareto_keep_length2 // check loop counter (if 0, don't compare)
.Lstring_compareto_loop_comparison_that_compressed:
movzwl (%edi), %r8d // move *(this_cur_char) word to long
movzbl (%esi), %r9d // move *(that_cur_chat) byte to long
@@ -2143,6 +2144,7 @@
subl %r9d, %r8d
loope .Lstring_compareto_loop_comparison_that_compressed
cmovne %r8d, %eax // return eax = *(this_cur_char) - *(that_cur_char)
+.Lstring_compareto_keep_length2:
ret
.Lstring_compareto_both_compressed:
andl LITERAL(0x7FFFFFFF), %r9d
@@ -2151,9 +2153,9 @@
movl %r8d, %eax
subl %r9d, %eax
cmovg %r9d, %ecx
- jecxz .Lstring_compareto_keep_length
+ jecxz .Lstring_compareto_keep_length3
repe cmpsb
- je .Lstring_compareto_keep_length
+ je .Lstring_compareto_keep_length3
movzbl -1(%edi), %eax // get last compared char from this string (8-bit)
movzbl -1(%esi), %ecx // get last compared char from comp string (8-bit)
jmp .Lstring_compareto_count_difference
@@ -2171,14 +2173,14 @@
* esi: pointer to comp string data
* edi: pointer to this string data
*/
- jecxz .Lstring_compareto_keep_length
+ jecxz .Lstring_compareto_keep_length3
repe cmpsw // find nonmatching chars in [%esi] and [%edi], up to length %ecx
- je .Lstring_compareto_keep_length
+ je .Lstring_compareto_keep_length3
movzwl -2(%edi), %eax // get last compared char from this string (16-bit)
movzwl -2(%esi), %ecx // get last compared char from comp string (16-bit)
.Lstring_compareto_count_difference:
subl %ecx, %eax // return the difference
-.Lstring_compareto_keep_length:
+.Lstring_compareto_keep_length3:
ret
END_FUNCTION art_quick_string_compareto
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index ed1103f..d60274f 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -292,38 +292,41 @@
if (lhs == rhs) {
return 0;
}
- // TODO: is this still true?
- // The annoying part here is that 0x00e9 - 0xffff != 0x00ea,
- // because the interpreter converts the characters to 32-bit integers
- // *without* sign extension before it subtracts them (which makes some
- // sense since "char" is unsigned). So what we get is the result of
- // 0x000000e9 - 0x0000ffff, which is 0xffff00ea.
- int32_t lhsCount = lhs->GetLength();
- int32_t rhsCount = rhs->GetLength();
- int32_t countDiff = lhsCount - rhsCount;
- int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
+ int32_t lhs_count = lhs->GetLength();
+ int32_t rhs_count = rhs->GetLength();
+ int32_t count_diff = lhs_count - rhs_count;
+ int32_t min_count = (count_diff < 0) ? lhs_count : rhs_count;
if (lhs->IsCompressed() && rhs->IsCompressed()) {
- int32_t comparison = memcmp(lhs->GetValueCompressed(),
- rhs->GetValueCompressed(),
- minCount * sizeof(uint8_t));
- if (comparison != 0) {
- return comparison;
+ const uint8_t* lhs_chars = lhs->GetValueCompressed();
+ const uint8_t* rhs_chars = rhs->GetValueCompressed();
+ for (int32_t i = 0; i < min_count; ++i) {
+ int32_t char_diff = static_cast<int32_t>(lhs_chars[i]) - static_cast<int32_t>(rhs_chars[i]);
+ if (char_diff != 0) {
+ return char_diff;
+ }
}
} else if (lhs->IsCompressed() || rhs->IsCompressed()) {
- for (int32_t i = 0; i < minCount; ++i) {
- if (lhs->CharAt(i) != rhs->CharAt(i)) {
- return static_cast<int32_t>(lhs->CharAt(i)) - static_cast<int32_t>(rhs->CharAt(i));
+ const uint8_t* compressed_chars =
+ lhs->IsCompressed() ? lhs->GetValueCompressed() : rhs->GetValueCompressed();
+ const uint16_t* uncompressed_chars = lhs->IsCompressed() ? rhs->GetValue() : lhs->GetValue();
+ for (int32_t i = 0; i < min_count; ++i) {
+ int32_t char_diff =
+ static_cast<int32_t>(compressed_chars[i]) - static_cast<int32_t>(uncompressed_chars[i]);
+ if (char_diff != 0) {
+ return lhs->IsCompressed() ? char_diff : -char_diff;
}
}
} else {
- const uint16_t* lhsChars = lhs->GetValue();
- const uint16_t* rhsChars = rhs->GetValue();
- int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
- if (otherRes != 0) {
- return otherRes;
+ const uint16_t* lhs_chars = lhs->GetValue();
+ const uint16_t* rhs_chars = rhs->GetValue();
+ // FIXME: The MemCmp16() name is misleading. It returns the char difference on mismatch
+ // where memcmp() only guarantees that the returned value has the same sign.
+ int32_t char_diff = MemCmp16(lhs_chars, rhs_chars, min_count);
+ if (char_diff != 0) {
+ return char_diff;
}
}
- return countDiff;
+ return count_diff;
}
void String::VisitRoots(RootVisitor* visitor) {