blob: d60274fae7e8d1e989e84855ee26b90d1c3108b4 [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
Ian Rogersb0fa5dc2014-04-28 16:47:08 -070017#include "string-inl.h"
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080018
Andreas Gampe4d0589c2014-06-10 16:10:56 -070019#include "arch/memcmp16.h"
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080020#include "array.h"
Hiroshi Yamauchi967a0ad2013-09-10 16:24:21 -070021#include "class-inl.h"
Ian Rogers1d54e732013-05-02 21:10:01 -070022#include "gc/accounting/card_table-inl.h"
Jeff Hao848f70a2014-01-15 13:49:50 -080023#include "handle_scope-inl.h"
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080024#include "intern_table.h"
25#include "object-inl.h"
26#include "runtime.h"
Jeff Hao848f70a2014-01-15 13:49:50 -080027#include "string-inl.h"
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080028#include "thread.h"
Ian Rogersa6724902013-09-23 09:23:37 -070029#include "utf-inl.h"
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080030
31namespace art {
32namespace mirror {
33
// Root holding the java.lang.String class. It is installed by String::SetClass(), cleared by
// String::ResetClass() and reported to the GC by String::VisitRoots().
// TODO: get global references for these
GcRoot<Class> String::java_lang_String_;
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080036
Ian Rogersef7d42f2014-01-06 12:55:46 -080037int32_t String::FastIndexOf(int32_t ch, int32_t start) {
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080038 int32_t count = GetLength();
39 if (start < 0) {
40 start = 0;
41 } else if (start > count) {
42 start = count;
43 }
jessicahandojo3aaa37b2016-07-29 14:46:37 -070044 if (IsCompressed()) {
45 return FastIndexOf<uint8_t>(GetValueCompressed(), ch, start);
46 } else {
47 return FastIndexOf<uint16_t>(GetValue(), ch, start);
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080048 }
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080049}
50
Mathieu Chartier31e88222016-10-14 18:43:19 -070051void String::SetClass(ObjPtr<Class> java_lang_String) {
Hiroshi Yamauchi94f7b492014-07-22 18:08:23 -070052 CHECK(java_lang_String_.IsNull());
Mathieu Chartier2cebb242015-04-21 16:50:40 -070053 CHECK(java_lang_String != nullptr);
Mathieu Chartier52a7f5c2015-08-18 18:35:52 -070054 CHECK(java_lang_String->IsStringClass());
Hiroshi Yamauchi94f7b492014-07-22 18:08:23 -070055 java_lang_String_ = GcRoot<Class>(java_lang_String);
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080056}
57
58void String::ResetClass() {
Hiroshi Yamauchi94f7b492014-07-22 18:08:23 -070059 CHECK(!java_lang_String_.IsNull());
60 java_lang_String_ = GcRoot<Class>(nullptr);
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080061}
62
Jeff Hao848f70a2014-01-15 13:49:50 -080063int String::ComputeHashCode() {
jessicahandojo3aaa37b2016-07-29 14:46:37 -070064 int32_t hash_code = 0;
65 if (IsCompressed()) {
66 hash_code = ComputeUtf16Hash(GetValueCompressed(), GetLength());
67 } else {
68 hash_code = ComputeUtf16Hash(GetValue(), GetLength());
69 }
Mathieu Chartiercdfd39f2014-08-29 18:16:58 -070070 SetHashCode(hash_code);
71 return hash_code;
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080072}
73
Ian Rogersb0fa5dc2014-04-28 16:47:08 -070074int32_t String::GetUtfLength() {
jessicahandojo3aaa37b2016-07-29 14:46:37 -070075 if (IsCompressed()) {
76 return GetLength();
77 } else {
78 return CountUtf8Bytes(GetValue(), GetLength());
79 }
Ian Rogers2dd0e2c2013-01-24 12:42:14 -080080}
81
Jeff Hao848f70a2014-01-15 13:49:50 -080082void String::SetCharAt(int32_t index, uint16_t c) {
jessicahandojo3aaa37b2016-07-29 14:46:37 -070083 DCHECK((index >= 0) && (index < GetLength()));
84 if (IsCompressed()) {
85 // TODO: Handle the case where String is compressed and c is non-ASCII
86 GetValueCompressed()[index] = static_cast<uint8_t>(c);
87 } else {
88 GetValue()[index] = c;
89 }
Jeff Hao848f70a2014-01-15 13:49:50 -080090}
91
92String* String::AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) {
93 int32_t length = string->GetLength();
94 int32_t length2 = string2->GetLength();
95 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
Mathieu Chartier31e88222016-10-14 18:43:19 -070096 const bool compressible = kUseStringCompression &&
97 (string->IsCompressed() && string2->IsCompressed());
98 const int32_t length_with_flag = compressible ? String::GetFlaggedCount(length + length2)
99 : (length + length2);
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700100
101 SetStringCountVisitor visitor(length_with_flag);
Mathieu Chartier31e88222016-10-14 18:43:19 -0700102 ObjPtr<String> new_string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
Jeff Hao848f70a2014-01-15 13:49:50 -0800103 if (UNLIKELY(new_string == nullptr)) {
104 return nullptr;
105 }
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700106 if (compressible) {
107 uint8_t* new_value = new_string->GetValueCompressed();
108 memcpy(new_value, string->GetValueCompressed(), length * sizeof(uint8_t));
109 memcpy(new_value + length, string2->GetValueCompressed(), length2 * sizeof(uint8_t));
110 } else {
111 uint16_t* new_value = new_string->GetValue();
112 if (string->IsCompressed()) {
113 for (int i = 0; i < length; ++i) {
114 new_value[i] = string->CharAt(i);
115 }
116 } else {
117 memcpy(new_value, string->GetValue(), length * sizeof(uint16_t));
118 }
119 if (string2->IsCompressed()) {
120 for (int i = 0; i < length2; ++i) {
121 new_value[i+length] = string2->CharAt(i);
122 }
123 } else {
124 memcpy(new_value + length, string2->GetValue(), length2 * sizeof(uint16_t));
125 }
126 }
Mathieu Chartier31e88222016-10-14 18:43:19 -0700127 return new_string.Ptr();
Jeff Hao848f70a2014-01-15 13:49:50 -0800128}
129
130String* String::AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_t* utf16_data_in) {
Ian Rogers4069d332014-01-03 10:28:27 -0800131 CHECK(utf16_data_in != nullptr || utf16_length == 0);
Jeff Hao848f70a2014-01-15 13:49:50 -0800132 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700133 const bool compressible = kUseStringCompression &&
134 String::AllASCII<uint16_t>(utf16_data_in, utf16_length);
135 int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length)
136 : utf16_length;
137 SetStringCountVisitor visitor(length_with_flag);
Mathieu Chartier31e88222016-10-14 18:43:19 -0700138 ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
Mathieu Chartier590fee92013-09-13 13:46:47 -0700139 if (UNLIKELY(string == nullptr)) {
140 return nullptr;
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800141 }
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700142 if (compressible) {
143 for (int i = 0; i < utf16_length; ++i) {
144 string->GetValueCompressed()[i] = static_cast<uint8_t>(utf16_data_in[i]);
145 }
146 } else {
147 uint16_t* array = string->GetValue();
148 memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t));
149 }
Mathieu Chartier31e88222016-10-14 18:43:19 -0700150 return string.Ptr();
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800151}
152
Ian Rogersa436fde2013-08-27 23:34:06 -0700153String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
Mathieu Chartiered0fc1d2014-03-21 14:09:35 -0700154 DCHECK(utf != nullptr);
Bruce Hoult1646d7a2015-10-28 15:06:12 +0300155 size_t byte_count = strlen(utf);
156 size_t char_count = CountModifiedUtf8Chars(utf, byte_count);
157 return AllocFromModifiedUtf8(self, char_count, utf, byte_count);
158}
159
Mathieu Chartier31e88222016-10-14 18:43:19 -0700160String* String::AllocFromModifiedUtf8(Thread* self,
161 int32_t utf16_length,
162 const char* utf8_data_in) {
Bruce Hoult1646d7a2015-10-28 15:06:12 +0300163 return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in));
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800164}
165
Mathieu Chartier31e88222016-10-14 18:43:19 -0700166String* String::AllocFromModifiedUtf8(Thread* self,
167 int32_t utf16_length,
168 const char* utf8_data_in,
169 int32_t utf8_length) {
Jeff Hao848f70a2014-01-15 13:49:50 -0800170 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700171 const bool compressible = kUseStringCompression && (utf16_length == utf8_length);
172 const int32_t utf16_length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length)
173 : utf16_length;
174 SetStringCountVisitor visitor(utf16_length_with_flag);
Mathieu Chartier31e88222016-10-14 18:43:19 -0700175 ObjPtr<String> string = Alloc<true>(self, utf16_length_with_flag, allocator_type, visitor);
Mathieu Chartier590fee92013-09-13 13:46:47 -0700176 if (UNLIKELY(string == nullptr)) {
177 return nullptr;
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800178 }
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700179 if (compressible) {
180 memcpy(string->GetValueCompressed(), utf8_data_in, utf16_length * sizeof(uint8_t));
181 } else {
182 uint16_t* utf16_data_out = string->GetValue();
183 ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length);
184 }
Mathieu Chartier31e88222016-10-14 18:43:19 -0700185 return string.Ptr();
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800186}
187
Mathieu Chartier31e88222016-10-14 18:43:19 -0700188bool String::Equals(ObjPtr<String> that) {
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800189 if (this == that) {
190 // Quick reference equality test
191 return true;
Mathieu Chartier2cebb242015-04-21 16:50:40 -0700192 } else if (that == nullptr) {
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800193 // Null isn't an instanceof anything
194 return false;
195 } else if (this->GetLength() != that->GetLength()) {
196 // Quick length inequality test
197 return false;
198 } else {
199 // Note: don't short circuit on hash code as we're presumably here as the
200 // hash code was already equal
201 for (int32_t i = 0; i < that->GetLength(); ++i) {
Jeff Hao848f70a2014-01-15 13:49:50 -0800202 if (this->CharAt(i) != that->CharAt(i)) {
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800203 return false;
204 }
205 }
206 return true;
207 }
208}
209
Ian Rogersef7d42f2014-01-06 12:55:46 -0800210bool String::Equals(const uint16_t* that_chars, int32_t that_offset, int32_t that_length) {
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800211 if (this->GetLength() != that_length) {
212 return false;
213 } else {
214 for (int32_t i = 0; i < that_length; ++i) {
Jeff Hao848f70a2014-01-15 13:49:50 -0800215 if (this->CharAt(i) != that_chars[that_offset + i]) {
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800216 return false;
217 }
218 }
219 return true;
220 }
221}
222
Ian Rogersef7d42f2014-01-06 12:55:46 -0800223bool String::Equals(const char* modified_utf8) {
Narayan Kamatha5afcfc2015-01-29 20:06:46 +0000224 const int32_t length = GetLength();
225 int32_t i = 0;
226 while (i < length) {
227 const uint32_t ch = GetUtf16FromUtf8(&modified_utf8);
228 if (ch == '\0') {
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800229 return false;
230 }
Narayan Kamatha5afcfc2015-01-29 20:06:46 +0000231
Jeff Hao848f70a2014-01-15 13:49:50 -0800232 if (GetLeadingUtf16Char(ch) != CharAt(i++)) {
Narayan Kamatha5afcfc2015-01-29 20:06:46 +0000233 return false;
234 }
235
236 const uint16_t trailing = GetTrailingUtf16Char(ch);
237 if (trailing != 0) {
238 if (i == length) {
239 return false;
240 }
241
Jeff Hao848f70a2014-01-15 13:49:50 -0800242 if (CharAt(i++) != trailing) {
Narayan Kamatha5afcfc2015-01-29 20:06:46 +0000243 return false;
244 }
245 }
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800246 }
247 return *modified_utf8 == '\0';
248}
249
Ian Rogersef7d42f2014-01-06 12:55:46 -0800250bool String::Equals(const StringPiece& modified_utf8) {
Narayan Kamatha5afcfc2015-01-29 20:06:46 +0000251 const int32_t length = GetLength();
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800252 const char* p = modified_utf8.data();
Narayan Kamatha5afcfc2015-01-29 20:06:46 +0000253 for (int32_t i = 0; i < length; ++i) {
254 uint32_t ch = GetUtf16FromUtf8(&p);
255
Jeff Hao848f70a2014-01-15 13:49:50 -0800256 if (GetLeadingUtf16Char(ch) != CharAt(i)) {
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800257 return false;
258 }
Narayan Kamatha5afcfc2015-01-29 20:06:46 +0000259
260 const uint16_t trailing = GetTrailingUtf16Char(ch);
261 if (trailing != 0) {
262 if (i == (length - 1)) {
263 return false;
264 }
265
Jeff Hao848f70a2014-01-15 13:49:50 -0800266 if (CharAt(++i) != trailing) {
Narayan Kamatha5afcfc2015-01-29 20:06:46 +0000267 return false;
268 }
269 }
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800270 }
271 return true;
272}
273
274// Create a modified UTF-8 encoded std::string from a java/lang/String object.
Ian Rogersef7d42f2014-01-06 12:55:46 -0800275std::string String::ToModifiedUtf8() {
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800276 size_t byte_count = GetUtfLength();
277 std::string result(byte_count, static_cast<char>(0));
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700278 if (IsCompressed()) {
279 for (size_t i = 0; i < byte_count; ++i) {
280 result[i] = static_cast<char>(CharAt(i));
281 }
282 } else {
283 const uint16_t* chars = GetValue();
284 ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength());
285 }
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800286 return result;
287}
288
Mathieu Chartier31e88222016-10-14 18:43:19 -0700289int32_t String::CompareTo(ObjPtr<String> rhs) {
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800290 // Quick test for comparison of a string with itself.
Mathieu Chartier31e88222016-10-14 18:43:19 -0700291 ObjPtr<String> lhs = this;
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800292 if (lhs == rhs) {
293 return 0;
294 }
Vladimir Marko9c9883b2016-10-17 14:45:29 +0100295 int32_t lhs_count = lhs->GetLength();
296 int32_t rhs_count = rhs->GetLength();
297 int32_t count_diff = lhs_count - rhs_count;
298 int32_t min_count = (count_diff < 0) ? lhs_count : rhs_count;
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700299 if (lhs->IsCompressed() && rhs->IsCompressed()) {
Vladimir Marko9c9883b2016-10-17 14:45:29 +0100300 const uint8_t* lhs_chars = lhs->GetValueCompressed();
301 const uint8_t* rhs_chars = rhs->GetValueCompressed();
302 for (int32_t i = 0; i < min_count; ++i) {
303 int32_t char_diff = static_cast<int32_t>(lhs_chars[i]) - static_cast<int32_t>(rhs_chars[i]);
304 if (char_diff != 0) {
305 return char_diff;
306 }
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700307 }
308 } else if (lhs->IsCompressed() || rhs->IsCompressed()) {
Vladimir Marko9c9883b2016-10-17 14:45:29 +0100309 const uint8_t* compressed_chars =
310 lhs->IsCompressed() ? lhs->GetValueCompressed() : rhs->GetValueCompressed();
311 const uint16_t* uncompressed_chars = lhs->IsCompressed() ? rhs->GetValue() : lhs->GetValue();
312 for (int32_t i = 0; i < min_count; ++i) {
313 int32_t char_diff =
314 static_cast<int32_t>(compressed_chars[i]) - static_cast<int32_t>(uncompressed_chars[i]);
315 if (char_diff != 0) {
316 return lhs->IsCompressed() ? char_diff : -char_diff;
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700317 }
318 }
319 } else {
Vladimir Marko9c9883b2016-10-17 14:45:29 +0100320 const uint16_t* lhs_chars = lhs->GetValue();
321 const uint16_t* rhs_chars = rhs->GetValue();
322 // FIXME: The MemCmp16() name is misleading. It returns the char difference on mismatch
323 // where memcmp() only guarantees that the returned value has the same sign.
324 int32_t char_diff = MemCmp16(lhs_chars, rhs_chars, min_count);
325 if (char_diff != 0) {
326 return char_diff;
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700327 }
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800328 }
Vladimir Marko9c9883b2016-10-17 14:45:29 +0100329 return count_diff;
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800330}
331
Mathieu Chartierbb87e0f2015-04-03 11:21:55 -0700332void String::VisitRoots(RootVisitor* visitor) {
333 java_lang_String_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
Mathieu Chartierc528dba2013-11-26 12:00:11 -0800334}
335
Jeff Hao848f70a2014-01-15 13:49:50 -0800336CharArray* String::ToCharArray(Thread* self) {
337 StackHandleScope<1> hs(self);
338 Handle<String> string(hs.NewHandle(this));
Mathieu Chartier31e88222016-10-14 18:43:19 -0700339 ObjPtr<CharArray> result = CharArray::Alloc(self, GetLength());
Mathieu Chartier04e983a2015-11-13 08:36:59 -0800340 if (result != nullptr) {
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700341 if (string->IsCompressed()) {
342 int32_t length = string->GetLength();
343 for (int i = 0; i < length; ++i) {
344 result->GetData()[i] = string->CharAt(i);
345 }
346 } else {
347 memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t));
348 }
Mathieu Chartier04e983a2015-11-13 08:36:59 -0800349 } else {
350 self->AssertPendingOOMException();
351 }
Mathieu Chartier31e88222016-10-14 18:43:19 -0700352 return result.Ptr();
Jeff Hao848f70a2014-01-15 13:49:50 -0800353}
354
355void String::GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) {
356 uint16_t* data = array->GetData() + index;
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700357 if (IsCompressed()) {
358 for (int i = start; i < end; ++i) {
359 data[i-start] = CharAt(i);
360 }
361 } else {
362 uint16_t* value = GetValue() + start;
363 memcpy(data, value, (end - start) * sizeof(uint16_t));
364 }
365}
366
367bool String::IsValueNull() {
368 return (IsCompressed()) ? (GetValueCompressed() == nullptr) : (GetValue() == nullptr);
Jeff Hao848f70a2014-01-15 13:49:50 -0800369}
370
Ian Rogers2dd0e2c2013-01-24 12:42:14 -0800371} // namespace mirror
372} // namespace art