AAPT2: Fix issue with styled string indices
Styled strings use spans to denote which parts are styled
(<b>, <i>, etc.). A span is simply a range of indices into
the original string.
On the Java side these indices are used with java.lang.String,
whose internal representation is UTF-16, so they must be encoded
as UTF-16 code-unit offsets.
When the internal AAPT2 representation of strings switched to UTF-8,
the span indices also began to count UTF-8 bytes.
This change reverts the indices to UTF-16 lengths.
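
For illustration only (not part of this change), a minimal standalone
C++ sketch of how UTF-8 byte offsets and UTF-16 code-unit offsets
diverge for a multi-byte code point; the utf16Length() helper below is
a hypothetical stand-in for utf8_to_utf16_length():

  #include <cstdint>
  #include <cstdio>
  #include <string>

  // Count the UTF-16 code units needed to encode a well-formed UTF-8 string.
  // Code points at or above U+10000 need a surrogate pair (2 units); all
  // other code points need 1 unit.
  static size_t utf16Length(const std::string& utf8) {
      size_t units = 0;
      for (size_t i = 0; i < utf8.size();) {
          const uint8_t c = static_cast<uint8_t>(utf8[i]);
          size_t bytes;
          if (c < 0x80) bytes = 1;               // ASCII
          else if ((c & 0xE0) == 0xC0) bytes = 2;
          else if ((c & 0xF0) == 0xE0) bytes = 3;
          else bytes = 4;                        // surrogate pair in UTF-16
          units += (bytes == 4) ? 2 : 1;
          i += bytes;
      }
      return units;
  }

  int main() {
      // U+2019 (RIGHT SINGLE QUOTATION MARK) is 3 bytes in UTF-8 but only
      // 1 UTF-16 code unit, so byte offsets overshoot by 2 here.
      const std::string prefix = "This is my aunt\u2019s ";
      std::printf("UTF-8 bytes:       %zu\n", prefix.size());       // 20
      std::printf("UTF-16 code units: %zu\n", utf16Length(prefix)); // 18
      return 0;
  }
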
Bug:31170115
Change-Id: I07b8b5b67d2542c7e0a855b601cdbd3ac4ebffb0
diff --git a/tools/aapt2/ResourceParser.cpp b/tools/aapt2/ResourceParser.cpp
index 32e5cfd..c430c46 100644
--- a/tools/aapt2/ResourceParser.cpp
+++ b/tools/aapt2/ResourceParser.cpp
@@ -152,7 +152,7 @@
break;
}
- spanStack.back().lastChar = builder.str().size() - 1;
+ spanStack.back().lastChar = builder.utf16Len() - 1;
outStyleString->spans.push_back(spanStack.back());
spanStack.pop_back();
@@ -185,12 +185,12 @@
spanName += attrIter->value;
}
- if (builder.str().size() > std::numeric_limits<uint32_t>::max()) {
+ if (builder.utf16Len() > std::numeric_limits<uint32_t>::max()) {
mDiag->error(DiagMessage(mSource.withLine(parser->getLineNumber()))
<< "style string '" << builder.str() << "' is too long");
error = true;
} else {
- spanStack.push_back(Span{ spanName, static_cast<uint32_t>(builder.str().size()) });
+ spanStack.push_back(Span{ spanName, static_cast<uint32_t>(builder.utf16Len()) });
}
} else if (event == xml::XmlPullParser::Event::kComment) {
diff --git a/tools/aapt2/ResourceParser_test.cpp b/tools/aapt2/ResourceParser_test.cpp
index 3d03a88..e097740 100644
--- a/tools/aapt2/ResourceParser_test.cpp
+++ b/tools/aapt2/ResourceParser_test.cpp
@@ -90,6 +90,40 @@
ASSERT_TRUE(testParse(input));
}
+TEST_F(ResourceParserTest, ParseStyledString) {
+ // Use a multi-byte UTF-8 code point (U+2019) so that we can verify that the span
+ // indices use UTF-16 lengths and not UTF-8 lengths.
+ std::string input = "<string name=\"foo\">This is my aunt\u2019s <b>string</b></string>";
+ ASSERT_TRUE(testParse(input));
+
+ StyledString* str = test::getValue<StyledString>(&mTable, "string/foo");
+ ASSERT_NE(nullptr, str);
+
+ const std::string expectedStr = "This is my aunt\u2019s string";
+ EXPECT_EQ(expectedStr, *str->value->str);
+ EXPECT_EQ(1u, str->value->spans.size());
+
+ EXPECT_EQ(std::string("b"), *str->value->spans[0].name);
+ EXPECT_EQ(17u, str->value->spans[0].firstChar);
+ EXPECT_EQ(23u, str->value->spans[0].lastChar);
+}
+
+TEST_F(ResourceParserTest, ParseStringWithWhitespace) {
+ std::string input = "<string name=\"foo\"> This is what I think </string>";
+ ASSERT_TRUE(testParse(input));
+
+ String* str = test::getValue<String>(&mTable, "string/foo");
+ ASSERT_NE(nullptr, str);
+ EXPECT_EQ(std::string("This is what I think"), *str->value);
+
+ input = "<string name=\"foo2\">\" This is what I think \"</string>";
+ ASSERT_TRUE(testParse(input));
+
+ str = test::getValue<String>(&mTable, "string/foo2");
+ ASSERT_NE(nullptr, str);
+ EXPECT_EQ(std::string(" This is what I think "), *str->value);
+}
+
TEST_F(ResourceParserTest, IgnoreXliffTags) {
std::string input = "<string name=\"foo\" \n"
" xmlns:xliff=\"urn:oasis:names:tc:xliff:document:1.2\">\n"
diff --git a/tools/aapt2/util/Util.cpp b/tools/aapt2/util/Util.cpp
index e743247..b0bec62 100644
--- a/tools/aapt2/util/Util.cpp
+++ b/tools/aapt2/util/Util.cpp
@@ -314,6 +314,9 @@
return *this;
}
+ // Index at which the new data will be appended.
+ size_t newDataIndex = mStr.size();
+
const char* const end = str.end();
const char* start = str.begin();
const char* current = start;
@@ -422,6 +425,16 @@
current++;
}
mStr.append(start, end - start);
+
+ // Accumulate the added string's UTF-16 length.
+ ssize_t len = utf8_to_utf16_length(
+ reinterpret_cast<const uint8_t*>(mStr.data()) + newDataIndex,
+ mStr.size() - newDataIndex);
+ if (len < 0) {
+ mError = "invalid unicode code point";
+ return *this;
+ }
+ mUtf16Len += len;
return *this;
}
@@ -434,11 +447,8 @@
std::u16string utf16;
utf16.resize(utf16Length);
- utf8_to_utf16(
- reinterpret_cast<const uint8_t*>(utf8.data()),
- utf8.length(),
- &*utf16.begin(),
- (size_t) utf16Length + 1);
+ utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(),
+ &*utf16.begin(), utf16Length + 1);
return utf16;
}
diff --git a/tools/aapt2/util/Util.h b/tools/aapt2/util/Util.h
index 998ecf7..9c88354 100644
--- a/tools/aapt2/util/Util.h
+++ b/tools/aapt2/util/Util.h
@@ -163,10 +163,16 @@
StringBuilder& append(const StringPiece& str);
const std::string& str() const;
const std::string& error() const;
+
+ // When building StyledStrings, we need UTF-16 indices into the string,
+ // since that is what the Java layer expects when calling java.lang.String.charAt().
+ size_t utf16Len() const;
+
operator bool() const;
private:
std::string mStr;
+ size_t mUtf16Len = 0;
bool mQuote = false;
bool mTrailingSpace = false;
bool mLastCharWasEscape = false;
@@ -181,6 +187,10 @@
return mError;
}
+inline size_t StringBuilder::utf16Len() const {
+ return mUtf16Len;
+}
+
inline StringBuilder::operator bool() const {
return mError.empty();
}