AAPT2: Fix styled string whitespace processing
Change styled string whitespace processing to match the behavior of the original AAPT.
Main changes:
- whitespace around tags is preserved.
- tags start exactly where they are supposed to, not off by one.
Bug: 72406283
Test: make aapt2_tests
Change-Id: I4d12728c493efd8c978e2e3d2718b56534ff52ef
diff --git a/tools/aapt2/util/Util.cpp b/tools/aapt2/util/Util.cpp
index e42145d..d1c9ca1 100644
--- a/tools/aapt2/util/Util.cpp
+++ b/tools/aapt2/util/Util.cpp
@@ -76,6 +76,34 @@
return str.substr(str.size() - suffix.size(), suffix.size()) == suffix;
}
+StringPiece TrimLeadingWhitespace(const StringPiece& str) {  // Returns |str| minus its leading whitespace.
+ if (str.size() == 0 || str.data() == nullptr) {
+ return str;  // Empty or null-backed piece: nothing to trim.
+ }
+
+ const char* start = str.data();
+ const char* end = start + str.length();
+
+ while (start != end && isspace(static_cast<unsigned char>(*start))) {  // Cast: a negative char is UB for isspace().
+ start++;
+ }
+ return StringPiece(start, end - start);  // Points into the original data; length may be 0.
+}
+
+StringPiece TrimTrailingWhitespace(const StringPiece& str) {  // Returns |str| minus its trailing whitespace.
+ if (str.size() == 0 || str.data() == nullptr) {
+ return str;  // Empty or null-backed piece: nothing to trim.
+ }
+
+ const char* start = str.data();
+ const char* end = start + str.length();
+
+ while (end != start && isspace(static_cast<unsigned char>(*(end - 1)))) {  // Cast: a negative char is UB for isspace().
+ end--;
+ }
+ return StringPiece(start, end - start);  // Points into the original data; length may be 0.
+}
+
StringPiece TrimWhitespace(const StringPiece& str) {
if (str.size() == 0 || str.data() == nullptr) {
return str;
@@ -269,162 +297,6 @@
return true;
}
-static bool AppendCodepointToUtf8String(char32_t codepoint, std::string* output) {
- ssize_t len = utf32_to_utf8_length(&codepoint, 1);
- if (len < 0) {
- return false;
- }
-
- const size_t start_append_pos = output->size();
-
- // Make room for the next character.
- output->resize(output->size() + len);
-
- char* dst = &*(output->begin() + start_append_pos);
- utf32_to_utf8(&codepoint, 1, dst, len + 1);
- return true;
-}
-
-static bool AppendUnicodeCodepoint(Utf8Iterator* iter, std::string* output) {
- char32_t code = 0;
- for (size_t i = 0; i < 4 && iter->HasNext(); i++) {
- char32_t codepoint = iter->Next();
- char32_t a;
- if (codepoint >= U'0' && codepoint <= U'9') {
- a = codepoint - U'0';
- } else if (codepoint >= U'a' && codepoint <= U'f') {
- a = codepoint - U'a' + 10;
- } else if (codepoint >= U'A' && codepoint <= U'F') {
- a = codepoint - U'A' + 10;
- } else {
- return {};
- }
- code = (code << 4) | a;
- }
- return AppendCodepointToUtf8String(code, output);
-}
-
-static bool IsCodepointSpace(char32_t codepoint) {
- if (static_cast<uint32_t>(codepoint) & 0xffffff00u) {
- return false;
- }
- return isspace(static_cast<char>(codepoint));
-}
-
-StringBuilder::StringBuilder(bool preserve_spaces) : preserve_spaces_(preserve_spaces) {
-}
-
-StringBuilder& StringBuilder::Append(const StringPiece& str) {
- if (!error_.empty()) {
- return *this;
- }
-
- // Where the new data will be appended to.
- const size_t new_data_index = str_.size();
-
- Utf8Iterator iter(str);
- while (iter.HasNext()) {
- const char32_t codepoint = iter.Next();
-
- if (last_char_was_escape_) {
- switch (codepoint) {
- case U't':
- str_ += '\t';
- break;
-
- case U'n':
- str_ += '\n';
- break;
-
- case U'#':
- case U'@':
- case U'?':
- case U'"':
- case U'\'':
- case U'\\':
- str_ += static_cast<char>(codepoint);
- break;
-
- case U'u':
- if (!AppendUnicodeCodepoint(&iter, &str_)) {
- error_ = "invalid unicode escape sequence";
- return *this;
- }
- break;
-
- default:
- // Ignore the escape character and just include the codepoint.
- AppendCodepointToUtf8String(codepoint, &str_);
- break;
- }
- last_char_was_escape_ = false;
-
- } else if (!preserve_spaces_ && codepoint == U'"') {
- if (!quote_ && trailing_space_) {
- // We found an opening quote, and we have trailing space, so we should append that
- // space now.
- if (trailing_space_) {
- // We had trailing whitespace, so replace with a single space.
- if (!str_.empty()) {
- str_ += ' ';
- }
- trailing_space_ = false;
- }
- }
- quote_ = !quote_;
-
- } else if (!preserve_spaces_ && codepoint == U'\'' && !quote_) {
- // This should be escaped.
- error_ = "unescaped apostrophe";
- return *this;
-
- } else if (codepoint == U'\\') {
- // This is an escape sequence, convert to the real value.
- if (!quote_ && trailing_space_) {
- // We had trailing whitespace, so
- // replace with a single space.
- if (!str_.empty()) {
- str_ += ' ';
- }
- trailing_space_ = false;
- }
- last_char_was_escape_ = true;
- } else {
- if (preserve_spaces_ || quote_) {
- // Quotes mean everything is taken, including whitespace.
- AppendCodepointToUtf8String(codepoint, &str_);
- } else {
- // This is not quoted text, so we will accumulate whitespace and only emit a single
- // character of whitespace if it is followed by a non-whitespace character.
- if (IsCodepointSpace(codepoint)) {
- // We found whitespace.
- trailing_space_ = true;
- } else {
- if (trailing_space_) {
- // We saw trailing space before, so replace all
- // that trailing space with one space.
- if (!str_.empty()) {
- str_ += ' ';
- }
- trailing_space_ = false;
- }
- AppendCodepointToUtf8String(codepoint, &str_);
- }
- }
- }
- }
-
- // Accumulate the added string's UTF-16 length.
- ssize_t len = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(str_.data()) + new_data_index,
- str_.size() - new_data_index);
- if (len < 0) {
- error_ = "invalid unicode code point";
- return *this;
- }
- utf16_len_ += len;
- return *this;
-}
-
std::u16string Utf8ToUtf16(const StringPiece& utf8) {
ssize_t utf16_length = utf8_to_utf16_length(
reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
diff --git a/tools/aapt2/util/Util.h b/tools/aapt2/util/Util.h
index 7c949b90..0eb35d1 100644
--- a/tools/aapt2/util/Util.h
+++ b/tools/aapt2/util/Util.h
@@ -59,7 +59,15 @@
// Returns true if the string ends with suffix.
bool EndsWith(const android::StringPiece& str, const android::StringPiece& suffix);
-// Creates a new StringPiece16 that points to a substring of the original string without leading or
+// Creates a new StringPiece that points to a substring of the original string without leading
+// whitespace.
+android::StringPiece TrimLeadingWhitespace(const android::StringPiece& str);
+
+// Creates a new StringPiece that points to a substring of the original string without trailing
+// whitespace.
+android::StringPiece TrimTrailingWhitespace(const android::StringPiece& str);
+
+// Creates a new StringPiece that points to a substring of the original string without leading or
// trailing whitespace.
android::StringPiece TrimWhitespace(const android::StringPiece& str);
@@ -141,9 +149,12 @@
// break the string interpolation.
bool VerifyJavaStringFormat(const android::StringPiece& str);
+bool AppendStyledString(const android::StringPiece& input, bool preserve_spaces,
+ std::string* out_str, std::string* out_error);
+
class StringBuilder {
public:
- explicit StringBuilder(bool preserve_spaces = false);
+ StringBuilder() = default;
StringBuilder& Append(const android::StringPiece& str);
const std::string& ToString() const;
@@ -158,7 +169,6 @@
explicit operator bool() const;
private:
- bool preserve_spaces_;
std::string str_;
size_t utf16_len_ = 0;
bool quote_ = false;
diff --git a/tools/aapt2/util/Util_test.cpp b/tools/aapt2/util/Util_test.cpp
index 2d1242a..d4e3bec 100644
--- a/tools/aapt2/util/Util_test.cpp
+++ b/tools/aapt2/util/Util_test.cpp
@@ -41,45 +41,6 @@
EXPECT_TRUE(util::StartsWith("hello.xml", "he"));
}
-TEST(UtilTest, StringBuilderSplitEscapeSequence) {
- EXPECT_THAT(util::StringBuilder().Append("this is a new\\").Append("nline.").ToString(),
- Eq("this is a new\nline."));
-}
-
-TEST(UtilTest, StringBuilderWhitespaceRemoval) {
- EXPECT_THAT(util::StringBuilder().Append(" hey guys ").Append(" this is so cool ").ToString(),
- Eq("hey guys this is so cool"));
- EXPECT_THAT(
- util::StringBuilder().Append(" \" wow, so many \t ").Append("spaces. \"what? ").ToString(),
- Eq(" wow, so many \t spaces. what?"));
- EXPECT_THAT(util::StringBuilder().Append(" where \t ").Append(" \nis the pie?").ToString(),
- Eq("where is the pie?"));
-}
-
-TEST(UtilTest, StringBuilderEscaping) {
- EXPECT_THAT(util::StringBuilder()
- .Append(" hey guys\\n ")
- .Append(" this \\t is so\\\\ cool ")
- .ToString(),
- Eq("hey guys\n this \t is so\\ cool"));
- EXPECT_THAT(util::StringBuilder().Append("\\@\\?\\#\\\\\\'").ToString(), Eq("@?#\\\'"));
-}
-
-TEST(UtilTest, StringBuilderMisplacedQuote) {
- util::StringBuilder builder;
- EXPECT_FALSE(builder.Append("they're coming!"));
-}
-
-TEST(UtilTest, StringBuilderUnicodeCodes) {
- EXPECT_THAT(util::StringBuilder().Append("\\u00AF\\u0AF0 woah").ToString(),
- Eq("\u00AF\u0AF0 woah"));
- EXPECT_FALSE(util::StringBuilder().Append("\\u00 yo"));
-}
-
-TEST(UtilTest, StringBuilderPreserveSpaces) {
- EXPECT_THAT(util::StringBuilder(true /*preserve_spaces*/).Append("\"").ToString(), Eq("\""));
-}
-
TEST(UtilTest, TokenizeInput) {
auto tokenizer = util::Tokenize(StringPiece("this| is|the|end"), '|');
auto iter = tokenizer.begin();