AAPT2: Fix styled string whitespace processing
Change styled string whitespace processing to match the behavior of the original AAPT.
Main changes:
- Whitespace around tags is preserved.
- Tags start exactly where they are supposed to, instead of being off by one.
Bug: 72406283
Test: make aapt2_tests
Change-Id: I4d12728c493efd8c978e2e3d2718b56534ff52ef
diff --git a/core/res/res/values/strings.xml b/core/res/res/values/strings.xml
index cadc3ff..aa90b87 100644
--- a/core/res/res/values/strings.xml
+++ b/core/res/res/values/strings.xml
@@ -379,7 +379,7 @@
<!-- Text message in the factory reset warning dialog. This says that the the device admin app
is missing or corrupted. As a result the device will be erased. [CHAR LIMIT=NONE]-->
<string name="factory_reset_message">The admin app can\'t be used. Your device will now be
- erased.\n\nIf you have questions, contact your organization's admin.</string>
+ erased.\n\nIf you have questions, contact your organization\'s admin.</string>
<!-- A toast message displayed when printing is attempted but disabled by policy. -->
<string name="printing_disabled_by">Printing disabled by <xliff:g id="owner_app">%s</xliff:g>.</string>
@@ -764,7 +764,7 @@
<string name="capability_title_canCaptureFingerprintGestures">Fingerprint gestures</string>
<!-- Description for the capability of an accessibility service to perform gestures. -->
<string name="capability_desc_canCaptureFingerprintGestures">Can capture gestures performed on
- the device's fingerprint sensor.</string>
+ the device\'s fingerprint sensor.</string>
<!-- Permissions -->
@@ -3774,7 +3774,7 @@
<!-- Notification title when data usage has exceeded warning threshold. [CHAR LIMIT=50] -->
<string name="data_usage_warning_title">Data warning</string>
<!-- Notification body when data usage has exceeded warning threshold. [CHAR LIMIT=32] -->
- <string name="data_usage_warning_body">You've used <xliff:g id="app" example="3.8GB">%s</xliff:g> of data</string>
+ <string name="data_usage_warning_body">You\'ve used <xliff:g id="app" example="3.8GB">%s</xliff:g> of data</string>
<!-- Notification title when mobile data usage has exceeded limit threshold, and has been disabled. [CHAR LIMIT=50] -->
<string name="data_usage_mobile_limit_title">Mobile data limit reached</string>
@@ -3788,7 +3788,7 @@
<!-- Notification title when Wi-Fi data usage has exceeded limit threshold. [CHAR LIMIT=32] -->
<string name="data_usage_wifi_limit_snoozed_title">Over your Wi-Fi data limit</string>
<!-- Notification body when data usage has exceeded limit threshold. -->
- <string name="data_usage_limit_snoozed_body">You've gone <xliff:g id="size" example="3.8GB">%s</xliff:g> over your set limit</string>
+ <string name="data_usage_limit_snoozed_body">You\'ve gone <xliff:g id="size" example="3.8GB">%s</xliff:g> over your set limit</string>
<!-- Notification title when background data usage is limited. [CHAR LIMIT=32] -->
<string name="data_usage_restricted_title">Background data restricted</string>
diff --git a/tools/aapt2/ResourceParser.cpp b/tools/aapt2/ResourceParser.cpp
index 7cffeea..1b6f882 100644
--- a/tools/aapt2/ResourceParser.cpp
+++ b/tools/aapt2/ResourceParser.cpp
@@ -26,11 +26,14 @@
#include "ResourceUtils.h"
#include "ResourceValues.h"
#include "ValueVisitor.h"
+#include "text/Utf8Iterator.h"
#include "util/ImmutableMap.h"
#include "util/Maybe.h"
#include "util/Util.h"
#include "xml/XmlPullParser.h"
+using ::aapt::ResourceUtils::StringBuilder;
+using ::aapt::text::Utf8Iterator;
using ::android::StringPiece;
namespace aapt {
@@ -169,114 +172,212 @@
config_(config),
options_(options) {}
-/**
- * Build a string from XML that converts nested elements into Span objects.
- */
+// Base class Node for representing the various Spans and UntranslatableSections of an XML string.
+// This will be used to traverse and flatten the XML string into a single std::string, with all
+// Span and Untranslatable data maintained in parallel, as indices into the string.
+class Node {
+ public:
+ virtual ~Node() = default;
+
+ // Adds the given child node to this parent node's set of child nodes, moving ownership to the
+ // parent node as well.
+ // Returns a pointer to the child node that was added as a convenience.
+ template <typename T>
+ T* AddChild(std::unique_ptr<T> node) {
+ T* raw_ptr = node.get();
+ children.push_back(std::move(node));
+ return raw_ptr;
+ }
+
+ virtual void Build(StringBuilder* builder) const {
+ for (const auto& child : children) {
+ child->Build(builder);
+ }
+ }
+
+ std::vector<std::unique_ptr<Node>> children;
+};
+
+// A chunk of text in the XML string. This lives between other tags, such as XLIFF tags and Spans.
+class SegmentNode : public Node {
+ public:
+ std::string data;
+
+ void Build(StringBuilder* builder) const override {
+ builder->AppendText(data);
+ }
+};
+
+// A tag that will be encoded into the final flattened string. Tags like <b> or <i>.
+class SpanNode : public Node {
+ public:
+ std::string name;
+
+ void Build(StringBuilder* builder) const override {
+ StringBuilder::SpanHandle span_handle = builder->StartSpan(name);
+ Node::Build(builder);
+ builder->EndSpan(span_handle);
+ }
+};
+
+// An XLIFF 'g' tag, which marks a section of the string as untranslatable.
+class UntranslatableNode : public Node {
+ public:
+ void Build(StringBuilder* builder) const override {
+ StringBuilder::UntranslatableHandle handle = builder->StartUntranslatable();
+ Node::Build(builder);
+ builder->EndUntranslatable(handle);
+ }
+};
+
+// Build a string from XML that converts nested elements into Span objects.
bool ResourceParser::FlattenXmlSubtree(
xml::XmlPullParser* parser, std::string* out_raw_string, StyleString* out_style_string,
std::vector<UntranslatableSection>* out_untranslatable_sections) {
- // Keeps track of formatting tags (<b>, <i>) and the range of characters for which they apply.
- // The stack elements refer to the indices in out_style_string->spans.
- // By first adding to the out_style_string->spans vector, and then using the stack to refer
- // to this vector, the original order of tags is preserved in cases such as <b><i>hello</b></i>.
- std::vector<size_t> span_stack;
-
- // Clear the output variables.
- out_raw_string->clear();
- out_style_string->spans.clear();
- out_untranslatable_sections->clear();
-
- // The StringBuilder will concatenate the various segments of text which are initially
- // separated by tags. It also handles unicode escape codes and quotations.
- util::StringBuilder builder;
+ std::string raw_string;
+ std::string current_text;
// The first occurrence of a <xliff:g> tag. Nested <xliff:g> tags are illegal.
Maybe<size_t> untranslatable_start_depth;
+ Node root;
+ std::vector<Node*> node_stack;
+ node_stack.push_back(&root);
+
+ bool saw_span_node = false;
+ SegmentNode* first_segment = nullptr;
+ SegmentNode* last_segment = nullptr;
+
size_t depth = 1;
- while (xml::XmlPullParser::IsGoodEvent(parser->Next())) {
+ while (depth > 0 && xml::XmlPullParser::IsGoodEvent(parser->Next())) {
const xml::XmlPullParser::Event event = parser->event();
- if (event == xml::XmlPullParser::Event::kStartElement) {
- if (parser->element_namespace().empty()) {
- // This is an HTML tag which we encode as a span. Add it to the span stack.
- std::string span_name = parser->element_name();
- const auto end_attr_iter = parser->end_attributes();
- for (auto attr_iter = parser->begin_attributes(); attr_iter != end_attr_iter; ++attr_iter) {
- span_name += ";";
- span_name += attr_iter->name;
- span_name += "=";
- span_name += attr_iter->value;
+ // First take care of any SegmentNodes that should be created.
+ if (event == xml::XmlPullParser::Event::kStartElement ||
+ event == xml::XmlPullParser::Event::kEndElement) {
+ if (!current_text.empty()) {
+ std::unique_ptr<SegmentNode> segment_node = util::make_unique<SegmentNode>();
+ segment_node->data = std::move(current_text);
+ last_segment = node_stack.back()->AddChild(std::move(segment_node));
+ if (first_segment == nullptr) {
+ first_segment = last_segment;
}
+ current_text = {};
+ }
+ }
- // Make sure the string is representable in our binary format.
- if (builder.Utf16Len() > std::numeric_limits<uint32_t>::max()) {
- diag_->Error(DiagMessage(source_.WithLine(parser->line_number()))
- << "style string '" << builder.ToString() << "' is too long");
- return false;
- }
+ switch (event) {
+ case xml::XmlPullParser::Event::kText: {
+ current_text += parser->text();
+ raw_string += parser->text();
+ } break;
- out_style_string->spans.push_back(
- Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())});
- span_stack.push_back(out_style_string->spans.size() - 1);
- } else if (parser->element_namespace() == sXliffNamespaceUri) {
- if (parser->element_name() == "g") {
- if (untranslatable_start_depth) {
- // We've already encountered an <xliff:g> tag, and nested <xliff:g> tags are illegal.
- diag_->Error(DiagMessage(source_.WithLine(parser->line_number()))
- << "illegal nested XLIFF 'g' tag");
- return false;
- } else {
- // Mark the start of an untranslatable section. Use UTF8 indices/lengths.
- untranslatable_start_depth = depth;
- const size_t current_idx = builder.ToString().size();
- out_untranslatable_sections->push_back(UntranslatableSection{current_idx, current_idx});
+ case xml::XmlPullParser::Event::kStartElement: {
+ if (parser->element_namespace().empty()) {
+ // This is an HTML tag which we encode as a span. Add it to the node stack.
+ std::unique_ptr<SpanNode> span_node = util::make_unique<SpanNode>();
+ span_node->name = parser->element_name();
+ const auto end_attr_iter = parser->end_attributes();
+ for (auto attr_iter = parser->begin_attributes(); attr_iter != end_attr_iter;
+ ++attr_iter) {
+ span_node->name += ";";
+ span_node->name += attr_iter->name;
+ span_node->name += "=";
+ span_node->name += attr_iter->value;
}
+
+ node_stack.push_back(node_stack.back()->AddChild(std::move(span_node)));
+ saw_span_node = true;
+ } else if (parser->element_namespace() == sXliffNamespaceUri) {
+ // This is an XLIFF tag, which is not encoded as a span.
+ if (parser->element_name() == "g") {
+ // Check that an 'untranslatable' tag is not already being processed. Nested
+ // <xliff:g> tags are illegal.
+ if (untranslatable_start_depth) {
+ diag_->Error(DiagMessage(source_.WithLine(parser->line_number()))
+ << "illegal nested XLIFF 'g' tag");
+ return false;
+ } else {
+ // Mark the beginning of an 'untranslatable' section.
+ untranslatable_start_depth = depth;
+ node_stack.push_back(
+ node_stack.back()->AddChild(util::make_unique<UntranslatableNode>()));
+ }
+ } else {
+ // Ignore unknown XLIFF tags, but don't warn.
+ node_stack.push_back(node_stack.back()->AddChild(util::make_unique<Node>()));
+ }
+ } else {
+ // Besides XLIFF, any other namespaced tag is unsupported and ignored.
+ diag_->Warn(DiagMessage(source_.WithLine(parser->line_number()))
+ << "ignoring element '" << parser->element_name()
+ << "' with unknown namespace '" << parser->element_namespace() << "'");
+ node_stack.push_back(node_stack.back()->AddChild(util::make_unique<Node>()));
}
- // Ignore other xliff tags, they get handled by other tools.
- } else {
- // Besides XLIFF, any other namespaced tag is unsupported and ignored.
- diag_->Warn(DiagMessage(source_.WithLine(parser->line_number()))
- << "ignoring element '" << parser->element_name()
- << "' with unknown namespace '" << parser->element_namespace() << "'");
- }
+ // Enter one level inside the element.
+ depth++;
+ } break;
- // Enter one level inside the element.
- depth++;
- } else if (event == xml::XmlPullParser::Event::kText) {
- // Record both the raw text and append to the builder to deal with escape sequences
- // and quotations.
- out_raw_string->append(parser->text());
- builder.Append(parser->text());
- } else if (event == xml::XmlPullParser::Event::kEndElement) {
- // Return one level from within the element.
- depth--;
- if (depth == 0) {
+ case xml::XmlPullParser::Event::kEndElement: {
+ // Return one level from within the element.
+ depth--;
+ if (depth == 0) {
+ break;
+ }
+
+ node_stack.pop_back();
+ if (untranslatable_start_depth == make_value(depth)) {
+ // This is the end of an untranslatable section.
+ untranslatable_start_depth = {};
+ }
+ } break;
+
+ default:
+ // ignore.
break;
- }
-
- if (parser->element_namespace().empty()) {
- // This is an HTML tag which we encode as a span. Update the span
- // stack and pop the top entry.
- Span& top_span = out_style_string->spans[span_stack.back()];
- top_span.last_char = builder.Utf16Len() - 1;
- span_stack.pop_back();
- } else if (untranslatable_start_depth == make_value(depth)) {
- // This is the end of an untranslatable section. Use UTF8 indices/lengths.
- UntranslatableSection& untranslatable_section = out_untranslatable_sections->back();
- untranslatable_section.end = builder.ToString().size();
- untranslatable_start_depth = {};
- }
- } else if (event == xml::XmlPullParser::Event::kComment) {
- // Ignore.
- } else {
- LOG(FATAL) << "unhandled XML event";
}
}
- CHECK(span_stack.empty()) << "spans haven't been fully processed";
- out_style_string->str = builder.ToString();
+ // Sanity check to make sure we processed all the nodes.
+ CHECK(node_stack.size() == 1u);
+ CHECK(node_stack.back() == &root);
+
+ if (!saw_span_node) {
+ // If there were no spans, we must treat this string a little differently (according to AAPT).
+ // Find and strip the leading whitespace from the first segment, and the trailing whitespace
+ // from the last segment.
+ if (first_segment != nullptr) {
+ // Trim leading whitespace.
+ StringPiece trimmed = util::TrimLeadingWhitespace(first_segment->data);
+ if (trimmed.size() != first_segment->data.size()) {
+ first_segment->data = trimmed.to_string();
+ }
+ }
+
+ if (last_segment != nullptr) {
+ // Trim trailing whitespace.
+ StringPiece trimmed = util::TrimTrailingWhitespace(last_segment->data);
+ if (trimmed.size() != last_segment->data.size()) {
+ last_segment->data = trimmed.to_string();
+ }
+ }
+ }
+
+ // Have the XML structure flatten itself into the StringBuilder. The StringBuilder will take
+ // care of recording the correctly adjusted Spans and UntranslatableSections.
+ StringBuilder builder;
+ root.Build(&builder);
+ if (!builder) {
+ diag_->Error(DiagMessage(source_.WithLine(parser->line_number())) << builder.GetError());
+ return false;
+ }
+
+ ResourceUtils::FlattenedXmlString flattened_string = builder.GetFlattenedString();
+ *out_raw_string = std::move(raw_string);
+ *out_untranslatable_sections = std::move(flattened_string.untranslatable_sections);
+ out_style_string->str = std::move(flattened_string.text);
+ out_style_string->spans = std::move(flattened_string.spans);
return true;
}
diff --git a/tools/aapt2/ResourceParser_test.cpp b/tools/aapt2/ResourceParser_test.cpp
index 618c8ed..c98c0b9 100644
--- a/tools/aapt2/ResourceParser_test.cpp
+++ b/tools/aapt2/ResourceParser_test.cpp
@@ -95,6 +95,16 @@
ASSERT_THAT(str, NotNull());
EXPECT_THAT(*str, StrValueEq(" hey there "));
EXPECT_THAT(str->untranslatable_sections, IsEmpty());
+
+ ASSERT_TRUE(TestParse(R"(<string name="bar">Isn\'t it cool?</string>)"));
+ str = test::GetValue<String>(&table_, "string/bar");
+ ASSERT_THAT(str, NotNull());
+ EXPECT_THAT(*str, StrValueEq("Isn't it cool?"));
+
+ ASSERT_TRUE(TestParse(R"(<string name="baz">"Isn't it cool?"</string>)"));
+ str = test::GetValue<String>(&table_, "string/baz");
+ ASSERT_THAT(str, NotNull());
+ EXPECT_THAT(*str, StrValueEq("Isn't it cool?"));
}
TEST_F(ResourceParserTest, ParseEscapedString) {
@@ -126,16 +136,16 @@
StyledString* str = test::GetValue<StyledString>(&table_, "string/foo");
ASSERT_THAT(str, NotNull());
- EXPECT_THAT(str->value->value, Eq("This is my aunt\u2019s fickle string"));
+ EXPECT_THAT(str->value->value, StrEq("This is my aunt\u2019s fickle string"));
EXPECT_THAT(str->value->spans, SizeIs(2));
EXPECT_THAT(str->untranslatable_sections, IsEmpty());
- EXPECT_THAT(*str->value->spans[0].name, Eq("b"));
- EXPECT_THAT(str->value->spans[0].first_char, Eq(17u));
+ EXPECT_THAT(*str->value->spans[0].name, StrEq("b"));
+ EXPECT_THAT(str->value->spans[0].first_char, Eq(18u));
EXPECT_THAT(str->value->spans[0].last_char, Eq(30u));
- EXPECT_THAT(*str->value->spans[1].name, Eq("small"));
- EXPECT_THAT(str->value->spans[1].first_char, Eq(24u));
+ EXPECT_THAT(*str->value->spans[1].name, StrEq("small"));
+ EXPECT_THAT(str->value->spans[1].first_char, Eq(25u));
EXPECT_THAT(str->value->spans[1].last_char, Eq(30u));
}
@@ -144,7 +154,7 @@
String* str = test::GetValue<String>(&table_, "string/foo");
ASSERT_THAT(str, NotNull());
- EXPECT_THAT(*str->value, Eq("This is what I think"));
+ EXPECT_THAT(*str->value, StrEq("This is what I think"));
EXPECT_THAT(str->untranslatable_sections, IsEmpty());
ASSERT_TRUE(TestParse(R"(<string name="foo2">" This is what I think "</string>)"));
@@ -154,6 +164,25 @@
EXPECT_THAT(*str, StrValueEq(" This is what I think "));
}
+TEST_F(ResourceParserTest, ParseStyledStringWithWhitespace) {
+  std::string input = R"(<string name="foo"> <b> My <i> favorite</i> string </b> </string>)";
+  ASSERT_TRUE(TestParse(input));
+  // Whitespace on either side of a tag boundary is kept, so the flattened text has doubled spaces.
+  StyledString* str = test::GetValue<StyledString>(&table_, "string/foo");
+  ASSERT_THAT(str, NotNull());
+  EXPECT_THAT(str->value->value, StrEq("  My  favorite string  "));
+  EXPECT_THAT(str->untranslatable_sections, IsEmpty());
+  // The span offsets below index into the 23-character flattened text asserted above.
+  ASSERT_THAT(str->value->spans, SizeIs(2u));
+  EXPECT_THAT(*str->value->spans[0].name, StrEq("b"));
+  EXPECT_THAT(str->value->spans[0].first_char, Eq(1u));   // First character after the leading space.
+  EXPECT_THAT(str->value->spans[0].last_char, Eq(21u));   // Space just before </b>.
+
+  EXPECT_THAT(*str->value->spans[1].name, StrEq("i"));
+  EXPECT_THAT(str->value->spans[1].first_char, Eq(5u));   // Space that opens " favorite".
+  EXPECT_THAT(str->value->spans[1].last_char, Eq(13u));   // Final 'e' of "favorite".
+}
+
TEST_F(ResourceParserTest, IgnoreXliffTagsOtherThanG) {
std::string input = R"(
<string name="foo" xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2">
@@ -182,12 +211,9 @@
String* str = test::GetValue<String>(&table_, "string/foo");
ASSERT_THAT(str, NotNull());
EXPECT_THAT(*str, StrValueEq("There are %1$d apples"));
- ASSERT_THAT(str->untranslatable_sections, SizeIs(1));
- // We expect indices and lengths that span to include the whitespace
- // before %1$d. This is due to how the StringBuilder withholds whitespace unless
- // needed (to deal with line breaks, etc.).
- EXPECT_THAT(str->untranslatable_sections[0].start, Eq(9u));
+ ASSERT_THAT(str->untranslatable_sections, SizeIs(1));
+ EXPECT_THAT(str->untranslatable_sections[0].start, Eq(10u));
EXPECT_THAT(str->untranslatable_sections[0].end, Eq(14u));
}
@@ -199,14 +225,16 @@
StyledString* str = test::GetValue<StyledString>(&table_, "string/foo");
ASSERT_THAT(str, NotNull());
- EXPECT_THAT(str->value->value, Eq("There are %1$d apples"));
- ASSERT_THAT(str->untranslatable_sections, SizeIs(1));
+ EXPECT_THAT(str->value->value, Eq(" There are %1$d apples"));
- // We expect indices and lengths that span to include the whitespace
- // before %1$d. This is due to how the StringBuilder withholds whitespace unless
- // needed (to deal with line breaks, etc.).
- EXPECT_THAT(str->untranslatable_sections[0].start, Eq(9u));
- EXPECT_THAT(str->untranslatable_sections[0].end, Eq(14u));
+ ASSERT_THAT(str->untranslatable_sections, SizeIs(1));
+ EXPECT_THAT(str->untranslatable_sections[0].start, Eq(11u));
+ EXPECT_THAT(str->untranslatable_sections[0].end, Eq(15u));
+
+ ASSERT_THAT(str->value->spans, SizeIs(1u));
+ EXPECT_THAT(*str->value->spans[0].name, StrEq("b"));
+ EXPECT_THAT(str->value->spans[0].first_char, Eq(11u));
+ EXPECT_THAT(str->value->spans[0].last_char, Eq(14u));
}
TEST_F(ResourceParserTest, ParseNull) {
diff --git a/tools/aapt2/ResourceUtils.cpp b/tools/aapt2/ResourceUtils.cpp
index 628466d..8fc3d65 100644
--- a/tools/aapt2/ResourceUtils.cpp
+++ b/tools/aapt2/ResourceUtils.cpp
@@ -18,17 +18,23 @@
#include <sstream>
+#include "android-base/stringprintf.h"
#include "androidfw/ResourceTypes.h"
#include "androidfw/ResourceUtils.h"
#include "NameMangler.h"
#include "SdkConstants.h"
#include "format/binary/ResourceTypeExtensions.h"
+#include "text/Unicode.h"
+#include "text/Utf8Iterator.h"
#include "util/Files.h"
#include "util/Util.h"
+using ::aapt::text::IsWhitespace;
+using ::aapt::text::Utf8Iterator;
using ::android::StringPiece;
using ::android::StringPiece16;
+using ::android::base::StringPrintf;
namespace aapt {
namespace ResourceUtils {
@@ -750,5 +756,195 @@
return util::make_unique<BinaryPrimitive>(res_value);
}
+// Converts the codepoint to UTF-8 and appends it to the string. Returns false, leaving the string untouched, if utf32_to_utf8_length() reports a negative length.
+static bool AppendCodepointToUtf8String(char32_t codepoint, std::string* output) {
+ ssize_t len = utf32_to_utf8_length(&codepoint, 1);
+ if (len < 0) {
+ return false;
+ }
+
+ const size_t start_append_pos = output->size();
+
+ // Make room for the next character.
+ output->resize(output->size() + len);
+
+ char* dst = &*(output->begin() + start_append_pos);
+ utf32_to_utf8(&codepoint, 1, dst, len + 1);  // NOTE(review): size passed is len + 1, presumably room for a NUL terminator past the resized data -- confirm against utf32_to_utf8().
+ return true;
+}
+
+// Reads up to 4 hex digits (the payload of a \u escape) from |iter| and appends the codepoint they
+// encode to |output| as UTF-8. Returns false on a non-hex character or if the append fails.
+static bool AppendUnicodeEscapeSequence(Utf8Iterator* iter, std::string* output) {
+  char32_t code = 0;
+  for (size_t i = 0; i < 4 && iter->HasNext(); i++) {  // Stops early if the input runs out.
+    char32_t codepoint = iter->Next();
+    char32_t a;  // Numeric value (0-15) of the current hex digit.
+    if (codepoint >= U'0' && codepoint <= U'9') {
+      a = codepoint - U'0';
+    } else if (codepoint >= U'a' && codepoint <= U'f') {
+      a = codepoint - U'a' + 10;
+    } else if (codepoint >= U'A' && codepoint <= U'F') {
+      a = codepoint - U'A' + 10;
+    } else {
+      return false;
+    }
+    code = (code << 4) | a;  // Shift in the next 4-bit digit.
+  }
+  return AppendCodepointToUtf8String(code, output);
+}
+
+StringBuilder::StringBuilder(bool preserve_spaces)
+ : preserve_spaces_(preserve_spaces), quote_(preserve_spaces) {
+}
+
+StringBuilder& StringBuilder::AppendText(const std::string& text) {  // Processes |text| (whitespace, quotes, escapes) and appends the result; no-op once an error is set.
+ if (!error_.empty()) {
+ return *this;
+ }
+ // Remember where this chunk starts so that only its UTF-16 length is accumulated at the end.
+ const size_t previous_len = xml_string_.text.size();
+ Utf8Iterator iter(text);
+ while (iter.HasNext()) {
+ char32_t codepoint = iter.Next();
+ if (!quote_ && text::IsWhitespace(codepoint)) {
+ if (!last_codepoint_was_space_) {
+ // Emit a single space for the first whitespace codepoint of a run.
+ xml_string_.text += ' ';
+ last_codepoint_was_space_ = true;
+ }
+
+ // Skip the remaining whitespace of the run.
+ continue;
+ }
+
+ // This is not a space (or we are inside quotes), so the whitespace run has ended.
+ last_codepoint_was_space_ = false;
+
+ if (codepoint == U'\\') {
+ if (iter.HasNext()) {  // A lone backslash at the very end of |text| is dropped.
+ codepoint = iter.Next();
+ switch (codepoint) {
+ case U't':
+ xml_string_.text += '\t';
+ break;
+
+ case U'n':
+ xml_string_.text += '\n';
+ break;
+
+ case U'#':
+ case U'@':
+ case U'?':
+ case U'"':
+ case U'\'':
+ case U'\\':
+ xml_string_.text += static_cast<char>(codepoint);
+ break;
+
+ case U'u':
+ if (!AppendUnicodeEscapeSequence(&iter, &xml_string_.text)) {
+ error_ =
+ StringPrintf("invalid unicode escape sequence in string\n\"%s\"", text.c_str());
+ return *this;
+ }
+ break;
+
+ default:
+ // Unknown escape: drop the backslash and just include the codepoint itself.
+ AppendCodepointToUtf8String(codepoint, &xml_string_.text);  // Return value is not checked here.
+ break;
+ }
+ }
+ } else if (!preserve_spaces_ && codepoint == U'"') {
+ // Only toggle the quote state when we are not preserving spaces; the quote itself is not emitted.
+ quote_ = !quote_;
+
+ } else if (!quote_ && codepoint == U'\'') {
+ // An apostrophe outside of double quotes must be escaped; record the error and stop.
+ error_ = StringPrintf("unescaped apostrophe in string\n\"%s\"", text.c_str());
+ return *this;
+
+ } else {
+ AppendCodepointToUtf8String(codepoint, &xml_string_.text);
+ }
+ }
+
+ // Accumulate the UTF-16 length of just the text appended by this call.
+ const uint8_t* utf8_data = reinterpret_cast<const uint8_t*>(xml_string_.text.c_str());
+ const size_t utf8_length = xml_string_.text.size();
+ ssize_t len = utf8_to_utf16_length(utf8_data + previous_len, utf8_length - previous_len);
+ if (len < 0) {
+ error_ = StringPrintf("invalid unicode code point in string\n\"%s\"", utf8_data + previous_len);
+ return *this;
+ }
+
+ utf16_len_ += static_cast<uint32_t>(len);
+ return *this;
+}
+
+StringBuilder::SpanHandle StringBuilder::StartSpan(const std::string& name) {  // Opens a span at the current UTF-16 offset and returns its index in xml_string_.spans.
+ if (!error_.empty()) {
+ return 0u;  // After an error no span is recorded; the returned handle is a placeholder.
+ }
+
+ // Starting a span resets the whitespace-collapsing and quotation state.
+ ResetTextState();
+ Span span;
+ span.name = name;
+ span.first_char = span.last_char = utf16_len_;
+ xml_string_.spans.push_back(std::move(span));
+ return xml_string_.spans.size() - 1;
+}
+
+void StringBuilder::EndSpan(SpanHandle handle) {  // Closes the span identified by |handle| at the last UTF-16 unit appended so far.
+ if (!error_.empty()) {
+ return;
+ }
+
+ // Ending a span resets the whitespace-collapsing and quotation state.
+ ResetTextState();
+ xml_string_.spans[handle].last_char = utf16_len_ - 1u;  // NOTE(review): wraps around if no text has been appended yet (utf16_len_ == 0) -- confirm an empty leading span is handled downstream.
+}
+
+StringBuilder::UntranslatableHandle StringBuilder::StartUntranslatable() {
+ if (!error_.empty()) {
+ return 0u;
+ }
+
+ UntranslatableSection section;
+ section.start = section.end = xml_string_.text.size();
+ xml_string_.untranslatable_sections.push_back(section);
+ return xml_string_.untranslatable_sections.size() - 1;
+}
+
+void StringBuilder::EndUntranslatable(UntranslatableHandle handle) {
+ if (!error_.empty()) {
+ return;
+ }
+ xml_string_.untranslatable_sections[handle].end = xml_string_.text.size();
+}
+
+FlattenedXmlString StringBuilder::GetFlattenedString() const {
+ return xml_string_;
+}
+
+std::string StringBuilder::to_string() const {
+ return xml_string_.text;
+}
+
+StringBuilder::operator bool() const {
+ return error_.empty();
+}
+
+std::string StringBuilder::GetError() const {
+ return error_;
+}
+
+void StringBuilder::ResetTextState() {
+ quote_ = preserve_spaces_;
+ last_codepoint_was_space_ = false;
+}
+
} // namespace ResourceUtils
} // namespace aapt
diff --git a/tools/aapt2/ResourceUtils.h b/tools/aapt2/ResourceUtils.h
index f83d49e..7af2fe0 100644
--- a/tools/aapt2/ResourceUtils.h
+++ b/tools/aapt2/ResourceUtils.h
@@ -224,6 +224,95 @@
const android::Res_value& res_value,
StringPool* dst_pool);
+// A string flattened from an XML hierarchy, which maintains tags and untranslatable sections
+// in parallel data structures.
+struct FlattenedXmlString {
+ std::string text;
+ std::vector<UntranslatableSection> untranslatable_sections;
+ std::vector<Span> spans;
+};
+
+// Flattens an XML hierarchy into a FlattenedXmlString, formatting the text, escaping characters,
+// and removing whitespace, all while keeping the untranslatable sections and spans in sync with the
+// transformations.
+//
+// Specifically, the StringBuilder will handle escaped characters like \t, \n, \\, \', etc.
+// Single quotes *must* be escaped, unless within a pair of double-quotes.
+// Pairs of double-quotes disable whitespace stripping of the enclosed text.
+// Unicode escape codes (\u0049) are interpreted and the represented Unicode character is inserted.
+//
+// A NOTE ON WHITESPACE:
+//
+// When preserve_spaces is false, and when text is not enclosed within double-quotes,
+// StringBuilder replaces a series of whitespace with a single space character. This happens at the
+// start and end of the string as well, so leading and trailing whitespace is possible.
+//
+// When a Span is started or stopped, the whitespace counter is reset, meaning if whitespace
+// is encountered directly after the span, it will be emitted. This leads to situations like the
+// following: "This <b> is </b> spaced" -> "This  is  spaced". Without spans, this would be properly
+// compressed: "This  is  spaced" -> "This is spaced".
+//
+// Untranslatable sections do not have the same problem:
+// "This <xliff:g> is </xliff:g> not spaced" -> "This is not spaced".
+//
+// NOTE: This is all the way it is because AAPT1 did it this way. Maintaining backwards
+// compatibility is important.
+//
+class StringBuilder {
+ public:
+ using SpanHandle = size_t;  // Index into the flattened string's spans vector.
+ using UntranslatableHandle = size_t;  // Index into the flattened string's untranslatable_sections vector.
+
+ // Creates a StringBuilder. If preserve_spaces is true, whitespace removal is not performed, double
+ // quotes are kept as literal characters, and single quotes can be used without escaping them.
+ explicit StringBuilder(bool preserve_spaces = false);
+
+ // Appends a chunk of text, applying whitespace, quote and escape processing. No-op once an error is set.
+ StringBuilder& AppendText(const std::string& text);
+
+ // Starts a Span (tag) with the given name. The name is expected to be of the form:
+ // "tag_name;attr1=value1;attr2=value2" (no trailing ';' -- this is what FlattenXmlSubtree builds).
+ // The span's first_char/last_char are offsets in UTF-16 code units of the flattened text.
+ // To end the span, pass back the SpanHandle received from this method to the EndSpan() method.
+ SpanHandle StartSpan(const std::string& name);
+
+ // Ends a Span (tag). Pass in the matching SpanHandle previously obtained from StartSpan().
+ void EndSpan(SpanHandle handle);
+
+ // Starts an Untranslatable section at the current end of the text (a byte offset into the UTF-8 text).
+ // To end the section, pass back the UntranslatableHandle received from this method to
+ // the EndUntranslatable() method.
+ UntranslatableHandle StartUntranslatable();
+
+ // Ends an Untranslatable section. Pass in the matching UntranslatableHandle previously obtained
+ // from StartUntranslatable().
+ void EndUntranslatable(UntranslatableHandle handle);
+
+ // Returns a copy of the flattened XML string, with all spans and untranslatable sections encoded
+ // as parallel data structures.
+ FlattenedXmlString GetFlattenedString() const;
+
+ // Returns just the flattened XML text, with no spans or untranslatable sections.
+ std::string to_string() const;
+
+ // Returns true if there was no error.
+ explicit operator bool() const;
+ // Returns the recorded error message, or an empty string if there was no error.
+ std::string GetError() const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(StringBuilder);
+
+ void ResetTextState();  // Resets the quotation and whitespace-run state (called at span boundaries).
+
+ std::string error_;  // Empty while no error has occurred.
+ FlattenedXmlString xml_string_;  // The text, spans and untranslatable sections built so far.
+ uint32_t utf16_len_ = 0u;  // Length of xml_string_.text in UTF-16 code units.
+ bool preserve_spaces_;
+ bool quote_;  // True while whitespace is being kept verbatim (inside quotes, or preserve_spaces_).
+ bool last_codepoint_was_space_ = false;  // True while inside a run of collapsed whitespace.
+};
+
} // namespace ResourceUtils
} // namespace aapt
diff --git a/tools/aapt2/ResourceUtils_test.cpp b/tools/aapt2/ResourceUtils_test.cpp
index cb786d3..11f3fa3 100644
--- a/tools/aapt2/ResourceUtils_test.cpp
+++ b/tools/aapt2/ResourceUtils_test.cpp
@@ -212,4 +212,48 @@
Pointee(ValueEq(BinaryPrimitive(Res_value::TYPE_FLOAT, expected_float_flattened))));
}
+TEST(ResourceUtilsTest, StringBuilderWhitespaceRemoval) {
+  EXPECT_THAT(ResourceUtils::StringBuilder()
+                  .AppendText(" hey guys ")
+                  .AppendText(" this is so cool ")
+                  .to_string(),
+              Eq(" hey guys this is so cool "));  // The whitespace run is collapsed across the two chunks.
+  EXPECT_THAT(ResourceUtils::StringBuilder()
+                  .AppendText(" \" wow, so many \t ")
+                  .AppendText("spaces. \"what? ")
+                  .to_string(),
+              Eq("  wow, so many \t spaces. what? "));  // One collapsed space, then the quoted text verbatim.
+  EXPECT_THAT(ResourceUtils::StringBuilder()
+                  .AppendText(" where \t ")
+                  .AppendText(" \nis the pie?")
+                  .to_string(),
+              Eq(" where is the pie?"));  // Tabs and newlines count as whitespace and collapse too.
+}
+
+TEST(ResourceUtilsTest, StringBuilderEscaping) {
+ EXPECT_THAT(ResourceUtils::StringBuilder()
+ .AppendText("hey guys\\n ")
+ .AppendText(" this \\t is so\\\\ cool")
+ .to_string(),
+ Eq("hey guys\n this \t is so\\ cool"));
+ EXPECT_THAT(ResourceUtils::StringBuilder().AppendText("\\@\\?\\#\\\\\\'").to_string(),
+ Eq("@?#\\\'"));
+}
+
+TEST(ResourceUtilsTest, StringBuilderMisplacedQuote) {
+ ResourceUtils::StringBuilder builder;
+ EXPECT_FALSE(builder.AppendText("they're coming!"));
+}
+
+TEST(ResourceUtilsTest, StringBuilderUnicodeCodes) {
+ EXPECT_THAT(ResourceUtils::StringBuilder().AppendText("\\u00AF\\u0AF0 woah").to_string(),
+ Eq("\u00AF\u0AF0 woah"));
+ EXPECT_FALSE(ResourceUtils::StringBuilder().AppendText("\\u00 yo"));
+}
+
+TEST(ResourceUtilsTest, StringBuilderPreserveSpaces) {
+ EXPECT_THAT(ResourceUtils::StringBuilder(true /*preserve_spaces*/).AppendText("\"").to_string(),
+ Eq("\""));
+}
+
} // namespace aapt
diff --git a/tools/aapt2/format/binary/XmlFlattener.cpp b/tools/aapt2/format/binary/XmlFlattener.cpp
index 067372b..781b9fe 100644
--- a/tools/aapt2/format/binary/XmlFlattener.cpp
+++ b/tools/aapt2/format/binary/XmlFlattener.cpp
@@ -25,6 +25,7 @@
#include "androidfw/ResourceTypes.h"
#include "utils/misc.h"
+#include "ResourceUtils.h"
#include "SdkConstants.h"
#include "ValueVisitor.h"
#include "format/binary/ChunkWriter.h"
@@ -33,6 +34,8 @@
using namespace android;
+using ::aapt::ResourceUtils::StringBuilder;
+
namespace aapt {
namespace {
@@ -89,9 +92,9 @@
ResXMLTree_cdataExt* flat_text = writer.NextBlock<ResXMLTree_cdataExt>();
// Process plain strings to make sure they get properly escaped.
- util::StringBuilder builder;
- builder.Append(node->text);
- AddString(builder.ToString(), kLowPriority, &flat_text->data);
+ StringBuilder builder;
+ builder.AppendText(node->text);
+ AddString(builder.to_string(), kLowPriority, &flat_text->data);
writer.Finish();
}
@@ -272,7 +275,7 @@
// There is no compiled value, so treat the raw string as compiled, once it is processed to
// make sure escape sequences are properly interpreted.
processed_str =
- util::StringBuilder(true /*preserve_spaces*/).Append(xml_attr->value).ToString();
+ StringBuilder(true /*preserve_spaces*/).AppendText(xml_attr->value).to_string();
compiled_text = StringPiece(processed_str);
}
diff --git a/tools/aapt2/link/ReferenceLinker.cpp b/tools/aapt2/link/ReferenceLinker.cpp
index b8f8804..9aaaa69 100644
--- a/tools/aapt2/link/ReferenceLinker.cpp
+++ b/tools/aapt2/link/ReferenceLinker.cpp
@@ -30,6 +30,7 @@
#include "util/Util.h"
#include "xml/XmlUtil.h"
+using ::aapt::ResourceUtils::StringBuilder;
using ::android::StringPiece;
namespace aapt {
@@ -133,10 +134,11 @@
// If we could not parse as any specific type, try a basic STRING.
if (!transformed && (attr->type_mask & android::ResTable_map::TYPE_STRING)) {
- util::StringBuilder string_builder;
- string_builder.Append(*raw_string->value);
+ StringBuilder string_builder;
+ string_builder.AppendText(*raw_string->value);
if (string_builder) {
- transformed = util::make_unique<String>(string_pool_->MakeRef(string_builder.ToString()));
+ transformed =
+ util::make_unique<String>(string_pool_->MakeRef(string_builder.to_string()));
}
}
diff --git a/tools/aapt2/util/Util.cpp b/tools/aapt2/util/Util.cpp
index e42145d..d1c9ca1 100644
--- a/tools/aapt2/util/Util.cpp
+++ b/tools/aapt2/util/Util.cpp
@@ -76,6 +76,34 @@
return str.substr(str.size() - suffix.size(), suffix.size()) == suffix;
}
+StringPiece TrimLeadingWhitespace(const StringPiece& str) {
+ // Returns the sub-piece of |str| that remains after skipping leading whitespace.
+ if (str.size() == 0 || str.data() == nullptr) {
+ return str;
+ }
+ const char* start = str.data();
+ const char* end = start + str.length();
+ // isspace() is undefined for negative char values; widen via unsigned char first.
+ while (start != end && isspace(static_cast<unsigned char>(*start))) {
+ start++;
+ }
+ return StringPiece(start, end - start);
+}
+
+StringPiece TrimTrailingWhitespace(const StringPiece& str) {
+ // Returns the sub-piece of |str| that is left after dropping trailing whitespace.
+ if (str.size() == 0 || str.data() == nullptr) {
+ return str;
+ }
+ const char* start = str.data();
+ const char* end = start + str.length();
+ // isspace() is undefined for negative char values; widen via unsigned char first.
+ while (end != start && isspace(static_cast<unsigned char>(*(end - 1)))) {
+ end--;
+ }
+ return StringPiece(start, end - start);
+}
+
StringPiece TrimWhitespace(const StringPiece& str) {
if (str.size() == 0 || str.data() == nullptr) {
return str;
@@ -269,162 +297,6 @@
return true;
}
-static bool AppendCodepointToUtf8String(char32_t codepoint, std::string* output) {
- ssize_t len = utf32_to_utf8_length(&codepoint, 1);
- if (len < 0) {
- return false;
- }
-
- const size_t start_append_pos = output->size();
-
- // Make room for the next character.
- output->resize(output->size() + len);
-
- char* dst = &*(output->begin() + start_append_pos);
- utf32_to_utf8(&codepoint, 1, dst, len + 1);
- return true;
-}
-
-static bool AppendUnicodeCodepoint(Utf8Iterator* iter, std::string* output) {
- char32_t code = 0;
- for (size_t i = 0; i < 4 && iter->HasNext(); i++) {
- char32_t codepoint = iter->Next();
- char32_t a;
- if (codepoint >= U'0' && codepoint <= U'9') {
- a = codepoint - U'0';
- } else if (codepoint >= U'a' && codepoint <= U'f') {
- a = codepoint - U'a' + 10;
- } else if (codepoint >= U'A' && codepoint <= U'F') {
- a = codepoint - U'A' + 10;
- } else {
- return {};
- }
- code = (code << 4) | a;
- }
- return AppendCodepointToUtf8String(code, output);
-}
-
-static bool IsCodepointSpace(char32_t codepoint) {
- if (static_cast<uint32_t>(codepoint) & 0xffffff00u) {
- return false;
- }
- return isspace(static_cast<char>(codepoint));
-}
-
-StringBuilder::StringBuilder(bool preserve_spaces) : preserve_spaces_(preserve_spaces) {
-}
-
-StringBuilder& StringBuilder::Append(const StringPiece& str) {
- if (!error_.empty()) {
- return *this;
- }
-
- // Where the new data will be appended to.
- const size_t new_data_index = str_.size();
-
- Utf8Iterator iter(str);
- while (iter.HasNext()) {
- const char32_t codepoint = iter.Next();
-
- if (last_char_was_escape_) {
- switch (codepoint) {
- case U't':
- str_ += '\t';
- break;
-
- case U'n':
- str_ += '\n';
- break;
-
- case U'#':
- case U'@':
- case U'?':
- case U'"':
- case U'\'':
- case U'\\':
- str_ += static_cast<char>(codepoint);
- break;
-
- case U'u':
- if (!AppendUnicodeCodepoint(&iter, &str_)) {
- error_ = "invalid unicode escape sequence";
- return *this;
- }
- break;
-
- default:
- // Ignore the escape character and just include the codepoint.
- AppendCodepointToUtf8String(codepoint, &str_);
- break;
- }
- last_char_was_escape_ = false;
-
- } else if (!preserve_spaces_ && codepoint == U'"') {
- if (!quote_ && trailing_space_) {
- // We found an opening quote, and we have trailing space, so we should append that
- // space now.
- if (trailing_space_) {
- // We had trailing whitespace, so replace with a single space.
- if (!str_.empty()) {
- str_ += ' ';
- }
- trailing_space_ = false;
- }
- }
- quote_ = !quote_;
-
- } else if (!preserve_spaces_ && codepoint == U'\'' && !quote_) {
- // This should be escaped.
- error_ = "unescaped apostrophe";
- return *this;
-
- } else if (codepoint == U'\\') {
- // This is an escape sequence, convert to the real value.
- if (!quote_ && trailing_space_) {
- // We had trailing whitespace, so
- // replace with a single space.
- if (!str_.empty()) {
- str_ += ' ';
- }
- trailing_space_ = false;
- }
- last_char_was_escape_ = true;
- } else {
- if (preserve_spaces_ || quote_) {
- // Quotes mean everything is taken, including whitespace.
- AppendCodepointToUtf8String(codepoint, &str_);
- } else {
- // This is not quoted text, so we will accumulate whitespace and only emit a single
- // character of whitespace if it is followed by a non-whitespace character.
- if (IsCodepointSpace(codepoint)) {
- // We found whitespace.
- trailing_space_ = true;
- } else {
- if (trailing_space_) {
- // We saw trailing space before, so replace all
- // that trailing space with one space.
- if (!str_.empty()) {
- str_ += ' ';
- }
- trailing_space_ = false;
- }
- AppendCodepointToUtf8String(codepoint, &str_);
- }
- }
- }
- }
-
- // Accumulate the added string's UTF-16 length.
- ssize_t len = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(str_.data()) + new_data_index,
- str_.size() - new_data_index);
- if (len < 0) {
- error_ = "invalid unicode code point";
- return *this;
- }
- utf16_len_ += len;
- return *this;
-}
-
std::u16string Utf8ToUtf16(const StringPiece& utf8) {
ssize_t utf16_length = utf8_to_utf16_length(
reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
diff --git a/tools/aapt2/util/Util.h b/tools/aapt2/util/Util.h
index 7c949b90..0eb35d1 100644
--- a/tools/aapt2/util/Util.h
+++ b/tools/aapt2/util/Util.h
@@ -59,7 +59,15 @@
// Returns true if the string ends with suffix.
bool EndsWith(const android::StringPiece& str, const android::StringPiece& suffix);
-// Creates a new StringPiece16 that points to a substring of the original string without leading or
+// Creates a new StringPiece that points to a substring of the original string without leading
+// whitespace.
+android::StringPiece TrimLeadingWhitespace(const android::StringPiece& str);
+
+// Creates a new StringPiece that points to a substring of the original string without trailing
+// whitespace.
+android::StringPiece TrimTrailingWhitespace(const android::StringPiece& str);
+
+// Creates a new StringPiece that points to a substring of the original string without leading or
// trailing whitespace.
android::StringPiece TrimWhitespace(const android::StringPiece& str);
@@ -141,9 +149,12 @@
// break the string interpolation.
bool VerifyJavaStringFormat(const android::StringPiece& str);
+bool AppendStyledString(const android::StringPiece& input, bool preserve_spaces,  // NOTE(review):
+ std::string* out_str, std::string* out_error);  // undocumented; no definition in this patch's Util.cpp hunks - confirm.
+
class StringBuilder {
public:
- explicit StringBuilder(bool preserve_spaces = false);
+ StringBuilder() = default;
StringBuilder& Append(const android::StringPiece& str);
const std::string& ToString() const;
@@ -158,7 +169,6 @@
explicit operator bool() const;
private:
- bool preserve_spaces_;
std::string str_;
size_t utf16_len_ = 0;
bool quote_ = false;
diff --git a/tools/aapt2/util/Util_test.cpp b/tools/aapt2/util/Util_test.cpp
index 2d1242a..d4e3bec 100644
--- a/tools/aapt2/util/Util_test.cpp
+++ b/tools/aapt2/util/Util_test.cpp
@@ -41,45 +41,6 @@
EXPECT_TRUE(util::StartsWith("hello.xml", "he"));
}
-TEST(UtilTest, StringBuilderSplitEscapeSequence) {
- EXPECT_THAT(util::StringBuilder().Append("this is a new\\").Append("nline.").ToString(),
- Eq("this is a new\nline."));
-}
-
-TEST(UtilTest, StringBuilderWhitespaceRemoval) {
- EXPECT_THAT(util::StringBuilder().Append(" hey guys ").Append(" this is so cool ").ToString(),
- Eq("hey guys this is so cool"));
- EXPECT_THAT(
- util::StringBuilder().Append(" \" wow, so many \t ").Append("spaces. \"what? ").ToString(),
- Eq(" wow, so many \t spaces. what?"));
- EXPECT_THAT(util::StringBuilder().Append(" where \t ").Append(" \nis the pie?").ToString(),
- Eq("where is the pie?"));
-}
-
-TEST(UtilTest, StringBuilderEscaping) {
- EXPECT_THAT(util::StringBuilder()
- .Append(" hey guys\\n ")
- .Append(" this \\t is so\\\\ cool ")
- .ToString(),
- Eq("hey guys\n this \t is so\\ cool"));
- EXPECT_THAT(util::StringBuilder().Append("\\@\\?\\#\\\\\\'").ToString(), Eq("@?#\\\'"));
-}
-
-TEST(UtilTest, StringBuilderMisplacedQuote) {
- util::StringBuilder builder;
- EXPECT_FALSE(builder.Append("they're coming!"));
-}
-
-TEST(UtilTest, StringBuilderUnicodeCodes) {
- EXPECT_THAT(util::StringBuilder().Append("\\u00AF\\u0AF0 woah").ToString(),
- Eq("\u00AF\u0AF0 woah"));
- EXPECT_FALSE(util::StringBuilder().Append("\\u00 yo"));
-}
-
-TEST(UtilTest, StringBuilderPreserveSpaces) {
- EXPECT_THAT(util::StringBuilder(true /*preserve_spaces*/).Append("\"").ToString(), Eq("\""));
-}
-
TEST(UtilTest, TokenizeInput) {
auto tokenizer = util::Tokenize(StringPiece("this| is|the|end"), '|');
auto iter = tokenizer.begin();