AAPT2: Fix unrecognized CDATA
This change adds support for resources that have CDATA blocks within
their values. The blocks should allow any character to occur without
being escaped. It also should not affect the current state of quote
processing.
Bug: 80326349
Test: Created tests in aapt2_tests
Change-Id: Ie1a00e50cffc877e2eb5f788f8d7a1bda839c0cf
diff --git a/tools/aapt2/ResourceParser.cpp b/tools/aapt2/ResourceParser.cpp
index 089c9e2..f45748f 100644
--- a/tools/aapt2/ResourceParser.cpp
+++ b/tools/aapt2/ResourceParser.cpp
@@ -208,6 +208,15 @@
}
};
+// A run of text from the XML string that was enclosed in a CDATA section.
+class CdataSegmentNode : public SegmentNode {
+ public:
+  // Appends this segment's text to `builder`, requesting space preservation for this append
+  // only.
+  void Build(StringBuilder* builder) const override {
+    constexpr bool kPreserveSpaces = true;
+    builder->AppendText(data, kPreserveSpaces);
+  }
+};
+
// A tag that will be encoded into the final flattened string. Tags like <b> or <i>.
class SpanNode : public Node {
public:
@@ -244,6 +253,7 @@
std::vector<Node*> node_stack;
node_stack.push_back(&root);
+ bool cdata_block = false;
bool saw_span_node = false;
SegmentNode* first_segment = nullptr;
SegmentNode* last_segment = nullptr;
@@ -253,11 +263,15 @@
const xml::XmlPullParser::Event event = parser->event();
// First take care of any SegmentNodes that should be created.
- if (event == xml::XmlPullParser::Event::kStartElement ||
- event == xml::XmlPullParser::Event::kEndElement) {
+ if (event == xml::XmlPullParser::Event::kStartElement
+ || event == xml::XmlPullParser::Event::kEndElement
+ || event == xml::XmlPullParser::Event::kCdataStart
+ || event == xml::XmlPullParser::Event::kCdataEnd) {
if (!current_text.empty()) {
- std::unique_ptr<SegmentNode> segment_node = util::make_unique<SegmentNode>();
+ std::unique_ptr<SegmentNode> segment_node = (cdata_block)
+ ? util::make_unique<CdataSegmentNode>() : util::make_unique<SegmentNode>();
segment_node->data = std::move(current_text);
+
last_segment = node_stack.back()->AddChild(std::move(segment_node));
if (first_segment == nullptr) {
first_segment = last_segment;
@@ -333,6 +347,16 @@
}
} break;
+ case xml::XmlPullParser::Event::kCdataStart: {
+ cdata_block = true;
+ break;
+ }
+
+ case xml::XmlPullParser::Event::kCdataEnd: {
+ cdata_block = false;
+ break;
+ }
+
default:
// ignore.
break;
diff --git a/tools/aapt2/ResourceParser_test.cpp b/tools/aapt2/ResourceParser_test.cpp
index 41b4041..a2e5ad1 100644
--- a/tools/aapt2/ResourceParser_test.cpp
+++ b/tools/aapt2/ResourceParser_test.cpp
@@ -971,4 +971,40 @@
ASSERT_FALSE(TestParse(input));
}
+// Checks that CDATA sections inside <string> values parse successfully and that their contents
+// end up in the resulting String value as written.
+TEST_F(ResourceParserTest, ParseCData) {
+  // An apostrophe inside a CDATA section is accepted without escaping.
+  {
+    std::string xml = R"(
+ <string name="foo"><![CDATA[some text and ' apostrophe]]></string>)";
+    ASSERT_TRUE(TestParse(xml));
+
+    String* value = test::GetValue<String>(&table_, "string/foo");
+    ASSERT_THAT(value, NotNull());
+    EXPECT_THAT(*value, StrValueEq("some text and ' apostrophe"));
+  }
+
+  // Double quotes should not change the state of whitespace processing
+  {
+    std::string xml = R"(<string name="foo2">Hello<![CDATA[ "</string>' ]]> World</string>)";
+    ASSERT_TRUE(TestParse(xml));
+
+    String* value = test::GetValue<String>(&table_, "string/foo2");
+    ASSERT_THAT(value, NotNull());
+    EXPECT_THAT(*value, StrValueEq("Hello \"</string>' World"));
+  }
+
+  // Cdata blocks should not have their whitespace trimmed
+  {
+    std::string xml = R"(<string name="foo3"> <![CDATA[ text ]]> </string>)";
+    ASSERT_TRUE(TestParse(xml));
+
+    String* value = test::GetValue<String>(&table_, "string/foo3");
+    ASSERT_THAT(value, NotNull());
+    EXPECT_THAT(*value, StrValueEq(" text "));
+  }
+
+  // An empty CDATA section yields an empty string.
+  {
+    std::string xml = R"(<string name="foo4"> <![CDATA[]]> </string>)";
+    ASSERT_TRUE(TestParse(xml));
+
+    String* value = test::GetValue<String>(&table_, "string/foo4");
+    ASSERT_THAT(value, NotNull());
+    EXPECT_THAT(*value, StrValueEq(""));
+  }
+
+  // A CDATA section holding only whitespace keeps that whitespace.
+  {
+    std::string xml = R"(<string name="foo5"> <![CDATA[ ]]> </string>)";
+    ASSERT_TRUE(TestParse(xml));
+
+    String* value = test::GetValue<String>(&table_, "string/foo5");
+    ASSERT_THAT(value, NotNull());
+    EXPECT_THAT(*value, StrValueEq(" "));
+  }
+}
+
} // namespace aapt
diff --git a/tools/aapt2/ResourceUtils.cpp b/tools/aapt2/ResourceUtils.cpp
index 560077c..c48765b 100644
--- a/tools/aapt2/ResourceUtils.cpp
+++ b/tools/aapt2/ResourceUtils.cpp
@@ -797,16 +797,20 @@
: preserve_spaces_(preserve_spaces), quote_(preserve_spaces) {
}
-StringBuilder& StringBuilder::AppendText(const std::string& text) {
+StringBuilder& StringBuilder::AppendText(const std::string& text, bool preserve_spaces) {
if (!error_.empty()) {
return *this;
}
+ // Enable preserving spaces if it is enabled for this append or the StringBuilder was constructed
+ // to preserve spaces
+ preserve_spaces = (preserve_spaces) ? preserve_spaces : preserve_spaces_;
+
const size_t previous_len = xml_string_.text.size();
Utf8Iterator iter(text);
while (iter.HasNext()) {
char32_t codepoint = iter.Next();
- if (!quote_ && iswspace(codepoint)) {
+ if (!preserve_spaces && !quote_ && iswspace(codepoint)) {
if (!last_codepoint_was_space_) {
// Emit a space if it's the first.
xml_string_.text += ' ';
@@ -827,7 +831,6 @@
case U't':
xml_string_.text += '\t';
break;
-
case U'n':
xml_string_.text += '\n';
break;
@@ -855,12 +858,12 @@
break;
}
}
- } else if (!preserve_spaces_ && codepoint == U'"') {
+ } else if (!preserve_spaces && codepoint == U'"') {
// Only toggle the quote state when we are not preserving spaces.
quote_ = !quote_;
- } else if (!quote_ && codepoint == U'\'') {
- // This should be escaped.
+ } else if (!preserve_spaces && !quote_ && codepoint == U'\'') {
+ // This should be escaped when we are not preserving spaces
error_ = StringPrintf("unescaped apostrophe in string\n\"%s\"", text.c_str());
return *this;
diff --git a/tools/aapt2/ResourceUtils.h b/tools/aapt2/ResourceUtils.h
index 7af2fe0..410ef28 100644
--- a/tools/aapt2/ResourceUtils.h
+++ b/tools/aapt2/ResourceUtils.h
@@ -267,8 +267,10 @@
// single quotations can be used without escaping them.
explicit StringBuilder(bool preserve_spaces = false);
- // Appends a chunk of text.
- StringBuilder& AppendText(const std::string& text);
+ // Appends a chunk of text. If preserve_spaces is true, whitespace removal is not performed, and
+ // single quotations can be used without escaping them for this append. Otherwise, the
+ // StringBuilder will behave as it was constructed.
+ StringBuilder& AppendText(const std::string& text, bool preserve_spaces = false);
// Starts a Span (tag) with the given name. The name is expected to be of the form:
// "tag_name;attr1=value;attr2=value;"
diff --git a/tools/aapt2/ResourceUtils_test.cpp b/tools/aapt2/ResourceUtils_test.cpp
index 11f3fa3..5ce4640 100644
--- a/tools/aapt2/ResourceUtils_test.cpp
+++ b/tools/aapt2/ResourceUtils_test.cpp
@@ -254,6 +254,29 @@
TEST(ResourceUtilsTest, StringBuilderPreserveSpaces) {
EXPECT_THAT(ResourceUtils::StringBuilder(true /*preserve_spaces*/).AppendText("\"").to_string(),
Eq("\""));
+
+ // When preserve_spaces is requested for a single append, that chunk may contain unescaped
+ // single quotes and its spaces should not be trimmed.
+ EXPECT_THAT(ResourceUtils::StringBuilder()
+ .AppendText(" hey guys ")
+ .AppendText(" 'this is so cool' ", /* preserve_spaces */ true)
+ .AppendText(" wow ")
+ .to_string(),
+ Eq(" hey guys 'this is so cool' wow "));
+
+ // Reading a double quote while preserving spaces should not change the quote state
+ EXPECT_THAT(ResourceUtils::StringBuilder()
+ .AppendText(" hey guys ")
+ .AppendText(" \"this is so cool' ", /* preserve_spaces */ true)
+ .AppendText(" wow ")
+ .to_string(),
+ Eq(" hey guys \"this is so cool' wow "));
+ // Same check, but the surrounding non-preserving appends open and close a double-quoted
+ // section around the preserved chunk; the chunk's own double quote is expected to be kept
+ // literally rather than toggling that quote state.
+ EXPECT_THAT(ResourceUtils::StringBuilder()
+ .AppendText(" hey guys\" ")
+ .AppendText(" \"this is so cool' ", /* preserve_spaces */ true)
+ .AppendText(" wow \" ")
+ .to_string(),
+ Eq(" hey guys \"this is so cool' wow "));
}
} // namespace aapt
diff --git a/tools/aapt2/xml/XmlPullParser.cpp b/tools/aapt2/xml/XmlPullParser.cpp
index 402e5a4..a023494 100644
--- a/tools/aapt2/xml/XmlPullParser.cpp
+++ b/tools/aapt2/xml/XmlPullParser.cpp
@@ -38,6 +38,7 @@
EndNamespaceHandler);
XML_SetCharacterDataHandler(parser_, CharacterDataHandler);
XML_SetCommentHandler(parser_, CommentDataHandler);
+ XML_SetCdataSectionHandler(parser_, StartCdataSectionHandler, EndCdataSectionHandler);
event_queue_.push(EventData{Event::kStartDocument, 0, depth_++});
}
@@ -287,6 +288,22 @@
parser->depth_, comment});
}
+// Start-of-CDATA callback installed through XML_SetCdataSectionHandler. `user_data` is treated
+// as the owning XmlPullParser; a kCdataStart event tagged with the current line number and
+// depth is queued on it.
+void XMLCALL XmlPullParser::StartCdataSectionHandler(void* user_data) {
+  auto* self = static_cast<XmlPullParser*>(user_data);
+  const auto line_number = XML_GetCurrentLineNumber(self->parser_);
+  self->event_queue_.push(EventData{Event::kCdataStart, line_number, self->depth_});
+}
+
+// End-of-CDATA callback installed through XML_SetCdataSectionHandler. `user_data` is treated
+// as the owning XmlPullParser; a kCdataEnd event tagged with the current line number and
+// depth is queued on it.
+void XMLCALL XmlPullParser::EndCdataSectionHandler(void* user_data) {
+  auto* self = static_cast<XmlPullParser*>(user_data);
+  const auto line_number = XML_GetCurrentLineNumber(self->parser_);
+  self->event_queue_.push(EventData{Event::kCdataEnd, line_number, self->depth_});
+}
+
Maybe<StringPiece> FindAttribute(const XmlPullParser* parser,
const StringPiece& name) {
auto iter = parser->FindAttribute("", name);
diff --git a/tools/aapt2/xml/XmlPullParser.h b/tools/aapt2/xml/XmlPullParser.h
index 63db66f..6ebaa28 100644
--- a/tools/aapt2/xml/XmlPullParser.h
+++ b/tools/aapt2/xml/XmlPullParser.h
@@ -52,6 +52,8 @@
kEndElement,
kText,
kComment,
+ kCdataStart,
+ kCdataEnd,
};
/**
@@ -159,6 +161,8 @@
static void XMLCALL EndElementHandler(void* user_data, const char* name);
static void XMLCALL EndNamespaceHandler(void* user_data, const char* prefix);
static void XMLCALL CommentDataHandler(void* user_data, const char* comment);
+ static void XMLCALL StartCdataSectionHandler(void* user_data);
+ static void XMLCALL EndCdataSectionHandler(void* user_data);
struct EventData {
Event event;
@@ -223,6 +227,10 @@
return out << "Text";
case XmlPullParser::Event::kComment:
return out << "Comment";
+ case XmlPullParser::Event::kCdataStart:
+ return out << "CdataStart";
+ case XmlPullParser::Event::kCdataEnd:
+ return out << "CdataEnd";
}
return out;
}
@@ -240,6 +248,8 @@
case Event::kText:
case Event::kComment:
case Event::kStartElement:
+ case Event::kCdataStart:
+ case Event::kCdataEnd:
return true;
default:
break;