AAPT2: Fix escape sequence processing in XML parsing

Bug: 35483813
Test: make aapt2_tests
Change-Id: I68baba17ab3639c220b734a2a68d86aad0dedf8c
diff --git a/tools/aapt2/flatten/XmlFlattener_test.cpp b/tools/aapt2/flatten/XmlFlattener_test.cpp
index ffc2de1..ec3d75e 100644
--- a/tools/aapt2/flatten/XmlFlattener_test.cpp
+++ b/tools/aapt2/flatten/XmlFlattener_test.cpp
@@ -76,7 +76,7 @@
             <View xmlns:test="http://com.test"
                   attr="hey">
               <Layout test:hello="hi" />
-              <Layout>Some text</Layout>
+              <Layout>Some text\\</Layout>
             </View>)EOF");
 
   android::ResXMLTree tree;
@@ -128,7 +128,7 @@
 
   ASSERT_EQ(tree.next(), android::ResXMLTree::TEXT);
   const char16_t* text = tree.getText(&len);
-  EXPECT_EQ(StringPiece16(text, len), u"Some text");
+  EXPECT_EQ(StringPiece16(text, len), u"Some text\\");
 
   ASSERT_EQ(tree.next(), android::ResXMLTree::END_TAG);
   ASSERT_EQ(tree.getElementNamespace(&len), nullptr);
diff --git a/tools/aapt2/util/Util.h b/tools/aapt2/util/Util.h
index f8fa80e..7210d21 100644
--- a/tools/aapt2/util/Util.h
+++ b/tools/aapt2/util/Util.h
@@ -164,6 +164,7 @@
   StringBuilder& Append(const android::StringPiece& str);
   const std::string& ToString() const;
   const std::string& Error() const;
+  bool IsEmpty() const;
 
   // When building StyledStrings, we need UTF-16 indices into the string,
   // which is what the Java layer expects when dealing with java
@@ -185,6 +186,8 @@
 
 inline const std::string& StringBuilder::Error() const { return error_; }
 
+inline bool StringBuilder::IsEmpty() const { return str_.empty(); }
+
 inline size_t StringBuilder::Utf16Len() const { return utf16_len_; }
 
 inline StringBuilder::operator bool() const { return error_.empty(); }
diff --git a/tools/aapt2/xml/XmlDom.cpp b/tools/aapt2/xml/XmlDom.cpp
index fab2f19..d9ea1bc 100644
--- a/tools/aapt2/xml/XmlDom.cpp
+++ b/tools/aapt2/xml/XmlDom.cpp
@@ -18,7 +18,6 @@
 
 #include <expat.h>
 
-#include <cassert>
 #include <memory>
 #include <stack>
 #include <string>
@@ -41,6 +40,8 @@
   std::unique_ptr<xml::Node> root;
   std::stack<xml::Node*> node_stack;
   std::string pending_comment;
+  std::unique_ptr<xml::Text> last_text_node;
+  util::StringBuilder pending_text;
 };
 
 /**
@@ -62,6 +63,19 @@
   }
 }
 
+static void FinishPendingText(Stack* stack) {  // Commits any buffered text as a child node.
+  if (stack->last_text_node != nullptr) {
+    if (!stack->pending_text.IsEmpty()) {
+      stack->last_text_node->text = stack->pending_text.ToString();
+      stack->pending_text = {};  // Start from a fresh builder for the next run of text.
+      stack->node_stack.top()->AppendChild(std::move(stack->last_text_node));
+    } else {
+      // The builder holds no text, so discard the node instead of appending it.
+      stack->last_text_node = nullptr;
+    }
+  }
+}
+
 static void AddToStack(Stack* stack, XML_Parser parser,
                        std::unique_ptr<Node> node) {
   node->line_number = XML_GetCurrentLineNumber(parser);
@@ -83,6 +97,7 @@
                                           const char* uri) {
   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
+  FinishPendingText(stack);
 
   std::unique_ptr<Namespace> ns = util::make_unique<Namespace>();
   if (prefix) {
@@ -99,6 +114,7 @@
 static void XMLCALL EndNamespaceHandler(void* user_data, const char* prefix) {
   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
+  FinishPendingText(stack);
 
   CHECK(!stack->node_stack.empty());
   stack->node_stack.pop();
@@ -113,6 +129,7 @@
                                         const char** attrs) {
   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
+  FinishPendingText(stack);
 
   std::unique_ptr<Element> el = util::make_unique<Element>();
   SplitName(name, &el->namespace_uri, &el->name);
@@ -120,7 +137,9 @@
   while (*attrs) {
     Attribute attribute;
     SplitName(*attrs++, &attribute.namespace_uri, &attribute.name);
-    attribute.value = *attrs++;
+    util::StringBuilder builder;
+    builder.Append(*attrs++);
+    attribute.value = builder.ToString();
 
     // Insert in sorted order.
     auto iter = std::lower_bound(el->attributes.begin(), el->attributes.end(),
@@ -135,41 +154,38 @@
 static void XMLCALL EndElementHandler(void* user_data, const char* name) {
   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
+  FinishPendingText(stack);
 
   CHECK(!stack->node_stack.empty());
   // stack->nodeStack.top()->comment = std::move(stack->pendingComment);
   stack->node_stack.pop();
 }
 
-static void XMLCALL CharacterDataHandler(void* user_data, const char* s,
-                                         int len) {
+static void XMLCALL CharacterDataHandler(void* user_data, const char* s, int len) {
   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
 
-  if (!s || len <= 0) {
+  const StringPiece str(s, len);
+  if (str.empty()) {
     return;
   }
 
   // See if we can just append the text to a previous text node.
-  if (!stack->node_stack.empty()) {
-    Node* currentParent = stack->node_stack.top();
-    if (!currentParent->children.empty()) {
-      Node* last_child = currentParent->children.back().get();
-      if (Text* text = NodeCast<Text>(last_child)) {
-        text->text.append(s, len);
-        return;
-      }
-    }
+  if (stack->last_text_node != nullptr) {
+    stack->pending_text.Append(str);
+    return;
   }
 
-  std::unique_ptr<Text> text = util::make_unique<Text>();
-  text->text.assign(s, len);
-  AddToStack(stack, parser, std::move(text));
+  stack->last_text_node = util::make_unique<Text>();
+  stack->last_text_node->line_number = XML_GetCurrentLineNumber(parser);
+  stack->last_text_node->column_number = XML_GetCurrentColumnNumber(parser);
+  stack->pending_text.Append(str);
 }
 
 static void XMLCALL CommentDataHandler(void* user_data, const char* comment) {
   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
+  FinishPendingText(stack);
 
   if (!stack->pending_comment.empty()) {
     stack->pending_comment += '\n';
diff --git a/tools/aapt2/xml/XmlDom_test.cpp b/tools/aapt2/xml/XmlDom_test.cpp
index a414afe..0fc3cec6 100644
--- a/tools/aapt2/xml/XmlDom_test.cpp
+++ b/tools/aapt2/xml/XmlDom_test.cpp
@@ -49,4 +49,23 @@
   EXPECT_EQ(ns->namespace_prefix, "android");
 }
 
+TEST(XmlDomTest, HandleEscapes) {
+  std::unique_ptr<xml::XmlResource> doc = test::BuildXmlDom(
+      R"EOF(<shortcode pattern="\\d{5}">\\d{5}</shortcode>)EOF");
+
+  xml::Element* el = xml::FindRootElement(doc->root.get());
+  ASSERT_NE(nullptr, el);
+
+  xml::Attribute* attr = el->FindAttribute({}, "pattern");
+  ASSERT_NE(nullptr, attr);
+  // The raw-string XML has two backslashes; the expected literals hold a single one.
+  EXPECT_EQ("\\d{5}", attr->value);
+  // The element's character data should arrive as a single Text child.
+  ASSERT_EQ(1u, el->children.size());
+
+  xml::Text* text = xml::NodeCast<xml::Text>(el->children[0].get());
+  ASSERT_NE(nullptr, text);
+  EXPECT_EQ("\\d{5}", text->text);
+}
+
 }  // namespace aapt