AAPT2: Fix styled string whitespace processing

Change styled string whitespace processing to be like AAPT's was.

Main changes:
- whitespace around tags is preserved.
- tags start exactly where they are supposed to, not off by one.

Bug: 72406283
Test: make aapt2_tests
Change-Id: I4d12728c493efd8c978e2e3d2718b56534ff52ef
diff --git a/core/res/res/values/strings.xml b/core/res/res/values/strings.xml
index cadc3ff..aa90b87 100644
--- a/core/res/res/values/strings.xml
+++ b/core/res/res/values/strings.xml
@@ -379,7 +379,7 @@
     <!-- Text message in the factory reset warning dialog. This says that the the device admin app
          is missing or corrupted. As a result the device will be erased. [CHAR LIMIT=NONE]-->
     <string name="factory_reset_message">The admin app can\'t be used. Your device will now be
-        erased.\n\nIf you have questions, contact your organization's admin.</string>
+        erased.\n\nIf you have questions, contact your organization\'s admin.</string>
 
     <!-- A toast message displayed when printing is attempted but disabled by policy. -->
     <string name="printing_disabled_by">Printing disabled by <xliff:g id="owner_app">%s</xliff:g>.</string>
@@ -764,7 +764,7 @@
     <string name="capability_title_canCaptureFingerprintGestures">Fingerprint gestures</string>
     <!-- Description for the capability of an accessibility service to perform gestures. -->
     <string name="capability_desc_canCaptureFingerprintGestures">Can capture gestures performed on
-        the device's fingerprint sensor.</string>
+        the device\'s fingerprint sensor.</string>
 
     <!--  Permissions -->
 
@@ -3774,7 +3774,7 @@
     <!-- Notification title when data usage has exceeded warning threshold. [CHAR LIMIT=50] -->
     <string name="data_usage_warning_title">Data warning</string>
     <!-- Notification body when data usage has exceeded warning threshold. [CHAR LIMIT=32] -->
-    <string name="data_usage_warning_body">You've used <xliff:g id="app" example="3.8GB">%s</xliff:g> of data</string>
+    <string name="data_usage_warning_body">You\'ve used <xliff:g id="app" example="3.8GB">%s</xliff:g> of data</string>
 
     <!-- Notification title when mobile data usage has exceeded limit threshold, and has been disabled. [CHAR LIMIT=50] -->
     <string name="data_usage_mobile_limit_title">Mobile data limit reached</string>
@@ -3788,7 +3788,7 @@
     <!-- Notification title when Wi-Fi data usage has exceeded limit threshold. [CHAR LIMIT=32] -->
     <string name="data_usage_wifi_limit_snoozed_title">Over your Wi-Fi data limit</string>
     <!-- Notification body when data usage has exceeded limit threshold. -->
-    <string name="data_usage_limit_snoozed_body">You've gone <xliff:g id="size" example="3.8GB">%s</xliff:g> over your set limit</string>
+    <string name="data_usage_limit_snoozed_body">You\'ve gone <xliff:g id="size" example="3.8GB">%s</xliff:g> over your set limit</string>
 
     <!-- Notification title when background data usage is limited. [CHAR LIMIT=32] -->
     <string name="data_usage_restricted_title">Background data restricted</string>
diff --git a/tools/aapt2/ResourceParser.cpp b/tools/aapt2/ResourceParser.cpp
index 7cffeea..1b6f882 100644
--- a/tools/aapt2/ResourceParser.cpp
+++ b/tools/aapt2/ResourceParser.cpp
@@ -26,11 +26,14 @@
 #include "ResourceUtils.h"
 #include "ResourceValues.h"
 #include "ValueVisitor.h"
+#include "text/Utf8Iterator.h"
 #include "util/ImmutableMap.h"
 #include "util/Maybe.h"
 #include "util/Util.h"
 #include "xml/XmlPullParser.h"
 
+using ::aapt::ResourceUtils::StringBuilder;
+using ::aapt::text::Utf8Iterator;
 using ::android::StringPiece;
 
 namespace aapt {
@@ -169,114 +172,212 @@
       config_(config),
       options_(options) {}
 
-/**
- * Build a string from XML that converts nested elements into Span objects.
- */
+// Base class Node for representing the various Spans and UntranslatableSections of an XML string.
+// This will be used to traverse and flatten the XML string into a single std::string, with all
+// Span and Untranslatable data maintained in parallel, as indices into the string.
+class Node {
+ public:
+  virtual ~Node() = default;
+
+  // Adds the given child node to this parent node's set of child nodes, moving ownership to the
+  // parent node as well.
+  // Returns a pointer to the child node that was added as a convenience.
+  template <typename T>
+  T* AddChild(std::unique_ptr<T> node) {
+    T* raw_ptr = node.get();
+    children.push_back(std::move(node));
+    return raw_ptr;
+  }
+
+  virtual void Build(StringBuilder* builder) const {
+    for (const auto& child : children) {
+      child->Build(builder);
+    }
+  }
+
+  std::vector<std::unique_ptr<Node>> children;
+};
+
+// A chunk of text in the XML string. This lives between other tags, such as XLIFF tags and Spans.
+class SegmentNode : public Node {
+ public:
+  std::string data;
+
+  void Build(StringBuilder* builder) const override {
+    builder->AppendText(data);
+  }
+};
+
+// A tag that will be encoded into the final flattened string. Tags like <b> or <i>.
+class SpanNode : public Node {
+ public:
+  std::string name;
+
+  void Build(StringBuilder* builder) const override {
+    StringBuilder::SpanHandle span_handle = builder->StartSpan(name);
+    Node::Build(builder);
+    builder->EndSpan(span_handle);
+  }
+};
+
+// An XLIFF 'g' tag, which marks a section of the string as untranslatable.
+class UntranslatableNode : public Node {
+ public:
+  void Build(StringBuilder* builder) const override {
+    StringBuilder::UntranslatableHandle handle = builder->StartUntranslatable();
+    Node::Build(builder);
+    builder->EndUntranslatable(handle);
+  }
+};
+
+// Build a string from XML that converts nested elements into Span objects.
 bool ResourceParser::FlattenXmlSubtree(
     xml::XmlPullParser* parser, std::string* out_raw_string, StyleString* out_style_string,
     std::vector<UntranslatableSection>* out_untranslatable_sections) {
-  // Keeps track of formatting tags (<b>, <i>) and the range of characters for which they apply.
-  // The stack elements refer to the indices in out_style_string->spans.
-  // By first adding to the out_style_string->spans vector, and then using the stack to refer
-  // to this vector, the original order of tags is preserved in cases such as <b><i>hello</b></i>.
-  std::vector<size_t> span_stack;
-
-  // Clear the output variables.
-  out_raw_string->clear();
-  out_style_string->spans.clear();
-  out_untranslatable_sections->clear();
-
-  // The StringBuilder will concatenate the various segments of text which are initially
-  // separated by tags. It also handles unicode escape codes and quotations.
-  util::StringBuilder builder;
+  std::string raw_string;
+  std::string current_text;
 
   // The first occurrence of a <xliff:g> tag. Nested <xliff:g> tags are illegal.
   Maybe<size_t> untranslatable_start_depth;
 
+  Node root;
+  std::vector<Node*> node_stack;
+  node_stack.push_back(&root);
+
+  bool saw_span_node = false;
+  SegmentNode* first_segment = nullptr;
+  SegmentNode* last_segment = nullptr;
+
   size_t depth = 1;
-  while (xml::XmlPullParser::IsGoodEvent(parser->Next())) {
+  while (depth > 0 && xml::XmlPullParser::IsGoodEvent(parser->Next())) {
     const xml::XmlPullParser::Event event = parser->event();
 
-    if (event == xml::XmlPullParser::Event::kStartElement) {
-      if (parser->element_namespace().empty()) {
-        // This is an HTML tag which we encode as a span. Add it to the span stack.
-        std::string span_name = parser->element_name();
-        const auto end_attr_iter = parser->end_attributes();
-        for (auto attr_iter = parser->begin_attributes(); attr_iter != end_attr_iter; ++attr_iter) {
-          span_name += ";";
-          span_name += attr_iter->name;
-          span_name += "=";
-          span_name += attr_iter->value;
+    // First take care of any SegmentNodes that should be created.
+    if (event == xml::XmlPullParser::Event::kStartElement ||
+        event == xml::XmlPullParser::Event::kEndElement) {
+      if (!current_text.empty()) {
+        std::unique_ptr<SegmentNode> segment_node = util::make_unique<SegmentNode>();
+        segment_node->data = std::move(current_text);
+        last_segment = node_stack.back()->AddChild(std::move(segment_node));
+        if (first_segment == nullptr) {
+          first_segment = last_segment;
         }
+        current_text = {};
+      }
+    }
 
-        // Make sure the string is representable in our binary format.
-        if (builder.Utf16Len() > std::numeric_limits<uint32_t>::max()) {
-          diag_->Error(DiagMessage(source_.WithLine(parser->line_number()))
-                       << "style string '" << builder.ToString() << "' is too long");
-          return false;
-        }
+    switch (event) {
+      case xml::XmlPullParser::Event::kText: {
+        current_text += parser->text();
+        raw_string += parser->text();
+      } break;
 
-        out_style_string->spans.push_back(
-            Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())});
-        span_stack.push_back(out_style_string->spans.size() - 1);
-      } else if (parser->element_namespace() == sXliffNamespaceUri) {
-        if (parser->element_name() == "g") {
-          if (untranslatable_start_depth) {
-            // We've already encountered an <xliff:g> tag, and nested <xliff:g> tags are illegal.
-            diag_->Error(DiagMessage(source_.WithLine(parser->line_number()))
-                         << "illegal nested XLIFF 'g' tag");
-            return false;
-          } else {
-            // Mark the start of an untranslatable section. Use UTF8 indices/lengths.
-            untranslatable_start_depth = depth;
-            const size_t current_idx = builder.ToString().size();
-            out_untranslatable_sections->push_back(UntranslatableSection{current_idx, current_idx});
+      case xml::XmlPullParser::Event::kStartElement: {
+        if (parser->element_namespace().empty()) {
+          // This is an HTML tag which we encode as a span. Add it to the span stack.
+          std::unique_ptr<SpanNode> span_node = util::make_unique<SpanNode>();
+          span_node->name = parser->element_name();
+          const auto end_attr_iter = parser->end_attributes();
+          for (auto attr_iter = parser->begin_attributes(); attr_iter != end_attr_iter;
+               ++attr_iter) {
+            span_node->name += ";";
+            span_node->name += attr_iter->name;
+            span_node->name += "=";
+            span_node->name += attr_iter->value;
           }
+
+          node_stack.push_back(node_stack.back()->AddChild(std::move(span_node)));
+          saw_span_node = true;
+        } else if (parser->element_namespace() == sXliffNamespaceUri) {
+          // This is an XLIFF tag, which is not encoded as a span.
+          if (parser->element_name() == "g") {
+            // Check that an 'untranslatable' tag is not already being processed. Nested
+            // <xliff:g> tags are illegal.
+            if (untranslatable_start_depth) {
+              diag_->Error(DiagMessage(source_.WithLine(parser->line_number()))
+                           << "illegal nested XLIFF 'g' tag");
+              return false;
+            } else {
+              // Mark the beginning of an 'untranslatable' section.
+              untranslatable_start_depth = depth;
+              node_stack.push_back(
+                  node_stack.back()->AddChild(util::make_unique<UntranslatableNode>()));
+            }
+          } else {
+            // Ignore unknown XLIFF tags, but don't warn.
+            node_stack.push_back(node_stack.back()->AddChild(util::make_unique<Node>()));
+          }
+        } else {
+          // Besides XLIFF, any other namespaced tag is unsupported and ignored.
+          diag_->Warn(DiagMessage(source_.WithLine(parser->line_number()))
+                      << "ignoring element '" << parser->element_name()
+                      << "' with unknown namespace '" << parser->element_namespace() << "'");
+          node_stack.push_back(node_stack.back()->AddChild(util::make_unique<Node>()));
         }
-        // Ignore other xliff tags, they get handled by other tools.
 
-      } else {
-        // Besides XLIFF, any other namespaced tag is unsupported and ignored.
-        diag_->Warn(DiagMessage(source_.WithLine(parser->line_number()))
-                    << "ignoring element '" << parser->element_name()
-                    << "' with unknown namespace '" << parser->element_namespace() << "'");
-      }
+        // Enter one level inside the element.
+        depth++;
+      } break;
 
-      // Enter one level inside the element.
-      depth++;
-    } else if (event == xml::XmlPullParser::Event::kText) {
-      // Record both the raw text and append to the builder to deal with escape sequences
-      // and quotations.
-      out_raw_string->append(parser->text());
-      builder.Append(parser->text());
-    } else if (event == xml::XmlPullParser::Event::kEndElement) {
-      // Return one level from within the element.
-      depth--;
-      if (depth == 0) {
+      case xml::XmlPullParser::Event::kEndElement: {
+        // Return one level from within the element.
+        depth--;
+        if (depth == 0) {
+          break;
+        }
+
+        node_stack.pop_back();
+        if (untranslatable_start_depth == make_value(depth)) {
+          // This is the end of an untranslatable section.
+          untranslatable_start_depth = {};
+        }
+      } break;
+
+      default:
+        // ignore.
         break;
-      }
-
-      if (parser->element_namespace().empty()) {
-        // This is an HTML tag which we encode as a span. Update the span
-        // stack and pop the top entry.
-        Span& top_span = out_style_string->spans[span_stack.back()];
-        top_span.last_char = builder.Utf16Len() - 1;
-        span_stack.pop_back();
-      } else if (untranslatable_start_depth == make_value(depth)) {
-        // This is the end of an untranslatable section. Use UTF8 indices/lengths.
-        UntranslatableSection& untranslatable_section = out_untranslatable_sections->back();
-        untranslatable_section.end = builder.ToString().size();
-        untranslatable_start_depth = {};
-      }
-    } else if (event == xml::XmlPullParser::Event::kComment) {
-      // Ignore.
-    } else {
-      LOG(FATAL) << "unhandled XML event";
     }
   }
 
-  CHECK(span_stack.empty()) << "spans haven't been fully processed";
-  out_style_string->str = builder.ToString();
+  // Sanity check to make sure we processed all the nodes.
+  CHECK(node_stack.size() == 1u);
+  CHECK(node_stack.back() == &root);
+
+  if (!saw_span_node) {
+    // If there were no spans, we must treat this string a little differently (according to AAPT).
+    // Find and strip the leading whitespace from the first segment, and the trailing whitespace
+    // from the last segment.
+    if (first_segment != nullptr) {
+      // Trim leading whitespace.
+      StringPiece trimmed = util::TrimLeadingWhitespace(first_segment->data);
+      if (trimmed.size() != first_segment->data.size()) {
+        first_segment->data = trimmed.to_string();
+      }
+    }
+
+    if (last_segment != nullptr) {
+      // Trim trailing whitespace.
+      StringPiece trimmed = util::TrimTrailingWhitespace(last_segment->data);
+      if (trimmed.size() != last_segment->data.size()) {
+        last_segment->data = trimmed.to_string();
+      }
+    }
+  }
+
+  // Have the XML structure flatten itself into the StringBuilder. The StringBuilder will take
+  // care of recording the correctly adjusted Spans and UntranslatableSections.
+  StringBuilder builder;
+  root.Build(&builder);
+  if (!builder) {
+    diag_->Error(DiagMessage(source_.WithLine(parser->line_number())) << builder.GetError());
+    return false;
+  }
+
+  ResourceUtils::FlattenedXmlString flattened_string = builder.GetFlattenedString();
+  *out_raw_string = std::move(raw_string);
+  *out_untranslatable_sections = std::move(flattened_string.untranslatable_sections);
+  out_style_string->str = std::move(flattened_string.text);
+  out_style_string->spans = std::move(flattened_string.spans);
   return true;
 }
 
diff --git a/tools/aapt2/ResourceParser_test.cpp b/tools/aapt2/ResourceParser_test.cpp
index 618c8ed..c98c0b9 100644
--- a/tools/aapt2/ResourceParser_test.cpp
+++ b/tools/aapt2/ResourceParser_test.cpp
@@ -95,6 +95,16 @@
   ASSERT_THAT(str, NotNull());
   EXPECT_THAT(*str, StrValueEq("  hey there "));
   EXPECT_THAT(str->untranslatable_sections, IsEmpty());
+
+  ASSERT_TRUE(TestParse(R"(<string name="bar">Isn\'t it cool?</string>)"));
+  str = test::GetValue<String>(&table_, "string/bar");
+  ASSERT_THAT(str, NotNull());
+  EXPECT_THAT(*str, StrValueEq("Isn't it cool?"));
+
+  ASSERT_TRUE(TestParse(R"(<string name="baz">"Isn't it cool?"</string>)"));
+  str = test::GetValue<String>(&table_, "string/baz");
+  ASSERT_THAT(str, NotNull());
+  EXPECT_THAT(*str, StrValueEq("Isn't it cool?"));
 }
 
 TEST_F(ResourceParserTest, ParseEscapedString) {
@@ -126,16 +136,16 @@
   StyledString* str = test::GetValue<StyledString>(&table_, "string/foo");
   ASSERT_THAT(str, NotNull());
 
-  EXPECT_THAT(str->value->value, Eq("This is my aunt\u2019s fickle string"));
+  EXPECT_THAT(str->value->value, StrEq("This is my aunt\u2019s fickle string"));
   EXPECT_THAT(str->value->spans, SizeIs(2));
   EXPECT_THAT(str->untranslatable_sections, IsEmpty());
 
-  EXPECT_THAT(*str->value->spans[0].name, Eq("b"));
-  EXPECT_THAT(str->value->spans[0].first_char, Eq(17u));
+  EXPECT_THAT(*str->value->spans[0].name, StrEq("b"));
+  EXPECT_THAT(str->value->spans[0].first_char, Eq(18u));
   EXPECT_THAT(str->value->spans[0].last_char, Eq(30u));
 
-  EXPECT_THAT(*str->value->spans[1].name, Eq("small"));
-  EXPECT_THAT(str->value->spans[1].first_char, Eq(24u));
+  EXPECT_THAT(*str->value->spans[1].name, StrEq("small"));
+  EXPECT_THAT(str->value->spans[1].first_char, Eq(25u));
   EXPECT_THAT(str->value->spans[1].last_char, Eq(30u));
 }
 
@@ -144,7 +154,7 @@
 
   String* str = test::GetValue<String>(&table_, "string/foo");
   ASSERT_THAT(str, NotNull());
-  EXPECT_THAT(*str->value, Eq("This is what I think"));
+  EXPECT_THAT(*str->value, StrEq("This is what I think"));
   EXPECT_THAT(str->untranslatable_sections, IsEmpty());
 
   ASSERT_TRUE(TestParse(R"(<string name="foo2">"  This is what  I think  "</string>)"));
@@ -154,6 +164,25 @@
   EXPECT_THAT(*str, StrValueEq("  This is what  I think  "));
 }
 
+TEST_F(ResourceParserTest, ParseStyledStringWithWhitespace) {
+  std::string input = R"(<string name="foo">  <b> My <i> favorite</i> string </b>  </string>)";
+  ASSERT_TRUE(TestParse(input));
+
+  StyledString* str = test::GetValue<StyledString>(&table_, "string/foo");
+  ASSERT_THAT(str, NotNull());
+  EXPECT_THAT(str->value->value, StrEq("  My  favorite string  "));
+  EXPECT_THAT(str->untranslatable_sections, IsEmpty());
+
+  ASSERT_THAT(str->value->spans, SizeIs(2u));
+  EXPECT_THAT(*str->value->spans[0].name, StrEq("b"));
+  EXPECT_THAT(str->value->spans[0].first_char, Eq(1u));
+  EXPECT_THAT(str->value->spans[0].last_char, Eq(21u));
+
+  EXPECT_THAT(*str->value->spans[1].name, StrEq("i"));
+  EXPECT_THAT(str->value->spans[1].first_char, Eq(5u));
+  EXPECT_THAT(str->value->spans[1].last_char, Eq(13u));
+}
+
 TEST_F(ResourceParserTest, IgnoreXliffTagsOtherThanG) {
   std::string input = R"(
       <string name="foo" xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2">
@@ -182,12 +211,9 @@
   String* str = test::GetValue<String>(&table_, "string/foo");
   ASSERT_THAT(str, NotNull());
   EXPECT_THAT(*str, StrValueEq("There are %1$d apples"));
-  ASSERT_THAT(str->untranslatable_sections, SizeIs(1));
 
-  // We expect indices and lengths that span to include the whitespace
-  // before %1$d. This is due to how the StringBuilder withholds whitespace unless
-  // needed (to deal with line breaks, etc.).
-  EXPECT_THAT(str->untranslatable_sections[0].start, Eq(9u));
+  ASSERT_THAT(str->untranslatable_sections, SizeIs(1));
+  EXPECT_THAT(str->untranslatable_sections[0].start, Eq(10u));
   EXPECT_THAT(str->untranslatable_sections[0].end, Eq(14u));
 }
 
@@ -199,14 +225,16 @@
 
   StyledString* str = test::GetValue<StyledString>(&table_, "string/foo");
   ASSERT_THAT(str, NotNull());
-  EXPECT_THAT(str->value->value, Eq("There are %1$d apples"));
-  ASSERT_THAT(str->untranslatable_sections, SizeIs(1));
+  EXPECT_THAT(str->value->value, Eq(" There are %1$d apples"));
 
-  // We expect indices and lengths that span to include the whitespace
-  // before %1$d. This is due to how the StringBuilder withholds whitespace unless
-  // needed (to deal with line breaks, etc.).
-  EXPECT_THAT(str->untranslatable_sections[0].start, Eq(9u));
-  EXPECT_THAT(str->untranslatable_sections[0].end, Eq(14u));
+  ASSERT_THAT(str->untranslatable_sections, SizeIs(1));
+  EXPECT_THAT(str->untranslatable_sections[0].start, Eq(11u));
+  EXPECT_THAT(str->untranslatable_sections[0].end, Eq(15u));
+
+  ASSERT_THAT(str->value->spans, SizeIs(1u));
+  EXPECT_THAT(*str->value->spans[0].name, StrEq("b"));
+  EXPECT_THAT(str->value->spans[0].first_char, Eq(11u));
+  EXPECT_THAT(str->value->spans[0].last_char, Eq(14u));
 }
 
 TEST_F(ResourceParserTest, ParseNull) {
diff --git a/tools/aapt2/ResourceUtils.cpp b/tools/aapt2/ResourceUtils.cpp
index 628466d..8fc3d65 100644
--- a/tools/aapt2/ResourceUtils.cpp
+++ b/tools/aapt2/ResourceUtils.cpp
@@ -18,17 +18,23 @@
 
 #include <sstream>
 
+#include "android-base/stringprintf.h"
 #include "androidfw/ResourceTypes.h"
 #include "androidfw/ResourceUtils.h"
 
 #include "NameMangler.h"
 #include "SdkConstants.h"
 #include "format/binary/ResourceTypeExtensions.h"
+#include "text/Unicode.h"
+#include "text/Utf8Iterator.h"
 #include "util/Files.h"
 #include "util/Util.h"
 
+using ::aapt::text::IsWhitespace;
+using ::aapt::text::Utf8Iterator;
 using ::android::StringPiece;
 using ::android::StringPiece16;
+using ::android::base::StringPrintf;
 
 namespace aapt {
 namespace ResourceUtils {
@@ -750,5 +756,195 @@
   return util::make_unique<BinaryPrimitive>(res_value);
 }
 
+// Converts the codepoint to UTF-8 and appends it to the string.
+static bool AppendCodepointToUtf8String(char32_t codepoint, std::string* output) {
+  ssize_t len = utf32_to_utf8_length(&codepoint, 1);
+  if (len < 0) {
+    return false;
+  }
+
+  const size_t start_append_pos = output->size();
+
+  // Make room for the next character.
+  output->resize(output->size() + len);
+
+  char* dst = &*(output->begin() + start_append_pos);
+  utf32_to_utf8(&codepoint, 1, dst, len + 1);
+  return true;
+}
+
+// Reads up to 4 UTF-8 characters that represent a Unicode escape sequence, and appends the
+// Unicode codepoint represented by the escape sequence to the string.
+static bool AppendUnicodeEscapeSequence(Utf8Iterator* iter, std::string* output) {
+  char32_t code = 0;
+  for (size_t i = 0; i < 4 && iter->HasNext(); i++) {
+    char32_t codepoint = iter->Next();
+    char32_t a;
+    if (codepoint >= U'0' && codepoint <= U'9') {
+      a = codepoint - U'0';
+    } else if (codepoint >= U'a' && codepoint <= U'f') {
+      a = codepoint - U'a' + 10;
+    } else if (codepoint >= U'A' && codepoint <= U'F') {
+      a = codepoint - U'A' + 10;
+    } else {
+      return {};
+    }
+    code = (code << 4) | a;
+  }
+  return AppendCodepointToUtf8String(code, output);
+}
+
+StringBuilder::StringBuilder(bool preserve_spaces)
+    : preserve_spaces_(preserve_spaces), quote_(preserve_spaces) {
+}
+
+StringBuilder& StringBuilder::AppendText(const std::string& text) {
+  if (!error_.empty()) {
+    return *this;
+  }
+
+  const size_t previous_len = xml_string_.text.size();
+  Utf8Iterator iter(text);
+  while (iter.HasNext()) {
+    char32_t codepoint = iter.Next();
+    if (!quote_ && text::IsWhitespace(codepoint)) {
+      if (!last_codepoint_was_space_) {
+        // Emit a space if it's the first.
+        xml_string_.text += ' ';
+        last_codepoint_was_space_ = true;
+      }
+
+      // Keep eating spaces.
+      continue;
+    }
+
+    // This is not a space.
+    last_codepoint_was_space_ = false;
+
+    if (codepoint == U'\\') {
+      if (iter.HasNext()) {
+        codepoint = iter.Next();
+        switch (codepoint) {
+          case U't':
+            xml_string_.text += '\t';
+            break;
+
+          case U'n':
+            xml_string_.text += '\n';
+            break;
+
+          case U'#':
+          case U'@':
+          case U'?':
+          case U'"':
+          case U'\'':
+          case U'\\':
+            xml_string_.text += static_cast<char>(codepoint);
+            break;
+
+          case U'u':
+            if (!AppendUnicodeEscapeSequence(&iter, &xml_string_.text)) {
+              error_ =
+                  StringPrintf("invalid unicode escape sequence in string\n\"%s\"", text.c_str());
+              return *this;
+            }
+            break;
+
+          default:
+            // Ignore the escape character and just include the codepoint.
+            AppendCodepointToUtf8String(codepoint, &xml_string_.text);
+            break;
+        }
+      }
+    } else if (!preserve_spaces_ && codepoint == U'"') {
+      // Only toggle the quote state when we are not preserving spaces.
+      quote_ = !quote_;
+
+    } else if (!quote_ && codepoint == U'\'') {
+      // This should be escaped.
+      error_ = StringPrintf("unescaped apostrophe in string\n\"%s\"", text.c_str());
+      return *this;
+
+    } else {
+      AppendCodepointToUtf8String(codepoint, &xml_string_.text);
+    }
+  }
+
+  // Accumulate the added string's UTF-16 length.
+  const uint8_t* utf8_data = reinterpret_cast<const uint8_t*>(xml_string_.text.c_str());
+  const size_t utf8_length = xml_string_.text.size();
+  ssize_t len = utf8_to_utf16_length(utf8_data + previous_len, utf8_length - previous_len);
+  if (len < 0) {
+    error_ = StringPrintf("invalid unicode code point in string\n\"%s\"", utf8_data + previous_len);
+    return *this;
+  }
+
+  utf16_len_ += static_cast<uint32_t>(len);
+  return *this;
+}
+
+StringBuilder::SpanHandle StringBuilder::StartSpan(const std::string& name) {
+  if (!error_.empty()) {
+    return 0u;
+  }
+
+  // When we start a span, all state associated with whitespace truncation and quotation is ended.
+  ResetTextState();
+  Span span;
+  span.name = name;
+  span.first_char = span.last_char = utf16_len_;
+  xml_string_.spans.push_back(std::move(span));
+  return xml_string_.spans.size() - 1;
+}
+
+void StringBuilder::EndSpan(SpanHandle handle) {
+  if (!error_.empty()) {
+    return;
+  }
+
+  // When we end a span, all state associated with whitespace truncation and quotation is ended.
+  ResetTextState();
+  xml_string_.spans[handle].last_char = utf16_len_ - 1u;
+}
+
+StringBuilder::UntranslatableHandle StringBuilder::StartUntranslatable() {
+  if (!error_.empty()) {
+    return 0u;
+  }
+
+  UntranslatableSection section;
+  section.start = section.end = xml_string_.text.size();
+  xml_string_.untranslatable_sections.push_back(section);
+  return xml_string_.untranslatable_sections.size() - 1;
+}
+
+void StringBuilder::EndUntranslatable(UntranslatableHandle handle) {
+  if (!error_.empty()) {
+    return;
+  }
+  xml_string_.untranslatable_sections[handle].end = xml_string_.text.size();
+}
+
+FlattenedXmlString StringBuilder::GetFlattenedString() const {
+  return xml_string_;
+}
+
+std::string StringBuilder::to_string() const {
+  return xml_string_.text;
+}
+
+StringBuilder::operator bool() const {
+  return error_.empty();
+}
+
+std::string StringBuilder::GetError() const {
+  return error_;
+}
+
+void StringBuilder::ResetTextState() {
+  quote_ = preserve_spaces_;
+  last_codepoint_was_space_ = false;
+}
+
 }  // namespace ResourceUtils
 }  // namespace aapt
diff --git a/tools/aapt2/ResourceUtils.h b/tools/aapt2/ResourceUtils.h
index f83d49e..7af2fe0 100644
--- a/tools/aapt2/ResourceUtils.h
+++ b/tools/aapt2/ResourceUtils.h
@@ -224,6 +224,95 @@
                                           const android::Res_value& res_value,
                                           StringPool* dst_pool);
 
+// A string flattened from an XML hierarchy, which maintains tags and untranslatable sections
+// in parallel data structures.
+struct FlattenedXmlString {
+  std::string text;
+  std::vector<UntranslatableSection> untranslatable_sections;
+  std::vector<Span> spans;
+};
+
+// Flattens an XML hierarchy into a FlattenedXmlString, formatting the text, escaping characters,
+// and removing whitespace, all while keeping the untranslatable sections and spans in sync with the
+// transformations.
+//
+// Specifically, the StringBuilder will handle escaped characters like \t, \n, \\, \', etc.
+// Single quotes *must* be escaped, unless within a pair of double-quotes.
+// Pairs of double-quotes disable whitespace stripping of the enclosed text.
+// Unicode escape codes (\u0049) are interpreted and the represented Unicode character is inserted.
+//
+// A NOTE ON WHITESPACE:
+//
+// When preserve_spaces is false, and when text is not enclosed within double-quotes,
+// StringBuilder replaces a series of whitespace with a single space character. This happens at the
+// start and end of the string as well, so leading and trailing whitespace is possible.
+//
+// When a Span is started or stopped, the whitespace counter is reset, meaning if whitespace
+// is encountered directly after the span, it will be emitted. This leads to situations like the
+// following: "This <b> is </b> spaced" -> "This  is  spaced". Without spans, this would be properly
+// compressed: "This  is  spaced" -> "This is spaced".
+//
+// Untranslatable sections do not have the same problem:
+// "This <xliff:g> is </xliff:g> not spaced" -> "This is not spaced".
+//
+// NOTE: This is all the way it is because AAPT1 did it this way. Maintaining backwards
+// compatibility is important.
+//
+class StringBuilder {
+ public:
+  using SpanHandle = size_t;
+  using UntranslatableHandle = size_t;
+
+  // Creates a StringBuilder. If preserve_spaces is true, whitespace removal is not performed, and
+  // single quotations can be used without escaping them.
+  explicit StringBuilder(bool preserve_spaces = false);
+
+  // Appends a chunk of text.
+  StringBuilder& AppendText(const std::string& text);
+
+  // Starts a Span (tag) with the given name. The name is expected to be of the form:
+  //  "tag_name;attr1=value;attr2=value;"
+  // Which is how Spans are encoded in the ResStringPool.
+  // To end the span, pass back the SpanHandle received from this method to the EndSpan() method.
+  SpanHandle StartSpan(const std::string& name);
+
+  // Ends a Span (tag). Pass in the matching SpanHandle previously obtained from StartSpan().
+  void EndSpan(SpanHandle handle);
+
+  // Starts an Untranslatable section.
+  // To end the section, pass back the UntranslatableHandle received from this method to
+  // the EndUntranslatable() method.
+  UntranslatableHandle StartUntranslatable();
+
+  // Ends an Untranslatable section. Pass in the matching UntranslatableHandle previously obtained
+  // from StartUntranslatable().
+  void EndUntranslatable(UntranslatableHandle handle);
+
+  // Returns the flattened XML string, with all spans and untranslatable sections encoded as
+  // parallel data structures.
+  FlattenedXmlString GetFlattenedString() const;
+
+  // Returns just the flattened XML text, with no spans or untranslatable sections.
+  std::string to_string() const;
+
+  // Returns true if there was no error.
+  explicit operator bool() const;
+
+  std::string GetError() const;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StringBuilder);
+
+  void ResetTextState();
+
+  std::string error_;
+  FlattenedXmlString xml_string_;
+  uint32_t utf16_len_ = 0u;
+  bool preserve_spaces_;
+  bool quote_;
+  bool last_codepoint_was_space_ = false;
+};
+
 }  // namespace ResourceUtils
 }  // namespace aapt
 
diff --git a/tools/aapt2/ResourceUtils_test.cpp b/tools/aapt2/ResourceUtils_test.cpp
index cb786d3..11f3fa3 100644
--- a/tools/aapt2/ResourceUtils_test.cpp
+++ b/tools/aapt2/ResourceUtils_test.cpp
@@ -212,4 +212,48 @@
               Pointee(ValueEq(BinaryPrimitive(Res_value::TYPE_FLOAT, expected_float_flattened))));
 }
 
+TEST(ResourceUtilsTest, StringBuilderWhitespaceRemoval) {
+  EXPECT_THAT(ResourceUtils::StringBuilder()
+                  .AppendText("    hey guys ")
+                  .AppendText(" this is so cool ")
+                  .to_string(),
+              Eq(" hey guys this is so cool "));
+  EXPECT_THAT(ResourceUtils::StringBuilder()
+                  .AppendText(" \" wow,  so many \t ")
+                  .AppendText("spaces. \"what? ")
+                  .to_string(),
+              Eq("  wow,  so many \t spaces. what? "));
+  EXPECT_THAT(ResourceUtils::StringBuilder()
+                  .AppendText("  where \t ")
+                  .AppendText(" \nis the pie?")
+                  .to_string(),
+              Eq(" where is the pie?"));
+}
+
+TEST(ResourceUtilsTest, StringBuilderEscaping) {
+  EXPECT_THAT(ResourceUtils::StringBuilder()
+                  .AppendText("hey guys\\n ")
+                  .AppendText(" this \\t is so\\\\ cool")
+                  .to_string(),
+              Eq("hey guys\n this \t is so\\ cool"));
+  EXPECT_THAT(ResourceUtils::StringBuilder().AppendText("\\@\\?\\#\\\\\\'").to_string(),
+              Eq("@?#\\\'"));
+}
+
+TEST(ResourceUtilsTest, StringBuilderMisplacedQuote) {
+  ResourceUtils::StringBuilder builder;
+  EXPECT_FALSE(builder.AppendText("they're coming!"));
+}
+
+TEST(ResourceUtilsTest, StringBuilderUnicodeCodes) {
+  EXPECT_THAT(ResourceUtils::StringBuilder().AppendText("\\u00AF\\u0AF0 woah").to_string(),
+              Eq("\u00AF\u0AF0 woah"));
+  EXPECT_FALSE(ResourceUtils::StringBuilder().AppendText("\\u00 yo"));
+}
+
+TEST(ResourceUtilsTest, StringBuilderPreserveSpaces) {
+  EXPECT_THAT(ResourceUtils::StringBuilder(true /*preserve_spaces*/).AppendText("\"").to_string(),
+              Eq("\""));
+}
+
 }  // namespace aapt
diff --git a/tools/aapt2/format/binary/XmlFlattener.cpp b/tools/aapt2/format/binary/XmlFlattener.cpp
index 067372b..781b9fe 100644
--- a/tools/aapt2/format/binary/XmlFlattener.cpp
+++ b/tools/aapt2/format/binary/XmlFlattener.cpp
@@ -25,6 +25,7 @@
 #include "androidfw/ResourceTypes.h"
 #include "utils/misc.h"
 
+#include "ResourceUtils.h"
 #include "SdkConstants.h"
 #include "ValueVisitor.h"
 #include "format/binary/ChunkWriter.h"
@@ -33,6 +34,8 @@
 
 using namespace android;
 
+using ::aapt::ResourceUtils::StringBuilder;
+
 namespace aapt {
 
 namespace {
@@ -89,9 +92,9 @@
     ResXMLTree_cdataExt* flat_text = writer.NextBlock<ResXMLTree_cdataExt>();
 
     // Process plain strings to make sure they get properly escaped.
-    util::StringBuilder builder;
-    builder.Append(node->text);
-    AddString(builder.ToString(), kLowPriority, &flat_text->data);
+    StringBuilder builder;
+    builder.AppendText(node->text);
+    AddString(builder.to_string(), kLowPriority, &flat_text->data);
 
     writer.Finish();
   }
@@ -272,7 +275,7 @@
         // There is no compiled value, so treat the raw string as compiled, once it is processed to
         // make sure escape sequences are properly interpreted.
         processed_str =
-            util::StringBuilder(true /*preserve_spaces*/).Append(xml_attr->value).ToString();
+            StringBuilder(true /*preserve_spaces*/).AppendText(xml_attr->value).to_string();
         compiled_text = StringPiece(processed_str);
       }
 
diff --git a/tools/aapt2/link/ReferenceLinker.cpp b/tools/aapt2/link/ReferenceLinker.cpp
index b8f8804..9aaaa69 100644
--- a/tools/aapt2/link/ReferenceLinker.cpp
+++ b/tools/aapt2/link/ReferenceLinker.cpp
@@ -30,6 +30,7 @@
 #include "util/Util.h"
 #include "xml/XmlUtil.h"
 
+using ::aapt::ResourceUtils::StringBuilder;
 using ::android::StringPiece;
 
 namespace aapt {
@@ -133,10 +134,11 @@
 
       // If we could not parse as any specific type, try a basic STRING.
       if (!transformed && (attr->type_mask & android::ResTable_map::TYPE_STRING)) {
-        util::StringBuilder string_builder;
-        string_builder.Append(*raw_string->value);
+        StringBuilder string_builder;
+        string_builder.AppendText(*raw_string->value);
         if (string_builder) {
-          transformed = util::make_unique<String>(string_pool_->MakeRef(string_builder.ToString()));
+          transformed =
+              util::make_unique<String>(string_pool_->MakeRef(string_builder.to_string()));
         }
       }
 
diff --git a/tools/aapt2/util/Util.cpp b/tools/aapt2/util/Util.cpp
index e42145d..d1c9ca1 100644
--- a/tools/aapt2/util/Util.cpp
+++ b/tools/aapt2/util/Util.cpp
@@ -76,6 +76,34 @@
   return str.substr(str.size() - suffix.size(), suffix.size()) == suffix;
 }
 
+StringPiece TrimLeadingWhitespace(const StringPiece& str) {
+  if (str.size() == 0 || str.data() == nullptr) {
+    return str;
+  }
+
+  const char* start = str.data();
+  const char* end = start + str.length();
+
+  while (start != end && isspace(*start)) {
+    start++;
+  }
+  return StringPiece(start, end - start);
+}
+
+StringPiece TrimTrailingWhitespace(const StringPiece& str) {
+  if (str.size() == 0 || str.data() == nullptr) {
+    return str;
+  }
+
+  const char* start = str.data();
+  const char* end = start + str.length();
+
+  while (end != start && isspace(*(end - 1))) {
+    end--;
+  }
+  return StringPiece(start, end - start);
+}
+
 StringPiece TrimWhitespace(const StringPiece& str) {
   if (str.size() == 0 || str.data() == nullptr) {
     return str;
@@ -269,162 +297,6 @@
   return true;
 }
 
-static bool AppendCodepointToUtf8String(char32_t codepoint, std::string* output) {
-  ssize_t len = utf32_to_utf8_length(&codepoint, 1);
-  if (len < 0) {
-    return false;
-  }
-
-  const size_t start_append_pos = output->size();
-
-  // Make room for the next character.
-  output->resize(output->size() + len);
-
-  char* dst = &*(output->begin() + start_append_pos);
-  utf32_to_utf8(&codepoint, 1, dst, len + 1);
-  return true;
-}
-
-static bool AppendUnicodeCodepoint(Utf8Iterator* iter, std::string* output) {
-  char32_t code = 0;
-  for (size_t i = 0; i < 4 && iter->HasNext(); i++) {
-    char32_t codepoint = iter->Next();
-    char32_t a;
-    if (codepoint >= U'0' && codepoint <= U'9') {
-      a = codepoint - U'0';
-    } else if (codepoint >= U'a' && codepoint <= U'f') {
-      a = codepoint - U'a' + 10;
-    } else if (codepoint >= U'A' && codepoint <= U'F') {
-      a = codepoint - U'A' + 10;
-    } else {
-      return {};
-    }
-    code = (code << 4) | a;
-  }
-  return AppendCodepointToUtf8String(code, output);
-}
-
-static bool IsCodepointSpace(char32_t codepoint) {
-  if (static_cast<uint32_t>(codepoint) & 0xffffff00u) {
-    return false;
-  }
-  return isspace(static_cast<char>(codepoint));
-}
-
-StringBuilder::StringBuilder(bool preserve_spaces) : preserve_spaces_(preserve_spaces) {
-}
-
-StringBuilder& StringBuilder::Append(const StringPiece& str) {
-  if (!error_.empty()) {
-    return *this;
-  }
-
-  // Where the new data will be appended to.
-  const size_t new_data_index = str_.size();
-
-  Utf8Iterator iter(str);
-  while (iter.HasNext()) {
-    const char32_t codepoint = iter.Next();
-
-    if (last_char_was_escape_) {
-      switch (codepoint) {
-        case U't':
-          str_ += '\t';
-          break;
-
-        case U'n':
-          str_ += '\n';
-          break;
-
-        case U'#':
-        case U'@':
-        case U'?':
-        case U'"':
-        case U'\'':
-        case U'\\':
-          str_ += static_cast<char>(codepoint);
-          break;
-
-        case U'u':
-          if (!AppendUnicodeCodepoint(&iter, &str_)) {
-            error_ = "invalid unicode escape sequence";
-            return *this;
-          }
-          break;
-
-        default:
-          // Ignore the escape character and just include the codepoint.
-          AppendCodepointToUtf8String(codepoint, &str_);
-          break;
-      }
-      last_char_was_escape_ = false;
-
-    } else if (!preserve_spaces_ && codepoint == U'"') {
-      if (!quote_ && trailing_space_) {
-        // We found an opening quote, and we have trailing space, so we should append that
-        // space now.
-        if (trailing_space_) {
-          // We had trailing whitespace, so replace with a single space.
-          if (!str_.empty()) {
-            str_ += ' ';
-          }
-          trailing_space_ = false;
-        }
-      }
-      quote_ = !quote_;
-
-    } else if (!preserve_spaces_ && codepoint == U'\'' && !quote_) {
-      // This should be escaped.
-      error_ = "unescaped apostrophe";
-      return *this;
-
-    } else if (codepoint == U'\\') {
-      // This is an escape sequence, convert to the real value.
-      if (!quote_ && trailing_space_) {
-        // We had trailing whitespace, so
-        // replace with a single space.
-        if (!str_.empty()) {
-          str_ += ' ';
-        }
-        trailing_space_ = false;
-      }
-      last_char_was_escape_ = true;
-    } else {
-      if (preserve_spaces_ || quote_) {
-        // Quotes mean everything is taken, including whitespace.
-        AppendCodepointToUtf8String(codepoint, &str_);
-      } else {
-        // This is not quoted text, so we will accumulate whitespace and only emit a single
-        // character of whitespace if it is followed by a non-whitespace character.
-        if (IsCodepointSpace(codepoint)) {
-          // We found whitespace.
-          trailing_space_ = true;
-        } else {
-          if (trailing_space_) {
-            // We saw trailing space before, so replace all
-            // that trailing space with one space.
-            if (!str_.empty()) {
-              str_ += ' ';
-            }
-            trailing_space_ = false;
-          }
-          AppendCodepointToUtf8String(codepoint, &str_);
-        }
-      }
-    }
-  }
-
-  // Accumulate the added string's UTF-16 length.
-  ssize_t len = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(str_.data()) + new_data_index,
-                                     str_.size() - new_data_index);
-  if (len < 0) {
-    error_ = "invalid unicode code point";
-    return *this;
-  }
-  utf16_len_ += len;
-  return *this;
-}
-
 std::u16string Utf8ToUtf16(const StringPiece& utf8) {
   ssize_t utf16_length = utf8_to_utf16_length(
       reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
diff --git a/tools/aapt2/util/Util.h b/tools/aapt2/util/Util.h
index 7c949b90..0eb35d1 100644
--- a/tools/aapt2/util/Util.h
+++ b/tools/aapt2/util/Util.h
@@ -59,7 +59,15 @@
 // Returns true if the string ends with suffix.
 bool EndsWith(const android::StringPiece& str, const android::StringPiece& suffix);
 
-// Creates a new StringPiece16 that points to a substring of the original string without leading or
+// Creates a new StringPiece that points to a substring of the original string without leading
+// whitespace.
+android::StringPiece TrimLeadingWhitespace(const android::StringPiece& str);
+
+// Creates a new StringPiece that points to a substring of the original string without trailing
+// whitespace.
+android::StringPiece TrimTrailingWhitespace(const android::StringPiece& str);
+
+// Creates a new StringPiece that points to a substring of the original string without leading or
 // trailing whitespace.
 android::StringPiece TrimWhitespace(const android::StringPiece& str);
 
@@ -141,9 +149,12 @@
 // break the string interpolation.
 bool VerifyJavaStringFormat(const android::StringPiece& str);
 
+bool AppendStyledString(const android::StringPiece& input, bool preserve_spaces,
+                        std::string* out_str, std::string* out_error);
+
 class StringBuilder {
  public:
-  explicit StringBuilder(bool preserve_spaces = false);
+  StringBuilder() = default;
 
   StringBuilder& Append(const android::StringPiece& str);
   const std::string& ToString() const;
@@ -158,7 +169,6 @@
   explicit operator bool() const;
 
  private:
-  bool preserve_spaces_;
   std::string str_;
   size_t utf16_len_ = 0;
   bool quote_ = false;
diff --git a/tools/aapt2/util/Util_test.cpp b/tools/aapt2/util/Util_test.cpp
index 2d1242a..d4e3bec 100644
--- a/tools/aapt2/util/Util_test.cpp
+++ b/tools/aapt2/util/Util_test.cpp
@@ -41,45 +41,6 @@
   EXPECT_TRUE(util::StartsWith("hello.xml", "he"));
 }
 
-TEST(UtilTest, StringBuilderSplitEscapeSequence) {
-  EXPECT_THAT(util::StringBuilder().Append("this is a new\\").Append("nline.").ToString(),
-              Eq("this is a new\nline."));
-}
-
-TEST(UtilTest, StringBuilderWhitespaceRemoval) {
-  EXPECT_THAT(util::StringBuilder().Append("    hey guys ").Append(" this is so cool ").ToString(),
-              Eq("hey guys this is so cool"));
-  EXPECT_THAT(
-      util::StringBuilder().Append(" \" wow,  so many \t ").Append("spaces. \"what? ").ToString(),
-      Eq(" wow,  so many \t spaces. what?"));
-  EXPECT_THAT(util::StringBuilder().Append("  where \t ").Append(" \nis the pie?").ToString(),
-              Eq("where is the pie?"));
-}
-
-TEST(UtilTest, StringBuilderEscaping) {
-  EXPECT_THAT(util::StringBuilder()
-                  .Append("    hey guys\\n ")
-                  .Append(" this \\t is so\\\\ cool ")
-                  .ToString(),
-              Eq("hey guys\n this \t is so\\ cool"));
-  EXPECT_THAT(util::StringBuilder().Append("\\@\\?\\#\\\\\\'").ToString(), Eq("@?#\\\'"));
-}
-
-TEST(UtilTest, StringBuilderMisplacedQuote) {
-  util::StringBuilder builder;
-  EXPECT_FALSE(builder.Append("they're coming!"));
-}
-
-TEST(UtilTest, StringBuilderUnicodeCodes) {
-  EXPECT_THAT(util::StringBuilder().Append("\\u00AF\\u0AF0 woah").ToString(),
-              Eq("\u00AF\u0AF0 woah"));
-  EXPECT_FALSE(util::StringBuilder().Append("\\u00 yo"));
-}
-
-TEST(UtilTest, StringBuilderPreserveSpaces) {
-  EXPECT_THAT(util::StringBuilder(true /*preserve_spaces*/).Append("\"").ToString(), Eq("\""));
-}
-
 TEST(UtilTest, TokenizeInput) {
   auto tokenizer = util::Tokenize(StringPiece("this| is|the|end"), '|');
   auto iter = tokenizer.begin();