AAPT2: Fix styled string whitespace processing

Change styled string whitespace processing to match what AAPT did.

Main changes:
- whitespace around tags is preserved.
- tags start exactly where they are supposed to, not off by one.

Bug: 72406283
Test: make aapt2_tests
Change-Id: I4d12728c493efd8c978e2e3d2718b56534ff52ef
diff --git a/tools/aapt2/ResourceUtils.h b/tools/aapt2/ResourceUtils.h
index f83d49e..7af2fe0 100644
--- a/tools/aapt2/ResourceUtils.h
+++ b/tools/aapt2/ResourceUtils.h
@@ -224,6 +224,95 @@
                                           const android::Res_value& res_value,
                                           StringPool* dst_pool);
 
+// A string flattened from an XML hierarchy. The tags (spans) and untranslatable sections that
+// apply to the text are kept alongside it in parallel data structures.
+struct FlattenedXmlString {
+  std::string text;  // The flattened text.
+  std::vector<UntranslatableSection> untranslatable_sections;  // Untranslatable parts of `text`.
+  std::vector<Span> spans;  // Tags applied to `text`.
+};
+
+// Flattens an XML hierarchy into a FlattenedXmlString: formats the text, processes escape
+// sequences, and collapses whitespace, all while keeping the untranslatable sections and spans in
+// sync with those transformations.
+//
+// Specifically, the StringBuilder handles escaped characters like \t, \n, \\, \', etc.
+// Single quotes *must* be escaped, unless within a pair of double-quotes.
+// Pairs of double-quotes disable whitespace stripping of the enclosed text.
+// Unicode escape codes (\u0049) are interpreted and the represented Unicode character is inserted.
+//
+// A NOTE ON WHITESPACE:
+//
+// When preserve_spaces is false, and when text is not enclosed within double-quotes,
+// StringBuilder replaces each run of whitespace with a single space character. This happens at the
+// start and end of the string as well, so the result may begin or end with a space.
+//
+// When a Span is started or stopped, the whitespace tracking is reset, meaning that whitespace
+// encountered directly after the span boundary is emitted. This leads to situations like the
+// following: "This <b> is </b> spaced" -> "This  is  spaced". Without spans, this would be properly
+// compressed: "This  is  spaced" -> "This is spaced".
+//
+// Untranslatable sections do not have the same problem:
+// "This <xliff:g> is </xliff:g> not spaced" -> "This is not spaced".
+//
+// NOTE: All of this behaves the way it does because AAPT1 did it this way. Maintaining backwards
+// compatibility is important.
+//
+class StringBuilder {
+ public:
+  using SpanHandle = size_t;  // Returned by StartSpan(); pass it to EndSpan().
+  using UntranslatableHandle = size_t;  // Returned by StartUntranslatable().
+
+  // Creates a StringBuilder. If preserve_spaces is true, whitespace removal is not performed, and
+  // single quotations can be used without escaping them.
+  explicit StringBuilder(bool preserve_spaces = false);
+
+  // Appends a chunk of text. Returns a StringBuilder reference so that calls can be chained.
+  StringBuilder& AppendText(const std::string& text);
+
+  // Starts a Span (tag) with the given name. The name is expected to be of the form:
+  //  "tag_name;attr1=value;attr2=value;"
+  // Which is how Spans are encoded in the ResStringPool.
+  // To end the span, pass back the SpanHandle received from this method to the EndSpan() method.
+  SpanHandle StartSpan(const std::string& name);
+
+  // Ends a Span (tag). Pass in the matching SpanHandle previously obtained from StartSpan().
+  void EndSpan(SpanHandle handle);
+
+  // Starts an Untranslatable section.
+  // To end the section, pass back the UntranslatableHandle received from this method to
+  // the EndUntranslatable() method.
+  UntranslatableHandle StartUntranslatable();
+
+  // Ends an Untranslatable section. Pass in the matching UntranslatableHandle previously obtained
+  // from StartUntranslatable().
+  void EndUntranslatable(UntranslatableHandle handle);
+
+  // Returns the flattened XML string, with all spans and untranslatable sections encoded as
+  // parallel data structures.
+  FlattenedXmlString GetFlattenedString() const;
+
+  // Returns just the flattened XML text, with no spans or untranslatable sections.
+  std::string to_string() const;
+
+  // Returns true if there was no error.
+  explicit operator bool() const;
+
+  std::string GetError() const;  // Presumably the error text when operator bool() is false.
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StringBuilder);
+
+  void ResetTextState();  // NOTE(review): presumably the reset done at span boundaries; confirm.
+
+  std::string error_;
+  FlattenedXmlString xml_string_;
+  uint32_t utf16_len_ = 0u;
+  bool preserve_spaces_;  // No in-class default; presumably set by the constructor.
+  bool quote_;  // NOTE(review): no in-class default; confirm the constructor sets it.
+  bool last_codepoint_was_space_ = false;
+};
+
 }  // namespace ResourceUtils
 }  // namespace aapt