AAPT2: Encode 4-byte strings in Modified UTF-8

Code points that are encoded as 4 bytes in standard UTF-8 are not allowed in Modified UTF-8. They should instead be encoded as surrogate pairs, in the same way that CESU-8 encodes them. As a result, each 4-byte UTF-8 sequence is represented in 6 bytes (two 3-byte encoded surrogates).

Bug: 37140916
Test: aapt2_tests
Change-Id: I155dc24f166139d1d36a16bac088dcfcd59eb321

commit: d86ea58bddea7d5608e3539fc77e3d805c0af1d1 [log] [tgz]
author: Ryan Mitchell <rtmitchell@google.com> Wed Jun 27 11:57:18 2018 -0700
committer: Ryan Mitchell <rtmitchell@google.com> Mon Jul 02 14:14:33 2018 -0700
tree: 59aee68a419d4d4a4559ed50a3589abeaee3a258
parent: af5753836912a1c76ac35071b8343a6d00782d9e [diff] [blame]
diff --git a/tools/aapt2/StringPool.cpp b/tools/aapt2/StringPool.cpp
index b37e1fb..8eabd32 100644
--- a/tools/aapt2/StringPool.cpp
+++ b/tools/aapt2/StringPool.cpp

@@ -367,7 +367,7 @@
 static bool EncodeString(const std::string& str, const bool utf8, BigBuffer* out,
                          IDiagnostics* diag) {
   if (utf8) {
-    const std::string& encoded = str;
+    const std::string& encoded = util::Utf8ToModifiedUtf8(str);
     const ssize_t utf16_length = utf8_to_utf16_length(
         reinterpret_cast<const uint8_t*>(encoded.data()), encoded.size());
     CHECK(utf16_length >= 0);
commit	d86ea58bddea7d5608e3539fc77e3d805c0af1d1	[log] [tgz]
author	Ryan Mitchell <rtmitchell@google.com>	Wed Jun 27 11:57:18 2018 -0700
committer	Ryan Mitchell <rtmitchell@google.com>	Mon Jul 02 14:14:33 2018 -0700
tree	59aee68a419d4d4a4559ed50a3589abeaee3a258
parent	af5753836912a1c76ac35071b8343a6d00782d9e [diff] [blame]