Add a new utf8-to-utf16 conversion function.
Change-Id: I957c22fb219596ca4239db7a169473d3894b09eb
diff --git a/include/utils/Unicode.h b/include/utils/Unicode.h
index 9273533..c8c87c3 100644
--- a/include/utils/Unicode.h
+++ b/include/utils/Unicode.h
@@ -163,6 +163,13 @@
*/
void utf8_to_utf16(const uint8_t* src, size_t srcLen, char16_t* dst);
+/**
+ * Like utf8_to_utf16_no_null_terminator, but you can supply a maximum length of the
+ * decoded string: at most dstLen UTF-16 code units are written to dst, and no null
+ * terminator is appended.
+ *
+ * Returns a pointer one past the last code unit of the decoded output, which is
+ * dst + dstLen when the buffer is filled completely. If a surrogate pair would
+ * straddle the end of the buffer, its first half is still stored in the last slot
+ * but the returned pointer points at that slot, so the incomplete pair is excluded
+ * from the returned range.
+ */
+char16_t* utf8_to_utf16_n(const uint8_t* src, size_t srcLen, char16_t* dst, size_t dstLen);
+
}
#endif
diff --git a/libs/utils/Unicode.cpp b/libs/utils/Unicode.cpp
index 41cbf03..a66e3bb 100644
--- a/libs/utils/Unicode.cpp
+++ b/libs/utils/Unicode.cpp
@@ -573,4 +573,34 @@
*end = 0;
}
+/**
+ * Decodes the UTF-8 bytes in [src, src + srcLen) into UTF-16 code units stored
+ * at dst, writing at most dstLen code units. No null terminator is appended.
+ *
+ * Returns a pointer one past the last code unit of the decoded output. If only
+ * the first half of a surrogate pair fits, that half is still stored in the
+ * last slot but the returned pointer points at it, so the incomplete pair is
+ * excluded from the returned range. Decoding also stops early if the final
+ * UTF-8 sequence is cut short by srcLen.
+ */
+char16_t* utf8_to_utf16_n(const uint8_t* src, size_t srcLen, char16_t* dst, size_t dstLen) {
+    const uint8_t* const u8end = src + srcLen;
+    const uint8_t* u8cur = src;
+    // Typed as char16_t to match dst: char16_t is a distinct type from
+    // uint16_t in C++11, so the two pointer types cannot be mixed.
+    const char16_t* const u16end = dst + dstLen;
+    char16_t* u16cur = dst;
+
+    while (u8cur < u8end && u16cur < u16end) {
+        size_t u8len = utf8_codepoint_len(*u8cur);
+        if (u8len > (size_t) (u8end - u8cur)) {
+            // The lead byte announces more bytes than remain in src. Stop
+            // rather than hand the decoder a length that extends past u8end.
+            break;
+        }
+        uint32_t codepoint = utf8_to_utf32_codepoint(u8cur, u8len);
+
+        // Convert the UTF32 codepoint to one or more UTF16 codepoints
+        if (codepoint <= 0xFFFF) {
+            // Single UTF16 character
+            *u16cur++ = (char16_t) codepoint;
+        } else {
+            // Multiple UTF16 characters with surrogates
+            codepoint = codepoint - 0x10000;
+            *u16cur++ = (char16_t) ((codepoint >> 10) + 0xD800);
+            if (u16cur >= u16end) {
+                // Not enough room for the second half of the pair: rewind so
+                // the caller's range does not include the lone first half.
+                return u16cur - 1;
+            }
+            *u16cur++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
+        }
+
+        u8cur += u8len;
+    }
+    return u16cur;
+}
+
}