adb: win32: remove widen()/narrow() in favor of UTF8ToWide()/WideToUTF8()

Now that we have a more standardized API (also available in Chromium),
switch to it. Another benefit is real error handling instead of just
killing the process on invalid Unicode.

Make UTF8ToWide()/WideToUTF8() set errno to EILSEQ on bad input (and to
EINVAL on any other conversion failure). EILSEQ is the same error code
that wcsrtombs(3) uses.

Update the unit tests to check for EILSEQ.

Change-Id: Ie92acf74d37adaea116cf610c1bf8cd433741e16
Signed-off-by: Spencer Low <CompareAndSwap@gmail.com>
diff --git a/base/include/base/utf8.h b/base/include/base/utf8.h
index 3cc168d..3b0ed0a 100755
--- a/base/include/base/utf8.h
+++ b/base/include/base/utf8.h
@@ -19,6 +19,10 @@
 
 #ifdef _WIN32
 #include <string>
+#else
+// Bring in prototypes for standard APIs so that we can import them into the utf8 namespace.
+#include <fcntl.h>      // open
+#include <unistd.h>     // unlink
 #endif
 
 namespace android {
diff --git a/base/utf8.cpp b/base/utf8.cpp
index 62a118f..99f0f54 100755
--- a/base/utf8.cpp
+++ b/base/utf8.cpp
@@ -27,6 +27,18 @@
 namespace android {
 namespace base {
 
+// Helper to set errno based on GetLastError() after WideCharToMultiByte()/MultiByteToWideChar().
+static void SetErrnoFromLastError() {
+  switch (GetLastError()) {
+    case ERROR_NO_UNICODE_TRANSLATION:
+      errno = EILSEQ;  // Invalid Unicode in the input; same code that wcsrtombs(3) uses.
+      break;
+    default:
+      errno = EINVAL;  // Every other Win32 error is reported as a generic invalid argument.
+      break;
+  }
+}
+
 bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) {
   utf8->clear();
 
@@ -49,6 +61,7 @@
   const int chars_required = WideCharToMultiByte(CP_UTF8, flags, utf16, size,
                                                  NULL, 0, NULL, NULL);
   if (chars_required <= 0) {
+    SetErrnoFromLastError();
     return false;
   }
 
@@ -59,6 +72,7 @@
                                          &(*utf8)[0], chars_required, NULL,
                                          NULL);
   if (result != chars_required) {
+    SetErrnoFromLastError();
     CHECK_LE(result, chars_required) << "WideCharToMultiByte wrote " << result
         << " chars to buffer of " << chars_required << " chars";
     utf8->clear();
@@ -80,8 +94,8 @@
 }
 
 // Internal helper function that takes MultiByteToWideChar() flags.
-static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size,
-                                 std::wstring* utf16, const DWORD flags) {
+static bool UTF8ToWideWithFlags(const char* utf8, const size_t size, std::wstring* utf16,
+                                const DWORD flags) {
   utf16->clear();
 
   if (size == 0) {
@@ -93,6 +107,7 @@
   const int chars_required = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
                                                  NULL, 0);
   if (chars_required <= 0) {
+    SetErrnoFromLastError();
     return false;
   }
 
@@ -102,6 +117,7 @@
   const int result = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
                                          &(*utf16)[0], chars_required);
   if (result != chars_required) {
+    SetErrnoFromLastError();
     CHECK_LE(result, chars_required) << "MultiByteToWideChar wrote " << result
         << " chars to buffer of " << chars_required << " chars";
     utf16->clear();
@@ -113,13 +129,16 @@
 
 bool UTF8ToWide(const char* utf8, const size_t size, std::wstring* utf16) {
   // If strictly interpreting as UTF-8 succeeds, return success.
-  if (_UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) {
+  if (UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) {
     return true;
   }
 
+  const int saved_errno = errno;  // Remember the errno left by the failed strict attempt.
+
   // Fallback to non-strict interpretation, allowing invalid characters and
   // converting as best as possible, and return false to signify a problem.
-  (void)_UTF8ToWideWithFlags(utf8, size, utf16, 0);
+  (void)UTF8ToWideWithFlags(utf8, size, utf16, 0);
+  errno = saved_errno;  // Restore it so the caller sees the strict attempt's errno.
   return false;
 }
 
@@ -140,7 +159,6 @@
 int open(const char* name, int flags, ...) {
   std::wstring name_utf16;
   if (!UTF8ToWide(name, &name_utf16)) {
-    errno = EINVAL;
     return -1;
   }
 
@@ -158,7 +176,6 @@
 int unlink(const char* name) {
   std::wstring name_utf16;
   if (!UTF8ToWide(name, &name_utf16)) {
-    errno = EINVAL;
     return -1;
   }
 
diff --git a/base/utf8_test.cpp b/base/utf8_test.cpp
index bbb54b1..13f6431 100755
--- a/base/utf8_test.cpp
+++ b/base/utf8_test.cpp
@@ -26,12 +26,16 @@
 TEST(UTFStringConversionsTest, ConvertInvalidUTF8) {
   std::wstring wide;
 
+  errno = 0;
+
   // Standalone \xa2 is an invalid UTF-8 sequence, so this should return an
   // error. Concatenate two C/C++ literal string constants to prevent the
   // compiler from giving an error about "\xa2af" containing a "hex escape
   // sequence out of range".
   EXPECT_FALSE(android::base::UTF8ToWide("before\xa2" "after", &wide));
 
+  EXPECT_EQ(EILSEQ, errno);
+
   // Even if an invalid character is encountered, UTF8ToWide() should still do
   // its best to convert the rest of the string. sysdeps_win32.cpp:
   // _console_write_utf8() depends on this behavior.
@@ -161,6 +165,7 @@
 
   for (size_t i = 0; i < arraysize(convert_cases); i++) {
     std::wstring converted;
+    errno = 0;
     const bool success = UTF8ToWide(convert_cases[i].utf8,
                                     strlen(convert_cases[i].utf8),
                                     &converted);
@@ -171,6 +176,8 @@
     if (success) {
       std::wstring expected(convert_cases[i].wide);
       EXPECT_EQ(expected, converted);
+    } else {
+      EXPECT_EQ(EILSEQ, errno);
     }
   }
 
@@ -227,6 +234,7 @@
 
   for (size_t i = 0; i < arraysize(convert_cases); i++) {
     std::string converted;
+    errno = 0;
     const bool success = WideToUTF8(convert_cases[i].utf16,
                                     wcslen(convert_cases[i].utf16),
                                     &converted);
@@ -237,6 +245,8 @@
     if (success) {
       std::string expected(convert_cases[i].utf8);
       EXPECT_EQ(expected, converted);
+    } else {
+      EXPECT_EQ(EILSEQ, errno);
     }
   }
 }