adb: win32: remove widen()/narrow() in favor of UTF8ToWide()/WideToUTF8()
Now that we have a more standardized API (also available in Chromium),
switch to it. Another benefit is real error handling instead of just
killing the process on invalid Unicode.
Make UTF8ToWide()/WideToUTF8() set errno to EILSEQ on bad input. This is
the same error code that wcsrtombs(3) uses.
Update the unittest to check for EILSEQ.
Change-Id: Ie92acf74d37adaea116cf610c1bf8cd433741e16
Signed-off-by: Spencer Low <CompareAndSwap@gmail.com>
diff --git a/base/include/base/utf8.h b/base/include/base/utf8.h
index 3cc168d..3b0ed0a 100755
--- a/base/include/base/utf8.h
+++ b/base/include/base/utf8.h
@@ -19,6 +19,10 @@
#ifdef _WIN32
#include <string>
+#else
+// Bring in prototypes for standard APIs so that we can import them into the utf8 namespace.
+#include <fcntl.h> // open
+#include <unistd.h> // unlink
#endif
namespace android {
diff --git a/base/utf8.cpp b/base/utf8.cpp
index 62a118f..99f0f54 100755
--- a/base/utf8.cpp
+++ b/base/utf8.cpp
@@ -27,6 +27,18 @@
namespace android {
namespace base {
+// Helper to set errno based on GetLastError() after WideCharToMultiByte()/MultiByteToWideChar().
+static void SetErrnoFromLastError() {
+ switch (GetLastError()) {
+ case ERROR_NO_UNICODE_TRANSLATION:  // Bad (untranslatable) Unicode input.
+ errno = EILSEQ;  // Same error code that wcsrtombs(3) uses.
+ break;
+ default:  // Every other Win32 error code is reported as EINVAL.
+ errno = EINVAL;
+ break;
+ }
+}
+
bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) {
utf8->clear();
@@ -49,6 +61,7 @@
const int chars_required = WideCharToMultiByte(CP_UTF8, flags, utf16, size,
NULL, 0, NULL, NULL);
if (chars_required <= 0) {
+ SetErrnoFromLastError();
return false;
}
@@ -59,6 +72,7 @@
&(*utf8)[0], chars_required, NULL,
NULL);
if (result != chars_required) {
+ SetErrnoFromLastError();
CHECK_LE(result, chars_required) << "WideCharToMultiByte wrote " << result
<< " chars to buffer of " << chars_required << " chars";
utf8->clear();
@@ -80,8 +94,8 @@
}
// Internal helper function that takes MultiByteToWideChar() flags.
-static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size,
- std::wstring* utf16, const DWORD flags) {
+static bool UTF8ToWideWithFlags(const char* utf8, const size_t size, std::wstring* utf16,
+ const DWORD flags) {
utf16->clear();
if (size == 0) {
@@ -93,6 +107,7 @@
const int chars_required = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
NULL, 0);
if (chars_required <= 0) {
+ SetErrnoFromLastError();
return false;
}
@@ -102,6 +117,7 @@
const int result = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
&(*utf16)[0], chars_required);
if (result != chars_required) {
+ SetErrnoFromLastError();
CHECK_LE(result, chars_required) << "MultiByteToWideChar wrote " << result
<< " chars to buffer of " << chars_required << " chars";
utf16->clear();
@@ -113,13 +129,16 @@
bool UTF8ToWide(const char* utf8, const size_t size, std::wstring* utf16) {
// If strictly interpreting as UTF-8 succeeds, return success.
- if (_UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) {
+ if (UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) {
return true;
}
+ const int saved_errno = errno;  // The lenient retry below may overwrite errno.
+
// Fallback to non-strict interpretation, allowing invalid characters and
// converting as best as possible, and return false to signify a problem.
- (void)_UTF8ToWideWithFlags(utf8, size, utf16, 0);
+ (void)UTF8ToWideWithFlags(utf8, size, utf16, 0);
+ errno = saved_errno;  // Report the error from the strict pass to the caller.
return false;
}
@@ -140,7 +159,6 @@
int open(const char* name, int flags, ...) {
std::wstring name_utf16;
if (!UTF8ToWide(name, &name_utf16)) {
- errno = EINVAL;
return -1;
}
@@ -158,7 +176,6 @@
int unlink(const char* name) {
std::wstring name_utf16;
if (!UTF8ToWide(name, &name_utf16)) {
- errno = EINVAL;
return -1;
}
diff --git a/base/utf8_test.cpp b/base/utf8_test.cpp
index bbb54b1..13f6431 100755
--- a/base/utf8_test.cpp
+++ b/base/utf8_test.cpp
@@ -26,12 +26,16 @@
TEST(UTFStringConversionsTest, ConvertInvalidUTF8) {
std::wstring wide;
+ errno = 0;
+
// Standalone \xa2 is an invalid UTF-8 sequence, so this should return an
// error. Concatenate two C/C++ literal string constants to prevent the
// compiler from giving an error about "\xa2af" containing a "hex escape
// sequence out of range".
EXPECT_FALSE(android::base::UTF8ToWide("before\xa2" "after", &wide));
+ EXPECT_EQ(EILSEQ, errno);
+
// Even if an invalid character is encountered, UTF8ToWide() should still do
// its best to convert the rest of the string. sysdeps_win32.cpp:
// _console_write_utf8() depends on this behavior.
@@ -161,6 +165,7 @@
for (size_t i = 0; i < arraysize(convert_cases); i++) {
std::wstring converted;
+ errno = 0;
const bool success = UTF8ToWide(convert_cases[i].utf8,
strlen(convert_cases[i].utf8),
&converted);
@@ -171,6 +176,8 @@
if (success) {
std::wstring expected(convert_cases[i].wide);
EXPECT_EQ(expected, converted);
+ } else {
+ EXPECT_EQ(EILSEQ, errno);
}
}
@@ -227,6 +234,7 @@
for (size_t i = 0; i < arraysize(convert_cases); i++) {
std::string converted;
+ errno = 0;
const bool success = WideToUTF8(convert_cases[i].utf16,
wcslen(convert_cases[i].utf16),
&converted);
@@ -237,6 +245,8 @@
if (success) {
std::string expected(convert_cases[i].utf8);
EXPECT_EQ(expected, converted);
+ } else {
+ EXPECT_EQ(EILSEQ, errno);
}
}
}