AAPT2: Remove usage of u16string
For legacy reasons, AAPT2 kept using UTF-16 internally. This is no
longer needed, so this CL removes all instances of std::u16string and
StringPiece16, replacing them with std::string and StringPiece. The
only places where UTF-16 is still needed are those that interact with
the ResTable APIs, which operate only on UTF-16.
Change-Id: I492475b84bb9014fa13bf992cff447ee7a5fe588
diff --git a/tools/aapt2/util/Util.cpp b/tools/aapt2/util/Util.cpp
index c41eb05..3c0e9bde 100644
--- a/tools/aapt2/util/Util.cpp
+++ b/tools/aapt2/util/Util.cpp
@@ -54,23 +54,18 @@
return splitAndTransform(str, sep, ::tolower);
}
-StringPiece16 trimWhitespace(const StringPiece16& str) {
- if (str.size() == 0 || str.data() == nullptr) {
- return str;
+bool stringStartsWith(const StringPiece& str, const StringPiece& prefix) {
+ if (str.size() < prefix.size()) {
+ return false;
}
+ return str.substr(0, prefix.size()) == prefix;
+}
- const char16_t* start = str.data();
- const char16_t* end = str.data() + str.length();
-
- while (start != end && util::isspace16(*start)) {
- start++;
+bool stringEndsWith(const StringPiece& str, const StringPiece& suffix) {
+ if (str.size() < suffix.size()) {
+ return false;
}
-
- while (end != start && util::isspace16(*(end - 1))) {
- end--;
- }
-
- return StringPiece16(start, end - start);
+ return str.substr(str.size() - suffix.size(), suffix.size()) == suffix;
}
StringPiece trimWhitespace(const StringPiece& str) {
@@ -92,11 +87,11 @@
return StringPiece(start, end - start);
}
-StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str,
- const StringPiece16& allowedChars) {
+StringPiece::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece& str,
+ const StringPiece& allowedChars) {
const auto endIter = str.end();
for (auto iter = str.begin(); iter != endIter; ++iter) {
- char16_t c = *iter;
+ char c = *iter;
if ((c >= u'a' && c <= u'z') ||
(c >= u'A' && c <= u'Z') ||
(c >= u'0' && c <= u'9')) {
@@ -104,7 +99,7 @@
}
bool match = false;
- for (char16_t i : allowedChars) {
+ for (char i : allowedChars) {
if (c == i) {
match = true;
break;
@@ -118,51 +113,51 @@
return endIter;
}
-bool isJavaClassName(const StringPiece16& str) {
+bool isJavaClassName(const StringPiece& str) {
size_t pieces = 0;
- for (const StringPiece16& piece : tokenize(str, u'.')) {
+ for (const StringPiece& piece : tokenize(str, '.')) {
pieces++;
if (piece.empty()) {
return false;
}
// Can't have starting or trailing $ character.
- if (piece.data()[0] == u'$' || piece.data()[piece.size() - 1] == u'$') {
+ if (piece.data()[0] == '$' || piece.data()[piece.size() - 1] == '$') {
return false;
}
- if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) {
+ if (findNonAlphaNumericAndNotInSet(piece, "$_") != piece.end()) {
return false;
}
}
return pieces >= 2;
}
-bool isJavaPackageName(const StringPiece16& str) {
+bool isJavaPackageName(const StringPiece& str) {
if (str.empty()) {
return false;
}
size_t pieces = 0;
- for (const StringPiece16& piece : tokenize(str, u'.')) {
+ for (const StringPiece& piece : tokenize(str, '.')) {
pieces++;
if (piece.empty()) {
return false;
}
- if (piece.data()[0] == u'_' || piece.data()[piece.size() - 1] == u'_') {
+ if (piece.data()[0] == '_' || piece.data()[piece.size() - 1] == '_') {
return false;
}
- if (findNonAlphaNumericAndNotInSet(piece, u"_") != piece.end()) {
+ if (findNonAlphaNumericAndNotInSet(piece, "_") != piece.end()) {
return false;
}
}
return pieces >= 1;
}
-Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package,
- const StringPiece16& className) {
+Maybe<std::string> getFullyQualifiedClassName(const StringPiece& package,
+ const StringPiece& className) {
if (className.empty()) {
return {};
}
@@ -175,9 +170,9 @@
return {};
}
- std::u16string result(package.data(), package.size());
- if (className.data()[0] != u'.') {
- result += u'.';
+ std::string result(package.data(), package.size());
+ if (className.data()[0] != '.') {
+ result += '.';
}
result.append(className.data(), className.size());
@@ -187,23 +182,23 @@
return result;
}
-static size_t consumeDigits(const char16_t* start, const char16_t* end) {
- const char16_t* c = start;
- for (; c != end && *c >= u'0' && *c <= u'9'; c++) {}
+static size_t consumeDigits(const char* start, const char* end) {
+ const char* c = start;
+ for (; c != end && *c >= '0' && *c <= '9'; c++) {}
return static_cast<size_t>(c - start);
}
-bool verifyJavaStringFormat(const StringPiece16& str) {
- const char16_t* c = str.begin();
- const char16_t* const end = str.end();
+bool verifyJavaStringFormat(const StringPiece& str) {
+ const char* c = str.begin();
+ const char* const end = str.end();
size_t argCount = 0;
bool nonpositional = false;
while (c != end) {
- if (*c == u'%' && c + 1 < end) {
+ if (*c == '%' && c + 1 < end) {
c++;
- if (*c == u'%') {
+ if (*c == '%') {
c++;
continue;
}
@@ -213,11 +208,11 @@
size_t numDigits = consumeDigits(c, end);
if (numDigits > 0) {
c += numDigits;
- if (c != end && *c != u'$') {
+ if (c != end && *c != '$') {
// The digits were a size, but not a positional argument.
nonpositional = true;
}
- } else if (*c == u'<') {
+ } else if (*c == '<') {
// Reusing last argument, bad idea since positions can be moved around
// during translation.
nonpositional = true;
@@ -225,7 +220,7 @@
c++;
// Optionally we can have a $ after
- if (c != end && *c == u'$') {
+ if (c != end && *c == '$') {
c++;
}
} else {
@@ -233,13 +228,13 @@
}
// Ignore size, width, flags, etc.
- while (c != end && (*c == u'-' ||
- *c == u'#' ||
- *c == u'+' ||
- *c == u' ' ||
- *c == u',' ||
- *c == u'(' ||
- (*c >= u'0' && *c <= '9'))) {
+ while (c != end && (*c == '-' ||
+ *c == '#' ||
+ *c == '+' ||
+ *c == ' ' ||
+ *c == ',' ||
+ *c == '(' ||
+ (*c >= '0' && *c <= '9'))) {
c++;
}
@@ -286,11 +281,11 @@
return true;
}
-static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) {
- char16_t code = 0;
+static Maybe<std::string> parseUnicodeCodepoint(const char** start, const char* end) {
+ char32_t code = 0;
for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) {
- char16_t c = **start;
- int a;
+ char c = **start;
+ char32_t a;
if (c >= '0' && c <= '9') {
a = c - '0';
} else if (c >= 'a' && c <= 'f') {
@@ -298,51 +293,60 @@
} else if (c >= 'A' && c <= 'F') {
a = c - 'A' + 10;
} else {
- return make_nothing<char16_t>();
+ return {};
}
code = (code << 4) | a;
}
- return make_value(code);
+
+ ssize_t len = utf32_to_utf8_length(&code, 1);
+ if (len < 0) {
+ return {};
+ }
+
+ std::string resultUtf8;
+ resultUtf8.resize(len);
+ utf32_to_utf8(&code, 1, &*resultUtf8.begin(), len + 1);
+ return resultUtf8;
}
-StringBuilder& StringBuilder::append(const StringPiece16& str) {
+StringBuilder& StringBuilder::append(const StringPiece& str) {
if (!mError.empty()) {
return *this;
}
- const char16_t* const end = str.end();
- const char16_t* start = str.begin();
- const char16_t* current = start;
+ const char* const end = str.end();
+ const char* start = str.begin();
+ const char* current = start;
while (current != end) {
if (mLastCharWasEscape) {
switch (*current) {
- case u't':
- mStr += u'\t';
+ case 't':
+ mStr += '\t';
break;
- case u'n':
- mStr += u'\n';
+ case 'n':
+ mStr += '\n';
break;
- case u'#':
- mStr += u'#';
+ case '#':
+ mStr += '#';
break;
- case u'@':
- mStr += u'@';
+ case '@':
+ mStr += '@';
break;
- case u'?':
- mStr += u'?';
+ case '?':
+ mStr += '?';
break;
- case u'"':
- mStr += u'"';
+ case '"':
+ mStr += '"';
break;
- case u'\'':
- mStr += u'\'';
+ case '\'':
+ mStr += '\'';
break;
- case u'\\':
- mStr += u'\\';
+ case '\\':
+ mStr += '\\';
break;
- case u'u': {
+ case 'u': {
current++;
- Maybe<char16_t> c = parseUnicodeCodepoint(¤t, end);
+ Maybe<std::string> c = parseUnicodeCodepoint(¤t, end);
if (!c) {
mError = "invalid unicode escape sequence";
return *this;
@@ -358,7 +362,7 @@
}
mLastCharWasEscape = false;
start = current + 1;
- } else if (*current == u'"') {
+ } else if (*current == '"') {
if (!mQuote && mTrailingSpace) {
// We found an opening quote, and we have
// trailing space, so we should append that
@@ -367,7 +371,7 @@
// We had trailing whitespace, so
// replace with a single space.
if (!mStr.empty()) {
- mStr += u' ';
+ mStr += ' ';
}
mTrailingSpace = false;
}
@@ -375,17 +379,17 @@
mQuote = !mQuote;
mStr.append(start, current - start);
start = current + 1;
- } else if (*current == u'\'' && !mQuote) {
+ } else if (*current == '\'' && !mQuote) {
// This should be escaped.
mError = "unescaped apostrophe";
return *this;
- } else if (*current == u'\\') {
+ } else if (*current == '\\') {
// This is an escape sequence, convert to the real value.
if (!mQuote && mTrailingSpace) {
// We had trailing whitespace, so
// replace with a single space.
if (!mStr.empty()) {
- mStr += u' ';
+ mStr += ' ';
}
mTrailingSpace = false;
}
@@ -394,7 +398,7 @@
mLastCharWasEscape = true;
} else if (!mQuote) {
// This is not quoted text, so look for whitespace.
- if (isspace16(*current)) {
+ if (isspace(*current)) {
// We found whitespace, see if we have seen some
// before.
if (!mTrailingSpace) {
@@ -410,7 +414,7 @@
// We saw trailing space before, so replace all
// that trailing space with one space.
if (!mStr.empty()) {
- mStr += u' ';
+ mStr += ' ';
}
mTrailingSpace = false;
}
@@ -441,10 +445,8 @@
}
std::string utf8;
- // Make room for '\0' explicitly.
- utf8.resize(utf8Length + 1);
- utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin(), utf8Length + 1);
utf8.resize(utf8Length);
+ utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin(), utf8Length + 1);
return utf8;
}
@@ -467,15 +469,58 @@
return data;
}
-bool extractResFilePathParts(const StringPiece16& path, StringPiece16* outPrefix,
- StringPiece16* outEntry, StringPiece16* outSuffix) {
- if (!stringStartsWith<char16_t>(path, u"res/")) {
+typename Tokenizer::iterator& Tokenizer::iterator::operator++() {
+ const char* start = mToken.end();
+ const char* end = mStr.end();
+ if (start == end) {
+ mEnd = true;
+ mToken.assign(mToken.end(), 0);
+ return *this;
+ }
+
+ start += 1;
+ const char* current = start;
+ while (current != end) {
+ if (*current == mSeparator) {
+ mToken.assign(start, current - start);
+ return *this;
+ }
+ ++current;
+ }
+ mToken.assign(start, end - start);
+ return *this;
+}
+
+bool Tokenizer::iterator::operator==(const iterator& rhs) const {
+ // We check equality here a bit differently.
+ // We need to know that the addresses are the same.
+ return mToken.begin() == rhs.mToken.begin() && mToken.end() == rhs.mToken.end() &&
+ mEnd == rhs.mEnd;
+}
+
+bool Tokenizer::iterator::operator!=(const iterator& rhs) const {
+ return !(*this == rhs);
+}
+
+Tokenizer::iterator::iterator(StringPiece s, char sep, StringPiece tok, bool end) :
+ mStr(s), mSeparator(sep), mToken(tok), mEnd(end) {
+}
+
+Tokenizer::Tokenizer(StringPiece str, char sep) :
+ mBegin(++iterator(str, sep, StringPiece(str.begin() - 1, 0), false)),
+ mEnd(str, sep, StringPiece(str.end(), 0), true) {
+}
+
+bool extractResFilePathParts(const StringPiece& path, StringPiece* outPrefix,
+ StringPiece* outEntry, StringPiece* outSuffix) {
+ const StringPiece resPrefix("res/");
+ if (!stringStartsWith(path, resPrefix)) {
return false;
}
- StringPiece16::const_iterator lastOccurence = path.end();
- for (auto iter = path.begin() + StringPiece16(u"res/").size(); iter != path.end(); ++iter) {
- if (*iter == u'/') {
+ StringPiece::const_iterator lastOccurence = path.end();
+ for (auto iter = path.begin() + resPrefix.size(); iter != path.end(); ++iter) {
+ if (*iter == '/') {
lastOccurence = iter;
}
}
@@ -484,12 +529,30 @@
return false;
}
- auto iter = std::find(lastOccurence, path.end(), u'.');
- *outSuffix = StringPiece16(iter, path.end() - iter);
- *outEntry = StringPiece16(lastOccurence + 1, iter - lastOccurence - 1);
- *outPrefix = StringPiece16(path.begin(), lastOccurence - path.begin() + 1);
+ auto iter = std::find(lastOccurence, path.end(), '.');
+ *outSuffix = StringPiece(iter, path.end() - iter);
+ *outEntry = StringPiece(lastOccurence + 1, iter - lastOccurence - 1);
+ *outPrefix = StringPiece(path.begin(), lastOccurence - path.begin() + 1);
return true;
}
+StringPiece16 getString16(const android::ResStringPool& pool, size_t idx) {
+ size_t len;
+ const char16_t* str = pool.stringAt(idx, &len);
+ if (str != nullptr) {
+ return StringPiece16(str, len);
+ }
+ return StringPiece16();
+}
+
+std::string getString(const android::ResStringPool& pool, size_t idx) {
+ size_t len;
+ const char* str = pool.string8At(idx, &len);
+ if (str != nullptr) {
+ return std::string(str, len);
+ }
+ return utf16ToUtf8(getString16(pool, idx));
+}
+
} // namespace util
} // namespace aapt