Pseudolocalizer improvements.
Fixes accented pseudolocalization and adds RTL pseudolocale.
This change contains following modifications in the pseudolocalization logic:
1) zz_ZZ pseudolocale was removed;
2) en_XA pseudolocale was added for pseudo-accented;
3) ar_XB pseudolocale was added for pseudo-rtl;
4) Pseudo RTL localization functionality was implemented;
5) Text expansion functionality was implemented;
6) Text bracketing was implemented;
7) Couple of issues of previous implementation were fixed.
Change-Id: I9f7f27bed717e39e82717d15c398decffc8bec3c
Signed-off-by: Anton Krumin <antkrumin@google.com>
diff --git a/tools/aapt/pseudolocalize.cpp b/tools/aapt/pseudolocalize.cpp
index 9e50c5a..c02327a 100644
--- a/tools/aapt/pseudolocalize.cpp
+++ b/tools/aapt/pseudolocalize.cpp
@@ -2,89 +2,155 @@
using namespace std;
+// String basis to generate expansion
+static const String16 k_expansion_string = String16("one two three "
+ "four five six seven eight nine ten eleven twelve thirteen "
+ "fourteen fiveteen sixteen seventeen nineteen twenty");
+
+// Special unicode characters to override directionality of the words
+static const String16 k_rlm = String16("\xe2\x80\x8f");
+static const String16 k_rlo = String16("\xE2\x80\xae");
+static const String16 k_pdf = String16("\xE2\x80\xac");
+
+// Placeholder marks
+static const String16 k_placeholder_open = String16("\xc2\xbb");
+static const String16 k_placeholder_close = String16("\xc2\xab");
+
static const char*
-pseudolocalize_char(char c)
+pseudolocalize_char(const char16_t c)
{
switch (c) {
- case 'a': return "\xc4\x83";
- case 'b': return "\xcf\x84";
- case 'c': return "\xc4\x8b";
- case 'd': return "\xc4\x8f";
- case 'e': return "\xc4\x99";
+ case 'a': return "\xc3\xa5";
+ case 'b': return "\xc9\x93";
+ case 'c': return "\xc3\xa7";
+ case 'd': return "\xc3\xb0";
+ case 'e': return "\xc3\xa9";
case 'f': return "\xc6\x92";
case 'g': return "\xc4\x9d";
- case 'h': return "\xd1\x9b";
- case 'i': return "\xcf\x8a";
+ case 'h': return "\xc4\xa5";
+ case 'i': return "\xc3\xae";
case 'j': return "\xc4\xb5";
- case 'k': return "\xc4\xb8";
- case 'l': return "\xc4\xba";
+ case 'k': return "\xc4\xb7";
+ case 'l': return "\xc4\xbc";
case 'm': return "\xe1\xb8\xbf";
- case 'n': return "\xd0\xb8";
- case 'o': return "\xcf\x8c";
- case 'p': return "\xcf\x81";
+ case 'n': return "\xc3\xb1";
+ case 'o': return "\xc3\xb6";
+ case 'p': return "\xc3\xbe";
case 'q': return "\x51";
- case 'r': return "\xd2\x91";
+ case 'r': return "\xc5\x95";
case 's': return "\xc5\xa1";
- case 't': return "\xd1\x82";
- case 'u': return "\xce\xb0";
+ case 't': return "\xc5\xa3";
+ case 'u': return "\xc3\xbb";
case 'v': return "\x56";
- case 'w': return "\xe1\xba\x85";
+ case 'w': return "\xc5\xb5";
case 'x': return "\xd1\x85";
- case 'y': return "\xe1\xbb\xb3";
- case 'z': return "\xc5\xba";
+ case 'y': return "\xc3\xbd";
+ case 'z': return "\xc5\xbe";
case 'A': return "\xc3\x85";
case 'B': return "\xce\xb2";
- case 'C': return "\xc4\x88";
- case 'D': return "\xc4\x90";
- case 'E': return "\xd0\x84";
- case 'F': return "\xce\x93";
- case 'G': return "\xc4\x9e";
- case 'H': return "\xc4\xa6";
- case 'I': return "\xd0\x87";
- case 'J': return "\xc4\xb5";
+ case 'C': return "\xc3\x87";
+ case 'D': return "\xc3\x90";
+ case 'E': return "\xc3\x89";
+ case 'G': return "\xc4\x9c";
+ case 'H': return "\xc4\xa4";
+ case 'I': return "\xc3\x8e";
+ case 'J': return "\xc4\xb4";
case 'K': return "\xc4\xb6";
- case 'L': return "\xc5\x81";
+ case 'L': return "\xc4\xbb";
case 'M': return "\xe1\xb8\xbe";
- case 'N': return "\xc5\x83";
- case 'O': return "\xce\x98";
- case 'P': return "\xcf\x81";
+ case 'N': return "\xc3\x91";
+ case 'O': return "\xc3\x96";
+ case 'P': return "\xc3\x9e";
case 'Q': return "\x71";
- case 'R': return "\xd0\xaf";
- case 'S': return "\xc8\x98";
- case 'T': return "\xc5\xa6";
- case 'U': return "\xc5\xa8";
+ case 'R': return "\xc5\x94";
+ case 'S': return "\xc5\xa0";
+ case 'T': return "\xc5\xa2";
+ case 'U': return "\xc3\x9b";
case 'V': return "\xce\xbd";
- case 'W': return "\xe1\xba\x84";
+ case 'W': return "\xc5\xb4";
case 'X': return "\xc3\x97";
- case 'Y': return "\xc2\xa5";
+ case 'Y': return "\xc3\x9d";
case 'Z': return "\xc5\xbd";
+ case '!': return "\xc2\xa1";
+ case '?': return "\xc2\xbf";
+ case '$': return "\xe2\x82\xac";
default: return NULL;
}
}
+static const bool
+is_possible_normal_placeholder_end(const char16_t c) {
+ switch (c) {
+ case 's': return true;
+ case 'S': return true;
+ case 'c': return true;
+ case 'C': return true;
+ case 'd': return true;
+ case 'o': return true;
+ case 'x': return true;
+ case 'X': return true;
+ case 'f': return true;
+ case 'e': return true;
+ case 'E': return true;
+ case 'g': return true;
+ case 'G': return true;
+ case 'a': return true;
+ case 'A': return true;
+ case 'b': return true;
+ case 'B': return true;
+ case 'h': return true;
+ case 'H': return true;
+ case '%': return true;
+ case 'n': return true;
+ default: return false;
+ }
+}
+
+String16
+pseudo_generate_expansion(const unsigned int length) {
+ String16 result = k_expansion_string;
+ const char16_t* s = result.string();
+ if (result.size() < length) {
+ result += String16(" ");
+ result += pseudo_generate_expansion(length - result.size());
+ } else {
+ int ext = 0;
+ // Should contain only whole words, so looking for a space
+ for (unsigned int i = length + 1; i < result.size(); ++i) {
+ ++ext;
+ if (s[i] == ' ') {
+ break;
+ }
+ }
+ result.remove(length + ext, 0);
+ }
+ return result;
+}
+
/**
* Converts characters so they look like they've been localized.
*
* Note: This leaves escape sequences untouched so they can later be
* processed by ResTable::collectString in the normal way.
*/
-string
-pseudolocalize_string(const string& source)
+String16
+pseudolocalize_string(const String16& source)
{
- const char* s = source.c_str();
- string result;
- const size_t I = source.length();
+ const char16_t* s = source.string();
+ String16 result;
+ const size_t I = source.size();
for (size_t i=0; i<I; i++) {
- char c = s[i];
+ char16_t c = s[i];
if (c == '\\') {
+ // Escape syntax, no need to pseudolocalize
if (i<I-1) {
- result += '\\';
+ result += String16("\\");
i++;
c = s[i];
switch (c) {
case 'u':
// this one takes up 5 chars
- result += string(s+i, 5);
+ result += String16(s+i, 5);
i += 4;
break;
case 't':
@@ -96,24 +162,107 @@
case '\'':
case '\\':
default:
- result += c;
+ result.append(&c, 1);
break;
}
} else {
- result += c;
+ result.append(&c, 1);
+ }
+ } else if (c == '%') {
+ // Placeholder syntax, no need to pseudolocalize
+ result += k_placeholder_open;
+ bool end = false;
+ result.append(&c, 1);
+ while (!end && i < I) {
+ ++i;
+ c = s[i];
+ result.append(&c, 1);
+ if (is_possible_normal_placeholder_end(c)) {
+ end = true;
+ } else if (c == 't') {
+ ++i;
+ c = s[i];
+ result.append(&c, 1);
+ end = true;
+ }
+ }
+ result += k_placeholder_close;
+ } else if (c == '<' || c == '&') {
+ // html syntax, no need to pseudolocalize
+ bool tag_closed = false;
+ while (!tag_closed && i < I) {
+ if (c == '&') {
+ String16 escape_text;
+ escape_text.append(&c, 1);
+ bool end = false;
+ size_t htmlCodePos = i;
+ while (!end && htmlCodePos < I) {
+ ++htmlCodePos;
+ c = s[htmlCodePos];
+ escape_text.append(&c, 1);
+ // Valid html code
+ if (c == ';') {
+ end = true;
+ i = htmlCodePos;
+ }
+ // Wrong html code
+ else if (!((c == '#' ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9')))) {
+ end = true;
+ }
+ }
+ result += escape_text;
+ if (escape_text != String16("<")) {
+ tag_closed = true;
+ }
+ continue;
+ }
+ if (c == '>') {
+ tag_closed = true;
+ result.append(&c, 1);
+ continue;
+ }
+ result.append(&c, 1);
+ i++;
+ c = s[i];
}
} else {
+ // This is a pure text that should be pseudolocalized
const char* p = pseudolocalize_char(c);
if (p != NULL) {
- result += p;
+ result += String16(p);
} else {
- result += c;
+ result.append(&c, 1);
}
}
}
-
- //printf("result=\'%s\'\n", result.c_str());
return result;
}
+String16
+pseudobidi_string(const String16& source)
+{
+ const char16_t* s = source.string();
+ String16 result;
+ result += k_rlm;
+ result += k_rlo;
+ for (size_t i=0; i<source.size(); i++) {
+ char16_t c = s[i];
+ switch(c) {
+ case ' ': result += k_pdf;
+ result += k_rlm;
+ result.append(&c, 1);
+ result += k_rlm;
+ result += k_rlo;
+ break;
+ default: result.append(&c, 1);
+ break;
+ }
+ }
+ result += k_pdf;
+ result += k_rlm;
+ return result;
+}