Improved word break for BiDi pseudolocalizer.
Character sequences \n and \t are now treated as word separators
by the BiDi pseudolocalizer. This solves issues where the text rendering
engine breaks a line in the middle of a text chunk marked with
RLM+RLO and PDF+RLM sequences.
Bug:34064580
Change-Id: I52e6018785fae25479fa167440f24c534b0e3253
Fixes:34064580
Test: make aapt2_tests
Test: Run aapt2_tests binary
diff --git a/tools/aapt/pseudolocalize.cpp b/tools/aapt/pseudolocalize.cpp
index c7fee2c..5c47e0f 100644
--- a/tools/aapt/pseudolocalize.cpp
+++ b/tools/aapt/pseudolocalize.cpp
@@ -360,9 +360,15 @@
String16 result;
bool lastspace = true;
bool space = true;
+ bool escape = false;
+ const char16_t ESCAPE_CHAR = '\\';
for (size_t i=0; i<source.size(); i++) {
char16_t c = s[i];
- space = is_space(c);
+ if (!escape && c == ESCAPE_CHAR) {
+ escape = true;
+ continue;
+ }
+ space = (!escape && is_space(c)) || (escape && (c == 'n' || c == 't'));
if (lastspace && !space) {
// Word start
result += k_rlm + k_rlo;
@@ -371,6 +377,10 @@
result += k_pdf + k_rlm;
}
lastspace = space;
+ if (escape) {
+ result.append(&ESCAPE_CHAR, 1);
+ escape=false;
+ }
result.append(&c, 1);
}
if (!lastspace) {
diff --git a/tools/aapt/tests/Pseudolocales_test.cpp b/tools/aapt/tests/Pseudolocales_test.cpp
index 4670e9f..a6aed3a 100644
--- a/tools/aapt/tests/Pseudolocales_test.cpp
+++ b/tools/aapt/tests/Pseudolocales_test.cpp
@@ -87,6 +87,10 @@
"\xe2\x80\x8f\xE2\x80\xaehello\xE2\x80\xac\xe2\x80\x8f\n" \
" \xe2\x80\x8f\xE2\x80\xaeworld\xE2\x80\xac\xe2\x80\x8f\n",
PSEUDO_BIDI);
+ simple_helper("hello\\nworld\\n",
+ "\xe2\x80\x8f\xE2\x80\xaehello\xE2\x80\xac\xe2\x80\x8f\\n"
+ "\xe2\x80\x8f\xE2\x80\xaeworld\xE2\x80\xac\xe2\x80\x8f\\n",
+ PSEUDO_BIDI);
}
TEST(Pseudolocales, SimpleICU) {