Improved word break for BiDi pseudolocalizer.
Character sequences \n and \t are now treated as word separators
by the BiDi pseudolocalizer. This solves issues where the text rendering
engine breaks a line in the middle of a text chunk marked with
RLM+RLO and PDF+RLM sequences.
Bug:34064580
Change-Id: I52e6018785fae25479fa167440f24c534b0e3253
Fixes:34064580
Test: make aapt2_tests
Test: Run aapt2_tests binary
diff --git a/tools/aapt/pseudolocalize.cpp b/tools/aapt/pseudolocalize.cpp
index c7fee2c..5c47e0f 100644
--- a/tools/aapt/pseudolocalize.cpp
+++ b/tools/aapt/pseudolocalize.cpp
@@ -360,9 +360,15 @@
String16 result;
bool lastspace = true;
bool space = true;
+ bool escape = false;
+ const char16_t ESCAPE_CHAR = '\\';
for (size_t i=0; i<source.size(); i++) {
char16_t c = s[i];
- space = is_space(c);
+ if (!escape && c == ESCAPE_CHAR) {
+ escape = true;
+ continue;
+ }
+ space = (!escape && is_space(c)) || (escape && (c == 'n' || c == 't'));
if (lastspace && !space) {
// Word start
result += k_rlm + k_rlo;
@@ -371,6 +377,10 @@
result += k_pdf + k_rlm;
}
lastspace = space;
+ if (escape) {
+ result.append(&ESCAPE_CHAR, 1);
+ escape=false;
+ }
result.append(&c, 1);
}
if (!lastspace) {
diff --git a/tools/aapt/tests/Pseudolocales_test.cpp b/tools/aapt/tests/Pseudolocales_test.cpp
index 4670e9f..a6aed3a 100644
--- a/tools/aapt/tests/Pseudolocales_test.cpp
+++ b/tools/aapt/tests/Pseudolocales_test.cpp
@@ -87,6 +87,10 @@
"\xe2\x80\x8f\xE2\x80\xaehello\xE2\x80\xac\xe2\x80\x8f\n" \
" \xe2\x80\x8f\xE2\x80\xaeworld\xE2\x80\xac\xe2\x80\x8f\n",
PSEUDO_BIDI);
+ simple_helper("hello\\nworld\\n",
+ "\xe2\x80\x8f\xE2\x80\xaehello\xE2\x80\xac\xe2\x80\x8f\\n"
+ "\xe2\x80\x8f\xE2\x80\xaeworld\xE2\x80\xac\xe2\x80\x8f\\n",
+ PSEUDO_BIDI);
}
TEST(Pseudolocales, SimpleICU) {
diff --git a/tools/aapt2/compile/Pseudolocalizer.cpp b/tools/aapt2/compile/Pseudolocalizer.cpp
index 15a3d8c..3a515fa 100644
--- a/tools/aapt2/compile/Pseudolocalizer.cpp
+++ b/tools/aapt2/compile/Pseudolocalizer.cpp
@@ -445,9 +445,15 @@
std::string result;
bool lastspace = true;
bool space = true;
+ bool escape = false;
+ const char ESCAPE_CHAR = '\\';
for (size_t i = 0; i < source.size(); i++) {
char c = s[i];
- space = isspace(c);
+ if (!escape && c == ESCAPE_CHAR) {
+ escape = true;
+ continue;
+ }
+ space = (!escape && isspace(c)) || (escape && (c == 'n' || c == 't'));
if (lastspace && !space) {
// Word start
result += kRlm + kRlo;
@@ -456,6 +462,10 @@
result += kPdf + kRlm;
}
lastspace = space;
+ if (escape) {
+ result.append(&ESCAPE_CHAR, 1);
+ escape=false;
+ }
result.append(&c, 1);
}
if (!lastspace) {
diff --git a/tools/aapt2/compile/Pseudolocalizer_test.cpp b/tools/aapt2/compile/Pseudolocalizer_test.cpp
index d3b7b02..65d2472 100644
--- a/tools/aapt2/compile/Pseudolocalizer_test.cpp
+++ b/tools/aapt2/compile/Pseudolocalizer_test.cpp
@@ -97,6 +97,11 @@
"\xe2\x80\x8f\xE2\x80\xaehello\xE2\x80\xac\xe2\x80\x8f\n"
" \xe2\x80\x8f\xE2\x80\xaeworld\xE2\x80\xac\xe2\x80\x8f\n",
Pseudolocalizer::Method::kBidi));
+ EXPECT_TRUE(
+ SimpleHelper("hello\\nworld\\n",
+ "\xe2\x80\x8f\xE2\x80\xaehello\xE2\x80\xac\xe2\x80\x8f\\n"
+ "\xe2\x80\x8f\xE2\x80\xaeworld\xE2\x80\xac\xe2\x80\x8f\\n",
+ Pseudolocalizer::Method::kBidi));
}
TEST(PseudolocalizerTest, SimpleICU) {