AAPT2: Fix JavaDoc first sentence extraction. The old algorithm for detecting the first sentence of a JavaDoc comment looked for the first occurence of '.'. This does not work when code or a {@link android.R.styleable} link is encountered in the first sentence. Switch to checking for whitespace characters after the '.' character. Bug: 62900335 Test: make aapt2_tests Change-Id: I8238f6a6304c9c2f92e2e576ca8962a59c2b20ea

commit: e967d3f6ac2e1e1f612f99b9c76abcb9e13bb7a2 [log] [tgz]
author: Adam Lesinski <adamlesinski@google.com> Mon Jul 24 18:19:36 2017 -0700
committer: Adam Lesinski <adamlesinski@google.com> Tue Jul 25 15:53:15 2017 -0700
tree: 57a3a90478daeaf536af29463e3ac3866777511c
parent: 3370d3230d4e3084ca95eb6d6bb63f27c3cb6f63 [diff]
diff --git a/tools/aapt2/text/Unicode.cpp b/tools/aapt2/text/Unicode.cpp
index 38ec9c4..75eeb46 100644
--- a/tools/aapt2/text/Unicode.cpp
+++ b/tools/aapt2/text/Unicode.cpp

@@ -66,6 +66,17 @@
   return FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue;
 }
 
+// Hardcode the White_Space characters since they are few and the external/icu project doesn't
+// list them as data files to parse.
+// Sourced from http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
+bool IsWhitespace(char32_t codepoint) {
+  return (codepoint >= 0x0009 && codepoint <= 0x000d) || (codepoint == 0x0020) ||
+         (codepoint == 0x0085) || (codepoint == 0x00a0) || (codepoint == 0x1680) ||
+         (codepoint >= 0x2000 && codepoint <= 0x200a) || (codepoint == 0x2028) ||
+         (codepoint == 0x2029) || (codepoint == 0x202f) || (codepoint == 0x205f) ||
+         (codepoint == 0x3000);
+}
+
 bool IsJavaIdentifier(const StringPiece& str) {
   Utf8Iterator iter(str);
 

diff --git a/tools/aapt2/text/Unicode.h b/tools/aapt2/text/Unicode.h
index 2707187..546714e 100644
--- a/tools/aapt2/text/Unicode.h
+++ b/tools/aapt2/text/Unicode.h

@@ -40,6 +40,10 @@
 // characters in the ID_Continue set.
 bool IsXidContinue(char32_t codepoint);
 
+// Returns true if the Unicode codepoint has the White_Space property.
+// http://unicode.org/reports/tr44/#White_Space
+bool IsWhitespace(char32_t codepoint);
+
 // Returns true if the UTF8 string can be used as a Java identifier.
 // NOTE: This does not check against the set of reserved Java keywords.
 bool IsJavaIdentifier(const android::StringPiece& str);

diff --git a/tools/aapt2/text/Utf8Iterator.cpp b/tools/aapt2/text/Utf8Iterator.cpp
index 0d43353..20b9073 100644
--- a/tools/aapt2/text/Utf8Iterator.cpp
+++ b/tools/aapt2/text/Utf8Iterator.cpp

@@ -25,18 +25,17 @@
 namespace text {
 
 Utf8Iterator::Utf8Iterator(const StringPiece& str)
-    : str_(str), next_pos_(0), current_codepoint_(0) {
+    : str_(str), current_pos_(0), next_pos_(0), current_codepoint_(0) {
   DoNext();
 }
 
 void Utf8Iterator::DoNext() {
-  size_t next_pos = 0u;
-  int32_t result = utf32_from_utf8_at(str_.data(), str_.size(), next_pos_, &next_pos);
+  current_pos_ = next_pos_;
+  int32_t result = utf32_from_utf8_at(str_.data(), str_.size(), current_pos_, &next_pos_);
   if (result == -1) {
     current_codepoint_ = 0u;
   } else {
     current_codepoint_ = static_cast<char32_t>(result);
-    next_pos_ = next_pos;
   }
 }
 
@@ -44,6 +43,10 @@
   return current_codepoint_ != 0;
 }
 
+size_t Utf8Iterator::Position() const {
+  return current_pos_;
+}
+
 void Utf8Iterator::Skip(int amount) {
   while (amount > 0 && HasNext()) {
     Next();

diff --git a/tools/aapt2/text/Utf8Iterator.h b/tools/aapt2/text/Utf8Iterator.h
index 6923957..9318401 100644
--- a/tools/aapt2/text/Utf8Iterator.h
+++ b/tools/aapt2/text/Utf8Iterator.h

@@ -29,6 +29,10 @@
 
   bool HasNext() const;
 
+  // Returns the current position of the iterator in bytes of the source UTF8 string.
+  // This position is the start of the codepoint returned by the next call to Next().
+  size_t Position() const;
+
   void Skip(int amount);
 
   char32_t Next();
@@ -39,6 +43,7 @@
   void DoNext();
 
   android::StringPiece str_;
+  size_t current_pos_;
   size_t next_pos_;
   char32_t current_codepoint_;
 };

diff --git a/tools/aapt2/text/Utf8Iterator_test.cpp b/tools/aapt2/text/Utf8Iterator_test.cpp
index f3111c0..8c3e774 100644
--- a/tools/aapt2/text/Utf8Iterator_test.cpp
+++ b/tools/aapt2/text/Utf8Iterator_test.cpp

@@ -18,6 +18,7 @@
 
 #include "test/Test.h"
 
+using ::android::StringPiece;
 using ::testing::Eq;
 
 namespace aapt {
@@ -63,5 +64,32 @@
   EXPECT_FALSE(iter.HasNext());
 }
 
+TEST(Utf8IteratorTest, PositionPointsToTheCorrectPlace) {
+  const StringPiece expected("Mm🍩");
+  Utf8Iterator iter(expected);
+
+  // Before any character, the position should be 0.
+  EXPECT_THAT(iter.Position(), Eq(0u));
+
+  // The 'M' character, one byte.
+  ASSERT_TRUE(iter.HasNext());
+  iter.Next();
+  EXPECT_THAT(iter.Position(), Eq(1u));
+
+  // The 'm' character, one byte.
+  ASSERT_TRUE(iter.HasNext());
+  iter.Next();
+  EXPECT_THAT(iter.Position(), Eq(2u));
+
+  // The doughnut character, 4 bytes.
+  ASSERT_TRUE(iter.HasNext());
+  iter.Next();
+  EXPECT_THAT(iter.Position(), Eq(6u));
+
+  // There should be nothing left.
+  EXPECT_FALSE(iter.HasNext());
+  EXPECT_THAT(iter.Position(), Eq(expected.size()));
+}
+
 }  // namespace text
 }  // namespace aapt
commit	e967d3f6ac2e1e1f612f99b9c76abcb9e13bb7a2	[log] [tgz]
author	Adam Lesinski <adamlesinski@google.com>	Mon Jul 24 18:19:36 2017 -0700
committer	Adam Lesinski <adamlesinski@google.com>	Tue Jul 25 15:53:15 2017 -0700
tree	57a3a90478daeaf536af29463e3ac3866777511c
parent	3370d3230d4e3084ca95eb6d6bb63f27c3cb6f63 [diff]