AAPT2: Fix JavaDoc first sentence extraction.
The old algorithm for detecting the first sentence of a JavaDoc comment
looked for the first occurrence of '.'. This does not work when code or a
{@link android.R.styleable} link is encountered in the first sentence.
Switch to checking for whitespace characters after the '.' character.
Bug: 62900335
Test: make aapt2_tests
Change-Id: I8238f6a6304c9c2f92e2e576ca8962a59c2b20ea
diff --git a/tools/aapt2/text/Unicode.cpp b/tools/aapt2/text/Unicode.cpp
index 38ec9c4..75eeb46 100644
--- a/tools/aapt2/text/Unicode.cpp
+++ b/tools/aapt2/text/Unicode.cpp
@@ -66,6 +66,17 @@
return FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue;
}
+// Hardcode the White_Space characters since they are few and the external/icu project doesn't
+// list them as data files to parse.
+// Sourced from http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
+bool IsWhitespace(char32_t codepoint) {
+ return (codepoint >= 0x0009 && codepoint <= 0x000d) || (codepoint == 0x0020) ||
+ (codepoint == 0x0085) || (codepoint == 0x00a0) || (codepoint == 0x1680) ||
+ (codepoint >= 0x2000 && codepoint <= 0x200a) || (codepoint == 0x2028) ||
+ (codepoint == 0x2029) || (codepoint == 0x202f) || (codepoint == 0x205f) ||
+ (codepoint == 0x3000);
+}
+
bool IsJavaIdentifier(const StringPiece& str) {
Utf8Iterator iter(str);
diff --git a/tools/aapt2/text/Unicode.h b/tools/aapt2/text/Unicode.h
index 2707187..546714e 100644
--- a/tools/aapt2/text/Unicode.h
+++ b/tools/aapt2/text/Unicode.h
@@ -40,6 +40,10 @@
// characters in the ID_Continue set.
bool IsXidContinue(char32_t codepoint);
+// Returns true if the Unicode codepoint has the White_Space property.
+// http://unicode.org/reports/tr44/#White_Space
+bool IsWhitespace(char32_t codepoint);
+
// Returns true if the UTF8 string can be used as a Java identifier.
// NOTE: This does not check against the set of reserved Java keywords.
bool IsJavaIdentifier(const android::StringPiece& str);
diff --git a/tools/aapt2/text/Utf8Iterator.cpp b/tools/aapt2/text/Utf8Iterator.cpp
index 0d43353..20b9073 100644
--- a/tools/aapt2/text/Utf8Iterator.cpp
+++ b/tools/aapt2/text/Utf8Iterator.cpp
@@ -25,18 +25,17 @@
namespace text {
Utf8Iterator::Utf8Iterator(const StringPiece& str)
- : str_(str), next_pos_(0), current_codepoint_(0) {
+ : str_(str), current_pos_(0), next_pos_(0), current_codepoint_(0) {
DoNext();
}
void Utf8Iterator::DoNext() {
- size_t next_pos = 0u;
- int32_t result = utf32_from_utf8_at(str_.data(), str_.size(), next_pos_, &next_pos);
+ current_pos_ = next_pos_;
+ int32_t result = utf32_from_utf8_at(str_.data(), str_.size(), current_pos_, &next_pos_);
if (result == -1) {
current_codepoint_ = 0u;
} else {
current_codepoint_ = static_cast<char32_t>(result);
- next_pos_ = next_pos;
}
}
@@ -44,6 +43,10 @@
return current_codepoint_ != 0;
}
+size_t Utf8Iterator::Position() const {
+ return current_pos_;
+}
+
void Utf8Iterator::Skip(int amount) {
while (amount > 0 && HasNext()) {
Next();
diff --git a/tools/aapt2/text/Utf8Iterator.h b/tools/aapt2/text/Utf8Iterator.h
index 6923957..9318401 100644
--- a/tools/aapt2/text/Utf8Iterator.h
+++ b/tools/aapt2/text/Utf8Iterator.h
@@ -29,6 +29,10 @@
bool HasNext() const;
+ // Returns the current position of the iterator in bytes of the source UTF8 string.
+ // This position is the start of the codepoint returned by the next call to Next().
+ size_t Position() const;
+
void Skip(int amount);
char32_t Next();
@@ -39,6 +43,7 @@
void DoNext();
android::StringPiece str_;
+ size_t current_pos_;
size_t next_pos_;
char32_t current_codepoint_;
};
diff --git a/tools/aapt2/text/Utf8Iterator_test.cpp b/tools/aapt2/text/Utf8Iterator_test.cpp
index f3111c0..8c3e774 100644
--- a/tools/aapt2/text/Utf8Iterator_test.cpp
+++ b/tools/aapt2/text/Utf8Iterator_test.cpp
@@ -18,6 +18,7 @@
#include "test/Test.h"
+using ::android::StringPiece;
using ::testing::Eq;
namespace aapt {
@@ -63,5 +64,32 @@
EXPECT_FALSE(iter.HasNext());
}
+TEST(Utf8IteratorTest, PositionPointsToTheCorrectPlace) {
+ const StringPiece expected("Mm🍩");
+ Utf8Iterator iter(expected);
+
+ // Before any character, the position should be 0.
+ EXPECT_THAT(iter.Position(), Eq(0u));
+
+ // The 'M' character, one byte.
+ ASSERT_TRUE(iter.HasNext());
+ iter.Next();
+ EXPECT_THAT(iter.Position(), Eq(1u));
+
+ // The 'm' character, one byte.
+ ASSERT_TRUE(iter.HasNext());
+ iter.Next();
+ EXPECT_THAT(iter.Position(), Eq(2u));
+
+ // The doughnut character, 4 bytes.
+ ASSERT_TRUE(iter.HasNext());
+ iter.Next();
+ EXPECT_THAT(iter.Position(), Eq(6u));
+
+ // There should be nothing left.
+ EXPECT_FALSE(iter.HasNext());
+ EXPECT_THAT(iter.Position(), Eq(expected.size()));
+}
+
} // namespace text
} // namespace aapt