AAPT2: Verify positional Java String format arguments in strings

Change-Id: Id415969035a0d5712857c0e11e140155566a960c
diff --git a/tools/aapt2/util/Util.cpp b/tools/aapt2/util/Util.cpp
index 6ef4ce5..59b8385 100644
--- a/tools/aapt2/util/Util.cpp
+++ b/tools/aapt2/util/Util.cpp
@@ -189,6 +189,105 @@
     return result;
 }
 
+static size_t consumeDigits(const char16_t* start, const char16_t* end) {
+    const char16_t* c = start;
+    for (; c != end && *c >= u'0' && *c <= u'9'; c++) {}
+    return static_cast<size_t>(c - start);
+}
+
+bool verifyJavaStringFormat(const StringPiece16& str) {
+    const char16_t* c = str.begin();
+    const char16_t* const end = str.end();
+
+    size_t argCount = 0;
+    bool nonpositional = false;
+    while (c != end) {
+        if (*c == u'%' && c + 1 < end) {
+            c++;
+
+            if (*c == u'%') {
+                c++;
+                continue;
+            }
+
+            argCount++;
+
+            size_t numDigits = consumeDigits(c, end);
+            if (numDigits > 0) {
+                c += numDigits;
+                if (c != end && *c != u'$') {
+                    // The digits were a size, but not a positional argument.
+                    nonpositional = true;
+                }
+            } else if (*c == u'<') {
+                // Reusing last argument, bad idea since positions can be moved around
+                // during translation.
+                nonpositional = true;
+
+                c++;
+
+                // Optionally we can have a $ after
+                if (c != end && *c == u'$') {
+                    c++;
+                }
+            } else {
+                nonpositional = true;
+            }
+
+            // Ignore size, width, flags, etc.
+            while (c != end && (*c == u'-' ||
+                    *c == u'#' ||
+                    *c == u'+' ||
+                    *c == u' ' ||
+                    *c == u',' ||
+                    *c == u'(' ||
+                    (*c >= u'0' && *c <= '9'))) {
+                c++;
+            }
+
+            /*
+             * This is a shortcut to detect strings that are going to Time.format()
+             * instead of String.format()
+             *
+             * Comparison of String.format() and Time.format() args:
+             *
+             * String: ABC E GH  ST X abcdefgh  nost x
+             *   Time:    DEFGHKMS W Za  d   hkm  s w yz
+             *
+             * Therefore we know it's definitely Time if we have:
+             *     DFKMWZkmwyz
+             */
+            if (c != end) {
+                switch (*c) {
+                case 'D':
+                case 'F':
+                case 'K':
+                case 'M':
+                case 'W':
+                case 'Z':
+                case 'k':
+                case 'm':
+                case 'w':
+                case 'y':
+                case 'z':
+                    return true;
+                }
+            }
+        }
+
+        if (c != end) {
+            c++;
+        }
+    }
+
+    if (argCount > 1 && nonpositional) {
+        // Multiple arguments were specified, but some or all were non positional. Translated
+        // strings may rearrange the order of the arguments, which will break the string.
+        return false;
+    }
+    return true;
+}
+
 static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) {
     char16_t code = 0;
     for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) {