Jeff Sharkey | 85ccd04 | 2020-10-07 10:28:42 -0600 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | #-*- coding: utf-8 -*- |
Jeff Sharkey | abe058c | 2018-03-26 09:38:01 -0600 | [diff] [blame] | 3 | |
| 4 | # Copyright (C) 2018 The Android Open Source Project |
| 5 | # |
| 6 | # Licensed under the Apache License, Version 2.0 (the 'License'); |
| 7 | # you may not use this file except in compliance with the License. |
| 8 | # You may obtain a copy of the License at |
| 9 | # |
| 10 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | # |
| 12 | # Unless required by applicable law or agreed to in writing, software |
| 13 | # distributed under the License is distributed on an 'AS IS' BASIS, |
| 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | # See the License for the specific language governing permissions and |
| 16 | # limitations under the License. |
| 17 | |
| 18 | """ |
| 19 | Enforces common Android string best-practices. It ignores lint messages from |
| 20 | a previous strings file, if provided. |
| 21 | |
| 22 | Usage: stringslint.py strings.xml |
| 23 | Usage: stringslint.py strings.xml old_strings.xml |
Jeff Sharkey | 47c7924 | 2018-06-05 15:55:45 -0600 | [diff] [blame] | 24 | |
| 25 | In general: |
| 26 | * Errors signal issues that must be fixed before submitting, and are only |
| 27 | used when there are no false-positives. |
| 28 | * Warnings signal issues that might need to be fixed, but need manual |
| 29 | inspection due to risk of false-positives. |
| 30 | * Info signal issues that should be fixed to match best-practices, such |
| 31 | as providing comments to aid translation. |
Jeff Sharkey | abe058c | 2018-03-26 09:38:01 -0600 | [diff] [blame] | 32 | """ |
| 33 | |
Jeff Sharkey | 47c7924 | 2018-06-05 15:55:45 -0600 | [diff] [blame] | 34 | import re, sys, codecs |
Jeff Sharkey | abe058c | 2018-03-26 09:38:01 -0600 | [diff] [blame] | 35 | import lxml.etree as ET |
| 36 | |
# ANSI colour indices 0-7; format() appends them to a "3"/"4"/"10" prefix to
# build the corresponding SGR colour code.
(
    BLACK, RED, GREEN, YELLOW,
    BLUE, MAGENTA, CYAN, WHITE,
) = range(8)
| 38 | |
def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
    """Build an ANSI SGR escape sequence for terminal styling.

    Args:
        fg: Foreground colour index (0-7), or None to leave it unchanged.
        bg: Background colour index (0-7), or None to leave it unchanged.
        bright: Use the bright background range (10x) instead of 4x. Only
            consulted when ``bg`` is given.
        bold: Emit the bold attribute; takes precedence over ``dim``.
        dim: Emit the dim attribute when ``bold`` is not set.
        reset: Emit only the reset code; all other arguments are ignored.

    Returns:
        The escape sequence as a string, e.g. ``"\\033[31;1m"``.
    """
    # manually derived from http://en.wikipedia.org/wiki/ANSI_escape_code#Codes
    codes = []
    if reset:
        codes.append("0")
    else:
        if fg is not None:
            codes.append("3%d" % fg)
        if bg is not None:
            codes.append(("10%d" if bright else "4%d") % bg)
        if bold:
            codes.append("1")
        elif dim:
            codes.append("2")
        else:
            # Neither bold nor dim requested: emit code 22.
            codes.append("22")
    return "\033[%sm" % ";".join(codes)
| 52 | |
# Findings of the most recent lint() run, keyed by "<string name>:<message>".
warnings = None

def warn(tag, msg, actual, expected, color=YELLOW):
    """Record a finding for the given <string> element.

    The finding is stored in the module-level ``warnings`` dict. A later
    finding with the same string name and message replaces the earlier one.

    Args:
        tag: Parsed element; must expose ``attrib["name"]`` and ``sourceline``.
        msg: Human-readable description of the issue.
        actual: Offending text to show under "Actual", or None to omit it.
        expected: Sample of the preferred form to show under "Example", or
            None to omit it.
        color: Colour index used for the "Line N: 'name':" prefix.
    """
    global warnings
    if warnings is None:
        # Allow use before lint() has initialised the collection.
        warnings = {}

    name = tag.attrib["name"]
    # Key on the message text itself rather than hash(msg): str hashes are
    # randomised per process, which made the sorted report order differ from
    # one run to the next.
    key = "%s:%s" % (name, msg)
    value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
                                       tag.sourceline,
                                       name,
                                       format(reset=True),
                                       msg)
    if actual is not None:
        value += "\n\tActual: %s%s%s" % (format(dim=True),
                                         actual,
                                         format(reset=True))
    if expected is not None:
        value += "\n\tExample: %s%s%s" % (format(dim=True),
                                          expected,
                                          format(reset=True))
    warnings[key] = value
| 70 | |
Jeff Sharkey | 47c7924 | 2018-06-05 15:55:45 -0600 | [diff] [blame] | 71 | |
def error(tag, msg, actual, expected):
    """Record a finding via warn(), rendered with the RED colour."""
    warn(tag, msg, actual, expected, color=RED)
| 74 | |
def info(tag, msg, actual, expected):
    """Record a finding via warn(), rendered with the CYAN colour."""
    warn(tag, msg, actual, expected, color=CYAN)
| 77 | |
# Escaping logic borrowed from https://stackoverflow.com/a/24519338
# The hex alternatives only accept real hex digits so that text such as
# "\unable" is not mistaken for a (malformed) \uXXXX escape.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9a-fA-F]{8}   # 8-digit hex escapes
    | \\u[0-9a-fA-F]{4}   # 4-digit hex escapes
    | \\x[0-9a-fA-F]{2}   # 2-digit hex escapes
    | \\[0-7]{1,3}        # Octal escapes
    | \\N\{[^}]+\}        # Unicode characters by name
    | \\[\\'"abfnrtv]     # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)

def decode_escapes(s):
    """Approximate how a raw resource string will look once rendered.

    Steps, in order: collapse each newline plus following whitespace into one
    space, decode backslash escapes, replace ``%``-style and ``^N``
    placeholders with ``____``, turn ``<br>``/``<br/>`` into newlines and
    drop other simple lowercase tags.

    Args:
        s: Raw text taken from a string resource.

    Returns:
        The expanded text.
    """
    def decode_match(match):
        try:
            return codecs.decode(match.group(0), 'unicode-escape')
        except UnicodeDecodeError:
            # Looks like an escape but is not a valid one (e.g. unknown \N{}
            # name); keep the text as written instead of aborting the run.
            return match.group(0)

    s = re.sub(r"\n\s*", " ", s)
    s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
    s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
    s = re.sub(r"\^\d+", "____", s)
    s = re.sub(r"<br/?>", "\n", s)
    s = re.sub(r"</?[a-z]+>", "", s)
    return s
| 99 | |
def sample_iter(tag):
    """Yield the text fragments that make up a sample rendering of ``tag``.

    An xliff ``<g>`` element carrying an ``example`` attribute contributes
    that example in place of its own text; any other node contributes its
    decoded text. Children are then visited recursively, each followed by
    its decoded tail text.

    Args:
        tag: A parsed XML node.

    Yields:
        String fragments in document order.
    """
    # Comments, processing instructions and entities have a non-string .tag,
    # so only real elements can be tested against the xliff pattern.
    is_element = isinstance(tag.tag, str)
    if is_element and re.match("{.*xliff.*}g", tag.tag) and "example" in tag.attrib:
        yield tag.attrib["example"]
    elif tag.text:
        yield decode_escapes(tag.text)
    for e in tag:
        yield from sample_iter(e)
        if e.tail:
            yield decode_escapes(e.tail)
| 110 | |
def lint(path):
    """Lint the string resources in the XML file at ``path``.

    Iterates over the direct children of the XML root. Each ``<string>``
    element is checked together with the most recent comment that has not
    already been consumed by an earlier ``<string>``. Findings are recorded
    through info()/warn()/error().

    A string with no comment gets a single "missing comment" finding and no
    further checks. Strings whose comment contains "do not translate", or
    that carry ``translatable="false"``, are skipped entirely.

    Args:
        path: Path of the strings file to analyse.

    Returns:
        The module-level ``warnings`` dict, which is reset at the start of
        every call. It is empty when the file contains only whitespace.
    """
    global warnings
    warnings = {}

    # Read bytes so the XML parser applies the document's own encoding
    # declaration instead of the process locale.
    with open(path, "rb") as f:
        raw = f.read()
    if not raw.strip():
        return warnings
    root = ET.fromstring(raw)

    xliff_example = '<xliff:g id="domain" example="example.com">%1$s</xliff:g>'

    last_comment = None
    for child in root:
        # TODO: handle plurals
        if isinstance(child, ET._Comment):
            last_comment = child
            continue
        if child.tag != "string":
            continue

        # We always consume comment
        comment = last_comment
        last_comment = None

        # Prepare string for analysis
        text = "".join(child.itertext())
        sample = "".join(sample_iter(child)).strip().strip("'\"")

        # Validate comment
        if comment is None:
            info(child, "Missing string comment to aid translation",
                 None, None)
            continue
        comment_text = comment.text or ""
        if "do not translate" in comment_text.lower():
            continue
        if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false":
            continue

        misspelled_attributes = [
            ("translateable", "translatable"),
        ]
        for misspelling, expected in misspelled_attributes:
            if misspelling in child.attrib:
                error(child, "Misspelled <string> attribute.", misspelling, expected)

        limit = re.search(r"CHAR[ _-]LIMIT=(\d+|NONE|none)", comment_text)
        if limit is None:
            info(child, "Missing CHAR LIMIT to aid translation",
                 repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
        elif re.match(r"\d+", limit.group(1)):
            if len(sample) > int(limit.group(1)):
                warn(child, "Expanded string length is larger than CHAR LIMIT",
                     sample, None)

        # Look for common mistakes/substitutions
        if "'" in text:
            error(child, "Turned quotation mark glyphs are more polished",
                  text, "This doesn\u2019t need to \u2018happen\u2019 today")
        # A string wrapped entirely in double quotes is fine; any other use
        # of a straight double quote is flagged.
        if '"' in text and not (text.startswith('"') and text.endswith('"')):
            error(child, "Turned quotation mark glyphs are more polished",
                  text, "This needs to \u201chappen\u201d today")
        if "..." in text:
            error(child, "Ellipsis glyph is more polished",
                  text, "Loading\u2026")
        if "wi-fi" in text.lower():
            error(child, "Non-breaking glyph is more polished",
                  text, "Wi\u2011Fi")
        if "wifi" in text.lower():
            error(child, "Using non-standard spelling",
                  text, "Wi\u2011Fi")
        if re.search(r"\d-\d", text):
            warn(child, "Ranges should use en dash glyph",
                 text, "You will find this material in chapters 8\u201312")
        if "--" in text:
            warn(child, "Phrases should use em dash glyph",
                 text, "Upon discovering errors\u2014all 124 of them\u2014they recalled.")
        if ".  " in text:
            warn(child, "Only use single space between sentences",
                 text, "First idea. Second idea.")
        if re.match(r"^[A-Z\s]{5,}$", text):
            warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
                 text, "Refresh data")
        if " phone " in text and "product" not in child.attrib:
            warn(child, "Strings mentioning phones should have variants for tablets",
                 text, None)

        # When more than one substitution, require indexes
        if len(re.findall(r"%[^%]", text)) > 1 and re.search(r"%[^\d]", text):
            error(child, "Substitutions must be indexed",
                  text, "Add %1$s to %2$s")

        # Require xliff substitutions
        for gc in child.iter():
            badsub = bool(gc.tail and re.search(r"%[^%]", gc.tail))
            if not isinstance(gc.tag, str):
                # Comment / processing instruction: it has no element tag to
                # match and its own text is not string content, so only the
                # tail (checked above) is relevant.
                pass
            elif re.match("{.*xliff.*}g", gc.tag):
                if "id" not in gc.attrib:
                    error(child, "Substitutions must define id attribute",
                          None, xliff_example)
                if "example" not in gc.attrib:
                    error(child, "Substitutions must define example attribute",
                          None, xliff_example)
            elif gc.text and re.search(r"%[^%]", gc.text):
                badsub = True
            if badsub:
                error(child, "Substitutions must be inside xliff tags",
                      text, xliff_example)

    return warnings
| 219 | |
def main(argv):
    """Command-line entry point.

    Lints ``argv[1]``. When ``argv[2]`` is given it is linted first and every
    finding that also occurs there is dropped from the report, so only newly
    introduced findings remain. Prints the remaining findings and exits with
    status 1 if there are any.

    Args:
        argv: Full argument vector, including the program name at index 0.
    """
    if len(argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("Usage: stringslint.py strings.xml [old_strings.xml]")

    before = lint(argv[2]) if len(argv) > 2 else {}
    after = lint(argv[1])

    # Ignore findings that were already present in the previous file.
    for key in before:
        after.pop(key, None)

    if after:
        for key in sorted(after):
            print(after[key])
            print()
        sys.exit(1)


if __name__ == "__main__":
    main(sys.argv)