#!/usr/bin/env python3
#-*- coding: utf-8 -*-

# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Enforces common Android string best-practices. It ignores lint messages from
a previous strings file, if provided.

Usage: stringslint.py strings.xml
Usage: stringslint.py strings.xml old_strings.xml

In general:
* Errors signal issues that must be fixed before submitting, and are only
  used when there are no false-positives.
* Warnings signal issues that might need to be fixed, but need manual
  inspection due to risk of false-positives.
* Info messages signal issues that should be fixed to match best-practices,
  such as providing comments to aid translation.
"""

import codecs
import re
import sys

import lxml.etree as ET

# ANSI colour indices 0-7. format() appends them to the "3", "4" and "10"
# prefixes to build foreground / background / bright-background SGR codes.
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)
def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
    """Build an ANSI SGR escape sequence for terminal colouring.

    Args:
        fg: Foreground colour index (one of the BLACK..WHITE constants), or
            None to leave the foreground alone.
        bg: Background colour index, or None to leave the background alone.
        bright: When true, the background uses the bright ("10x") codes
            instead of the normal ("4x") ones. Has no effect on ``fg``.
        bold: Emit the bold attribute; takes precedence over ``dim``.
        dim: Emit the dim attribute (only when ``bold`` is false).
        reset: When true, every other argument is ignored and the sequence
            that resets all attributes is returned.

    Returns:
        The escape sequence as a string, e.g. ``"\\033[33;1m"``.
    """
    # manually derived from http://en.wikipedia.org/wiki/ANSI_escape_code#Codes
    if reset:
        codes = ["0"]
    else:
        codes = []
        if fg is not None:
            codes.append("3%d" % (fg))
        if bg is not None:
            codes.append(("10%d" if bright else "4%d") % (bg))
        if bold:
            codes.append("1")
        elif dim:
            codes.append("2")
        else:
            # Neither bold nor dim: explicitly select normal intensity.
            codes.append("22")
    return "\033[%sm" % (";".join(codes))
# Messages collected by the current lint() run, keyed by
# "<string name>:<hash of message>". lint() rebinds this to a fresh dict and
# warn() stores into it; it is None until lint() has been called.
warnings = None
def warn(tag, msg, actual, expected, color=YELLOW):
    """Record a lint message for the string element *tag*.

    The message is stored in the module-level ``warnings`` dict under the key
    ``"<name attribute>:<hash(msg)>"``, so reporting the same message twice
    for the same string name overwrites the earlier entry.

    Args:
        tag: Parsed element; its ``name`` attribute and ``sourceline`` are
            used in the key and the rendered message.
        msg: Human-readable description of the problem.
        actual: Offending text to show on an "Actual:" line, or None to omit.
        expected: Sample of correct text to show on an "Example:" line, or
            None to omit.
        color: Colour index used for the message heading.
    """
    name = tag.attrib["name"]
    key = "%s:%d" % (name, hash(msg))
    value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
                                       tag.sourceline,
                                       name,
                                       format(reset=True),
                                       msg)
    if actual is not None:
        value += "\n\tActual: %s%s%s" % (format(dim=True),
                                         actual,
                                         format(reset=True))
    if expected is not None:
        value += "\n\tExample: %s%s%s" % (format(dim=True),
                                          expected,
                                          format(reset=True))
    # The dict is mutated, not rebound, so no ``global`` statement is needed.
    warnings[key] = value


def error(tag, msg, actual, expected):
    """Record a lint message for *tag* like warn(), with a red heading."""
    warn(tag, msg, actual, expected, RED)
def info(tag, msg, actual, expected):
    """Record a lint message for *tag* like warn(), with a cyan heading."""
    warn(tag, msg, actual, expected, CYAN)
# Escaping logic borrowed from https://stackoverflow.com/a/24519338
# The hex escapes only match genuine hex digits, so a malformed sequence such
# as "\xzz" is left alone instead of being handed to the unicode-escape
# decoder, which would raise on it.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9a-fA-F]{8}   # 8-digit hex escapes
    | \\u[0-9a-fA-F]{4}   # 4-digit hex escapes
    | \\x[0-9a-fA-F]{2}   # 2-digit hex escapes
    | \\[0-7]{1,3}        # Octal escapes
    | \\N\{[^}]+\}        # Unicode characters by name
    | \\[\\'"abfnrtv]     # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)
def decode_escapes(s):
    """Turn raw string-resource text into an approximation of what is shown.

    In order: a newline plus any following whitespace becomes one space,
    backslash escapes matched by ESCAPE_SEQUENCE_RE are decoded, format
    arguments such as ``%s`` / ``%1$d`` and ``^1`` become the placeholder
    ``____``, ``<br>`` / ``<br/>`` becomes a newline, and remaining simple
    lowercase tags such as ``<b>`` / ``</b>`` are removed.

    Args:
        s: Raw text taken from a string element.

    Returns:
        The transformed text.
    """
    def decode_match(match):
        escape = match.group(0)
        try:
            return codecs.decode(escape, 'unicode-escape')
        except UnicodeDecodeError:
            # A malformed escape (unknown \N{...} name, bad hex digits) must
            # not abort the whole lint run; keep the text as written.
            return escape

    s = re.sub(r"\n\s*", " ", s)
    s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
    s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
    s = re.sub(r"\^\d+", "____", s)
    s = re.sub(r"<br/?>", "\n", s)
    s = re.sub(r"</?[a-z]+>", "", s)
    return s
def sample_iter(tag):
    """Yield, in document order, the text pieces of a sample rendering of *tag*.

    An xliff ``g`` element that carries an ``example`` attribute contributes
    that example instead of its own text; any other node contributes its
    decoded text. Children are then visited recursively, each followed by its
    decoded tail text.

    Args:
        tag: A parsed node (element, comment, ...).

    Yields:
        Text fragments; joining them gives the sample string.
    """
    # Comments, processing instructions and entities expose a non-string
    # ``.tag``, which must not be passed to re.match().
    # NOTE(review): such a node still contributes its own text through the
    # ``elif`` below, as before; confirm whether comment text inside a
    # <string> is meant to count towards the sample.
    is_element = isinstance(tag.tag, str)
    if is_element and re.match("{.*xliff.*}g", tag.tag) and "example" in tag.attrib:
        yield tag.attrib["example"]
    elif tag.text:
        yield decode_escapes(tag.text)
    for e in tag:
        yield from sample_iter(e)
        if e.tail:
            yield decode_escapes(e.tail)
def lint(path):
    """Lint the ``<string>`` elements of the XML resource file at *path*.

    Rebinds the module-level ``warnings`` dict to a fresh dict, walks the
    direct children of the document root and records messages through
    info() / warn() / error(). A string is skipped after the initial
    "missing comment" check when its preceding comment contains
    "do not translate" or its ``translatable`` attribute is "false".

    Args:
        path: Path of the strings XML file to check.

    Returns:
        The ``warnings`` dict for this run, mapping a per-string/per-message
        key to the formatted message. It is empty when the file is blank.
    """
    global warnings
    warnings = {}

    # Read bytes rather than text: the XML parser then honours the encoding
    # declared in the file instead of depending on the locale's default.
    with open(path, "rb") as f:
        raw = f.read()
    if not raw.strip():
        return warnings
    root = ET.fromstring(raw)

    misspelled_attributes = [
        ("translateable", "translatable"),
    ]
    xliff_example = "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>"

    last_comment = None
    for child in root:
        # TODO: handle plurals
        if isinstance(child, ET._Comment):
            last_comment = child
            continue
        if child.tag != "string":
            continue

        # We always consume comment
        comment = last_comment
        last_comment = None

        # Prepare string for analysis
        text = "".join(child.itertext())
        sample = "".join(sample_iter(child)).strip().strip("'\"")

        # Validate comment
        if comment is None:
            info(child, "Missing string comment to aid translation",
                 None, None)
            continue
        if "do not translate" in comment.text.lower():
            continue
        if child.attrib.get("translatable", "").lower() == "false":
            continue

        for misspelling, expected in misspelled_attributes:
            if misspelling in child.attrib:
                error(child, "Misspelled <string> attribute.", misspelling, expected)

        limit = re.search(r"CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text)
        if limit is None:
            info(child, "Missing CHAR LIMIT to aid translation",
                 repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
        elif re.match(r"\d+", limit.group(1)):
            limit = int(limit.group(1))
            if len(sample) > limit:
                warn(child, "Expanded string length is larger than CHAR LIMIT",
                     sample, None)

        # Look for common mistakes/substitutions
        lowered = text.lower()
        if "'" in text:
            error(child, "Turned quotation mark glyphs are more polished",
                  text, "This doesn\u2019t need to \u2018happen\u2019 today")
        # A string wrapped entirely in double quotes is legitimate resource
        # syntax, so only that case is exempt. The previous condition
        # (`not startswith and endswith`) fired only for strings that happened
        # to end with a quote, missing quotes in the middle of the text.
        if '"' in text and not (text.startswith('"') and text.endswith('"')):
            error(child, "Turned quotation mark glyphs are more polished",
                  text, "This needs to \u201chappen\u201d today")
        if "..." in text:
            error(child, "Ellipsis glyph is more polished",
                  text, "Loading\u2026")
        if "wi-fi" in lowered:
            error(child, "Non-breaking glyph is more polished",
                  text, "Wi\u2011Fi")
        if "wifi" in lowered:
            error(child, "Using non-standard spelling",
                  text, "Wi\u2011Fi")
        if re.search(r"\d-\d", text):
            warn(child, "Ranges should use en dash glyph",
                 text, "You will find this material in chapters 8\u201312")
        if "--" in text:
            warn(child, "Phrases should use em dash glyph",
                 text, "Upon discovering errors\u2014all 124 of them\u2014they recalled.")
        # Two spaces after the full stop: a single space is the correct form
        # and must not be reported.
        if ".  " in text:
            warn(child, "Only use single space between sentences",
                 text, "First idea. Second idea.")
        if re.match(r"^[A-Z\s]{5,}$", text):
            warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
                 text, "Refresh data")
        if " phone " in text and "product" not in child.attrib:
            warn(child, "Strings mentioning phones should have variants for tablets",
                 text, None)

        # When more than one substitution, require indexes
        if len(re.findall("%[^%]", text)) > 1 and re.search(r"%[^\d]", text):
            error(child, "Substitutions must be indexed",
                  text, "Add %1$s to %2$s")

        # Require xliff substitutions
        for gc in child.iter():
            badsub = bool(gc.tail and re.search("%[^%]", gc.tail))
            if not isinstance(gc.tag, str):
                # Comment / processing instruction inside the string: its
                # ``.tag`` is not a string, so only the tail (checked above)
                # is inspected.
                pass
            elif re.match("{.*xliff.*}g", gc.tag):
                if "id" not in gc.attrib:
                    error(child, "Substitutions must define id attribute",
                          None, xliff_example)
                if "example" not in gc.attrib:
                    error(child, "Substitutions must define example attribute",
                          None, xliff_example)
            elif gc.text and re.search("%[^%]", gc.text):
                badsub = True
            if badsub:
                error(child, "Substitutions must be inside xliff tags",
                      text, xliff_example)

    return warnings
# Script driver: lint the given strings file and, when a second (older) file
# is given, drop every message that the older file already produced.
if len(sys.argv) < 2:
    # sys.exit() with a string prints it to stderr and exits with status 1.
    sys.exit("Usage: stringslint.py strings.xml [old_strings.xml]")

before = lint(sys.argv[2]) if len(sys.argv) > 2 else {}
after = lint(sys.argv[1])

for key in before:
    after.pop(key, None)

if after:
    for key in sorted(after):
        print(after[key])
        print()
    # Non-zero status signals that new lint messages were found.
    sys.exit(1)