Blame - tools/fonts/fontchain_linter.py - platform_frameworks_base

2016-03-09 23:08:45 -0800

[diff] [blame]

1

#!/usr/bin/env python

2

3

import collections
import copy
import glob
import io
from os import path
import sys
from xml.etree import ElementTree

from fontTools import ttLib

11

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

12

# U+FE0F (VARIATION SELECTOR-16). compute_expected_emoji() drops this code
# point from the emoji and ZWJ sequences it collects.
EMOJI_VS = 0xFE0F

13

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

14

# Maps a lower-case language subtag to the four-letter script code that
# lang_to_script() reports for it when the tag carries no explicit script.
LANG_TO_SCRIPT = {
    'as': 'Beng',
    'be': 'Cyrl',
    'bg': 'Cyrl',
    'bn': 'Beng',
    'cu': 'Cyrl',
    'cy': 'Latn',
    'da': 'Latn',
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'et': 'Latn',
    'eu': 'Latn',
    'fr': 'Latn',
    'ga': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn',
    'hu': 'Latn',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'la': 'Latn',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn',
    'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn',
    'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
}

51

52

def lang_to_script(lang_code):
    """Return the script code for a hyphen-separated language tag.

    The tag is lower-cased and looked up in LANG_TO_SCRIPT. While there is no
    match, the last hyphen-separated subtag is examined: a four-letter
    alphabetic subtag is taken to be the script itself and returned
    title-cased; anything else is dropped and the lookup is retried.

    Raises:
        AssertionError: if the subtags run out before a script is found.
    """
    remaining = lang_code.lower()
    while True:
        if remaining in LANG_TO_SCRIPT:
            return LANG_TO_SCRIPT[remaining]
        cut = remaining.rfind('-')
        assert cut != -1, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)
        tail = remaining[cut+1:]
        if len(tail) == 4 and tail.isalpha():
            # This is actually the script
            return tail.title()
        remaining = remaining[:cut]

65

66

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

67

def printable(inp):
    """Render code points in a human-readable form for error messages.

    Args:
        inp: a single code point (int), a character sequence (tuple of code
            points), or a set/frozenset of code points and/or sequences.

    Returns:
        'U+XXXX' for a code point, '<U+XXXX, ...>' for a sequence, and
        '{..., ...}' for a set. Set members appear in set iteration order.
    """
    # isinstance() rather than an exact type() match, so that frozensets and
    # tuple subclasses are rendered instead of failing in the '%04X' format.
    if isinstance(inp, (set, frozenset)):  # set of character sequences
        return '{' + ', '.join(printable(seq) for seq in inp) + '}'
    if isinstance(inp, tuple):  # character sequence
        return '<' + ', '.join(printable(ch) for ch in inp) + '>'
    # single character
    return 'U+%04X' % inp

74

75

76

def open_font(font):
    """Open a font with fontTools.

    Args:
        font: a (font_file, index) pair. font_file is resolved against the
            module-level _fonts_dir. index is passed as fontNumber unless it
            is None.

    Returns:
        The ttLib.TTFont object for the requested font.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    # Only forward fontNumber when an index was actually given.
    extra_args = {} if index is None else {'fontNumber': index}
    return ttLib.TTFont(font_path, **extra_args)

83

84

85

def get_best_cmap(font):
    """Return the preferred code-point-to-glyph mapping of a font.

    The (format 12, platform 3, encoding 10) cmap subtable is preferred; the
    (format 4, platform 3, encoding 1) subtable is used otherwise.

    Args:
        font: a (font_file, index) pair as accepted by open_font().

    Raises:
        AssertionError: if either kind of subtable occurs more than once.
    """
    bmp_spec = (4, 3, 1)
    ucs4_spec = (12, 3, 10)
    bmp_table = None
    ucs4_table = None
    for table in open_font(font)['cmap'].tables:
        spec = (table.format, table.platformID, table.platEncID)
        if spec == bmp_spec:
            assert bmp_table is None, 'More than one BMP cmap in %s' % (font, )
            bmp_table = table
        elif spec == ucs4_spec:
            assert ucs4_table is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            ucs4_table = table

    if ucs4_table:
        return ucs4_table.cmap
    return bmp_table.cmap

100

101

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

102

def get_variation_sequences_cmap(font):
    """Return the (format 14, platform 0, encoding 5) cmap subtable of a font.

    Args:
        font: a (font_file, index) pair as accepted by open_font().

    Returns:
        The matching subtable, or None when the font has none.

    Raises:
        AssertionError: if the font has more than one such subtable.
    """
    found = None
    for table in open_font(font)['cmap'].tables:
        if (table.format, table.platformID, table.platEncID) != (14, 0, 5):
            continue
        assert found is None, 'More than one VS cmap in %s' % (font, )
        found = table
    return found

def get_emoji_map(font):
    """Build the mapping from characters and sequences to glyphs for a font.

    Keys are single code points (from the best cmap), (base, selector) pairs
    (from the variation-sequence cmap) and code point tuples (from GSUB
    ligature lookups). Values are the glyphs they resolve to.

    Args:
        font: a (font_file, index) pair as accepted by open_font().

    Raises:
        AssertionError: if any prefix (of length >= 2) of a ligature sequence
            is already present in the map.
    """
    # Add normal characters
    emoji_map = copy.copy(get_best_cmap(font))
    glyph_to_code = {}
    for code, glyph_name in emoji_map.items():
        glyph_to_code[glyph_name] = code

    # Add variation sequences
    uvs = get_variation_sequences_cmap(font).uvsDict
    for selector in uvs:
        for base, glyph_name in uvs[selector]:
            # A None glyph means the sequence shares the base character's
            # own mapping.
            emoji_map[(base, selector)] = (
                emoji_map[base] if glyph_name is None else glyph_name)

    # Add GSUB rules
    lookups = open_font(font)['GSUB'].table.LookupList.Lookup
    for lookup in lookups:
        if lookup.LookupType != 4:
            # Other lookups are used in the emoji font for fallback.
            # We ignore them for now.
            continue
        for subtable in lookup.SubTable:
            ligature_table = subtable.ligatures
            for first_glyph in ligature_table:
                for ligature in ligature_table[first_glyph]:
                    glyph_names = [first_glyph] + ligature.Component
                    sequence = tuple(
                        glyph_to_code[name] for name in glyph_names)
                    # Make sure no starting subsequence of 'sequence' has
                    # been seen before.
                    for end in range(2, len(sequence)+1):
                        assert sequence[:end] not in emoji_map
                    emoji_map[sequence] = ligature.LigGlyph

    return emoji_map

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

151

def assert_font_supports_any_of_chars(font, chars):
    """Exit the program unless the font's best cmap has one of `chars`.

    Args:
        font: a (font_file, index) pair as accepted by open_font().
        chars: iterable of code points.
    """
    cmap = get_best_cmap(font)
    if any(char in cmap for char in chars):
        return
    sys.exit('None of characters in %s were found in %s' % (chars, font))

157

158

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

159

def assert_font_supports_all_of_chars(font, chars):
    """Assert that every code point in `chars` is in the font's best cmap.

    Args:
        font: a (font_file, index) pair as accepted by open_font().
        chars: iterable of code points.

    Raises:
        AssertionError: naming the first code point that is missing.
    """
    cmap = get_best_cmap(font)
    for codepoint in chars:
        is_supported = codepoint in cmap
        assert is_supported, (
            'U+%04X was not found in %s' % (codepoint, font))

164

165

Seigo Nonaka

2017-07-05 16:06:23 -0700

[diff] [blame]

166

def assert_font_supports_none_of_chars(font, chars, fallbackName):
    """Assert that no element of `chars` is in the font's best cmap.

    Args:
        font: a (font_file, index) pair as accepted by open_font().
        chars: iterable of characters to look up in the cmap.
        fallbackName: name of the fallback chain being checked; when truthy
            it is included in the failure message.

    Raises:
        AssertionError: naming the first character that is present.
    """
    best_cmap = get_best_cmap(font)
    for char in chars:
        # The branches used to be swapped, so the fallback name was only
        # reported when there was none to report.
        if fallbackName:
            assert char not in best_cmap, (
                'U+%04X was found in %s in fallback %s' % (char, font, fallbackName))
        else:
            assert char not in best_cmap, 'U+%04X was found in %s' % (char, font)

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

174

175

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

176

def assert_font_supports_all_sequences(font, sequences):
    """Assert that the font's variation-sequence cmap covers `sequences`.

    A (base, vs) pair counts as covered when the selector is present in the
    table's uvsDict and its entry list contains (base, None).

    Args:
        font: a (font_file, index) pair as accepted by open_font().
        sequences: iterable of (base, variation selector) pairs; checked in
            sorted order.

    Raises:
        AssertionError: naming the first pair that is not covered.
    """
    uvs = get_variation_sequences_cmap(font).uvsDict
    for base, vs in sorted(sequences):
        is_covered = vs in uvs and (base, None) in uvs[vs]
        assert is_covered, (
            '<U+%04X, U+%04X> was not found in %s' % (base, vs, font))

181

182

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

183

def check_hyphens(hyphens_dir):
    """Check that fonts for automatically hyphenated scripts have a hyphen.

    The scripts are derived from the language codes embedded in the names of
    the 'hyph-<lang>.hyb' files found in `hyphens_dir`. Every font registered
    for such a script in _script_to_font_map must support U+002D or U+2010.

    Raises:
        AssertionError: for an unexpected .hyb file name or a script that has
            no fonts.
    """
    HYPHENS = {0x002D, 0x2010}

    # Find all the scripts that need automatic hyphenation
    scripts = set()
    pattern = path.join(hyphens_dir, '*.hyb')
    for hyb_path in glob.iglob(pattern):
        file_name = path.basename(hyb_path)
        assert file_name.startswith('hyph-'), (
            'Unknown hyphenation file %s' % file_name)
        # The language code sits between the first '-' and the first '.'.
        code_start = file_name.index('-') + 1
        code_end = file_name.index('.')
        scripts.add(lang_to_script(file_name[code_start:code_end]))

    for script in scripts:
        fonts = _script_to_font_map[script]
        assert fonts, 'No fonts found for the "%s" script' % script
        for font in fonts:
            assert_font_supports_any_of_chars(font, HYPHENS)

199

200

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

201

class FontRecord(object):
    """One <font> entry of fonts.xml with its family's attributes.

    Attributes (as filled in by parse_fonts_xml):
        name: the family's 'name' attribute, or None.
        scripts: frozenset of script codes derived from the family's 'lang'.
        variant: the family's 'variant' attribute, or None.
        weight: integer font weight.
        style: the font's 'style' attribute.
        fallback_for: the font's 'fallbackFor' attribute, or None.
        font: (font_file, index) pair as accepted by open_font().
    """

    def __init__(self, name, scripts, variant, weight, style, fallback_for, font):
        # Family-level attributes.
        self.name = name
        self.scripts = scripts
        self.variant = variant
        # Font-level attributes.
        self.weight = weight
        self.style = style
        self.fallback_for = fallback_for
        self.font = font

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

212

def parse_fonts_xml(fonts_xml_path):
    """Parse fonts.xml and (re)build the module-level font tables.

    Populates three globals:
        _script_to_font_map: script code -> set of (font_file, index).
        _fallback_chains: named family -> ordered list of FontRecord.
        _all_fonts: every FontRecord, in document order.

    Fonts whose file does not exist under _fonts_dir are skipped.

    Raises:
        AssertionError: when the XML violates one of the checked constraints
            (too many families, unexpected attributes or tags, bad weight or
            style, unknown or conflicting fallbackFor).
    """
    global _script_to_font_map, _fallback_chains, _all_fonts
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chains = {}
    _all_fonts = []

    families = ElementTree.parse(fonts_xml_path).findall('family')
    # Minikin supports up to 254 but users can place their own font at the first
    # place. Thus, 253 is the maximum allowed number of font families in the
    # default collection.
    assert len(families) < 254, (
        'System font collection can contains up to 253 font families.')

    # First pass: validate family attributes and create an empty fallback
    # chain for every named family, so that fallbackFor can refer to a family
    # declared later in the file.
    for family in families:
        name = family.get('name')
        variant = family.get('variant')
        if not name:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)
            continue
        assert variant is None, (
            'No variant expected for LGC font %s.' % name)
        assert family.get('lang') is None, (
            'No language expected for LGC fonts %s.' % name)
        assert name not in _fallback_chains, 'Duplicated name entry %s' % name
        _fallback_chains[name] = []

    # Second pass: record every font and hook it into the fallback chains.
    for family in families:
        name = family.get('name')
        variant = family.get('variant')
        lang_attr = family.get('lang')

        scripts = set()
        if lang_attr:
            for lang in lang_attr.split():
                scripts.add(lang_to_script(lang))

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)
            font_file = child.text.rstrip()
            weight = int(child.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            fallback_for = child.get('fallbackFor')

            assert not name or not fallback_for, (
                'name and fallbackFor cannot be present at the same time')
            assert not fallback_for or fallback_for in _fallback_chains, (
                'Unknown fallback name: %s' % fallback_for)

            index = child.get('index')
            if index:
                index = int(index)

            if not path.exists(path.join(_fonts_dir, font_file)):
                # Missing font is a valid case. Just ignore the missing font
                # files.
                continue

            font = (font_file, index)
            record = FontRecord(
                name,
                frozenset(scripts),
                variant,
                weight,
                style,
                fallback_for,
                font)
            _all_fonts.append(record)

            if fallback_for:
                _fallback_chains[fallback_for].append(record)
            elif not name or name == 'sans-serif':
                # Unnamed families and 'sans-serif' join every chain.
                for chain in _fallback_chains.values():
                    chain.append(record)
            else:
                _fallback_chains[name].append(record)

            # non-empty names are used for default LGC fonts
            map_scripts = {'Latn', 'Grek', 'Cyrl'} if name else scripts
            for script in map_scripts:
                _script_to_font_map[script].add(font)

302

303

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

304

def check_emoji_coverage(all_emoji, equivalent_emoji):
    """Run the emoji coverage check against the single emoji font.

    Args:
        all_emoji: the characters and sequences the font must support.
        equivalent_emoji: mapping of sequences expected to share a glyph.
    """
    check_emoji_font_coverage(get_emoji_font(), all_emoji, equivalent_emoji)

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

307

308

309

def get_emoji_font():
    """Return the (font_file, index) of the only font tagged 'Zsye'.

    Raises:
        AssertionError: unless exactly one record in _all_fonts lists the
            'Zsye' script.
    """
    emoji_fonts = []
    for record in _all_fonts:
        if 'Zsye' in record.scripts:
            emoji_fonts.append(record.font)
    assert len(emoji_fonts) == 1, 'There are %d emoji fonts.' % len(emoji_fonts)
    return emoji_fonts[0]

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

315

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

316

317

def check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji):
    """Check that the emoji font covers exactly the expected emoji.

    Collects an error for every expected sequence that is missing, every
    unexpected sequence that is present, every pair in `equivalent_emoji`
    that does not share a glyph, and every glyph shared by sequences that
    were not declared equivalent.

    Args:
        emoji_font: a (font_file, index) pair as accepted by open_font().
        all_emoji: collection of code points and code point tuples the font
            must support.
        equivalent_emoji: dict mapping a sequence to the sequence it must
            share a glyph with.

    Raises:
        AssertionError: listing all collected errors, if there are any.
    """
    # The font needs to support a few extra characters, which is OK
    allowed_extras = {0x0000, 0x000D, 0x0020}

    coverage = get_emoji_map(emoji_font)

    errors = []

    for sequence in all_emoji:
        if sequence not in coverage:
            errors.append('%s is not supported in the emoji font.' % printable(sequence))

    for sequence in coverage:
        if sequence in allowed_extras:
            continue
        if sequence not in all_emoji:
            errors.append('%s support unexpected in the emoji font.' % printable(sequence))

    for first, second in equivalent_emoji.items():
        if first not in coverage or second not in coverage:
            continue  # sequence will be reported missing
        if coverage[first] != coverage[second]:
            errors.append('%s and %s should map to the same glyph.' % (
                printable(first),
                printable(second)))

    # Group sequences by glyph in one pass instead of rescanning the whole
    # coverage map once per distinct glyph.
    sequences_by_glyph = collections.defaultdict(list)
    for seq, glyph in coverage.items():
        sequences_by_glyph[glyph].append(seq)

    for glyph, maps_to_glyph in sequences_by_glyph.items():
        if len(maps_to_glyph) > 1:
            # There are more than one sequences mapping to the same glyph. We
            # need to make sure they were expected to be equivalent.
            equivalent_seqs = set()
            for seq in maps_to_glyph:
                # Follow the equivalence chain to its final target.
                equivalent_seq = seq
                while equivalent_seq in equivalent_emoji:
                    equivalent_seq = equivalent_emoji[equivalent_seq]
                equivalent_seqs.add(equivalent_seq)
            if len(equivalent_seqs) != 1:
                errors.append('The sequences %s should not result in the same glyph %s' % (
                    printable(equivalent_seqs),
                    glyph))

    assert not errors, '%d emoji font errors:\n%s\n%d emoji font coverage errors' % (len(errors), '\n'.join(errors), len(errors))

358

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

359

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

360

def check_emoji_defaults(default_emoji):
    """Check that default-emoji characters are left to the emoji font.

    For every fallback chain, each checked font must support none of
    `default_emoji`. Characters with the 'Emoji' property that are not in
    `default_emoji` must be covered by a font placed before the emoji font,
    except for those listed under age '7.0'.

    Args:
        default_emoji: set of characters and sequences that default to emoji
            presentation.

    Raises:
        AssertionError: from the per-font check, or when text-style
            characters remain uncovered.
    """
    missing_text_chars = _emoji_properties['Emoji'] - default_emoji
    for name, fallback_chain in _fallback_chains.items():
        past_emoji_font = False
        for record in fallback_chain:
            if 'Zsye' in record.scripts:
                past_emoji_font = True
                # No need to check the emoji font
                continue

            # For later fonts, we only check them if they have a script
            # defined, since the defined script may get them to a higher
            # score even if they appear after the emoji font. However,
            # we should skip checking the text symbols font, since
            # symbol fonts should be able to override the emoji display
            # style when 'Zsym' is explicitly specified by the user.
            unscripted_or_symbols = (
                not record.scripts or 'Zsym' in record.scripts)
            if past_emoji_font and unscripted_or_symbols:
                continue

            # Check default emoji-style characters
            assert_font_supports_none_of_chars(record.font, default_emoji, name)

            # Mark default text-style characters appearing in fonts above the
            # emoji font as seen
            if not past_emoji_font:
                missing_text_chars.difference_update(
                    get_best_cmap(record.font))

        # NOTE(review): the source this was restored from had lost its
        # indentation; the two statements below are placed inside the
        # per-chain loop - confirm against version control.
        # Noto does not have monochrome glyphs for Unicode 7.0 wingdings and
        # webdings yet.
        missing_text_chars -= _chars_by_age['7.0']
        assert not missing_text_chars, (
            'Text style version of some emoji characters are missing: ' +
            repr(missing_text_chars))

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

392

393

Roozbeh Pournader

2016-03-16 18:55:32 -0700

[diff] [blame]

394

def parse_unicode_datafile(file_path, reverse=False):
    """Parse a semicolon-separated Unicode data file.

    Each non-empty line, after removing any '#' comment, is read as
    '<chars> ; <property> [; ...]' where <chars> is a single hex code point,
    a 'START..END' range, or a space-separated sequence of code points.

    Args:
        file_path: path of the data file; it is read as UTF-8.
        reverse: when true, return a dictionary that maps each property value
            to the set of characters having it, which is useful for some
            binary properties. Otherwise return a dictionary that maps
            characters to the property value, assuming there's only one
            property in the file.

    Returns:
        A dict (a defaultdict(set) when `reverse` is true). Single characters
        and range members are int keys/members; sequences are tuples of int.

    Raises:
        AssertionError: in non-reverse mode, if a character or sequence is
            listed more than once.
    """
    if reverse:
        output_dict = collections.defaultdict(set)
    else:
        output_dict = {}
    # Read as UTF-8 explicitly: plain open() uses the locale's encoding and
    # fails on the non-ASCII comments under a non-UTF-8 locale.
    with io.open(file_path, encoding='utf-8') as datafile:
        for line in datafile:
            line = line.split('#', 1)[0].strip()
            if not line:
                continue

            chars, prop = line.split(';')[:2]
            chars = chars.strip()
            prop = prop.strip()

            if ' ' in chars:  # character sequence
                sequence = [int(ch, 16) for ch in chars.split(' ')]
                additions = [tuple(sequence)]
            elif '..' in chars:  # character range
                char_start, char_end = chars.split('..')
                char_start = int(char_start, 16)
                char_end = int(char_end, 16)
                additions = range(char_start, char_end+1)
            else:  # single character
                additions = [int(chars, 16)]

            if reverse:
                output_dict[prop].update(additions)
            else:
                for addition in additions:
                    assert addition not in output_dict
                    output_dict[addition] = prop
    return output_dict

Roozbeh Pournader

2017-04-10 13:52:20 -0700

[diff] [blame]

434

def parse_emoji_variants(file_path):
    """Parse a variation-sequences data file.

    Each non-empty line, after removing any '#' comment, must have exactly
    three ';'-separated fields: '<base> <selector>', a description, and a
    third field that is ignored.

    Args:
        file_path: path of the data file; it is read as UTF-8.

    Returns:
        (text_set, emoji_set): sets of (base, selector) int pairs whose
        description is 'text style' and 'emoji style' respectively. Lines
        with any other description are ignored.
    """
    emoji_set = set()
    text_set = set()
    # Read as UTF-8 explicitly: plain open() uses the locale's encoding and
    # fails on non-ASCII comments under a non-UTF-8 locale.
    with io.open(file_path, encoding='utf-8') as datafile:
        for line in datafile:
            line = line.split('#', 1)[0].strip()
            if not line:
                continue
            sequence, description, _ = line.split(';')
            sequence = sequence.strip().split(' ')
            base = int(sequence[0], 16)
            vs = int(sequence[1], 16)
            description = description.strip()
            if description == 'text style':
                text_set.add((base, vs))
            elif description == 'emoji style':
                emoji_set.add((base, vs))
    return text_set, emoji_set

454

455

Roozbeh Pournader

2016-03-16 18:55:32 -0700

[diff] [blame]

456

def parse_ucd(ucd_path):
    """Load the Unicode data files under `ucd_path` into module globals.

    Sets _emoji_properties, _chars_by_age, _text_variation_sequences,
    _emoji_variation_sequences, _emoji_sequences and _emoji_zwj_sequences.
    Files under the 'additions' subdirectory are merged in, and entries
    listed in 'additions/emoji-exclusions.txt' are removed.
    """
    global _emoji_properties, _chars_by_age
    global _text_variation_sequences, _emoji_variation_sequences
    global _emoji_sequences, _emoji_zwj_sequences

    def ucd_file(*parts):
        # Path of a data file below ucd_path.
        return path.join(ucd_path, *parts)

    _emoji_properties = parse_unicode_datafile(
        ucd_file('emoji-data.txt'), reverse=True)
    extra_properties = parse_unicode_datafile(
        ucd_file('additions', 'emoji-data.txt'), reverse=True)
    for prop, chars in extra_properties.items():
        _emoji_properties[prop].update(chars)

    _chars_by_age = parse_unicode_datafile(
        ucd_file('DerivedAge.txt'), reverse=True)

    _text_variation_sequences, _emoji_variation_sequences = (
        parse_emoji_variants(ucd_file('emoji-variation-sequences.txt')))

    _emoji_sequences = parse_unicode_datafile(
        ucd_file('emoji-sequences.txt'))
    _emoji_sequences.update(parse_unicode_datafile(
        ucd_file('additions', 'emoji-sequences.txt')))

    _emoji_zwj_sequences = parse_unicode_datafile(
        ucd_file('emoji-zwj-sequences.txt'))
    _emoji_zwj_sequences.update(parse_unicode_datafile(
        ucd_file('additions', 'emoji-zwj-sequences.txt')))

    exclusions = parse_unicode_datafile(
        ucd_file('additions', 'emoji-exclusions.txt'))
    _emoji_sequences = remove_emoji_exclude(_emoji_sequences, exclusions)
    _emoji_zwj_sequences = remove_emoji_exclude(
        _emoji_zwj_sequences, exclusions)
    _emoji_variation_sequences = remove_emoji_variation_exclude(
        _emoji_variation_sequences, exclusions)

    # Unicode 12.0 adds Basic_Emoji in emoji-sequences.txt. We ignore them
    # here since we are already checking the emoji presentations with
    # emoji-variation-sequences.txt.
    # Please refer to http://unicode.org/reports/tr51/#def_basic_emoji_set .
    _emoji_sequences = {
        seq: kind for seq, kind in _emoji_sequences.items()
        if kind != 'Basic_Emoji'}

Qingqing Deng

5e98771

2019-03-25 16:53:34 -0700

[diff] [blame]

489

Siyamed Sinir

d97df5a

2018-04-12 13:11:42 -0700

[diff] [blame]

490

491

def remove_emoji_variation_exclude(source, items):
    """Return the members of set `source` that are not keys of `items`."""
    excluded = set(items.keys())
    return source - excluded

493

494

def remove_emoji_exclude(source, items):
    """Return a copy of dict `source` without the keys found in `items`."""
    kept = {}
    for key, value in source.items():
        if key in items:
            continue
        kept[key] = value
    return kept

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

496

497

def flag_sequence(territory_code):
    """Return the code point tuple for a territory code.

    Each character is offset from 'A' and added to 0x1F1E6, so 'A' maps to
    0x1F1E6, 'B' to 0x1F1E7, and so on.
    """
    offset = 0x1F1E6 - ord('A')
    return tuple(ord(letter) + offset for letter in territory_code)

499

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

500

# Flags that are expected to share a glyph with another territory's flag.
# Each key sequence is merged into the equivalence map built by
# compute_expected_emoji().
EQUIVALENT_FLAGS = {
    flag_sequence(territory): flag_sequence(shown_as)
    for territory, shown_as in (
        ('BV', 'NO'),
        ('CP', 'FR'),
        ('HM', 'AU'),
        ('SJ', 'NO'),
        ('UM', 'US'),
    )
}

507

508

# U+20E3 (COMBINING ENCLOSING KEYCAP); second element of the keycap
# sequences listed in LEGACY_ANDROID_EMOJI.
COMBINING_KEYCAP = 0x20E3

509

510

# Legacy single code points and the sequence each one is expected to share a
# glyph with. compute_expected_emoji() adds the keys to the expected emoji
# sets and merges the mapping into the equivalence map.
LEGACY_ANDROID_EMOJI = {
    # Flags
    0xFE4E5: flag_sequence('JP'),
    0xFE4E6: flag_sequence('US'),
    0xFE4E7: flag_sequence('FR'),
    0xFE4E8: flag_sequence('DE'),
    0xFE4E9: flag_sequence('IT'),
    0xFE4EA: flag_sequence('GB'),
    0xFE4EB: flag_sequence('ES'),
    0xFE4EC: flag_sequence('RU'),
    0xFE4ED: flag_sequence('CN'),
    0xFE4EE: flag_sequence('KR'),
    # Keycaps
    0xFE82C: (ord('#'), COMBINING_KEYCAP),
    0xFE82E: (ord('1'), COMBINING_KEYCAP),
    0xFE82F: (ord('2'), COMBINING_KEYCAP),
    0xFE830: (ord('3'), COMBINING_KEYCAP),
    0xFE831: (ord('4'), COMBINING_KEYCAP),
    0xFE832: (ord('5'), COMBINING_KEYCAP),
    0xFE833: (ord('6'), COMBINING_KEYCAP),
    0xFE834: (ord('7'), COMBINING_KEYCAP),
    0xFE835: (ord('8'), COMBINING_KEYCAP),
    0xFE836: (ord('9'), COMBINING_KEYCAP),
    0xFE837: (ord('0'), COMBINING_KEYCAP),
}

533

Siyamed Sinir

77a1b14

2018-07-12 12:02:18 -0700

[diff] [blame]

534

# Emoji that should have the same glyph as another emoji. Each entry maps a
# sequence to the sequence (or single code point) whose glyph it must share.
# For example, the gender-based Kiss (0x1F48F) previously had the same glyph
# as Kiss: Woman, Man
# (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468); a valid row
# for that case would be:
# (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468): 0x1F48F,
ZWJ_IDENTICALS = {}

541

Seigo Nonaka

c180863

2018-05-14 13:39:40 -0700

[diff] [blame]

542

# Pairs of flag sequences that must share a glyph. compute_expected_emoji()
# records the first sequence of each pair as equivalent to the second.
SAME_FLAG_MAPPINGS = [
    # Diego Garcia and British Indian Ocean Territory
    (
        (0x1F1EE, 0x1F1F4),
        (0x1F1E9, 0x1F1EC),
    ),
    # St. Martin and France
    (
        (0x1F1F2, 0x1F1EB),
        (0x1F1EB, 0x1F1F7),
    ),
    # Spain and Ceuta & Melilla
    (
        (0x1F1EA, 0x1F1F8),
        (0x1F1EA, 0x1F1E6),
    ),
]

550

Roozbeh Pournader

2017-04-10 13:52:20 -0700

[diff] [blame]

551

# U+200D (ZERO WIDTH JOINER).
ZWJ = 0x200D

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

552

553

def is_fitzpatrick_modifier(cp):
    """Return True when `cp` lies in the range 0x1F3FB..0x1F3FF inclusive."""
    return cp >= 0x1F3FB and cp <= 0x1F3FF

555

556

557

def reverse_emoji(seq):
    """Return `seq` reversed, with Fitzpatrick modifiers moved rightwards.

    After reversing, the list is scanned left to right and any modifier found
    at position i-1 is swapped with the element at position i, so that a
    modifier ends up after the emoji it modifies.

    NOTE(review): a modifier that has just been swapped is examined again on
    the next step, so it keeps moving right until the end of the sequence.
    This rewrite preserves that behaviour exactly; confirm it matches how the
    font's reversed sequences are generated before changing it.
    """
    flipped = list(seq)
    flipped.reverse()
    # if there are fitzpatrick modifiers in the sequence, keep them after
    # the emoji they modify
    for pos in range(1, len(flipped)):
        before = pos - 1
        if is_fitzpatrick_modifier(flipped[before]):
            flipped[before], flipped[pos] = flipped[pos], flipped[before]
    return tuple(flipped)

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

565

566

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

567

def compute_expected_emoji():
    """Compute the emoji the font chain is expected to support.

    Uses the globals filled in by parse_ucd() and, as a side effect, adds the
    empty flag tag sequence (0x1F3F4, 0xE007F) to _emoji_sequences.

    Returns:
        (all_emoji, default_emoji, equivalent_emoji):
        all_emoji: every expected code point and sequence.
        default_emoji: the code points and sequences that default to emoji
            presentation.
        equivalent_emoji: dict mapping a sequence to the sequence or code
            point it must share a glyph with.
    """
    equivalent_emoji = {}
    sequence_pieces = set()
    all_sequences = set(_emoji_variation_sequences)

    def without_emoji_vs(raw_sequence):
        # Sequences are compared with EMOJI_VS removed.
        return tuple(ch for ch in raw_sequence if ch != EMOJI_VS)

    def expect_reversed(sequence):
        # Expect the reversed form too, sharing the original's glyph.
        reversed_seq = reverse_emoji(sequence)
        all_sequences.add(reversed_seq)
        equivalent_emoji[reversed_seq] = sequence

    # Add empty flag tag sequence that is supported as fallback
    _emoji_sequences[(0x1F3F4, 0xE007F)] = 'Emoji_Tag_Sequence'

    for raw_sequence in _emoji_sequences.keys():
        sequence = without_emoji_vs(raw_sequence)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        if _emoji_sequences.get(sequence, None) == 'Emoji_Tag_Sequence':
            # Add reverse of all emoji tag sequences, which are added to the
            # fonts as a workaround to get the sequences work in RTL text.
            # TODO: test if these are actually needed by Minikin/HarfBuzz.
            expect_reversed(sequence)

    for raw_sequence in _emoji_zwj_sequences.keys():
        sequence = without_emoji_vs(raw_sequence)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Add reverse of all emoji ZWJ sequences, which are added to the fonts
        # as a workaround to get the sequences work in RTL text.
        expect_reversed(sequence)

    for first, second in SAME_FLAG_MAPPINGS:
        equivalent_emoji[first] = second

    # Add all tag characters used in flags
    sequence_pieces.update(range(0xE0030, 0xE0039 + 1))
    sequence_pieces.update(range(0xE0061, 0xE007A + 1))

    legacy_chars = set(LEGACY_ANDROID_EMOJI.keys())
    all_emoji = (
        _emoji_properties['Emoji'] |
        all_sequences |
        sequence_pieces |
        legacy_chars)
    default_emoji = (
        _emoji_properties['Emoji_Presentation'] |
        all_sequences |
        legacy_chars)

    # Later updates override earlier entries with the same key.
    for extra_equivalents in (
            EQUIVALENT_FLAGS, LEGACY_ANDROID_EMOJI, ZWJ_IDENTICALS):
        equivalent_emoji.update(extra_equivalents)

    # A variation sequence shares its glyph with its base character.
    for seq in _emoji_variation_sequences:
        equivalent_emoji[seq] = seq[0]

    return all_emoji, default_emoji, equivalent_emoji

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

627

628

Seigo Nonaka

2017-07-05 16:06:23 -0700

[diff] [blame]

629

def check_compact_only_fallback():
    """Assert that every 'compact' fallback record has an 'elegant' sibling.

    Walks each chain stored in the module-level ``_fallback_chains`` mapping.
    For every record whose ``variant`` is 'compact', the same chain must also
    contain at least one record with an equal ``scripts`` value whose
    ``variant`` is 'elegant'.

    Raises:
        AssertionError: if a compact record has no such elegant counterpart.
            The message names the record's font, scripts and fallback_for.
    """
    # Only the chains themselves are inspected; the mapping keys are unused.
    for fallback_chain in _fallback_chains.values():
        for record in fallback_chain:
            if record.variant != 'compact':
                continue
            same_script_elegants = [
                x for x in fallback_chain
                if x.scripts == record.scripts and x.variant == 'elegant']
            # The message must be a plain string. A trailing comma after the
            # formatted string would wrap it in a 1-tuple and the failure
            # would be reported as "('...',)".
            assert same_script_elegants, (
                '%s must be in elegant of %s as fallback of "%s" too' % (
                    record.font, record.scripts, record.fallback_for))

638

639

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

640

def check_vertical_metrics():
    """Assert the expected vertical metrics of the named font families.

    For each record in the module-level ``_all_fonts``:
      * records named 'sans-serif' or 'sans-serif-condensed' must have
        ``head.yMax == 2163`` and ``head.yMin == -555``;
      * records named 'sans-serif', 'sans-serif-condensed', 'serif' or
        'monospace' must have ``hhea.ascent == 1900`` and
        ``hhea.descent == -500``.

    Raises:
        AssertionError: if a checked font carries different values.
    """
    sans_families = ('sans-serif', 'sans-serif-condensed')
    core_families = sans_families + ('serif', 'monospace')

    for record in _all_fonts:
        family = record.name

        if family in sans_families:
            head = open_font(record.font)['head']
            bounds_ok = head.yMax == 2163 and head.yMin == -555
            assert bounds_ok, (
                'yMax and yMin of %s do not match expected values.'
                % (record.font,))

        if family in core_families:
            hhea = open_font(record.font)['hhea']
            line_metrics_ok = hhea.ascent == 1900 and hhea.descent == -500
            assert line_metrics_ok, (
                'ascent and descent of %s do not match expected values.'
                % (record.font,))

655

656

657

def check_cjk_punctuation():
    """Assert that fonts ahead of the first CJK font lack CJK punctuation.

    For every chain in the module-level ``_fallback_chains``, records are
    visited in order until one whose ``scripts`` intersects
    {'Hans', 'Hant', 'Jpan', 'Kore'} is found. Each record before that point
    is passed to ``assert_font_supports_none_of_chars`` together with the
    code point range U+3000..U+301F and the chain's key.
    """
    cjk_script_codes = {'Hans', 'Hant', 'Jpan', 'Kore'}
    # U+3000 through U+301F inclusive.
    punctuation_range = range(0x3000, 0x3020)

    for chain_name in _fallback_chains:
        for entry in _fallback_chains[chain_name]:
            reached_cjk = entry.scripts.intersection(cjk_script_codes)
            if reached_cjk:
                # First CJK font found: later fonts in this chain are exempt.
                break
            assert_font_supports_none_of_chars(
                entry.font, punctuation_range, chain_name)

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

666

667

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

668

def main():

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

669

global _fonts_dir

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

670

target_out = sys.argv[1]

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

671

_fonts_dir = path.join(target_out, 'fonts')

672

673

fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')

674

parse_fonts_xml(fonts_xml_path)

675

Seigo Nonaka

2017-07-05 16:06:23 -0700

[diff] [blame]

676

check_compact_only_fallback()

677

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

678

check_vertical_metrics()

679

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

680

hyphens_dir = path.join(target_out, 'usr', 'hyphen-data')

681

check_hyphens(hyphens_dir)

682

Roozbeh Pournader

2017-05-18 18:38:36 -0700

[diff] [blame]

683

check_cjk_punctuation()

684

Roozbeh Pournader

27ec3ac

2016-03-31 13:05:32 -0700

[diff] [blame]

685

check_emoji = sys.argv[2]

686

if check_emoji == 'true':

687

ucd_path = sys.argv[3]

688

parse_ucd(ucd_path)

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

689

all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()

690

check_emoji_coverage(all_emoji, equivalent_emoji)

691

check_emoji_defaults(default_emoji)

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

692

Roozbeh Pournader