Blame - cm_crowdin_sync.py - crowdin

2014-02-28 01:06:03 +0100

[diff] [blame]

1

#!/usr/bin/python2

Marco Brohet

f174272

2014-03-04 22:41:18 +0100

[diff] [blame]

2

# -*- coding: utf-8 -*-

Marco Brohet

2014-02-28 01:06:03 +0100

[diff] [blame]

3

# cm_crowdin_sync.py

4

#

5

# Updates Crowdin source translations and pulls translations

Marco Brohet

2014-02-28 21:01:26 +0100

[diff] [blame]

6

# directly to CyanogenMod's Git.

#

#

# Licensed under the Apache License, Version 2.0 (the "License");

11

# you may not use this file except in compliance with the License.

12

# You may obtain a copy of the License at

13

#

14

# http://www.apache.org/licenses/LICENSE-2.0

15

#

16

# Unless required by applicable law or agreed to in writing, software

17

# distributed under the License is distributed on an "AS IS" BASIS,

18

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

19

# See the License for the specific language governing permissions and

20

# limitations under the License.

Marco Brohet

2014-02-28 01:06:03 +0100

[diff] [blame]

21

Marco Brohet

f174272

2014-03-04 22:41:18 +0100

[diff] [blame]

22

import codecs

Marco Brohet

2014-02-28 01:06:03 +0100

[diff] [blame]

23

import git

24

import mmap

Marco Brohet

2014-02-28 18:48:17 +0100

[diff] [blame]

25

import os

Marco Brohet

2014-02-28 01:06:03 +0100

[diff] [blame]

import os.path

import re

import shutil

import subprocess

import sys

from urllib import urlretrieve

32

from xml.dom import minidom

33

Marco Brohet

2014-02-28 21:01:26 +0100

[diff] [blame]

34

def _translatable_names(elements):
    """Return the 'name' of each element without translatable/translate.

    Elements carrying either a 'translatable' or a 'translate' attribute
    are skipped. Names are returned in document order, duplicates included.
    """
    return [element.attributes['name'].value
            for element in elements
            if not element.hasAttribute('translatable')
            and not element.hasAttribute('translate')]


def _additions_for_tag(xml_base, xml_cm, tag):
    """Return serialized `tag` elements of xml_cm whose name is not in xml_base."""
    elements_cm = xml_cm.getElementsByTagName(tag)
    names_base = set(_translatable_names(xml_base.getElementsByTagName(tag)))

    # Group the CM elements by name once, so that every variant sharing a
    # name (e.g. the same string with different 'product' attributes) can
    # be emitted together without rescanning the whole document per name.
    elements_by_name = {}
    for element in elements_cm:
        if element.hasAttribute('name'):
            elements_by_name.setdefault(
                element.getAttribute('name'), []).append(element)

    additions = []
    # Names already emitted; prevents duplicates with product attribute
    handled = set()
    for name in _translatable_names(elements_cm):
        if name in names_base or name in handled:
            continue
        handled.add(name)
        for element in elements_by_name[name]:
            additions.append(' ' + element.toxml())
    return additions


def get_caf_additions(strings_base, strings_cm):
    """Return the resources that exist in the CM file but not in the base file.

    Both arguments are handed to minidom.parse(), so each may be a file
    name or an open file object.

    Only <string>, <string-array> and <plurals> elements are considered.
    An element takes part in the name comparison only if it has neither a
    'translatable' nor a 'translate' attribute. For every CM name that is
    missing from the base file, all CM elements of that tag with that name
    are emitted once.

    Returns:
        A list of strings, each one a space followed by the element's XML.
        Strings come first, then string-arrays, then plurals; within a tag
        the order of first appearance in the CM file is kept.

    Raises:
        KeyError: if a compared element has no 'name' attribute.
    """
    # Load AOSP file and CM file
    xml_base = minidom.parse(strings_base)
    xml_cm = minidom.parse(strings_cm)

    # Store all differences in this list
    caf_additions = []
    for tag in ('string', 'string-array', 'plurals'):
        caf_additions.extend(_additions_for_tag(xml_base, xml_cm, tag))

    # Done :-)
    return caf_additions

Marco Brohet

2014-03-02 17:31:17 +0100

[diff] [blame]

108

def _translation_text(element):
    """Return the text of a translated <string> element, or None if unusable.

    None is returned when there is no element, when the element is empty,
    or when its first child carries no node value.
    """
    if element is None or element.firstChild is None:
        return None
    return element.firstChild.nodeValue


def sync_js_translations(sync_type, path, lang=''):
    """Convert between the JavaScript string file and Crowdin XML.

    Args:
        sync_type: 'upload' writes <path>en.xml from <path>en.js.
            'download' writes <path><lang>.js from <path>en.js, replacing
            each English text that has a translation in <path><lang>.xml.
        path: Prefix that is concatenated directly with the file names, so
            a directory must include its trailing separator.
        lang: Language code; required in download mode.

    Raises:
        SystemExit: in download mode when no language code is given.

    In download mode, lines containing '//' are not written. Strings
    without a usable translation keep their English text, so the structure
    of the file is retained.
    """
    # lang is necessary in download mode
    if sync_type == 'download' and lang == '':
        sys.exit('Invalid syntax. Language code is required in download mode.')

    # Read source en.js file. This is necessary for both upload and download modes
    with codecs.open(path + 'en.js', 'r', 'utf-8') as f:
        content = f.readlines()

    translations = {}
    if sync_type == 'upload':
        # Prepare XML file structure
        doc = minidom.Document()
        header = doc.createElement('resources')
        target = path + 'en.xml'
    else:
        # Parse the translations before the output file is opened, so that a
        # broken or missing XML file does not leave a truncated .js behind.
        xml_base = minidom.parse(path + lang + '.xml')
        for tag in xml_base.getElementsByTagName('string'):
            # The first element wins when a name occurs more than once.
            if tag.hasAttribute('name'):
                translations.setdefault(tag.getAttribute('name'), tag)
        target = path + lang + '.js'

    with codecs.open(target, 'w', 'utf-8') as file_write:
        # Read each line of en.js
        for a_line in content:
            # Regex to determine string id
            m = re.search(' (.*): [\'|\"]', a_line)
            # Regex to find the quoted string contents; a line whose string
            # is not closed on the same line has none and is passed through.
            m2 = None
            if m is not None:
                m2 = re.search('\'(.*)\'|"(.*)"', a_line)
            if m2 is not None:
                string_id = m.group(1).replace(' ', '')
                # Exactly one alternative of the pattern matched.
                group = 1 if m2.group(1) is not None else 2
                string_content = m2.group(group)
                if sync_type == 'upload':
                    # In upload mode, create the appropriate string element.
                    element = doc.createElement('string')
                    element.setAttribute('name', string_id)
                    element.appendChild(doc.createTextNode(string_content))
                    header.appendChild(element)
                else:
                    # In download mode, replace the English text with the
                    # translation when one exists for this string id.
                    translation = _translation_text(translations.get(string_id))
                    if translation is not None:
                        # Replace only the quoted span. A plain str.replace
                        # would also rewrite the id when it equals the text,
                        # and would mangle the line when the text is empty.
                        start = m2.start(group)
                        end = start + len(string_content.rstrip())
                        a_line = a_line[:start] + translation + a_line[end:]

            # In download mode do not write comments
            if sync_type == 'download' and '//' not in a_line:
                # Add language identifier (1)
                if 'cmaccount.l10n.en' in a_line:
                    a_line = a_line.replace('l10n.en', 'l10n.' + lang)
                # Add language identifier (2)
                if 'l10n.add(\'en\'' in a_line:
                    a_line = a_line.replace('l10n.add(\'en\'', 'l10n.add(\'' + lang + '\'')
                # Now write the line
                file_write.write(a_line)

        # Create XML file structure
        if sync_type == 'upload':
            contents = header.toxml().replace('<string', '\n <string').replace('</resources>', '\n</resources>')
            file_write.write('<?xml version="1.0" encoding="utf-8"?>\n')
            file_write.write('\n')
            file_write.write(contents)

Marco Brohet

2014-02-28 21:01:26 +0100

[diff] [blame]

180

def push_as_commit(path, name):
    """Stage everything under `path` and create a translation import commit.

    Args:
        path: Project directory, relative to the current working directory.
        name: Repository name; used in the status messages and in the
            (currently disabled) push URL.

    A failing 'git commit' is reported and otherwise ignored, because the
    usual cause is that there is nothing to commit.
    """
    # Get path
    path = os.path.join(os.getcwd(), path)

    # Create git commit
    repo = git.Repo(path)
    repo.git.add(path)
    try:
        repo.git.commit(m='DO NOT MERGE: Automatic translation import test commit')
        # The push itself is disabled for now:
        # repo.git.push('ssh://cobjeM@review.cyanogenmod.org:29418/' + name, 'HEAD:refs/for/cm-11.0')
    except git.GitCommandError:
        # If git commit fails, it's probably because of no changes.
        # Just continue. Only git's own failure is caught here, so Ctrl-C
        # and unrelated errors are no longer silently swallowed.
        print('No commit pushed (probably empty?) for ' + name)
        print('WARNING: If the repository name was not obtained from default.xml, the name might be wrong!')
    else:
        print('Succesfully pushed commit for ' + name)

196

197

print('Welcome to the CM Crowdin sync script!')

print('\nSTEP 0: Checking dependencies')
# Check for Ruby version of crowdin-cli.
# 'gem list <name> -i' prints 'true' when the gem is installed and exits
# non-zero otherwise, so a failing call is a negative answer, not an error.
try:
    gem_check = subprocess.check_output(['rvm', 'all', 'do', 'gem', 'list', 'crowdin-cli', '-i'])
except subprocess.CalledProcessError as gem_error:
    # Keep whatever was printed: with several rubies under rvm, one of them
    # may still have reported 'true'.
    gem_check = gem_error.output
except OSError:
    # rvm itself could not be started
    gem_check = b''
if b'true' not in gem_check:
    sys.exit('You have not installed crowdin-cli. Terminating.')
print('Found: crowdin-cli')

# Check for caf.xml
if not os.path.isfile('caf.xml'):
    sys.exit('You have no caf.xml. Terminating.')
print('Found: caf.xml')

# Check for default.xml
if not os.path.isfile('default.xml'):
    sys.exit('You have no default.xml. Terminating.')
print('Found: default.xml')

# Check for repo
try:
    subprocess.check_output(['which', 'repo'])
except (OSError, subprocess.CalledProcessError):
    sys.exit('You have not installed repo. Terminating.')

Marco Brohet

2014-02-28 01:06:03 +0100

[diff] [blame]

220

Marco Brohet

2014-03-08 19:13:07 +0100

[diff] [blame]

221

print('\nSTEP 1: Create cm_caf.xml')
# Load caf.xml
print('Loading caf.xml')
xml = minidom.parse('caf.xml')
items = xml.getElementsByTagName('item')

# Store all created cm_caf.xml files in here.
# Easier to remove them afterwards, as they cannot be committed
cm_caf = []

for item in items:
    # Create tmp dir for download of AOSP base file
    path_to_values = item.attributes["path"].value
    tmp_values_dir = 'tmp/' + path_to_values
    if not os.path.isdir(tmp_values_dir):
        os.makedirs(tmp_values_dir)

    # Collect the contents first, so that a failed download does not leave
    # a half-written cm_caf.xml behind.
    # This means we also support multiple base files (e.g. checking if strings.xml and arrays.xml are changed)
    contents = []
    for aosp_item in item.getElementsByTagName('aosp'):
        url = aosp_item.firstChild.nodeValue
        xml_file = aosp_item.attributes["file"].value
        path_to_base = 'tmp/' + path_to_values + '/' + xml_file
        path_to_cm = path_to_values + '/' + xml_file
        urlretrieve(url, path_to_base)
        contents.extend(get_caf_additions(path_to_base, path_to_cm))

    # Create cm_caf.xml; the with-block closes the file even if a write fails
    cm_caf_path = path_to_values + '/cm_caf.xml'
    with codecs.open(cm_caf_path, 'w', 'utf-8') as f:
        # Header
        f.write('<?xml version="1.0" encoding="utf-8"?>\n')
        f.write('<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2">\n')
        # Contents
        for addition in contents:
            f.write(addition + '\n')
        # The end
        f.write('</resources>')
    cm_caf.append(cm_caf_path)
    print('Created ' + cm_caf_path)

Marco Brohet

2014-02-28 01:06:03 +0100

[diff] [blame]

257

258

print('\nSTEP 2: Upload Crowdin source translations')
# Run 'crowdin-cli upload sources', then echo what it printed
upload_output = subprocess.check_output(['crowdin-cli', 'upload', 'sources'])
print(upload_output)

print('STEP 3: Download Crowdin translations')
# Run 'crowdin-cli download', then echo what it printed
download_output = subprocess.check_output(['crowdin-cli', "download"])
print(download_output)

Marco Brohet

2014-02-28 01:06:03 +0100

[diff] [blame]

265

Marco Brohet

2014-03-08 19:13:07 +0100

[diff] [blame]

266

print('STEP 4A: Clean up of source cm_caf.xmls')
# Remove all cm_caf.xml files, which you can find in the list 'cm_caf'
for cm_caf_file in cm_caf:
    print('Removing ' + cm_caf_file)
    os.remove(cm_caf_file)

print('\nSTEP 4B: Clean up of temp dir')
# We are done with cm_caf.xml files, so remove tmp/
# (tmp/ only exists when caf.xml listed at least one item)
tmp_dir = os.getcwd() + '/tmp'
if os.path.isdir(tmp_dir):
    shutil.rmtree(tmp_dir)

print('\nSTEP 4C: Clean up of empty translations')
# Find all XML files below the current working directory
result = [os.path.join(dp, f) for dp, dn, filenames in os.walk(os.getcwd()) for f in filenames if os.path.splitext(f)[1] == '.xml']
# We hate empty, useless files. Crowdin exports them with <resources/> (sometimes with xliff).
# That means: easy to find
empty_markers = (
    '<resources/>',
    '<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"/>',
)
for xml_file in result:
    # Read each file once and close it again before it may be removed
    with open(xml_file) as xml_handle:
        xml_text = xml_handle.read()
    if any(marker in xml_text for marker in empty_markers):
        print('Removing ' + xml_file)
        os.remove(xml_file)

288

Marco Brohet

2014-02-28 01:06:03 +0100

[diff] [blame]

289

print('\nSTEP 5: Push translations to Git')
# Get all files that Crowdin pushed
proc = subprocess.Popen(['crowdin-cli', 'list', 'sources'],stdout=subprocess.PIPE)
xml = minidom.parse('default.xml')
items = xml.getElementsByTagName('project')
all_projects = []

for path in iter(proc.stdout.readline,''):
    # Remove the \n at the end of each line
    path = path.rstrip()
    # Get project root dir from Crowdin's output
    # NOTE(review): `m` is not assigned anywhere at module level in the
    # visible source; the statement that matches `path` against a pattern
    # appears to be missing right here. Restore it before running -- as it
    # stands the next line raises NameError.
    for good_path in m.groups():
        # When a project has multiple translatable files, Crowdin will give duplicates.
        # We don't want that (useless empty commits), so we save each project in all_projects
        # and check if it's already in there.
        if good_path is None or good_path in all_projects:
            continue
        all_projects.append(good_path)
        working = False
        for project_item in items:
            # We need to have the Github repository for the git push url. Obtain them from
            # default.xml based on the project root dir.
            if project_item.attributes["path"].value == good_path:
                working = True
                project_name = project_item.attributes['name'].value
                push_as_commit(good_path, project_name)
                print('Committing ' + project_name + ' (based on default.xml)')
        # We also translate repositories that are not downloaded by default (e.g. device parts).
        # This is just a fallback.
        # WARNING: If the name is wrong, this will not stop the script.
        if not working:
            fallback_name = 'CyanogenMod/android_' + good_path.replace('/', '_')
            push_as_commit(good_path, fallback_name)
            # Report the name that was actually used, not whichever project
            # the manifest loop happened to visit last.
            print('Committing ' + fallback_name + ' (workaround)')

Marco Brohet