Blame - tools/get_search_engines.py - android_packages_apps_Gello

blob: 2eecec3a103efcade113947910c45d3d8042c902 [file] [log] [blame]

Bjorn Bringert	d69f51d	2010-09-13 14:06:41 +0100	[diff] [blame^]	1	#!/usr/bin/python2.4
				2	#
				3	# Copyright (C) 2010 The Android Open Source Project
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the "License");
				6	# you may not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# http://www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an "AS IS" BASIS,
				13	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16	#
				17	"""
				18	Creates the list of search engines
				19
				20	The created list is placed in the res/values-<locale> directory. Also updates
				21	res/values/all_search_engines.xml if required with new data.
				22
				23	Usage: get_search_engines.py
				24
				25	Copyright (C) 2010 The Android Open Source Project
				26	"""
				27
				28	import os
				29	import re
				30	import sys
				31	import urllib
				32	from xml.dom import minidom
				33
				34	# Locales to generate search engine lists for
				35	locales = ["cs-CZ", "da-DK", "de-AT", "de-CH", "de-DE", "el-GR", "en-AU",
				36	"en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA", "es-ES", "fr-BE", "fr-FR",
				37	"it-IT", "ja-JP", "ko-KR", "nb-NO", "nl-BE", "nl-NL", "pl-PL", "pt-PT",
				38	"pt-BR", "ru-RU", "sv-SE", "tr-TR", "zh-CN", "zh-HK", "zh-MO", "zh-TW"]
				39
				40	class SearchEngineManager(object):
				41	"""Manages list of search engines and creates locale specific lists.
				42
				43	The main method useful for the caller is generateListForLocale(), which
				44	creates a locale specific search_engines.xml file suitable for use by the
				45	Android WebSearchProvider implementation.
				46	"""
				47
				48	def __init__(self):
				49	"""Inits SearchEngineManager with relevant search engine data.
				50
				51	The search engine data is downloaded from the Chrome source repository.
				52	"""
				53	self.chrome_data = urllib.urlopen(
				54	'http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
				55	'browser/search_engines/template_url_prepopulate_data.cc').read()
				56	if self.chrome_data.lower().find('repository not found') != -1:
				57	print 'Unable to get Chrome source data for search engine list.\nExiting.'
				58	sys.exit(2)
				59
				60	self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))
				61
				62	self.all_engines = set()
				63
				64	def getXmlString(self, str):
				65	"""Returns an XML-safe string for the given string.
				66
				67	Given a string from the search engine data structure, convert it to a
				68	string suitable to write to our XML data file by stripping away NULLs,
				69	unwanted quotes, wide-string declarations (L"") and replacing C-style
				70	unicode characters with XML equivalents.
				71	"""
				72	str = str.strip()
				73	if str.upper() == 'NULL':
				74	return ''
				75
				76	if str.startswith('L"'):
				77	str = str[2:]
				78	if str.startswith('@') or str.startswith('?'):
				79	str = '\\' + str
				80
				81	str = str.strip('"')
				82	str = str.replace('&', '&').replace('<', '<').replace('>', '>')
				83	str = str.replace('"', '"').replace('\'', ''')
				84	str = re.sub(r'\\x([a-fA-F0-9]+)', r'&#x\1;', str)
				85
				86	return str
				87
				88	def getEngineData(self, name):
				89	"""Returns an array of strings describing the specified search engine.
				90
				91	The returned strings are in the same order as in the Chrome source data file
				92	except that the internal name of the search engine is inserted at the
				93	beginning of the list.
				94	"""
				95	# Find the first occurance of this search engine name in the form
				96	# " <name> =" in the chrome data file.
				97	re_exp = '\s' + name + '\s*='
				98	search_obj = re.search(re_exp, self.chrome_data)
				99	if not search_obj:
				100	print ('Unable to find data for search engine ' + name +
				101	'. Please check the chrome data file for format changes.')
				102	return None
				103
				104	# Extract the struct declaration between the curly braces.
				105	start_pos = self.chrome_data.find('{', search_obj.start()) + 1;
				106	end_pos = self.chrome_data.find('};', start_pos);
				107	engine_data_str = self.chrome_data[start_pos:end_pos]
				108
				109	# Remove c++ style '//' comments at the ends of each line
				110	engine_data_lines = engine_data_str.split('\n')
				111	engine_data_str = ""
				112	for line in engine_data_lines:
				113	start_pos = line.find(' // ')
				114	if start_pos != -1:
				115	line = line[:start_pos]
				116	engine_data_str = engine_data_str + line + '\n'
				117
				118	# Join multiple line strings into a single string.
				119	engine_data_str = re.sub('\"\s+\"', '', engine_data_str)
				120	engine_data_str = re.sub('\"\s+L\"', '', engine_data_str)
				121	engine_data_str = engine_data_str.replace('"L"', '')
				122
				123	engine_data = engine_data_str.split(',')
				124	for i in range(len(engine_data)):
				125	engine_data[i] = self.getXmlString(engine_data[i])
				126
				127	# If the last element was an empty string (due to an extra comma at the
				128	# end), ignore it.
				129	if not engine_data[len(engine_data) - 1]:
				130	engine_data.pop()
				131
				132	engine_data.insert(0, name)
				133
				134	return engine_data
				135
				136	def getSearchEnginesForCountry(self, country):
				137	"""Returns the list of search engine names for the given country.
				138
				139	The data comes from the Chrome data file.
				140	"""
				141	# The Chrome data file has an array defined with the name 'engines_XX'
				142	# where XX = country.
				143	pos = self.chrome_data.find('engines_' + country)
				144	if pos == -1:
				145	print ('Unable to find search engine data for country ' + country + '.')
				146	return
				147
				148	# Extract the text between the curly braces for this array declaration
				149	engines_start = self.chrome_data.find('{', pos) + 1;
				150	engines_end = self.chrome_data.find('}', engines_start);
				151	engines_str = self.chrome_data[engines_start:engines_end]
				152
				153	# Remove embedded /**/ style comments, white spaces, address-of operators
				154	# and the trailing comma if any.
				155	engines_str = re.sub('\/\.+\\/', '', engines_str)
				156	engines_str = re.sub('\s+', '', engines_str)
				157	engines_str = engines_str.replace('&','')
				158	engines_str = engines_str.rstrip(',')
				159
				160	# Split the array into it's elements
				161	engines = engines_str.split(',')
				162
				163	return engines
				164
				165	def writeAllEngines(self):
				166	"""Writes all search engines to the all_search_engines.xml file.
				167	"""
				168
				169	all_search_engines_path = os.path.join(self.resdir, 'values/all_search_engines.xml')
				170
				171	text = []
				172
				173	for engine_name in self.all_engines:
				174	engine_data = self.getEngineData(engine_name)
				175	text.append(' <string-array name="%s" translatable="false">\n' % (engine_data[0]))
				176	for i in range(1, 7):
				177	text.append(' <item>%s</item>\n' % (engine_data[i]))
				178	text.append(' </string-array>\n')
				179	print engine_data[1] + " added to all_search_engines.xml"
				180
				181	self.generateXmlFromTemplate(os.path.join(sys.path[0], 'all_search_engines.template.xml'),
				182	all_search_engines_path, text)
				183
				184	def generateDefaultList(self):
				185	self.writeEngineList(os.path.join(self.resdir, 'values'), "default")
				186
				187	def generateListForLocale(self, locale):
				188	"""Creates a new locale specific search_engines.xml file.
				189
				190	The new file contains search engines specific to that country. If required
				191	this function updates all_search_engines.xml file with any new search
				192	engine data necessary.
				193	"""
				194	separator_pos = locale.find('-')
				195	if separator_pos == -1:
				196	print ('Locale must be of format <language>-<country>. For e.g.'
				197	' "es-US" or "en-GB"')
				198	return
				199
				200	language = locale[0:separator_pos]
				201	country = locale[separator_pos + 1:].upper()
				202	dir_path = os.path.join(self.resdir, 'values-' + language + '-r' + country)
				203
				204	self.writeEngineList(dir_path, country)
				205
				206	def writeEngineList(self, dir_path, country):
				207	if os.path.exists(dir_path) and not os.path.isdir(dir_path):
				208	print "File exists in output directory path " + dir_path + ". Please remove it and try again."
				209	return
				210
				211	engines = self.getSearchEnginesForCountry(country)
				212	if not engines:
				213	return
				214	for engine in engines:
				215	self.all_engines.add(engine)
				216
				217	# Create the locale specific search_engines.xml file. Each
				218	# search_engines.xml file has a hardcoded list of 7 items. If there are less
				219	# than 7 search engines for this country, the remaining items are marked as
				220	# enabled=false.
				221	text = []
				222	text.append(' <string-array name="search_engines" translatable="false">\n');
				223	for engine in engines:
				224	engine_data = self.getEngineData(engine)
				225	name = engine_data[0]
				226	text.append(' <item>%s</item>\n' % (name))
				227	text.append(' </string-array>\n');
				228
				229	self.generateXmlFromTemplate(os.path.join(sys.path[0], 'search_engines.template.xml'),
				230	os.path.join(dir_path, 'search_engines.xml'),
				231	text)
				232
				233	def generateXmlFromTemplate(self, template_path, out_path, text):
				234	# Load the template file and insert the new contents before the last line.
				235	template_text = open(template_path).read()
				236	pos = template_text.rfind('\n', 0, -2) + 1
				237	contents = template_text[0:pos] + ''.join(text) + template_text[pos:]
				238
				239	# Make sure what we have created is valid XML :) No need to check for errors
				240	# as the script will terminate with an exception if the XML was malformed.
				241	engines_dom = minidom.parseString(contents)
				242
				243	dir_path = os.path.dirname(out_path)
				244	if not os.path.exists(dir_path):
				245	os.makedirs(dir_path)
				246	print 'Created directory ' + dir_path
				247	file = open(out_path, 'w')
				248	file.write(contents)
				249	file.close()
				250	print 'Wrote ' + out_path
				251
				252	if __name__ == "__main__":
				253	manager = SearchEngineManager()
				254	manager.generateDefaultList()
				255	for locale in locales:
				256	manager.generateListForLocale(locale)
				257	manager.writeAllEngines()
				258