Blame - libc/kernel/tools/cpp.py - platform_bionic

blob: e3a9038ce488181496e4b460af5cfdcc2809cb62 [file] [log] [blame]

The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1	# a glorified C pre-processor parser
				2
				3	import sys, re, string
				4	from utils import *
				5	from defaults import *
				6
				7	debugTokens = False
				8	debugDirectiveTokenizer = False
				9	debugLineParsing = False
				10	debugCppExpr = False
				11	debugOptimIf01 = False
				12
				13	#####################################################################################
				14	#####################################################################################
				15	##### #####
				16	##### C P P T O K E N S #####
				17	##### #####
				18	#####################################################################################
				19	#####################################################################################
				20
				21	# the list of supported C-preprocessor tokens
				22	# plus a couple of C tokens as well
				23	tokEOF = "\0"
				24	tokLN = "\n"
				25	tokSTRINGIFY = "#"
				26	tokCONCAT = "##"
				27	tokLOGICAND = "&&"
				28	tokLOGICOR = "\|\|"
				29	tokSHL = "<<"
				30	tokSHR = ">>"
				31	tokEQUAL = "=="
				32	tokNEQUAL = "!="
				33	tokLT = "<"
				34	tokLTE = "<="
				35	tokGT = ">"
				36	tokGTE = ">="
				37	tokELLIPSIS = "..."
				38	tokSPACE = " "
				39	tokDEFINED = "defined"
				40	tokLPAREN = "("
				41	tokRPAREN = ")"
				42	tokNOT = "!"
				43	tokPLUS = "+"
				44	tokMINUS = "-"
				45	tokMULTIPLY = "*"
				46	tokDIVIDE = "/"
				47	tokMODULUS = "%"
				48	tokBINAND = "&"
				49	tokBINOR = "\|"
				50	tokBINXOR = "^"
				51	tokCOMMA = ","
				52	tokLBRACE = "{"
				53	tokRBRACE = "}"
				54	tokARROW = "->"
				55	tokINCREMENT = "++"
				56	tokDECREMENT = "--"
				57	tokNUMBER = "<number>"
				58	tokIDENT = "<ident>"
				59	tokSTRING = "<string>"
				60
				61	class Token:
				62	"""a simple class to hold information about a given token.
				63	each token has a position in the source code, as well as
				64	an 'id' and a 'value'. the id is a string that identifies
				65	the token's class, while the value is the string of the
				66	original token itself.
				67
				68	for example, the tokenizer concatenates a series of spaces
				69	and tabs as a single tokSPACE id, whose value if the original
				70	spaces+tabs sequence."""
				71
				72	def __init__(self):
				73	self.id = None
				74	self.value = None
				75	self.lineno = 0
				76	self.colno = 0
				77
				78	def set(self,id,val=None):
				79	self.id = id
				80	if val:
				81	self.value = val
				82	else:
				83	self.value = id
				84	return None
				85
				86	def copyFrom(self,src):
				87	self.id = src.id
				88	self.value = src.value
				89	self.lineno = src.lineno
				90	self.colno = src.colno
				91
				92	def __repr__(self):
				93	if self.id == tokIDENT:
				94	return "(ident %s)" % self.value
				95	if self.id == tokNUMBER:
				96	return "(number %s)" % self.value
				97	if self.id == tokSTRING:
				98	return "(string '%s')" % self.value
				99	if self.id == tokLN:
				100	return "<LN>"
				101	if self.id == tokEOF:
				102	return "<EOF>"
				103	if self.id == tokSPACE and self.value == "\\":
				104	# this corresponds to a trailing \ that was transformed into a tokSPACE
				105	return "<\\>"
				106
				107	return self.id
				108
				109	def __str__(self):
				110	if self.id == tokIDENT:
				111	return self.value
				112	if self.id == tokNUMBER:
				113	return self.value
				114	if self.id == tokSTRING:
				115	return self.value
				116	if self.id == tokEOF:
				117	return "<EOF>"
				118	if self.id == tokSPACE:
				119	if self.value == "\\": # trailing \
				120	return "\\\n"
				121	else:
				122	return self.value
				123
				124	return self.id
				125
				126	class BadExpectedToken(Exception):
				127	def __init__(self,msg):
				128	print msg
				129
				130	#####################################################################################
				131	#####################################################################################
				132	##### #####
				133	##### C P P T O K E N C U R S O R #####
				134	##### #####
				135	#####################################################################################
				136	#####################################################################################
				137
				138	class TokenCursor:
				139	"""a small class to iterate over a list of Token objects"""
				140	def __init__(self,tokens):
				141	self.tokens = tokens
				142	self.n = 0
				143	self.count = len(tokens)
				144
				145	def set(self,n):
				146	"""set the current position"""
				147	if n < 0:
				148	n = 0
				149	if n > self.count:
				150	n = self.count
				151	self.n = n
				152
				153	def peekId(self):
				154	"""retrieve the id of the current token"""
				155	if (self.n >= self.count):
				156	return None
				157	return self.tokens[self.n].id
				158
				159	def peek(self):
				160	"""retrieve the current token. does not change position"""
				161	if (self.n >= self.count):
				162	return None
				163	return self.tokens[self.n]
				164
				165	def skip(self):
				166	"""increase current token position"""
				167	if (self.n < self.count):
				168	self.n += 1
				169
				170	def skipSpaces(self):
				171	"""skip over all space tokens, this includes tokSPACE and tokLN"""
				172	while 1:
				173	tok = self.peekId()
				174	if tok != tokSPACE and tok != tokLN:
				175	break
				176	self.skip()
				177
				178	def skipIfId(self,id):
				179	"""skip an optional token"""
				180	if self.peekId() == id:
				181	self.skip()
				182
				183	def expectId(self,id):
				184	"""raise an exception if the current token hasn't a given id.
				185	otherwise skip over it"""
				186	tok = self.peek()
				187	if tok.id != id:
				188	raise BadExpectedToken, "%d:%d: '%s' expected, received '%s'" % (tok.lineno, tok.colno, id, tok.id)
				189	self.skip()
				190
				191	def remain(self):
				192	"""return the list of remaining tokens"""
				193	return self.tokens[self.n:]
				194
				195
				196	#####################################################################################
				197	#####################################################################################
				198	##### #####
				199	##### C P P T O K E N I Z E R #####
				200	##### #####
				201	#####################################################################################
				202	#####################################################################################
				203
				204	# list of long symbols, i.e. those that take more than one characters
				205	cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\
				206	tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ]
				207
				208	class CppTokenizer:
				209	"""an abstract class used to convert some input text into a list
				210	of tokens. real implementations follow and differ in the format
				211	of the input text only"""
				212
				213	def __init__(self):
				214	"""initialize a new CppTokenizer object"""
				215	self.eof = False # end of file reached ?
				216	self.text = None # content of current line, with final \n stripped
				217	self.line = 0 # number of current line
				218	self.pos = 0 # current character position in current line
				219	self.len = 0 # length of current line text
				220	self.held = Token()
				221
				222	def setLineText(self,line):
				223	"""set the content of the (next) current line. should be called
				224	by fillLineText() in derived classes"""
				225	self.text = line
				226	self.len = len(line)
				227	self.pos = 0
				228
				229	def fillLineText(self):
				230	"""refresh the content of 'line' with a new line of input"""
				231	# to be overriden
				232	self.eof = True
				233
				234	def markPos(self,tok):
				235	"""mark the position of the current token in the source file"""
				236	if self.eof or self.pos > self.len:
				237	tok.lineno = self.line + 1
				238	tok.colno = 0
				239	else:
				240	tok.lineno = self.line
				241	tok.colno = self.pos
				242
				243	def peekChar(self):
				244	"""return the current token under the cursor without moving it"""
				245	if self.eof:
				246	return tokEOF
				247
				248	if self.pos > self.len:
				249	self.pos = 0
				250	self.line += 1
				251	self.fillLineText()
				252	if self.eof:
				253	return tokEOF
				254
				255	if self.pos == self.len:
				256	return tokLN
				257	else:
				258	return self.text[self.pos]
				259
				260	def peekNChar(self,n):
				261	"""try to peek the next n chars on the same line"""
				262	if self.pos + n > self.len:
				263	return None
				264	return self.text[self.pos:self.pos+n]
				265
				266	def skipChar(self):
				267	"""increment the token cursor position"""
				268	if not self.eof:
				269	self.pos += 1
				270
				271	def skipNChars(self,n):
				272	if self.pos + n <= self.len:
				273	self.pos += n
				274	else:
				275	while n > 0:
				276	self.skipChar()
				277	n -= 1
				278
				279	def nextChar(self):
				280	"""retrieve the token at the current cursor position, then skip it"""
				281	result = self.peekChar()
				282	self.skipChar()
				283	return result
				284
				285	def getEscape(self):
				286	# try to get all characters after a backslash (\)
				287	result = self.nextChar()
				288	if result == "0":
				289	# octal number ?
				290	num = self.peekNChar(3)
				291	if num != None:
				292	isOctal = True
				293	for d in num:
				294	if not d in "01234567":
				295	isOctal = False
				296	break
				297	if isOctal:
				298	result += num
				299	self.skipNChars(3)
				300	elif result == "x" or result == "X":
				301	# hex number ?
				302	num = self.peekNChar(2)
				303	if num != None:
				304	isHex = True
				305	for d in num:
				306	if not d in "012345678abcdefABCDEF":
				307	isHex = False
				308	break
				309	if isHex:
				310	result += num
				311	self.skipNChars(2)
				312	elif result == "u" or result == "U":
				313	# unicode char ?
				314	num = self.peekNChar(4)
				315	if num != None:
				316	isHex = True
				317	for d in num:
				318	if not d in "012345678abcdefABCDEF":
				319	isHex = False
				320	break
				321	if isHex:
				322	result += num
				323	self.skipNChars(4)
				324
				325	return result
				326
				327	def nextRealToken(self,tok):
				328	"""return next CPP token, used internally by nextToken()"""
				329	c = self.nextChar()
				330	if c == tokEOF or c == tokLN:
				331	return tok.set(c)
				332
				333	if c == '/':
				334	c = self.peekChar()
				335	if c == '/': # C++ comment line
				336	self.skipChar()
				337	while 1:
				338	c = self.nextChar()
				339	if c == tokEOF or c == tokLN:
				340	break
				341	return tok.set(tokLN)
				342	if c == '*': # C comment start
				343	self.skipChar()
				344	value = "/*"
				345	prev_c = None
				346	while 1:
				347	c = self.nextChar()
				348	if c == tokEOF:
				349	#print "## EOF after '%s'" % value
				350	return tok.set(tokEOF,value)
				351	if c == '/' and prev_c == '*':
				352	break
				353	prev_c = c
				354	value += c
				355
				356	value += "/"
				357	#print "## COMMENT: '%s'" % value
				358	return tok.set(tokSPACE,value)
				359	c = '/'
				360
				361	if c.isspace():
				362	while 1:
				363	c2 = self.peekChar()
				364	if c2 == tokLN or not c2.isspace():
				365	break
				366	c += c2
				367	self.skipChar()
				368	return tok.set(tokSPACE,c)
				369
				370	if c == '\\':
				371	if debugTokens:
				372	print "nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar())
				373	if self.peekChar() == tokLN: # trailing \
				374	# eat the tokLN
				375	self.skipChar()
				376	# we replace a trailing \ by a tokSPACE whose value is
				377	# simply "\\". this allows us to detect them later when
				378	# needed.
				379	return tok.set(tokSPACE,"\\")
				380	else:
				381	# treat as a single token here ?
				382	c +=self.getEscape()
				383	return tok.set(c)
				384
				385	if c == "'": # chars
				386	c2 = self.nextChar()
				387	c += c2
				388	if c2 == '\\':
				389	c += self.getEscape()
				390
				391	while 1:
				392	c2 = self.nextChar()
				393	if c2 == tokEOF:
				394	break
				395	c += c2
				396	if c2 == "'":
				397	break
				398
				399	return tok.set(tokSTRING, c)
				400
				401	if c == '"': # strings
				402	quote = 0
				403	while 1:
				404	c2 = self.nextChar()
				405	if c2 == tokEOF:
				406	return tok.set(tokSTRING,c)
				407
				408	c += c2
				409	if not quote:
				410	if c2 == '"':
				411	return tok.set(tokSTRING,c)
				412	if c2 == "\\":
				413	quote = 1
				414	else:
				415	quote = 0
				416
				417	if c >= "0" and c <= "9": # integers ?
				418	while 1:
				419	c2 = self.peekChar()
				420	if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
				421	break
				422	c += c2
				423	self.skipChar()
				424	return tok.set(tokNUMBER,c)
				425
				426	if c.isalnum() or c == "_": # identifiers ?
				427	while 1:
				428	c2 = self.peekChar()
				429	if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
				430	break
				431	c += c2
				432	self.skipChar()
				433	if c == tokDEFINED:
				434	return tok.set(tokDEFINED)
				435	else:
				436	return tok.set(tokIDENT,c)
				437
				438	# check special symbols
				439	for sk in cppLongSymbols:
				440	if c == sk[0]:
				441	sklen = len(sk[1:])
				442	if self.pos + sklen <= self.len and \
				443	self.text[self.pos:self.pos+sklen] == sk[1:]:
				444	self.pos += sklen
				445	return tok.set(sk)
				446
				447	return tok.set(c)
				448
				449	def nextToken(self,tok):
				450	"""return the next token from the input text. this function
				451	really updates 'tok', and does not return a new one"""
				452	self.markPos(tok)
				453	self.nextRealToken(tok)
				454
				455	def getToken(self):
				456	tok = Token()
				457	self.nextToken(tok)
				458	if debugTokens:
				459	print "getTokens: %s" % repr(tok)
				460	return tok
				461
				462	def toTokenList(self):
				463	"""convert the input text of a CppTokenizer into a direct
				464	list of token objects. tokEOF is stripped from the result"""
				465	result = []
				466	while 1:
				467	tok = Token()
				468	self.nextToken(tok)
				469	if tok.id == tokEOF:
				470	break
				471	result.append(tok)
				472	return result
				473
				474	class CppLineTokenizer(CppTokenizer):
				475	"""a CppTokenizer derived class that accepts a single line of text as input"""
				476	def __init__(self,line,lineno=1):
				477	CppTokenizer.__init__(self)
				478	self.line = lineno
				479	self.setLineText(line)
				480
				481
				482	class CppLinesTokenizer(CppTokenizer):
				483	"""a CppTokenizer derived class that accepts a list of texdt lines as input.
				484	the lines must not have a trailing \n"""
				485	def __init__(self,lines=[],lineno=1):
				486	"""initialize a CppLinesTokenizer. you can later add lines using addLines()"""
				487	CppTokenizer.__init__(self)
				488	self.line = lineno
				489	self.lines = lines
				490	self.index = 0
				491	self.count = len(lines)
				492
				493	if self.count > 0:
				494	self.fillLineText()
				495	else:
				496	self.eof = True
				497
				498	def addLine(self,line):
				499	"""add a line to a CppLinesTokenizer. this can be done after tokenization
				500	happens"""
				501	if self.count == 0:
				502	self.setLineText(line)
				503	self.index = 1
				504	self.lines.append(line)
				505	self.count += 1
				506	self.eof = False
				507
				508	def fillLineText(self):
				509	if self.index < self.count:
				510	self.setLineText(self.lines[self.index])
				511	self.index += 1
				512	else:
				513	self.eof = True
				514
				515
				516	class CppFileTokenizer(CppTokenizer):
				517	def __init__(self,file,lineno=1):
				518	CppTokenizer.__init__(self)
				519	self.file = file
				520	self.line = lineno
				521
				522	def fillLineText(self):
				523	line = self.file.readline()
				524	if len(line) > 0:
				525	if line[-1] == '\n':
				526	line = line[:-1]
				527	if len(line) > 0 and line[-1] == "\r":
				528	line = line[:-1]
				529	self.setLineText(line)
				530	else:
				531	self.eof = True
				532
				533	# Unit testing
				534	#
				535	class CppTokenizerTester:
				536	"""a class used to test CppTokenizer classes"""
				537	def __init__(self,tokenizer=None):
				538	self.tokenizer = tokenizer
				539	self.token = Token()
				540
				541	def setTokenizer(self,tokenizer):
				542	self.tokenizer = tokenizer
				543
				544	def expect(self,id):
				545	self.tokenizer.nextToken(self.token)
				546	tokid = self.token.id
				547	if tokid == id:
				548	return
				549	if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER):
				550	return
				551	raise BadExpectedToken, "### BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id)
				552
				553	def expectToken(self,id,line,col):
				554	self.expect(id)
				555	if self.token.lineno != line:
				556	raise BadExpectedToken, "### BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line)
				557	if self.token.colno != col:
				558	raise BadExpectedToken, "### BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col)
				559
				560	def expectTokenVal(self,id,value,line,col):
				561	self.expectToken(id,line,col)
				562	if self.token.value != value:
				563	raise BadExpectedToken, "### BAD VALUE: '%s' expecting '%s'" % (self.token.value,value)
				564
				565	def expectList(self,list):
				566	for item in list:
				567	self.expect(item)
				568
				569	def test_CppTokenizer():
				570	print "running CppTokenizer tests"
				571	tester = CppTokenizerTester()
				572
				573	tester.setTokenizer( CppLineTokenizer("#an/example && (01923_xy)") )
				574	tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \
				575	tokRPAREN, tokLN, tokEOF] )
				576
				577	tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") )
				578	tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE,
				579	tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] )
				580
				581	tester.setTokenizer( CppLinesTokenizer( ["/", "#", "/"] ) )
				582	tester.expectList( [ tokSPACE, tokLN, tokEOF ] )
				583
				584	tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) )
				585	tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] )
				586
				587	tester.setTokenizer( CppLinesTokenizer( ["first second", " third"] ) )
				588	tester.expectToken( "first", 1, 0 )
				589	tester.expectToken( tokSPACE, 1, 5 )
				590	tester.expectToken( "second", 1, 6 )
				591	tester.expectToken( tokLN, 1, 12 )
				592	tester.expectToken( tokSPACE, 2, 0 )
				593	tester.expectToken( "third", 2, 2 )
				594
				595	tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) )
				596	tester.expectList( [ "boo", tokSPACE ] )
				597	tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 )
				598	tester.expectList( [ tokLN, tokEOF ] )
				599
				600	tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) )
				601	tester.expectToken( "an", 1, 0 )
				602	tester.expectToken( tokSPACE, 1, 2 )
				603	tester.expectTokenVal( tokSPACE, "\\", 1, 3 )
				604	tester.expectToken( tokSPACE, 2, 0 )
				605	tester.expectToken( "example", 2, 1 )
				606	tester.expectToken( tokLN, 2, 8 )
				607
				608	return True
				609
				610
				611	#####################################################################################
				612	#####################################################################################
				613	##### #####
				614	##### C P P E X P R E S S I O N S #####
				615	##### #####
				616	#####################################################################################
				617	#####################################################################################
				618
				619	# Cpp expressions are modeled by tuples of the form (op,arg) or (op,arg1,arg2), etc..
				620	# op is an "operator" string
				621
				622	class Expr:
				623	"""a class used to model a CPP expression"""
				624	opInteger = "int"
				625	opIdent = "ident"
				626	opCall = "call"
				627	opDefined = "defined"
				628	opTest = "?"
				629	opLogicNot = "!"
				630	opNot = "~"
				631	opNeg = "[-]"
				632	opUnaryPlus = "[+]"
				633	opAdd = "+"
				634	opSub = "-"
				635	opMul = "*"
				636	opDiv = "/"
				637	opMod = "%"
				638	opAnd = "&"
				639	opOr = "\|"
				640	opXor = "^"
				641	opLogicAnd = "&&"
				642	opLogicOr = "\|\|"
				643	opEqual = "=="
				644	opNotEqual = "!="
				645	opLess = "<"
				646	opLessEq = "<="
				647	opGreater = ">"
				648	opGreaterEq = ">="
				649	opShl = "<<"
				650	opShr = ">>"
				651
				652	unaries = [ opLogicNot, opNot, opNeg, opUnaryPlus ]
				653	binaries = [ opAdd, opSub, opMul, opDiv, opMod, opAnd, opOr, opXor, opLogicAnd, opLogicOr,
				654	opEqual, opNotEqual, opLess, opLessEq, opGreater, opGreaterEq ]
				655
				656	precedences = {
				657	opTest: 0,
				658	opLogicOr: 1,
				659	opLogicNot: 2,
				660	opOr : 3,
				661	opXor: 4,
				662	opAnd: 5,
				663	opEqual: 6, opNotEqual: 6,
				664	opLess:7, opLessEq:7, opGreater:7, opGreaterEq:7,
				665	opShl:8, opShr:8,
				666	opAdd:9, opSub:9,
				667	opMul:10, opDiv:10, opMod:10,
				668	opLogicNot:11,
				669	opNot: 12,
				670	}
				671
				672	def __init__(self,op):
				673	self.op = op
				674
				675	def __repr__(self):
				676	return "(%s)" % self.op
				677
				678	def __str__(self):
				679	return "operator(%s)" % self.op
				680
				681	def precedence(self):
				682	"""return the precedence of a given operator"""
				683	return Expr.precedences.get(self.op, 1000)
				684
				685	def isUnary(self):
				686	return self.op in Expr.unaries
				687
				688	def isBinary(self):
				689	return self.op in Expr.binaries
				690
				691	def isDefined(self):
				692	return self.op is opDefined
				693
				694	def toInt(self):
				695	"""return the integer value of a given expression. only valid for integer expressions
				696	will return None otherwise"""
				697	return None
				698
				699	class IntExpr(Expr):
				700	def __init__(self,value):
				701	Expr.__init__(self,opInteger)
				702	self.arg = value
				703
				704	def __repr__(self):
				705	return "(int %s)" % self.arg
				706
				707	def __str__(self):
				708	return self.arg
				709
				710	def toInt(self):
				711	s = self.arg # string value
				712	# get rid of U or L suffixes
				713	while len(s) > 0 and s[-1] in "LUlu":
				714	s = s[:-1]
				715	return string.atoi(s)
				716
				717	class IdentExpr(Expr):
				718	def __init__(self,name):
				719	Expr.__init__(self,opIdent)
				720	self.name = name
				721
				722	def __repr__(self):
				723	return "(ident %s)" % self.name
				724
				725	def __str__(self):
				726	return self.name
				727
				728	class CallExpr(Expr):
				729	def __init__(self,funcname,params):
				730	Expr.__init__(self,opCall)
				731	self.funcname = funcname
				732	self.params = params
				733
				734	def __repr__(self):
				735	result = "(call %s [" % self.funcname
				736	comma = ""
				737	for param in self.params:
				738	result += "%s%s" % (comma, repr(param))
				739	comma = ","
				740	result += "])"
				741	return result
				742
				743	def __str__(self):
				744	result = "%s(" % self.funcname
				745	comma = ""
				746	for param in self.params:
				747	result += "%s%s" % (comma, str(param))
				748	comma = ","
				749
				750	result += ")"
				751	return result
				752
				753	class TestExpr(Expr):
				754	def __init__(self,cond,iftrue,iffalse):
				755	Expr.__init__(self,opTest)
				756	self.cond = cond
				757	self.iftrue = iftrue
				758	self.iffalse = iffalse
				759
				760	def __repr__(self):
				761	return "(?: %s %s %s)" % (repr(self.cond),repr(self.iftrue),repr(self.iffalse))
				762
				763	def __str__(self):
				764	return "(%s) ? (%s) : (%s)" % (self.cond, self.iftrue, self.iffalse)
				765
				766	class SingleArgExpr(Expr):
				767	def __init__(self,op,arg):
				768	Expr.__init__(self,op)
				769	self.arg = arg
				770
				771	def __repr__(self):
				772	return "(%s %s)" % (self.op, repr(self.arg))
				773
				774	class DefinedExpr(SingleArgExpr):
				775	def __init__(self,op,macroname):
				776	SingleArgExpr.__init__(self.opDefined,macroname)
				777
				778	def __str__(self):
				779	return "defined(%s)" % self.arg
				780
				781
				782	class UnaryExpr(SingleArgExpr):
				783	def __init__(self,op,arg,opstr=None):
				784	SingleArgExpr.__init__(self,op,arg)
				785	if not opstr:
				786	opstr = op
				787	self.opstr = opstr
				788
				789	def __str__(self):
				790	arg_s = str(self.arg)
				791	arg_prec = self.arg.precedence()
				792	self_prec = self.precedence()
				793	if arg_prec < self_prec:
				794	return "%s(%s)" % (self.opstr,arg_s)
				795	else:
				796	return "%s%s" % (self.opstr, arg_s)
				797
				798	class TwoArgExpr(Expr):
				799	def __init__(self,op,arg1,arg2):
				800	Expr.__init__(self,op)
				801	self.arg1 = arg1
				802	self.arg2 = arg2
				803
				804	def __repr__(self):
				805	return "(%s %s %s)" % (self.op, repr(self.arg1), repr(self.arg2))
				806
				807	class BinaryExpr(TwoArgExpr):
				808	def __init__(self,op,arg1,arg2,opstr=None):
				809	TwoArgExpr.__init__(self,op,arg1,arg2)
				810	if not opstr:
				811	opstr = op
				812	self.opstr = opstr
				813
				814	def __str__(self):
				815	arg1_s = str(self.arg1)
				816	arg2_s = str(self.arg2)
				817	arg1_prec = self.arg1.precedence()
				818	arg2_prec = self.arg2.precedence()
				819	self_prec = self.precedence()
				820
				821	result = ""
				822	if arg1_prec < self_prec:
				823	result += "(%s)" % arg1_s
				824	else:
				825	result += arg1_s
				826
				827	result += " %s " % self.opstr
				828
				829	if arg2_prec < self_prec:
				830	result += "(%s)" % arg2_s
				831	else:
				832	result += arg2_s
				833
				834	return result
				835
				836	#####################################################################################
				837	#####################################################################################
				838	##### #####
				839	##### C P P E X P R E S S I O N P A R S E R #####
				840	##### #####
				841	#####################################################################################
				842	#####################################################################################
				843
				844
				845	class ExprParser:
				846	"""a class used to convert a list of tokens into a cpp Expr object"""
				847
				848	re_octal = re.compile(r"\s$0[0-7]+$.")
				849	re_decimal = re.compile(r"\s$\d+[ulUL]$.*")
				850	re_hexadecimal = re.compile(r"\s$0[xX][0-9a-fA-F]$.*")
				851
				852	def __init__(self,tokens):
				853	self.tok = tokens
				854	self.n = len(self.tok)
				855	self.i = 0
				856
				857	def mark(self):
				858	return self.i
				859
				860	def release(self,pos):
				861	self.i = pos
				862
				863	def peekId(self):
				864	if self.i < self.n:
				865	return self.tok[self.i].id
				866	return None
				867
				868	def peek(self):
				869	if self.i < self.n:
				870	return self.tok[self.i]
				871	return None
				872
				873	def skip(self):
				874	if self.i < self.n:
				875	self.i += 1
				876
				877	def skipOptional(self,id):
				878	if self.i < self.n and self.tok[self.i].id == id:
				879	self.i += 1
				880
				881	def skipSpaces(self):
				882	i = self.i
				883	n = self.n
				884	tok = self.tok
				885	while i < n and (tok[i] == tokSPACE or tok[i] == tokLN):
				886	i += 1
				887	self.i = i
				888
				889	# all the isXXX functions returns a (expr,nextpos) pair if a match is found
				890	# or None if not
				891
				892	def is_integer(self):
				893	id = self.tok[self.i].id
				894	c = id[0]
				895	if c < '0' or c > '9':
				896	return None
				897
				898	m = ExprParser.re_octal.match(id)
				899	if m:
				900	return (IntExpr(id), m.end(1))
				901
				902	m = ExprParser.re_decimal.match(id)
				903	if m:
				904	return (IntExpr(id), m.end(1))
				905
				906	m = ExprParser.re_hexadecimal(id)
				907	if m:
				908	return (IntExpr(id), m.end(1))
				909
				910	return None
				911
				912	def is_defined(self):
				913	id = self.tok[self.i].id
				914	if id != "defined":
				915	return None
				916
				917	pos = self.mark()
				918
				919	use_paren = 0
				920	if self.peekId() == tokLPAREN:
				921	self.skip()
				922	use_paren = 1
				923
				924	if self.peekId() != tokIDENT:
				925	self.throw( BadExpectedToken, "identifier expected")
				926
				927	macroname = self.peek().value
				928	self.skip()
				929	if use_paren:
				930	self.skipSpaces()
				931	if self.peekId() != tokRPAREN:
				932	self.throw( BadExpectedToken, "missing right-paren after 'defined' directive")
				933	self.skip()
				934
				935	i = self.i
				936	return (DefinedExpr(macroname),i+1)
				937
				938	def is_call_or_ident(self):
				939	pass
				940
				941	def parse(self, i):
				942	return None
				943
				944	#####################################################################################
				945	#####################################################################################
				946	##### #####
				947	##### C P P E X P R E S S I O N S #####
				948	##### #####
				949	#####################################################################################
				950	#####################################################################################
				951
				952	class CppInvalidExpression(Exception):
				953	"""an exception raised when an invalid/unsupported cpp expression is detected"""
				954	pass
				955
				956	class CppExpr:
				957	"""a class that models the condition of #if directives into
				958	an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2)
				959	where "op" is a string describing the operation"""
				960
				961	unaries = [ "!", "~" ]
				962	binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "\|\|", "*", "/", "%", "&", "\|", "^", "<<", ">>", "==", "!=" ]
				963	precedences = { "\|\|": 1,
				964	"&&": 2,
				965	"\|": 3,
				966	"^": 4,
				967	"&": 5,
				968	"==":6, "!=":6,
				969	"<":7, "<=":7, ">":7, ">=":7,
				970	"<<":8, ">>":8,
				971	"+":9, "-":9,
				972	"*":10, "/":10, "%":10,
				973	"!":11, "~":12
				974	}
				975
				976	def __init__(self, tokens):
				977	"""initialize a CppExpr. 'tokens' must be a CppToken list"""
				978	self.tok = tokens
				979	self.n = len(tokens)
				980	if debugCppExpr:
				981	print "CppExpr: trying to parse %s" % repr(tokens)
				982	expr = self.is_expr(0)
				983	if debugCppExpr:
				984	print "CppExpr: got " + repr(expr)
				985	self.expr = expr[0]
				986
				987	re_cpp_constant = re.compile(r"((\d\|\w\|_)+)")
				988
				989	def throw(self,exception,i,msg):
				990	if i < self.n:
				991	tok = self.tok[i]
				992	print "%d:%d: %s" % (tok.lineno,tok.colno,msg)
				993	else:
				994	print "EOF: %s" % msg
				995	raise exception
				996
				997	def skip_spaces(self,i):
				998	"""skip spaces in input token list"""
				999	while i < self.n:
				1000	t = self.tok[i]
				1001	if t.id != tokSPACE and t.id != tokLN:
				1002	break
				1003	i += 1
				1004	return i
				1005
				1006	def expectId(self,i,id):
				1007	"""check that a given token id is at the current position, then skip over it"""
				1008	i = self.skip_spaces(i)
				1009	if i >= self.n or self.tok[i].id != id:
				1010	self.throw(BadExpectedToken,i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[i].id))
				1011	return i+1
				1012
				1013	def expectIdent(self,i):
				1014	i = self.skip_spaces(i)
				1015	if i >= self.n or self.tok[i].id != tokIDENT:
				1016	self.throw(BadExpectedToken,i,"### expecting identifier in expression, got '%s'" % (id, self.tok[i].id))
				1017	return i+1
				1018
				1019	# the is_xxxxx function returns either None or a pair (e,nextpos)
				1020	# where 'e' is an expression tuple (e.g. (op,arg)) and 'nextpos' is
				1021	# the corresponding next position in the input token list
				1022	#
				1023
				1024	def is_decimal(self,i):
				1025	v = self.tok[i].value[:]
				1026	while len(v) > 0 and v[-1] in "ULul":
				1027	v = v[:-1]
				1028	for digit in v:
				1029	if not digit.isdigit():
				1030	return None
				1031
				1032	# for an integer expression tuple, the argument
				1033	# is simply the value as an integer
				1034	val = string.atoi(v)
				1035	return ("int", val), i+1
				1036
				1037	def is_hexadecimal(self,i):
				1038	v = self.tok[i].value[:]
				1039	while len(v) > 0 and v[-1] in "ULul":
				1040	v = v[:-1]
				1041	if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"):
				1042	for digit in v[2:]:
				1043	if not digit in "0123456789abcdefABCDEF":
				1044	return None
				1045
				1046	# for an hex expression tuple, the argument
				1047	# is the value as an integer
				1048	val = int(v[2:], 16)
				1049	return ("hex", val), i+1
				1050
				1051	return None
				1052
				1053	def is_integer(self,i):
				1054	if self.tok[i].id != tokNUMBER:
				1055	return None
				1056
				1057	c = self.is_decimal(i)
				1058	if c: return c
				1059
				1060	c = self.is_hexadecimal(i)
				1061	if c: return c
				1062
				1063	return None
				1064
				1065	def is_number(self,i):
				1066	t = self.tok[i]
				1067	if t.id == tokMINUS and i+1 < self.n:
				1068	c = self.is_integer(i+1)
				1069	if c:
				1070	e, i2 = c
				1071	op, val = e
				1072	return (op, -val), i2
				1073	if t.id == tokPLUS and i+1 < self.n:
				1074	c = self.is_integer(i+1)
				1075	if c: return c
				1076
				1077	return self.is_integer(i)
				1078
				1079
				1080	def is_alnum(self,i):
				1081	"""test wether a given token is alpha-numeric"""
				1082	i = self.skip_spaces(i)
				1083	if i >= self.n:
				1084	return None
				1085	t = self.tok[i]
				1086	m = CppExpr.re_cpp_constant.match(t.id)
				1087	if m:
				1088	#print "... alnum '%s'" % m.group(1)
				1089	r = m.group(1)
				1090	return ("ident", r), i+1
				1091	return None
				1092
				1093	def is_defined(self,i):
				1094	t = self.tok[i]
				1095	if t.id != tokDEFINED:
				1096	return None
				1097
				1098	# we have the defined keyword, check the rest
				1099	i = self.skip_spaces(i+1)
				1100	use_parens = 0
				1101	if i < self.n and self.tok[i].id == tokLPAREN:
				1102	use_parens = 1
				1103	i = self.skip_spaces(i+1)
				1104
				1105	if i >= self.n:
				1106	self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name or left paren")
				1107
				1108	t = self.tok[i]
				1109	if t.id != tokIDENT:
				1110	self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name")
				1111
				1112	i += 1
				1113	if use_parens:
				1114	i = self.expectId(i,tokRPAREN)
				1115
				1116	return ("defined",t.value), i
				1117
				1118
				1119	def is_call_or_ident(self,i):
				1120	i = self.skip_spaces(i)
				1121	if i >= self.n:
				1122	return None
				1123
				1124	t = self.tok[i]
				1125	if t.id != tokIDENT:
				1126	return None
				1127
				1128	name = t.value
				1129
				1130	i = self.skip_spaces(i+1)
				1131	if i >= self.n or self.tok[i].id != tokLPAREN:
				1132	return ("ident", name), i
				1133
				1134	params = []
				1135	depth = 1
				1136	i += 1
				1137	j = i
				1138	while i < self.n:
				1139	id = self.tok[i].id
				1140	if id == tokLPAREN:
				1141	depth += 1
				1142	elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
				1143	while j < i and self.tok[j].id == tokSPACE:
				1144	j += 1
				1145	k = i
				1146	while k > j and self.tok[k-1].id == tokSPACE:
				1147	k -= 1
				1148	param = self.tok[j:k]
				1149	params.append( param )
				1150	if id == tokRPAREN:
				1151	break
				1152	j = i+1
				1153	elif id == tokRPAREN:
				1154	depth -= 1
				1155	i += 1
				1156
				1157	if i >= self.n:
				1158	return None
				1159
				1160	return ("call", (name, params)), i+1
				1161
				1162	def is_token(self,i,token):
				1163	i = self.skip_spaces(i)
				1164	if i >= self.n or self.tok[i].id != token:
				1165	return None
				1166	return token, i+1
				1167
				1168
				1169	def is_value(self,i):
				1170	t = self.tok[i]
				1171	if t.id == tokSTRING:
				1172	return ("string", t.value), i+1
				1173
				1174	c = self.is_number(i)
				1175	if c: return c
				1176
				1177	c = self.is_defined(i)
				1178	if c: return c
				1179
				1180	c = self.is_call_or_ident(i)
				1181	if c: return c
				1182
				1183	i = self.skip_spaces(i)
				1184	if i >= self.n or self.tok[i].id != tokLPAREN:
				1185	return None
				1186
				1187	popcount = 1
				1188	i2 = i+1
				1189	while i2 < self.n:
				1190	t = self.tok[i2]
				1191	if t.id == tokLPAREN:
				1192	popcount += 1
				1193	elif t.id == tokRPAREN:
				1194	popcount -= 1
				1195	if popcount == 0:
				1196	break
				1197	i2 += 1
				1198
				1199	if popcount != 0:
				1200	self.throw(CppInvalidExpression, i, "expression missing closing parenthesis")
				1201
				1202	if debugCppExpr:
				1203	print "CppExpr: trying to parse sub-expression %s" % repr(self.tok[i+1:i2])
				1204	oldcount = self.n
				1205	self.n = i2
				1206	c = self.is_expr(i+1)
				1207	self.n = oldcount
				1208	if not c:
				1209	self.throw(CppInvalidExpression, i, "invalid expression within parenthesis")
				1210
				1211	e, i = c
				1212	return e, i2+1
				1213
				1214	def is_unary(self,i):
				1215	i = self.skip_spaces(i)
				1216	if i >= self.n:
				1217	return None
				1218
				1219	t = self.tok[i]
				1220	if t.id in CppExpr.unaries:
				1221	c = self.is_unary(i+1)
				1222	if not c:
				1223	self.throw(CppInvalidExpression, i, "%s operator must be followed by value" % t.id)
				1224	e, i = c
				1225	return (t.id, e), i
				1226
				1227	return self.is_value(i)
				1228
				1229	def is_binary(self,i):
				1230	i = self.skip_spaces(i)
				1231	if i >= self.n:
				1232	return None
				1233
				1234	c = self.is_unary(i)
				1235	if not c:
				1236	return None
				1237
				1238	e1, i2 = c
				1239	i2 = self.skip_spaces(i2)
				1240	if i2 >= self.n:
				1241	return c
				1242
				1243	t = self.tok[i2]
				1244	if t.id in CppExpr.binaries:
				1245	c = self.is_binary(i2+1)
				1246	if not c:
				1247	self.throw(CppInvalidExpression, i,"### %s operator must be followed by value" % t.id )
				1248	e2, i3 = c
				1249	return (t.id, e1, e2), i3
				1250
				1251	return None
				1252
				1253	def is_expr(self,i):
				1254	return self.is_binary(i)
				1255
				1256	def dump_node(self,e):
				1257	op = e[0]
				1258	line = "(" + op
				1259	if op == "int":
				1260	line += " %d)" % e[1]
				1261	elif op == "hex":
				1262	line += " 0x%x)" % e[1]
				1263	elif op == "ident":
				1264	line += " %s)" % e[1]
				1265	elif op == "defined":
				1266	line += " %s)" % e[1]
				1267	elif op == "call":
				1268	arg = e[1]
				1269	line += " %s [" % arg[0]
				1270	prefix = ""
				1271	for param in arg[1]:
				1272	par = ""
				1273	for tok in param:
				1274	par += str(tok)
				1275	line += "%s%s" % (prefix, par)
				1276	prefix = ","
				1277	line += "])"
				1278	elif op in CppExpr.unaries:
				1279	line += " %s)" % self.dump_node(e[1])
				1280	elif op in CppExpr.binaries:
				1281	line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
				1282	else:
				1283	line += " ?%s)" % repr(e[1])
				1284
				1285	return line
				1286
				1287	def __repr__(self):
				1288	return self.dump_node(self.expr)
				1289
				1290	def source_node(self,e):
				1291	op = e[0]
				1292	if op == "int":
				1293	return "%d" % e[1]
				1294	if op == "hex":
				1295	return "0x%x" % e[1]
				1296	if op == "ident":
				1297	# XXX: should try to expand
				1298	return e[1]
				1299	if op == "defined":
				1300	return "defined(%s)" % e[1]
				1301
				1302	prec = CppExpr.precedences.get(op,1000)
				1303	arg = e[1]
				1304	if op in CppExpr.unaries:
				1305	arg_src = self.source_node(arg)
				1306	arg_op = arg[0]
				1307	arg_prec = CppExpr.precedences.get(arg[0],1000)
				1308	if arg_prec < prec:
				1309	return "!(" + arg_src + ")"
				1310	else:
				1311	return "!" + arg_src
				1312	if op in CppExpr.binaries:
				1313	arg2 = e[2]
				1314	arg1_op = arg[0]
				1315	arg2_op = arg2[0]
				1316	arg1_src = self.source_node(arg)
				1317	arg2_src = self.source_node(arg2)
				1318	if CppExpr.precedences.get(arg1_op,1000) < prec:
				1319	arg1_src = "(%s)" % arg1_src
				1320	if CppExpr.precedences.get(arg2_op,1000) < prec:
				1321	arg2_src = "(%s)" % arg2_src
				1322
				1323	return "%s %s %s" % (arg1_src, op, arg2_src)
				1324	return "???"
				1325
				1326	def __str__(self):
				1327	return self.source_node(self.expr)
				1328
				1329	def int_node(self,e):
				1330	if e[0] == "int":
				1331	return e[1]
				1332	elif e[1] == "hex":
				1333	return int(e[1],16)
				1334	else:
				1335	return None
				1336
				1337	def toInt(self):
				1338	return self.int_node(self.expr)
				1339
				1340	def optimize_node(self,e,macros={}):
				1341	op = e[0]
				1342	if op == "defined":
				1343	name = e[1]
				1344	if macros.has_key(name):
				1345	if macros[name] == kCppUndefinedMacro:
				1346	return ("int", 0)
				1347	else:
Elliott Hughes	d3e64a3	2013-09-30 17:41:08 -0700	[diff] [blame^]	1348	try:
				1349	value = int(macros[name])
				1350	return ("int", value)
				1351	except:
				1352	return ("defined", macros[name])
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1353
				1354	if kernel_remove_config_macros and name.startswith("CONFIG_"):
				1355	return ("int", 0)
				1356
				1357	elif op == "!":
				1358	op, v = e
				1359	v = self.optimize_node(v, macros)
				1360	if v[0] == "int":
				1361	if v[1] == 0:
				1362	return ("int", 1)
				1363	else:
				1364	return ("int", 0)
				1365
				1366	elif op == "&&":
				1367	op, l, r = e
				1368	l = self.optimize_node(l, macros)
				1369	r = self.optimize_node(r, macros)
				1370	li = self.int_node(l)
				1371	ri = self.int_node(r)
				1372	if li != None:
				1373	if li == 0:
				1374	return ("int", 0)
				1375	else:
				1376	return r
				1377
				1378	elif op == "\|\|":
				1379	op, l, r = e
				1380	l = self.optimize_node(l, macros)
				1381	r = self.optimize_node(r, macros)
				1382	li = self.int_node(l)
				1383	ri = self.int_node(r)
				1384	if li != None:
				1385	if li == 0:
				1386	return r
				1387	else:
				1388	return ("int", 1)
				1389	elif ri != None:
				1390	if ri == 0:
				1391	return l
				1392	else:
				1393	return ("int", 1)
				1394	return e
				1395
				1396	def optimize(self,macros={}):
				1397	self.expr = self.optimize_node(self.expr,macros)
				1398
				1399	def removePrefixedNode(self,e,prefix,names):
				1400	op = e[0]
				1401	if op == "defined":
				1402	name = e[1]
				1403	if name.startswith(prefix):
				1404	if names.has_key[name] and names[name] == "y":
				1405	return ("int", 1)
				1406	else:
				1407	return ("int", 0)
				1408
				1409	elif op in CppExpr.unaries:
				1410	op, v = e
				1411	v = self.removePrefixedNode(v,prefix,names)
				1412	return (op, v)
				1413	elif op in CppExpr.binaries:
				1414	op, v1, v2 = e
				1415	v1 = self.removePrefixedNode(v1,prefix,names)
				1416	v2 = self.removePrefixedNode(v2,prefix,names)
				1417	return (op, v1, v2)
				1418	elif op == "call":
				1419	func, params = e[1]
				1420	params2 = []
				1421	for param in params:
				1422	params2.append( self.removePrefixedNode(param,prefix,names) )
				1423	return (op, (func, params2))
				1424
				1425	return e
				1426
				1427	def removePrefixed(self,prefix,names={}):
				1428	self.expr = self.removePrefixedNode(self.expr,prefix,names)
				1429
				1430	def is_equal_node(self,e1,e2):
				1431	if e1[0] != e2[0] or len(e1) != len(e2):
				1432	return False
				1433
				1434	op = e1[0]
				1435	if op == "int" or op == "hex" or op == "!" or op == "defined":
				1436	return e1[0] == e2[0]
				1437
				1438	return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2])
				1439
				1440	def is_equal(self,other):
				1441	return self.is_equal_node(self.expr,other.expr)
				1442
				1443	def test_cpp_expr(expr, expected):
				1444	e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
				1445	#print repr(e.expr)
				1446	s1 = repr(e)
				1447	if s1 != expected:
				1448	print "KO: expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
				1449	else:
				1450	#print "OK: expression '%s'" % expr
				1451	pass
				1452
				1453	def test_cpp_expr_optim(expr, expected, macros={}):
				1454	e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
				1455	e.optimize(macros)
				1456
				1457	s1 = repr(e)
				1458	if s1 != expected:
				1459	print "KO: optimized expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
				1460	else:
				1461	#print "OK: optmized expression '%s'" % expr
				1462	pass
				1463
				1464	def test_cpp_expr_source(expr, expected):
				1465	e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
				1466	s1 = str(e)
				1467	if s1 != expected:
				1468	print "KO: source expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
				1469	else:
				1470	#print "OK: source expression '%s'" % expr
				1471	pass
				1472
				1473	def test_CppExpr():
				1474	print "testing CppExpr"
				1475	test_cpp_expr( "0", "(int 0)" )
				1476	test_cpp_expr( "1", "(int 1)" )
				1477	test_cpp_expr( "1 && 1", "(&& (int 1) (int 1))" )
				1478	test_cpp_expr( "1 && 0", "(&& (int 1) (int 0))" )
				1479	test_cpp_expr( "EXAMPLE", "(ident EXAMPLE)" )
				1480	test_cpp_expr( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
				1481	test_cpp_expr( "defined(EXAMPLE)", "(defined EXAMPLE)" )
				1482	test_cpp_expr( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
				1483	test_cpp_expr( "defined(ABC) \|\| defined(BINGO)", "(\|\| (defined ABC) (defined BINGO))" )
				1484	test_cpp_expr( "FOO(BAR)", "(call FOO [BAR])" )
				1485
				1486	test_cpp_expr_optim( "0", "(int 0)" )
				1487	test_cpp_expr_optim( "1", "(int 1)" )
				1488	test_cpp_expr_optim( "1 && 1", "(int 1)" )
				1489	test_cpp_expr_optim( "1 && 0", "(int 0)" )
				1490	test_cpp_expr_optim( "0 && 1", "(int 0)" )
				1491	test_cpp_expr_optim( "0 && 0", "(int 0)" )
				1492	test_cpp_expr_optim( "1 \|\| 1", "(int 1)" )
				1493	test_cpp_expr_optim( "1 \|\| 0", "(int 1)" )
				1494	test_cpp_expr_optim( "0 \|\| 1", "(int 1)" )
				1495	test_cpp_expr_optim( "0 \|\| 0", "(int 0)" )
				1496	test_cpp_expr_optim( "EXAMPLE", "(ident EXAMPLE)" )
				1497	test_cpp_expr_optim( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
				1498	test_cpp_expr_optim( "defined(EXAMPLE)", "(defined EXAMPLE)" )
				1499	test_cpp_expr_optim( "defined(EXAMPLE)", "(int 1)", { "EXAMPLE": "XOWOE" } )
				1500	test_cpp_expr_optim( "defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro} )
				1501	test_cpp_expr_optim( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
				1502	test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 0)", { "EXAMPLE" : "XOWOE" } )
				1503	test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro } )
				1504	test_cpp_expr_optim( "defined(ABC) \|\| defined(BINGO)", "(\|\| (defined ABC) (defined BINGO))" )
				1505	test_cpp_expr_optim( "defined(ABC) \|\| defined(BINGO)", "(int 1)", { "ABC" : "1" } )
				1506	test_cpp_expr_optim( "defined(ABC) \|\| defined(BINGO)", "(int 1)", { "BINGO" : "1" } )
				1507	test_cpp_expr_optim( "defined(ABC) \|\| defined(BINGO)", "(defined ABC)", { "BINGO" : kCppUndefinedMacro } )
				1508	test_cpp_expr_optim( "defined(ABC) \|\| defined(BINGO)", "(int 0)", { "ABC" : kCppUndefinedMacro, "BINGO" : kCppUndefinedMacro } )
				1509
				1510	test_cpp_expr_source( "0", "0" )
				1511	test_cpp_expr_source( "1", "1" )
				1512	test_cpp_expr_source( "1 && 1", "1 && 1" )
				1513	test_cpp_expr_source( "1 && 0", "1 && 0" )
				1514	test_cpp_expr_source( "0 && 1", "0 && 1" )
				1515	test_cpp_expr_source( "0 && 0", "0 && 0" )
				1516	test_cpp_expr_source( "1 \|\| 1", "1 \|\| 1" )
				1517	test_cpp_expr_source( "1 \|\| 0", "1 \|\| 0" )
				1518	test_cpp_expr_source( "0 \|\| 1", "0 \|\| 1" )
				1519	test_cpp_expr_source( "0 \|\| 0", "0 \|\| 0" )
				1520	test_cpp_expr_source( "EXAMPLE", "EXAMPLE" )
				1521	test_cpp_expr_source( "EXAMPLE - 3", "EXAMPLE - 3" )
				1522	test_cpp_expr_source( "defined(EXAMPLE)", "defined(EXAMPLE)" )
				1523	test_cpp_expr_source( "defined EXAMPLE", "defined(EXAMPLE)" )
				1524
				1525
				1526	#####################################################################################
				1527	#####################################################################################
				1528	##### #####
				1529	##### C P P B L O C K #####
				1530	##### #####
				1531	#####################################################################################
				1532	#####################################################################################
				1533
				1534	class Block:
				1535	"""a class used to model a block of input source text. there are two block types:
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1536	- directive blocks: contain the tokens of a single pre-processor directive (e.g. #if)
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1537	- text blocks, contain the tokens of non-directive blocks
				1538
				1539	the cpp parser class below will transform an input source file into a list of Block
				1540	objects (grouped in a BlockList object for convenience)"""
				1541
				1542	def __init__(self,tokens,directive=None,lineno=0):
				1543	"""initialize a new block, if 'directive' is None, this is a text block
				1544	NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)'
				1545	and '#ifndef MACRO' into '#if !defined(MACRO)'"""
				1546	if directive == "ifdef":
				1547	tok = Token()
				1548	tok.set(tokDEFINED)
				1549	tokens = [ tok ] + tokens
				1550	directive = "if"
				1551
				1552	elif directive == "ifndef":
				1553	tok1 = Token()
				1554	tok2 = Token()
				1555	tok1.set(tokNOT)
				1556	tok2.set(tokDEFINED)
				1557	tokens = [ tok1, tok2 ] + tokens
				1558	directive = "if"
				1559
				1560	self.tokens = tokens
				1561	self.directive = directive
				1562	if lineno > 0:
				1563	self.lineno = lineno
				1564	else:
				1565	self.lineno = self.tokens[0].lineno
				1566
				1567	if self.isIf():
				1568	self.expr = CppExpr( self.tokens )
				1569
				1570	def isDirective(self):
				1571	"""returns True iff this is a directive block"""
				1572	return self.directive != None
				1573
				1574	def isConditional(self):
				1575	"""returns True iff this is a conditional directive block"""
				1576	return self.directive in ["if","ifdef","ifndef","else","elif","endif"]
				1577
				1578	def isDefine(self):
				1579	"""returns the macro name in a #define directive, or None otherwise"""
				1580	if self.directive != "define":
				1581	return None
				1582
				1583	return self.tokens[0].value
				1584
				1585	def isIf(self):
				1586	"""returns True iff this is an #if-like directive block"""
				1587	return self.directive in ["if","ifdef","ifndef","elif"]
				1588
				1589	def isInclude(self):
				1590	"""checks wether this is a #include directive. if true, then returns the
				1591	corresponding file name (with brackets or double-qoutes). None otherwise"""
				1592	if self.directive != "include":
				1593	return None
				1594
				1595	#print "iii " + repr(self.tokens)
				1596	if self.tokens[0].id == tokSTRING:
				1597	# a double-quote include, that's easy
				1598	return self.tokens[0].value
				1599
				1600	# we only want the bracket part, not any comments or junk after it
				1601	if self.tokens[0].id == "<":
				1602	i = 0
				1603	tok = self.tokens
				1604	n = len(tok)
				1605	while i < n and tok[i].id != ">":
				1606	i += 1
				1607
				1608	if i >= n:
				1609	return None
				1610
				1611	return string.join([ str(x) for x in tok[:i+1] ],"")
				1612
				1613	else:
				1614	return None
				1615
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1616	def removeWhiteSpace(self):
				1617	# Remove trailing whitespace and empty lines
				1618	# All whitespace is also contracted to a single space
				1619	if self.directive != None:
				1620	return
				1621
				1622	tokens = []
				1623	line = 0 # index of line start
				1624	space = -1 # index of first space, or -1
				1625	ii = 0
				1626	nn = len(self.tokens)
				1627	while ii < nn:
				1628	tok = self.tokens[ii]
				1629
				1630	# If we find a space, record its position if this is the first
				1631	# one the line start or the previous character. Don't append
				1632	# anything to tokens array yet though.
				1633	if tok.id == tokSPACE:
				1634	if space < 0:
				1635	space = ii
				1636	ii += 1
				1637	continue
				1638
				1639	# If this is a line space, ignore the spaces we found previously
				1640	# on the line, and remove empty lines.
				1641	if tok.id == tokLN:
				1642	old_line = line
				1643	old_space = space
				1644	#print "N line=%d space=%d ii=%d" % (line, space, ii)
				1645	ii += 1
				1646	line = ii
				1647	space = -1
				1648	if old_space == old_line: # line only contains spaces
				1649	#print "-s"
				1650	continue
				1651	if ii-1 == old_line: # line is empty
				1652	#print "-e"
				1653	continue
				1654	tokens.append(tok)
				1655	continue
				1656
				1657	# Other token, append any space range if any, converting each
				1658	# one to a single space character, then append the token.
				1659	if space >= 0:
				1660	jj = space
				1661	space = -1
				1662	while jj < ii:
				1663	tok2 = self.tokens[jj]
				1664	tok2.value = " "
				1665	tokens.append(tok2)
				1666	jj += 1
				1667
				1668	tokens.append(tok)
				1669	ii += 1
				1670
				1671	self.tokens = tokens
				1672
				1673	def writeWithWarning(self,out,warning,left_count,repeat_count):
				1674	# removeWhiteSpace() will sometimes creates non-directive blocks
				1675	# without any tokens. These come from blocks that only contained
				1676	# empty lines and spaces. They should not be printed in the final
				1677	# output, and then should not be counted for this operation.
				1678	#
				1679	if not self.directive and self.tokens == []:
				1680	return left_count
				1681
				1682	if self.directive:
Elliott Hughes	c95eb57	2013-01-29 18:15:55 -0800	[diff] [blame]	1683	out.write(str(self).rstrip() + "\n")
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1684	left_count -= 1
				1685	if left_count == 0:
				1686	out.write(warning)
				1687	left_count = repeat_count
				1688
				1689	else:
				1690	for tok in self.tokens:
				1691	out.write(str(tok))
				1692	if tok.id == tokLN:
				1693	left_count -= 1
				1694	if left_count == 0:
				1695	out.write(warning)
				1696	left_count = repeat_count
				1697
				1698	return left_count
				1699
				1700
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1701	def __repr__(self):
				1702	"""generate the representation of a given block"""
				1703	if self.directive:
				1704	result = "#%s " % self.directive
				1705	if self.isIf():
				1706	result += repr(self.expr)
				1707	else:
				1708	for tok in self.tokens:
				1709	result += repr(tok)
				1710	else:
				1711	result = ""
				1712	for tok in self.tokens:
				1713	result += repr(tok)
				1714
				1715	return result
				1716
				1717	def __str__(self):
				1718	"""generate the string representation of a given block"""
				1719	if self.directive:
				1720	if self.directive == "if":
				1721	# small optimization to re-generate #ifdef and #ifndef
				1722	e = self.expr.expr
				1723	op = e[0]
				1724	if op == "defined":
				1725	result = "#ifdef %s" % e[1]
				1726	elif op == "!" and e[1][0] == "defined":
				1727	result = "#ifndef %s" % e[1][1]
				1728	else:
				1729	result = "#if " + str(self.expr)
				1730	else:
				1731	result = "#%s" % self.directive
				1732	if len(self.tokens):
				1733	result += " "
				1734	for tok in self.tokens:
				1735	result += str(tok)
				1736	else:
				1737	result = ""
				1738	for tok in self.tokens:
				1739	result += str(tok)
				1740
				1741	return result
				1742
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1743	class BlockList:
				1744	"""a convenience class used to hold and process a list of blocks returned by
				1745	the cpp parser"""
				1746	def __init__(self,blocks):
				1747	self.blocks = blocks
				1748
				1749	def __len__(self):
				1750	return len(self.blocks)
				1751
				1752	def __getitem__(self,n):
				1753	return self.blocks[n]
				1754
				1755	def __repr__(self):
				1756	return repr(self.blocks)
				1757
				1758	def __str__(self):
				1759	result = ""
				1760	for b in self.blocks:
				1761	result += str(b)
				1762	if b.isDirective():
Elliott Hughes	c95eb57	2013-01-29 18:15:55 -0800	[diff] [blame]	1763	result = result.rstrip() + '\n'
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1764	return result
				1765
				1766	def optimizeIf01(self):
				1767	"""remove the code between #if 0 .. #endif in a BlockList"""
				1768	self.blocks = optimize_if01(self.blocks)
				1769
				1770	def optimizeMacros(self, macros):
				1771	"""remove known defined and undefined macros from a BlockList"""
				1772	for b in self.blocks:
				1773	if b.isIf():
				1774	b.expr.optimize(macros)
				1775
				1776	def removeMacroDefines(self,macros):
				1777	"""remove known macro definitions from a BlockList"""
				1778	self.blocks = remove_macro_defines(self.blocks,macros)
				1779
				1780	def removePrefixed(self,prefix,names):
				1781	for b in self.blocks:
				1782	if b.isIf():
				1783	b.expr.removePrefixed(prefix,names)
				1784
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1785	def removeWhiteSpace(self):
				1786	for b in self.blocks:
				1787	b.removeWhiteSpace()
				1788
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1789	def optimizeAll(self,macros):
				1790	self.optimizeMacros(macros)
				1791	self.optimizeIf01()
				1792	return
				1793
				1794	def findIncludes(self):
				1795	"""return the list of included files in a BlockList"""
				1796	result = []
				1797	for b in self.blocks:
				1798	i = b.isInclude()
				1799	if i:
				1800	result.append(i)
				1801
				1802	return result
				1803
				1804
				1805	def write(self,out):
				1806	out.write(str(self))
				1807
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1808	def writeWithWarning(self,out,warning,repeat_count):
				1809	left_count = repeat_count
				1810	for b in self.blocks:
				1811	left_count = b.writeWithWarning(out,warning,left_count,repeat_count)
				1812
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1813	def removeComments(self):
				1814	for b in self.blocks:
				1815	for tok in b.tokens:
				1816	if tok.id == tokSPACE:
				1817	tok.value = " "
				1818
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1819	def removeVarsAndFuncs(self,knownStatics=set()):
				1820	"""remove all extern and static declarations corresponding
				1821	to variable and function declarations. we only accept typedefs
				1822	and enum/structs/union declarations.
				1823
				1824	however, we keep the definitions corresponding to the set
				1825	of known static inline functions in the set 'knownStatics',
				1826	which is useful for optimized byteorder swap functions and
				1827	stuff like that.
				1828	"""
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1829	# state = 0 => normal (i.e. LN + spaces)
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1830	# state = 1 => typedef/struct encountered, ends with ";"
				1831	# state = 2 => var declaration encountered, ends with ";"
				1832	# state = 3 => func declaration encountered, ends with "}"
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1833	state = 0
				1834	depth = 0
				1835	blocks2 = []
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1836	skipTokens = False
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1837	for b in self.blocks:
				1838	if b.isDirective():
				1839	blocks2.append(b)
				1840	else:
				1841	n = len(b.tokens)
				1842	i = 0
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1843	if skipTokens:
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1844	first = n
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1845	else:
				1846	first = 0
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1847	while i < n:
				1848	tok = b.tokens[i]
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1849	tokid = tok.id
				1850	# If we are not looking for the start of a new
				1851	# type/var/func, then skip over tokens until
				1852	# we find our terminator, managing the depth of
				1853	# accolades as we go.
				1854	if state > 0:
				1855	terminator = False
				1856	if tokid == '{':
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1857	depth += 1
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1858	elif tokid == '}':
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1859	if depth > 0:
				1860	depth -= 1
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1861	if (depth == 0) and (state == 3):
				1862	terminator = True
				1863	elif tokid == ';' and depth == 0:
				1864	terminator = True
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1865
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1866	if terminator:
				1867	# we found the terminator
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1868	state = 0
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1869	if skipTokens:
				1870	skipTokens = False
				1871	first = i+1
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1872
David 'Digit' Turner	fc26931	2010-10-11 22:11:06 +0200	[diff] [blame]	1873	i = i+1
				1874	continue
				1875
				1876	# We are looking for the start of a new type/func/var
				1877	# ignore whitespace
				1878	if tokid in [tokLN, tokSPACE]:
				1879	i = i+1
				1880	continue
				1881
				1882	# Is it a new type definition, then start recording it
				1883	if tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
				1884	#print "$$$ keep type declr" + repr(b.tokens[i:])
				1885	state = 1
				1886	i = i+1
				1887	continue
				1888
				1889	# Is it a variable or function definition. If so, first
				1890	# try to determine which type it is, and also extract
				1891	# its name.
				1892	#
				1893	# We're going to parse the next tokens of the same block
				1894	# until we find a semi-column or a left parenthesis.
				1895	#
				1896	# The semi-column corresponds to a variable definition,
				1897	# the left-parenthesis to a function definition.
				1898	#
				1899	# We also assume that the var/func name is the last
				1900	# identifier before the terminator.
				1901	#
				1902	j = i+1
				1903	ident = ""
				1904	while j < n:
				1905	tokid = b.tokens[j].id
				1906	if tokid == '(': # a function declaration
				1907	state = 3
				1908	break
				1909	elif tokid == ';': # a variable declaration
				1910	state = 2
				1911	break
				1912	if tokid == tokIDENT:
				1913	ident = b.tokens[j].value
				1914	j += 1
				1915
				1916	if j >= n:
				1917	# This can only happen when the declaration
				1918	# does not end on the current block (e.g. with
				1919	# a directive mixed inside it.
				1920	#
				1921	# We will treat it as malformed because
				1922	# it's very hard to recover from this case
				1923	# without making our parser much more
				1924	# complex.
				1925	#
				1926	#print "### skip unterminated static '%s'" % ident
				1927	break
				1928
				1929	if ident in knownStatics:
				1930	#print "### keep var/func '%s': %s" % (ident,repr(b.tokens[i:j]))
				1931	pass
				1932	else:
				1933	# We're going to skip the tokens for this declaration
				1934	#print "### skip variable /func'%s': %s" % (ident,repr(b.tokens[i:j]))
				1935	if i > first:
				1936	blocks2.append( Block(b.tokens[first:i]))
				1937	skipTokens = True
				1938	first = n
				1939
				1940	i = i+1
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1941
				1942	if i > first:
				1943	#print "### final '%s'" % repr(b.tokens[first:i])
				1944	blocks2.append( Block(b.tokens[first:i]) )
				1945
				1946	self.blocks = blocks2
				1947
				1948	def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"):
				1949	"""insert your standard issue disclaimer that this is an
				1950	auto-generated file, etc.."""
				1951	tokens = CppLineTokenizer( disclaimer ).toTokenList()
				1952	tokens = tokens[:-1] # remove trailing tokLN
				1953	self.blocks = [ Block(tokens) ] + self.blocks
				1954
Martin Storsjo	d32c805	2010-12-08 11:38:14 +0100	[diff] [blame]	1955	def replaceTokens(self,replacements=dict()):
				1956	"""replace tokens according to the given dict
				1957	"""
				1958	for b in self.blocks:
				1959	if not b.isDirective():
				1960	for tok in b.tokens:
				1961	if tok.id == tokIDENT:
				1962	if tok.value in replacements:
				1963	tok.value = replacements[tok.value]
				1964
The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1965	class BlockParser:
				1966	"""a class used to convert an input source file into a BlockList object"""
				1967
				1968	def __init__(self,tokzer=None):
				1969	"""initialize a block parser. the input source is provided through a Tokenizer
				1970	object"""
				1971	self.reset(tokzer)
				1972
				1973	def reset(self,tokzer):
				1974	self.state = 1
				1975	self.tokzer = tokzer
				1976
				1977	def getBlocks(self,tokzer=None):
				1978	"""tokenize and parse the input source, return a BlockList object
				1979	NOTE: empty and line-numbering directives are ignored and removed
				1980	from the result. as a consequence, it is possible to have
				1981	two successive text blocks in the result"""
				1982	# state 0 => in source code
				1983	# state 1 => in source code, after a LN
				1984	# state 2 => in source code, after LN then some space
				1985	state = 1
				1986	lastLN = 0
				1987	current = []
				1988	blocks = []
				1989
				1990	if tokzer == None:
				1991	tokzer = self.tokzer
				1992
				1993	while 1:
				1994	tok = tokzer.getToken()
				1995	if tok.id == tokEOF:
				1996	break
				1997
				1998	if tok.id == tokLN:
				1999	state = 1
				2000	current.append(tok)
				2001	lastLN = len(current)
				2002
				2003	elif tok.id == tokSPACE:
				2004	if state == 1:
				2005	state = 2
				2006	current.append(tok)
				2007
				2008	elif tok.id == "#":
				2009	if state > 0:
				2010	# this is the start of a directive
				2011
				2012	if lastLN > 0:
				2013	# record previous tokens as text block
				2014	block = Block(current[:lastLN])
				2015	blocks.append(block)
				2016	lastLN = 0
				2017
				2018	current = []
				2019
				2020	# skip spaces after the #
				2021	while 1:
				2022	tok = tokzer.getToken()
				2023	if tok.id != tokSPACE:
				2024	break
				2025
				2026	if tok.id != tokIDENT:
				2027	# empty or line-numbering, ignore it
				2028	if tok.id != tokLN and tok.id != tokEOF:
				2029	while 1:
				2030	tok = tokzer.getToken()
				2031	if tok.id == tokLN or tok.id == tokEOF:
				2032	break
				2033	continue
				2034
				2035	directive = tok.value
				2036	lineno = tok.lineno
				2037
				2038	# skip spaces
				2039	tok = tokzer.getToken()
				2040	while tok.id == tokSPACE:
				2041	tok = tokzer.getToken()
				2042
				2043	# then record tokens until LN
				2044	dirtokens = []
				2045	while tok.id != tokLN and tok.id != tokEOF:
				2046	dirtokens.append(tok)
				2047	tok = tokzer.getToken()
				2048
				2049	block = Block(dirtokens,directive,lineno)
				2050	blocks.append(block)
				2051	state = 1
				2052
				2053	else:
				2054	state = 0
				2055	current.append(tok)
				2056
				2057	if len(current) > 0:
				2058	block = Block(current)
				2059	blocks.append(block)
				2060
				2061	return BlockList(blocks)
				2062
				2063	def parse(self,tokzer):
				2064	return self.getBlocks( tokzer )
				2065
				2066	def parseLines(self,lines):
				2067	"""parse a list of text lines into a BlockList object"""
				2068	return self.getBlocks( CppLinesTokenizer(lines) )
				2069
				2070	def parseFile(self,path):
				2071	"""parse a file into a BlockList object"""
				2072	file = open(path, "rt")
				2073	result = self.getBlocks( CppFileTokenizer(file) )
				2074	file.close()
				2075	return result
				2076
				2077
				2078	def test_block_parsing(lines,expected):
				2079	blocks = BlockParser().parse( CppLinesTokenizer(lines) )
				2080	if len(blocks) != len(expected):
				2081	raise BadExpectedToken, "parser.buildBlocks returned '%s' expecting '%s'" \
				2082	% (str(blocks), repr(expected))
				2083	for n in range(len(blocks)):
				2084	if str(blocks[n]) != expected[n]:
				2085	raise BadExpectedToken, "parser.buildBlocks()[%d] is '%s', expecting '%s'" \
				2086	% (n, str(blocks[n]), expected[n])
				2087	#for block in blocks:
				2088	# print block
				2089
				2090	def test_BlockParser():
				2091	test_block_parsing(["#error hello"],["#error hello"])
				2092	test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ])
				2093	test_block_parsing([ "foo", " # ", "bar" ], [ "foo\n","bar\n" ])
				2094	test_block_parsing(\
				2095	[ "foo", " # ", " # /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ],
				2096	[ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] )
				2097
				2098
				2099	#####################################################################################
				2100	#####################################################################################
				2101	##### #####
				2102	##### B L O C K L I S T O P T I M I Z A T I O N #####
				2103	##### #####
				2104	#####################################################################################
				2105	#####################################################################################
				2106
				2107	def remove_macro_defines( blocks, excludedMacros=set() ):
				2108	"""remove macro definitions like #define <macroName> ...."""
				2109	result = []
				2110	for b in blocks:
				2111	macroName = b.isDefine()
				2112	if macroName == None or not macroName in excludedMacros:
				2113	result.append(b)
				2114
				2115	return result
				2116
				2117	def find_matching_endif( blocks, i ):
				2118	n = len(blocks)
				2119	depth = 1
				2120	while i < n:
				2121	if blocks[i].isDirective():
				2122	dir = blocks[i].directive
				2123	if dir in [ "if", "ifndef", "ifdef" ]:
				2124	depth += 1
				2125	elif depth == 1 and dir in [ "else", "elif" ]:
				2126	return i
				2127	elif dir == "endif":
				2128	depth -= 1
				2129	if depth == 0:
				2130	return i
				2131	i += 1
				2132	return i
				2133
				2134	def optimize_if01( blocks ):
				2135	"""remove the code between #if 0 .. #endif in a list of CppBlocks"""
				2136	i = 0
				2137	n = len(blocks)
				2138	result = []
				2139	while i < n:
				2140	j = i
				2141	while j < n and not blocks[j].isIf():
				2142	j += 1
				2143	if j > i:
				2144	D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno))
				2145	result += blocks[i:j]
				2146	if j >= n:
				2147	break
				2148	expr = blocks[j].expr
				2149	r = expr.toInt()
				2150	if r == None:
				2151	result.append(blocks[j])
				2152	i = j + 1
				2153	continue
				2154
				2155	if r == 0:
				2156	# if 0 => skip everything until the corresponding #endif
				2157	j = find_matching_endif( blocks, j+1 )
				2158	if j >= n:
				2159	# unterminated #if 0, finish here
				2160	break
				2161	dir = blocks[j].directive
				2162	if dir == "endif":
				2163	D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno))
				2164	i = j + 1
				2165	elif dir == "else":
				2166	# convert 'else' into 'if 1'
				2167	D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno))
				2168	blocks[j].directive = "if"
				2169	blocks[j].expr = CppExpr( CppLineTokenizer("1").toTokenList() )
				2170	i = j
				2171	elif dir == "elif":
				2172	# convert 'elif' into 'if'
				2173	D2("convert 'if 0' .. 'elif' into 'if'")
				2174	blocks[j].directive = "if"
				2175	i = j
				2176	continue
				2177
				2178	# if 1 => find corresponding endif and remove/transform them
				2179	k = find_matching_endif( blocks, j+1 )
				2180	if k >= n:
				2181	# unterminated #if 1, finish here
				2182	D2("unterminated 'if 1'")
				2183	result += blocks[j+1:k]
				2184	break
				2185
				2186	dir = blocks[k].directive
				2187	if dir == "endif":
				2188	D2("convert 'if 1' .. 'endif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
				2189	result += optimize_if01(blocks[j+1:k])
				2190	i = k+1
				2191	elif dir == "else":
				2192	# convert 'else' into 'if 0'
				2193	D2("convert 'if 1' .. 'else' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
				2194	result += optimize_if01(blocks[j+1:k])
				2195	blocks[k].directive = "if"
				2196	blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() )
				2197	i = k
				2198	elif dir == "elif":
				2199	# convert 'elif' into 'if 0'
				2200	D2("convert 'if 1' .. 'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
				2201	result += optimize_if01(blocks[j+1:k])
				2202	blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() )
				2203	i = k
				2204	return result
				2205
				2206	def test_optimizeAll():
				2207	text = """\
				2208	#if 1
				2209	#define GOOD_1
				2210	#endif
				2211	#if 0
				2212	#define BAD_2
				2213	#define BAD_3
				2214	#endif
				2215
				2216	#if 1
				2217	#define GOOD_2
				2218	#else
				2219	#define BAD_4
				2220	#endif
				2221
				2222	#if 0
				2223	#define BAD_5
				2224	#else
				2225	#define GOOD_3
				2226	#endif
				2227
				2228	#if 0
				2229	#if 1
				2230	#define BAD_6
				2231	#endif
				2232	#endif\
				2233	"""
				2234
				2235	expected = """\
				2236	#define GOOD_1
				2237
				2238	#define GOOD_2
				2239
				2240	#define GOOD_3
				2241
				2242	"""
				2243
				2244	print "running test_BlockList.optimizeAll"
				2245	out = StringOutput()
				2246	lines = string.split(text, '\n')
				2247	list = BlockParser().parse( CppLinesTokenizer(lines) )
				2248	#D_setlevel(2)
				2249	list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} )
				2250	#print repr(list)
				2251	list.write(out)
				2252	if out.get() != expected:
				2253	print "KO: macro optimization failed\n"
				2254	print "<<<< expecting '",
				2255	print expected,
				2256	print "'\n>>>> result '"
				2257	print out.get(),
				2258	print "'\n----"
				2259
				2260
				2261	#####################################################################################
				2262	#####################################################################################
				2263	##### #####
				2264	##### #####
				2265	##### #####
				2266	#####################################################################################
				2267	#####################################################################################
				2268
				2269	def runUnitTests():
				2270	"""run all unit tests for this program"""
				2271	print "running unit tests"
				2272	test_CppTokenizer()
				2273	test_CppExpr()
				2274	test_optimizeAll()
				2275	test_BlockParser()
				2276	print "OK"
				2277
				2278	if __name__ == "__main__":
				2279	runUnitTests()