auto import from //depot/cupcake/@135843

commit: 1dc9e472e19acfe6dc7f41e429236e7eef7ceda1 [log] [tgz]
author: The Android Open Source Project <initial-contribution@android.com> Tue Mar 03 19:28:35 2009 -0800
committer: The Android Open Source Project <initial-contribution@android.com> Tue Mar 03 19:28:35 2009 -0800
tree: 3be0c520fae17689bbf5584e1136fb820caef26f
parent: 1767f908af327fa388b1c66883760ad851267013 [diff] [blame]
diff --git a/libc/kernel/tools/cpp.py b/libc/kernel/tools/cpp.py
new file mode 100644
index 0000000..4b4bd38
--- /dev/null
+++ b/libc/kernel/tools/cpp.py

@@ -0,0 +1,2180 @@
+# a glorified C pre-processor parser
+
+import sys, re, string
+from utils import *
+from defaults import *
+
+debugTokens             = False
+debugDirectiveTokenizer = False
+debugLineParsing        = False
+debugCppExpr            = False
+debugOptimIf01          = False
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####           C P P   T O K E N S                                             #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+# the list of supported C-preprocessor tokens
+# plus a couple of C tokens as well
+tokEOF       = "\0"
+tokLN        = "\n"
+tokSTRINGIFY = "#"
+tokCONCAT    = "##"
+tokLOGICAND  = "&&"
+tokLOGICOR   = "||"
+tokSHL       = "<<"
+tokSHR       = ">>"
+tokEQUAL     = "=="
+tokNEQUAL    = "!="
+tokLT        = "<"
+tokLTE       = "<="
+tokGT        = ">"
+tokGTE       = ">="
+tokELLIPSIS  = "..."
+tokSPACE     = " "
+tokDEFINED   = "defined"
+tokLPAREN    = "("
+tokRPAREN    = ")"
+tokNOT       = "!"
+tokPLUS      = "+"
+tokMINUS     = "-"
+tokMULTIPLY  = "*"
+tokDIVIDE    = "/"
+tokMODULUS   = "%"
+tokBINAND    = "&"
+tokBINOR     = "|"
+tokBINXOR    = "^"
+tokCOMMA     = ","
+tokLBRACE    = "{"
+tokRBRACE    = "}"
+tokARROW     = "->"
+tokINCREMENT = "++"
+tokDECREMENT = "--"
+tokNUMBER    = "<number>"
+tokIDENT     = "<ident>"
+tokSTRING    = "<string>"
+
+class Token:
+    """a simple class to hold information about a given token.
+       each token has a position in the source code, as well as
+       an 'id' and a 'value'. the id is a string that identifies
+       the token's class, while the value is the string of the
+       original token itself.
+
+       for example, the tokenizer concatenates a series of spaces
+       and tabs as a single tokSPACE id, whose value if the original
+       spaces+tabs sequence."""
+
+    def __init__(self):
+        self.id     = None
+        self.value  = None
+        self.lineno = 0
+        self.colno  = 0
+
+    def set(self,id,val=None):
+        self.id = id
+        if val:
+            self.value = val
+        else:
+            self.value = id
+        return None
+
+    def copyFrom(self,src):
+        self.id     = src.id
+        self.value  = src.value
+        self.lineno = src.lineno
+        self.colno  = src.colno
+
+    def __repr__(self):
+        if self.id == tokIDENT:
+            return "(ident %s)" % self.value
+        if self.id == tokNUMBER:
+            return "(number %s)" % self.value
+        if self.id == tokSTRING:
+            return "(string '%s')" % self.value
+        if self.id == tokLN:
+            return "<LN>"
+        if self.id == tokEOF:
+            return "<EOF>"
+        if self.id == tokSPACE and self.value == "\\":
+            # this corresponds to a trailing \ that was transformed into a tokSPACE
+            return "<\\>"
+
+        return self.id
+
+    def __str__(self):
+        if self.id == tokIDENT:
+            return self.value
+        if self.id == tokNUMBER:
+            return self.value
+        if self.id == tokSTRING:
+            return self.value
+        if self.id == tokEOF:
+            return "<EOF>"
+        if self.id == tokSPACE:
+            if self.value == "\\":  # trailing \
+                return "\\\n"
+            else:
+                return self.value
+
+        return self.id
+
+class BadExpectedToken(Exception):
+    def __init__(self,msg):
+        print msg
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####          C P P   T O K E N   C U R S O R                                  #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+class TokenCursor:
+    """a small class to iterate over a list of Token objects"""
+    def __init__(self,tokens):
+        self.tokens = tokens
+        self.n      = 0
+        self.count  = len(tokens)
+
+    def set(self,n):
+        """set the current position"""
+        if n < 0:
+            n = 0
+        if n > self.count:
+            n = self.count
+        self.n = n
+
+    def peekId(self):
+        """retrieve the id of the current token"""
+        if (self.n >= self.count):
+            return None
+        return self.tokens[self.n].id
+
+    def peek(self):
+        """retrieve the current token. does not change position"""
+        if (self.n >= self.count):
+            return None
+        return self.tokens[self.n]
+
+    def skip(self):
+        """increase current token position"""
+        if (self.n < self.count):
+            self.n += 1
+
+    def skipSpaces(self):
+        """skip over all space tokens, this includes tokSPACE and tokLN"""
+        while 1:
+            tok = self.peekId()
+            if tok != tokSPACE and tok != tokLN:
+                break
+            self.skip()
+
+    def skipIfId(self,id):
+        """skip an optional token"""
+        if self.peekId() == id:
+            self.skip()
+
+    def expectId(self,id):
+        """raise an exception if the current token hasn't a given id.
+           otherwise skip over it"""
+        tok = self.peek()
+        if tok.id != id:
+            raise BadExpectedToken, "%d:%d: '%s' expected, received '%s'" % (tok.lineno, tok.colno, id, tok.id)
+        self.skip()
+
+    def remain(self):
+        """return the list of remaining tokens"""
+        return self.tokens[self.n:]
+
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####           C P P   T O K E N I Z E R                                       #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+# list of long symbols, i.e. those that take more than one characters
+cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\
+                   tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ]
+
+class CppTokenizer:
+    """an abstract class used to convert some input text into a list
+       of tokens. real implementations follow and differ in the format
+       of the input text only"""
+
+    def __init__(self):
+        """initialize a new CppTokenizer object"""
+        self.eof  = False  # end of file reached ?
+        self.text = None   # content of current line, with final \n stripped
+        self.line = 0      # number of current line
+        self.pos  = 0      # current character position in current line
+        self.len  = 0      # length of current line text
+        self.held = Token()
+
+    def setLineText(self,line):
+        """set the content of the (next) current line. should be called
+           by fillLineText() in derived classes"""
+        self.text = line
+        self.len  = len(line)
+        self.pos  = 0
+
+    def fillLineText(self):
+        """refresh the content of 'line' with a new line of input"""
+        # to be overriden
+        self.eof = True
+
+    def markPos(self,tok):
+        """mark the position of the current token in the source file"""
+        if self.eof or self.pos > self.len:
+            tok.lineno = self.line + 1
+            tok.colno  = 0
+        else:
+            tok.lineno = self.line
+            tok.colno  = self.pos
+
+    def peekChar(self):
+        """return the current token under the cursor without moving it"""
+        if self.eof:
+            return tokEOF
+
+        if self.pos > self.len:
+            self.pos   = 0
+            self.line += 1
+            self.fillLineText()
+            if self.eof:
+                return tokEOF
+
+        if self.pos == self.len:
+            return tokLN
+        else:
+            return self.text[self.pos]
+
+    def peekNChar(self,n):
+        """try to peek the next n chars on the same line"""
+        if self.pos + n > self.len:
+            return None
+        return self.text[self.pos:self.pos+n]
+
+    def skipChar(self):
+        """increment the token cursor position"""
+        if not self.eof:
+            self.pos += 1
+
+    def skipNChars(self,n):
+        if self.pos + n <= self.len:
+            self.pos += n
+        else:
+            while n > 0:
+                self.skipChar()
+                n -= 1
+
+    def nextChar(self):
+        """retrieve the token at the current cursor position, then skip it"""
+        result = self.peekChar()
+        self.skipChar()
+        return  result
+
+    def getEscape(self):
+        # try to get all characters after a backslash (\)
+        result = self.nextChar()
+        if result == "0":
+            # octal number ?
+            num = self.peekNChar(3)
+            if num != None:
+                isOctal = True
+                for d in num:
+                    if not d in "01234567":
+                        isOctal = False
+                        break
+                if isOctal:
+                    result += num
+                    self.skipNChars(3)
+        elif result == "x" or result == "X":
+            # hex number ?
+            num = self.peekNChar(2)
+            if num != None:
+                isHex = True
+                for d in num:
+                    if not d in "012345678abcdefABCDEF":
+                        isHex = False
+                        break
+                if isHex:
+                    result += num
+                    self.skipNChars(2)
+        elif result == "u" or result == "U":
+            # unicode char ?
+            num = self.peekNChar(4)
+            if num != None:
+                isHex = True
+                for d in num:
+                    if not d in "012345678abcdefABCDEF":
+                        isHex = False
+                        break
+                if isHex:
+                    result += num
+                    self.skipNChars(4)
+
+        return result
+
+    def nextRealToken(self,tok):
+        """return next CPP token, used internally by nextToken()"""
+        c = self.nextChar()
+        if c == tokEOF or c == tokLN:
+            return tok.set(c)
+
+        if c == '/':
+            c = self.peekChar()
+            if c == '/':   # C++ comment line
+                self.skipChar()
+                while 1:
+                    c = self.nextChar()
+                    if c == tokEOF or c == tokLN:
+                        break
+                return tok.set(tokLN)
+            if c == '*':   # C comment start
+                self.skipChar()
+                value = "/*"
+                prev_c = None
+                while 1:
+                    c = self.nextChar()
+                    if c == tokEOF:
+                        #print "## EOF after '%s'" % value
+                        return tok.set(tokEOF,value)
+                    if c == '/' and prev_c == '*':
+                        break
+                    prev_c = c
+                    value += c
+
+                value += "/"
+                #print "## COMMENT: '%s'" % value
+                return tok.set(tokSPACE,value)
+            c = '/'
+
+        if c.isspace():
+            while 1:
+                c2 = self.peekChar()
+                if c2 == tokLN or not c2.isspace():
+                    break
+                c += c2
+                self.skipChar()
+            return tok.set(tokSPACE,c)
+
+        if c == '\\':
+            if debugTokens:
+                print "nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar())
+            if self.peekChar() == tokLN:   # trailing \
+                # eat the tokLN
+                self.skipChar()
+                # we replace a trailing \ by a tokSPACE whose value is
+                # simply "\\". this allows us to detect them later when
+                # needed.
+                return tok.set(tokSPACE,"\\")
+            else:
+                # treat as a single token here ?
+                c +=self.getEscape()
+                return tok.set(c)
+
+        if c == "'":  # chars
+            c2 = self.nextChar()
+            c += c2
+            if c2 == '\\':
+                c += self.getEscape()
+
+            while 1:
+                c2 = self.nextChar()
+                if c2 == tokEOF:
+                    break
+                c += c2
+                if c2 == "'":
+                    break
+
+            return tok.set(tokSTRING, c)
+
+        if c == '"':  # strings
+            quote = 0
+            while 1:
+                c2  = self.nextChar()
+                if c2 == tokEOF:
+                    return tok.set(tokSTRING,c)
+
+                c += c2
+                if not quote:
+                    if c2 == '"':
+                        return tok.set(tokSTRING,c)
+                    if c2 == "\\":
+                        quote = 1
+                else:
+                    quote = 0
+
+        if c >= "0" and c <= "9":  # integers ?
+            while 1:
+                c2 = self.peekChar()
+                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
+                    break
+                c += c2
+                self.skipChar()
+            return tok.set(tokNUMBER,c)
+
+        if c.isalnum() or c == "_":  # identifiers ?
+            while 1:
+                c2 = self.peekChar()
+                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
+                    break
+                c += c2
+                self.skipChar()
+            if c == tokDEFINED:
+                return tok.set(tokDEFINED)
+            else:
+                return tok.set(tokIDENT,c)
+
+        # check special symbols
+        for sk in cppLongSymbols:
+            if c == sk[0]:
+                sklen = len(sk[1:])
+                if self.pos + sklen <= self.len and \
+                   self.text[self.pos:self.pos+sklen] == sk[1:]:
+                    self.pos += sklen
+                    return tok.set(sk)
+
+        return tok.set(c)
+
+    def nextToken(self,tok):
+        """return the next token from the input text. this function
+           really updates 'tok', and does not return a new one"""
+        self.markPos(tok)
+        self.nextRealToken(tok)
+
+    def getToken(self):
+        tok = Token()
+        self.nextToken(tok)
+        if debugTokens:
+            print "getTokens: %s" % repr(tok)
+        return tok
+
+    def toTokenList(self):
+        """convert the input text of a CppTokenizer into a direct
+           list of token objects. tokEOF is stripped from the result"""
+        result = []
+        while 1:
+            tok = Token()
+            self.nextToken(tok)
+            if tok.id == tokEOF:
+                break
+            result.append(tok)
+        return result
+
+class CppLineTokenizer(CppTokenizer):
+    """a CppTokenizer derived class that accepts a single line of text as input"""
+    def __init__(self,line,lineno=1):
+        CppTokenizer.__init__(self)
+        self.line = lineno
+        self.setLineText(line)
+
+
+class CppLinesTokenizer(CppTokenizer):
+    """a CppTokenizer derived class that accepts a list of texdt lines as input.
+       the lines must not have a trailing \n"""
+    def __init__(self,lines=[],lineno=1):
+        """initialize a CppLinesTokenizer. you can later add lines using addLines()"""
+        CppTokenizer.__init__(self)
+        self.line  = lineno
+        self.lines = lines
+        self.index = 0
+        self.count = len(lines)
+
+        if self.count > 0:
+            self.fillLineText()
+        else:
+            self.eof = True
+
+    def addLine(self,line):
+        """add a line to a CppLinesTokenizer. this can be done after tokenization
+           happens"""
+        if self.count == 0:
+            self.setLineText(line)
+            self.index = 1
+        self.lines.append(line)
+        self.count += 1
+        self.eof    = False
+
+    def fillLineText(self):
+        if self.index < self.count:
+            self.setLineText(self.lines[self.index])
+            self.index += 1
+        else:
+            self.eof = True
+
+
+class CppFileTokenizer(CppTokenizer):
+    def __init__(self,file,lineno=1):
+        CppTokenizer.__init__(self)
+        self.file = file
+        self.line = lineno
+
+    def fillLineText(self):
+        line = self.file.readline()
+        if len(line) > 0:
+            if line[-1] == '\n':
+                line = line[:-1]
+            if len(line) > 0 and line[-1] == "\r":
+                line = line[:-1]
+            self.setLineText(line)
+        else:
+            self.eof = True
+
+# Unit testing
+#
+class CppTokenizerTester:
+    """a class used to test CppTokenizer classes"""
+    def __init__(self,tokenizer=None):
+        self.tokenizer = tokenizer
+        self.token     = Token()
+
+    def setTokenizer(self,tokenizer):
+        self.tokenizer = tokenizer
+
+    def expect(self,id):
+        self.tokenizer.nextToken(self.token)
+        tokid = self.token.id
+        if tokid == id:
+            return
+        if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER):
+            return
+        raise BadExpectedToken, "###  BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id)
+
+    def expectToken(self,id,line,col):
+        self.expect(id)
+        if self.token.lineno != line:
+            raise BadExpectedToken, "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line)
+        if self.token.colno != col:
+            raise BadExpectedToken, "###  BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col)
+
+    def expectTokenVal(self,id,value,line,col):
+        self.expectToken(id,line,col)
+        if self.token.value != value:
+            raise BadExpectedToken, "###  BAD VALUE: '%s' expecting '%s'" % (self.token.value,value)
+
+    def expectList(self,list):
+        for item in list:
+            self.expect(item)
+
+def test_CppTokenizer():
+    print "running CppTokenizer tests"
+    tester = CppTokenizerTester()
+
+    tester.setTokenizer( CppLineTokenizer("#an/example  && (01923_xy)") )
+    tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \
+                       tokRPAREN, tokLN, tokEOF] )
+
+    tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") )
+    tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE,
+                        tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] )
+
+    tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) )
+    tester.expectList( [ tokSPACE, tokLN, tokEOF ] )
+
+    tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) )
+    tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] )
+
+    tester.setTokenizer( CppLinesTokenizer( ["first second", "  third"] ) )
+    tester.expectToken( "first", 1, 0 )
+    tester.expectToken( tokSPACE, 1, 5 )
+    tester.expectToken( "second", 1, 6 )
+    tester.expectToken( tokLN, 1, 12 )
+    tester.expectToken( tokSPACE, 2, 0 )
+    tester.expectToken( "third", 2, 2 )
+
+    tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) )
+    tester.expectList( [ "boo", tokSPACE ] )
+    tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 )
+    tester.expectList( [ tokLN, tokEOF ] )
+
+    tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) )
+    tester.expectToken( "an", 1, 0 )
+    tester.expectToken( tokSPACE, 1, 2 )
+    tester.expectTokenVal( tokSPACE, "\\", 1, 3 )
+    tester.expectToken( tokSPACE, 2, 0 )
+    tester.expectToken( "example", 2, 1 )
+    tester.expectToken( tokLN, 2, 8 )
+
+    return True
+
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####           C P P   E X P R E S S I O N S                                   #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+# Cpp expressions are modeled by tuples of the form (op,arg) or (op,arg1,arg2), etc..
+# op is an "operator" string
+
+class Expr:
+    """a class used to model a CPP expression"""
+    opInteger   = "int"
+    opIdent     = "ident"
+    opCall      = "call"
+    opDefined   = "defined"
+    opTest      = "?"
+    opLogicNot  = "!"
+    opNot       = "~"
+    opNeg       = "[-]"
+    opUnaryPlus = "[+]"
+    opAdd = "+"
+    opSub = "-"
+    opMul = "*"
+    opDiv = "/"
+    opMod = "%"
+    opAnd = "&"
+    opOr  = "|"
+    opXor = "^"
+    opLogicAnd = "&&"
+    opLogicOr  = "||"
+    opEqual = "=="
+    opNotEqual = "!="
+    opLess = "<"
+    opLessEq = "<="
+    opGreater = ">"
+    opGreaterEq = ">="
+    opShl = "<<"
+    opShr = ">>"
+
+    unaries  = [ opLogicNot, opNot, opNeg, opUnaryPlus ]
+    binaries = [ opAdd, opSub, opMul, opDiv, opMod, opAnd, opOr, opXor, opLogicAnd, opLogicOr,
+                 opEqual, opNotEqual, opLess, opLessEq, opGreater, opGreaterEq ]
+
+    precedences = {
+                    opTest: 0,
+                    opLogicOr:  1,
+                    opLogicNot: 2,
+                    opOr : 3,
+                    opXor: 4,
+                    opAnd: 5,
+                    opEqual: 6, opNotEqual: 6,
+                    opLess:7, opLessEq:7, opGreater:7, opGreaterEq:7,
+                    opShl:8, opShr:8,
+                    opAdd:9, opSub:9,
+                    opMul:10, opDiv:10, opMod:10,
+                    opLogicNot:11,
+                    opNot: 12,
+                    }
+
+    def __init__(self,op):
+        self.op = op
+
+    def __repr__(self):
+        return "(%s)" % self.op
+
+    def __str__(self):
+        return "operator(%s)" % self.op
+
+    def precedence(self):
+        """return the precedence of a given operator"""
+        return Expr.precedences.get(self.op, 1000)
+
+    def isUnary(self):
+        return self.op in Expr.unaries
+
+    def isBinary(self):
+        return self.op in Expr.binaries
+
+    def isDefined(self):
+        return self.op is opDefined
+
+    def toInt(self):
+        """return the integer value of a given expression. only valid for integer expressions
+           will return None otherwise"""
+        return None
+
+class IntExpr(Expr):
+    def __init__(self,value):
+        Expr.__init__(self,opInteger)
+        self.arg   = value
+
+    def __repr__(self):
+        return "(int %s)" % self.arg
+
+    def __str__(self):
+        return self.arg
+
+    def toInt(self):
+        s = self.arg  # string value
+        # get rid of U or L suffixes
+        while len(s) > 0 and s[-1] in "LUlu":
+            s = s[:-1]
+        return string.atoi(s)
+
+class IdentExpr(Expr):
+    def __init__(self,name):
+        Expr.__init__(self,opIdent)
+        self.name = name
+
+    def __repr__(self):
+        return "(ident %s)" % self.name
+
+    def __str__(self):
+        return self.name
+
+class CallExpr(Expr):
+    def __init__(self,funcname,params):
+        Expr.__init__(self,opCall)
+        self.funcname = funcname
+        self.params   = params
+
+    def __repr__(self):
+        result = "(call %s [" % self.funcname
+        comma  = ""
+        for param in self.params:
+            result += "%s%s" % (comma, repr(param))
+            comma   = ","
+        result += "])"
+        return result
+
+    def __str__(self):
+        result = "%s(" % self.funcname
+        comma = ""
+        for param in self.params:
+            result += "%s%s" % (comma, str(param))
+            comma = ","
+
+        result += ")"
+        return result
+
+class TestExpr(Expr):
+    def __init__(self,cond,iftrue,iffalse):
+        Expr.__init__(self,opTest)
+        self.cond    = cond
+        self.iftrue  = iftrue
+        self.iffalse = iffalse
+
+    def __repr__(self):
+        return "(?: %s %s %s)" % (repr(self.cond),repr(self.iftrue),repr(self.iffalse))
+
+    def __str__(self):
+        return "(%s) ? (%s) : (%s)" % (self.cond, self.iftrue, self.iffalse)
+
+class SingleArgExpr(Expr):
+    def __init__(self,op,arg):
+        Expr.__init__(self,op)
+        self.arg = arg
+
+    def __repr__(self):
+        return "(%s %s)" % (self.op, repr(self.arg))
+
+class DefinedExpr(SingleArgExpr):
+    def __init__(self,op,macroname):
+        SingleArgExpr.__init__(self.opDefined,macroname)
+
+    def __str__(self):
+        return "defined(%s)" % self.arg
+
+
+class UnaryExpr(SingleArgExpr):
+    def __init__(self,op,arg,opstr=None):
+        SingleArgExpr.__init__(self,op,arg)
+        if not opstr:
+            opstr = op
+        self.opstr = opstr
+
+    def __str__(self):
+        arg_s     = str(self.arg)
+        arg_prec  = self.arg.precedence()
+        self_prec = self.precedence()
+        if arg_prec < self_prec:
+            return "%s(%s)" % (self.opstr,arg_s)
+        else:
+            return "%s%s" % (self.opstr, arg_s)
+
+class TwoArgExpr(Expr):
+    def __init__(self,op,arg1,arg2):
+        Expr.__init__(self,op)
+        self.arg1 = arg1
+        self.arg2 = arg2
+
+    def __repr__(self):
+        return "(%s %s %s)" % (self.op, repr(self.arg1), repr(self.arg2))
+
+class BinaryExpr(TwoArgExpr):
+    def __init__(self,op,arg1,arg2,opstr=None):
+        TwoArgExpr.__init__(self,op,arg1,arg2)
+        if not opstr:
+            opstr = op
+        self.opstr = opstr
+
+    def __str__(self):
+        arg1_s    = str(self.arg1)
+        arg2_s    = str(self.arg2)
+        arg1_prec = self.arg1.precedence()
+        arg2_prec = self.arg2.precedence()
+        self_prec = self.precedence()
+
+        result = ""
+        if arg1_prec < self_prec:
+            result += "(%s)" % arg1_s
+        else:
+            result += arg1_s
+
+        result += " %s " % self.opstr
+
+        if arg2_prec < self_prec:
+            result += "(%s)" % arg2_s
+        else:
+            result += arg2_s
+
+        return result
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####           C P P   E X P R E S S I O N   P A R S E R                       #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+
+class ExprParser:
+    """a class used to convert a list of tokens into a cpp Expr object"""
+
+    re_octal   = re.compile(r"\s*\(0[0-7]+\).*")
+    re_decimal = re.compile(r"\s*\(\d+[ulUL]*\).*")
+    re_hexadecimal = re.compile(r"\s*\(0[xX][0-9a-fA-F]*\).*")
+
+    def __init__(self,tokens):
+        self.tok = tokens
+        self.n   = len(self.tok)
+        self.i   = 0
+
+    def mark(self):
+        return self.i
+
+    def release(self,pos):
+        self.i = pos
+
+    def peekId(self):
+        if self.i < self.n:
+            return self.tok[self.i].id
+        return None
+
+    def peek(self):
+        if self.i < self.n:
+            return self.tok[self.i]
+        return None
+
+    def skip(self):
+        if self.i < self.n:
+            self.i += 1
+
+    def skipOptional(self,id):
+        if self.i < self.n and self.tok[self.i].id == id:
+            self.i += 1
+
+    def skipSpaces(self):
+        i   = self.i
+        n   = self.n
+        tok = self.tok
+        while i < n and (tok[i] == tokSPACE or tok[i] == tokLN):
+            i += 1
+        self.i = i
+
+    # all the isXXX functions returns a (expr,nextpos) pair if a match is found
+    # or None if not
+
+    def is_integer(self):
+        id = self.tok[self.i].id
+        c  = id[0]
+        if c < '0' or c > '9':
+            return None
+
+        m = ExprParser.re_octal.match(id)
+        if m:
+            return (IntExpr(id), m.end(1))
+
+        m = ExprParser.re_decimal.match(id)
+        if m:
+            return (IntExpr(id), m.end(1))
+
+        m = ExprParser.re_hexadecimal(id)
+        if m:
+            return (IntExpr(id), m.end(1))
+
+        return None
+
+    def is_defined(self):
+        id = self.tok[self.i].id
+        if id != "defined":
+            return None
+
+        pos = self.mark()
+
+        use_paren = 0
+        if self.peekId() == tokLPAREN:
+            self.skip()
+            use_paren = 1
+
+        if self.peekId() != tokIDENT:
+            self.throw( BadExpectedToken, "identifier expected")
+
+        macroname = self.peek().value
+        self.skip()
+        if use_paren:
+            self.skipSpaces()
+            if self.peekId() != tokRPAREN:
+                self.throw( BadExpectedToken, "missing right-paren after 'defined' directive")
+            self.skip()
+
+        i = self.i
+        return (DefinedExpr(macroname),i+1)
+
+    def is_call_or_ident(self):
+        pass
+
+    def parse(self, i):
+        return None
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####           C P P   E X P R E S S I O N S                                   #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+class CppInvalidExpression(Exception):
+    """an exception raised when an invalid/unsupported cpp expression is detected"""
+    pass
+
+class CppExpr:
+    """a class that models the condition of #if directives into
+        an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2)
+        where "op" is a string describing the operation"""
+
+    unaries  = [ "!", "~" ]
+    binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=" ]
+    precedences = { "||": 1,
+                    "&&": 2,
+                     "|": 3,
+                     "^": 4,
+                     "&": 5,
+                     "==":6, "!=":6,
+                     "<":7, "<=":7, ">":7, ">=":7,
+                     "<<":8, ">>":8,
+                     "+":9, "-":9,
+                     "*":10, "/":10, "%":10,
+                     "!":11, "~":12
+                     }
+
+    def __init__(self, tokens):
+        """initialize a CppExpr. 'tokens' must be a CppToken list"""
+        self.tok  = tokens
+        self.n    = len(tokens)
+        if debugCppExpr:
+            print "CppExpr: trying to parse %s" % repr(tokens)
+        expr      = self.is_expr(0)
+        if debugCppExpr:
+            print "CppExpr: got " + repr(expr)
+        self.expr = expr[0]
+
+    re_cpp_constant = re.compile(r"((\d|\w|_)+)")
+
+    def throw(self,exception,i,msg):
+        if i < self.n:
+            tok = self.tok[i]
+            print "%d:%d: %s" % (tok.lineno,tok.colno,msg)
+        else:
+            print "EOF: %s" % msg
+        raise exception
+
+    def skip_spaces(self,i):
+        """skip spaces in input token list"""
+        while i < self.n:
+            t = self.tok[i]
+            if t.id != tokSPACE and t.id != tokLN:
+                break
+            i += 1
+        return i
+
+    def expectId(self,i,id):
+        """check that a given token id is at the current position, then skip over it"""
+        i = self.skip_spaces(i)
+        if i >= self.n or self.tok[i].id != id:
+            self.throw(BadExpectedToken,i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[i].id))
+        return i+1
+
+    def expectIdent(self,i):
+        i = self.skip_spaces(i)
+        if i >= self.n or self.tok[i].id != tokIDENT:
+            self.throw(BadExpectedToken,i,"### expecting identifier in expression, got '%s'" % (id, self.tok[i].id))
+        return i+1
+
+    # the is_xxxxx function returns either None or a pair (e,nextpos)
+    # where 'e' is an expression tuple (e.g. (op,arg)) and 'nextpos' is
+    # the corresponding next position in the input token list
+    #
+
+    def is_decimal(self,i):
+        v = self.tok[i].value[:]
+        while len(v) > 0 and v[-1] in "ULul":
+            v = v[:-1]
+        for digit in v:
+            if not digit.isdigit():
+                return None
+
+        # for an integer expression tuple, the argument
+        # is simply the value as an integer
+        val = string.atoi(v)
+        return ("int", val), i+1
+
+    def is_hexadecimal(self,i):
+        v = self.tok[i].value[:]
+        while len(v) > 0 and v[-1] in "ULul":
+            v = v[:-1]
+        if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"):
+            for digit in v[2:]:
+                if not digit in "0123456789abcdefABCDEF":
+                    return None
+
+            # for an hex expression tuple, the argument
+            # is the value as an integer
+            val = int(v[2:], 16)
+            return ("hex", val), i+1
+
+        return None
+
+    def is_integer(self,i):
+        if self.tok[i].id != tokNUMBER:
+            return None
+
+        c = self.is_decimal(i)
+        if c: return c
+
+        c = self.is_hexadecimal(i)
+        if c: return c
+
+        return None
+
+    def is_number(self,i):
+        t = self.tok[i]
+        if t.id == tokMINUS and i+1 < self.n:
+            c = self.is_integer(i+1)
+            if c:
+                e, i2 = c
+                op, val  = e
+                return (op, -val), i2
+        if t.id == tokPLUS and i+1 < self.n:
+            c = self.is_integer(i+1)
+            if c: return c
+
+        return self.is_integer(i)
+
+
+    def is_alnum(self,i):
+        """test wether a given token is alpha-numeric"""
+        i = self.skip_spaces(i)
+        if i >= self.n:
+            return None
+        t = self.tok[i]
+        m = CppExpr.re_cpp_constant.match(t.id)
+        if m:
+            #print "... alnum '%s'" % m.group(1)
+            r = m.group(1)
+            return ("ident", r), i+1
+        return None
+
+    def is_defined(self,i):
+        t = self.tok[i]
+        if t.id != tokDEFINED:
+            return None
+
+        # we have the defined keyword, check the rest
+        i = self.skip_spaces(i+1)
+        use_parens = 0
+        if i < self.n and self.tok[i].id == tokLPAREN:
+            use_parens = 1
+            i = self.skip_spaces(i+1)
+
+        if i >= self.n:
+            self.throw(CppConstantExpected,i,"### 'defined' must be followed  by macro name or left paren")
+
+        t = self.tok[i]
+        if t.id != tokIDENT:
+            self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name")
+
+        i += 1
+        if use_parens:
+            i = self.expectId(i,tokRPAREN)
+
+        return ("defined",t.value), i
+
+
+    def is_call_or_ident(self,i):
+        i = self.skip_spaces(i)
+        if i >= self.n:
+            return None
+
+        t = self.tok[i]
+        if t.id != tokIDENT:
+            return None
+
+        name = t.value
+
+        i = self.skip_spaces(i+1)
+        if i >= self.n or self.tok[i].id != tokLPAREN:
+            return ("ident", name), i
+
+        params    = []
+        depth     = 1
+        i += 1
+        j  = i
+        while i < self.n:
+            id = self.tok[i].id
+            if id == tokLPAREN:
+                depth += 1
+            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
+                while j < i and self.tok[j].id == tokSPACE:
+                    j += 1
+                k = i
+                while k > j and self.tok[k-1].id == tokSPACE:
+                    k -= 1
+                param = self.tok[j:k]
+                params.append( param )
+                if id == tokRPAREN:
+                    break
+                j = i+1
+            elif id == tokRPAREN:
+                depth -= 1
+            i += 1
+
+        if i >= self.n:
+            return None
+
+        return ("call", (name, params)), i+1
+
+    def is_token(self,i,token):
+        i = self.skip_spaces(i)
+        if i >= self.n or self.tok[i].id != token:
+            return None
+        return token, i+1
+
+
+    def is_value(self,i):
+        t = self.tok[i]
+        if t.id == tokSTRING:
+            return ("string", t.value), i+1
+
+        c = self.is_number(i)
+        if c: return c
+
+        c = self.is_defined(i)
+        if c: return c
+
+        c = self.is_call_or_ident(i)
+        if c: return c
+
+        i = self.skip_spaces(i)
+        if i >= self.n or self.tok[i].id != tokLPAREN:
+            return None
+
+        popcount = 1
+        i2       = i+1
+        while i2 < self.n:
+            t = self.tok[i2]
+            if t.id == tokLPAREN:
+                popcount += 1
+            elif t.id == tokRPAREN:
+                popcount -= 1
+                if popcount == 0:
+                    break
+            i2 += 1
+
+        if popcount != 0:
+            self.throw(CppInvalidExpression, i, "expression missing closing parenthesis")
+
+        if debugCppExpr:
+            print "CppExpr: trying to parse sub-expression %s" % repr(self.tok[i+1:i2])
+        oldcount   = self.n
+        self.n     = i2
+        c          = self.is_expr(i+1)
+        self.n     = oldcount
+        if not c:
+            self.throw(CppInvalidExpression, i, "invalid expression within parenthesis")
+
+        e, i = c
+        return e, i2+1
+
+    def is_unary(self,i):
+        i = self.skip_spaces(i)
+        if i >= self.n:
+            return None
+
+        t = self.tok[i]
+        if t.id in CppExpr.unaries:
+            c = self.is_unary(i+1)
+            if not c:
+                self.throw(CppInvalidExpression, i, "%s operator must be followed by value" % t.id)
+            e, i = c
+            return (t.id, e), i
+
+        return self.is_value(i)
+
+    def is_binary(self,i):
+        i = self.skip_spaces(i)
+        if i >= self.n:
+            return None
+
+        c = self.is_unary(i)
+        if not c:
+            return None
+
+        e1, i2 = c
+        i2 = self.skip_spaces(i2)
+        if i2 >= self.n:
+            return c
+
+        t = self.tok[i2]
+        if t.id in CppExpr.binaries:
+            c = self.is_binary(i2+1)
+            if not c:
+                self.throw(CppInvalidExpression, i,"### %s operator must be followed by value" % t.id )
+            e2, i3 = c
+            return (t.id, e1, e2), i3
+
+        return None
+
+    def is_expr(self,i):
+        return self.is_binary(i)
+
+    def dump_node(self,e):
+        op = e[0]
+        line = "(" + op
+        if op == "int":
+            line += " %d)" % e[1]
+        elif op == "hex":
+            line += " 0x%x)" % e[1]
+        elif op == "ident":
+            line += " %s)" % e[1]
+        elif op == "defined":
+            line += " %s)" % e[1]
+        elif op == "call":
+            arg = e[1]
+            line += " %s [" % arg[0]
+            prefix = ""
+            for param in arg[1]:
+                par = ""
+                for tok in param:
+                    par += str(tok)
+                line += "%s%s" % (prefix, par)
+                prefix = ","
+            line += "])"
+        elif op in CppExpr.unaries:
+            line += " %s)" % self.dump_node(e[1])
+        elif op in CppExpr.binaries:
+            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
+        else:
+            line += " ?%s)" % repr(e[1])
+
+        return line
+
+    def __repr__(self):
+        return self.dump_node(self.expr)
+
+    def source_node(self,e):
+        op = e[0]
+        if op == "int":
+            return "%d" % e[1]
+        if op == "hex":
+            return "0x%x" % e[1]
+        if op == "ident":
+            # XXX: should try to expand
+            return e[1]
+        if op == "defined":
+            return "defined(%s)" % e[1]
+
+        prec = CppExpr.precedences.get(op,1000)
+        arg  = e[1]
+        if op in CppExpr.unaries:
+            arg_src = self.source_node(arg)
+            arg_op  = arg[0]
+            arg_prec = CppExpr.precedences.get(arg[0],1000)
+            if arg_prec < prec:
+                return "!(" + arg_src + ")"
+            else:
+                return "!" + arg_src
+        if op in CppExpr.binaries:
+            arg2     = e[2]
+            arg1_op  = arg[0]
+            arg2_op  = arg2[0]
+            arg1_src = self.source_node(arg)
+            arg2_src = self.source_node(arg2)
+            if CppExpr.precedences.get(arg1_op,1000) < prec:
+                arg1_src = "(%s)" % arg1_src
+            if CppExpr.precedences.get(arg2_op,1000) < prec:
+                arg2_src = "(%s)" % arg2_src
+
+            return "%s %s %s" % (arg1_src, op, arg2_src)
+        return "???"
+
+    def __str__(self):
+        return self.source_node(self.expr)
+
+    def int_node(self,e):
+        if e[0] == "int":
+            return e[1]
+        elif e[1] == "hex":
+            return int(e[1],16)
+        else:
+            return None
+
+    def toInt(self):
+        return self.int_node(self.expr)
+
+    def optimize_node(self,e,macros={}):
+        op = e[0]
+        if op == "defined":
+            name = e[1]
+            if macros.has_key(name):
+                if macros[name] == kCppUndefinedMacro:
+                    return ("int", 0)
+                else:
+                    return ("int", 1)
+
+            if kernel_remove_config_macros and name.startswith("CONFIG_"):
+                return ("int", 0)
+
+        elif op == "!":
+            op, v = e
+            v = self.optimize_node(v, macros)
+            if v[0] == "int":
+                if v[1] == 0:
+                    return ("int", 1)
+                else:
+                    return ("int", 0)
+
+        elif op == "&&":
+            op, l, r = e
+            l  = self.optimize_node(l, macros)
+            r  = self.optimize_node(r, macros)
+            li = self.int_node(l)
+            ri = self.int_node(r)
+            if li != None:
+                if li == 0:
+                    return ("int", 0)
+                else:
+                    return r
+
+        elif op == "||":
+            op, l, r = e
+            l  = self.optimize_node(l, macros)
+            r  = self.optimize_node(r, macros)
+            li = self.int_node(l)
+            ri = self.int_node(r)
+            if li != None:
+                if li == 0:
+                    return r
+                else:
+                    return ("int", 1)
+            elif ri != None:
+                if ri == 0:
+                    return l
+                else:
+                    return ("int", 1)
+        return e
+
+    def optimize(self,macros={}):
+        self.expr = self.optimize_node(self.expr,macros)
+
+    def removePrefixedNode(self,e,prefix,names):
+        op = e[0]
+        if op == "defined":
+            name = e[1]
+            if name.startswith(prefix):
+                if names.has_key[name] and names[name] == "y":
+                    return ("int", 1)
+                else:
+                    return ("int", 0)
+
+        elif op in CppExpr.unaries:
+            op, v = e
+            v = self.removePrefixedNode(v,prefix,names)
+            return (op, v)
+        elif op in CppExpr.binaries:
+            op, v1, v2 = e
+            v1 = self.removePrefixedNode(v1,prefix,names)
+            v2 = self.removePrefixedNode(v2,prefix,names)
+            return (op, v1, v2)
+        elif op == "call":
+            func, params = e[1]
+            params2 = []
+            for param in params:
+                params2.append( self.removePrefixedNode(param,prefix,names) )
+            return (op, (func, params2))
+
+        return e
+
+    def removePrefixed(self,prefix,names={}):
+        self.expr = self.removePrefixedNode(self.expr,prefix,names)
+
+    def is_equal_node(self,e1,e2):
+        if e1[0] != e2[0] or len(e1) != len(e2):
+            return False
+
+        op = e1[0]
+        if op == "int" or op == "hex" or op == "!" or op == "defined":
+            return e1[0] == e2[0]
+
+        return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2])
+
+    def is_equal(self,other):
+        return self.is_equal_node(self.expr,other.expr)
+
+def test_cpp_expr(expr, expected):
+    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
+    #print repr(e.expr)
+    s1 = repr(e)
+    if s1 != expected:
+        print "KO: expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
+    else:
+        #print "OK: expression '%s'" % expr
+        pass
+
+def test_cpp_expr_optim(expr, expected, macros={}):
+    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
+    e.optimize(macros)
+
+    s1 = repr(e)
+    if s1 != expected:
+        print "KO: optimized expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
+    else:
+        #print "OK: optmized expression '%s'" % expr
+        pass
+
+def test_cpp_expr_source(expr, expected):
+    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
+    s1 = str(e)
+    if s1 != expected:
+        print "KO: source expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
+    else:
+        #print "OK: source expression '%s'" % expr
+        pass
+
+def test_CppExpr():
+    print "testing CppExpr"
+    test_cpp_expr( "0", "(int 0)" )
+    test_cpp_expr( "1", "(int 1)" )
+    test_cpp_expr( "1 && 1", "(&& (int 1) (int 1))" )
+    test_cpp_expr( "1 && 0", "(&& (int 1) (int 0))" )
+    test_cpp_expr( "EXAMPLE", "(ident EXAMPLE)" )
+    test_cpp_expr( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
+    test_cpp_expr( "defined(EXAMPLE)", "(defined EXAMPLE)" )
+    test_cpp_expr( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
+    test_cpp_expr( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
+    test_cpp_expr( "FOO(BAR)", "(call FOO [BAR])" )
+
+    test_cpp_expr_optim( "0", "(int 0)" )
+    test_cpp_expr_optim( "1", "(int 1)" )
+    test_cpp_expr_optim( "1 && 1", "(int 1)" )
+    test_cpp_expr_optim( "1 && 0", "(int 0)" )
+    test_cpp_expr_optim( "0 && 1", "(int 0)" )
+    test_cpp_expr_optim( "0 && 0", "(int 0)" )
+    test_cpp_expr_optim( "1 || 1", "(int 1)" )
+    test_cpp_expr_optim( "1 || 0", "(int 1)" )
+    test_cpp_expr_optim( "0 || 1", "(int 1)" )
+    test_cpp_expr_optim( "0 || 0", "(int 0)" )
+    test_cpp_expr_optim( "EXAMPLE", "(ident EXAMPLE)" )
+    test_cpp_expr_optim( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
+    test_cpp_expr_optim( "defined(EXAMPLE)", "(defined EXAMPLE)" )
+    test_cpp_expr_optim( "defined(EXAMPLE)", "(int 1)", { "EXAMPLE": "XOWOE" } )
+    test_cpp_expr_optim( "defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro} )
+    test_cpp_expr_optim( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
+    test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 0)", { "EXAMPLE" : "XOWOE" } )
+    test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro } )
+    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
+    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "ABC" : "1" } )
+    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "BINGO" : "1" } )
+    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(defined ABC)", { "BINGO" : kCppUndefinedMacro } )
+    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 0)", { "ABC" : kCppUndefinedMacro, "BINGO" : kCppUndefinedMacro } )
+
+    test_cpp_expr_source( "0", "0" )
+    test_cpp_expr_source( "1", "1" )
+    test_cpp_expr_source( "1 && 1", "1 && 1" )
+    test_cpp_expr_source( "1 && 0", "1 && 0" )
+    test_cpp_expr_source( "0 && 1", "0 && 1" )
+    test_cpp_expr_source( "0 && 0", "0 && 0" )
+    test_cpp_expr_source( "1 || 1", "1 || 1" )
+    test_cpp_expr_source( "1 || 0", "1 || 0" )
+    test_cpp_expr_source( "0 || 1", "0 || 1" )
+    test_cpp_expr_source( "0 || 0", "0 || 0" )
+    test_cpp_expr_source( "EXAMPLE", "EXAMPLE" )
+    test_cpp_expr_source( "EXAMPLE - 3", "EXAMPLE - 3" )
+    test_cpp_expr_source( "defined(EXAMPLE)", "defined(EXAMPLE)" )
+    test_cpp_expr_source( "defined EXAMPLE", "defined(EXAMPLE)" )
+
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####          C P P   B L O C K                                                #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+class Block:
+    """a class used to model a block of input source text. there are two block types:
+        - direcive blocks: contain the tokens of a single pre-processor directive (e.g. #if)
+        - text blocks, contain the tokens of non-directive blocks
+
+       the cpp parser class below will transform an input source file into a list of Block
+       objects (grouped in a BlockList object for convenience)"""
+
+    def __init__(self,tokens,directive=None,lineno=0):
+        """initialize a new block, if 'directive' is None, this is a text block
+           NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)'
+                 and '#ifndef MACRO' into '#if !defined(MACRO)'"""
+        if directive == "ifdef":
+            tok = Token()
+            tok.set(tokDEFINED)
+            tokens = [ tok ] + tokens
+            directive = "if"
+
+        elif directive == "ifndef":
+            tok1 = Token()
+            tok2 = Token()
+            tok1.set(tokNOT)
+            tok2.set(tokDEFINED)
+            tokens = [ tok1, tok2 ] + tokens
+            directive = "if"
+
+        self.tokens    = tokens
+        self.directive = directive
+        if lineno > 0:
+            self.lineno = lineno
+        else:
+            self.lineno = self.tokens[0].lineno
+
+        if self.isIf():
+            self.expr = CppExpr( self.tokens )
+
+    def isDirective(self):
+        """returns True iff this is a directive block"""
+        return self.directive != None
+
+    def isConditional(self):
+        """returns True iff this is a conditional directive block"""
+        return self.directive in ["if","ifdef","ifndef","else","elif","endif"]
+
+    def isDefine(self):
+        """returns the macro name in a #define directive, or None otherwise"""
+        if self.directive != "define":
+            return None
+
+        return self.tokens[0].value
+
+    def isIf(self):
+        """returns True iff this is an #if-like directive block"""
+        return self.directive in ["if","ifdef","ifndef","elif"]
+
+    def isInclude(self):
+        """checks wether this is a #include directive. if true, then returns the
+           corresponding file name (with brackets or double-qoutes). None otherwise"""
+        if self.directive != "include":
+            return None
+
+        #print "iii " + repr(self.tokens)
+        if self.tokens[0].id == tokSTRING:
+            # a double-quote include, that's easy
+            return self.tokens[0].value
+
+        # we only want the bracket part, not any comments or junk after it
+        if self.tokens[0].id == "<":
+            i   = 0
+            tok = self.tokens
+            n   = len(tok)
+            while i < n and tok[i].id != ">":
+                i += 1
+
+            if i >= n:
+                return None
+
+            return string.join([ str(x) for x in tok[:i+1] ],"")
+
+        else:
+            return None
+
+    def __repr__(self):
+        """generate the representation of a given block"""
+        if self.directive:
+            result = "#%s " % self.directive
+            if self.isIf():
+                result += repr(self.expr)
+            else:
+                for tok in self.tokens:
+                    result += repr(tok)
+        else:
+            result = ""
+            for tok in self.tokens:
+                result += repr(tok)
+
+        return result
+
+    def __str__(self):
+        """generate the string representation of a given block"""
+        if self.directive:
+            if self.directive == "if":
+                # small optimization to re-generate #ifdef and #ifndef
+                e = self.expr.expr
+                op = e[0]
+                if op == "defined":
+                    result = "#ifdef %s" % e[1]
+                elif op == "!" and e[1][0] == "defined":
+                    result = "#ifndef %s" % e[1][1]
+                else:
+                    result = "#if " + str(self.expr)
+            else:
+                result = "#%s" % self.directive
+                if len(self.tokens):
+                    result += " "
+                for tok in self.tokens:
+                    result += str(tok)
+        else:
+            result = ""
+            for tok in self.tokens:
+                result += str(tok)
+
+        return result
+
+
+class BlockList:
+    """a convenience class used to hold and process a list of blocks returned by
+       the cpp parser"""
+    def __init__(self,blocks):
+        self.blocks = blocks
+
+    def __len__(self):
+        return len(self.blocks)
+
+    def __getitem__(self,n):
+        return self.blocks[n]
+
+    def __repr__(self):
+        return repr(self.blocks)
+
+    def __str__(self):
+        result = ""
+        for b in self.blocks:
+            result += str(b)
+            if b.isDirective():
+                result += '\n'
+        return result
+
+    def  optimizeIf01(self):
+        """remove the code between #if 0 .. #endif in a BlockList"""
+        self.blocks = optimize_if01(self.blocks)
+
+    def optimizeMacros(self, macros):
+        """remove known defined and undefined macros from a BlockList"""
+        for b in self.blocks:
+            if b.isIf():
+                b.expr.optimize(macros)
+
+    def removeMacroDefines(self,macros):
+        """remove known macro definitions from a BlockList"""
+        self.blocks = remove_macro_defines(self.blocks,macros)
+
+    def removePrefixed(self,prefix,names):
+        for b in self.blocks:
+            if b.isIf():
+                b.expr.removePrefixed(prefix,names)
+
+    def optimizeAll(self,macros):
+        self.optimizeMacros(macros)
+        self.optimizeIf01()
+        return
+
+    def findIncludes(self):
+        """return the list of included files in a BlockList"""
+        result = []
+        for b in self.blocks:
+            i = b.isInclude()
+            if i:
+                result.append(i)
+
+        return result
+
+
+    def write(self,out):
+        out.write(str(self))
+
+    def removeComments(self):
+        for b in self.blocks:
+            for tok in b.tokens:
+                if tok.id == tokSPACE:
+                    tok.value = " "
+
+    def removeEmptyLines(self):
+        # state = 1 => previous line was tokLN
+        # state = 0 => previous line was directive
+        state  = 1
+        for b in self.blocks:
+            if b.isDirective():
+                #print "$$$ directive %s" % str(b)
+                state = 0
+            else:
+                # a tokLN followed by spaces is replaced by a single tokLN
+                # several successive tokLN are replaced by a single one
+                #
+                dst   = []
+                src   = b.tokens
+                n     = len(src)
+                i     = 0
+                #print "$$$ parsing %s" % repr(src)
+                while i < n:
+                    # find final tokLN
+                    j = i
+                    while j < n and src[j].id != tokLN:
+                        j += 1
+
+                    if j >= n:
+                        # uhhh
+                        dst += src[i:]
+                        break
+
+                    if src[i].id == tokSPACE:
+                        k = i+1
+                        while src[k].id == tokSPACE:
+                            k += 1
+
+                        if k == j: # empty lines with spaces in it
+                            i = j  # remove the spaces
+
+                    if i == j:
+                        # an empty line
+                        if state == 1:
+                            i += 1   # remove it
+                        else:
+                            state = 1
+                            dst.append(src[i])
+                            i   += 1
+                    else:
+                        # this line is not empty, remove trailing spaces
+                        k = j
+                        while k > i and src[k-1].id == tokSPACE:
+                            k -= 1
+
+                        nn = i
+                        while nn < k:
+                            dst.append(src[nn])
+                            nn += 1
+                        dst.append(src[j])
+                        state = 0
+                        i = j+1
+
+                b.tokens = dst
+
+    def removeVarsAndFuncs(self,knownStatics=set()):
+        """remove all extern and static declarations corresponding
+           to variable and function declarations. we only accept typedefs
+           and enum/structs/union declarations.
+
+           however, we keep the definitions corresponding to the set
+           of known static inline functions in the set 'knownStatics',
+           which is useful for optimized byteorder swap functions and
+           stuff like that.
+           """
+        # state = 1 => typedef/struct encountered
+        # state = 2 => vars or func declaration encountered, skipping until ";"
+        # state = 0 => normal (i.e. LN + spaces)
+        state      = 0
+        depth      = 0
+        blocks2    = []
+        for b in self.blocks:
+            if b.isDirective():
+                blocks2.append(b)
+            else:
+                n     = len(b.tokens)
+                i     = 0
+                first = 0
+                if state == 2:
+                    first = n
+                while i < n:
+                    tok = b.tokens[i]
+                    if state == 0:
+                        bad = 0
+                        if tok.id in [tokLN, tokSPACE]:
+                            pass
+                        elif tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
+                            state = 1
+                        else:
+                            if tok.value in [ 'static', 'extern', '__KINLINE' ]:
+                                j = i+1
+                                ident = ""
+                                while j < n and not (b.tokens[j].id in [ '(', ';' ]):
+                                    if b.tokens[j].id == tokIDENT:
+                                        ident = b.tokens[j].value
+                                    j += 1
+                                if j < n and ident in knownStatics:
+                                    # this is a known static, we're going to keep its
+                                    # definition in the final output
+                                    state = 1
+                                else:
+                                    #print "### skip static '%s'" % ident
+                                    pass
+
+                            if state == 0:
+                                if i > first:
+                                    #print "### intermediate from '%s': '%s'" % (tok.value, repr(b.tokens[first:i]))
+                                    blocks2.append( Block(b.tokens[first:i]) )
+                                state = 2
+                                first = n
+
+                    else:  # state > 0
+                        if tok.id == '{':
+                            depth += 1
+
+                        elif tok.id == '}':
+                            if depth > 0:
+                                depth -= 1
+
+                        elif depth == 0 and tok.id == ';':
+                            if state == 2:
+                                first = i+1
+                            state = 0
+
+                    i += 1
+
+                if i > first:
+                    #print "### final '%s'" % repr(b.tokens[first:i])
+                    blocks2.append( Block(b.tokens[first:i]) )
+
+        self.blocks = blocks2
+
+    def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"):
+        """insert your standard issue disclaimer that this is an
+           auto-generated file, etc.."""
+        tokens = CppLineTokenizer( disclaimer ).toTokenList()
+        tokens = tokens[:-1]  # remove trailing tokLN
+        self.blocks = [ Block(tokens) ] + self.blocks
+
+class BlockParser:
+    """a class used to convert an input source file into a BlockList object"""
+
+    def __init__(self,tokzer=None):
+        """initialize a block parser. the input source is provided through a Tokenizer
+           object"""
+        self.reset(tokzer)
+
+    def reset(self,tokzer):
+        self.state  = 1
+        self.tokzer = tokzer
+
+    def getBlocks(self,tokzer=None):
+        """tokenize and parse the input source, return a BlockList object
+           NOTE: empty and line-numbering directives are ignored and removed
+                 from the result. as a consequence, it is possible to have
+                 two successive text blocks in the result"""
+        # state 0 => in source code
+        # state 1 => in source code, after a LN
+        # state 2 => in source code, after LN then some space
+        state   = 1
+        lastLN  = 0
+        current = []
+        blocks  = []
+
+        if tokzer == None:
+            tokzer = self.tokzer
+
+        while 1:
+            tok = tokzer.getToken()
+            if tok.id == tokEOF:
+                break
+
+            if tok.id == tokLN:
+                state    = 1
+                current.append(tok)
+                lastLN   = len(current)
+
+            elif tok.id == tokSPACE:
+                if state == 1:
+                    state = 2
+                current.append(tok)
+
+            elif tok.id == "#":
+                if state > 0:
+                    # this is the start of a directive
+
+                    if lastLN > 0:
+                        # record previous tokens as text block
+                        block   = Block(current[:lastLN])
+                        blocks.append(block)
+                        lastLN  = 0
+
+                    current = []
+
+                    # skip spaces after the #
+                    while 1:
+                        tok = tokzer.getToken()
+                        if tok.id != tokSPACE:
+                            break
+
+                    if tok.id != tokIDENT:
+                        # empty or line-numbering, ignore it
+                        if tok.id != tokLN and tok.id != tokEOF:
+                            while 1:
+                                tok = tokzer.getToken()
+                                if tok.id == tokLN or tok.id == tokEOF:
+                                    break
+                        continue
+
+                    directive = tok.value
+                    lineno    = tok.lineno
+
+                    # skip spaces
+                    tok = tokzer.getToken()
+                    while tok.id == tokSPACE:
+                        tok = tokzer.getToken()
+
+                    # then record tokens until LN
+                    dirtokens = []
+                    while tok.id != tokLN and tok.id != tokEOF:
+                        dirtokens.append(tok)
+                        tok = tokzer.getToken()
+
+                    block = Block(dirtokens,directive,lineno)
+                    blocks.append(block)
+                    state   = 1
+
+            else:
+                state = 0
+                current.append(tok)
+
+        if len(current) > 0:
+            block = Block(current)
+            blocks.append(block)
+
+        return BlockList(blocks)
+
+    def parse(self,tokzer):
+        return self.getBlocks( tokzer )
+
+    def parseLines(self,lines):
+        """parse a list of text lines into a BlockList object"""
+        return self.getBlocks( CppLinesTokenizer(lines) )
+
+    def parseFile(self,path):
+        """parse a file into a BlockList object"""
+        file = open(path, "rt")
+        result = self.getBlocks( CppFileTokenizer(file) )
+        file.close()
+        return result
+
+
+def test_block_parsing(lines,expected):
+    blocks = BlockParser().parse( CppLinesTokenizer(lines) )
+    if len(blocks) != len(expected):
+        raise BadExpectedToken, "parser.buildBlocks returned '%s' expecting '%s'" \
+              % (str(blocks), repr(expected))
+    for n in range(len(blocks)):
+        if str(blocks[n]) != expected[n]:
+            raise BadExpectedToken, "parser.buildBlocks()[%d] is '%s', expecting '%s'" \
+                  % (n, str(blocks[n]), expected[n])
+    #for block in blocks:
+    #    print block
+
+def test_BlockParser():
+    test_block_parsing(["#error hello"],["#error hello"])
+    test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ])
+    test_block_parsing([ "foo", "  #  ", "bar" ], [ "foo\n","bar\n" ])
+    test_block_parsing(\
+        [ "foo", "   #  ", "  #  /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ],
+        [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] )
+
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####        B L O C K   L I S T   O P T I M I Z A T I O N                      #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+def  remove_macro_defines( blocks, excludedMacros=set() ):
+    """remove macro definitions like #define <macroName>  ...."""
+    result = []
+    for b in blocks:
+        macroName = b.isDefine()
+        if macroName == None or not macroName in excludedMacros:
+            result.append(b)
+
+    return result
+
+def  find_matching_endif( blocks, i ):
+    n     = len(blocks)
+    depth = 1
+    while i < n:
+        if blocks[i].isDirective():
+            dir = blocks[i].directive
+            if dir in [ "if", "ifndef", "ifdef" ]:
+                depth += 1
+            elif depth == 1 and dir in [ "else", "elif" ]:
+                return i
+            elif dir == "endif":
+                depth -= 1
+                if depth == 0:
+                    return i
+        i += 1
+    return i
+
+def  optimize_if01( blocks ):
+    """remove the code between #if 0 .. #endif in a list of CppBlocks"""
+    i = 0
+    n = len(blocks)
+    result = []
+    while i < n:
+        j = i
+        while j < n and not blocks[j].isIf():
+            j += 1
+        if j > i:
+            D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno))
+            result += blocks[i:j]
+        if j >= n:
+            break
+        expr = blocks[j].expr
+        r    = expr.toInt()
+        if r == None:
+            result.append(blocks[j])
+            i = j + 1
+            continue
+
+        if r == 0:
+            # if 0 => skip everything until the corresponding #endif
+            j = find_matching_endif( blocks, j+1 )
+            if j >= n:
+                # unterminated #if 0, finish here
+                break
+            dir = blocks[j].directive
+            if dir == "endif":
+                D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno))
+                i = j + 1
+            elif dir == "else":
+                # convert 'else' into 'if 1'
+                D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno))
+                blocks[j].directive = "if"
+                blocks[j].expr      = CppExpr( CppLineTokenizer("1").toTokenList() )
+                i = j
+            elif dir == "elif":
+                # convert 'elif' into 'if'
+                D2("convert 'if 0' .. 'elif' into 'if'")
+                blocks[j].directive = "if"
+                i = j
+            continue
+
+        # if 1 => find corresponding endif and remove/transform them
+        k = find_matching_endif( blocks, j+1 )
+        if k >= n:
+            # unterminated #if 1, finish here
+            D2("unterminated 'if 1'")
+            result += blocks[j+1:k]
+            break
+
+        dir = blocks[k].directive
+        if dir == "endif":
+            D2("convert 'if 1' .. 'endif' (lines %d to %d)"  % (blocks[j].lineno, blocks[k].lineno))
+            result += optimize_if01(blocks[j+1:k])
+            i       = k+1
+        elif dir == "else":
+            # convert 'else' into 'if 0'
+            D2("convert 'if 1' .. 'else' (lines %d to %d)"  % (blocks[j].lineno, blocks[k].lineno))
+            result += optimize_if01(blocks[j+1:k])
+            blocks[k].directive = "if"
+            blocks[k].expr      = CppExpr( CppLineTokenizer("0").toTokenList() )
+            i = k
+        elif dir == "elif":
+            # convert 'elif' into 'if 0'
+            D2("convert 'if 1' .. 'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
+            result += optimize_if01(blocks[j+1:k])
+            blocks[k].expr      = CppExpr( CppLineTokenizer("0").toTokenList() )
+            i = k
+    return result
+
+def  test_optimizeAll():
+    text = """\
+#if 1
+#define  GOOD_1
+#endif
+#if 0
+#define  BAD_2
+#define  BAD_3
+#endif
+
+#if 1
+#define  GOOD_2
+#else
+#define  BAD_4
+#endif
+
+#if 0
+#define  BAD_5
+#else
+#define  GOOD_3
+#endif
+
+#if 0
+#if 1
+#define  BAD_6
+#endif
+#endif\
+"""
+
+    expected = """\
+#define GOOD_1
+
+#define GOOD_2
+
+#define GOOD_3
+
+"""
+
+    print "running test_BlockList.optimizeAll"
+    out = StringOutput()
+    lines = string.split(text, '\n')
+    list = BlockParser().parse( CppLinesTokenizer(lines) )
+    #D_setlevel(2)
+    list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} )
+    #print repr(list)
+    list.write(out)
+    if out.get() != expected:
+        print "KO: macro optimization failed\n"
+        print "<<<< expecting '",
+        print expected,
+        print "'\n>>>> result '"
+        print out.get(),
+        print "'\n----"
+
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####                                                                           #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+def runUnitTests():
+    """run all unit tests for this program"""
+    print "running unit tests"
+    test_CppTokenizer()
+    test_CppExpr()
+    test_optimizeAll()
+    test_BlockParser()
+    print "OK"
+
+if __name__ == "__main__":
+    runUnitTests()
commit	1dc9e472e19acfe6dc7f41e429236e7eef7ceda1	[log] [tgz]
author	The Android Open Source Project <initial-contribution@android.com>	Tue Mar 03 19:28:35 2009 -0800
committer	The Android Open Source Project <initial-contribution@android.com>	Tue Mar 03 19:28:35 2009 -0800
tree	3be0c520fae17689bbf5584e1136fb820caef26f
parent	1767f908af327fa388b1c66883760ad851267013 [diff] [blame]