libc: Update auto-gen scripts
Make the scripts use external/kernel-headers/original by default.
clean_header.py: Document -k<path>, add -d<path>
find_headers.py: Make kernel config files optional
update_all.py: Allow setting the path to kernel headers on the command-line
update_all.py: Better formatting of output on ttys
update_all.py: Automatically perform "git add/rm" on affected files.
SYSCALLS.TXT: Fix typo in __socketcall definition.
checksyscalls.py: Add support for superH architecture in the checks.
gensyscalls.py: Automatically perform "git add/rm" on affected files.
cpp.py: Fixed a bug that prevented certain type definitions to
be kept in the generated clean header (e.g.
struct ethtool_drvinfo in <linux/ethtool.h>)
All scripts will use the content of external/kernel-headers/original by default now.
The generated code removes all empty lines and trailing whitespace. This is useful
to ensure a unified output even if we change the parser again in the future.
The top-level disclaimer has been edited with update instructions to regenerate
the headers when needed.
Also, a warning is now inserted every 8th line in the final output:
/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
Changes under kernel/arch-arm and kernel/arch-x86 should correspond to whitespace
differences and additionnal struct definitions that were missed by the previous
parser implementation.
Change-Id: Icd1c056bacd766759f3e9b7bb5d63a246f3d656a
WARNING: If you run these script, do not submit the result to gerrit for now.
It seems there are discrepancies between the content of original headers
and those currently commited under bionic/libc/kernel/.
(This problem is the main motivation to insert the warning repeatedly).
Current list of issues:
- Missing SuperH headers (i.e. external/kernel-headers/original/asm-sh)
diff --git a/libc/kernel/tools/cpp.py b/libc/kernel/tools/cpp.py
index 8828a5d..8e15a67 100644
--- a/libc/kernel/tools/cpp.py
+++ b/libc/kernel/tools/cpp.py
@@ -1529,7 +1529,7 @@
class Block:
"""a class used to model a block of input source text. there are two block types:
- - direcive blocks: contain the tokens of a single pre-processor directive (e.g. #if)
+ - directive blocks: contain the tokens of a single pre-processor directive (e.g. #if)
- text blocks, contain the tokens of non-directive blocks
the cpp parser class below will transform an input source file into a list of Block
@@ -1609,6 +1609,91 @@
else:
return None
+ def removeWhiteSpace(self):
+ # Remove trailing whitespace and empty lines
+ # All whitespace is also contracted to a single space
+ if self.directive != None:
+ return
+
+ tokens = []
+ line = 0 # index of line start
+ space = -1 # index of first space, or -1
+ ii = 0
+ nn = len(self.tokens)
+ while ii < nn:
+ tok = self.tokens[ii]
+
+ # If we find a space, record its position if this is the first
+ # one the line start or the previous character. Don't append
+ # anything to tokens array yet though.
+ if tok.id == tokSPACE:
+ if space < 0:
+ space = ii
+ ii += 1
+ continue
+
+ # If this is a line space, ignore the spaces we found previously
+ # on the line, and remove empty lines.
+ if tok.id == tokLN:
+ old_line = line
+ old_space = space
+ #print "N line=%d space=%d ii=%d" % (line, space, ii)
+ ii += 1
+ line = ii
+ space = -1
+ if old_space == old_line: # line only contains spaces
+ #print "-s"
+ continue
+ if ii-1 == old_line: # line is empty
+ #print "-e"
+ continue
+ tokens.append(tok)
+ continue
+
+ # Other token, append any space range if any, converting each
+ # one to a single space character, then append the token.
+ if space >= 0:
+ jj = space
+ space = -1
+ while jj < ii:
+ tok2 = self.tokens[jj]
+ tok2.value = " "
+ tokens.append(tok2)
+ jj += 1
+
+ tokens.append(tok)
+ ii += 1
+
+ self.tokens = tokens
+
+ def writeWithWarning(self,out,warning,left_count,repeat_count):
+ # removeWhiteSpace() will sometimes creates non-directive blocks
+ # without any tokens. These come from blocks that only contained
+ # empty lines and spaces. They should not be printed in the final
+ # output, and then should not be counted for this operation.
+ #
+ if not self.directive and self.tokens == []:
+ return left_count
+
+ if self.directive:
+ out.write(str(self) + "\n")
+ left_count -= 1
+ if left_count == 0:
+ out.write(warning)
+ left_count = repeat_count
+
+ else:
+ for tok in self.tokens:
+ out.write(str(tok))
+ if tok.id == tokLN:
+ left_count -= 1
+ if left_count == 0:
+ out.write(warning)
+ left_count = repeat_count
+
+ return left_count
+
+
def __repr__(self):
"""generate the representation of a given block"""
if self.directive:
@@ -1651,7 +1736,6 @@
return result
-
class BlockList:
"""a convenience class used to hold and process a list of blocks returned by
the cpp parser"""
@@ -1694,6 +1778,10 @@
if b.isIf():
b.expr.removePrefixed(prefix,names)
+ def removeWhiteSpace(self):
+ for b in self.blocks:
+ b.removeWhiteSpace()
+
def optimizeAll(self,macros):
self.optimizeMacros(macros)
self.optimizeIf01()
@@ -1713,72 +1801,17 @@
def write(self,out):
out.write(str(self))
+ def writeWithWarning(self,out,warning,repeat_count):
+ left_count = repeat_count
+ for b in self.blocks:
+ left_count = b.writeWithWarning(out,warning,left_count,repeat_count)
+
def removeComments(self):
for b in self.blocks:
for tok in b.tokens:
if tok.id == tokSPACE:
tok.value = " "
- def removeEmptyLines(self):
- # state = 1 => previous line was tokLN
- # state = 0 => previous line was directive
- state = 1
- for b in self.blocks:
- if b.isDirective():
- #print "$$$ directive %s" % str(b)
- state = 0
- else:
- # a tokLN followed by spaces is replaced by a single tokLN
- # several successive tokLN are replaced by a single one
- #
- dst = []
- src = b.tokens
- n = len(src)
- i = 0
- #print "$$$ parsing %s" % repr(src)
- while i < n:
- # find final tokLN
- j = i
- while j < n and src[j].id != tokLN:
- j += 1
-
- if j >= n:
- # uhhh
- dst += src[i:]
- break
-
- if src[i].id == tokSPACE:
- k = i+1
- while src[k].id == tokSPACE:
- k += 1
-
- if k == j: # empty lines with spaces in it
- i = j # remove the spaces
-
- if i == j:
- # an empty line
- if state == 1:
- i += 1 # remove it
- else:
- state = 1
- dst.append(src[i])
- i += 1
- else:
- # this line is not empty, remove trailing spaces
- k = j
- while k > i and src[k-1].id == tokSPACE:
- k -= 1
-
- nn = i
- while nn < k:
- dst.append(src[nn])
- nn += 1
- dst.append(src[j])
- state = 0
- i = j+1
-
- b.tokens = dst
-
def removeVarsAndFuncs(self,knownStatics=set()):
"""remove all extern and static declarations corresponding
to variable and function declarations. we only accept typedefs
@@ -1789,66 +1822,118 @@
which is useful for optimized byteorder swap functions and
stuff like that.
"""
- # state = 1 => typedef/struct encountered
- # state = 2 => vars or func declaration encountered, skipping until ";"
# state = 0 => normal (i.e. LN + spaces)
+ # state = 1 => typedef/struct encountered, ends with ";"
+ # state = 2 => var declaration encountered, ends with ";"
+ # state = 3 => func declaration encountered, ends with "}"
state = 0
depth = 0
blocks2 = []
+ skipTokens = False
for b in self.blocks:
if b.isDirective():
blocks2.append(b)
else:
n = len(b.tokens)
i = 0
- first = 0
- if state == 2:
+ if skipTokens:
first = n
+ else:
+ first = 0
while i < n:
tok = b.tokens[i]
- if state == 0:
- bad = 0
- if tok.id in [tokLN, tokSPACE]:
- pass
- elif tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
- state = 1
- else:
- if tok.value in [ 'static', 'extern', '__KINLINE' ]:
- j = i+1
- ident = ""
- while j < n and not (b.tokens[j].id in [ '(', ';' ]):
- if b.tokens[j].id == tokIDENT:
- ident = b.tokens[j].value
- j += 1
- if j < n and ident in knownStatics:
- # this is a known static, we're going to keep its
- # definition in the final output
- state = 1
- else:
- #print "### skip static '%s'" % ident
- pass
-
- if state == 0:
- if i > first:
- #print "### intermediate from '%s': '%s'" % (tok.value, repr(b.tokens[first:i]))
- blocks2.append( Block(b.tokens[first:i]) )
- state = 2
- first = n
-
- else: # state > 0
- if tok.id == '{':
+ tokid = tok.id
+ # If we are not looking for the start of a new
+ # type/var/func, then skip over tokens until
+ # we find our terminator, managing the depth of
+ # accolades as we go.
+ if state > 0:
+ terminator = False
+ if tokid == '{':
depth += 1
-
- elif tok.id == '}':
+ elif tokid == '}':
if depth > 0:
depth -= 1
+ if (depth == 0) and (state == 3):
+ terminator = True
+ elif tokid == ';' and depth == 0:
+ terminator = True
- elif depth == 0 and tok.id == ';':
- if state == 2:
- first = i+1
+ if terminator:
+ # we found the terminator
state = 0
+ if skipTokens:
+ skipTokens = False
+ first = i+1
- i += 1
+ i = i+1
+ continue
+
+ # We are looking for the start of a new type/func/var
+ # ignore whitespace
+ if tokid in [tokLN, tokSPACE]:
+ i = i+1
+ continue
+
+ # Is it a new type definition, then start recording it
+ if tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
+ #print "$$$ keep type declr" + repr(b.tokens[i:])
+ state = 1
+ i = i+1
+ continue
+
+ # Is it a variable or function definition. If so, first
+ # try to determine which type it is, and also extract
+ # its name.
+ #
+ # We're going to parse the next tokens of the same block
+ # until we find a semi-column or a left parenthesis.
+ #
+ # The semi-column corresponds to a variable definition,
+ # the left-parenthesis to a function definition.
+ #
+ # We also assume that the var/func name is the last
+ # identifier before the terminator.
+ #
+ j = i+1
+ ident = ""
+ while j < n:
+ tokid = b.tokens[j].id
+ if tokid == '(': # a function declaration
+ state = 3
+ break
+ elif tokid == ';': # a variable declaration
+ state = 2
+ break
+ if tokid == tokIDENT:
+ ident = b.tokens[j].value
+ j += 1
+
+ if j >= n:
+ # This can only happen when the declaration
+ # does not end on the current block (e.g. with
+ # a directive mixed inside it.
+ #
+ # We will treat it as malformed because
+ # it's very hard to recover from this case
+ # without making our parser much more
+ # complex.
+ #
+ #print "### skip unterminated static '%s'" % ident
+ break
+
+ if ident in knownStatics:
+ #print "### keep var/func '%s': %s" % (ident,repr(b.tokens[i:j]))
+ pass
+ else:
+ # We're going to skip the tokens for this declaration
+ #print "### skip variable /func'%s': %s" % (ident,repr(b.tokens[i:j]))
+ if i > first:
+ blocks2.append( Block(b.tokens[first:i]))
+ skipTokens = True
+ first = n
+
+ i = i+1
if i > first:
#print "### final '%s'" % repr(b.tokens[first:i])