blob: 79b4ea97e97b76202583e57f571e84c4bf825170 [file] [log] [blame]
#!/usr/bin/python
# Run with directory arguments from any directory, with no special setup required.
# Or:
# for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done

import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
import tempfile
16
def IsUninteresting(path):
    """Return True if `path` names a file that should not be scanned.

    The comparison is case-insensitive (the path is lower-cased first).
    A file is skipped when it has one of the listed extensions, or when its
    final path component is one of the listed names. The name checks include
    the leading "/", so a bare "README" with no directory part is not skipped.
    """
    path = path.lower()
    # Build files, scripts, plain-text docs and section-3 man pages.
    if path.endswith((".mk", ".py", ".pyc", ".txt", ".3")):
        return True
    # Per-directory documentation files.
    if path.endswith(("/notice", "/readme", "/caveats")):
        return True
    # Time zone data and its generator.
    if path.endswith(("/tzdata", "/zoneinfo/generate")):
        return True
    return False
26
def IsAutoGenerated(content):
    """Return True if `content` carries a known auto-generation marker.

    `content` is the full text of a file. The check is a case-sensitive
    substring search for each marker.
    """
    markers = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(marker in content for marker in markers)
33
# Every distinct copyright notice extracted so far. ExtractCopyrightAt adds to
# this set; storing notices in a set de-duplicates identical text.
copyrights = set()

# Substrings that mark the end of a copyright header (mostly BSD-style
# version-control identifiers that follow the license text).
_HEADER_TERMINATORS = (
    "\t@(#)", "\tfrom: @(#)", "From: @(#)", "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ", "\t$OpenBSD: ", " $FreeBSD: ", "\t$NetBSD: ",
    "$FreeBSD$", "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Lines that are dropped from the tail of a header.
_TRAILING_CRUFT = (" *", " * ====================================================")


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around lines[i] and record it.

    Args:
        lines: the file's content as a list of lines (no trailing newlines).
        i: index of a line that mentions a copyright.

    Returns:
        The index of the line that terminated the header (or len(lines) if
        the header ran to the end of the file), so the caller can resume
        scanning from there.

    The cleaned-up header text is added to the module-level `copyrights` set.
    If nothing is left after cleaning, nothing is recorded.
    """
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # For non-"#" comments, walk backwards until the preceding line holds
    # the "/*" opener (or we reach the top of the file).
    start = i
    if not is_hash_comment:
        while start > 0:
            if "/*" in lines[start - 1]:
                break
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        current = lines[i]
        if "*/" in current:
            break
        # A "#" comment block ends at the first empty line.
        if is_hash_comment and len(current) == 0:
            break
        if any(marker in current for marker in _HEADER_TERMINATORS):
            break
        i += 1

    end = i

    # Trim trailing cruft.
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line == "#" or line == " *" or line == "**" or line == "-":
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The emptiness checks matter: the
    # range can be empty (e.g. "/*" on the previous line and "*/" on the
    # copyright line itself) or consist solely of blank lines.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        clean_lines.pop()

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i
102
def ReadFileContent(path):
    """Return the text of `path`, decoded as UTF-8 with ISO-8859-1 fallback.

    The file is read once as bytes. If the bytes are not valid UTF-8, a
    warning naming the file is written to stderr and the same bytes are
    decoded as ISO-8859-1 instead.
    """
    with open(path, 'rb') as f:
        raw = f.read()
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
        return raw.decode('iso-8859-1')


def main(args):
    """Scan the directories in `args` and print every distinct copyright.

    Args:
        args: list of directory paths to walk; "." is used when it is empty.

    Progress and warnings go to stderr. The collected notices are written to
    stdout as UTF-8, sorted, each followed by a separator line.
    """
    if not args:
        args = ["."]

    for arg in args:
        sys.stderr.write('Searching for source files in "%s"...\n' % arg)

        for directory, sub_directories, filenames in os.walk(arg):
            if ".git" in sub_directories:
                sub_directories.remove(".git")
            # Sort in place: os.walk only honours changes made to the list
            # object it handed out, not a rebinding of the local name.
            sub_directories.sort()

            for filename in sorted(filenames):
                path = os.path.join(directory, filename)
                if IsUninteresting(path):
                    continue

                content = ReadFileContent(path)
                lines = content.split("\n")

                # Files this short are ignored.
                if len(lines) <= 4:
                    continue

                if IsAutoGenerated(content):
                    continue

                if "Copyright" not in content:
                    # Public domain files are skipped silently; anything else
                    # without a notice gets a warning.
                    if "public domain" in content.lower():
                        continue
                    sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
                    continue

                i = 0
                while i < len(lines):
                    if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
                        # Resume from the line that ended the header.
                        i = ExtractCopyrightAt(lines, i)
                    i += 1

    # Write bytes directly so the output is UTF-8 regardless of the console
    # encoding: sys.stdout.buffer where it exists, otherwise sys.stdout.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    for notice in sorted(copyrights):
        out.write(notice.encode('utf-8'))
        out.write(b'\n\n-------------------------------------------------------------------\n\n')


if __name__ == "__main__":
    main(sys.argv[1:])
    sys.exit(0)