#!/usr/bin/python
# Run with directory arguments from any directory, with no special setup required.
# Or:
# for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 5 | |
| 6 | import ftplib |
| 7 | import hashlib |
| 8 | import os |
| 9 | import re |
| 10 | import shutil |
| 11 | import string |
| 12 | import subprocess |
| 13 | import sys |
| 14 | import tarfile |
| 15 | import tempfile |
| 16 | |
# Lower-case path suffixes identifying files that are never scanned for
# copyright notices.
_UNINTERESTING_SUFFIXES = (
    ".mk", ".py", ".pyc", ".txt", ".3",
    "/notice", "/readme", "/caveats",
    "/tzdata", "/zoneinfo/generate",
)


def IsUninteresting(path):
    """Return True if `path` should be skipped when looking for copyrights.

    The path is lower-cased first, so the match is case-insensitive. A path
    is uninteresting when it ends with any entry of _UNINTERESTING_SUFFIXES.
    The entries that start with "/" only match a complete final path
    component (or components), so a bare "notice" with no directory part
    does not match.

    Args:
        path: path of the candidate file, as a string.

    Returns:
        True if the file should be ignored, False otherwise.
    """
    # str.endswith accepts a tuple, so one call tests every suffix.
    return path.lower().endswith(_UNINTERESTING_SUFFIXES)
| 26 | |
def IsAutoGenerated(content):
    """Report whether `content` contains a known auto-generation banner.

    Args:
        content: the full text of a file.

    Returns:
        True if any of the recognised generator banners occurs anywhere in
        `content`, False otherwise. The match is case-sensitive.
    """
    generator_banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in generator_banners)
| 33 | |
# Every distinct copyright notice found so far, as cleaned-up text.
copyrights = set()

# A line containing any of these substrings ends a copyright header.
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)", "\tfrom: @(#)", "From: @(#)", "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ", "\t$OpenBSD: ", " $FreeBSD: ", "\t$NetBSD: ",
    "$FreeBSD$", "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Lines that are dropped when they trail the end of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)

# Cleaned lines that consist of nothing but comment decoration.
_BLANK_COMMENT_LINES = ("#", " *", "**", "-")


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    For a C-style comment the header is taken to start just after the
    nearest preceding line containing "/*" (or at the top of the file if
    there is none); for a "#" comment it starts at lines[i] itself. It runs
    until a terminator line: one containing "*/" or one of the version-control
    ID markers, or, for "#" comments, an empty line. The terminator line is
    not part of the header. Comment decoration is stripped, leading and
    trailing blank lines are dropped, and the resulting text is added to the
    module-level `copyrights` set.

    If nothing is left after cleaning (for example a one-line
    "/* Copyright ... */" comment on the first line of a file, where the
    terminator is the copyright line itself), nothing is added.

    Args:
        lines: the file's lines, without trailing newlines.
        i: index of the line that mentions the copyright.

    Returns:
        The index at which scanning stopped: the terminator line, or
        len(lines) if the file ended first.
    """
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        current = lines[i]
        if is_hash_comment and len(current) == 0:
            break
        if any(marker in current for marker in _HEADER_TERMINATORS):
            break
        i += 1

    # Trim trailing cruft. Stopping at `start` is enough: anything earlier
    # is outside the slice taken below.
    end = i
    while end > start and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in _BLANK_COMMENT_LINES:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. Bounds are checked so that an
    # empty or all-blank header cannot raise IndexError.
    first = 0
    last = len(clean_lines)
    while first < last and clean_lines[first] == "":
        first += 1
    while last > first and clean_lines[last - 1] == "":
        last -= 1

    if first < last:
        copyrights.add("\n".join(clean_lines[first:last]))

    return i
| 102 | |
def _ReadText(path):
    """Return the contents of `path` decoded as text.

    The file is read once as bytes and decoded as UTF-8. If that fails, a
    warning is written to stderr and the same bytes are decoded as
    ISO-8859-1 instead.
    """
    with open(path, 'rb') as f:
        raw = f.read()
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
        return raw.decode('iso-8859-1')


# Directories to scan come from the command line; default to the current
# directory.
args = sys.argv[1:]
if not args:
    args = ["."]

for arg in args:
    sys.stderr.write('Searching for source files in "%s"...\n' % arg)

    for directory, sub_directories, filenames in os.walk(arg):
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        # Sort in place: os.walk only honors changes made to the very list
        # object it yielded, so rebinding the name would have no effect on
        # the traversal order.
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if IsUninteresting(path):
                continue

            content = _ReadText(path)
            lines = content.split("\n")

            # Skip very short files.
            if len(lines) <= 4:
                continue

            if IsAutoGenerated(content):
                continue

            if "Copyright" not in content:
                # Files that declare themselves public domain are skipped
                # silently; anything else gets a warning.
                if "public domain" not in content.lower():
                    sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
                continue

            i = 0
            while i < len(lines):
                if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
                    # Resume scanning after the line that ended the header.
                    i = ExtractCopyrightAt(lines, i)
                i += 1

# Emit the notices as UTF-8 bytes. Python 3 exposes the underlying byte
# stream as sys.stdout.buffer; Python 2's sys.stdout accepts bytes directly.
_out = getattr(sys.stdout, 'buffer', sys.stdout)
for _notice in sorted(copyrights):
    _out.write(_notice.encode('utf-8'))
    _out.write(b'\n\n')
    _out.write(b'-------------------------------------------------------------------')
    _out.write(b'\n\n')

sys.exit(0)