Blame - libc/tools/generate-NOTICE.py - platform_bionic

blob: eaae328d7f2b4c2784e8a62626ba4f60c81e5ac3 [file] [log] [blame]

Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame^]	1	#!/usr/bin/python
				2	# Run with directory arguments from any directory, with no special setup required.
				3
				4	import ftplib
				5	import hashlib
				6	import os
				7	import re
				8	import shutil
				9	import string
				10	import subprocess
				11	import sys
				12	import tarfile
				13	import tempfile
				14
				15	def IsUninteresting(path):
				16	path = path.lower()
				17	if path.endswith(".mk") or path.endswith(".py") or path.endswith(".pyc") or path.endswith(".txt") or path.endswith(".3"):
				18	return True
				19	if path.endswith("/notice") or path.endswith("/readme") or path.endswith("/caveats"):
				20	return True
				21	if path.endswith("/zoneinfo.dat") or path.endswith("/zoneinfo.idx") or path.endswith("/zoneinfo.version") or path.endswith("/zoneinfo/generate"):
				22	return True
				23	return False
				24
				25	def IsAutoGenerated(content):
				26	if "generated by gensyscalls.py" in content or "generated by genserv.py" in content:
				27	return True
				28	if "This header was automatically generated from a Linux kernel header" in content:
				29	return True
				30	return False
				31
				32	copyrights = set()
				33
				34	def ExtractCopyrightAt(lines, i):
				35	hash = lines[i].startswith("#")
				36
				37	# Read comment lines until we hit something that terminates a
				38	# copyright header.
				39	start = i
				40	while i < len(lines):
				41	if "*/" in lines[i]:
				42	break
				43	if hash and len(lines[i]) == 0:
				44	break
				45	if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]:
				46	break
				47	if "\tcitrus Id: " in lines[i]:
				48	break
				49	if "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]:
				50	break
				51	if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]:
				52	break
				53	i += 1
				54
				55	end = i
				56
				57	# Trim trailing cruft.
				58	while end > 0:
				59	if lines[end - 1] != " " and lines[end - 1] != " ====================================================":
				60	break
				61	end -= 1
				62
				63	# Remove C/assembler comment formatting, pulling out just the text.
				64	clean_lines = []
				65	for line in lines[start:end]:
				66	line = line.replace("\t", " ")
				67	line = line.replace("/* ", "")
				68	line = line.replace(" * ", "")
				69	line = line.replace("** ", "")
				70	line = line.replace("# ", "")
				71	if line.startswith("++Copyright++"):
				72	continue
				73	line = line.replace("--Copyright--", "")
				74	line = line.rstrip()
				75	# These come last and take care of "blank" comment lines.
				76	if line == "#" or line == " " or line == "*" or line == "-":
				77	line = ""
				78	clean_lines.append(line)
				79
				80	# Trim blank lines from head and tail.
				81	while clean_lines[0] == "":
				82	clean_lines = clean_lines[1:]
				83	while clean_lines[len(clean_lines) - 1] == "":
				84	clean_lines = clean_lines[0:(len(clean_lines) - 1)]
				85
				86	copyright = "\n".join(clean_lines)
				87	copyrights.add(copyright)
				88
				89	return i
				90
				91	args = sys.argv[1:]
				92	if len(args) == 0:
				93	args = [ "." ]
				94
				95	for arg in args:
				96	sys.stderr.write('Searching for source files in "%s"...\n' % arg)
				97
				98	for directory, sub_directories, filenames in os.walk(arg):
				99	if ".git" in sub_directories:
				100	sub_directories.remove(".git")
				101	sub_directories = sorted(sub_directories)
				102
				103	for filename in sorted(filenames):
				104	path = os.path.join(directory, filename)
				105	if IsUninteresting(path):
				106	#print "ignoring uninteresting file %s" % path
				107	continue
				108
				109	try:
				110	content = open(path, 'r').read().decode('utf-8')
				111	except:
				112	# TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.
				113	sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
				114	content = open(path, 'r').read().decode('iso-8859-1')
				115
				116	lines = content.split("\n")
				117
				118	if len(lines) <= 4:
				119	#print "ignoring short file %s" % path
				120	continue
				121
				122	if IsAutoGenerated(content):
				123	#print "ignoring auto-generated file %s" % path
				124	continue
				125
				126	if not "Copyright" in content:
				127	if "public domain" in content.lower():
				128	#print "ignoring public domain file %s" % path
				129	continue
				130	sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
				131	continue
				132
				133	i = 0
				134	while i < len(lines):
				135	if "Copyright" in lines[i]:
				136	i = ExtractCopyrightAt(lines, i)
				137	i += 1
				138
				139	#print path
				140
				141	for copyright in copyrights:
				142	print copyright.encode('utf-8')
				143	print
				144	print '-------------------------------------------------------------------'
				145	print
				146
				147	sys.exit(0)