blob: 36630db311d0d0de8b79bca99d955fe0faebc901 [file] [log] [blame]
Daniel Berlinf5a97d72012-03-29 10:33:19 -04001#!/usr/bin/env python
2#
3# Copyright (C) 2012 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""
17Usage: generate-notice-files [plain text output file] [html output file] [file title] [directory of notices]
18
19Generate the Android notice files, including both text and html files.
20
21-h to display this usage message and exit.
22"""
Anthony Kingc713d762015-11-03 00:23:11 +000023
24from __future__ import print_function
25
Daniel Berlinf5a97d72012-03-29 10:33:19 -040026from collections import defaultdict
27import getopt
28import hashlib
29import itertools
30import os
31import os.path
32import re
33import sys
34
# Number of bytes read per chunk when hashing a notice file.
MD5_BLOCKSIZE = 1024 * 1024

# Maps each HTML-special character to the entity that represents it.
# "&" is handled per character by html_escape(), so already-produced
# entities are never double-escaped.
HTML_ESCAPE_TABLE = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
    }
43
# Parse the command line at module load time. The only recognized flag is -h;
# everything else is treated as a positional argument and left in `args`,
# which is later handed to main().
try:
    opts, args = getopt.getopt(sys.argv[1:], "h")
except getopt.GetoptError as err:
    # Unknown flag: show the getopt error followed by the usage text.
    print(str(err))
    print(__doc__)
    sys.exit(2)

for o, a in opts:
    if o == "-h":
        # -h prints the usage message and exits (with status 2).
        print(__doc__)
        sys.exit(2)
    else:
        print("unhandled option %s" % o)

# main() indexes args[0]..args[3]: text output file, HTML output file,
# file title, and the directory of notices.
if len(args) != 4:
    print("""need exactly four arguments, the two output files, the file title
             and the directory containing notices, not %d""" % len(args))
    print(__doc__)
    sys.exit(1)
63
def hexify(s):
    """Return the contents of S as a lowercase hex string, two digits per item.

    S may be a byte string (as returned by a hashlib digest()) or a text
    string. Iterating a Python 3 bytes object yields ints, for which ord()
    raises TypeError, so byte strings are routed through bytearray(), which
    yields ints on both Python 2 and Python 3.
    """
    if isinstance(s, (bytes, bytearray)):
        values = bytearray(s)
    else:
        values = [ord(c) for c in s]
    return "".join("%02x" % v for v in values)
66
def md5sum(filename):
    """Calculate an MD5 of the file given by FILENAME,
    and return hex digest as a string.
    Output should be compatible with md5sum command"""

    digest = hashlib.md5()
    # The with-block closes the file even if a read fails part-way through.
    with open(filename, "rb") as f:
        # Read in MD5_BLOCKSIZE chunks so large files are never held in
        # memory at once; an empty read marks end of file.
        while True:
            block = f.read(MD5_BLOCKSIZE)
            if not block:
                break
            digest.update(block)
    # hexdigest() yields the same lowercase hex string on Python 2 and 3.
    return digest.hexdigest()
81
82
def html_escape(text):
    """Return TEXT with each character listed in HTML_ESCAPE_TABLE replaced
    by its entity; all other characters are passed through unchanged."""
    pieces = []
    for char in text:
        pieces.append(HTML_ESCAPE_TABLE.get(char, char))
    return "".join(pieces)
86
# Inline stylesheet written into the <head> of the generated HTML notice file.
# The class names here (same-license, label, file-list) are the ones emitted
# by combine_notice_files_html().
HTML_OUTPUT_CSS="""
<style type="text/css">
body { padding: 0; font-family: sans-serif; }
.same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }
.label { font-weight: bold; }
.file-list { margin-left: 1em; color: blue; }
</style>
"""
95
def combine_notice_files_html(file_hash, input_dir, output_filename):
    """Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME.

    FILE_HASH maps a content hash to the list of notice file paths sharing
    that content. INPUT_DIR is the directory prefix stripped (together with
    the trailing ".txt") from each path before it is displayed.
    """

    # re.escape() keeps regex metacharacters in the directory name (e.g. "+")
    # from being interpreted, and the dot of ".txt" is matched literally.
    SRC_DIR_STRIP_RE = re.compile(re.escape(input_dir) + r"(/.*)\.txt")

    # Set up a filename to row id table (anchors inside tables don't work in
    # most browsers, but href's to table row ids do). Every file in a group
    # of identical notices shares that group's row id.
    id_table = {}
    for id_count, value in enumerate(file_hash.values()):
        for filename in value:
            id_table[filename] = id_count

    # Text mode: print() writes str, which a binary-mode file rejects on
    # Python 3. The with-block closes the file even if writing fails.
    with open(output_filename, "w") as output_file:
        # Output the header pieces
        print("<html><head>", file=output_file)
        print(HTML_OUTPUT_CSS, file=output_file)
        print('</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">', file=output_file)

        # Output our table of contents
        print('<div class="toc">', file=output_file)
        print("<ul>", file=output_file)

        # Flatten the list of lists into a single list of filenames
        sorted_filenames = sorted(itertools.chain.from_iterable(file_hash.values()))

        # Print out a nice table of contents
        for filename in sorted_filenames:
            stripped_filename = SRC_DIR_STRIP_RE.sub(r"\1", filename)
            print('<li><a href="#id%d">%s</a></li>' % (id_table[filename], stripped_filename), file=output_file)

        print("</ul>", file=output_file)
        print("</div><!-- table of contents -->", file=output_file)
        # Output the individual notice file lists
        print('<table cellpadding="0" cellspacing="0" border="0">', file=output_file)
        for value in file_hash.values():
            print('<tr id="id%d"><td class="same-license">' % id_table[value[0]], file=output_file)
            print('<div class="label">Notices for file(s):</div>', file=output_file)
            print('<div class="file-list">', file=output_file)
            for filename in sorted(value):
                print("%s <br/>" % (SRC_DIR_STRIP_RE.sub(r"\1", filename)), file=output_file)
            print("</div><!-- file-list -->", file=output_file)
            print(file=output_file)
            print('<pre class="license-text">', file=output_file)
            # All files in the group have identical content, so the first
            # one supplies the license text for the whole group.
            with open(value[0]) as notice_file:
                print(html_escape(notice_file.read()), file=output_file)
            print("</pre><!-- license-text -->", file=output_file)
            print("</td></tr><!-- same-license -->", file=output_file)
            print(file=output_file)
            print(file=output_file)
            print(file=output_file)

        # Finish off the file output
        print("</table>", file=output_file)
        print("</body></html>", file=output_file)
153
def combine_notice_files_text(file_hash, input_dir, output_filename, file_title):
    """Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME.

    FILE_HASH maps a content hash to the list of notice file paths sharing
    that content. FILE_TITLE is written as the first line of the output.
    INPUT_DIR is the directory prefix stripped (together with the trailing
    ".txt") from each path before it is listed.
    """

    # re.escape() keeps regex metacharacters in the directory name (e.g. "+")
    # from being interpreted, and the dot of ".txt" is matched literally.
    SRC_DIR_STRIP_RE = re.compile(re.escape(input_dir) + r"(/.*)\.txt")
    # Text mode: print() writes str, which a binary-mode file rejects on
    # Python 3. The with-block closes the file even if writing fails.
    with open(output_filename, "w") as output_file:
        print(file_title, file=output_file)
        for value in file_hash.values():
            print("============================================================", file=output_file)
            print("Notices for file(s):", file=output_file)
            for filename in sorted(value):
                print(SRC_DIR_STRIP_RE.sub(r"\1", filename), file=output_file)
            print("------------------------------------------------------------", file=output_file)
            # All files in the group have identical content, so the first
            # one supplies the notice text for the whole group.
            with open(value[0]) as notice_file:
                print(notice_file.read(), file=output_file)
168
def main(args):
    """Group the notice files under the directory in ARGS by content and
    write the combined HTML and text notice files.

    ARGS is [text output file, html output file, file title, notices dir].
    """
    txt_output_file, html_output_file, file_title = args[0], args[1], args[2]

    # Walk the notices directory and bucket every .txt file by its MD5, so
    # that files with identical content end up in the same list.
    input_dir = os.path.normpath(args[3])
    files_with_same_hash = defaultdict(list)
    for root, _subdirs, names in os.walk(input_dir):
        for name in names:
            if not name.endswith(".txt"):
                continue
            path = os.path.join(root, name)
            files_with_same_hash[md5sum(path)].append(path)

    print("Combining NOTICE files into HTML")
    combine_notice_files_html(files_with_same_hash, input_dir, html_output_file)
    print("Combining NOTICE files into text")
    combine_notice_files_text(files_with_same_hash, input_dir, txt_output_file, file_title)
189
# Script entry point: `args` is the module-level positional-argument list
# produced by the getopt parsing near the top of the file.
if __name__ == "__main__":
    main(args)