mirror of
https://github.com/python/cpython.git
synced 2025-07-24 03:35:53 +00:00
First (rough) cut at HTML index generation in Python. Not very pretty, but
easier to work with than the Perl code.
This commit is contained in:
parent
19e2ce58bf
commit
ec56109a0a
1 changed files with 211 additions and 0 deletions
211
Doc/tools/buildindex.py
Executable file
211
Doc/tools/buildindex.py
Executable file
|
@ -0,0 +1,211 @@
|
|||
#! /usr/bin/env python
|
||||
|
||||
"""
|
||||
"""
|
||||
__version__ = '$Revision$'
|
||||
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
|
||||
|
||||
class Node:
|
||||
|
||||
__rmtt = re.compile(r"(.*)<tt>(.*)</tt>(.*)$", re.IGNORECASE)
|
||||
__rmjunk = re.compile("<#\d+#>")
|
||||
|
||||
def __init__(self, link, str, seqno):
|
||||
self.links = [link]
|
||||
self.seqno = seqno
|
||||
# remove <#\d+#> left in by moving the data out of LaTeX2HTML
|
||||
str = self.__rmjunk.sub('', str)
|
||||
# now remove <tt>...</tt> markup; contents remain.
|
||||
if '<' in str:
|
||||
m = self.__rmtt.match(str)
|
||||
if m:
|
||||
kstr = string.join(m.group(1, 2, 3), '')
|
||||
else:
|
||||
kstr = str
|
||||
else:
|
||||
kstr = str
|
||||
kstr = string.lower(kstr)
|
||||
# build up the text
|
||||
self.text = []
|
||||
parts = string.split(str, '!')
|
||||
parts = map(string.split, parts, ['@'] * len(parts))
|
||||
for entry in parts:
|
||||
if len(entry) != 1:
|
||||
key, text = entry
|
||||
else:
|
||||
text = entry[0]
|
||||
self.text.append(text)
|
||||
# Building the key must be separate since any <tt> has been stripped
|
||||
# from the key, but can be avoided if both key and text sources are
|
||||
# the same.
|
||||
if kstr != str:
|
||||
self.key = []
|
||||
kparts = string.split(kstr, '!')
|
||||
kparts = map(string.split, kparts, ['@'] * len(kparts))
|
||||
for entry in kparts:
|
||||
if len(entry) != 1:
|
||||
key, text = entry
|
||||
else:
|
||||
key = entry[0]
|
||||
self.key.append(key)
|
||||
else:
|
||||
self.key = self.text
|
||||
|
||||
def __cmp__(self, other):
|
||||
"""Comparison operator includes sequence number, for use with
|
||||
list.sort()."""
|
||||
return self.cmp_entry(other) or cmp(self.seqno, other.seqno)
|
||||
|
||||
def cmp_entry(self, other):
|
||||
"""Comparison 'operator' that ignores sequence number."""
|
||||
for i in range(min(len(self.key), len(other.key))):
|
||||
c = (cmp(self.key[i], other.key[i])
|
||||
or cmp(self.text[i], other.text[i]))
|
||||
if c:
|
||||
return c
|
||||
return cmp(self.key, other.key)
|
||||
|
||||
def __repr__(self):
|
||||
return "<Node for %s (%s)>" % (string.join(self.text, '!'), self.seqno)
|
||||
|
||||
def __str__(self):
|
||||
return string.join(self.key, '!')
|
||||
|
||||
def dump(self):
|
||||
return "%s\0%s###%s\n" \
|
||||
% (string.join(self.links, "\0"),
|
||||
string.join(self.text, '!'),
|
||||
self.seqno)
|
||||
|
||||
|
||||
def load(fp):
|
||||
nodes = []
|
||||
rx = re.compile(r"(.*)\0(.*)###(.*)$")
|
||||
while 1:
|
||||
line = fp.readline()
|
||||
if not line:
|
||||
break
|
||||
m = rx.match(line)
|
||||
if m:
|
||||
link, str, seqno = m.group(1, 2, 3)
|
||||
nodes.append(Node(link, str, seqno))
|
||||
return nodes
|
||||
|
||||
|
||||
def split_letters(nodes):
|
||||
letter_groups = []
|
||||
group = []
|
||||
append = group.append
|
||||
if nodes:
|
||||
letter = nodes[0].key[0][0]
|
||||
letter_groups.append((letter, group))
|
||||
for node in nodes:
|
||||
nletter = node.key[0][0]
|
||||
if letter != nletter:
|
||||
letter = nletter
|
||||
group = []
|
||||
letter_groups.append((letter, group))
|
||||
append = group.append
|
||||
append(node)
|
||||
return letter_groups
|
||||
|
||||
|
||||
def format_nodes(nodes):
|
||||
# Does not create multiple links to multiple targets for the same entry;
|
||||
# uses a separate entry for each target. This is a bug.
|
||||
level = 0
|
||||
strings = ["<dl compact>"]
|
||||
append = strings.append
|
||||
prev = None
|
||||
for node in nodes:
|
||||
nlevel = len(node.key) - 1
|
||||
if nlevel > level:
|
||||
if prev is None or node.key[level] != prev.key[level]:
|
||||
append("%s\n<dl compact>" % node.text[level])
|
||||
else:
|
||||
append("<dl compact>")
|
||||
level = nlevel
|
||||
elif nlevel < level:
|
||||
append("</dl>" * (level - len(node.key) + 1))
|
||||
level = nlevel
|
||||
if prev is not None and node.key[level] != prev.key[level]:
|
||||
append("</dl>")
|
||||
else:
|
||||
append("<dl compact>")
|
||||
elif level:
|
||||
if node.key[level-1] != prev.key[level-1]:
|
||||
append("</dl>\n%s<dl compact>"
|
||||
% node.text[level-1])
|
||||
append("%s%s</a><br>" % (node.links[0], node.text[-1]))
|
||||
for link in node.links[1:]:
|
||||
strings[-1] = strings[-1][:-4] + ","
|
||||
append(link + "[Link]</a><br>")
|
||||
prev = node
|
||||
append("</dl>" * (level + 1))
|
||||
append("")
|
||||
append("")
|
||||
return string.join(strings, "\n")
|
||||
|
||||
|
||||
def format_letter(letter):
|
||||
if letter == '.':
|
||||
lettername = ". (dot)"
|
||||
elif letter == '_':
|
||||
lettername = "_ (underscore)"
|
||||
else:
|
||||
lettername = string.upper(letter)
|
||||
return "<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
|
||||
% (letter, lettername)
|
||||
|
||||
|
||||
def format_html(nodes):
|
||||
letter_groups = split_letters(nodes)
|
||||
items = []
|
||||
for letter, nodes in letter_groups:
|
||||
s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
|
||||
items.append(s)
|
||||
s = "<hr><center>\n%s</center>\n" % string.join(items, " |\n")
|
||||
for letter, nodes in letter_groups:
|
||||
s = s + format_letter(letter) + format_nodes(nodes)
|
||||
return s
|
||||
|
||||
|
||||
def collapse(nodes):
|
||||
"""Collapse sequences of nodes with matching keys into a single node.
|
||||
Destructive."""
|
||||
if len(nodes) < 2:
|
||||
return
|
||||
prev = nodes[0]
|
||||
i = 1
|
||||
while i < len(nodes):
|
||||
node = nodes[i]
|
||||
if not node.cmp_entry(prev):
|
||||
prev.links.append(node.links[0])
|
||||
del nodes[i]
|
||||
## sys.stderr.write("collapsing %s\n" % `node`)
|
||||
else:
|
||||
i = i + 1
|
||||
prev = node
|
||||
|
||||
|
||||
def dump(nodes, fp):
|
||||
for node in nodes:
|
||||
fp.write(node.dump())
|
||||
|
||||
|
||||
def main():
|
||||
fn = sys.argv[1]
|
||||
nodes = load(open(fn))
|
||||
nodes.sort()
|
||||
dump(nodes, open(fn + ".dump-1", "w"))
|
||||
collapse(nodes)
|
||||
dump(nodes, open(fn + ".dump-2", "w"))
|
||||
sys.stdout.write(format_html(nodes))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Add table
Add a link
Reference in a new issue