cpython/Doc/tools/toc2bkm.py
Fred Drake ac77b79df6 clean_title(): Clean a little more carefully. Still does funny things with
underscores.  Might this be a fundamental PDF limitation?  Hm, could
	still be a TeX thing.
1998-03-10 14:02:35 +00:00

133 lines
3.2 KiB
Python
Executable file

#! /usr/bin/env python
"""Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.
The output file has an extension of '.bkm' instead of '.out', since hyperref
already uses that extension. Let's avoid clashing.
"""
import getopt
import os
import re
import string
import sys
# Ench item in an entry is a tuple of:
#
# Section #, Title String, Page #, List of Sub-entries
cline_re = r"""^
\\contentsline\ \{([a-z]*)} # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})? # section number
(.*)} # title string
\{(\d+)}$""" # page number
cline_rx = re.compile(cline_re, re.VERBOSE)
OUTER_TO_INNER = -1
_transition_map = {
('chapter', 'section'): OUTER_TO_INNER,
('section', 'subsection'): OUTER_TO_INNER,
('subsection', 'subsubsection'): OUTER_TO_INNER,
('subsubsection', 'subsection'): 1,
('subsection', 'section'): 1,
('section', 'chapter'): 1,
('subsection', 'chapter'): 2,
('subsubsection', 'section'): 2,
('subsubsection', 'chapter'): 3,
}
def parse_toc(fp, bigpart=None):
toc = top = []
stack = [toc]
level = bigpart or 'chapter'
lineno = 0
while 1:
line = fp.readline()
if not line:
break
lineno = lineno + 1
m = cline_rx.match(line)
if m:
stype, snum, title, pageno = m.group(1, 2, 3, 4)
title = clean_title(title)
entry = (stype, snum, title, string.atoi(pageno), [])
if stype == level:
toc.append(entry)
else:
direction = _transition_map[(level, stype)]
if direction == OUTER_TO_INNER:
toc = toc[-1][-1]
stack.insert(0, toc)
toc.append(entry)
else:
for i in range(direction):
del stack[0]
toc = stack[0]
toc.append(entry)
level = stype
else:
sys.stderr.write("l.%s: " + line)
return top
hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
title_rx = re.compile(r"\\([a-zA-Z])+\s+")
title_trans = string.maketrans("", "")
def clean_title(title):
title = raisebox_rx.sub("", title)
title = hackscore_rx.sub(r"\\_", title)
pos = 0
while 1:
m = title_rx.search(title, pos)
if m:
start = m.start()
print "found", `title[start:m.end()]`
if title[start:start+15] != "\\textunderscore":
title = title[:start] + title[m.end():]
pos = start + 1
else:
break
title = string.translate(title, title_trans, "{}")
print `title`
return title
def write_toc(toc, fp):
for entry in toc:
write_toc_entry(entry, fp, 0)
def write_toc_entry(entry, fp, layer):
stype, snum, title, pageno, toc = entry
s = "\\pdfoutline goto name{page.%d}" % pageno
if toc:
s = "%s count -%d" % (s, len(toc))
if snum:
title = "%s %s" % (snum, title)
s = "%s {%s}\n" % (s, title)
fp.write(s)
for entry in toc:
write_toc_entry(entry, fp, layer + 1)
def main():
bigpart = None
opts, args = getopt.getopt(sys.argv[1:], "c:")
if opts:
bigpart = opts[0][1]
if not args:
usage()
sys.exit(2)
for filename in args:
base, ext = os.path.splitext(filename)
ext = ext or ".toc"
toc = parse_toc(open(base + ext), bigpart)
write_toc(toc, open(base + ".bkm", "w"))
if __name__ == "__main__":
main()