mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 08:19:20 +00:00 
			
		
		
		
	 63a0191c8a
			
		
	
	
		63a0191c8a
		
	
	
	
	
		
			
			couple of these are currently found in index data, but these should all be handled in the same way. Closes SF bug #952737.
		
			
				
	
	
		
			388 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			388 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #! /usr/bin/env python
 | |
| 
 | |
| __version__ = '$Revision$'
 | |
| 
 | |
| import os.path
 | |
| import re
 | |
| import string
 | |
| import sys
 | |
| 
 | |
| from xml.sax.saxutils import quoteattr
 | |
| 
 | |
| 
 | |
# Bound str.join methods used throughout this module: bang_join glues parts
# together with "!" and null_join concatenates them with no separator.
bang_join = "!".join
null_join = "".join
 | |
| 
 | |
# (compiled pattern, replacement text) pairs; Node.__init__ applies each of
# them, in order, to the raw entry string.
REPLACEMENTS = [
    # Hackish way to deal with macros replaced with simple text
    (re.compile(r"\\ABC\b"), "ABC"),
    (re.compile(r"\\ASCII\b"), "ASCII"),
    (re.compile(r"\\Cpp\b"), "C++"),
    (re.compile(r"\\EOF\b"), "EOF"),
    (re.compile(r"\\NULL\b"), "NULL"),
    (re.compile(r"\\POSIX\b"), "POSIX"),
    (re.compile(r"\\UNIX\b"), "Unix"),
    # strip the <#NNN#> markers left over from LaTeX2HTML
    (re.compile(r"<#\d+#>"), ""),
    ]
 | |
| 
 | |
class Node:
    """A single index entry.

    Attributes set by the constructor:
      links -- list of link strings; starts with the one given link and
               grows when collapse() merges equal entries into this node
      seqno -- the sequence number exactly as passed in
      text  -- list of display parts produced by split_entry_text()
      key   -- list of sort-key parts produced by split_entry_key()
    """

    # Number of leading key parts shared with the last entry of the previous
    # column; split_columns() overrides this per instance when it applies.
    continuation = 0

    def __init__(self, link, str, seqno):
        self.links = [link]
        self.seqno = seqno
        # Run every macro/cleanup substitution over the raw entry text
        # before it is split into parts.
        for pattern, replacement in REPLACEMENTS:
            str = pattern.sub(replacement, str)
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    def __cmp__(self, other):
        """Order nodes by entry first and by sequence number second.

        Intended for list.sort(); the sequence number only breaks ties
        between entries that cmp_entry() considers equal."""
        order = self.cmp_entry(other)
        if not order:
            order = cmp(self.seqno, other.seqno)
        return order

    def cmp_entry(self, other):
        """Three-way comparison of two entries, ignoring sequence numbers."""
        shared = min(len(self.key), len(other.key))
        for pos in range(shared):
            # The key part decides; the text part settles equal keys.
            diff = (cmp_part(self.key[pos], other.key[pos])
                    or cmp_part(self.text[pos], other.text[pos]))
            if diff:
                return diff
        # All shared parts agree: fall back to comparing the whole lists.
        return cmp(self.key, other.key) or cmp(self.text, other.text)

    def __repr__(self):
        label = bang_join(self.text)
        return "<Node for %s (%s)>" % (label, self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        """Return this node as one text line, using the \\1-separated
        "links, text ### seqno" layout that load() parses."""
        joined_links = "\1".join(self.links)
        joined_text = bang_join(self.text)
        return "%s\1%s###%s\n" % (joined_links, joined_text, self.seqno)
 | |
| 
 | |
| 
 | |
def cmp_part(s1, s2):
    """Three-way comparison of two entry parts; returns -1, 0 or 1.

    Rules, in order:
      * identical strings compare equal;
      * if one string is, ignoring case, a proper prefix of the other,
        the shorter one sorts first;
      * otherwise the lowercased strings are compared, and the original
        strings break the tie when they differ only in case.

    The comparisons are spelled out with < and > rather than the cmp()
    builtin, so the function does not depend on a Python-2-only name.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    if l1 != l2:
        return (l1 > l2) - (l1 < l2)
    # Same letters, different case: order by the original strings.
    return (s1 > s2) - (s1 < s2)
 | |
| 
 | |
| 
 | |
def split_entry(str, which):
    """Split an index entry into its "!"-separated parts.

    Each part may itself hold "@"-separated fields.  For a part with more
    than one field, field number `which` is selected; a part without any
    "@" is used as it is.  Returns the list of selected strings.
    """
    selected = []
    for segment in str.split('!'):
        fields = segment.split('@')
        if len(fields) == 1:
            selected.append(fields[0])
        else:
            selected.append(fields[which])
    return selected
 | |
| 
 | |
| 
 | |
# Matches text containing a <tt>...</tt> element (optionally carrying a
# class attribute); groups 1-3 are the text before, inside and after it.
_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
# Matches a literal "()" pair.
_rmparens = re.compile(r"\(\)")
 | |
| 
 | |
def split_entry_key(str):
    """Return the list of sort-key parts for an index entry.

    For every part selected by split_entry(str, 1):
      * if it contains a <tt>...</tt> element, the tags are dropped and
        the surrounding and enclosed text is kept;
      * otherwise the part is lowercased;
      * every "()" is removed;
      * trim_ignored_letters() is applied to the result.

    A real list is always returned (instead of the result of map()), so
    callers can take len() of the key and index it on any Python version.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = null_join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
 | |
| 
 | |
| 
 | |
def split_entry_text(str):
    """Return the list of display-text parts for an index entry.

    When the entry contains a <tt>...</tt> element, the tags are dropped
    (the enclosed text is kept) before the entry is split with
    split_entry(str, 1).
    """
    # Only try the regular expression when there might be markup at all.
    found = '<' in str and _rmtt.match(str)
    if found:
        str = null_join(found.group(1, 2, 3))
    return split_entry(str, 1)
 | |
| 
 | |
| 
 | |
def load(fp):
    """Read index records from the file-like object `fp`.

    Lines are fetched with fp.readline() until it returns an empty value.
    A line of the form "<link>\\1<text>###<seqno>" becomes a Node; lines
    that do not match are skipped.  Returns the list of nodes in input
    order.
    """
    record_rx = re.compile("(.*)\1(.*)###(.*)$")
    nodes = []
    line = fp.readline()
    while line:
        found = record_rx.match(line)
        if found:
            link, str, seqno = found.group(1, 2, 3)
            nodes.append(Node(link, str, seqno))
        line = fp.readline()
    return nodes
 | |
| 
 | |
| 
 | |
def trim_ignored_letters(s):
    """Return `s` lowercased, without a leading "$" if it has one.

    Dropping the "$" keeps environment variables with the leading letter
    of their name.
    """
    if s[:1] == "$":
        s = s[1:]
    return s.lower()
 | |
| 
 | |
def get_first_letter(s):
    """Return the letter under which the entry text `s` is grouped.

    Text starting with the "<tex2html_percent_mark>" placeholder is filed
    under "%"; anything else is filed under the first character of
    trim_ignored_letters(s).
    """
    if not s.startswith("<tex2html_percent_mark>"):
        return trim_ignored_letters(s)[0]
    return "%"
 | |
| 
 | |
| 
 | |
def split_letters(nodes):
    """Group consecutive nodes by the first letter of their first text part.

    Returns a list of (letter, nodes) tuples in input order.  A new group
    is started each time the letter changes, so the input should already
    be sorted.  An empty input gives an empty list.
    """
    letter_groups = []
    for node in nodes:
        initial = get_first_letter(node.text[0])
        if not letter_groups or letter_groups[-1][0] != initial:
            letter_groups.append((initial, []))
        letter_groups[-1][1].append(node)
    return letter_groups
 | |
| 
 | |
| 
 | |
def group_symbols(groups):
    """Merge the leading non-identifier groups into one "Symbols" group.

    `groups` is a list of (letter, nodes) tuples and is modified in place:
    every leading group whose letter is neither an ASCII letter nor "_"
    is removed, and its nodes are gathered, in order, into one
    ("Symbols", nodes) tuple inserted at the front.  Nothing is inserted
    when there were no such groups.

    An empty list, or a list made up only of symbol groups, is handled
    without running off the end of `groups`.
    """
    ident_letters = string.ascii_letters + "_"
    entries = []
    # Test `groups` first: the loop may consume every group.
    while groups and groups[0][0] not in ident_letters:
        entries += groups[0][1]
        del groups[0]
    if entries:
        groups.insert(0, ("Symbols", entries))
 | |
| 
 | |
| 
 | |
| # need a function to separate the nodes into columns...
 | |
def split_columns(nodes, columns=1):
    """Distribute `nodes` over `columns` columns; returns a list of lists.

    With columns <= 1 the result is [nodes], holding the list object that
    was passed in.  Otherwise the leading columns take one entry more
    than the remaining ones when the entries do not divide evenly.  In
    that case `nodes` is consumed: the entries placed in the leading
    columns are deleted from it.

    When a column starts with an entry that shares leading key parts with
    the last entry of the column before it, that entry's `continuation`
    attribute is set to the number of shared parts.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    total = len(nodes)
    colheight = int(total / columns)
    numlong = int(total % columns)
    if numlong:
        colheight = colheight + 1
    else:
        numlong = columns
    cols = [nodes[i * colheight:(i + 1) * colheight] for i in range(numlong)]
    del nodes[:numlong * colheight]
    # Whatever is left goes into columns that are one entry shorter.
    colheight = colheight - 1
    if colheight == 0:
        # Nothing can go into zero-height columns: pad with empty ones.
        cols = cols + (columns - len(cols)) * [[]]
    else:
        numshort = int(len(nodes) / colheight)
        for i in range(numshort):
            cols.append(nodes[i * colheight:(i + 1) * colheight])
    #
    # If items continue across columns, make sure they are marked
    # as continuations so the user knows to look at the previous column.
    #
    for left, right in zip(cols, cols[1:]):
        if not left or not right:
            # An empty column ends the scan.
            break
        tail = left[-1]
        head = right[0]
        for depth in range(min(len(tail.key), len(head.key))):
            if tail.key[depth] != head.key[depth]:
                break
            head.continuation = depth + 1
    return cols
 | |
| 
 | |
| 
 | |
# Whitespace repeated once per nesting level when format_column() indents
# the <dl> markup it generates.
DL_LEVEL_INDENT = "  "
 | |
| 
 | |
def format_column(nodes):
    """Render one column of index nodes as nested <dl> markup.

    Consecutive entries that share leading text parts are nested under a
    common term instead of repeating it.  An intermediate term is tagged
    " (continued)" when the node's `continuation` count covers it.  Each
    link is written immediately before its label and followed by "</a>".
    NOTE(review): this presumably means every link string carries its own
    opening <a ...> tag; confirm against the index data.
    """
    pieces = ["<dl compact='compact'>"]
    depth = 0
    previous = []
    for node in nodes:
        current = node.text
        # How many leading parts does this entry share with the last one?
        shared = 0
        for old, new in zip(previous, current):
            if old != new:
                break
            shared = shared + 1
        if shared > depth:
            pieces.append("<dl compact='compact'>" * (shared - depth) + "\n")
        elif depth > shared:
            pieces.append("\n")
            pieces.append(depth * DL_LEVEL_INDENT)
            pieces.append("</dl>" * (depth - shared))
        depth = shared
        # Open one nested list for every intermediate part not shared.
        for pos in range(shared, len(current) - 1):
            depth = depth + 1
            suffix = ""
            if node.continuation > pos:
                suffix = " (continued)"
            pieces.append("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>"
                          % (current[pos], suffix, depth * DL_LEVEL_INDENT))
        indent = depth * DL_LEVEL_INDENT
        pieces.append("\n%s<dt>%s%s</a>"
                      % (indent, node.links[0], current[-1]))
        for link in node.links[1:]:
            pieces.append(",\n%s    %s[Link]</a>" % (indent, link))
        previous = current
    # Close every list that is still open, including the outermost one.
    pieces.append("\n")
    pieces.append("</dl>" * (depth + 1))
    return null_join(pieces)
 | |
| 
 | |
| 
 | |
def format_nodes(nodes, columns=1):
    """Return the HTML for `nodes`, laid out in `columns` columns.

    With a single column (or fewer) the result is just
    format_column(nodes).  Otherwise the nodes are distributed with
    split_columns() and every column goes into its own cell of a
    one-row, full-width table; each cell gets an equal integer
    percentage of the width.
    """
    if columns <= 1:
        return format_column(nodes)
    colwidth = int(100 / columns)
    strings = ['<table width="100%"><tr valign="top">']
    append = strings.append
    for col in split_columns(nodes, columns):
        append('<td width="%d%%">\n' % colwidth)
        append(format_column(col))
        append("\n</td>")
    append("\n</tr></table>")
    return null_join(strings)
 | |
| 
 | |
| 
 | |
def format_letter(letter):
    """Return the HTML section heading for one letter group.

    The heading is an <hr /> followed by an <h2> whose id is
    "letter-<letter>", quoted with quoteattr().  "." and "_" get a
    spelled-out name; any other label is shown capitalized.
    """
    special_names = {'.': ". (dot)", '_': "_ (underscore)"}
    if letter in special_names:
        lettername = special_names[letter]
    else:
        lettername = letter.capitalize()
    anchor = quoteattr("letter-" + letter)
    return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" % (anchor, lettername)
 | |
| 
 | |
| 
 | |
def format_html_letters(nodes, columns, group_symbol_nodes):
    """Return the index HTML split into per-letter sections.

    The output starts with a centered bar of links to every letter
    section, followed, for each group from split_letters(), by its
    heading and its formatted nodes.  When `group_symbol_nodes` is true
    the groups are first passed through group_symbols().
    """
    letter_groups = split_letters(nodes)
    if group_symbol_nodes:
        group_symbols(letter_groups)
    # Navigation bar: one link per letter group.
    items = ["<b><a href=\"#letter-%s\">%s</a></b>" % (group[0], group[0])
             for group in letter_groups]
    pieces = ["<hr /><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, members in letter_groups:
        pieces.append(format_letter(letter))
        pieces.append(format_nodes(members, columns))
    return null_join(pieces)
 | |
| 
 | |
def format_html(nodes, columns):
    """Return the index HTML without per-letter sections.

    This simply delegates to format_nodes().
    """
    html = format_nodes(nodes, columns)
    return html
 | |
| 
 | |
| 
 | |
def collapse(nodes):
    """Merge each run of equal entries into the first node of the run.

    Destructive: `nodes` is modified in place.  A node for which
    cmp_entry() reports no difference from the most recently kept node is
    dropped from the list after its first link has been appended to the
    kept node's links.
    """
    if len(nodes) < 2:
        return
    survivors = [nodes[0]]
    for candidate in nodes[1:]:
        anchor = survivors[-1]
        if candidate.cmp_entry(anchor):
            # A different entry: keep it and compare later nodes to it.
            survivors.append(candidate)
        else:
            anchor.links.append(candidate.links[0])
    nodes[:] = survivors
 | |
| 
 | |
| 
 | |
def dump(nodes, fp):
    """Write the dump() text of every node, in order, to `fp`."""
    for entry in nodes:
        serialized = entry.dump()
        fp.write(serialized)
 | |
| 
 | |
| 
 | |
def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0):
    """Sort and collapse `nodes` in place, then return the index HTML.

    With `letters` true the output is divided into per-letter sections
    (see format_html_letters(), which also receives
    `group_symbol_nodes`); otherwise format_html() produces one
    undivided index.
    """
    nodes.sort()
    collapse(nodes)
    if not letters:
        return format_html(nodes, columns)
    return format_html_letters(nodes, columns, group_symbol_nodes)
 | |
| 
 | |
| 
 | |
def main():
    """Command-line entry point: build the HTML index from index data files.

    Options:
      -o FILE, --output=FILE   write the HTML to FILE ("-", the default,
                               means standard output)
      -c N, --columns=N        number of columns (default 1)
      -l, --letters            divide the index into per-letter sections
      --group-symbols          gather symbol entries under "Symbols"
                               (the default)
      --dont-group-symbols     keep one section per symbol

    The remaining arguments name the input files.  With none given, or
    for an argument of "-", the data is read from standard input.  A
    one-line node count is reported on stderr when the HTML goes to
    stdout, and on stdout otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    group_symbol_nodes = 1
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "dont-group-symbols",
                                "group-symbols", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val, 10)
        elif opt in ("-l", "--letters"):
            letters = 1
        elif opt == "--group-symbols":
            group_symbol_nodes = 1
        elif opt == "--dont-group-symbols":
            group_symbol_nodes = 0
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # "-" means standard input, mirroring "-" for the output.
            nodes = nodes + load(sys.stdin)
        else:
            fp = open(fn)
            try:
                nodes = nodes + load(fp)
            finally:
                fp.close()
    # Count before processing: collapsing merges duplicate entries.
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters, group_symbol_nodes)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        fp = open(ofn, "w")
        try:
            fp.write(html)
        finally:
            fp.close()
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))
 | |
| 
 | |
| 
 | |
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
 |