mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
Update the docstring.
Lots of changes to get the paragraph marking to work, and not go into an infinite recursion. Start to rationalize markup of method signatures; not complete. find_all_elements(): similar to getElementsByTagName(), but operates on the not-quite-legal fragments we deal with here. Simplifies several individual transforms. fixup_sectionauthors(): Convert \sectionauthor items to <author> just after the <section>'s <title>.
This commit is contained in:
parent
3f3b09657f
commit
7dab6affba
1 changed files with 159 additions and 101 deletions
|
@ -1,12 +1,7 @@
|
|||
#! /usr/bin/env python
|
||||
|
||||
"""Promote the IDs from <label/> elements to the enclosing section / chapter /
|
||||
whatever, then remove the <label/> elements. This allows *ML style internal
|
||||
linking rather than the bogus LaTeX model.
|
||||
|
||||
Note that <label/>s in <title> elements are promoted two steps, since the
|
||||
<title> elements are artificially created from the section parameter, and the
|
||||
label really refers to the sectioning construct.
|
||||
"""Perform massive transformations on a document tree created from the LaTeX
|
||||
of the Python documentation, and dump the ESIS data for the transformed tree.
|
||||
"""
|
||||
__version__ = '$Revision$'
|
||||
|
||||
|
@ -26,6 +21,13 @@ class ConversionError(Exception):
|
|||
|
||||
DEBUG_PARA_FIXER = 0
|
||||
|
||||
if DEBUG_PARA_FIXER:
|
||||
def para_msg(s):
|
||||
sys.stderr.write("*** %s\n" % s)
|
||||
else:
|
||||
def para_msg(s):
|
||||
pass
|
||||
|
||||
|
||||
# Workaround to deal with invalid documents (multiple root elements). This
|
||||
# does not indicate a bug in the DOM implementation.
|
||||
|
@ -62,6 +64,19 @@ def extract_first_element(doc, gi):
|
|||
return node
|
||||
|
||||
|
||||
def find_all_elements(doc, gi):
|
||||
nodes = []
|
||||
if doc.nodeType == xml.dom.core.ELEMENT and doc.tagName == gi:
|
||||
nodes.append(doc)
|
||||
for child in doc.childNodes:
|
||||
if child.nodeType == xml.dom.core.ELEMENT:
|
||||
if child.tagName == gi:
|
||||
nodes.append(child)
|
||||
for node in child.getElementsByTagName(gi):
|
||||
nodes.append(node)
|
||||
return nodes
|
||||
|
||||
|
||||
def simplify(doc):
|
||||
# Try to rationalize the document a bit, since these things are simply
|
||||
# not valid SGML/XML documents as they stand, and need a little work.
|
||||
|
@ -108,31 +123,50 @@ def cleanup_root_text(doc):
|
|||
doc.removeChild(node)
|
||||
|
||||
|
||||
def rewrite_desc_entries(doc, argname_gi):
|
||||
argnodes = doc.getElementsByTagName(argname_gi)
|
||||
for node in argnodes:
|
||||
def handle_args(doc):
|
||||
for node in find_all_elements(doc, "args"):
|
||||
parent = node.parentNode
|
||||
nodes = []
|
||||
for n in parent.childNodes:
|
||||
if n.nodeType != xml.dom.core.ELEMENT or n.tagName != argname_gi:
|
||||
if n.nodeType != xml.dom.core.ELEMENT or n.tagName != "args":
|
||||
nodes.append(n)
|
||||
signature = doc.createElement("signature")
|
||||
signature.appendChild(doc.createTextNode("\n "))
|
||||
name = doc.createElement("name")
|
||||
name.appendChild(doc.createTextNode(parent.getAttribute("name")))
|
||||
parent.removeAttribute("name")
|
||||
signature.appendChild(name)
|
||||
desc = doc.createElement("description")
|
||||
for n in nodes:
|
||||
parent.removeChild(n)
|
||||
desc.appendChild(n)
|
||||
desc.appendChild(doc.createTextNode("\n "))
|
||||
parent.replaceChild(signature, node)
|
||||
parent.insertBefore(doc.createTextNode("\n "), signature)
|
||||
if node.childNodes:
|
||||
# keep the <args>...</args>, newline & indent
|
||||
parent.insertBefore(doc.createText("\n "), node)
|
||||
else:
|
||||
# no arguments, remove the <args/> node
|
||||
parent.removeChild(node)
|
||||
signature.appendChild(doc.createTextNode("\n "))
|
||||
signature.appendChild(node)
|
||||
parent.appendChild(doc.createText("\n "))
|
||||
parent.appendChild(desc)
|
||||
parent.appendChild(doc.createText("\n"))
|
||||
signature.appendChild(doc.createTextNode("\n "))
|
||||
|
||||
def handle_args(doc):
|
||||
rewrite_desc_entries(doc, "args")
|
||||
rewrite_desc_entries(doc, "constructor-args")
|
||||
|
||||
def methodline_to_signature(doc, methodline):
|
||||
signature = doc.createElement("signature")
|
||||
signature.appendChild(doc.createTextNode("\n "))
|
||||
name = doc.createElement("name")
|
||||
name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
|
||||
signature.appendChild(name)
|
||||
methodline.parentNode.removeChild(methodline)
|
||||
if len(methodline.childNodes):
|
||||
methodline._node.name = "args"
|
||||
methodline.removeAttribute("name")
|
||||
signature.appendChild(doc.createTextNode("\n "))
|
||||
signature.appendChild(methodline)
|
||||
signature.appendChild(doc.createTextNode("\n "))
|
||||
return signature
|
||||
|
||||
|
||||
def handle_appendix(doc):
|
||||
|
@ -165,20 +199,17 @@ def handle_appendix(doc):
|
|||
|
||||
|
||||
def handle_labels(doc):
|
||||
for node in doc.childNodes:
|
||||
if node.nodeType == xml.dom.core.ELEMENT:
|
||||
labels = node.getElementsByTagName("label")
|
||||
for label in labels:
|
||||
id = label.getAttribute("id")
|
||||
if not id:
|
||||
continue
|
||||
parent = label.parentNode
|
||||
if parent.tagName == "title":
|
||||
parent.parentNode.setAttribute("id", id)
|
||||
else:
|
||||
parent.setAttribute("id", id)
|
||||
# now, remove <label id="..."/> from parent:
|
||||
parent.removeChild(label)
|
||||
for label in find_all_elements(doc, "label"):
|
||||
id = label.getAttribute("id")
|
||||
if not id:
|
||||
continue
|
||||
parent = label.parentNode
|
||||
if parent.tagName == "title":
|
||||
parent.parentNode.setAttribute("id", id)
|
||||
else:
|
||||
parent.setAttribute("id", id)
|
||||
# now, remove <label id="..."/> from parent:
|
||||
parent.removeChild(label)
|
||||
|
||||
|
||||
def fixup_trailing_whitespace(doc, wsmap):
|
||||
|
@ -306,7 +337,8 @@ def create_module_info(doc, section):
|
|||
if first_data.data[:4] == " ---":
|
||||
first_data.data = string.lstrip(first_data.data[4:])
|
||||
title._node.name = "short-synopsis"
|
||||
if children[-1].data[-1:] == ".":
|
||||
if children[-1].nodeType == xml.dom.core.TEXT \
|
||||
and children[-1].data[-1:] == ".":
|
||||
children[-1].data = children[-1].data[:-1]
|
||||
section.removeChild(title)
|
||||
section.removeChild(section.childNodes[0])
|
||||
|
@ -335,10 +367,8 @@ def create_module_info(doc, section):
|
|||
|
||||
|
||||
def cleanup_synopses(doc):
|
||||
for node in doc.childNodes:
|
||||
if node.nodeType == xml.dom.core.ELEMENT \
|
||||
and node.tagName == "section":
|
||||
create_module_info(doc, node)
|
||||
for node in find_all_elements(doc, "section"):
|
||||
create_module_info(doc, node)
|
||||
|
||||
|
||||
def remap_element_names(root, name_map):
|
||||
|
@ -361,11 +391,9 @@ def remap_element_names(root, name_map):
|
|||
|
||||
def fixup_table_structures(doc):
|
||||
# must be done after remap_element_names(), or the tables won't be found
|
||||
for child in doc.childNodes:
|
||||
if child.nodeType == xml.dom.core.ELEMENT:
|
||||
tables = child.getElementsByTagName("table")
|
||||
for table in tables:
|
||||
fixup_table(doc, table)
|
||||
for table in find_all_elements(doc, "table"):
|
||||
fixup_table(doc, table)
|
||||
|
||||
|
||||
def fixup_table(doc, table):
|
||||
# create the table head
|
||||
|
@ -443,66 +471,81 @@ def move_elements_by_name(doc, source, dest, name, sep=None):
|
|||
FIXUP_PARA_ELEMENTS = (
|
||||
"chapter",
|
||||
"section", "subsection", "subsubsection",
|
||||
"paragraph", "subparagraph", "description",
|
||||
"opcodedesc", "classdesc",
|
||||
"funcdesc", "methoddesc", "excdesc", "datadesc",
|
||||
"funcdescni", "methoddescni", "excdescni", "datadescni",
|
||||
"paragraph", "subparagraph",
|
||||
"excdesc", "datadesc",
|
||||
"excdescni", "datadescni",
|
||||
)
|
||||
|
||||
RECURSE_INTO_PARA_CONTAINERS = (
|
||||
"chapter",
|
||||
"section", "subsection", "subsubsection",
|
||||
"paragraph", "subparagraph",
|
||||
"abstract",
|
||||
"memberdesc", "memberdescni", "datadesc", "datadescni",
|
||||
)
|
||||
|
||||
PARA_LEVEL_ELEMENTS = (
|
||||
"moduleinfo", "title", "verbatim",
|
||||
"opcodedesc", "classdesc",
|
||||
"funcdesc", "methoddesc", "excdesc", "datadesc",
|
||||
"funcdescni", "methoddescni", "excdescni", "datadescni",
|
||||
"funcdesc", "methoddesc", "excdesc",
|
||||
"funcdescni", "methoddescni", "excdescni",
|
||||
"tableii", "tableiii", "tableiv", "localmoduletable",
|
||||
"sectionauthor",
|
||||
"sectionauthor", "seealso",
|
||||
# include <para>, so we can just do it again to get subsequent paras:
|
||||
"para",
|
||||
)
|
||||
|
||||
PARA_LEVEL_PRECEEDERS = (
|
||||
"index", "indexii", "indexiii", "indexiv",
|
||||
"stindex", "obindex", "COMMENT", "label",
|
||||
"stindex", "obindex", "COMMENT", "label", "input",
|
||||
"memberline", "memberlineni",
|
||||
"methodline", "methodlineni",
|
||||
)
|
||||
|
||||
|
||||
def fixup_paras(doc):
|
||||
for child in doc.childNodes:
|
||||
if child.nodeType == xml.dom.core.ELEMENT \
|
||||
and child.tagName in FIXUP_PARA_ELEMENTS:
|
||||
and child.tagName in RECURSE_INTO_PARA_CONTAINERS:
|
||||
#
|
||||
fixup_paras_helper(doc, child)
|
||||
descriptions = child.getElementsByTagName("description")
|
||||
for description in descriptions:
|
||||
if DEBUG_PARA_FIXER:
|
||||
sys.stderr.write("-- Fixing up <description> element...\n")
|
||||
fixup_paras_helper(doc, description)
|
||||
|
||||
|
||||
def fixup_paras_helper(doc, container):
|
||||
def fixup_paras_helper(doc, container, depth=0):
|
||||
# document is already normalized
|
||||
children = container.childNodes
|
||||
start = 0
|
||||
start_fixed = 0
|
||||
i = len(children)
|
||||
SKIP_ELEMENTS = PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS
|
||||
if DEBUG_PARA_FIXER:
|
||||
sys.stderr.write("fixup_paras_helper() called on <%s>; %d, %d\n"
|
||||
% (container.tagName, start, i))
|
||||
if i > start:
|
||||
# the first [start:i] children shoudl be rewritten as <para> elements
|
||||
# start by breaking text nodes that contain \n\n+ into multiple nodes
|
||||
nstart, i = skip_leading_nodes(container.childNodes, start, i)
|
||||
if i > nstart:
|
||||
build_para(doc, container, nstart, i)
|
||||
fixup_paras_helper(doc, container)
|
||||
while len(children) > start:
|
||||
start = skip_leading_nodes(children, start)
|
||||
if start >= len(children):
|
||||
break
|
||||
#
|
||||
# Either paragraph material or something to recurse into:
|
||||
#
|
||||
if (children[start].nodeType == xml.dom.core.ELEMENT) \
|
||||
and (children[start].tagName in RECURSE_INTO_PARA_CONTAINERS):
|
||||
fixup_paras_helper(doc, children[start])
|
||||
start = skip_leading_nodes(children, start + 1)
|
||||
continue
|
||||
#
|
||||
# paragraph material:
|
||||
#
|
||||
build_para(doc, container, start, len(children))
|
||||
if DEBUG_PARA_FIXER and depth == 10:
|
||||
sys.exit(1)
|
||||
start = start + 1
|
||||
|
||||
|
||||
def build_para(doc, parent, start, i):
|
||||
children = parent.childNodes
|
||||
# collect all children until \n\n+ is found in a text node or a
|
||||
# PARA_LEVEL_ELEMENT is found.
|
||||
after = start + 1
|
||||
have_last = 0
|
||||
BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + FIXUP_PARA_ELEMENTS
|
||||
# Collect all children until \n\n+ is found in a text node or a
|
||||
# member of BREAK_ELEMENTS is found.
|
||||
for j in range(start, i):
|
||||
after = j + 1
|
||||
child = children[j]
|
||||
|
@ -521,6 +564,9 @@ def build_para(doc, parent, start, i):
|
|||
break
|
||||
else:
|
||||
have_last = 1
|
||||
if (start + 1) > after:
|
||||
raise ConversionError(
|
||||
"build_para() could not identify content to turn into a paragraph")
|
||||
if children[after - 1].nodeType == xml.dom.core.TEXT:
|
||||
# we may need to split off trailing white space:
|
||||
child = children[after - 1]
|
||||
|
@ -528,66 +574,60 @@ def build_para(doc, parent, start, i):
|
|||
if string.rstrip(data) != data:
|
||||
have_last = 0
|
||||
child.splitText(len(string.rstrip(data)))
|
||||
children = parent.childNodes
|
||||
para = doc.createElement("para")
|
||||
prev = None
|
||||
indexes = range(start, after)
|
||||
indexes.reverse()
|
||||
for j in indexes:
|
||||
node = children[j]
|
||||
node = parent.childNodes[j]
|
||||
parent.removeChild(node)
|
||||
para.insertBefore(node, prev)
|
||||
prev = node
|
||||
if have_last:
|
||||
parent.appendChild(para)
|
||||
return len(parent.childNodes)
|
||||
else:
|
||||
parent.insertBefore(para, parent.childNodes[start])
|
||||
return start + 1
|
||||
|
||||
|
||||
def skip_leading_nodes(children, start, i):
|
||||
i = min(i, len(children))
|
||||
def skip_leading_nodes(children, start):
|
||||
"""Return index into children of a node at which paragraph building should
|
||||
begin or a recursive call to fixup_paras_helper() should be made (for
|
||||
subsections, etc.).
|
||||
|
||||
When the return value >= len(children), we've built all the paras we can
|
||||
from this list of children.
|
||||
"""
|
||||
i = len(children)
|
||||
while i > start:
|
||||
# skip over leading comments and whitespace:
|
||||
try:
|
||||
child = children[start]
|
||||
except IndexError:
|
||||
sys.stderr.write(
|
||||
"skip_leading_nodes() failed at index %d\n" % start)
|
||||
raise
|
||||
child = children[start]
|
||||
nodeType = child.nodeType
|
||||
if nodeType == xml.dom.core.COMMENT:
|
||||
start = start + 1
|
||||
elif nodeType == xml.dom.core.TEXT:
|
||||
if nodeType == xml.dom.core.TEXT:
|
||||
data = child.data
|
||||
shortened = string.lstrip(data)
|
||||
if shortened:
|
||||
if data != shortened:
|
||||
# break into two nodes: whitespace and non-whitespace
|
||||
child.splitText(len(data) - len(shortened))
|
||||
return start + 1, i + 1
|
||||
break
|
||||
return start + 1
|
||||
return start
|
||||
# all whitespace, just skip
|
||||
start = start + 1
|
||||
elif nodeType == xml.dom.core.ELEMENT:
|
||||
if child.tagName in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
|
||||
start = start + 1
|
||||
else:
|
||||
break
|
||||
else:
|
||||
break
|
||||
return start, i
|
||||
tagName = child.tagName
|
||||
if tagName in RECURSE_INTO_PARA_CONTAINERS:
|
||||
return start
|
||||
if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
|
||||
return start
|
||||
start = start + 1
|
||||
return start
|
||||
|
||||
|
||||
def fixup_rfc_references(doc):
|
||||
rfc_nodes = []
|
||||
for child in doc.childNodes:
|
||||
if child.nodeType == xml.dom.core.ELEMENT:
|
||||
kids = child.getElementsByTagName("rfc")
|
||||
for k in kids:
|
||||
rfc_nodes.append(k)
|
||||
for rfc_node in rfc_nodes:
|
||||
rfc_node.appendChild(doc.createTextNode(
|
||||
"RFC " + rfc_node.getAttribute("num")))
|
||||
for rfcnode in find_all_elements(doc, "rfc"):
|
||||
rfcnode.appendChild(doc.createTextNode(
|
||||
"RFC " + rfcnode.getAttribute("num")))
|
||||
|
||||
|
||||
def fixup_signatures(doc):
|
||||
|
@ -596,6 +636,7 @@ def fixup_signatures(doc):
|
|||
args = child.getElementsByTagName("args")
|
||||
for arg in args:
|
||||
fixup_args(doc, arg)
|
||||
arg.normalize()
|
||||
args = child.getElementsByTagName("constructor-args")
|
||||
for arg in args:
|
||||
fixup_args(doc, arg)
|
||||
|
@ -618,6 +659,22 @@ def fixup_args(doc, arglist):
|
|||
return fixup_args(doc, arglist)
|
||||
|
||||
|
||||
def fixup_sectionauthors(doc):
|
||||
for sectauth in find_all_elements(doc, "sectionauthor"):
|
||||
section = sectauth.parentNode
|
||||
section.removeChild(sectauth)
|
||||
sectauth._node.name = "author"
|
||||
sectauth.appendChild(doc.createTextNode(
|
||||
sectauth.getAttribute("name")))
|
||||
sectauth.removeAttribute("name")
|
||||
after = section.childNodes[2]
|
||||
title = section.childNodes[1]
|
||||
if title.nodeType == xml.dom.core.ELEMENT and title.tagName != "title":
|
||||
after = section.childNodes[0]
|
||||
section.insertBefore(doc.createTextNode("\n "), after)
|
||||
section.insertBefore(sectauth, after)
|
||||
|
||||
|
||||
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
|
||||
|
||||
def write_esis(doc, ofp, knownempty):
|
||||
|
@ -669,6 +726,7 @@ def convert(ifp, ofp):
|
|||
cleanup_synopses(doc)
|
||||
normalize(doc)
|
||||
fixup_paras(doc)
|
||||
fixup_sectionauthors(doc)
|
||||
remap_element_names(doc, {
|
||||
"tableii": ("table", {"cols": "2"}),
|
||||
"tableiii": ("table", {"cols": "3"}),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue