mirror of
https://github.com/python/cpython.git
synced 2025-11-12 07:02:33 +00:00
create_module_info(): New function. Modifies a <section> to create a
<moduleinfo> element based on various pieces of meta-information, and strips some cruft. The result is more usable for information extraction and organizes the information more clearly. cleanup_synopses(): Rewritten to use create_module_info(), so that it works with multi-rooted "documents".
This commit is contained in:
parent
fee6abea90
commit
aaed9710e9
1 changed files with 99 additions and 13 deletions
|
|
@ -227,20 +227,104 @@ def cleanup_trailing_parens(doc, element_names):
|
||||||
queue.append(child)
|
queue.append(child)
|
||||||
|
|
||||||
|
|
||||||
def cleanup_synopses(doc):
|
def contents_match(left, right):
    """Return 1 if the children of `left` and `right` match, otherwise 0.

    The two child lists match when they have the same length and every
    pair of children at the same position has the same node type and:

    * for elements, the same tag name and recursively matching contents
      (attributes are not compared);
    * for text nodes, the same data.

    A pair of any other node type is reported as a mismatch.
    """
    lhs = left.childNodes
    rhs = right.childNodes
    count = len(lhs)
    if count != len(rhs):
        return 0
    # The lengths are equal, so walking by index pairs the children up
    # one-to-one.
    for index in range(count):
        a = lhs[index]
        b = rhs[index]
        kind = a.nodeType
        if kind != b.nodeType:
            return 0
        if kind == xml.dom.core.ELEMENT:
            # should check attributes, but that's not a problem here
            if a.tagName != b.tagName or not contents_match(a, b):
                return 0
        elif kind == xml.dom.core.TEXT:
            if a.data != b.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
|
||||||
|
|
||||||
|
|
||||||
|
def create_module_info(doc, section):
    """Collect a section's module meta-information into a <moduleinfo>.

    The first <modulesynopsis> found by extract_first_element() is renamed
    to "synopsis" and loses a trailing period.  If there is none, nothing
    is done.  When `section` is a <section> element, a new <moduleinfo>
    element is built from:

    * a <name> and, when the attribute is present, a <type> element taken
      from the <declaremodule> element's "name" and "type" attributes;
    * the renamed synopsis;
    * the section's <title>, morphed into <short-synopsis>, when it starts
      with a <module> element naming the declared module and its contents
      differ from the synopsis.

    The <moduleinfo> is then inserted into `section`, preceded by a
    whitespace text node.  Problems with the title are reported on
    sys.stderr rather than raised.

    doc -- the document, used to create new element and text nodes
    section -- the element whose module information is reorganized
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    node._node.name = "synopsis"
    # An empty synopsis has no last child to trim.
    if len(node.childNodes) > 0:
        lastchild = node.childNodes[-1]
        if lastchild.nodeType == xml.dom.core.TEXT \
           and lastchild.data[-1:] == ".":
            lastchild.data = lastchild.data[:-1]
    if section.tagName == "section":
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            # Named `modtype` so the builtin `type` is not shadowed.
            modtype = moddecl.attributes.get("type")
            if modtype:
                modtype = modtype.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(modtype))
                modinfo.appendChild(typenode)
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            # An empty <module> element cannot name the module; treat it
            # as a mismatch instead of failing on the index.
            if len(children) >= 2 \
               and children[0].nodeType == xml.dom.core.ELEMENT \
               and children[0].tagName == "module" \
               and len(children[0].childNodes) > 0 \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                # Only text nodes carry .data; check the node type first,
                # as is done for the synopsis above.
                if first_data.nodeType == xml.dom.core.TEXT \
                   and first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
                title._node.name = "short-synopsis"
                last_data = children[-1]
                if last_data.nodeType == xml.dom.core.TEXT \
                   and last_data.data[-1:] == ".":
                    last_data.data = last_data.data[:-1]
                section.removeChild(title)
                section.removeChild(section.childNodes[0])
                title.removeChild(children[0])
                # The title is gone, so <moduleinfo> goes in first.
                modinfo_pos = 0
            else:
                sys.stderr.write(
                    "module name in title doesn't match"
                    " <declaremodule>; no <short-synopsis>\n")
        else:
            sys.stderr.write(
                "Unexpected condition: <section> without <title>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup_synopses(doc):
    """Build module information for each top-level <section> of `doc`.

    Every direct child of `doc` that is an element named "section" is
    handed to create_module_info(); all other children are left alone.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName != "section":
            continue
        create_module_info(doc, child)
|
||||||
|
|
||||||
|
|
||||||
|
def fixup_paras(doc):
    """Placeholder for paragraph fix-ups; currently leaves `doc` untouched."""
    return None
|
||||||
|
|
||||||
|
|
||||||
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
|
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
|
||||||
|
|
@ -292,6 +376,8 @@ def convert(ifp, ofp):
|
||||||
cleanup_root_text(doc)
|
cleanup_root_text(doc)
|
||||||
cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
|
cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
|
||||||
cleanup_synopses(doc)
|
cleanup_synopses(doc)
|
||||||
|
normalize(doc)
|
||||||
|
fixup_paras(doc)
|
||||||
#
|
#
|
||||||
d = {}
|
d = {}
|
||||||
for gi in p.get_empties():
|
for gi in p.get_empties():
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue