mirror of
https://github.com/python/cpython.git
synced 2025-11-03 19:34:08 +00:00
Lots of small changes to make this work with the Python DOM bindings
(minidom in particular); it was using PyDOM, which is now obsolete. Only write the output file on success -- this avoids updating the timestamp on the file on failure, which confuses "make".
This commit is contained in:
parent
7519e7af42
commit
3e8f921fb9
1 changed file with 95 additions and 91 deletions
|
|
@ -11,12 +11,12 @@ import esistools
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
import sys
|
import sys
|
||||||
import xml.dom.core
|
import xml.dom
|
||||||
|
import xml.dom.minidom
|
||||||
|
|
||||||
from xml.dom.core import \
|
ELEMENT = xml.dom.Node.ELEMENT_NODE
|
||||||
ELEMENT, \
|
ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE
|
||||||
ENTITY_REFERENCE, \
|
TEXT = xml.dom.Node.TEXT_NODE
|
||||||
TEXT
|
|
||||||
|
|
||||||
|
|
||||||
class ConversionError(Exception):
|
class ConversionError(Exception):
|
||||||
|
|
@ -49,32 +49,9 @@ else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
# Workaround to deal with invalid documents (multiple root elements). This
|
|
||||||
# does not indicate a bug in the DOM implementation.
|
|
||||||
#
|
|
||||||
def get_documentElement(doc):
|
|
||||||
docelem = None
|
|
||||||
for n in doc.childNodes:
|
|
||||||
if n.nodeType == ELEMENT:
|
|
||||||
docelem = n
|
|
||||||
return docelem
|
|
||||||
|
|
||||||
xml.dom.core.Document.get_documentElement = get_documentElement
|
|
||||||
|
|
||||||
|
|
||||||
# Replace get_childNodes for the Document class; without this, children
|
|
||||||
# accessed from the Document object via .childNodes (no matter how many
|
|
||||||
# levels of access are used) will be given an ownerDocument of None.
|
|
||||||
#
|
|
||||||
def get_childNodes(doc):
|
|
||||||
return xml.dom.core.NodeList(doc._node.children, doc._node)
|
|
||||||
|
|
||||||
xml.dom.core.Document.get_childNodes = get_childNodes
|
|
||||||
|
|
||||||
|
|
||||||
def get_first_element(doc, gi):
|
def get_first_element(doc, gi):
|
||||||
for n in doc.childNodes:
|
for n in doc.childNodes:
|
||||||
if n.get_nodeName() == gi:
|
if n.nodeName == gi:
|
||||||
return n
|
return n
|
||||||
|
|
||||||
def extract_first_element(doc, gi):
|
def extract_first_element(doc, gi):
|
||||||
|
|
@ -84,13 +61,25 @@ def extract_first_element(doc, gi):
|
||||||
return node
|
return node
|
||||||
|
|
||||||
|
|
||||||
|
def get_documentElement(node):
|
||||||
|
result = None
|
||||||
|
for child in node.childNodes:
|
||||||
|
if child.nodeType == ELEMENT:
|
||||||
|
result = child
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def set_tagName(elem, gi):
|
||||||
|
elem.nodeName = elem.tagName = gi
|
||||||
|
|
||||||
|
|
||||||
def find_all_elements(doc, gi):
|
def find_all_elements(doc, gi):
|
||||||
nodes = []
|
nodes = []
|
||||||
if doc.get_nodeName() == gi:
|
if doc.nodeName == gi:
|
||||||
nodes.append(doc)
|
nodes.append(doc)
|
||||||
for child in doc.childNodes:
|
for child in doc.childNodes:
|
||||||
if child.nodeType == ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
if child.get_tagName() == gi:
|
if child.tagName == gi:
|
||||||
nodes.append(child)
|
nodes.append(child)
|
||||||
for node in child.getElementsByTagName(gi):
|
for node in child.getElementsByTagName(gi):
|
||||||
nodes.append(node)
|
nodes.append(node)
|
||||||
|
|
@ -99,18 +88,19 @@ def find_all_elements(doc, gi):
|
||||||
def find_all_child_elements(doc, gi):
|
def find_all_child_elements(doc, gi):
|
||||||
nodes = []
|
nodes = []
|
||||||
for child in doc.childNodes:
|
for child in doc.childNodes:
|
||||||
if child.get_nodeName() == gi:
|
if child.nodeName == gi:
|
||||||
nodes.append(child)
|
nodes.append(child)
|
||||||
return nodes
|
return nodes
|
||||||
|
|
||||||
|
|
||||||
def find_all_elements_from_set(doc, gi_set):
|
def find_all_elements_from_set(doc, gi_set):
|
||||||
return __find_all_elements_from_set(doc, gi_set, [])
|
return __find_all_elements_from_set(doc, gi_set, [])
|
||||||
|
|
||||||
def __find_all_elements_from_set(doc, gi_set, nodes):
|
def __find_all_elements_from_set(doc, gi_set, nodes):
|
||||||
if doc.get_nodeName() in gi_set:
|
if doc.nodeName in gi_set:
|
||||||
nodes.append(doc)
|
nodes.append(doc)
|
||||||
for child in doc.childNodes:
|
for child in doc.childNodes:
|
||||||
if child.get_nodeType() == ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
__find_all_elements_from_set(child, gi_set, nodes)
|
__find_all_elements_from_set(child, gi_set, nodes)
|
||||||
return nodes
|
return nodes
|
||||||
|
|
||||||
|
|
@ -129,7 +119,7 @@ def simplify(doc, fragment):
|
||||||
# update the name of the root element
|
# update the name of the root element
|
||||||
node = get_first_element(fragment, "document")
|
node = get_first_element(fragment, "document")
|
||||||
if node is not None:
|
if node is not None:
|
||||||
node._node.name = documentclass
|
set_tagName(node, documentclass)
|
||||||
while 1:
|
while 1:
|
||||||
node = extract_first_element(fragment, "input")
|
node = extract_first_element(fragment, "input")
|
||||||
if node is None:
|
if node is None:
|
||||||
|
|
@ -143,7 +133,7 @@ def simplify(doc, fragment):
|
||||||
docelem.insertBefore(text, docelem.firstChild)
|
docelem.insertBefore(text, docelem.firstChild)
|
||||||
docelem.insertBefore(node, text)
|
docelem.insertBefore(node, text)
|
||||||
docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
|
docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
|
||||||
while fragment.firstChild and fragment.firstChild.get_nodeType() == TEXT:
|
while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
|
||||||
fragment.removeChild(fragment.firstChild)
|
fragment.removeChild(fragment.firstChild)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -153,9 +143,9 @@ def cleanup_root_text(doc):
|
||||||
for n in doc.childNodes:
|
for n in doc.childNodes:
|
||||||
prevskip = skip
|
prevskip = skip
|
||||||
skip = 0
|
skip = 0
|
||||||
if n.get_nodeType() == TEXT and not prevskip:
|
if n.nodeType == TEXT and not prevskip:
|
||||||
discards.append(n)
|
discards.append(n)
|
||||||
elif n.get_nodeName() == "COMMENT":
|
elif n.nodeName == "COMMENT":
|
||||||
skip = 1
|
skip = 1
|
||||||
for node in discards:
|
for node in discards:
|
||||||
doc.removeChild(node)
|
doc.removeChild(node)
|
||||||
|
|
@ -177,8 +167,8 @@ def fixup_descriptors(doc, fragment):
|
||||||
def find_and_fix_descriptors(doc, container):
|
def find_and_fix_descriptors(doc, container):
|
||||||
children = container.childNodes
|
children = container.childNodes
|
||||||
for child in children:
|
for child in children:
|
||||||
if child.get_nodeType() == ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
tagName = child.get_tagName()
|
tagName = child.tagName
|
||||||
if tagName in DESCRIPTOR_ELEMENTS:
|
if tagName in DESCRIPTOR_ELEMENTS:
|
||||||
rewrite_descriptor(doc, child)
|
rewrite_descriptor(doc, child)
|
||||||
elif tagName == "subsection":
|
elif tagName == "subsection":
|
||||||
|
|
@ -200,12 +190,12 @@ def rewrite_descriptor(doc, descriptor):
|
||||||
# 6. Put it back together.
|
# 6. Put it back together.
|
||||||
#
|
#
|
||||||
# 1.
|
# 1.
|
||||||
descname = descriptor.get_tagName()
|
descname = descriptor.tagName
|
||||||
index = 1
|
index = 1
|
||||||
if descname[-2:] == "ni":
|
if descname[-2:] == "ni":
|
||||||
descname = descname[:-2]
|
descname = descname[:-2]
|
||||||
descriptor.setAttribute("index", "no")
|
descriptor.setAttribute("index", "no")
|
||||||
descriptor._node.name = descname
|
set_tagName(descriptor, descname)
|
||||||
index = 0
|
index = 0
|
||||||
desctype = descname[:-4] # remove 'desc'
|
desctype = descname[:-4] # remove 'desc'
|
||||||
linename = desctype + "line"
|
linename = desctype + "line"
|
||||||
|
|
@ -219,7 +209,7 @@ def rewrite_descriptor(doc, descriptor):
|
||||||
name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
|
name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
|
||||||
descriptor.removeAttribute("name")
|
descriptor.removeAttribute("name")
|
||||||
# 2a.
|
# 2a.
|
||||||
if descriptor.attributes.has_key("var"):
|
if descriptor.hasAttribute("var"):
|
||||||
if descname != "opcodedesc":
|
if descname != "opcodedesc":
|
||||||
raise RuntimeError, \
|
raise RuntimeError, \
|
||||||
"got 'var' attribute on descriptor other than opcodedesc"
|
"got 'var' attribute on descriptor other than opcodedesc"
|
||||||
|
|
@ -245,10 +235,15 @@ def rewrite_descriptor(doc, descriptor):
|
||||||
# 3, 4.
|
# 3, 4.
|
||||||
pos = skip_leading_nodes(children, pos)
|
pos = skip_leading_nodes(children, pos)
|
||||||
while pos < len(children) \
|
while pos < len(children) \
|
||||||
and children[pos].get_nodeName() in (linename, "versionadded"):
|
and children[pos].nodeName in (linename, "versionadded"):
|
||||||
if children[pos].get_tagName() == linename:
|
if children[pos].tagName == linename:
|
||||||
# this is really a supplemental signature, create <signature>
|
# this is really a supplemental signature, create <signature>
|
||||||
|
oldchild = children[pos].cloneNode(1)
|
||||||
|
try:
|
||||||
sig = methodline_to_signature(doc, children[pos])
|
sig = methodline_to_signature(doc, children[pos])
|
||||||
|
except KeyError:
|
||||||
|
print oldchild.toxml()
|
||||||
|
raise
|
||||||
newchildren.append(sig)
|
newchildren.append(sig)
|
||||||
else:
|
else:
|
||||||
# <versionadded added=...>
|
# <versionadded added=...>
|
||||||
|
|
@ -301,7 +296,7 @@ def move_children(origin, dest, start=0):
|
||||||
def handle_appendix(doc, fragment):
|
def handle_appendix(doc, fragment):
|
||||||
# must be called after simplfy() if document is multi-rooted to begin with
|
# must be called after simplfy() if document is multi-rooted to begin with
|
||||||
docelem = get_documentElement(fragment)
|
docelem = get_documentElement(fragment)
|
||||||
toplevel = docelem.get_tagName() == "manual" and "chapter" or "section"
|
toplevel = docelem.tagName == "manual" and "chapter" or "section"
|
||||||
appendices = 0
|
appendices = 0
|
||||||
nodes = []
|
nodes = []
|
||||||
for node in docelem.childNodes:
|
for node in docelem.childNodes:
|
||||||
|
|
@ -333,7 +328,7 @@ def handle_labels(doc, fragment):
|
||||||
if not id:
|
if not id:
|
||||||
continue
|
continue
|
||||||
parent = label.parentNode
|
parent = label.parentNode
|
||||||
parentTagName = parent.get_tagName()
|
parentTagName = parent.tagName
|
||||||
if parentTagName == "title":
|
if parentTagName == "title":
|
||||||
parent.parentNode.setAttribute("id", id)
|
parent.parentNode.setAttribute("id", id)
|
||||||
else:
|
else:
|
||||||
|
|
@ -352,8 +347,8 @@ def fixup_trailing_whitespace(doc, wsmap):
|
||||||
while queue:
|
while queue:
|
||||||
node = queue[0]
|
node = queue[0]
|
||||||
del queue[0]
|
del queue[0]
|
||||||
if wsmap.has_key(node.get_nodeName()):
|
if wsmap.has_key(node.nodeName):
|
||||||
ws = wsmap[node.get_tagName()]
|
ws = wsmap[node.tagName]
|
||||||
children = node.childNodes
|
children = node.childNodes
|
||||||
children.reverse()
|
children.reverse()
|
||||||
if children[0].nodeType == TEXT:
|
if children[0].nodeType == TEXT:
|
||||||
|
|
@ -361,8 +356,8 @@ def fixup_trailing_whitespace(doc, wsmap):
|
||||||
children[0].data = data
|
children[0].data = data
|
||||||
children.reverse()
|
children.reverse()
|
||||||
# hack to get the title in place:
|
# hack to get the title in place:
|
||||||
if node.get_tagName() == "title" \
|
if node.tagName == "title" \
|
||||||
and node.parentNode.firstChild.get_nodeType() == ELEMENT:
|
and node.parentNode.firstChild.nodeType == ELEMENT:
|
||||||
node.parentNode.insertBefore(doc.createText("\n "),
|
node.parentNode.insertBefore(doc.createText("\n "),
|
||||||
node.parentNode.firstChild)
|
node.parentNode.firstChild)
|
||||||
for child in node.childNodes:
|
for child in node.childNodes:
|
||||||
|
|
@ -388,7 +383,7 @@ def cleanup_trailing_parens(doc, element_names):
|
||||||
while queue:
|
while queue:
|
||||||
node = queue[0]
|
node = queue[0]
|
||||||
del queue[0]
|
del queue[0]
|
||||||
if rewrite_element(node.get_tagName()):
|
if rewrite_element(node.tagName):
|
||||||
children = node.childNodes
|
children = node.childNodes
|
||||||
if len(children) == 1 \
|
if len(children) == 1 \
|
||||||
and children[0].nodeType == TEXT:
|
and children[0].nodeType == TEXT:
|
||||||
|
|
@ -411,7 +406,7 @@ def contents_match(left, right):
|
||||||
if nodeType != r.nodeType:
|
if nodeType != r.nodeType:
|
||||||
return 0
|
return 0
|
||||||
if nodeType == ELEMENT:
|
if nodeType == ELEMENT:
|
||||||
if l.get_tagName() != r.get_tagName():
|
if l.tagName != r.tagName:
|
||||||
return 0
|
return 0
|
||||||
# should check attributes, but that's not a problem here
|
# should check attributes, but that's not a problem here
|
||||||
if not contents_match(l, r):
|
if not contents_match(l, r):
|
||||||
|
|
@ -430,19 +425,19 @@ def create_module_info(doc, section):
|
||||||
node = extract_first_element(section, "modulesynopsis")
|
node = extract_first_element(section, "modulesynopsis")
|
||||||
if node is None:
|
if node is None:
|
||||||
return
|
return
|
||||||
node._node.name = "synopsis"
|
set_tagName(node, "synopsis")
|
||||||
lastchild = node.childNodes[-1]
|
lastchild = node.childNodes[-1]
|
||||||
if lastchild.nodeType == TEXT \
|
if lastchild.nodeType == TEXT \
|
||||||
and lastchild.data[-1:] == ".":
|
and lastchild.data[-1:] == ".":
|
||||||
lastchild.data = lastchild.data[:-1]
|
lastchild.data = lastchild.data[:-1]
|
||||||
modauthor = extract_first_element(section, "moduleauthor")
|
modauthor = extract_first_element(section, "moduleauthor")
|
||||||
if modauthor:
|
if modauthor:
|
||||||
modauthor._node.name = "author"
|
set_tagName(modauthor, "author")
|
||||||
modauthor.appendChild(doc.createTextNode(
|
modauthor.appendChild(doc.createTextNode(
|
||||||
modauthor.getAttribute("name")))
|
modauthor.getAttribute("name")))
|
||||||
modauthor.removeAttribute("name")
|
modauthor.removeAttribute("name")
|
||||||
platform = extract_first_element(section, "platform")
|
platform = extract_first_element(section, "platform")
|
||||||
if section.get_tagName() == "section":
|
if section.tagName == "section":
|
||||||
modinfo_pos = 2
|
modinfo_pos = 2
|
||||||
modinfo = doc.createElement("moduleinfo")
|
modinfo = doc.createElement("moduleinfo")
|
||||||
moddecl = extract_first_element(section, "declaremodule")
|
moddecl = extract_first_element(section, "declaremodule")
|
||||||
|
|
@ -467,13 +462,13 @@ def create_module_info(doc, section):
|
||||||
if title:
|
if title:
|
||||||
children = title.childNodes
|
children = title.childNodes
|
||||||
if len(children) >= 2 \
|
if len(children) >= 2 \
|
||||||
and children[0].get_nodeName() == "module" \
|
and children[0].nodeName == "module" \
|
||||||
and children[0].childNodes[0].data == name:
|
and children[0].childNodes[0].data == name:
|
||||||
# this is it; morph the <title> into <short-synopsis>
|
# this is it; morph the <title> into <short-synopsis>
|
||||||
first_data = children[1]
|
first_data = children[1]
|
||||||
if first_data.data[:4] == " ---":
|
if first_data.data[:4] == " ---":
|
||||||
first_data.data = string.lstrip(first_data.data[4:])
|
first_data.data = string.lstrip(first_data.data[4:])
|
||||||
title._node.name = "short-synopsis"
|
set_tagName(title, "short-synopsis")
|
||||||
if children[-1].nodeType == TEXT \
|
if children[-1].nodeType == TEXT \
|
||||||
and children[-1].data[-1:] == ".":
|
and children[-1].data[-1:] == ".":
|
||||||
children[-1].data = children[-1].data[:-1]
|
children[-1].data = children[-1].data[:-1]
|
||||||
|
|
@ -511,7 +506,7 @@ def create_module_info(doc, section):
|
||||||
children = section.childNodes
|
children = section.childNodes
|
||||||
for i in range(len(children)):
|
for i in range(len(children)):
|
||||||
node = children[i]
|
node = children[i]
|
||||||
if node.get_nodeName() == "moduleinfo":
|
if node.nodeName == "moduleinfo":
|
||||||
nextnode = children[i+1]
|
nextnode = children[i+1]
|
||||||
if nextnode.nodeType == TEXT:
|
if nextnode.nodeType == TEXT:
|
||||||
data = nextnode.data
|
data = nextnode.data
|
||||||
|
|
@ -544,7 +539,7 @@ def fixup_table(doc, table):
|
||||||
children = table.childNodes
|
children = table.childNodes
|
||||||
for child in children:
|
for child in children:
|
||||||
if child.nodeType == ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
tagName = child.get_tagName()
|
tagName = child.tagName
|
||||||
if tagName == "hline" and prev_row is not None:
|
if tagName == "hline" and prev_row is not None:
|
||||||
prev_row.setAttribute("rowsep", "1")
|
prev_row.setAttribute("rowsep", "1")
|
||||||
elif tagName == "row":
|
elif tagName == "row":
|
||||||
|
|
@ -558,13 +553,14 @@ def fixup_table(doc, table):
|
||||||
nodeType = child.nodeType
|
nodeType = child.nodeType
|
||||||
if nodeType == TEXT:
|
if nodeType == TEXT:
|
||||||
if string.strip(child.data):
|
if string.strip(child.data):
|
||||||
raise ConversionError("unexpected free data in table")
|
raise ConversionError("unexpected free data in <%s>: %r"
|
||||||
|
% (table.tagName, child.data))
|
||||||
table.removeChild(child)
|
table.removeChild(child)
|
||||||
continue
|
continue
|
||||||
if nodeType == ELEMENT:
|
if nodeType == ELEMENT:
|
||||||
if child.get_tagName() != "hline":
|
if child.tagName != "hline":
|
||||||
raise ConversionError(
|
raise ConversionError(
|
||||||
"unexpected <%s> in table" % child.get_tagName())
|
"unexpected <%s> in table" % child.tagName)
|
||||||
table.removeChild(child)
|
table.removeChild(child)
|
||||||
continue
|
continue
|
||||||
raise ConversionError(
|
raise ConversionError(
|
||||||
|
|
@ -593,7 +589,7 @@ def fixup_row(doc, row):
|
||||||
def move_elements_by_name(doc, source, dest, name, sep=None):
|
def move_elements_by_name(doc, source, dest, name, sep=None):
|
||||||
nodes = []
|
nodes = []
|
||||||
for child in source.childNodes:
|
for child in source.childNodes:
|
||||||
if child.get_nodeName() == name:
|
if child.nodeName == name:
|
||||||
nodes.append(child)
|
nodes.append(child)
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
source.removeChild(node)
|
source.removeChild(node)
|
||||||
|
|
@ -633,7 +629,7 @@ PARA_LEVEL_PRECEEDERS = (
|
||||||
|
|
||||||
def fixup_paras(doc, fragment):
|
def fixup_paras(doc, fragment):
|
||||||
for child in fragment.childNodes:
|
for child in fragment.childNodes:
|
||||||
if child.get_nodeName() in RECURSE_INTO_PARA_CONTAINERS:
|
if child.nodeName in RECURSE_INTO_PARA_CONTAINERS:
|
||||||
fixup_paras_helper(doc, child)
|
fixup_paras_helper(doc, child)
|
||||||
descriptions = find_all_elements(fragment, "description")
|
descriptions = find_all_elements(fragment, "description")
|
||||||
for description in descriptions:
|
for description in descriptions:
|
||||||
|
|
@ -645,7 +641,7 @@ def fixup_paras_helper(doc, container, depth=0):
|
||||||
children = container.childNodes
|
children = container.childNodes
|
||||||
start = skip_leading_nodes(children)
|
start = skip_leading_nodes(children)
|
||||||
while len(children) > start:
|
while len(children) > start:
|
||||||
if children[start].get_nodeName() in RECURSE_INTO_PARA_CONTAINERS:
|
if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
|
||||||
# Something to recurse into:
|
# Something to recurse into:
|
||||||
fixup_paras_helper(doc, children[start])
|
fixup_paras_helper(doc, children[start])
|
||||||
else:
|
else:
|
||||||
|
|
@ -668,7 +664,7 @@ def build_para(doc, parent, start, i):
|
||||||
child = children[j]
|
child = children[j]
|
||||||
nodeType = child.nodeType
|
nodeType = child.nodeType
|
||||||
if nodeType == ELEMENT:
|
if nodeType == ELEMENT:
|
||||||
if child.get_tagName() in BREAK_ELEMENTS:
|
if child.tagName in BREAK_ELEMENTS:
|
||||||
after = j
|
after = j
|
||||||
break
|
break
|
||||||
elif nodeType == TEXT:
|
elif nodeType == TEXT:
|
||||||
|
|
@ -742,7 +738,7 @@ def skip_leading_nodes(children, start=0):
|
||||||
return start
|
return start
|
||||||
# all whitespace, just skip
|
# all whitespace, just skip
|
||||||
elif nodeType == ELEMENT:
|
elif nodeType == ELEMENT:
|
||||||
tagName = child.get_tagName()
|
tagName = child.tagName
|
||||||
if tagName in RECURSE_INTO_PARA_CONTAINERS:
|
if tagName in RECURSE_INTO_PARA_CONTAINERS:
|
||||||
return start
|
return start
|
||||||
if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
|
if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
|
||||||
|
|
@ -772,7 +768,7 @@ def fixup_signatures(doc, fragment):
|
||||||
|
|
||||||
def fixup_args(doc, arglist):
|
def fixup_args(doc, arglist):
|
||||||
for child in arglist.childNodes:
|
for child in arglist.childNodes:
|
||||||
if child.get_nodeName() == "optional":
|
if child.nodeName == "optional":
|
||||||
# found it; fix and return
|
# found it; fix and return
|
||||||
arglist.insertBefore(doc.createTextNode("["), child)
|
arglist.insertBefore(doc.createTextNode("["), child)
|
||||||
optkids = child.childNodes
|
optkids = child.childNodes
|
||||||
|
|
@ -789,13 +785,13 @@ def fixup_sectionauthors(doc, fragment):
|
||||||
for sectauth in find_all_elements(fragment, "sectionauthor"):
|
for sectauth in find_all_elements(fragment, "sectionauthor"):
|
||||||
section = sectauth.parentNode
|
section = sectauth.parentNode
|
||||||
section.removeChild(sectauth)
|
section.removeChild(sectauth)
|
||||||
sectauth._node.name = "author"
|
set_tagName(sectauth, "author")
|
||||||
sectauth.appendChild(doc.createTextNode(
|
sectauth.appendChild(doc.createTextNode(
|
||||||
sectauth.getAttribute("name")))
|
sectauth.getAttribute("name")))
|
||||||
sectauth.removeAttribute("name")
|
sectauth.removeAttribute("name")
|
||||||
after = section.childNodes[2]
|
after = section.childNodes[2]
|
||||||
title = section.childNodes[1]
|
title = section.childNodes[1]
|
||||||
if title.get_nodeName() != "title":
|
if title.nodeName != "title":
|
||||||
after = section.childNodes[0]
|
after = section.childNodes[0]
|
||||||
section.insertBefore(doc.createTextNode("\n "), after)
|
section.insertBefore(doc.createTextNode("\n "), after)
|
||||||
section.insertBefore(sectauth, after)
|
section.insertBefore(sectauth, after)
|
||||||
|
|
@ -806,17 +802,17 @@ def fixup_verbatims(doc):
|
||||||
child = verbatim.childNodes[0]
|
child = verbatim.childNodes[0]
|
||||||
if child.nodeType == TEXT \
|
if child.nodeType == TEXT \
|
||||||
and string.lstrip(child.data)[:3] == ">>>":
|
and string.lstrip(child.data)[:3] == ">>>":
|
||||||
verbatim._node.name = "interactive-session"
|
set_tagName(verbatim, "interactive-session")
|
||||||
|
|
||||||
|
|
||||||
def add_node_ids(fragment, counter=0):
|
def add_node_ids(fragment, counter=0):
|
||||||
fragment._node.node_id = counter
|
fragment.node_id = counter
|
||||||
for node in fragment.childNodes:
|
for node in fragment.childNodes:
|
||||||
counter = counter + 1
|
counter = counter + 1
|
||||||
if node.nodeType == ELEMENT:
|
if node.nodeType == ELEMENT:
|
||||||
counter = add_node_ids(node, counter)
|
counter = add_node_ids(node, counter)
|
||||||
else:
|
else:
|
||||||
node._node.node_id = counter
|
node.node_id = counter
|
||||||
return counter + 1
|
return counter + 1
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -831,14 +827,14 @@ def fixup_refmodindexes(fragment):
|
||||||
d = {}
|
d = {}
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
parent = node.parentNode
|
parent = node.parentNode
|
||||||
d[parent._node.node_id] = parent
|
d[parent.node_id] = parent
|
||||||
del nodes
|
del nodes
|
||||||
map(fixup_refmodindexes_chunk, d.values())
|
map(fixup_refmodindexes_chunk, d.values())
|
||||||
|
|
||||||
|
|
||||||
def fixup_refmodindexes_chunk(container):
|
def fixup_refmodindexes_chunk(container):
|
||||||
# node is probably a <para>; let's see how often it isn't:
|
# node is probably a <para>; let's see how often it isn't:
|
||||||
if container.get_tagName() != PARA_ELEMENT:
|
if container.tagName != PARA_ELEMENT:
|
||||||
bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
|
bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
|
||||||
module_entries = find_all_elements(container, "module")
|
module_entries = find_all_elements(container, "module")
|
||||||
if not module_entries:
|
if not module_entries:
|
||||||
|
|
@ -849,7 +845,7 @@ def fixup_refmodindexes_chunk(container):
|
||||||
children = entry.childNodes
|
children = entry.childNodes
|
||||||
if len(children) != 0:
|
if len(children) != 0:
|
||||||
bwrite("--- unexpected number of children for %s node:\n"
|
bwrite("--- unexpected number of children for %s node:\n"
|
||||||
% entry.get_tagName())
|
% entry.tagName)
|
||||||
ewrite(entry.toxml() + "\n")
|
ewrite(entry.toxml() + "\n")
|
||||||
continue
|
continue
|
||||||
found = 0
|
found = 0
|
||||||
|
|
@ -873,7 +869,7 @@ def fixup_bifuncindexes(fragment):
|
||||||
# make sure that each parent is only processed once:
|
# make sure that each parent is only processed once:
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
parent = node.parentNode
|
parent = node.parentNode
|
||||||
d[parent._node.node_id] = parent
|
d[parent.node_id] = parent
|
||||||
del nodes
|
del nodes
|
||||||
map(fixup_bifuncindexes_chunk, d.values())
|
map(fixup_bifuncindexes_chunk, d.values())
|
||||||
|
|
||||||
|
|
@ -905,7 +901,7 @@ def join_adjacent_elements(container, gi):
|
||||||
while queue:
|
while queue:
|
||||||
parent = queue.pop()
|
parent = queue.pop()
|
||||||
i = 0
|
i = 0
|
||||||
children = parent.get_childNodes()
|
children = parent.childNodes
|
||||||
nchildren = len(children)
|
nchildren = len(children)
|
||||||
while i < (nchildren - 1):
|
while i < (nchildren - 1):
|
||||||
child = children[i]
|
child = children[i]
|
||||||
|
|
@ -914,7 +910,7 @@ def join_adjacent_elements(container, gi):
|
||||||
ewrite("--- merging two <%s/> elements\n" % gi)
|
ewrite("--- merging two <%s/> elements\n" % gi)
|
||||||
child = children[i]
|
child = children[i]
|
||||||
nextchild = children[i+1]
|
nextchild = children[i+1]
|
||||||
nextchildren = nextchild.get_childNodes()
|
nextchildren = nextchild.childNodes
|
||||||
while len(nextchildren):
|
while len(nextchildren):
|
||||||
node = nextchildren[0]
|
node = nextchildren[0]
|
||||||
nextchild.removeChild(node)
|
nextchild.removeChild(node)
|
||||||
|
|
@ -932,14 +928,13 @@ def write_esis(doc, ofp, knownempty):
|
||||||
for node in doc.childNodes:
|
for node in doc.childNodes:
|
||||||
nodeType = node.nodeType
|
nodeType = node.nodeType
|
||||||
if nodeType == ELEMENT:
|
if nodeType == ELEMENT:
|
||||||
gi = node.get_tagName()
|
gi = node.tagName
|
||||||
if knownempty(gi):
|
if knownempty(gi):
|
||||||
if node.hasChildNodes():
|
if node.hasChildNodes():
|
||||||
raise ValueError, \
|
raise ValueError, \
|
||||||
"declared-empty node <%s> has children" % gi
|
"declared-empty node <%s> has children" % gi
|
||||||
ofp.write("e\n")
|
ofp.write("e\n")
|
||||||
for k, v in node.attributes.items():
|
for k, value in node.attributes.items():
|
||||||
value = v.value
|
|
||||||
if _token_rx.match(value):
|
if _token_rx.match(value):
|
||||||
dtype = "TOKEN"
|
dtype = "TOKEN"
|
||||||
else:
|
else:
|
||||||
|
|
@ -951,16 +946,17 @@ def write_esis(doc, ofp, knownempty):
|
||||||
elif nodeType == TEXT:
|
elif nodeType == TEXT:
|
||||||
ofp.write("-%s\n" % esistools.encode(node.data))
|
ofp.write("-%s\n" % esistools.encode(node.data))
|
||||||
elif nodeType == ENTITY_REFERENCE:
|
elif nodeType == ENTITY_REFERENCE:
|
||||||
ofp.write("&%s\n" % node.get_nodeName())
|
ofp.write("&%s\n" % node.nodeName)
|
||||||
else:
|
else:
|
||||||
raise RuntimeError, "unsupported node type: %s" % nodeType
|
raise RuntimeError, "unsupported node type: %s" % nodeType
|
||||||
|
|
||||||
|
|
||||||
def convert(ifp, ofp):
|
def convert(ifp, ofp):
|
||||||
p = esistools.ExtendedEsisBuilder()
|
events = esistools.parse(ifp)
|
||||||
p.feed(ifp.read())
|
toktype, doc = events.getEvent()
|
||||||
doc = p.document
|
fragment = doc.createDocumentFragment()
|
||||||
fragment = p.fragment
|
events.expandNode(fragment)
|
||||||
|
|
||||||
normalize(fragment)
|
normalize(fragment)
|
||||||
simplify(doc, fragment)
|
simplify(doc, fragment)
|
||||||
handle_labels(doc, fragment)
|
handle_labels(doc, fragment)
|
||||||
|
|
@ -994,8 +990,10 @@ def convert(ifp, ofp):
|
||||||
join_adjacent_elements(fragment, "option")
|
join_adjacent_elements(fragment, "option")
|
||||||
#
|
#
|
||||||
d = {}
|
d = {}
|
||||||
for gi in p.get_empties():
|
for gi in events.parser.get_empties():
|
||||||
d[gi] = gi
|
d[gi] = gi
|
||||||
|
if d.has_key("author"):
|
||||||
|
del d["author"]
|
||||||
if d.has_key("rfc"):
|
if d.has_key("rfc"):
|
||||||
del d["rfc"]
|
del d["rfc"]
|
||||||
knownempty = d.has_key
|
knownempty = d.has_key
|
||||||
|
|
@ -1019,11 +1017,17 @@ def main():
|
||||||
ofp = sys.stdout
|
ofp = sys.stdout
|
||||||
elif len(sys.argv) == 3:
|
elif len(sys.argv) == 3:
|
||||||
ifp = open(sys.argv[1])
|
ifp = open(sys.argv[1])
|
||||||
ofp = open(sys.argv[2], "w")
|
import StringIO
|
||||||
|
ofp = StringIO.StringIO()
|
||||||
else:
|
else:
|
||||||
usage()
|
usage()
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
convert(ifp, ofp)
|
convert(ifp, ofp)
|
||||||
|
if len(sys.argv) == 3:
|
||||||
|
fp = open(sys.argv[2], "w")
|
||||||
|
fp.write(ofp.getvalue())
|
||||||
|
fp.close()
|
||||||
|
ofp.close()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue