Lots of small changes to make this work with the Python DOM bindings

(minidom in particular); it was using PyDOM which is now obsolete. Only write the output file on success -- this avoids updating the timestamp on the file on failure, which confuses "make".
2025-11-03 19:34:08 +00:00 · 2001-03-23 17:01:47 +00:00 · 2001-03-23 17:01:47 +00:00 · 3e8f921fb9
commit 3e8f921fb9
parent 7519e7af42
1 changed files with 95 additions and 91 deletions
--- a/Doc/tools/sgmlconv/docfixer.py
+++ b/Doc/tools/sgmlconv/docfixer.py
@ -11,12 +11,12 @@ import esistools
 import re
 import string
 import sys
-import xml.dom.core
+import xml.dom
 import xml.dom.minidom
-from xml.dom.core import \
+ELEMENT = xml.dom.Node.ELEMENT_NODE
-     ELEMENT, \
+ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE
-     ENTITY_REFERENCE, \
+TEXT = xml.dom.Node.TEXT_NODE
     TEXT
 class ConversionError(Exception):
@ -49,32 +49,9 @@ else:
        pass
 # Workaround to deal with invalid documents (multiple root elements).  This
 # does not indicate a bug in the DOM implementation.
 #
 def get_documentElement(doc):
    docelem = None
    for n in doc.childNodes:
        if n.nodeType == ELEMENT:
            docelem = n
    return docelem
 xml.dom.core.Document.get_documentElement = get_documentElement
 # Replace get_childNodes for the Document class; without this, children
 # accessed from the Document object via .childNodes (no matter how many
 # levels of access are used) will be given an ownerDocument of None.
 #
 def get_childNodes(doc):
    return xml.dom.core.NodeList(doc._node.children, doc._node)
 xml.dom.core.Document.get_childNodes = get_childNodes
 def get_first_element(doc, gi):
    for n in doc.childNodes:
-        if n.get_nodeName() == gi:
+        if n.nodeName == gi:
            return n
 def extract_first_element(doc, gi):
@ -84,13 +61,25 @@ def extract_first_element(doc, gi):
    return node
 def get_documentElement(node):
    result = None
    for child in node.childNodes:
        if child.nodeType == ELEMENT:
            result = child
    return result
 def set_tagName(elem, gi):
    elem.nodeName = elem.tagName = gi
 def find_all_elements(doc, gi):
    nodes = []
-    if doc.get_nodeName() == gi:
+    if doc.nodeName == gi:
        nodes.append(doc)
    for child in doc.childNodes:
        if child.nodeType == ELEMENT:
-            if child.get_tagName() == gi:
+            if child.tagName == gi:
                nodes.append(child)
            for node in child.getElementsByTagName(gi):
                nodes.append(node)
@ -99,18 +88,19 @@ def find_all_elements(doc, gi):
 def find_all_child_elements(doc, gi):
    nodes = []
    for child in doc.childNodes:
-        if child.get_nodeName() == gi:
+        if child.nodeName == gi:
            nodes.append(child)
    return nodes
 def find_all_elements_from_set(doc, gi_set):
    return __find_all_elements_from_set(doc, gi_set, [])
 def __find_all_elements_from_set(doc, gi_set, nodes):
-    if doc.get_nodeName() in gi_set:
+    if doc.nodeName in gi_set:
        nodes.append(doc)
    for child in doc.childNodes:
-        if child.get_nodeType() == ELEMENT:
+        if child.nodeType == ELEMENT:
            __find_all_elements_from_set(child, gi_set, nodes)
    return nodes
@ -129,7 +119,7 @@ def simplify(doc, fragment):
    # update the name of the root element
    node = get_first_element(fragment, "document")
    if node is not None:
-        node._node.name = documentclass
+        set_tagName(node, documentclass)
    while 1:
        node = extract_first_element(fragment, "input")
        if node is None:
@ -143,7 +133,7 @@ def simplify(doc, fragment):
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
-    while fragment.firstChild and fragment.firstChild.get_nodeType() == TEXT:
+    while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
        fragment.removeChild(fragment.firstChild)
@ -153,9 +143,9 @@ def cleanup_root_text(doc):
    for n in doc.childNodes:
        prevskip = skip
        skip = 0
-        if n.get_nodeType() == TEXT and not prevskip:
+        if n.nodeType == TEXT and not prevskip:
            discards.append(n)
-        elif n.get_nodeName() == "COMMENT":
+        elif n.nodeName == "COMMENT":
            skip = 1
    for node in discards:
        doc.removeChild(node)
@ -177,8 +167,8 @@ def fixup_descriptors(doc, fragment):
 def find_and_fix_descriptors(doc, container):
    children = container.childNodes
    for child in children:
-        if child.get_nodeType() == ELEMENT:
+        if child.nodeType == ELEMENT:
-            tagName = child.get_tagName()
+            tagName = child.tagName
            if tagName in DESCRIPTOR_ELEMENTS:
                rewrite_descriptor(doc, child)
            elif tagName == "subsection":
@ -200,12 +190,12 @@ def rewrite_descriptor(doc, descriptor):
    #   6. Put it back together.
    #
    # 1.
-    descname = descriptor.get_tagName()
+    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "no")
-        descriptor._node.name = descname
+        set_tagName(descriptor, descname)
        index = 0
    desctype = descname[:-4] # remove 'desc'
    linename = desctype + "line"
@ -219,7 +209,7 @@ def rewrite_descriptor(doc, descriptor):
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    # 2a.
-    if descriptor.attributes.has_key("var"):
+    if descriptor.hasAttribute("var"):
        if descname != "opcodedesc":
            raise RuntimeError, \
                  "got 'var' attribute on descriptor other than opcodedesc"
@ -245,10 +235,15 @@ def rewrite_descriptor(doc, descriptor):
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
-          and children[pos].get_nodeName() in (linename, "versionadded"):
+          and children[pos].nodeName in (linename, "versionadded"):
-        if children[pos].get_tagName() == linename:
+        if children[pos].tagName == linename:
            # this is really a supplemental signature, create <signature>
            oldchild = children[pos].cloneNode(1)
            try:
                sig = methodline_to_signature(doc, children[pos])
            except KeyError:
                print oldchild.toxml()
                raise
            newchildren.append(sig)
        else:
            # <versionadded added=...>
@ -301,7 +296,7 @@ def move_children(origin, dest, start=0):
 def handle_appendix(doc, fragment):
    # must be called after simplfy() if document is multi-rooted to begin with
    docelem = get_documentElement(fragment)
-    toplevel = docelem.get_tagName() == "manual" and "chapter" or "section"
+    toplevel = docelem.tagName == "manual" and "chapter" or "section"
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
@ -333,7 +328,7 @@ def handle_labels(doc, fragment):
        if not id:
            continue
        parent = label.parentNode
-        parentTagName = parent.get_tagName()
+        parentTagName = parent.tagName
        if parentTagName == "title":
            parent.parentNode.setAttribute("id", id)
        else:
@ -352,8 +347,8 @@ def fixup_trailing_whitespace(doc, wsmap):
    while queue:
        node = queue[0]
        del queue[0]
-        if wsmap.has_key(node.get_nodeName()):
+        if wsmap.has_key(node.nodeName):
-            ws = wsmap[node.get_tagName()]
+            ws = wsmap[node.tagName]
            children = node.childNodes
            children.reverse()
            if children[0].nodeType == TEXT:
@ -361,8 +356,8 @@ def fixup_trailing_whitespace(doc, wsmap):
                children[0].data = data
            children.reverse()
            # hack to get the title in place:
-            if node.get_tagName() == "title" \
+            if node.tagName == "title" \
-               and node.parentNode.firstChild.get_nodeType() == ELEMENT:
+               and node.parentNode.firstChild.nodeType == ELEMENT:
                node.parentNode.insertBefore(doc.createText("\n  "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
@ -388,7 +383,7 @@ def cleanup_trailing_parens(doc, element_names):
    while queue:
        node = queue[0]
        del queue[0]
-        if rewrite_element(node.get_tagName()):
+        if rewrite_element(node.tagName):
            children = node.childNodes
            if len(children) == 1 \
               and children[0].nodeType == TEXT:
@ -411,7 +406,7 @@ def contents_match(left, right):
        if nodeType != r.nodeType:
            return 0
        if nodeType == ELEMENT:
-            if l.get_tagName() != r.get_tagName():
+            if l.tagName != r.tagName:
                return 0
            # should check attributes, but that's not a problem here
            if not contents_match(l, r):
@ -430,19 +425,19 @@ def create_module_info(doc, section):
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
-    node._node.name = "synopsis"
+    set_tagName(node, "synopsis")
    lastchild = node.childNodes[-1]
    if lastchild.nodeType == TEXT \
       and lastchild.data[-1:] == ".":
        lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
-        modauthor._node.name = "author"
+        set_tagName(modauthor, "author")
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
-    if section.get_tagName() == "section":
+    if section.tagName == "section":
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
@ -467,13 +462,13 @@ def create_module_info(doc, section):
        if title:
            children = title.childNodes
            if len(children) >= 2 \
-               and children[0].get_nodeName() == "module" \
+               and children[0].nodeName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
-                title._node.name = "short-synopsis"
+                set_tagName(title, "short-synopsis")
                if children[-1].nodeType == TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
@ -511,7 +506,7 @@ def create_module_info(doc, section):
        children = section.childNodes
        for i in range(len(children)):
            node = children[i]
-            if node.get_nodeName() == "moduleinfo":
+            if node.nodeName == "moduleinfo":
                nextnode = children[i+1]
                if nextnode.nodeType == TEXT:
                    data = nextnode.data
@ -544,7 +539,7 @@ def fixup_table(doc, table):
    children = table.childNodes
    for child in children:
        if child.nodeType == ELEMENT:
-            tagName = child.get_tagName()
+            tagName = child.tagName
            if tagName == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif tagName == "row":
@ -558,13 +553,14 @@ def fixup_table(doc, table):
        nodeType = child.nodeType
        if nodeType == TEXT:
            if string.strip(child.data):
-                raise ConversionError("unexpected free data in table")
+                raise ConversionError("unexpected free data in <%s>: %r"
                                      % (table.tagName, child.data))
            table.removeChild(child)
            continue
        if nodeType == ELEMENT:
-            if child.get_tagName() != "hline":
+            if child.tagName != "hline":
                raise ConversionError(
-                    "unexpected <%s> in table" % child.get_tagName())
+                    "unexpected <%s> in table" % child.tagName)
            table.removeChild(child)
            continue
        raise ConversionError(
@ -593,7 +589,7 @@ def fixup_row(doc, row):
 def move_elements_by_name(doc, source, dest, name, sep=None):
    nodes = []
    for child in source.childNodes:
-        if child.get_nodeName() == name:
+        if child.nodeName == name:
            nodes.append(child)
    for node in nodes:
        source.removeChild(node)
@ -633,7 +629,7 @@ PARA_LEVEL_PRECEEDERS = (
 def fixup_paras(doc, fragment):
    for child in fragment.childNodes:
-        if child.get_nodeName() in RECURSE_INTO_PARA_CONTAINERS:
+        if child.nodeName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, child)
    descriptions = find_all_elements(fragment, "description")
    for description in descriptions:
@ -645,7 +641,7 @@ def fixup_paras_helper(doc, container, depth=0):
    children = container.childNodes
    start = skip_leading_nodes(children)
    while len(children) > start:
-        if children[start].get_nodeName() in RECURSE_INTO_PARA_CONTAINERS:
+        if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
            # Something to recurse into:
            fixup_paras_helper(doc, children[start])
        else:
@ -668,7 +664,7 @@ def build_para(doc, parent, start, i):
        child = children[j]
        nodeType = child.nodeType
        if nodeType == ELEMENT:
-            if child.get_tagName() in BREAK_ELEMENTS:
+            if child.tagName in BREAK_ELEMENTS:
                after = j
                break
        elif nodeType == TEXT:
@ -742,7 +738,7 @@ def skip_leading_nodes(children, start=0):
                return start
            # all whitespace, just skip
        elif nodeType == ELEMENT:
-            tagName = child.get_tagName()
+            tagName = child.tagName
            if tagName in RECURSE_INTO_PARA_CONTAINERS:
                return start
            if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
@ -772,7 +768,7 @@ def fixup_signatures(doc, fragment):
 def fixup_args(doc, arglist):
    for child in arglist.childNodes:
-        if child.get_nodeName() == "optional":
+        if child.nodeName == "optional":
            # found it; fix and return
            arglist.insertBefore(doc.createTextNode("["), child)
            optkids = child.childNodes
@ -789,13 +785,13 @@ def fixup_sectionauthors(doc, fragment):
    for sectauth in find_all_elements(fragment, "sectionauthor"):
        section = sectauth.parentNode
        section.removeChild(sectauth)
-        sectauth._node.name = "author"
+        set_tagName(sectauth, "author")
        sectauth.appendChild(doc.createTextNode(
            sectauth.getAttribute("name")))
        sectauth.removeAttribute("name")
        after = section.childNodes[2]
        title = section.childNodes[1]
-        if title.get_nodeName() != "title":
+        if title.nodeName != "title":
            after = section.childNodes[0]
        section.insertBefore(doc.createTextNode("\n  "), after)
        section.insertBefore(sectauth, after)
@ -806,17 +802,17 @@ def fixup_verbatims(doc):
        child = verbatim.childNodes[0]
        if child.nodeType == TEXT \
           and string.lstrip(child.data)[:3] == ">>>":
-            verbatim._node.name = "interactive-session"
+            set_tagName(verbatim, "interactive-session")
 def add_node_ids(fragment, counter=0):
-    fragment._node.node_id = counter
+    fragment.node_id = counter
    for node in fragment.childNodes:
        counter = counter + 1
        if node.nodeType == ELEMENT:
            counter = add_node_ids(node, counter)
        else:
-            node._node.node_id = counter
+            node.node_id = counter
    return counter + 1
@ -831,14 +827,14 @@ def fixup_refmodindexes(fragment):
    d = {}
    for node in nodes:
        parent = node.parentNode
-        d[parent._node.node_id] = parent
+        d[parent.node_id] = parent
    del nodes
    map(fixup_refmodindexes_chunk, d.values())
 def fixup_refmodindexes_chunk(container):
    # node is probably a <para>; let's see how often it isn't:
-    if container.get_tagName() != PARA_ELEMENT:
+    if container.tagName != PARA_ELEMENT:
        bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
    module_entries = find_all_elements(container, "module")
    if not module_entries:
@ -849,7 +845,7 @@ def fixup_refmodindexes_chunk(container):
        children = entry.childNodes
        if len(children) != 0:
            bwrite("--- unexpected number of children for %s node:\n"
-                   % entry.get_tagName())
+                   % entry.tagName)
            ewrite(entry.toxml() + "\n")
            continue
        found = 0
@ -873,7 +869,7 @@ def fixup_bifuncindexes(fragment):
    # make sure that each parent is only processed once:
    for node in nodes:
        parent = node.parentNode
-        d[parent._node.node_id] = parent
+        d[parent.node_id] = parent
    del nodes
    map(fixup_bifuncindexes_chunk, d.values())
@ -905,7 +901,7 @@ def join_adjacent_elements(container, gi):
    while queue:
        parent = queue.pop()
        i = 0
-        children = parent.get_childNodes()
+        children = parent.childNodes
        nchildren = len(children)
        while i < (nchildren - 1):
            child = children[i]
@ -914,7 +910,7 @@ def join_adjacent_elements(container, gi):
                    ewrite("--- merging two <%s/> elements\n" % gi)
                    child = children[i]
                    nextchild = children[i+1]
-                    nextchildren = nextchild.get_childNodes()
+                    nextchildren = nextchild.childNodes
                    while len(nextchildren):
                        node = nextchildren[0]
                        nextchild.removeChild(node)
@ -932,14 +928,13 @@ def write_esis(doc, ofp, knownempty):
    for node in doc.childNodes:
        nodeType = node.nodeType
        if nodeType == ELEMENT:
-            gi = node.get_tagName()
+            gi = node.tagName
            if knownempty(gi):
                if node.hasChildNodes():
                    raise ValueError, \
                          "declared-empty node <%s> has children" % gi
                ofp.write("e\n")
-            for k, v in node.attributes.items():
+            for k, value in node.attributes.items():
                value = v.value
                if _token_rx.match(value):
                    dtype = "TOKEN"
                else:
@ -951,16 +946,17 @@ def write_esis(doc, ofp, knownempty):
        elif nodeType == TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        elif nodeType == ENTITY_REFERENCE:
-            ofp.write("&%s\n" % node.get_nodeName())
+            ofp.write("&%s\n" % node.nodeName)
        else:
            raise RuntimeError, "unsupported node type: %s" % nodeType
 def convert(ifp, ofp):
-    p = esistools.ExtendedEsisBuilder()
+    events = esistools.parse(ifp)
-    p.feed(ifp.read())
+    toktype, doc = events.getEvent()
-    doc = p.document
+    fragment = doc.createDocumentFragment()
-    fragment = p.fragment
+    events.expandNode(fragment)
    normalize(fragment)
    simplify(doc, fragment)
    handle_labels(doc, fragment)
@ -994,8 +990,10 @@ def convert(ifp, ofp):
    join_adjacent_elements(fragment, "option")
    #
    d = {}
-    for gi in p.get_empties():
+    for gi in events.parser.get_empties():
        d[gi] = gi
    if d.has_key("author"):
        del d["author"]
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
@ -1019,11 +1017,17 @@ def main():
        ofp = sys.stdout
    elif len(sys.argv) == 3:
        ifp = open(sys.argv[1])
-        ofp = open(sys.argv[2], "w")
+        import StringIO
        ofp = StringIO.StringIO()
    else:
        usage()
        sys.exit(2)
    convert(ifp, ofp)
    if len(sys.argv) == 3:
        fp = open(sys.argv[2], "w")
        fp.write(ofp.getvalue())
        fp.close()
        ofp.close()
 if __name__ == "__main__":