mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Lots of adjustments to deal with the document content now being stored
in a fragment rather than the main document object.
This commit is contained in:
parent
54fb7fb9d0
commit
e779d4f03b
1 changed file with 94 additions and 91 deletions
|
@ -12,7 +12,10 @@ import re
|
||||||
import string
|
import string
|
||||||
import sys
|
import sys
|
||||||
import xml.dom.core
|
import xml.dom.core
|
||||||
import xml.dom.esis_builder
|
|
||||||
|
from xml.dom.core import \
|
||||||
|
ELEMENT, \
|
||||||
|
TEXT
|
||||||
|
|
||||||
|
|
||||||
class ConversionError(Exception):
|
class ConversionError(Exception):
|
||||||
|
@ -32,11 +35,11 @@ else:
|
||||||
# Workaround to deal with invalid documents (multiple root elements). This
|
# Workaround to deal with invalid documents (multiple root elements). This
|
||||||
# does not indicate a bug in the DOM implementation.
|
# does not indicate a bug in the DOM implementation.
|
||||||
#
|
#
|
||||||
def get_documentElement(self):
|
def get_documentElement(doc):
|
||||||
docelem = None
|
docelem = None
|
||||||
for n in self._node.children:
|
for n in doc.childNodes:
|
||||||
if n.type == xml.dom.core.ELEMENT:
|
if n.nodeType == ELEMENT:
|
||||||
docelem = xml.dom.core.Element(n, self, self)
|
docelem = n
|
||||||
return docelem
|
return docelem
|
||||||
|
|
||||||
xml.dom.core.Document.get_documentElement = get_documentElement
|
xml.dom.core.Document.get_documentElement = get_documentElement
|
||||||
|
@ -46,15 +49,15 @@ xml.dom.core.Document.get_documentElement = get_documentElement
|
||||||
# accessed from the Document object via .childNodes (no matter how many
|
# accessed from the Document object via .childNodes (no matter how many
|
||||||
# levels of access are used) will be given an ownerDocument of None.
|
# levels of access are used) will be given an ownerDocument of None.
|
||||||
#
|
#
|
||||||
def get_childNodes(self):
|
def get_childNodes(doc):
|
||||||
return xml.dom.core.NodeList(self._node.children, self, self)
|
return xml.dom.core.NodeList(doc._node.children, doc._node)
|
||||||
|
|
||||||
xml.dom.core.Document.get_childNodes = get_childNodes
|
xml.dom.core.Document.get_childNodes = get_childNodes
|
||||||
|
|
||||||
|
|
||||||
def get_first_element(doc, gi):
|
def get_first_element(doc, gi):
|
||||||
for n in doc.childNodes:
|
for n in doc.childNodes:
|
||||||
if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
|
if n.nodeType == ELEMENT and n.tagName == gi:
|
||||||
return n
|
return n
|
||||||
|
|
||||||
def extract_first_element(doc, gi):
|
def extract_first_element(doc, gi):
|
||||||
|
@ -66,10 +69,10 @@ def extract_first_element(doc, gi):
|
||||||
|
|
||||||
def find_all_elements(doc, gi):
|
def find_all_elements(doc, gi):
|
||||||
nodes = []
|
nodes = []
|
||||||
if doc.nodeType == xml.dom.core.ELEMENT and doc.tagName == gi:
|
if doc.nodeType == ELEMENT and doc.tagName == gi:
|
||||||
nodes.append(doc)
|
nodes.append(doc)
|
||||||
for child in doc.childNodes:
|
for child in doc.childNodes:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
if child.tagName == gi:
|
if child.tagName == gi:
|
||||||
nodes.append(child)
|
nodes.append(child)
|
||||||
for node in child.getElementsByTagName(gi):
|
for node in child.getElementsByTagName(gi):
|
||||||
|
@ -77,36 +80,36 @@ def find_all_elements(doc, gi):
|
||||||
return nodes
|
return nodes
|
||||||
|
|
||||||
|
|
||||||
def simplify(doc):
|
def simplify(doc, fragment):
|
||||||
# Try to rationalize the document a bit, since these things are simply
|
# Try to rationalize the document a bit, since these things are simply
|
||||||
# not valid SGML/XML documents as they stand, and need a little work.
|
# not valid SGML/XML documents as they stand, and need a little work.
|
||||||
documentclass = "document"
|
documentclass = "document"
|
||||||
inputs = []
|
inputs = []
|
||||||
node = extract_first_element(doc, "documentclass")
|
node = extract_first_element(fragment, "documentclass")
|
||||||
if node is not None:
|
if node is not None:
|
||||||
documentclass = node.getAttribute("classname")
|
documentclass = node.getAttribute("classname")
|
||||||
node = extract_first_element(doc, "title")
|
node = extract_first_element(fragment, "title")
|
||||||
if node is not None:
|
if node is not None:
|
||||||
inputs.append(node)
|
inputs.append(node)
|
||||||
# update the name of the root element
|
# update the name of the root element
|
||||||
node = get_first_element(doc, "document")
|
node = get_first_element(fragment, "document")
|
||||||
if node is not None:
|
if node is not None:
|
||||||
node._node.name = documentclass
|
node._node.name = documentclass
|
||||||
while 1:
|
while 1:
|
||||||
node = extract_first_element(doc, "input")
|
node = extract_first_element(fragment, "input")
|
||||||
if node is None:
|
if node is None:
|
||||||
break
|
break
|
||||||
inputs.append(node)
|
inputs.append(node)
|
||||||
if inputs:
|
if inputs:
|
||||||
docelem = doc.documentElement
|
docelem = get_documentElement(fragment)
|
||||||
inputs.reverse()
|
inputs.reverse()
|
||||||
for node in inputs:
|
for node in inputs:
|
||||||
text = doc.createTextNode("\n")
|
text = doc.createTextNode("\n")
|
||||||
docelem.insertBefore(text, docelem.firstChild)
|
docelem.insertBefore(text, docelem.firstChild)
|
||||||
docelem.insertBefore(node, text)
|
docelem.insertBefore(node, text)
|
||||||
docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
|
docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
|
||||||
while doc.firstChild.nodeType == xml.dom.core.TEXT:
|
while fragment.firstChild.nodeType == TEXT:
|
||||||
doc.removeChild(doc.firstChild)
|
fragment.removeChild(fragment.firstChild)
|
||||||
|
|
||||||
|
|
||||||
def cleanup_root_text(doc):
|
def cleanup_root_text(doc):
|
||||||
|
@ -115,9 +118,9 @@ def cleanup_root_text(doc):
|
||||||
for n in doc.childNodes:
|
for n in doc.childNodes:
|
||||||
prevskip = skip
|
prevskip = skip
|
||||||
skip = 0
|
skip = 0
|
||||||
if n.nodeType == xml.dom.core.TEXT and not prevskip:
|
if n.nodeType == TEXT and not prevskip:
|
||||||
discards.append(n)
|
discards.append(n)
|
||||||
elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
|
elif n.nodeType == ELEMENT and n.tagName == "COMMENT":
|
||||||
skip = 1
|
skip = 1
|
||||||
for node in discards:
|
for node in discards:
|
||||||
doc.removeChild(node)
|
doc.removeChild(node)
|
||||||
|
@ -130,8 +133,8 @@ DESCRIPTOR_ELEMENTS = (
|
||||||
"datadesc", "datadescni",
|
"datadesc", "datadescni",
|
||||||
)
|
)
|
||||||
|
|
||||||
def fixup_descriptors(doc):
|
def fixup_descriptors(doc, fragment):
|
||||||
sections = find_all_elements(doc, "section")
|
sections = find_all_elements(fragment, "section")
|
||||||
for section in sections:
|
for section in sections:
|
||||||
find_and_fix_descriptors(doc, section)
|
find_and_fix_descriptors(doc, section)
|
||||||
|
|
||||||
|
@ -139,7 +142,7 @@ def fixup_descriptors(doc):
|
||||||
def find_and_fix_descriptors(doc, container):
|
def find_and_fix_descriptors(doc, container):
|
||||||
children = container.childNodes
|
children = container.childNodes
|
||||||
for child in children:
|
for child in children:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
tagName = child.tagName
|
tagName = child.tagName
|
||||||
if tagName in DESCRIPTOR_ELEMENTS:
|
if tagName in DESCRIPTOR_ELEMENTS:
|
||||||
rewrite_descriptor(doc, child)
|
rewrite_descriptor(doc, child)
|
||||||
|
@ -191,7 +194,7 @@ def rewrite_descriptor(doc, descriptor):
|
||||||
pos = skip_leading_nodes(children, 0)
|
pos = skip_leading_nodes(children, 0)
|
||||||
if pos < len(children):
|
if pos < len(children):
|
||||||
child = children[pos]
|
child = children[pos]
|
||||||
if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args":
|
if child.nodeType == ELEMENT and child.tagName == "args":
|
||||||
# create an <args> in <signature>:
|
# create an <args> in <signature>:
|
||||||
args = doc.createElement("args")
|
args = doc.createElement("args")
|
||||||
argchildren = []
|
argchildren = []
|
||||||
|
@ -205,7 +208,7 @@ def rewrite_descriptor(doc, descriptor):
|
||||||
# 3, 4.
|
# 3, 4.
|
||||||
pos = skip_leading_nodes(children, pos + 1)
|
pos = skip_leading_nodes(children, pos + 1)
|
||||||
while pos < len(children) \
|
while pos < len(children) \
|
||||||
and children[pos].nodeType == xml.dom.core.ELEMENT \
|
and children[pos].nodeType == ELEMENT \
|
||||||
and children[pos].tagName in (linename, "versionadded"):
|
and children[pos].tagName in (linename, "versionadded"):
|
||||||
if children[pos].tagName == linename:
|
if children[pos].tagName == linename:
|
||||||
# this is really a supplemental signature, create <signature>
|
# this is really a supplemental signature, create <signature>
|
||||||
|
@ -222,7 +225,7 @@ def rewrite_descriptor(doc, descriptor):
|
||||||
newchildren.append(description)
|
newchildren.append(description)
|
||||||
move_children(descriptor, description, pos)
|
move_children(descriptor, description, pos)
|
||||||
last = description.childNodes[-1]
|
last = description.childNodes[-1]
|
||||||
if last.nodeType == xml.dom.core.TEXT:
|
if last.nodeType == TEXT:
|
||||||
last.data = string.rstrip(last.data) + "\n "
|
last.data = string.rstrip(last.data) + "\n "
|
||||||
# 6.
|
# 6.
|
||||||
# should have nothing but whitespace and signature lines in <descriptor>;
|
# should have nothing but whitespace and signature lines in <descriptor>;
|
||||||
|
@ -259,16 +262,16 @@ def move_children(origin, dest, start=0):
|
||||||
dest.appendChild(node)
|
dest.appendChild(node)
|
||||||
|
|
||||||
|
|
||||||
def handle_appendix(doc):
|
def handle_appendix(doc, fragment):
|
||||||
# must be called after simplify() if document is multi-rooted to begin with
|
# must be called after simplify() if document is multi-rooted to begin with
|
||||||
docelem = doc.documentElement
|
docelem = get_documentElement(fragment)
|
||||||
toplevel = docelem.tagName == "manual" and "chapter" or "section"
|
toplevel = docelem.tagName == "manual" and "chapter" or "section"
|
||||||
appendices = 0
|
appendices = 0
|
||||||
nodes = []
|
nodes = []
|
||||||
for node in docelem.childNodes:
|
for node in docelem.childNodes:
|
||||||
if appendices:
|
if appendices:
|
||||||
nodes.append(node)
|
nodes.append(node)
|
||||||
elif node.nodeType == xml.dom.core.ELEMENT:
|
elif node.nodeType == ELEMENT:
|
||||||
appnodes = node.getElementsByTagName("appendix")
|
appnodes = node.getElementsByTagName("appendix")
|
||||||
if appnodes:
|
if appnodes:
|
||||||
appendices = 1
|
appendices = 1
|
||||||
|
@ -281,7 +284,7 @@ def handle_appendix(doc):
|
||||||
back = doc.createElement("back-matter")
|
back = doc.createElement("back-matter")
|
||||||
docelem.appendChild(back)
|
docelem.appendChild(back)
|
||||||
back.appendChild(doc.createTextNode("\n"))
|
back.appendChild(doc.createTextNode("\n"))
|
||||||
while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
|
while nodes and nodes[0].nodeType == TEXT \
|
||||||
and not string.strip(nodes[0].data):
|
and not string.strip(nodes[0].data):
|
||||||
del nodes[0]
|
del nodes[0]
|
||||||
map(back.appendChild, nodes)
|
map(back.appendChild, nodes)
|
||||||
|
@ -307,28 +310,28 @@ def fixup_trailing_whitespace(doc, wsmap):
|
||||||
while queue:
|
while queue:
|
||||||
node = queue[0]
|
node = queue[0]
|
||||||
del queue[0]
|
del queue[0]
|
||||||
if node.nodeType == xml.dom.core.ELEMENT \
|
if node.nodeType == ELEMENT \
|
||||||
and wsmap.has_key(node.tagName):
|
and wsmap.has_key(node.tagName):
|
||||||
ws = wsmap[node.tagName]
|
ws = wsmap[node.tagName]
|
||||||
children = node.childNodes
|
children = node.childNodes
|
||||||
children.reverse()
|
children.reverse()
|
||||||
if children[0].nodeType == xml.dom.core.TEXT:
|
if children[0].nodeType == TEXT:
|
||||||
data = string.rstrip(children[0].data) + ws
|
data = string.rstrip(children[0].data) + ws
|
||||||
children[0].data = data
|
children[0].data = data
|
||||||
children.reverse()
|
children.reverse()
|
||||||
# hack to get the title in place:
|
# hack to get the title in place:
|
||||||
if node.tagName == "title" \
|
if node.tagName == "title" \
|
||||||
and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
|
and node.parentNode.firstChild.nodeType == ELEMENT:
|
||||||
node.parentNode.insertBefore(doc.createText("\n "),
|
node.parentNode.insertBefore(doc.createText("\n "),
|
||||||
node.parentNode.firstChild)
|
node.parentNode.firstChild)
|
||||||
for child in node.childNodes:
|
for child in node.childNodes:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
queue.append(child)
|
queue.append(child)
|
||||||
|
|
||||||
|
|
||||||
def normalize(doc):
|
def normalize(doc):
|
||||||
for node in doc.childNodes:
|
for node in doc.childNodes:
|
||||||
if node.nodeType == xml.dom.core.ELEMENT:
|
if node.nodeType == ELEMENT:
|
||||||
node.normalize()
|
node.normalize()
|
||||||
|
|
||||||
|
|
||||||
|
@ -339,7 +342,7 @@ def cleanup_trailing_parens(doc, element_names):
|
||||||
rewrite_element = d.has_key
|
rewrite_element = d.has_key
|
||||||
queue = []
|
queue = []
|
||||||
for node in doc.childNodes:
|
for node in doc.childNodes:
|
||||||
if node.nodeType == xml.dom.core.ELEMENT:
|
if node.nodeType == ELEMENT:
|
||||||
queue.append(node)
|
queue.append(node)
|
||||||
while queue:
|
while queue:
|
||||||
node = queue[0]
|
node = queue[0]
|
||||||
|
@ -347,13 +350,13 @@ def cleanup_trailing_parens(doc, element_names):
|
||||||
if rewrite_element(node.tagName):
|
if rewrite_element(node.tagName):
|
||||||
children = node.childNodes
|
children = node.childNodes
|
||||||
if len(children) == 1 \
|
if len(children) == 1 \
|
||||||
and children[0].nodeType == xml.dom.core.TEXT:
|
and children[0].nodeType == TEXT:
|
||||||
data = children[0].data
|
data = children[0].data
|
||||||
if data[-2:] == "()":
|
if data[-2:] == "()":
|
||||||
children[0].data = data[:-2]
|
children[0].data = data[:-2]
|
||||||
else:
|
else:
|
||||||
for child in node.childNodes:
|
for child in node.childNodes:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
queue.append(child)
|
queue.append(child)
|
||||||
|
|
||||||
|
|
||||||
|
@ -366,13 +369,13 @@ def contents_match(left, right):
|
||||||
nodeType = l.nodeType
|
nodeType = l.nodeType
|
||||||
if nodeType != r.nodeType:
|
if nodeType != r.nodeType:
|
||||||
return 0
|
return 0
|
||||||
if nodeType == xml.dom.core.ELEMENT:
|
if nodeType == ELEMENT:
|
||||||
if l.tagName != r.tagName:
|
if l.tagName != r.tagName:
|
||||||
return 0
|
return 0
|
||||||
# should check attributes, but that's not a problem here
|
# should check attributes, but that's not a problem here
|
||||||
if not contents_match(l, r):
|
if not contents_match(l, r):
|
||||||
return 0
|
return 0
|
||||||
elif nodeType == xml.dom.core.TEXT:
|
elif nodeType == TEXT:
|
||||||
if l.data != r.data:
|
if l.data != r.data:
|
||||||
return 0
|
return 0
|
||||||
else:
|
else:
|
||||||
|
@ -388,7 +391,7 @@ def create_module_info(doc, section):
|
||||||
return
|
return
|
||||||
node._node.name = "synopsis"
|
node._node.name = "synopsis"
|
||||||
lastchild = node.childNodes[-1]
|
lastchild = node.childNodes[-1]
|
||||||
if lastchild.nodeType == xml.dom.core.TEXT \
|
if lastchild.nodeType == TEXT \
|
||||||
and lastchild.data[-1:] == ".":
|
and lastchild.data[-1:] == ".":
|
||||||
lastchild.data = lastchild.data[:-1]
|
lastchild.data = lastchild.data[:-1]
|
||||||
modauthor = extract_first_element(section, "moduleauthor")
|
modauthor = extract_first_element(section, "moduleauthor")
|
||||||
|
@ -423,7 +426,7 @@ def create_module_info(doc, section):
|
||||||
if title:
|
if title:
|
||||||
children = title.childNodes
|
children = title.childNodes
|
||||||
if len(children) >= 2 \
|
if len(children) >= 2 \
|
||||||
and children[0].nodeType == xml.dom.core.ELEMENT \
|
and children[0].nodeType == ELEMENT \
|
||||||
and children[0].tagName == "module" \
|
and children[0].tagName == "module" \
|
||||||
and children[0].childNodes[0].data == name:
|
and children[0].childNodes[0].data == name:
|
||||||
# this is it; morph the <title> into <short-synopsis>
|
# this is it; morph the <title> into <short-synopsis>
|
||||||
|
@ -431,7 +434,7 @@ def create_module_info(doc, section):
|
||||||
if first_data.data[:4] == " ---":
|
if first_data.data[:4] == " ---":
|
||||||
first_data.data = string.lstrip(first_data.data[4:])
|
first_data.data = string.lstrip(first_data.data[4:])
|
||||||
title._node.name = "short-synopsis"
|
title._node.name = "short-synopsis"
|
||||||
if children[-1].nodeType == xml.dom.core.TEXT \
|
if children[-1].nodeType == TEXT \
|
||||||
and children[-1].data[-1:] == ".":
|
and children[-1].data[-1:] == ".":
|
||||||
children[-1].data = children[-1].data[:-1]
|
children[-1].data = children[-1].data[:-1]
|
||||||
section.removeChild(title)
|
section.removeChild(title)
|
||||||
|
@ -470,10 +473,10 @@ def create_module_info(doc, section):
|
||||||
children = section.childNodes
|
children = section.childNodes
|
||||||
for i in range(len(children)):
|
for i in range(len(children)):
|
||||||
node = children[i]
|
node = children[i]
|
||||||
if node.nodeType == xml.dom.core.ELEMENT \
|
if node.nodeType == ELEMENT \
|
||||||
and node.tagName == "moduleinfo":
|
and node.tagName == "moduleinfo":
|
||||||
nextnode = children[i+1]
|
nextnode = children[i+1]
|
||||||
if nextnode.nodeType == xml.dom.core.TEXT:
|
if nextnode.nodeType == TEXT:
|
||||||
data = nextnode.data
|
data = nextnode.data
|
||||||
if len(string.lstrip(data)) < (len(data) - 4):
|
if len(string.lstrip(data)) < (len(data) - 4):
|
||||||
nextnode.data = "\n\n\n" + string.lstrip(data)
|
nextnode.data = "\n\n\n" + string.lstrip(data)
|
||||||
|
@ -487,7 +490,7 @@ def cleanup_synopses(doc):
|
||||||
def remap_element_names(root, name_map):
|
def remap_element_names(root, name_map):
|
||||||
queue = []
|
queue = []
|
||||||
for child in root.childNodes:
|
for child in root.childNodes:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
queue.append(child)
|
queue.append(child)
|
||||||
while queue:
|
while queue:
|
||||||
node = queue.pop()
|
node = queue.pop()
|
||||||
|
@ -498,13 +501,13 @@ def remap_element_names(root, name_map):
|
||||||
for attr, value in attrs.items():
|
for attr, value in attrs.items():
|
||||||
node.setAttribute(attr, value)
|
node.setAttribute(attr, value)
|
||||||
for child in node.childNodes:
|
for child in node.childNodes:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
queue.append(child)
|
queue.append(child)
|
||||||
|
|
||||||
|
|
||||||
def fixup_table_structures(doc):
|
def fixup_table_structures(doc, fragment):
|
||||||
# must be done after remap_element_names(), or the tables won't be found
|
# must be done after remap_element_names(), or the tables won't be found
|
||||||
for table in find_all_elements(doc, "table"):
|
for table in find_all_elements(fragment, "table"):
|
||||||
fixup_table(doc, table)
|
fixup_table(doc, table)
|
||||||
|
|
||||||
|
|
||||||
|
@ -522,7 +525,7 @@ def fixup_table(doc, table):
|
||||||
last_was_hline = 0
|
last_was_hline = 0
|
||||||
children = table.childNodes
|
children = table.childNodes
|
||||||
for child in children:
|
for child in children:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
tagName = child.tagName
|
tagName = child.tagName
|
||||||
if tagName == "hline" and prev_row is not None:
|
if tagName == "hline" and prev_row is not None:
|
||||||
prev_row.setAttribute("rowsep", "1")
|
prev_row.setAttribute("rowsep", "1")
|
||||||
|
@ -535,12 +538,12 @@ def fixup_table(doc, table):
|
||||||
while children:
|
while children:
|
||||||
child = children[0]
|
child = children[0]
|
||||||
nodeType = child.nodeType
|
nodeType = child.nodeType
|
||||||
if nodeType == xml.dom.core.TEXT:
|
if nodeType == TEXT:
|
||||||
if string.strip(child.data):
|
if string.strip(child.data):
|
||||||
raise ConversionError("unexpected free data in table")
|
raise ConversionError("unexpected free data in table")
|
||||||
table.removeChild(child)
|
table.removeChild(child)
|
||||||
continue
|
continue
|
||||||
if nodeType == xml.dom.core.ELEMENT:
|
if nodeType == ELEMENT:
|
||||||
if child.tagName != "hline":
|
if child.tagName != "hline":
|
||||||
raise ConversionError(
|
raise ConversionError(
|
||||||
"unexpected <%s> in table" % child.tagName)
|
"unexpected <%s> in table" % child.tagName)
|
||||||
|
@ -572,7 +575,7 @@ def fixup_row(doc, row):
|
||||||
def move_elements_by_name(doc, source, dest, name, sep=None):
|
def move_elements_by_name(doc, source, dest, name, sep=None):
|
||||||
nodes = []
|
nodes = []
|
||||||
for child in source.childNodes:
|
for child in source.childNodes:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT and child.tagName == name:
|
if child.nodeType == ELEMENT and child.tagName == name:
|
||||||
nodes.append(child)
|
nodes.append(child)
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
source.removeChild(node)
|
source.removeChild(node)
|
||||||
|
@ -606,13 +609,13 @@ PARA_LEVEL_PRECEEDERS = (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def fixup_paras(doc):
|
def fixup_paras(doc, fragment):
|
||||||
for child in doc.childNodes:
|
for child in fragment.childNodes:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT \
|
if child.nodeType == ELEMENT \
|
||||||
and child.tagName in RECURSE_INTO_PARA_CONTAINERS:
|
and child.tagName in RECURSE_INTO_PARA_CONTAINERS:
|
||||||
#
|
#
|
||||||
fixup_paras_helper(doc, child)
|
fixup_paras_helper(doc, child)
|
||||||
descriptions = find_all_elements(doc, "description")
|
descriptions = find_all_elements(fragment, "description")
|
||||||
for description in descriptions:
|
for description in descriptions:
|
||||||
fixup_paras_helper(doc, description)
|
fixup_paras_helper(doc, description)
|
||||||
|
|
||||||
|
@ -628,7 +631,7 @@ def fixup_paras_helper(doc, container, depth=0):
|
||||||
#
|
#
|
||||||
# Either paragraph material or something to recurse into:
|
# Either paragraph material or something to recurse into:
|
||||||
#
|
#
|
||||||
if (children[start].nodeType == xml.dom.core.ELEMENT) \
|
if (children[start].nodeType == ELEMENT) \
|
||||||
and (children[start].tagName in RECURSE_INTO_PARA_CONTAINERS):
|
and (children[start].tagName in RECURSE_INTO_PARA_CONTAINERS):
|
||||||
fixup_paras_helper(doc, children[start])
|
fixup_paras_helper(doc, children[start])
|
||||||
start = skip_leading_nodes(children, start + 1)
|
start = skip_leading_nodes(children, start + 1)
|
||||||
|
@ -653,11 +656,11 @@ def build_para(doc, parent, start, i):
|
||||||
after = j + 1
|
after = j + 1
|
||||||
child = children[j]
|
child = children[j]
|
||||||
nodeType = child.nodeType
|
nodeType = child.nodeType
|
||||||
if nodeType == xml.dom.core.ELEMENT:
|
if nodeType == ELEMENT:
|
||||||
if child.tagName in BREAK_ELEMENTS:
|
if child.tagName in BREAK_ELEMENTS:
|
||||||
after = j
|
after = j
|
||||||
break
|
break
|
||||||
elif nodeType == xml.dom.core.TEXT:
|
elif nodeType == TEXT:
|
||||||
pos = string.find(child.data, "\n\n")
|
pos = string.find(child.data, "\n\n")
|
||||||
if pos == 0:
|
if pos == 0:
|
||||||
after = j
|
after = j
|
||||||
|
@ -670,7 +673,7 @@ def build_para(doc, parent, start, i):
|
||||||
if (start + 1) > after:
|
if (start + 1) > after:
|
||||||
raise ConversionError(
|
raise ConversionError(
|
||||||
"build_para() could not identify content to turn into a paragraph")
|
"build_para() could not identify content to turn into a paragraph")
|
||||||
if children[after - 1].nodeType == xml.dom.core.TEXT:
|
if children[after - 1].nodeType == TEXT:
|
||||||
# we may need to split off trailing white space:
|
# we may need to split off trailing white space:
|
||||||
child = children[after - 1]
|
child = children[after - 1]
|
||||||
data = child.data
|
data = child.data
|
||||||
|
@ -707,7 +710,7 @@ def skip_leading_nodes(children, start):
|
||||||
# skip over leading comments and whitespace:
|
# skip over leading comments and whitespace:
|
||||||
child = children[start]
|
child = children[start]
|
||||||
nodeType = child.nodeType
|
nodeType = child.nodeType
|
||||||
if nodeType == xml.dom.core.TEXT:
|
if nodeType == TEXT:
|
||||||
data = child.data
|
data = child.data
|
||||||
shortened = string.lstrip(data)
|
shortened = string.lstrip(data)
|
||||||
if shortened:
|
if shortened:
|
||||||
|
@ -717,7 +720,7 @@ def skip_leading_nodes(children, start):
|
||||||
return start + 1
|
return start + 1
|
||||||
return start
|
return start
|
||||||
# all whitespace, just skip
|
# all whitespace, just skip
|
||||||
elif nodeType == xml.dom.core.ELEMENT:
|
elif nodeType == ELEMENT:
|
||||||
tagName = child.tagName
|
tagName = child.tagName
|
||||||
if tagName in RECURSE_INTO_PARA_CONTAINERS:
|
if tagName in RECURSE_INTO_PARA_CONTAINERS:
|
||||||
return start
|
return start
|
||||||
|
@ -727,15 +730,15 @@ def skip_leading_nodes(children, start):
|
||||||
return start
|
return start
|
||||||
|
|
||||||
|
|
||||||
def fixup_rfc_references(doc):
|
def fixup_rfc_references(doc, fragment):
|
||||||
for rfcnode in find_all_elements(doc, "rfc"):
|
for rfcnode in find_all_elements(fragment, "rfc"):
|
||||||
rfcnode.appendChild(doc.createTextNode(
|
rfcnode.appendChild(doc.createTextNode(
|
||||||
"RFC " + rfcnode.getAttribute("num")))
|
"RFC " + rfcnode.getAttribute("num")))
|
||||||
|
|
||||||
|
|
||||||
def fixup_signatures(doc):
|
def fixup_signatures(doc, fragment):
|
||||||
for child in doc.childNodes:
|
for child in fragment.childNodes:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT:
|
if child.nodeType == ELEMENT:
|
||||||
args = child.getElementsByTagName("args")
|
args = child.getElementsByTagName("args")
|
||||||
for arg in args:
|
for arg in args:
|
||||||
fixup_args(doc, arg)
|
fixup_args(doc, arg)
|
||||||
|
@ -748,7 +751,7 @@ def fixup_signatures(doc):
|
||||||
|
|
||||||
def fixup_args(doc, arglist):
|
def fixup_args(doc, arglist):
|
||||||
for child in arglist.childNodes:
|
for child in arglist.childNodes:
|
||||||
if child.nodeType == xml.dom.core.ELEMENT \
|
if child.nodeType == ELEMENT \
|
||||||
and child.tagName == "optional":
|
and child.tagName == "optional":
|
||||||
# found it; fix and return
|
# found it; fix and return
|
||||||
arglist.insertBefore(doc.createTextNode("["), child)
|
arglist.insertBefore(doc.createTextNode("["), child)
|
||||||
|
@ -762,8 +765,8 @@ def fixup_args(doc, arglist):
|
||||||
return fixup_args(doc, arglist)
|
return fixup_args(doc, arglist)
|
||||||
|
|
||||||
|
|
||||||
def fixup_sectionauthors(doc):
|
def fixup_sectionauthors(doc, fragment):
|
||||||
for sectauth in find_all_elements(doc, "sectionauthor"):
|
for sectauth in find_all_elements(fragment, "sectionauthor"):
|
||||||
section = sectauth.parentNode
|
section = sectauth.parentNode
|
||||||
section.removeChild(sectauth)
|
section.removeChild(sectauth)
|
||||||
sectauth._node.name = "author"
|
sectauth._node.name = "author"
|
||||||
|
@ -772,7 +775,7 @@ def fixup_sectionauthors(doc):
|
||||||
sectauth.removeAttribute("name")
|
sectauth.removeAttribute("name")
|
||||||
after = section.childNodes[2]
|
after = section.childNodes[2]
|
||||||
title = section.childNodes[1]
|
title = section.childNodes[1]
|
||||||
if title.nodeType == xml.dom.core.ELEMENT and title.tagName != "title":
|
if title.nodeType == ELEMENT and title.tagName != "title":
|
||||||
after = section.childNodes[0]
|
after = section.childNodes[0]
|
||||||
section.insertBefore(doc.createTextNode("\n "), after)
|
section.insertBefore(doc.createTextNode("\n "), after)
|
||||||
section.insertBefore(sectauth, after)
|
section.insertBefore(sectauth, after)
|
||||||
|
@ -781,10 +784,9 @@ def fixup_sectionauthors(doc):
|
||||||
def fixup_verbatims(doc):
|
def fixup_verbatims(doc):
|
||||||
for verbatim in find_all_elements(doc, "verbatim"):
|
for verbatim in find_all_elements(doc, "verbatim"):
|
||||||
child = verbatim.childNodes[0]
|
child = verbatim.childNodes[0]
|
||||||
if child.nodeType == xml.dom.core.TEXT \
|
if child.nodeType == TEXT \
|
||||||
and string.lstrip(child.data)[:3] == ">>>":
|
and string.lstrip(child.data)[:3] == ">>>":
|
||||||
verbatim._node.name = "interpreter-session"
|
verbatim._node.name = "interactive-session"
|
||||||
#verbatim.setAttribute("interactive", "interactive")
|
|
||||||
|
|
||||||
|
|
||||||
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
|
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
|
||||||
|
@ -792,7 +794,7 @@ _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
|
||||||
def write_esis(doc, ofp, knownempty):
|
def write_esis(doc, ofp, knownempty):
|
||||||
for node in doc.childNodes:
|
for node in doc.childNodes:
|
||||||
nodeType = node.nodeType
|
nodeType = node.nodeType
|
||||||
if nodeType == xml.dom.core.ELEMENT:
|
if nodeType == ELEMENT:
|
||||||
gi = node.tagName
|
gi = node.tagName
|
||||||
if knownempty(gi):
|
if knownempty(gi):
|
||||||
if node.hasChildNodes():
|
if node.hasChildNodes():
|
||||||
|
@ -808,7 +810,7 @@ def write_esis(doc, ofp, knownempty):
|
||||||
ofp.write("(%s\n" % gi)
|
ofp.write("(%s\n" % gi)
|
||||||
write_esis(node, ofp, knownempty)
|
write_esis(node, ofp, knownempty)
|
||||||
ofp.write(")%s\n" % gi)
|
ofp.write(")%s\n" % gi)
|
||||||
elif nodeType == xml.dom.core.TEXT:
|
elif nodeType == TEXT:
|
||||||
ofp.write("-%s\n" % esistools.encode(node.data))
|
ofp.write("-%s\n" % esistools.encode(node.data))
|
||||||
else:
|
else:
|
||||||
raise RuntimeError, "unsupported node type: %s" % nodeType
|
raise RuntimeError, "unsupported node type: %s" % nodeType
|
||||||
|
@ -818,10 +820,11 @@ def convert(ifp, ofp):
|
||||||
p = esistools.ExtendedEsisBuilder()
|
p = esistools.ExtendedEsisBuilder()
|
||||||
p.feed(ifp.read())
|
p.feed(ifp.read())
|
||||||
doc = p.document
|
doc = p.document
|
||||||
normalize(doc)
|
fragment = p.fragment
|
||||||
simplify(doc)
|
normalize(fragment)
|
||||||
handle_labels(doc)
|
simplify(doc, fragment)
|
||||||
handle_appendix(doc)
|
handle_labels(fragment)
|
||||||
|
handle_appendix(doc, fragment)
|
||||||
fixup_trailing_whitespace(doc, {
|
fixup_trailing_whitespace(doc, {
|
||||||
"abstract": "\n",
|
"abstract": "\n",
|
||||||
"title": "",
|
"title": "",
|
||||||
|
@ -835,12 +838,12 @@ def convert(ifp, ofp):
|
||||||
cleanup_root_text(doc)
|
cleanup_root_text(doc)
|
||||||
cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
|
cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
|
||||||
cleanup_synopses(doc)
|
cleanup_synopses(doc)
|
||||||
fixup_descriptors(doc)
|
fixup_descriptors(doc, fragment)
|
||||||
fixup_verbatims(doc)
|
fixup_verbatims(fragment)
|
||||||
normalize(doc)
|
normalize(fragment)
|
||||||
fixup_paras(doc)
|
fixup_paras(doc, fragment)
|
||||||
fixup_sectionauthors(doc)
|
fixup_sectionauthors(doc, fragment)
|
||||||
remap_element_names(doc, {
|
remap_element_names(fragment, {
|
||||||
"tableii": ("table", {"cols": "2"}),
|
"tableii": ("table", {"cols": "2"}),
|
||||||
"tableiii": ("table", {"cols": "3"}),
|
"tableiii": ("table", {"cols": "3"}),
|
||||||
"tableiv": ("table", {"cols": "4"}),
|
"tableiv": ("table", {"cols": "4"}),
|
||||||
|
@ -849,9 +852,9 @@ def convert(ifp, ofp):
|
||||||
"lineiv": ("row", {}),
|
"lineiv": ("row", {}),
|
||||||
"refmodule": ("module", {"link": "link"}),
|
"refmodule": ("module", {"link": "link"}),
|
||||||
})
|
})
|
||||||
fixup_table_structures(doc)
|
fixup_table_structures(doc, fragment)
|
||||||
fixup_rfc_references(doc)
|
fixup_rfc_references(doc, fragment)
|
||||||
fixup_signatures(doc)
|
fixup_signatures(doc, fragment)
|
||||||
#
|
#
|
||||||
d = {}
|
d = {}
|
||||||
for gi in p.get_empties():
|
for gi in p.get_empties():
|
||||||
|
@ -861,7 +864,7 @@ def convert(ifp, ofp):
|
||||||
knownempty = d.has_key
|
knownempty = d.has_key
|
||||||
#
|
#
|
||||||
try:
|
try:
|
||||||
write_esis(doc, ofp, knownempty)
|
write_esis(fragment, ofp, knownempty)
|
||||||
except IOError, (err, msg):
|
except IOError, (err, msg):
|
||||||
# Ignore EPIPE; it just means that whoever we're writing to stopped
|
# Ignore EPIPE; it just means that whoever we're writing to stopped
|
||||||
# reading. The rest of the output would be ignored. All other errors
|
# reading. The rest of the output would be ignored. All other errors
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue