Massive changes.

Separate the Conversion class into a base and a subclass; the subclass
is pretty minimal but the separation is useful for....

NewConversion:  New class that implements a somewhat different
		approach to the conversion.  This uses a table of
		instances (rather than tuples) that have more
		information than the tuples used for the older
		conversion procedure.  This allows a lot more control
		over the conversion, and it seems to be pretty
		stable.

TableEntry,
Parameter:	New classes that are used to build the conversion
		specification used by NewConversion.

TableParser:	xmllib.XMLParser subclass that builds a conversion
		specification from an XML document.

load_table():	Convenience function that loads a table from a file.

main():  Added flags --new and --old; these select which conversion is
	 used.  The default is --new.

Several fixes have been made in the old conversion as well; these were
done before writing & switching to the new conversion, and should be
archived.

The next checkin of this file will discard the old conversion; it is
kept in this checkin to allow it to be retrieved if needed, and to
avoid losing the bugfixes that have been made to it in the interim.
This commit is contained in:
Fred Drake 1999-07-29 22:22:13 +00:00
parent 2394c98c05
commit 96e4a06fa6

View file

@ -16,26 +16,41 @@ to load an alternate table from an external file.
"""
__version__ = '$Revision$'
import copy
import errno
import getopt
import os
import re
import string
import StringIO
import sys
import UserList
from esistools import encode
from types import ListType, StringType, TupleType
try:
from xml.parsers.xmllib import XMLParser
except ImportError:
from xmllib import XMLParser
DEBUG = 0
class Error(Exception):
pass
class LaTeXFormatError(Error):
class LaTeXFormatError(Exception):
pass
class LaTeXStackError(LaTeXFormatError):
    """Raised when an environment close does not match the open stack.

    Attributes:
        found -- the environment name that was being closed
        stack -- a copy of the element stack at the time of the error
    """

    def __init__(self, found, stack):
        self.found = found
        # Keep a snapshot; the caller's stack may keep changing afterwards.
        self.stack = stack[:]
        template = "environment close for %s doesn't match;\n stack = %s"
        LaTeXFormatError.__init__(self, template % (found, stack))
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
@ -58,22 +73,49 @@ def dbgmsg(msg):
sys.stderr.write(msg + "\n")
def pushing(name, point, depth):
dbgmsg("%s<%s> at %s" % (" "*depth, name, point))
dbgmsg("pushing <%s> at %s" % (name, point))
def popping(name, point, depth):
dbgmsg("%s</%s> at %s" % (" "*depth, name, point))
dbgmsg("popping </%s> at %s" % (name, point))
class Conversion:
def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
class _Stack(UserList.UserList):
    """Element stack used when DEBUG is set; traces every push and pop.

    Only strings may be pushed.  Each push and each removal is echoed to
    sys.stderr, prefixed by one space per entry on the stack.
    """

    StringType = type('')

    def append(self, entry):
        """Push 'entry'; raise LaTeXFormatError if it is not a string."""
        if type(entry) is not self.StringType:
            raise LaTeXFormatError("cannot push non-string on stack: "
                                   + repr(entry))
        sys.stderr.write("%s<%s>\n" % (" "*len(self.data), entry))
        self.data.append(entry)

    def pop(self, index=-1):
        """Remove the entry at 'index' (default: the top) and return it.

        The entry must be returned: callers use the value of stack.pop()
        directly, exactly as they would with a plain list.
        """
        entry = self.data[index]
        # __delitem__ performs the removal and writes the trace line.
        del self[index]
        return entry

    def __delitem__(self, index):
        """Remove the entry at 'index', tracing the close to sys.stderr."""
        entry = self.data[index]
        del self.data[index]
        sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry))
def new_stack():
    """Return an empty element stack.

    A plain list is used normally; when DEBUG is true a tracing _Stack
    is returned instead.
    """
    if not DEBUG:
        return []
    return _Stack()
class BaseConversion:
def __init__(self, ifp, ofp, table={}, discards=(), autoclosing=()):
self.ofp_stack = [ofp]
self.pop_output()
self.table = table
self.discards = discards
self.autoclosing = autoclosing
self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
self.err_write = sys.stderr.write
self.preamble = 1
self.stack = new_stack()
def push_output(self, ofp):
self.ofp_stack.append(self.ofp)
@ -84,16 +126,20 @@ class Conversion:
self.ofp = self.ofp_stack.pop()
self.write = self.ofp.write
def err_write(self, msg):
if DEBUG:
sys.stderr.write(str(msg) + "\n")
def convert(self):
self.subconvert()
class Conversion(BaseConversion):
def subconvert(self, endchar=None, depth=0):
stack = []
stack = self.stack
line = self.line
if DEBUG and endchar:
self.err_write(
"subconvert(%s)\n line = %s\n" % (`endchar`, `line[:20]`))
while line:
if line[0] == endchar and not stack:
if DEBUG:
self.err_write("subconvert() --> %s\n" % `line[1:21]`)
self.line = line
return line
m = _comment_rx.match(line)
@ -117,19 +163,16 @@ class Conversion:
# special magic
for n in stack[1:]:
if n not in self.autoclosing:
self.err_write(stack)
raise LaTeXFormatError(
"open element on stack: " + `n`)
# should be more careful, but this is easier to code:
stack = []
self.write(")document\n")
elif stack and envname == stack[-1]:
self.write(")%s\n" % envname)
del stack[-1]
popping(envname, "a", len(stack) + depth)
else:
self.err_write("stack: %s\n" % `stack`)
raise LaTeXFormatError(
"environment close for %s doesn't match" % envname)
raise LaTeXStackError(envname, stack)
line = line[m.end():]
continue
m = _begin_macro_rx.match(line)
@ -171,7 +214,7 @@ class Conversion:
self.write("Anumbered TOKEN no\n")
# rip off the macroname
if params:
line = line[m.end(1):]
line = line[m.end(1):]
elif empty:
line = line[m.end(1):]
else:
@ -184,7 +227,6 @@ class Conversion:
#
if optional and type(params[0]) is TupleType:
# the attribute name isn't used in this special case
pushing(macroname, "a", depth + len(stack))
stack.append(macroname)
self.write("(%s\n" % macroname)
m = _start_optional_rx.match(line)
@ -210,7 +252,6 @@ class Conversion:
# of the attribute element, and the macro will
# have to be closed some other way (such as
# auto-closing).
pushing(macroname, "b", len(stack) + depth)
stack.append(macroname)
self.write("(%s\n" % macroname)
macroname = attrname[0]
@ -262,8 +303,6 @@ class Conversion:
self.pop_output()
continue
if line[0] == endchar and not stack:
if DEBUG:
self.err_write("subconvert() --> %s\n" % `line[1:21]`)
self.line = line[1:]
return self.line
if line[0] == "}":
@ -318,9 +357,6 @@ class Conversion:
+ string.join(stack, ", "))
# otherwise we just ran out of input here...
def convert(self):
self.subconvert()
def start_macro(self, name):
conversion = self.table.get(name, ([], 0, 0, 0, 0))
params, optional, empty, environ, nocontent = conversion
@ -331,7 +367,275 @@ class Conversion:
return params, optional, empty, environ
def convert(ifp, ofp, table={}, discards=(), autoclosing=()):
class NewConversion(BaseConversion):
    """Table-driven converter from LaTeX markup to event lines.

    The conversion table maps macro and environment names to TableEntry
    instances whose Parameter lists describe how the arguments of each
    macro are turned into attributes ("A..." lines), child elements, or
    element content ("(name" / ")name" / "-text" lines).  Names that are
    not in the table get a default entry added on first use.
    """

    def __init__(self, ifp, ofp, table=None):
        """Set up a conversion reading 'ifp' and writing 'ofp'.

        'table' is the conversion table (name -> TableEntry).  When it is
        omitted a new, empty table is created for this instance.
        """
        # get_entry() and get_env_entry() add default entries to the
        # table, so a shared default dictionary would leak entries from
        # one conversion into the next.
        if table is None:
            table = {}
        BaseConversion.__init__(self, ifp, ofp, table)
        self.discards = []

    def subconvert(self, endchar=None, depth=0):
        """Convert input starting at self.line.

        Parses content, including sub-structures, until the character
        'endchar' is found (with no open structures), or until the end
        of the input data if endchar is None.

        When 'endchar' is found, self.line is updated and the remaining
        input (still starting with 'endchar') is returned.  Raises
        LaTeXFormatError (or its subclass LaTeXStackError) when the
        markup cannot be interpreted or elements remain open at the end
        of the input.
        """
        stack = new_stack()
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                name = m.group(1)
                # Called for its effects only: it registers a default
                # environment entry if needed and rejects names that the
                # table defines as macros.
                self.get_env_entry(name)
                # re-write to use the macro handler
                line = r"\%s %s" % (name, line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                entry = self.get_entry(envname)
                # close anything this environment's end implicitly closes
                while stack and envname != stack[-1] \
                      and stack[-1] in entry.endcloses:
                    self.write(")%s\n" % stack.pop())
                if stack and envname == stack[-1]:
                    self.write(")%s\n" % entry.outputname)
                    del stack[-1]
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!  Everything up to the matching \end{}
                    # is copied as character data without interpretation.
                    endtag = "\\end{%s}" % macroname
                    pos = string.find(line, endtag)
                    if pos < 0:
                        # Without this check a -1 result would silently
                        # slice the input at the wrong place.
                        raise LaTeXFormatError(
                            "missing %s for verbatim environment" % endtag)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len(endtag):]
                    continue
                # close open elements that this macro implicitly ends
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                # Entries without an output name have everything they
                # generate diverted into a scratch buffer; pop_output()
                # below restores the previous output.
                if entry.outputname:
                    if entry.empty:
                        self.write("e\n")
                    self.push_output(self.ofp)
                else:
                    self.push_output(StringIO.StringIO())
                #
                params, optional, empty, environ = self.start_macro(macroname)
                # rip off the macroname
                if params:
                    line = line[m.end(1):]
                elif empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %s"
                                    % (pentry.name, macroname,
                                       repr(line[:100])))
                            self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            # drop the opening brace, convert up to the
                            # closing brace, then drop that as well
                            self.line = skip_white(line)[1:]
                            line = self.subconvert(
                                "}", len(stack) + depth + 1)[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            if line[0] != "{":
                                raise LaTeXFormatError(
                                    "missing content for " + macroname)
                            self.line = line[1:]
                            line = self.subconvert("}", len(stack) + depth + 1)
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text":
                        if pentry.text:
                            if entry.outputname and not opened:
                                opened = 1
                                stack.append(entry.name)
                                self.write("(%s\n" % entry.outputname)
                            self.write("-%s\n" % encode(pentry.text))
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    # with implied content the element stays open until
                    # something else closes it
                    if not implied_content:
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                self.pop_output()
                continue
            if line[0] == endchar and not stack:
                self.line = line[1:]
                return self.line
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    # a close brace with nothing open is a markup error,
                    # not a reason to fail with an IndexError
                    raise LaTeXFormatError(
                        "unmatched close brace: %s" % repr(line[:100]))
                macroname = stack[-1]
                if macroname:
                    conversion = self.get_entry(macroname)
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "{":
                # bare group: remembered as an unnamed stack entry
                stack.append("")
                line = line[1:]
                continue
            # the length check keeps a lone trailing backslash from
            # raising an IndexError; it falls through to the error below
            if line[0] == "\\" and len(line) > 1 \
               and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        # out of input: close whatever may be closed implicitly
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + string.join(stack, ", "))
        # otherwise we just ran out of input here...

    def start_macro(self, name):
        """Return (parameters, optional, empty, environment) for 'name'.

        'optional' is true when the first parameter is optional; it is
        the (false) empty parameter list when there are no parameters.
        """
        conversion = self.get_entry(name)
        parameters = conversion.parameters
        optional = parameters and parameters[0].optional
        return parameters, optional, conversion.empty, conversion.environment

    def get_entry(self, name):
        """Return the table entry for 'name', creating a default if needed.

        A default entry is a macro with a single "content" parameter; it
        is stored in the table so later lookups find the same entry.
        """
        entry = self.table.get(name)
        if entry is None:
            self.err_write("get_entry(%s) failing; building default entry!"
                           % repr(name))
            # not defined; build a default entry:
            entry = TableEntry(name)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            self.table[name] = entry
        return entry

    def get_env_entry(self, name):
        """Return the table entry for environment 'name'.

        An unknown name gets a default environment entry with implied
        content, which is stored in the table.  Raises LaTeXFormatError
        if the table defines 'name' as a macro.
        """
        entry = self.table.get(name)
        if entry is None:
            # not defined; build a default entry:
            entry = TableEntry(name, 1)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            entry.parameters[-1].implied = 1
            self.table[name] = entry
        elif not entry.environment:
            raise LaTeXFormatError(
                name + " is defined as a macro; expected environment")
        return entry

    def dump_attr(self, pentry, value):
        """Write an attribute line for parameter 'pentry' with 'value'.

        Nothing is written when the parameter has no name or the value
        is empty.  The declared type is TOKEN when the value matches
        _token_rx and CDATA otherwise.
        """
        if not (pentry.name and value):
            return
        if _token_rx.match(value):
            dtype = "TOKEN"
        else:
            dtype = "CDATA"
        self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
def old_convert(ifp, ofp, table={}, discards=(), autoclosing=()):
c = Conversion(ifp, ofp, table, discards, autoclosing)
try:
c.convert()
@ -340,32 +644,162 @@ def convert(ifp, ofp, table={}, discards=(), autoclosing=()):
raise
def new_convert(ifp, ofp, table={}, discards=(), autoclosing=()):
c = NewConversion(ifp, ofp, table)
try:
c.convert()
except IOError, (err, msg):
if err != errno.EPIPE:
raise
def skip_white(line):
while line and line[0] in " %\n\t":
while line and line[0] in " %\n\t\r":
line = string.lstrip(line[1:])
return line
class TableEntry:
    """Conversion-table record describing one macro or environment."""

    def __init__(self, name, environment=0):
        # identity: the output name defaults to the LaTeX name
        self.name = name
        self.outputname = name
        # kind of markup; a macro starts out empty, an environment does not
        self.environment = environment
        self.empty = not environment
        # flags, all initially off
        self.auto_close = 0
        self.verbatim = 0
        self.has_content = 0
        # per-entry lists (a fresh list for every instance)
        self.endcloses = []
        self.closes = []
        self.parameters = []
class Parameter:
    """One parameter of a conversion-table entry.

    'type' is the parameter kind; the converter recognizes "attribute",
    "child", "content" and "text".
    """

    def __init__(self, type, name=None, optional=0):
        # defaults that callers fill in after construction
        self.implied = 0
        self.text = ''
        # values supplied by the caller
        self.optional = optional
        self.name = name
        self.type = type
class TableParser(XMLParser):
    """XMLParser subclass that builds a conversion table.

    The start_*/end_* methods handle the <environment>, <macro>,
    <attribute>, <child>, <content> and <text> elements of the
    specification document; get_table() returns the finished table.
    """

    def __init__(self):
        self.__table = {}
        self.__current = None
        self.__buffer = ''
        XMLParser.__init__(self)

    def get_table(self):
        """Return the table, mapping names to TableEntry instances.

        Environments that never declared content get an implied
        "content" parameter appended first.
        """
        for entry in self.__table.values():
            if entry.has_content or not entry.environment:
                continue
            implied = Parameter("content")
            implied.implied = 1
            entry.parameters.append(implied)
            entry.has_content = 1
        return self.__table

    def start_environment(self, attrs):
        """Begin a new environment entry from the element's attributes."""
        entry = TableEntry(attrs["name"], environment=1)
        entry.verbatim = attrs.get("verbatim") == "yes"
        if attrs.has_key("outputname"):
            entry.outputname = attrs.get("outputname")
        entry.endcloses = string.split(attrs.get("endcloses", ""))
        self.__current = entry

    def end_environment(self):
        """Store the finished environment entry (same as for a macro)."""
        self.end_macro()

    def start_macro(self, attrs):
        """Begin a new macro entry from the element's attributes."""
        entry = TableEntry(attrs["name"])
        entry.closes = string.split(attrs.get("closes", ""))
        if attrs.has_key("outputname"):
            entry.outputname = attrs.get("outputname")
        self.__current = entry

    def end_macro(self):
        """Store the current entry in the table under its name."""
        finished = self.__current
        self.__table[finished.name] = finished
        self.__current = None

    def start_attribute(self, attrs):
        """Add an "attribute" parameter and start collecting its text."""
        is_optional = attrs.get("optional") == "yes"
        # a missing or empty name leaves the parameter unnamed
        param = Parameter("attribute", attrs.get("name") or None,
                          optional=is_optional)
        self.__current.parameters.append(param)
        self.__buffer = ''

    def end_attribute(self):
        """Record the collected character data as the attribute's text."""
        self.__current.parameters[-1].text = self.__buffer

    def start_child(self, attrs):
        """Add a "child" parameter; the entry is then no longer empty."""
        is_optional = attrs.get("optional") == "yes"
        current = self.__current
        current.parameters.append(
            Parameter("child", attrs["name"], is_optional))
        current.empty = 0

    def start_content(self, attrs):
        """Add a "content" parameter; always implied for environments."""
        current = self.__current
        content = Parameter("content")
        if current.environment:
            content.implied = 1
        else:
            content.implied = attrs.get("implied") == "yes"
        current.parameters.append(content)
        current.has_content = 1
        current.empty = 0

    def start_text(self, attrs):
        """Start collecting the character data of a <text> element."""
        self.__buffer = ''

    def end_text(self):
        """Add a "text" parameter holding the collected character data."""
        literal = Parameter("text")
        literal.text = self.__buffer
        self.__current.parameters.append(literal)

    def handle_data(self, data):
        """Accumulate character data for the element being parsed."""
        self.__buffer = self.__buffer + data
def load_table(fp):
    """Read an XML conversion specification from 'fp'; return the table."""
    builder = TableParser()
    document = fp.read()
    builder.feed(document)
    builder.close()
    return builder.get_table()
def main():
if len(sys.argv) == 2:
ifp = open(sys.argv[1])
global DEBUG
#
convert = new_convert
newstyle = 1
opts, args = getopt.getopt(sys.argv[1:], "Dn", ["debug", "new"])
for opt, arg in opts:
if opt in ("-n", "--new"):
convert = new_convert
newstyle = 1
elif opt in ("-o", "--old"):
convert = old_convert
newstyle = 0
elif opt in ("-D", "--debug"):
DEBUG = DEBUG + 1
if len(args) == 0:
ifp = sys.stdin
ofp = sys.stdout
elif len(sys.argv) == 3:
ifp = open(sys.argv[1])
ofp = open(sys.argv[2], "w")
elif len(args) == 1:
ifp = open(args)
ofp = sys.stdout
elif len(args) == 2:
ifp = open(args[0])
ofp = open(args[1], "w")
else:
usage()
sys.exit(2)
convert(ifp, ofp, {
table = {
# entries have the form:
# name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
# attribute names can be:
# "string" -- normal attribute
# ("string",) -- sub-element with content of macro; like for \section
# ["string"] -- sub-element
"appendix": ([], 0, 1, 0, 0),
"bifuncindex": (["name"], 0, 1, 0, 0),
"catcode": ([], 0, 1, 0, 0),
"cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
"chapter": ([("title",)], 0, 0, 0, 0),
"chapter*": ([("title",)], 0, 0, 0, 0),
@ -405,6 +839,7 @@ def main():
"maketitle": ([], 0, 1, 0, 0),
"manpage": (["name", "section"], 0, 1, 0, 0),
"memberdesc": (["class", "name"], 1, 0, 1, 0),
"memberdescni": (["class", "name"], 1, 0, 1, 0),
"methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
"methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
"methodline": (["class", "name"], 1, 0, 0, 0),
@ -452,6 +887,8 @@ def main():
#
# Things that will actually be going away!
#
"appendix": ([], 0, 1, 0, 0),
"catcode": ([], 0, 1, 0, 0),
"fi": ([], 0, 1, 0, 0),
"ifhtml": ([], 0, 1, 0, 0),
"makeindex": ([], 0, 1, 0, 0),
@ -460,7 +897,10 @@ def main():
"noindent": ([], 0, 1, 0, 0),
"protect": ([], 0, 1, 0, 0),
"tableofcontents": ([], 0, 1, 0, 0),
},
}
if newstyle:
table = load_table(open(os.path.join(sys.path[0], 'conversion.xml')))
convert(ifp, ofp, table,
discards=["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
"noindent", "tableofcontents"],
autoclosing=["chapter", "section", "subsection", "subsubsection",