mirror of
https://github.com/python/cpython.git
synced 2025-08-01 15:43:13 +00:00

But I'm not giving this or the info files out until Andy (or anyone else) agrees to take over the maintenance of this script! Hah!
2406 lines
68 KiB
Python
2406 lines
68 KiB
Python
#
|
|
# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
|
|
# and generate texinfo source.
|
|
#
|
|
# This is *not* a good example of good programming practices. In fact, this
|
|
# file could use a complete rewrite, in order to become faster, more
|
|
# easily extensible and maintainable.
|
|
#
|
|
# However, I added some comments in a few places for the pitiful person who
|
|
# would ever need to take a look into this file.
|
|
#
|
|
# Have I been clear enough??
|
|
#
|
|
# -jh
|
|
#
|
|
# Yup. I made some performance improvements and hope this lasts a while;
|
|
# I don't want to be the schmuck who ends up re-writing it!
|
|
#
|
|
# -fld
|
|
#
|
|
# (sometime later...)
|
|
#
|
|
# Ok, I've re-worked substantial chunks of this. It's only getting worse.
|
|
# It just might be gone before the next source release. (Yeah!)
|
|
#
|
|
# -fld
|
|
|
|
import sys, string, regex, getopt, os
|
|
|
|
from types import IntType, ListType, StringType, TupleType
|
|
|
|
release_version = sys.version[:3]
|
|
|
|
# Different parse modes for phase 1 (the lexical scan over the LaTeX source)
MODE_REGULAR = 0        # ordinary running text
MODE_VERBATIM = 1       # inside a verbatim environment
MODE_CS_SCAN = 2        # scanning a control sequence name after '\'
MODE_COMMENT = 3        # inside a '%' comment, up to end of line
MODE_MATH = 4           # inside $...$
MODE_DMATH = 5          # inside $$...$$ (display math)
MODE_GOBBLEWHITE = 6    # eating linear whitespace

# every valid mode; used for validation in class Mode below
the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
          MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE)
|
|
|
|
# Show the neighbourhood of the scanned buffer
|
|
# Show the neighbourhood of the scanned buffer
def epsilon(buf, where):
    """Return a context string for diagnostics: up to ten characters on
    either side of position *where* in *buf*, the split point marked by
    a dot between the two repr'd halves."""
    lo = max(0, where - 10)
    hi = min(len(buf), where + 10)
    return ' Context ' + repr(buf[lo:where]) + '.' + repr(buf[where:hi]) + '.'
|
|
|
|
# Should return the line number. never worked
|
|
# Should return the line number. never worked
def lin():
    """Return a display string for the global line counter.

    `lineno` is a module global maintained (imperfectly, per the original
    author) by parseit().
    """
    global lineno
    return ' Line %s.' % repr(lineno)
|
|
|
|
# Displays the recursion level.
|
|
# Displays the recursion level.
def lv(lvl):
    """Return a display string for the recursion level *lvl*."""
    return ' Level %s.' % repr(lvl)
|
|
|
|
# Combine the three previous functions. Used often.
|
|
# Combine the three previous functions. Used often.
def lle(lvl, buf, where):
    """Level + line + buffer context, the standard error-message suffix."""
    return '%s%s%s' % (lv(lvl), lin(), epsilon(buf, where))
|
|
|
|
|
|
# This class is only needed for _symbolic_ representation of the parse mode.
class Mode:
    def __init__(self, arg):
        # only the seven phase-1 mode constants are valid
        if arg not in the_modes:
            raise ValueError, 'mode not in the_modes'
        self.mode = arg

    def __cmp__(self, other):
        # Allow comparison against a raw integer constant by promoting it
        # to the canonical Mode instance from the module-level `mode` map
        # (defined just below this class).
        if type(self) != type(other):
            other = mode[other]
        return cmp(self.mode, other.mode)

    def __repr__(self):
        # symbolic name for debugging output
        if self.mode == MODE_REGULAR:
            return 'MODE_REGULAR'
        elif self.mode == MODE_VERBATIM:
            return 'MODE_VERBATIM'
        elif self.mode == MODE_CS_SCAN:
            return 'MODE_CS_SCAN'
        elif self.mode == MODE_COMMENT:
            return 'MODE_COMMENT'
        elif self.mode == MODE_MATH:
            return 'MODE_MATH'
        elif self.mode == MODE_DMATH:
            return 'MODE_DMATH'
        elif self.mode == MODE_GOBBLEWHITE:
            return 'MODE_GOBBLEWHITE'
        else:
            # unreachable if __init__ validated, kept as a safety net
            raise ValueError, 'mode not in the_modes'
|
|
|
|
# just a wrapper around a class initialisation: one canonical Mode
# instance per constant, so comparisons and repr() are symbolic
mode = {}
for t in the_modes:
    mode[t] = Mode(t)
|
|
|
|
|
|
# After phase 1, the text consists of chunks, with a certain type
# this type will be assigned to the chtype member of the chunk
# the where-field contains the file position where this is found
# and the data field contains (1): a tuple describing start- end end
# positions of the substring (can be used as slice for the buf-variable),
# (2) just a string, mostly generated by the changeit routine,
# or (3) a list, describing a (recursive) subgroup of chunks
PLAIN = 0               # ASSUME PLAINTEXT, data = the text
GROUP = 1               # GROUP ({}), data = [chunk, chunk,..]
CSNAME = 2              # CONTROL SEQ TOKEN, data = the command
COMMENT = 3             # data is the actual comment
DMATH = 4               # DISPLAYMATH, data = [chunk, chunk,..]
MATH = 5                # MATH, see DISPLAYMATH
OTHER = 6               # CHAR WITH CATCODE OTHER, data = char
ACTIVE = 7              # ACTIVE CHAR
GOBBLEDWHITE = 8        # Gobbled LWSP, after CSNAME
ENDLINE = 9             # END-OF-LINE, data = '\n'
DENDLINE = 10           # DOUBLE EOL, data='\n', indicates \par
ENV = 11                # LaTeX-environment
                        # data =(envname,[ch,ch,ch,.])
CSLINE = 12             # for texi: next chunk will be one group
                        # of args. Will be set all on 1 line
IGNORE = 13             # IGNORE this data
ENDENV = 14             # TEMP END OF GROUP INDICATOR
IF = 15                 # IF-directive
                        # data = (flag,negate,[ch, ch, ch,...])

# every valid chunk type; used for validation in class ChunkType below
the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
          GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF)
|
|
|
|
# class, just to display symbolic name
class ChunkType:
    def __init__(self, chunk_type):
        # only the 16 phase-1 chunk type constants are valid
        if chunk_type not in the_types:
            raise ValueError, 'chunk_type not in the_types'
        self.chunk_type = chunk_type

    def __cmp__(self, other):
        # Allow comparison against a raw integer constant by promoting it
        # to the canonical instance from the module-level `chunk_type` map
        # (defined just below this class).
        if type(self) != type(other):
            other = chunk_type[other]
        return cmp(self.chunk_type, other.chunk_type)

    def __repr__(self):
        # symbolic name for debugging output
        if self.chunk_type == PLAIN:
            return 'PLAIN'
        elif self.chunk_type == GROUP:
            return 'GROUP'
        elif self.chunk_type == CSNAME:
            return 'CSNAME'
        elif self.chunk_type == COMMENT:
            return 'COMMENT'
        elif self.chunk_type == DMATH:
            return 'DMATH'
        elif self.chunk_type == MATH:
            return 'MATH'
        elif self.chunk_type == OTHER:
            return 'OTHER'
        elif self.chunk_type == ACTIVE:
            return 'ACTIVE'
        elif self.chunk_type == GOBBLEDWHITE:
            return 'GOBBLEDWHITE'
        elif self.chunk_type == DENDLINE:
            return 'DENDLINE'
        elif self.chunk_type == ENDLINE:
            return 'ENDLINE'
        elif self.chunk_type == ENV:
            return 'ENV'
        elif self.chunk_type == CSLINE:
            return 'CSLINE'
        elif self.chunk_type == IGNORE:
            return 'IGNORE'
        elif self.chunk_type == ENDENV:
            return 'ENDENV'
        elif self.chunk_type == IF:
            return 'IF'
        else:
            # unreachable if __init__ validated, kept as a safety net
            raise ValueError, 'chunk_type not in the_types'
|
|
|
|
# ...and the wrapper: one canonical ChunkType instance per constant
chunk_type = {}
for t in the_types:
    chunk_type[t] = ChunkType(t)

# store a type object of the ChunkType-class-instance...
# (old-style instances all share one type; used by Chunk.__init__ to
# distinguish a ChunkType from a raw integer constant)
chunk_type_type = type(chunk_type[PLAIN])
|
|
|
|
# this class contains a part of the parsed buffer
class Chunk:
    def __init__(self, chtype, where, data):
        # accept either a ChunkType instance or a raw integer constant;
        # raw constants are promoted to the canonical instance
        if type(chtype) != chunk_type_type:
            chtype = chunk_type[chtype]
        self.chtype = chtype    # symbolic chunk type
        self.where = where      # buffer position where the chunk starts
        self.data = data        # (start, end) tuple, string, or chunk list

    # chunk types whose data is a (start, end) slice into the buffer
    __datatypes = [chunk_type[CSNAME], chunk_type[PLAIN], chunk_type[CSLINE]]

    def __repr__(self):
        # NOTE(review): self.buf is never assigned anywhere in this class;
        # repr() of a CSNAME/PLAIN/CSLINE chunk only works if a `buf`
        # attribute has been poked onto the instance externally — verify.
        if self.chtype in self.__datatypes:
            data = s(self.buf, self.data)
        else:
            data = self.data
        return 'chunk' + `self.chtype, self.where, data`
|
|
|
|
# and the wrapper: lowercase alias used as the constructor throughout
chunk = Chunk


# string exception used for all parse errors in this module
# (string exceptions are the Python 1.x idiom)
error = 'partparse.error'
|
|
|
|
#
# TeX's catcodes... (category codes, TeXbook chapter 7)
#
CC_ESCAPE = 0           # starts a control sequence, normally '\'
CC_LBRACE = 1           # begin group, normally '{'
CC_RBRACE = 2           # end group, normally '}'
CC_MATHSHIFT = 3        # math shift, normally '$'
CC_ALIGNMENT = 4        # alignment tab, normally '&'
CC_ENDLINE = 5          # end of line
CC_PARAMETER = 6        # macro parameter, normally '#'
CC_SUPERSCRIPT = 7      # superscript, normally '^'
CC_SUBSCRIPT = 8        # subscript, normally '_'
CC_IGNORE = 9           # ignored character
CC_WHITE = 10           # linear whitespace
CC_LETTER = 11          # letters (may appear in cs names)
CC_OTHER = 12           # everything else
CC_ACTIVE = 13          # active character, normally '~'
CC_COMMENT = 14         # comment character, normally '%'
CC_INVALID = 15         # invalid character
|
|
|
|
# and the names, indexed by catcode number
cc_names = [
    'CC_ESCAPE',
    'CC_LBRACE',
    'CC_RBRACE',
    'CC_MATHSHIFT',
    'CC_ALIGNMENT',
    'CC_ENDLINE',
    'CC_PARAMETER',
    'CC_SUPERSCRIPT',
    'CC_SUBSCRIPT',
    'CC_IGNORE',
    'CC_WHITE',
    'CC_LETTER',
    'CC_OTHER',
    'CC_ACTIVE',
    'CC_COMMENT',
    'CC_INVALID',
]


# Show a list of catcode-name-symbols, e.g. '[CC_WHITE, CC_LETTER]'
def pcl(codelist):
    names = []
    for code in codelist:
        names.append(cc_names[code])
    return '[' + ', '.join(names) + ']'


# the name of the catcode (ACTIVE, OTHER, etc.)
def pc(code):
    return cc_names[code]
|
|
|
|
|
|
# Which catcodes make the parser stop parsing regular plaintext
regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
          CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
          CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE]

# same for scanning a control sequence name
csname_scancodes = [CC_LETTER]

# same for gobbling LWSP
white_scancodes = [CC_WHITE]
##white_scancodes = [CC_WHITE, CC_ENDLINE]

# make a list of all catcode id's, except for catcode ``other''
# (used by make_other_codes to compute OTHER as the complement set)
all_but_other_codes = range(16)
del all_but_other_codes[CC_OTHER]
##print all_but_other_codes

# when does a comment end
comment_stopcodes = [CC_ENDLINE]
|
|
|
|
# gather all characters together, specified by a list of catcodes
def code2string(cc, codelist):
    """Concatenate the character sets of catcode table *cc* selected by
    *codelist*, skipping categories with no characters assigned (None)."""
    pieces = [cc[category] for category in codelist if cc[category]]
    return ''.join(pieces)
|
|
|
|
# automatically generate all characters of catcode other, being the
# complement set in the character range 0..255
def make_other_codes(cc):
    """Return a string of every character not claimed by any category in
    *cc* other than OTHER itself (per the module-level list
    all_but_other_codes)."""
    slots = list(range(256))
    for category in all_but_other_codes:
        if cc[category]:
            for c in cc[category]:
                # blank out the slot of each claimed character
                slots[ord(c)] = None
    remaining = [chr(i) for i in slots if i != None]
    return ''.join(remaining)
|
|
|
|
# catcode dump (which characters have which catcodes).
def dump_cc(name, cc):
    """Sanity-check that *cc* is a full 16-entry catcode table; raise
    TypeError otherwise. The actual dump output has been commented out
    historically, so this now only validates."""
    if len(cc) != 16:
        raise TypeError('cc not good cat class')
    ## for i in range(16):
    ##     print pc(i) + '\t' + `cc[i]`
|
|
|
|
|
|
# In the beginning,....
# an empty catcode table: one slot per catcode, no characters assigned
epoch_cc = [None] * 16
##dump_cc('epoch_cc', epoch_cc)


# INITEX
# the catcodes TeX starts out with
initex_cc = epoch_cc[:]
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
          '\n', '\0', ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
##dump_cc('initex_cc', initex_cc)


# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = initex_cc[:]
lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B'     # '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01'       # '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C'          # '~' and C-l
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
##dump_cc('lplain_cc', lplain_cc)


# Guido's LaTeX environment catcoded '_' as ``other''
# my own purpose catlist
my_cc = lplain_cc[:]
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:]   # remove '_' here
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'         # add it to OTHER list
dump_cc('my_cc', my_cc)    # validates table length only; dump itself is disabled
|
|
|
|
|
|
|
|
# needed for un_re, my equivalent for regexp-quote in Emacs
re_meaning = '\\[]^$'


def un_re(str):
    """Backslash-escape every character of *str* that is special to the
    old `regex` module (the set listed in re_meaning)."""
    escaped = []
    for ch in str:
        if ch in re_meaning:
            escaped.append('\\')
        escaped.append(ch)
    return ''.join(escaped)
|
|
|
|
# NOTE the negate ('^') operator in *some* of the regexps below
# These build scanners over the pre-`re` `regex` module, whose
# compiled-pattern .search() returns a position or -1 (not a match object).

def make_rc_regular(cc):
    # match any single "stop" character for regular-text mode
    # problems here if '[]' are included!!
    return regex.compile('[' + code2string(cc, regular_stopcodes) + ']')


def make_rc_cs_scan(cc):
    # match the first character that may NOT appear in a cs name
    return regex.compile('[^' + code2string(cc, csname_scancodes) + ']')


def make_rc_comment(cc):
    # match the character that terminates a comment (end of line)
    return regex.compile('[' + code2string(cc, comment_stopcodes) + ']')


def make_rc_endwhite(cc):
    # match the first non-whitespace character
    return regex.compile('[^' + code2string(cc, white_scancodes) + ']')
|
|
|
|
|
|
|
|
# regular: normal mode:
rc_regular = make_rc_regular(my_cc)

# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
rc_cs_scan = make_rc_cs_scan(my_cc)
rc_comment = make_rc_comment(my_cc)     # find end of '%' comment
rc_endwhite = make_rc_endwhite(my_cc)   # find end of a whitespace run
|
|
|
|
|
|
# parseit (BUF, PARSEMODE=mode[MODE_REGULAR], START=0, RECURSION-LEVEL=0)
# RECURSION-LEVEL will is incremented on entry.
# result contains the list of chunks returned
# together with this list, the buffer position is returned

# RECURSION-LEVEL will be set to zero *again*, when recursively a
# {,D}MATH-mode scan has been enetered.
# This has been done in order to better check for environment-mismatches

def parseit(buf, parsemode=mode[MODE_REGULAR], start=0, lvl=0):
    # Phase-1 scanner: returns (newpos, chunklist) for the regular modes,
    # or (newpos, (start, pos)) slice tuples for the scan/gobble modes.
    global lineno

    result = []
    end = len(buf)
    if lvl == 0 and parsemode == mode[MODE_REGULAR]:
        # top-level entry: (re)start the global line counter
        lineno = 1
    lvl = lvl + 1

    #
    # some of the more regular modes...
    #
    if parsemode in (mode[MODE_REGULAR], mode[MODE_DMATH], mode[MODE_MATH]):
        cstate = []
        newpos = start
        curpmode = parsemode
        while 1:
            where = newpos
            if where == end:
                if lvl > 1 or curpmode != mode[MODE_REGULAR]:
                    # not the way we started...
                    raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
                # the real ending of lvl-1 parse
                return end, result

            # find the next character that needs special handling
            pos = rc_regular.search(buf, where)
            if pos < 0:
                pos = end

            if pos != where:
                # everything up to the stop character is one PLAIN chunk
                newpos, c = pos, chunk(PLAIN, where, (where, pos))
                result.append(c)
                continue

            #
            # ok, pos == where and pos != end
            #
            foundchar = buf[where]
            if foundchar in my_cc[CC_LBRACE]:
                # recursive subgroup parse...
                newpos, data = parseit(buf, curpmode, where+1, lvl)
                result.append(chunk(GROUP, where, data))

            elif foundchar in my_cc[CC_RBRACE]:
                if lvl <= 1:
                    raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
                # NOTE(review): the next test is dead code (lvl == 1 cannot
                # be reached after the raise above), it compares the `mode`
                # dict itself rather than curpmode, and its message string
                # accidentally contains the concatenation expression inside
                # the quotes — presumably meant to be
                # 'endgroup while in math mode.' + lin() + epsilon(buf, where).
                if lvl == 1 and mode != mode[MODE_REGULAR]:
                    raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
                return where + 1, result

            elif foundchar in my_cc[CC_ESCAPE]:
                #
                # call the routine that actually deals with
                # this problem. If do_ret is None, than
                # return the value of do_ret
                #
                # Note that handle_cs might call this routine
                # recursively again...
                #
                do_ret, newpos = handlecs(buf, where,
                          curpmode, lvl, result, end)
                if do_ret != None:
                    return do_ret

            elif foundchar in my_cc[CC_COMMENT]:
                newpos, data = parseit(buf,
                          mode[MODE_COMMENT], where+1, lvl)
                result.append(chunk(COMMENT, where, data))

            elif foundchar in my_cc[CC_MATHSHIFT]:
                # note that recursive calls to math-mode
                # scanning are called with recursion-level 0
                # again, in order to check for bad mathend
                #
                if where + 1 != end and buf[where + 1] in my_cc[CC_MATHSHIFT]:
                    #
                    # double mathshift, e.g. '$$'
                    #
                    if curpmode == mode[MODE_REGULAR]:
                        newpos, data = parseit(buf, mode[MODE_DMATH],
                                  where + 2, 0)
                        result.append(chunk(DMATH, where, data))
                    elif curpmode == mode[MODE_MATH]:
                        # NOTE(review): 'delimiiter' typo in the message
                        raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
                    elif lvl != 1:
                        raise error, 'bad mathend.' + lle(lvl, buf, where)
                    else:
                        return where + 2, result
                else:
                    #
                    # single math shift, e.g. '$'
                    #
                    if curpmode == mode[MODE_REGULAR]:
                        newpos, data = parseit(buf, mode[MODE_MATH],
                                  where + 1, 0)
                        result.append(chunk(MATH, where, data))
                    elif curpmode == mode[MODE_DMATH]:
                        raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
                    elif lvl != 1:
                        # NOTE(review): lv() takes one argument; this call
                        # would itself raise TypeError — lle() was surely meant
                        raise error, 'bad mathend.' + lv(lvl, buf, where)
                    else:
                        return where + 1, result

            elif foundchar in my_cc[CC_IGNORE]:
                print 'warning: ignored char', `foundchar`
                newpos = where + 1

            elif foundchar in my_cc[CC_ACTIVE]:
                result.append(chunk(ACTIVE, where, foundchar))
                newpos = where + 1

            elif foundchar in my_cc[CC_INVALID]:
                raise error, 'invalid char ' + `foundchar`
                # NOTE(review): unreachable after the raise above
                newpos = where + 1

            elif foundchar in my_cc[CC_ENDLINE]:
                #
                # after an end of line, eat the rest of
                # whitespace on the beginning of the next line
                # this is what LaTeX more or less does
                #
                # also, try to indicate double newlines (\par)
                #
                lineno = lineno + 1
                savedwhere = where
                newpos, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], where + 1, lvl)
                if newpos != end and buf[newpos] in my_cc[CC_ENDLINE]:
                    result.append(chunk(DENDLINE, savedwhere, foundchar))
                else:
                    result.append(chunk(ENDLINE, savedwhere, foundchar))
            else:
                result.append(chunk(OTHER, where, foundchar))
                newpos = where + 1

    elif parsemode == mode[MODE_CS_SCAN]:
        #
        # scan for a control sequence token. `\ape', `\nut' or `\%'
        #
        if start == end:
            raise EOFError, 'can\'t find end of csname'
        pos = rc_cs_scan.search(buf, start)
        if pos < 0:
            pos = end
        if pos == start:
            # first non-letter right where we started the search
            # ---> the control sequence name consists of one single
            # character. Also: don't eat white space...
            if buf[pos] in my_cc[CC_ENDLINE]:
                lineno = lineno + 1
            pos = pos + 1
            return pos, (start, pos)
        else:
            # multi-letter name: swallow trailing whitespace after it
            spos = pos
            if buf[pos] == '\n':
                lineno = lineno + 1
                spos = pos + 1
            pos2, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], spos, lvl)
            return pos2, (start, pos)

    elif parsemode == mode[MODE_GOBBLEWHITE]:
        if start == end:
            return start, ''
        pos = rc_endwhite.search(buf, start)
        if pos < 0:
            pos = start
        return pos, (start, pos)

    elif parsemode == mode[MODE_COMMENT]:
        pos = rc_comment.search(buf, start)
        lineno = lineno + 1
        if pos < 0:
            print 'no newline perhaps?'
            raise EOFError, 'can\'t find end of comment'
        pos = pos + 1
        # comments eat the leading whitespace of the next line too
        pos2, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], pos, lvl)
        return pos2, (start, pos)

    else:
        raise error, 'Unknown mode (' + `parsemode` + ')'
|
|
|
|
|
|
#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)

#boxcommands = 'mbox', 'fbox'
#defcommands = 'def', 'newcommand'

# literal text that terminates a verbatim environment; handlecs searches
# for it verbatim (regex-quoted via un_re) instead of parsing
endverbstr = '\\end{verbatim}'

re_endverb = regex.compile(un_re(endverbstr))
|
|
|
|
#
# handlecs: helper function for parseit, for the special thing we might
# wanna do after certain command control sequences
# returns: None or return_data, newpos
#
# in the latter case, the calling function is instructed to immediately
# return with the data in return_data
#
def handlecs(buf, where, curpmode, lvl, result, end):
    # Appends chunks to `result` in place; `where` is the position of the
    # escape character itself.
    global lineno

    # get the control sequence name...
    newpos, data = parseit(buf, mode[MODE_CS_SCAN], where+1, lvl)
    saveddata = data
    s_buf_data = s(buf, data)

    if s_buf_data in ('begin', 'end'):
        # skip the expected '{' and get the LaTeX-envname '}'
        newpos, data = parseit(buf, mode[MODE_REGULAR], newpos+1, lvl)
        if len(data) != 1:
            raise error, 'expected 1 chunk of data.' + lle(lvl, buf, where)

        # yucky, we've got an environment
        envname = s(buf, data[0].data)
        s_buf_saveddata = s(buf, saveddata)
        ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
        if s_buf_saveddata == 'begin' and envname == 'verbatim':
            # verbatim deserves special treatment: take everything up to
            # the literal '\end{verbatim}' as one PLAIN chunk
            pos = re_endverb.search(buf, newpos)
            if pos < 0:
                raise error, "%s not found.%s" \
                      % (`endverbstr`, lle(lvl, buf, where))
            result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
            newpos = pos + len(endverbstr)

        elif s_buf_saveddata == 'begin':
            # start parsing recursively... If that parse returns
            # from an '\end{...}', then should the last item of
            # the returned data be a string containing the ended
            # environment
            newpos, data = parseit(buf, curpmode, newpos, lvl)
            if not data or type(data[-1]) is not StringType:
                raise error, "missing 'end'" + lle(lvl, buf, where) \
                      + epsilon(buf, newpos)
            retenv = data[-1]
            del data[-1]
            if retenv != envname:
                #[`retenv`, `envname`]
                raise error, 'environments do not match.%s%s' \
                      % (lle(lvl, buf, where), epsilon(buf, newpos))
            result.append(chunk(ENV, where, (retenv, data)))
        else:
            # 'end'... append the environment name, as just
            # pointed out, and order parsit to return...
            result.append(envname)
            ##print 'POINT of return: ' + epsilon(buf, newpos)
            # the tuple will be returned by parseit
            return (newpos, result), newpos

    # end of \begin ... \end handling

    elif s_buf_data[0:2] == 'if':
        # another scary monster: the 'if' directive
        flag = s_buf_data[2:]

        # recursively call parseit, just like environment above..
        # the last item of data should contain the if-termination
        # e.g., 'else' of 'fi'
        newpos, data = parseit(buf, curpmode, newpos, lvl)
        if not data or data[-1] not in ('else', 'fi'):
            raise error, 'wrong if... termination' + \
                  lle(lvl, buf, where) + epsilon(buf, newpos)

        ifterm = data[-1]
        del data[-1]
        # 0 means dont_negate flag
        result.append(chunk(IF, where, (flag, 0, data)))
        if ifterm == 'else':
            # do the whole thing again, there is only one way
            # to end this one, by 'fi'
            newpos, data = parseit(buf, curpmode, newpos, lvl)
            if not data or data[-1] not in ('fi', ):
                raise error, 'wrong if...else... termination' \
                      + lle(lvl, buf, where) \
                      + epsilon(buf, newpos)

            ifterm = data[-1]
            del data[-1]
            # negate flag set: this is the \else branch
            result.append(chunk(IF, where, (flag, 1, data)))
        #done implicitely: return None, newpos

    elif s_buf_data in ('else', 'fi'):
        # terminator token: hand the bare string back so the caller
        # (the 'if' branch above) can recognise it as data[-1]
        result.append(s(buf, data))
        # order calling party to return tuple
        return (newpos, result), newpos

    # end of \if, \else, ... \fi handling

    elif s(buf, saveddata) == 'verb':
        # \verb<delim>...<delim>: scan manually for the closing delimiter
        x2 = saveddata[1]
        result.append(chunk(CSNAME, where, data))
        if x2 == end:
            raise error, 'premature end of command.' + lle(lvl, buf, where)
        delimchar = buf[x2]
        ##print 'VERB: delimchar ' + `delimchar`
        pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
        if pos < 0:
            raise error, 'end of \'verb\' argument (' + \
                  `delimchar` + ') not found.' + \
                  lle(lvl, buf, where)
        result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
        newpos = pos + 1
    else:
        # any other control sequence: just record the CSNAME token
        result.append(chunk(CSNAME, where, data))
    return None, newpos
|
|
|
|
# this is just a function to get the string value if the possible data-tuple
def s(buf, data):
    """Resolve chunk *data* to a string: either it already is one, or it
    is a (start, end) pair of integers slicing into *buf*."""
    if type(data) is type(''):
        return data
    if len(data) != 2 or not (type(data[0]) is type(data[1]) is type(0)):
        raise TypeError('expected tuple of 2 integers')
    lo, hi = data
    return buf[lo:hi]
|
|
|
|
|
|
##length, data1, i = getnextarg(length, buf, pp, i + 1)

# make a deep-copy of some chunks
def crcopy(r):
    # map() returns a list under Python 1.x, so this is a real list copy
    # with each element deep-copied by chunkcopy
    return map(chunkcopy, r)
|
|
|
|
|
|
# copy a chunk, would better be a method of class Chunk...
def chunkcopy(ch):
    """Deep-copy chunk *ch*: GROUP chunks get their member list copied
    recursively; every other chunk type shares its data unchanged."""
    if ch.chtype != chunk_type[GROUP]:
        return chunk(ch.chtype, ch.where, ch.data)
    return chunk(GROUP, ch.where, map(chunkcopy, ch.data))
|
|
|
|
|
|
# get next argument for TeX-macro, flatten a group (insert between)
# or return Command Sequence token, or give back one character
def getnextarg(length, buf, pp, item):
    # Mutates the chunk list `pp` in place; returns the updated
    # (length, item) pair. `length` must equal len(pp) on entry.

    ##wobj = Wobj()
    ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
    ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'

    # skip (and delete) line ends preceding the argument
    while item < length and pp[item].chtype == chunk_type[ENDLINE]:
        del pp[item]
        length = length - 1
    if item >= length:
        raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
    if pp[item].chtype == chunk_type[GROUP]:
        # flatten the group: splice its (converted) members into pp
        # in place of the GROUP chunk itself
        newpp = pp[item].data
        del pp[item]
        length = length - 1
        # changeit is defined elsewhere in this file (phase-2 conversion)
        changeit(buf, newpp)
        length = length + len(newpp)
        pp[item:item] = newpp
        item = item + len(newpp)
        if len(newpp) < 10:
            wobj = Wobj()
            dumpit(buf, wobj.write, newpp)
            ##print 'GETNEXTARG: inserted ' + `wobj.data`
        return length, item
    elif pp[item].chtype == chunk_type[PLAIN]:
        #grab one char
        print 'WARNING: grabbing one char'
        if len(s(buf, pp[item].data)) > 1:
            # split off the first character as its own PLAIN chunk
            pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
            item, length = item+1, length+1
            pp[item].data = s(buf, pp[item].data)[1:]
        else:
            item = item+1
        return length, item
    else:
        # anything else: complain (truncated) and leave it in place
        ch = pp[item]
        try:
            str = `s(buf, ch.data)`
        except TypeError:
            str = `ch.data`
        if len(str) > 400:
            str = str[:400] + '...'
        print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
        return length, item
|
|
|
|
|
|
# this one is needed to find the end of LaTeX's optional argument, like
# item[...]
re_endopt = regex.compile(']')

# get a LaTeX-optional argument, you know, the square braces '[' and ']'
def getoptarg(length, buf, pp, item):
    # Mutates `pp` in place, stripping the '[' ... ']' delimiters; returns
    # the updated (length, item). No-op if no optional argument starts here.

    wobj = Wobj()
    dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
    ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'

    # an optional arg must start as a PLAIN chunk whose text begins with '['
    if item >= length or \
              pp[item].chtype != chunk_type[PLAIN] or \
              s(buf, pp[item].data)[0] != '[':
        return length, item

    # strip the leading '['; drop the chunk entirely if now empty
    pp[item].data = s(buf, pp[item].data)[1:]
    if len(pp[item].data) == 0:
        del pp[item]
        length = length-1

    while 1:
        if item == length:
            raise error, 'No end of optional arg found'
        if pp[item].chtype == chunk_type[PLAIN]:
            text = s(buf, pp[item].data)
            pos = re_endopt.search(text)
            if pos >= 0:
                # found the closing ']': keep text before it,
                # re-insert any trailing text after it
                pp[item].data = text[:pos]
                if pos == 0:
                    del pp[item]
                    length = length-1
                else:
                    item=item+1
                text = text[pos+1:]

                # swallow whitespace directly after the ']'
                while text and text[0] in ' \t':
                    text = text[1:]

                if text:
                    pp.insert(item, chunk(PLAIN, 0, text))
                    length = length + 1
                return length, item

        item = item+1
|
|
|
|
|
|
# Wobj just add write-requests to the ``data'' attribute
class Wobj:
    # accumulated output; starts as the shared (class-level) empty string
    # and becomes an instance attribute on the first write()
    data = ''

    def write(self, text):
        """File-like write hook: append *text* to self.data."""
        self.data = self.data + text
|
|
|
|
# ignore these commands
ignoredcommands = ('hline', 'small', '/', 'tableofcontents', 'Large')
# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF', 'LaTeX', 'POSIX', 'TeX',
          'SliTeX')
# \{ --> {, \} --> }, etc
themselves = ('{', '}', ',', '.', '@', ' ', '\n') + wordsselves
# these ones also themselves (see argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')
# this is how *I* would show the difference between emph and strong
# code 1 means: fold to uppercase; a (pre, post) tuple means: wrap the text
markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'),
          'strong': ('*', '*')}

# recognise patter {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'}
|
|
|
|
|
|
# try to remove macros and return flat text
def flattext(buf, pp):
    # Works on a deep copy of the chunk list, so the caller's chunks are
    # untouched. Returns a plain string rendering of pp.
    pp = crcopy(pp)
    ##print '---> FLATTEXT ' + `pp`
    wobj = Wobj()

    i, length = 0, len(pp)
    while 1:
        # invariant: length tracks len(pp) across all in-place edits
        if len(pp) != length:
            raise 'FATAL', 'inconsistent length'
        if i >= length:
            break
        ch = pp[i]
        i = i+1
        if ch.chtype == chunk_type[PLAIN]:
            # plain text passes through untouched
            pass
        elif ch.chtype == chunk_type[CSNAME]:
            s_buf_data = s(buf, ch.data)
            # convertible_csname/conversion are defined elsewhere in
            # this file (the one-to-one conversion map `d` below)
            if convertible_csname(s_buf_data):
                ch.chtype, ch.data, nix = conversion(s_buf_data)
                if hist.inargs and s_buf_data in inargsselves:
                    ch.chtype = chunk_type[PLAIN]
            elif len(s_buf_data) == 1 \
                      and s_buf_data in onlylatexspecial:
                # e.g. \_ \$ \& ... : keep the bare character
                ch.chtype = chunk_type[PLAIN]
                # if it is followed by an empty group,
                # remove that group, it was needed for
                # a true space
                if i < length \
                          and pp[i].chtype==chunk_type[GROUP] \
                          and len(pp[i].data) == 0:
                    del pp[i]
                    length = length-1

            elif s_buf_data in markcmds.keys():
                # markup command: flatten its argument recursively and
                # decorate per the markcmds table
                length, newi = getnextarg(length, buf, pp, i)
                str = flattext(buf, pp[i:newi])
                del pp[i:newi]
                length = length - (newi - i)
                ch.chtype = chunk_type[PLAIN]
                markcmd = s_buf_data
                x = markcmds[markcmd]
                if type(x) == TupleType:
                    pre, after = x
                    str = pre+str+after
                elif x == 1:
                    # code 1: fold to uppercase
                    str = string.upper(str)
                else:
                    raise 'FATAL', 'corrupt markcmds'
                ch.data = str
            else:
                # unknown command: drop it (with a warning unless listed
                # in ignoredcommands)
                if s_buf_data not in ignoredcommands:
                    print 'WARNING: deleting command ' + s_buf_data
                    print 'PP' + `pp[i-1]`
                del pp[i-1]
                i, length = i-1, length-1
        elif ch.chtype == chunk_type[GROUP]:
            # splice the group contents in place of the GROUP chunk
            length, newi = getnextarg(length, buf, pp, i-1)
            i = i-1
            ## str = flattext(buf, crcopy(pp[i-1:newi]))
            ## del pp[i:newi]
            ## length = length - (newi - i)
            ## ch.chtype = chunk_type[PLAIN]
            ## ch.data = str
        else:
            # everything else (math, comments, ...) is left for dumpit
            pass

    dumpit(buf, wobj.write, pp)
    ##print 'FLATTEXT: RETURNING ' + `wobj.data`
    return wobj.data
|
|
|
|
# try to generate node names (a bit shorter than the chapter title)
# note that the \nodename command (see elsewhere) overules these efforts
def invent_node_names(text):
    """Heuristically shorten a chapter/section title to a texinfo node name.

    Falls back to *text* itself when no pattern matches.
    """
    words = text.split()
    count = len(words)

    ##print 'WORDS ' + `words`

    # 'Built-in Foo' -> 'Foo' (but not for the generic Modules/Functions)
    if count == 2 and words[0].lower() == 'built-in' \
              and words[1].lower() not in ('modules', 'functions'):
        return words[1]
    # '... Module spam' -> 'spam'
    if count == 3 and words[1].lower() == 'module':
        return words[2]
    # 'Foo Object ...' -> 'Foo Object'
    if count == 3 and words[1].lower() == 'object':
        return ' '.join(words[0:2])
    # '<Name> <Kind> ... methods and data attributes' -> first two words
    if count > 4 and ' '.join(words[-4:]).lower() \
              == 'methods and data attributes':
        return ' '.join(words[:2])
    return text
|
|
|
|
# characters stripped/matched when cleaning index entries and node names
re_commas_etc = regex.compile('[,`\'@{}]')

# linear whitespace (used to detect all-blank PLAIN chunks)
re_whitespace = regex.compile('[ \t]*')
|
|
|
|
|
|
##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
|
|
|
|
# look if the next non-white stuff is also a command, resulting in skipping
|
|
# double endlines (DENDLINE) too, and thus omitting \par's
|
|
# Sometimes this is too much, maybe consider DENDLINE's as stop
|
|
def next_command_p(length, buf, pp, i, cmdname):
|
|
|
|
while 1:
|
|
if i >= len(pp):
|
|
break
|
|
ch = pp[i]
|
|
i = i+1
|
|
if ch.chtype == chunk_type[ENDLINE]:
|
|
continue
|
|
if ch.chtype == chunk_type[DENDLINE]:
|
|
continue
|
|
if ch.chtype == chunk_type[PLAIN]:
|
|
if re_whitespace.search(s(buf, ch.data)) == 0 and \
|
|
re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)):
|
|
continue
|
|
return -1
|
|
if ch.chtype == chunk_type[CSNAME]:
|
|
if s(buf, ch.data) == cmdname:
|
|
return i # _after_ the command
|
|
return -1
|
|
return -1
|
|
|
|
|
|
# things that are special to LaTeX, but not to texi..
onlylatexspecial = '_~^$#&%'

# minimal attribute bag (pre-dates any stdlib equivalent)
class Struct: pass

# global conversion state, (re)initialised by startchange()
hist = Struct()
# global output bookkeeping, (re)initialised by startchange()
out = Struct()
|
|
|
|
def startchange():
    """Reset the global conversion state before processing a document.

    (Re)initializes the attributes of the module-level `hist' and `out'
    records; see their uses throughout changeit().
    """
    global hist, out

    hist.chaptertype = "chapter"
    hist.inenv = []
    hist.nodenames = []
    hist.cindex = []
    hist.inargs = 0
    hist.enumeratenesting = 0
    hist.itemizenesting = 0
    hist.this_module = None

    out.doublenodes = []
    out.doublecindeces = []
|
|
|
|
|
|
# prebuilt chunk lists, reused when assembling index entries
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize
itemizesymbols = ['bullet', 'minus', 'dots']

# same for enumerate
enumeratesymbols = ['1', 'A', 'a']
|
|
|
|
# Map of things that convert one-to-one. Each entry is a 3-tuple:
#
# new_chtype, new_data, nix_trailing_empty_group
#
d = {}
# add stuff that converts from one name to another:
# (markup-only commands that all become texinfo @code)
for name in ('url', 'module', 'function', 'cfunction',
             'keyword', 'method', 'exception', 'constant',
             'email', 'class', 'member', 'cdata', 'ctype',
             'member', 'sectcode', 'verb',
             'cfunction', 'cdata', 'ctype',
             ):
    d[name] = chunk_type[CSNAME], 'code', 0
# commands that exist in texinfo under the same name
for name in ('emph', 'var', 'strong', 'code', 'kbd', 'key',
             'dfn', 'samp', 'file', 'r', 'i', 't'):
    d[name] = chunk_type[CSNAME], name, 0
d['character'] = chunk_type[CSNAME], 'samp', 0
d['url'] = chunk_type[CSNAME], 'code', 0
d['email'] = chunk_type[CSNAME], 'code', 0
d['mimetype'] = chunk_type[CSNAME], 'code', 0
d['newsgroup'] = chunk_type[CSNAME], 'code', 0
d['program'] = chunk_type[CSNAME], 'strong', 0
d['\\'] = chunk_type[CSNAME], '*', 0
# add stuff that converts to text:
# (themselves/wordsselves are defined earlier in this file)
for name in themselves:
    d[name] = chunk_type[PLAIN], name, 0
for name in wordsselves:
    d[name] = chunk_type[PLAIN], name, 1
for name in ',[]()':
    d[name] = chunk_type[PLAIN], name, 0
# a lot of these are LaTeX2e additions
for name, value in [('quotedblbase', ',,'), ('quotesinglbase', ','),
                    ('textquotedbl', '"'), ('LaTeXe', 'LaTeX2e'),
                    ('e', '\\'), ('textquotedblleft', "``"),
                    ('textquotedblright', "''"), ('textquoteleft', "`"),
                    ('textquoteright', "'"), ('textbackslash', '\\'),
                    ('textbar', '|'), ('textless', '<'),
                    ('textgreater', '>'), ('textasciicircum', '^'),
                    ('Cpp', 'C++'), ('copyright', '')]:
    d[name] = chunk_type[PLAIN], value, 1
# export only the two lookup operations; the table itself goes away
convertible_csname = d.has_key
conversion = d.get
del d, name, value
|
|
|
|
##
|
|
## \begin{ {func,data,exc}desc }{name}...
|
|
## the resulting texi-code is dependent on the contents of indexsubitem
|
|
##
|
|
|
|
# indexsubitem: `['XXX', 'function']`
# funcdesc:
#     deffn {`idxsi`} NAME (FUNCARGS)

# indexsubitem: `['XXX', 'method']`
# funcdesc:
#     defmethod {`idxsi[0]`} NAME (FUNCARGS)

# indexsubitem: `['in', 'module', 'MODNAME']`
# datadesc:
#     defcv data {`idxsi[1:]`} NAME
# excdesc:
#     defcv exception {`idxsi[1:]`} NAME
# funcdesc:
#     deffn {function of `idxsi[1:]`} NAME (FUNCARGS)

# indexsubitem: `['OBJECT', 'attribute']`
# datadesc:
#     defcv attribute {`OBJECT`} NAME
|
|
|
|
|
|
## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
## or \funcline{NAME}{ARGS}
##
def do_funcdesc(length, buf, pp, i, index=1):
    # Rewrite a funcdesc-style construct into a texinfo @deffn line.
    # pp[i-1] is the triggering chunk; it becomes the CSLINE, and the two
    # following arguments (function name, parameter list) are folded into
    # one GROUP chunk inserted at i.  Returns the updated (length, i).
    # NOTE(review): the `index' parameter is unused here -- confirm intent.
    startpoint = i-1
    ch = pp[startpoint]
    wh = ch.where
    # first mandatory argument: the function name
    length, newi = getnextarg(length, buf, pp, i)
    funcname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)
    # second argument: the formal parameter list; hist.inargs is set so
    # that \, \[ \] receive their special in-arguments treatment
    save = hist.inargs
    hist.inargs = 1
    length, newi = getnextarg(length, buf, pp, i)
    hist.inargs = save
    del save
    # wrap the argument chunks in literal parentheses
    the_args = [chunk(PLAIN, wh, '()'[0])] + pp[i:newi] + \
               [chunk(PLAIN, wh, '()'[1])]
    del pp[i:newi]
    length = length - (newi-i)

    idxsi = hist.indexsubitem # words
    command = 'deffn'
    if hist.this_module:
        cat_class = 'function of ' + hist.this_module
    else:
        cat_class = 'built-in function'
    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    # assemble: {cat_class} NAME (ARGS)
    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
    cslinearg.append(chunk(PLAIN, wh, ' '))
    cslinearg.append(funcname)
    cslinearg.append(chunk(PLAIN, wh, ' '))
    l = len(cslinearg)
    cslinearg[l:l] = the_args

    pp.insert(i, chunk(GROUP, wh, cslinearg))
    i, length = i+1, length+1
    # remember which @def... command to close at \end{...}/\funcline time
    hist.command = command
    return length, i
|
|
|
|
|
|
## this routine will be called on \begin{excdesc}{NAME}
## or \excline{NAME}
##
def do_excdesc(length, buf, pp, i):
    # Rewrite an excdesc construct into a texinfo @defcv (or @defvr) line.
    # pp[i-1] is the triggering chunk; the single exception-name argument
    # is folded into a GROUP chunk inserted at i.
    # Returns the updated (length, i).
    startpoint = i-1
    ch = pp[startpoint]
    wh = ch.where
    # the single mandatory argument: the exception name
    length, newi = getnextarg(length, buf, pp, i)
    excname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    idxsi = hist.indexsubitem # words
    command = ''
    cat_class = ''
    class_class = ''
    if idxsi == ['built-in', 'exception', 'base', 'class']:
        command = 'defvr'
        cat_class = 'exception base class'
    else:
        command = 'defcv'
        cat_class = 'exception'

    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    # assemble: {cat_class} [ {class_class} ] NAME
    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
    cslinearg.append(chunk(PLAIN, wh, ' '))
    if class_class:
        # NOTE(review): class_class is never assigned a non-empty value
        # above, so this branch looks dead; kept for symmetry with
        # do_datadesc
        cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
        cslinearg.append(chunk(PLAIN, wh, ' '))
    cslinearg.append(excname)

    pp.insert(i, chunk(GROUP, wh, cslinearg))
    i, length = i+1, length+1
    # remember which @def... command to close at \end{...}/\excline time
    hist.command = command
    return length, i
|
|
|
|
## same for datadesc or dataline...
def do_datadesc(length, buf, pp, i, index=1):
    # Rewrite a datadesc/memberdesc construct into a texinfo @defcv line.
    # The category and class parts are derived from hist.indexsubitem
    # (see the comment table above this group of functions).
    # NOTE(review): the `index' parameter is unused here -- confirm intent.
    startpoint = i-1
    ch = pp[startpoint]
    wh = ch.where
    # the single mandatory argument: the data/member name
    length, newi = getnextarg(length, buf, pp, i)
    dataname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    idxsi = hist.indexsubitem # words
    command = 'defcv'
    cat_class = 'data'
    class_class = ''
    if idxsi[-1] in ('attribute', 'option'):
        cat_class = idxsi[-1]
        class_class = string.join(idxsi[:-1])
    elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
        class_class = string.join(idxsi[1:])
    elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
        class_class = string.join(idxsi[2:])
    else:
        class_class = string.join(idxsi)

    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    # assemble: {cat_class} [ {class_class} ] NAME
    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
    cslinearg.append(chunk(PLAIN, wh, ' '))
    if class_class:
        cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
        cslinearg.append(chunk(PLAIN, wh, ' '))
    cslinearg.append(dataname)

    pp.insert(i, chunk(GROUP, wh, cslinearg))
    i, length = i+1, length+1
    # remember which @def... command to close at \end{...}/\dataline time
    hist.command = command
    return length, i
|
|
|
|
|
|
def do_opcodedesc(length, buf, pp, i):
    # Rewrite an opcodedesc construct into a texinfo @deffn line for a
    # byte code instruction.  pp[i-1] is the triggering chunk; the opcode
    # name argument is folded together with the (already present) second
    # argument chunk into one GROUP at i.  Returns the updated (length, i).
    startpoint = i-1
    ch = pp[startpoint]
    wh = ch.where
    length, newi = getnextarg(length, buf, pp, i)
    dataname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    # BUG FIX: every other handler stores chunk_type[CSLINE]; the bare
    # CSLINE constant was stored here, which breaks the later
    # `ch.chtype == chunk_type[CSLINE]' comparisons made on these chunks.
    ch.chtype = chunk_type[CSLINE]
    ch.data = "deffn"

    cslinearg = [chunk(PLAIN, wh, 'byte\ code\ instruction'),
                 chunk(GROUP, wh, [chunk(PLAIN, wh, "byte code instruction")]),
                 chunk(PLAIN, wh, ' '),
                 dataname,
                 chunk(PLAIN, wh, ' '),
                 pp[i],
                 ]

    pp[i] = chunk(GROUP, wh, cslinearg)
    # remember which @def... command to close at \end{opcodedesc} time
    hist.command = ch.data
    return length, i
|
|
|
|
|
|
def add_module_index(pp, length, i, buf, ch, extra, ref=1):
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = 'pindex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
if not ref:
|
|
if len(ingroupch) == 1:
|
|
hist.this_module = s(buf, ch.data)
|
|
else:
|
|
hist.this_module = None
|
|
print 'add_module_index() error ==>', ingroupch
|
|
|
|
if extra:
|
|
ingroupch.append(chunk(PLAIN, ch.where, ' '))
|
|
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
|
|
ingroupch.append(chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, extra)]))
|
|
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
return length+1, i+1
|
|
|
|
|
|
def yank_indexsubitem(pp, length, i, buf, ch, cmdname):
|
|
stuff = pp[i].data
|
|
if len(stuff) != 1:
|
|
raise error, "first parameter to \\%s too long" % cmdname
|
|
if pp[i].chtype != chunk_type[GROUP]:
|
|
raise error, "bad chunk type following \\%s" \
|
|
"\nexpected GROUP, got %s" + (cmdname, str(ch.chtype))
|
|
text = s(buf, stuff[0].data)
|
|
if text[:1] != '(' or text[-1:] != ')':
|
|
raise error, \
|
|
'expected indexsubitem enclosed in parenteses'
|
|
hist.indexsubitem = string.split(text[1:-1])
|
|
del pp[i-1:i+1]
|
|
return length - 2, i - 1
|
|
|
|
|
|
# regular indices: those that are not set in tt font by default....
regindices = ('cindex', )
|
|
|
|
# remove illegal characters from node names
|
|
def rm_commas_etc(text):
|
|
result = ''
|
|
changed = 0
|
|
while 1:
|
|
pos = re_commas_etc.search(text)
|
|
if pos >= 0:
|
|
changed = 1
|
|
result = result + text[:pos]
|
|
text = text[pos+1:]
|
|
else:
|
|
result = result + text
|
|
break
|
|
if changed:
|
|
print 'Warning: nodename changed to ' + `result`
|
|
|
|
return result
|
|
|
|
# boolean flags
# consulted by the \if... conditional handling in changeit()
flags = {'texi': 1}


# map of \label{} to node names
label_nodes = {}
|
|
|
|
|
|
##
|
|
## changeit: the actual routine, that changes the contents of the parsed
|
|
## chunks
|
|
##
|
|
|
|
def changeit(buf, pp):
|
|
global onlylatexspecial, hist, out
|
|
|
|
i, length = 0, len(pp)
|
|
while 1:
|
|
# sanity check: length should always equal len(pp)
|
|
if len(pp) != length:
|
|
print i, pp[i]
|
|
raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
|
|
if i >= length:
|
|
break
|
|
ch = pp[i]
|
|
i = i + 1
|
|
|
|
if type(ch) is StringType:
|
|
#normally, only chunks are present in pp,
|
|
# but in some cases, some extra info
|
|
# has been inserted, e.g., the \end{...} clauses
|
|
raise 'FATAL', 'got string, probably too many ' + `end`
|
|
|
|
if ch.chtype == chunk_type[GROUP]:
|
|
# check for {\em ...} constructs
|
|
data = ch.data
|
|
if data and \
|
|
data[0].chtype == chunk_type[CSNAME] and \
|
|
fontchanges.has_key(s(buf, data[0].data)):
|
|
k = s(buf, data[0].data)
|
|
del data[0]
|
|
pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
|
|
length, i = length+1, i+1
|
|
|
|
elif data:
|
|
if len(data) \
|
|
and data[0].chtype == chunk_type[GROUP] \
|
|
and len(data[0].data) \
|
|
and data[0].data[0].chtype == chunk_type[CSNAME] \
|
|
and s(buf, data[0].data[0].data) == 'e':
|
|
data[0] = data[0].data[0]
|
|
print "invoking \\e magic group transform..."
|
|
else:
|
|
## print "GROUP -- ch.data[0].data =", ch.data[0].data
|
|
k = s(buf, data[0].data)
|
|
if k == "fulllineitems":
|
|
del data[0]
|
|
pp[i-1:i] = data
|
|
i = i - 1
|
|
length = length + len(data) - 1
|
|
continue
|
|
|
|
# recursively parse the contents of the group
|
|
changeit(buf, data)
|
|
|
|
elif ch.chtype == chunk_type[IF]:
|
|
# \if...
|
|
flag, negate, data = ch.data
|
|
##print 'IF: flag, negate = ' + `flag, negate`
|
|
if flag not in flags.keys():
|
|
raise error, 'unknown flag ' + `flag`
|
|
|
|
value = flags[flag]
|
|
if negate:
|
|
value = (not value)
|
|
del pp[i-1]
|
|
length, i = length-1, i-1
|
|
if value:
|
|
pp[i:i] = data
|
|
length = length + len(data)
|
|
|
|
|
|
elif ch.chtype == chunk_type[ENV]:
|
|
# \begin{...} ....
|
|
envname, data = ch.data
|
|
|
|
#push this environment name on stack
|
|
hist.inenv.insert(0, envname)
|
|
|
|
#append an endenv chunk after grouped data
|
|
data.append(chunk(ENDENV, ch.where, envname))
|
|
##[`data`]
|
|
|
|
#delete this object
|
|
del pp[i-1]
|
|
i, length = i-1, length-1
|
|
|
|
#insert found data
|
|
pp[i:i] = data
|
|
length = length + len(data)
|
|
|
|
if envname == 'verbatim':
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'example'),
|
|
chunk(GROUP, ch.where, [])]
|
|
length, i = length+2, i+2
|
|
|
|
elif envname in ('itemize', 'list', 'fulllineitems'):
|
|
if hist.itemizenesting > len(itemizesymbols):
|
|
raise error, 'too deep itemize nesting'
|
|
if envname == 'list':
|
|
del pp[i:i+2]
|
|
length = length - 2
|
|
ingroupch = [chunk(CSNAME, ch.where,
|
|
itemizesymbols[hist.itemizenesting])]
|
|
hist.itemizenesting = hist.itemizenesting + 1
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
length, i = length+2, i+2
|
|
|
|
elif envname == 'enumerate':
|
|
if hist.enumeratenesting > len(enumeratesymbols):
|
|
raise error, 'too deep enumerate nesting'
|
|
ingroupch = [chunk(PLAIN, ch.where,
|
|
enumeratesymbols[hist.enumeratenesting])]
|
|
hist.enumeratenesting = hist.enumeratenesting + 1
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
length, i = length+2, i+2
|
|
|
|
elif envname == 'description':
|
|
ingroupch = [chunk(CSNAME, ch.where, 'b')]
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'table'),
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
length, i = length+2, i+2
|
|
|
|
elif (envname == 'tableiii') or (envname == 'tableii'):
|
|
if (envname == 'tableii'):
|
|
ltable = 2
|
|
else:
|
|
ltable = 3
|
|
wh = ch.where
|
|
newcode = []
|
|
|
|
#delete tabular format description
|
|
# e.g., {|l|c|l|}
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
newcode.append(chunk(CSLINE, wh, 'table'))
|
|
ingroupch = [chunk(CSNAME, wh, 'asis')]
|
|
newcode.append(chunk(GROUP, wh, ingroupch))
|
|
newcode.append(chunk(CSLINE, wh, 'item'))
|
|
|
|
#get the name of macro for @item
|
|
# e.g., {code}
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
|
|
if newi-i != 1:
|
|
raise error, 'Sorry, expected 1 chunk argument'
|
|
if pp[i].chtype != chunk_type[PLAIN]:
|
|
raise error, 'Sorry, expected plain text argument'
|
|
hist.itemargmacro = s(buf, pp[i].data)
|
|
if convertible_csname(hist.itemargmacro):
|
|
hist.itemargmacro = conversion(hist.itemargmacro)[1]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
itembody = []
|
|
for count in range(ltable):
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
emphgroup = [
|
|
chunk(CSNAME, wh, 'emph'),
|
|
chunk(GROUP, 0, pp[i:newi])]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
if count == 0:
|
|
itemarg = emphgroup
|
|
elif count == ltable-1:
|
|
itembody = itembody + \
|
|
[chunk(PLAIN, wh, ' --- ')] + emphgroup
|
|
else:
|
|
itembody = emphgroup
|
|
newcode.append(chunk(GROUP, wh, itemarg))
|
|
newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
|
|
pp[i:i] = newcode
|
|
l = len(newcode)
|
|
length, i = length+l, i+l
|
|
del newcode, l
|
|
|
|
if length != len(pp):
|
|
raise 'STILL, SOMETHING wrong', `i`
|
|
|
|
elif envname in ('methoddesc', 'methoddescni'):
|
|
length, newi = getoptarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
#
|
|
pp.insert(i, chunk(PLAIN, ch.where, ''))
|
|
i, length = i+1, length+1
|
|
length, i = do_funcdesc(length, buf, pp, i,
|
|
envname[-2:] != "ni")
|
|
|
|
elif envname in ('memberdesc', 'memberdescni'):
|
|
length, newi = getoptarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
#
|
|
pp.insert(i, chunk(PLAIN, ch.where, ''))
|
|
i, length = i+1, length+1
|
|
length, i = do_datadesc(length, buf, pp, i,
|
|
envname[-2:] != "ni")
|
|
|
|
elif envname in ('funcdesc', 'funcdescni', 'classdesc'):
|
|
pp.insert(i, chunk(PLAIN, ch.where, ''))
|
|
i, length = i+1, length+1
|
|
length, i = do_funcdesc(length, buf, pp, i,
|
|
envname[-2:] != "ni")
|
|
|
|
elif envname == 'excdesc':
|
|
pp.insert(i, chunk(PLAIN, ch.where, ''))
|
|
i, length = i+1, length+1
|
|
length, i = do_excdesc(length, buf, pp, i)
|
|
|
|
elif envname in ('datadesc', 'datadescni'):
|
|
pp.insert(i, chunk(PLAIN, ch.where, ''))
|
|
i, length = i+1, length+1
|
|
length, i = do_datadesc(length, buf, pp, i,
|
|
envname[-2:] != "ni")
|
|
|
|
elif envname == 'opcodedesc':
|
|
pp.insert(i, chunk(PLAIN, ch.where, ''))
|
|
i, length = i+1, length+1
|
|
length, i = do_opcodedesc(length, buf, pp, i)
|
|
|
|
elif envname == 'seealso':
|
|
chunks = [chunk(ENDLINE, ch.where, "\n"),
|
|
chunk(CSNAME, ch.where, "b"),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, "See also: ")]),
|
|
chunk(ENDLINE, ch.where, "\n"),
|
|
chunk(ENDLINE, ch.where, "\n")]
|
|
pp[i-1:i] = chunks
|
|
length = length + len(chunks) - 1
|
|
i = i + len(chunks) - 1
|
|
|
|
elif envname in ('sloppypar', 'flushleft', 'document'):
|
|
pass
|
|
|
|
else:
|
|
print 'WARNING: don\'t know what to do with env ' + `envname`
|
|
|
|
elif ch.chtype == chunk_type[ENDENV]:
|
|
envname = ch.data
|
|
if envname != hist.inenv[0]:
|
|
raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
|
|
del hist.inenv[0]
|
|
del pp[i-1]
|
|
i, length = i-1, length-1
|
|
|
|
if envname == 'verbatim':
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, 'example')])]
|
|
i, length = i+2, length+2
|
|
elif envname in ('itemize', 'list', 'fulllineitems'):
|
|
hist.itemizenesting = hist.itemizenesting - 1
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, 'itemize')])]
|
|
i, length = i+2, length+2
|
|
elif envname == 'enumerate':
|
|
hist.enumeratenesting = hist.enumeratenesting-1
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, 'enumerate')])]
|
|
i, length = i+2, length+2
|
|
elif envname == 'description':
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, 'table')])]
|
|
i, length = i+2, length+2
|
|
elif (envname == 'tableiii') or (envname == 'tableii'):
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, 'table')])]
|
|
i, length = i+2, length + 2
|
|
pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
|
|
i, length = i+1, length+1
|
|
|
|
elif envname in ('funcdesc', 'excdesc', 'datadesc', 'classdesc',
|
|
'funcdescni', 'datadescni',
|
|
'methoddesc', 'memberdesc',
|
|
'methoddescni', 'memberdescni',
|
|
):
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, hist.command)])]
|
|
i, length = i+2, length+2
|
|
|
|
elif envname == 'opcodedesc':
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, "deffn")])]
|
|
i, length = i+2, length+2
|
|
|
|
elif envname in ('seealso', 'sloppypar', 'flushleft', 'document'):
|
|
pass
|
|
|
|
else:
|
|
print 'WARNING: ending env %s has no actions' % `envname`
|
|
|
|
elif ch.chtype == chunk_type[CSNAME]:
|
|
# control name transformations
|
|
s_buf_data = s(buf, ch.data)
|
|
if s_buf_data == 'optional':
|
|
pp[i-1].chtype = chunk_type[PLAIN]
|
|
pp[i-1].data = '['
|
|
if (i < length) and \
|
|
(pp[i].chtype == chunk_type[GROUP]):
|
|
cp=pp[i].data
|
|
pp[i:i+1]=cp + [
|
|
chunk(PLAIN, ch.where, ']')]
|
|
length = length+len(cp)
|
|
|
|
elif s_buf_data in ignoredcommands:
|
|
del pp[i-1]
|
|
i, length = i-1, length-1
|
|
|
|
elif s_buf_data == '@' and \
|
|
i != length and \
|
|
pp[i].chtype == chunk_type[PLAIN] and \
|
|
s(buf, pp[i].data)[0] == '.':
|
|
# \@. --> \. --> @.
|
|
ch.data = '.'
|
|
del pp[i]
|
|
length = length - 1
|
|
|
|
elif convertible_csname(s_buf_data):
|
|
ch.chtype, ch.data, nix = conversion(s_buf_data)
|
|
try:
|
|
if nix and pp[i].chtype == chunk_type[GROUP] \
|
|
and len(pp[i].data) == 0:
|
|
del pp[i]
|
|
length = length - 1
|
|
except IndexError:
|
|
pass
|
|
|
|
elif s_buf_data == '\\':
|
|
# \\ --> \* --> @*
|
|
ch.data = '*'
|
|
|
|
elif len(s_buf_data) == 1 and \
|
|
s_buf_data in onlylatexspecial:
|
|
ch.chtype = chunk_type[PLAIN]
|
|
# check if such a command is followed by
|
|
# an empty group: e.g., `\%{}'. If so, remove
|
|
# this empty group too
|
|
if i < length and \
|
|
pp[i].chtype == chunk_type[GROUP] \
|
|
and len(pp[i].data) == 0:
|
|
del pp[i]
|
|
length = length-1
|
|
|
|
elif s_buf_data == "appendix":
|
|
hist.chaptertype = "appendix"
|
|
del pp[i-1]
|
|
i, length = i-1, length-1
|
|
|
|
elif hist.inargs and s_buf_data in inargsselves:
|
|
# This is the special processing of the
|
|
# arguments of the \begin{funcdesc}... or
|
|
# \funcline... arguments
|
|
# \, --> , \[ --> [, \] --> ]
|
|
ch.chtype = chunk_type[PLAIN]
|
|
|
|
elif s_buf_data == 'setindexsubitem':
|
|
length, i = yank_indexsubitem(pp, length, i, buf, ch,
|
|
'setindexsubitem')
|
|
|
|
elif s_buf_data == 'withsubitem':
|
|
oldsubitem = hist.indexsubitem
|
|
try:
|
|
length, i = yank_indexsubitem(pp, length, i, buf, ch,
|
|
'withsubitem')
|
|
stuff = pp[i].data
|
|
del pp[i]
|
|
length = length - 1
|
|
changeit(buf, stuff)
|
|
stuff = None
|
|
finally:
|
|
hist.indexsubitem = oldsubitem
|
|
|
|
elif s_buf_data in ('textrm', 'pytype'):
|
|
stuff = pp[i].data
|
|
pp[i-1:i+1] = stuff
|
|
length = length - 2 + len(stuff)
|
|
stuff = None
|
|
i = i - 1
|
|
|
|
elif s_buf_data == 'newcommand':
|
|
print "ignoring definition of \\" + s(buf, pp[i].data[0].data)
|
|
del pp[i-1:i+2]
|
|
i = i - 1
|
|
length = length - 3
|
|
|
|
elif s_buf_data == 'renewcommand':
|
|
print "ignoring redefinition of \\" \
|
|
+ s(buf, pp[i].data[0].data)
|
|
del pp[i-1:i+2]
|
|
i = i - 1
|
|
length = length - 3
|
|
|
|
elif s_buf_data == 'mbox':
|
|
stuff = pp[i].data
|
|
pp[i-1:i+1] = stuff
|
|
i = i - 1
|
|
length = length + len(stuff) - 2
|
|
stuff = None
|
|
|
|
elif s_buf_data == 'version':
|
|
ch.chtype = chunk_type[PLAIN]
|
|
ch.data = release_version
|
|
|
|
elif s_buf_data == 'item':
|
|
ch.chtype = chunk_type[CSLINE]
|
|
length, newi = getoptarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
changeit(buf, ingroupch) # catch stuff inside the optional arg
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
i, length = i+1, length+1
|
|
|
|
elif s_buf_data == 'ttindex':
|
|
idxsi = hist.indexsubitem
|
|
|
|
cat_class = ''
|
|
if len(idxsi) >= 2 and idxsi[1] in \
|
|
('method', 'function', 'protocol'):
|
|
command = 'findex'
|
|
elif len(idxsi) >= 2 and idxsi[1] in \
|
|
('exception', 'object'):
|
|
command = 'vindex'
|
|
elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
|
|
command = 'cindex'
|
|
elif len(idxsi) == 3 and idxsi[:2] == ['class', 'in']:
|
|
command = 'findex'
|
|
else:
|
|
print 'WARNING: can\'t categorize ' + `idxsi` \
|
|
+ ' for \'ttindex\' command'
|
|
command = 'cindex'
|
|
|
|
if not cat_class:
|
|
cat_class = '(%s)' % string.join(idxsi)
|
|
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = command
|
|
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
arg = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
cat_arg = [chunk(PLAIN, ch.where, cat_class)]
|
|
|
|
# determine what should be set in roman, and
|
|
# what in tt-font
|
|
if command in regindices:
|
|
|
|
arg = [chunk(CSNAME, ch.where, 't'),
|
|
chunk(GROUP, ch.where, arg)]
|
|
else:
|
|
cat_arg = [chunk(CSNAME, ch.where, 'r'),
|
|
chunk(GROUP, ch.where, cat_arg)]
|
|
|
|
ingroupch = arg + \
|
|
[chunk(PLAIN, ch.where, ' ')] + \
|
|
cat_arg
|
|
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
elif s_buf_data == 'ldots':
|
|
# \ldots --> \dots{} --> @dots{}
|
|
ch.data = 'dots'
|
|
if i == length \
|
|
or pp[i].chtype != chunk_type[GROUP] \
|
|
or pp[i].data != []:
|
|
pp.insert(i, chunk(GROUP, ch.where, []))
|
|
i, length = i+1, length+1
|
|
|
|
elif s_buf_data in themselves:
|
|
# \UNIX --> &UNIX;
|
|
ch.chtype = chunk_type[PLAIN]
|
|
if i != length \
|
|
and pp[i].chtype == chunk_type[GROUP] \
|
|
and pp[i].data == []:
|
|
del pp[i]
|
|
length = length-1
|
|
|
|
elif s_buf_data == 'manpage':
|
|
ch.data = 'emph'
|
|
sect = s(buf, pp[i+1].data[0].data)
|
|
pp[i+1].data = "(%s)" % sect
|
|
pp[i+1].chtype = chunk_type[PLAIN]
|
|
|
|
elif s_buf_data == 'envvar':
|
|
# this should do stuff in the index, too...
|
|
ch.data = "$"
|
|
ch.chtype = chunk_type[PLAIN]
|
|
pp[i] = pp[i].data[0]
|
|
|
|
elif s_buf_data == 'regexp':
|
|
ch.data = 'code'
|
|
pp.insert(i+1, chunk(PLAIN, ch.where, '"'))
|
|
pp.insert(i-1, chunk(PLAIN, ch.where, '"'))
|
|
length = length + 2
|
|
i = i + 1
|
|
|
|
elif s_buf_data in ('lineiii', 'lineii'):
|
|
# This is the most tricky one
|
|
# \lineiii{a1}{a2}[{a3}] -->
|
|
# @item @<cts. of itemargmacro>{a1}
|
|
# a2 [ -- a3]
|
|
#
|
|
if not hist.inenv:
|
|
raise error, 'no environment for lineiii'
|
|
if (hist.inenv[0] != 'tableiii') and \
|
|
(hist.inenv[0] != 'tableii'):
|
|
raise error, \
|
|
'wrong command (%s) in wrong environment (%s)' \
|
|
% (s_buf_data, `hist.inenv[0]`)
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = 'item'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = [chunk(CSNAME, 0, hist.itemargmacro),
|
|
chunk(GROUP, 0, pp[i:newi])]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
grouppos = i
|
|
i, length = i+1, length+1
|
|
length, i = getnextarg(length, buf, pp, i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
if newi > i:
|
|
# we have a 3rd arg
|
|
pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
|
|
i = newi + 1
|
|
length = length + 1
|
|
if length != len(pp):
|
|
raise 'IN LINEIII IS THE ERR', `i`
|
|
|
|
elif s_buf_data in ('chapter', 'section',
|
|
'subsection', 'subsubsection'):
|
|
#\xxxsection{A} ---->
|
|
# @node A, , ,
|
|
# @xxxsection A
|
|
## also: remove commas and quotes
|
|
hist.this_module = None
|
|
if s_buf_data == "chapter":
|
|
ch.data = hist.chaptertype
|
|
ch.chtype = chunk_type[CSLINE]
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
afternodenamecmd = next_command_p(length, buf,
|
|
pp, newi, 'nodename')
|
|
if afternodenamecmd < 0:
|
|
cp1 = crcopy(pp[i:newi])
|
|
pp[i:newi] = [chunk(GROUP, ch.where, pp[i:newi])]
|
|
length, newi = length - (newi-i) + 1, i+1
|
|
text = flattext(buf, cp1)
|
|
text = invent_node_names(text)
|
|
else:
|
|
length, endarg = getnextarg(length, buf,
|
|
pp, afternodenamecmd)
|
|
cp1 = crcopy(pp[afternodenamecmd:endarg])
|
|
del pp[newi:endarg]
|
|
length = length - (endarg-newi)
|
|
|
|
pp[i:newi] = [chunk(GROUP, ch.where, pp[i:newi])]
|
|
length, newi = length - (newi-i) + 1, i + 1
|
|
text = flattext(buf, cp1)
|
|
if text[-1] == '.':
|
|
text = text[:-1]
|
|
if text in hist.nodenames:
|
|
print 'WARNING: node name ' + `text` + ' already used'
|
|
out.doublenodes.append(text)
|
|
else:
|
|
hist.nodenames.append(text)
|
|
text = rm_commas_etc(text)
|
|
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'node'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, text+', , ,')
|
|
])]
|
|
i, length = newi+2, length+2
|
|
|
|
elif s_buf_data == 'funcline':
|
|
# fold it to a very short environment
|
|
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, hist.command)])]
|
|
i, length = i+2, length+2
|
|
length, i = do_funcdesc(length, buf, pp, i)
|
|
|
|
elif s_buf_data == 'dataline':
|
|
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, hist.command)])]
|
|
i, length = i+2, length+2
|
|
length, i = do_datadesc(length, buf, pp, i)
|
|
|
|
elif s_buf_data == 'excline':
|
|
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
|
|
chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where, hist.command)])]
|
|
i, length = i+2, length+2
|
|
length, i = do_excdesc(length, buf, pp, i)
|
|
|
|
elif s_buf_data == 'index':
|
|
#\index{A} --->
|
|
# @cindex A
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = 'cindex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
elif s_buf_data == 'bifuncindex':
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = 'findex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ingroupch.append(chunk(PLAIN, ch.where, ' '))
|
|
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
|
|
ingroupch.append(chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where,
|
|
'(built-in function)')]))
|
|
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
elif s_buf_data == 'obindex':
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = 'findex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ingroupch.append(chunk(PLAIN, ch.where, ' '))
|
|
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
|
|
ingroupch.append(chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where,
|
|
'(object)')]))
|
|
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
elif s_buf_data == 'opindex':
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = 'findex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ingroupch.append(chunk(PLAIN, ch.where, ' '))
|
|
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
|
|
ingroupch.append(chunk(GROUP, ch.where, [
|
|
chunk(PLAIN, ch.where,
|
|
'(operator)')]))
|
|
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
elif s_buf_data in ('bimodindex', 'refbimodindex'):
|
|
length, i = add_module_index(
|
|
pp, length, i, buf, ch, '(built-in)',
|
|
(s_buf_data[:3] == 'ref'))
|
|
|
|
elif s_buf_data in ('modindex', 'refmodindex'):
|
|
length, i = add_module_index(
|
|
pp, length, i, buf, ch, '',
|
|
(s_buf_data[:3] == 'ref'))
|
|
|
|
elif s_buf_data in ('stmodindex', 'refstmodindex'):
|
|
length, i = add_module_index(
|
|
pp, length, i, buf, ch, '(standard)',
|
|
(s_buf_data[:3] == 'ref'))
|
|
|
|
elif s_buf_data in ('exmodindex', 'refexmodindex'):
|
|
length, i = add_module_index(
|
|
pp, length, i, buf, ch, '(extension)',
|
|
(s_buf_data[:3] == 'ref'))
|
|
|
|
elif s_buf_data == 'stindex':
|
|
# XXX must actually go to newindex st
|
|
what = (s_buf_data[:2] == "st") and "statement" or "keyword"
|
|
wh = ch.where
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = 'cindex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = [chunk(CSNAME, wh, 'code'),
|
|
chunk(GROUP, wh, pp[i:newi])]
|
|
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
t = ingroupch[:]
|
|
t.append(chunk(PLAIN, wh, ' ' + what))
|
|
|
|
pp.insert(i, chunk(GROUP, wh, t))
|
|
i, length = i+1, length+1
|
|
|
|
pp.insert(i, chunk(CSLINE, wh, 'cindex'))
|
|
i, length = i+1, length+1
|
|
|
|
t = ingroupch[:]
|
|
t.insert(0, chunk(PLAIN, wh, what + ', '))
|
|
|
|
pp.insert(i, chunk(GROUP, wh, t))
|
|
i, length = i+1, length+1
|
|
|
|
elif s_buf_data == 'indexii':
|
|
#\indexii{A}{B} --->
|
|
# @cindex A B
|
|
# @cindex B, A
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp11 = pp[i:newi]
|
|
cp21 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp12 = pp[i:newi]
|
|
cp22 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = 'cindex'
|
|
pp.insert(i, chunk(GROUP, ch.where, cp11 + [
|
|
chunk(PLAIN, ch.where, ' ')] + cp12))
|
|
i, length = i+1, length+1
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
|
|
chunk(GROUP, ch.where, cp22 + [
|
|
chunk(PLAIN, ch.where, ', ')]+ cp21)]
|
|
i, length = i+2, length+2
|
|
|
|
elif s_buf_data == 'indexiii':
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp11 = pp[i:newi]
|
|
cp21 = crcopy(pp[i:newi])
|
|
cp31 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp12 = pp[i:newi]
|
|
cp22 = crcopy(pp[i:newi])
|
|
cp32 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp13 = pp[i:newi]
|
|
cp23 = crcopy(pp[i:newi])
|
|
cp33 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = 'cindex'
|
|
pp.insert(i, chunk(GROUP, ch.where, cp11 + [
|
|
chunk(PLAIN, ch.where, ' ')] + cp12
|
|
+ [chunk(PLAIN, ch.where, ' ')]
|
|
+ cp13))
|
|
i, length = i+1, length+1
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
|
|
chunk(GROUP, ch.where, cp22 + [
|
|
chunk(PLAIN, ch.where, ' ')]+ cp23
|
|
+ [chunk(PLAIN, ch.where, ', ')] +
|
|
cp21)]
|
|
i, length = i+2, length+2
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
|
|
chunk(GROUP, ch.where, cp33 + [
|
|
chunk(PLAIN, ch.where, ', ')]+ cp31
|
|
+ [chunk(PLAIN, ch.where, ' ')] +
|
|
cp32)]
|
|
i, length = i+2, length+2
|
|
|
|
elif s_buf_data == 'indexiv':
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp11 = pp[i:newi]
|
|
cp21 = crcopy(pp[i:newi])
|
|
cp31 = crcopy(pp[i:newi])
|
|
cp41 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp12 = pp[i:newi]
|
|
cp22 = crcopy(pp[i:newi])
|
|
cp32 = crcopy(pp[i:newi])
|
|
cp42 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp13 = pp[i:newi]
|
|
cp23 = crcopy(pp[i:newi])
|
|
cp33 = crcopy(pp[i:newi])
|
|
cp43 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp14 = pp[i:newi]
|
|
cp24 = crcopy(pp[i:newi])
|
|
cp34 = crcopy(pp[i:newi])
|
|
cp44 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ch.chtype = chunk_type[CSLINE]
|
|
ch.data = 'cindex'
|
|
ingroupch = cp11 + \
|
|
spacech + cp12 + \
|
|
spacech + cp13 + \
|
|
spacech + cp14
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
i, length = i+1, length+1
|
|
ingroupch = cp22 + \
|
|
spacech + cp23 + \
|
|
spacech + cp24 + \
|
|
commach + cp21
|
|
pp[i:i] = cindexch + [
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
i, length = i+2, length+2
|
|
ingroupch = cp33 + \
|
|
spacech + cp34 + \
|
|
commach + cp31 + \
|
|
spacech + cp32
|
|
pp[i:i] = cindexch + [
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
i, length = i+2, length+2
|
|
ingroupch = cp44 + \
|
|
commach + cp41 + \
|
|
spacech + cp42 + \
|
|
spacech + cp43
|
|
pp[i:i] = cindexch + [
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
i, length = i+2, length+2
|
|
|
|
elif s_buf_data == 'seemodule':
|
|
# discard optional arg first:
|
|
length, newi = getoptarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
#
|
|
ch.data = "code"
|
|
data = pp[i+1].data
|
|
data.insert(0, chunk(PLAIN, ch.where, " ("))
|
|
data.append(chunk(PLAIN, ch.where, ")"))
|
|
pp[i+1:i+2] = data
|
|
length = length + len(data) - 1
|
|
|
|
elif s_buf_data == 'seetext':
|
|
data = pp[i].data
|
|
data.insert(0, chunk(ENDLINE, ch.where, "\n"))
|
|
pp[i-1:i+1] = data
|
|
i = i - 1
|
|
length = length + len(data) - 2
|
|
|
|
elif s_buf_data == 'deprecated':
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
version = pp[i:newi][0]
|
|
length, newi2 = getnextarg(length, buf, pp, newi)
|
|
action = pp[newi:newi2]
|
|
del pp[i-1:newi2]
|
|
length = length - (newi2 - i) - 1
|
|
stuff = [chunk(PLAIN, ch.where, 'Deprecated since release '),
|
|
version,
|
|
chunk(PLAIN, ch.where, '.')]
|
|
chunks = [chunk(CSNAME, ch.where, 'strong'),
|
|
chunk(GROUP, ch.where, stuff),
|
|
chunk(PLAIN, ch.where, ' ')] + action \
|
|
+ [chunk(DENDLINE, ch.where, '\n')]
|
|
stuff = None
|
|
i = i - 1
|
|
pp[i:i] = chunks
|
|
length = length + len(chunks)
|
|
|
|
elif s_buf_data == "quad":
|
|
ch.chtype = PLAIN
|
|
ch.data = " "
|
|
|
|
elif s_buf_data in ('usepackage', 'input'):
|
|
del pp[i-1:i+1]
|
|
i, length = i-1, length-2
|
|
|
|
elif s_buf_data in ('noindent', 'indexsubitem', 'footnote'):
|
|
pass
|
|
|
|
elif s_buf_data == 'label':
|
|
name = s(buf, pp[i].data[0].data)
|
|
del pp[i-1:i+1]
|
|
length = length - 2
|
|
i = i - 1
|
|
label_nodes[name] = hist.nodenames[-1]
|
|
|
|
elif s_buf_data == 'rfc':
|
|
ch.chtype = chunk_type[PLAIN]
|
|
ch.data = "RFC " + s(buf, pp[i].data[0].data)
|
|
del pp[i]
|
|
length = length - 1
|
|
|
|
elif s_buf_data == 'ref':
|
|
name = s(buf, pp[i].data[0].data)
|
|
if label_nodes.has_key(name):
|
|
pp[i].data[0].data = label_nodes[name]
|
|
else:
|
|
pp[i-1:i+1] = [
|
|
chunk(PLAIN, ch.where,
|
|
"(unknown node reference: %s)" % name)]
|
|
length = length - 1
|
|
print "WARNING: unknown node label", `name`
|
|
|
|
else:
|
|
print "don't know what to do with keyword " + s_buf_data
|
|
|
|
|
|
# Characters that are special to texinfo output and must be escaped by
# doubling with '@' (i.e. '@@', '@{', '@}') when they appear in plain text.
re_atsign = regex.compile('[@{}]')
# A bare newline; used to flatten newlines out of '@'-line (CSLINE)
# arguments, which must fit on a single texinfo line.
re_newline = regex.compile('\n')
|
|
|
|
def dumpit(buf, wm, pp):
    # Phase 3: walk the chunk list `pp` (produced by parseit/changeit) and
    # emit texinfo source by calling the write-method `wm` (e.g. a file's
    # .write).  `buf` is the original LaTeX source buffer that the chunks'
    # .data slices refer to (decoded via the global helper s()).
    # Recurses into GROUP chunks and into CSLINE arguments.
    # NOTE(review): raises string exceptions ('FATAL', `error`) — legacy
    # Python 1.x style, kept as-is.

    global out

    i, length = 0, len(pp)

    # When set, the next PLAIN chunk must be preceded by a space so that a
    # just-emitted alphabetic @-command is properly terminated.
    addspace = 0

    while 1:
        # Sanity check: no branch below may change len(pp) without
        # updating `length` accordingly.
        if len(pp) != length:
            raise 'FATAL', 'inconsistent length'
        if i == length:
            break
        ch = pp[i]
        i = i + 1

        dospace = addspace
        addspace = 0

        if ch.chtype == chunk_type[CSNAME]:
            # Control-sequence name: emit as a texinfo @-command.
            s_buf_data = s(buf, ch.data)
##            if s_buf_data == 'e':
##                wm('\\')
##                continue
##            if s_buf_data == '$':
##                wm('$')
##                continue
            wm('@' + s_buf_data)
            # Disambiguate duplicate node names by appending a ZZZ-nnnn
            # tag (out.doublenodes lists names that occur more than once).
            if s_buf_data == 'node' and \
                      pp[i].chtype == chunk_type[PLAIN] and \
                      s(buf, pp[i].data) in out.doublenodes:
                ##XXX doesnt work yet??
                wm(' ZZZ-' + zfill(`i`, 4))
            # An alphabetic command name would merge with following text,
            # so require a separating space before the next PLAIN chunk.
            if s_buf_data[0] in string.letters:
                addspace = 1
        elif ch.chtype == chunk_type[PLAIN]:
            # Literal text: escape texinfo-special characters (@ { }).
            if dospace and s(buf, ch.data) not in (' ', '\t'):
                wm(' ')
            text = s(buf, ch.data)
            while 1:
                # regex.search returns -1 when there is no match.
                pos = re_atsign.search(text)
                if pos < 0:
                    break
                wm(text[:pos] + '@' + text[pos])
                text = text[pos+1:]
            wm(text)
        elif ch.chtype == chunk_type[GROUP]:
            # Brace group: recurse on the nested chunk list.
            wm('{')
            dumpit(buf, wm, ch.data)
            wm('}')
        elif ch.chtype == chunk_type[DENDLINE]:
            # Paragraph break: collapse any run of following (double)
            # end-of-line chunks into a single blank line.
            wm('\n\n')
            while i != length and pp[i].chtype in \
                      (chunk_type[DENDLINE], chunk_type[ENDLINE]):
                i = i + 1
        elif ch.chtype == chunk_type[OTHER]:
            wm(s(buf, ch.data))
        elif ch.chtype == chunk_type[ACTIVE]:
            wm(s(buf, ch.data))
        elif ch.chtype == chunk_type[ENDLINE]:
            wm('\n')
        elif ch.chtype == chunk_type[CSLINE]:
            # Line-oriented command (e.g. @cindex): must start on its own
            # line, takes exactly one GROUP chunk as its argument, and the
            # argument is flattened onto one line.
            # Emit a preceding newline unless the previous chunk already
            # ended the line.
            if i >= 2 and pp[i-2].chtype not in \
                      (chunk_type[ENDLINE], chunk_type[DENDLINE]) \
                      and (pp[i-2].chtype != chunk_type[PLAIN]
                      or s(buf, pp[i-2].data)[-1] != '\n'):
                wm('\n')
            wm('@' + s(buf, ch.data))
            if i == length:
                raise error, 'CSLINE expected another chunk'
            if pp[i].chtype != chunk_type[GROUP]:
                raise error, 'CSLINE expected GROUP'
            if type(pp[i].data) != ListType:
                raise error, 'GROUP chould contain []-data'

            # Render the argument group into an in-memory Wobj, then copy
            # it out with embedded newlines replaced by spaces.
            wobj = Wobj()
            dumpit(buf, wobj.write, pp[i].data)
            i = i + 1
            text = wobj.data
            del wobj
            if text:
                wm(' ')
                while 1:
                    pos = re_newline.search(text)
                    if pos < 0:
                        break
                    # these seem to be completely harmless, so don't warn:
##                    print 'WARNING: found newline in csline arg (%s)' \
##                          % s(buf, ch.data)
                    wm(text[:pos] + ' ')
                    text = text[pos+1:]
                wm(text)
            # Terminate the @-line unless the next chunk already starts
            # with a newline of some kind.
            if i >= length or \
                      pp[i].chtype not in (chunk_type[CSLINE],
                      chunk_type[ENDLINE], chunk_type[DENDLINE]) \
                      and (pp[i].chtype != chunk_type[PLAIN]
                      or s(buf, pp[i].data)[0] != '\n'):
                wm('\n')

        elif ch.chtype == chunk_type[COMMENT]:
            # LaTeX comment: re-emit as a texinfo '@c' line, but only if it
            # contains something other than whitespace.
            if s(buf, ch.data) and \
                      regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
                if i >= 2 \
                   and pp[i-2].chtype not in (chunk_type[ENDLINE],
                                              chunk_type[DENDLINE]) \
                   and not (pp[i-2].chtype == chunk_type[PLAIN]
                            and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
                    wm('\n')
                wm('@c ' + s(buf, ch.data))
        elif ch.chtype == chunk_type[IGNORE]:
            pass
        else:
            # Unknown chunk type: warn with a (truncated) repr of the data.
            try:
                str = `s(buf, ch.data)`
            except TypeError:
                str = `ch.data`
            if len(str) > 400:
                str = str[:400] + '...'
            print 'warning:', ch.chtype, 'not handled, data ' + str
|
|
|
|
|
|
|
|
def main():
    # Command-line driver.
    #
    # Usage: partparse [-o outfile] [-h headerfile] [-t trailerfile]
    #                  [-v version] file ...
    #
    # Reads each LaTeX input file, runs the three phases
    # (parseit -> changeit -> dumpit) and writes a single texinfo file
    # consisting of the header template, the converted input(s), and the
    # trailer template.

    global release_version

    outfile = None
    # Default texinfo prologue/epilogue templates, looked up in the
    # current directory unless overridden with -h / -t.
    headerfile = 'texipre.dat'
    trailerfile = 'texipost.dat'

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:v:')
    except getopt.error:
        # Bad options: force the usage message below (which exits before
        # `opts` would be referenced).
        args = []

    if not args:
        print 'usage: partparse [-o outfile] [-h headerfile]',
        print '[-t trailerfile] file ...'
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-o': outfile = arg
        if opt == '-h': headerfile = arg
        if opt == '-t': trailerfile = arg
        if opt == '-v': release_version = arg

    # Default output name: first input file with its extension replaced
    # by '.texi'.
    if not outfile:
        root, ext = os.path.splitext(args[0])
        outfile = root + '.texi'

    # Refuse to clobber one of the inputs.
    if outfile in args:
        print 'will not overwrite input file', outfile
        sys.exit(2)

    outf = open(outfile, 'w')
    outf.write(open(headerfile, 'r').read())

    for file in args:
        # Print a separator banner when converting several files.
        if len(args) > 1: print '='*20, file, '='*20
        buf = open(file, 'r').read()
        # chunk objects keep only (start, end) slices; give the class the
        # buffer so reprs/debugging can resolve them.
        chunk.buf = buf
        w, pp = parseit(buf)
        startchange()
        changeit(buf, pp)
        dumpit(buf, outf.write, pp)

    outf.write(open(trailerfile, 'r').read())

    outf.close()
|
|
|
|
# Script entry point: run the converter when executed directly.
if __name__ == "__main__":
    main()
|