mirror of
https://github.com/python/cpython.git
synced 2025-09-27 02:39:58 +00:00
changed comment parsing
This commit is contained in:
parent
667d704997
commit
145b2e0168
1 changed file with 14 additions and 13 deletions
|
@@ -21,7 +21,9 @@ entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]')
|
||||||
charref = regex.compile('&#[a-zA-Z0-9]+;')
|
charref = regex.compile('&#[a-zA-Z0-9]+;')
|
||||||
starttagopen = regex.compile('<[a-zA-Z]')
|
starttagopen = regex.compile('<[a-zA-Z]')
|
||||||
endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>')
|
endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>')
|
||||||
|
special = regex.compile('<![^<>]*>')
|
||||||
commentopen = regex.compile('<!--')
|
commentopen = regex.compile('<!--')
|
||||||
|
commentclose = regex.compile('--[ \t\n]*>')
|
||||||
|
|
||||||
|
|
||||||
# SGML parser base class -- find tags and call handler functions.
|
# SGML parser base class -- find tags and call handler functions.
|
||||||
|
@@ -111,6 +113,14 @@ class SGMLParser:
|
||||||
if k < 0: break
|
if k < 0: break
|
||||||
i = i+k
|
i = i+k
|
||||||
continue
|
continue
|
||||||
|
k = special.match(rawdata, i)
|
||||||
|
if k >= 0:
|
||||||
|
if self.literal:
|
||||||
|
self.handle_data(rawdata[i])
|
||||||
|
i = i+1
|
||||||
|
continue
|
||||||
|
i = i+k
|
||||||
|
continue
|
||||||
elif rawdata[i] == '&':
|
elif rawdata[i] == '&':
|
||||||
k = charref.match(rawdata, i)
|
k = charref.match(rawdata, i)
|
||||||
if k >= 0:
|
if k >= 0:
|
||||||
|
@@ -141,25 +151,16 @@ class SGMLParser:
|
||||||
self.rawdata = rawdata[i:]
|
self.rawdata = rawdata[i:]
|
||||||
# XXX if end: check for empty stack
|
# XXX if end: check for empty stack
|
||||||
|
|
||||||
# Internal -- parse comment, return length or -1 if not ternimated
|
# Internal -- parse comment, return length or -1 if not terminated
|
||||||
def parse_comment(self, i):
|
def parse_comment(self, i):
|
||||||
rawdata = self.rawdata
|
rawdata = self.rawdata
|
||||||
if rawdata[i:i+4] <> '<!--':
|
if rawdata[i:i+4] <> '<!--':
|
||||||
raise RuntimeError, 'unexpected call to handle_comment'
|
raise RuntimeError, 'unexpected call to handle_comment'
|
||||||
try:
|
j = commentclose.search(rawdata, i+4)
|
||||||
j = string.index(rawdata, '--', i+4)
|
if j < 0:
|
||||||
except string.index_error:
|
|
||||||
return -1
|
return -1
|
||||||
self.handle_comment(rawdata[i+4: j])
|
self.handle_comment(rawdata[i+4: j])
|
||||||
j = j+2
|
j = j+commentclose.match(rawdata, j)
|
||||||
n = len(rawdata)
|
|
||||||
while j < n and rawdata[j] in ' \t\n': j = j+1
|
|
||||||
if j == n: return -1 # Wait for final '>'
|
|
||||||
if rawdata[j] == '>':
|
|
||||||
j = j+1
|
|
||||||
else:
|
|
||||||
print '*** comment not terminated with >'
|
|
||||||
print repr(rawdata[j-5:j]), '*!*', repr(rawdata[j:j+5])
|
|
||||||
return j-i
|
return j-i
|
||||||
|
|
||||||
# Internal -- handle starttag, return length or -1 if not terminated
|
# Internal -- handle starttag, return length or -1 if not terminated
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue