mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
[Old patch that hadn't been checked in.]
get_starttag_text(): New method. Return the text of the most recently parsed start tag, from the '<' to the '>' or '/'. Not really useful for structure processing, but requested for Web-related use. May also be useful for being able to re-generate the input from the parse events, but there's no equivalent for end tags. attrfind: Be a little more forgiving of unquoted attribute values ('&' and '$' are now accepted in them).
This commit is contained in:
parent
8094611eb8
commit
b46696c0ed
1 changed file with 11 additions and 2 deletions
|
@@ -37,7 +37,7 @@ tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9]*')
|
||||||
attrfind = re.compile(
|
attrfind = re.compile(
|
||||||
'[%s]*([a-zA-Z_][-.a-zA-Z_0-9]*)' % string.whitespace
|
'[%s]*([a-zA-Z_][-.a-zA-Z_0-9]*)' % string.whitespace
|
||||||
+ ('([%s]*=[%s]*' % (string.whitespace, string.whitespace))
|
+ ('([%s]*=[%s]*' % (string.whitespace, string.whitespace))
|
||||||
+ r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:+*%?!\(\)_#=~]*))?')
|
+ r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:+*%?!&$\(\)_#=~]*))?')
|
||||||
|
|
||||||
|
|
||||||
# SGML parser base class -- find tags and call handler functions.
|
# SGML parser base class -- find tags and call handler functions.
|
||||||
|
@@ -207,9 +207,15 @@ class SGMLParser:
|
||||||
self.handle_pi(rawdata[i+2: j])
|
self.handle_pi(rawdata[i+2: j])
|
||||||
j = match.end(0)
|
j = match.end(0)
|
||||||
return j-i
|
return j-i
|
||||||
|
|
||||||
|
__starttag_text = None
|
||||||
|
def get_starttag_text(self):
|
||||||
|
return self.__starttag_text
|
||||||
|
|
||||||
# Internal -- handle starttag, return length or -1 if not terminated
|
# Internal -- handle starttag, return length or -1 if not terminated
|
||||||
def parse_starttag(self, i):
|
def parse_starttag(self, i):
|
||||||
|
self.__starttag_text = None
|
||||||
|
start_pos = i
|
||||||
rawdata = self.rawdata
|
rawdata = self.rawdata
|
||||||
if shorttagopen.match(rawdata, i):
|
if shorttagopen.match(rawdata, i):
|
||||||
# SGML shorthand: <tag/data/ == <tag>data</tag>
|
# SGML shorthand: <tag/data/ == <tag>data</tag>
|
||||||
|
@@ -220,9 +226,11 @@ class SGMLParser:
|
||||||
if not match:
|
if not match:
|
||||||
return -1
|
return -1
|
||||||
tag, data = match.group(1, 2)
|
tag, data = match.group(1, 2)
|
||||||
|
self.__starttag_text = '<%s/' % tag
|
||||||
tag = string.lower(tag)
|
tag = string.lower(tag)
|
||||||
self.finish_shorttag(tag, data)
|
|
||||||
k = match.end(0)
|
k = match.end(0)
|
||||||
|
self.finish_shorttag(tag, data)
|
||||||
|
self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
|
||||||
return k
|
return k
|
||||||
# XXX The following should skip matching quotes (' or ")
|
# XXX The following should skip matching quotes (' or ")
|
||||||
match = endbracket.search(rawdata, i+1)
|
match = endbracket.search(rawdata, i+1)
|
||||||
|
@@ -255,6 +263,7 @@ class SGMLParser:
|
||||||
k = match.end(0)
|
k = match.end(0)
|
||||||
if rawdata[j] == '>':
|
if rawdata[j] == '>':
|
||||||
j = j+1
|
j = j+1
|
||||||
|
self.__starttag_text = rawdata[start_pos:j]
|
||||||
self.finish_starttag(tag, attrs)
|
self.finish_starttag(tag, attrs)
|
||||||
return j
|
return j
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue