mirror of
https://github.com/python/cpython.git
synced 2025-11-03 03:22:27 +00:00
patch #1462498: handle entityrefs in attribute values.
This commit is contained in:
parent
48d5e508eb
commit
7f6b67c235
4 changed files with 53 additions and 5 deletions
|
|
@ -95,12 +95,15 @@ lower case, and the \var{method} argument is the bound method which
|
||||||
should be used to support semantic interpretation of the start tag.
|
should be used to support semantic interpretation of the start tag.
|
||||||
The \var{attributes} argument is a list of \code{(\var{name},
|
The \var{attributes} argument is a list of \code{(\var{name},
|
||||||
\var{value})} pairs containing the attributes found inside the tag's
|
\var{value})} pairs containing the attributes found inside the tag's
|
||||||
\code{<>} brackets. The \var{name} has been translated to lower case
|
\code{<>} brackets. The \var{name} has been translated to lower case.
|
||||||
and double quotes and backslashes in the \var{value} have been interpreted.
|
Double quotes and backslashes in the \var{value} have been interpreted,
|
||||||
|
as well as known entity and character references.
|
||||||
For instance, for the tag \code{<A HREF="http://www.cwi.nl/">}, this
|
For instance, for the tag \code{<A HREF="http://www.cwi.nl/">}, this
|
||||||
method would be called as \samp{unknown_starttag('a', [('href',
|
method would be called as \samp{unknown_starttag('a', [('href',
|
||||||
'http://www.cwi.nl/')])}. The base implementation simply calls
|
'http://www.cwi.nl/')])}. The base implementation simply calls
|
||||||
\var{method} with \var{attributes} as the only argument.
|
\var{method} with \var{attributes} as the only argument.
|
||||||
|
\versionadded[Handling of entity and character references within
|
||||||
|
attribute values]{2.5}
|
||||||
\end{methoddesc}
|
\end{methoddesc}
|
||||||
|
|
||||||
\begin{methoddesc}{handle_endtag}{tag, method}
|
\begin{methoddesc}{handle_endtag}{tag, method}
|
||||||
|
|
|
||||||
|
|
@ -269,9 +269,37 @@ class SGMLParser(markupbase.ParserBase):
|
||||||
attrname, rest, attrvalue = match.group(1, 2, 3)
|
attrname, rest, attrvalue = match.group(1, 2, 3)
|
||||||
if not rest:
|
if not rest:
|
||||||
attrvalue = attrname
|
attrvalue = attrname
|
||||||
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
|
else:
|
||||||
attrvalue[:1] == '"' == attrvalue[-1:]:
|
if (attrvalue[:1] == "'" == attrvalue[-1:] or
|
||||||
attrvalue = attrvalue[1:-1]
|
attrvalue[:1] == '"' == attrvalue[-1:]):
|
||||||
|
# strip quotes
|
||||||
|
attrvalue = attrvalue[1:-1]
|
||||||
|
l = 0
|
||||||
|
new_attrvalue = ''
|
||||||
|
while l < len(attrvalue):
|
||||||
|
av_match = entityref.match(attrvalue, l)
|
||||||
|
if (av_match and av_match.group(1) in self.entitydefs and
|
||||||
|
attrvalue[av_match.end(1)] == ';'):
|
||||||
|
# only substitute entityrefs ending in ';' since
|
||||||
|
# otherwise we may break <a href='?p=x&q=y'>
|
||||||
|
# which is very common
|
||||||
|
new_attrvalue += self.entitydefs[av_match.group(1)]
|
||||||
|
l = av_match.end(0)
|
||||||
|
continue
|
||||||
|
ch_match = charref.match(attrvalue, l)
|
||||||
|
if ch_match:
|
||||||
|
try:
|
||||||
|
char = chr(int(ch_match.group(1)))
|
||||||
|
new_attrvalue += char
|
||||||
|
l = ch_match.end(0)
|
||||||
|
continue
|
||||||
|
except ValueError:
|
||||||
|
# invalid character reference, don't substitute
|
||||||
|
pass
|
||||||
|
# all other cases
|
||||||
|
new_attrvalue += attrvalue[l]
|
||||||
|
l += 1
|
||||||
|
attrvalue = new_attrvalue
|
||||||
attrs.append((attrname.lower(), attrvalue))
|
attrs.append((attrname.lower(), attrvalue))
|
||||||
k = match.end(0)
|
k = match.end(0)
|
||||||
if rawdata[j] == '>':
|
if rawdata[j] == '>':
|
||||||
|
|
|
||||||
|
|
@ -214,6 +214,20 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
|
||||||
("starttag", "e", [("a", "rgb(1,2,3)")]),
|
("starttag", "e", [("a", "rgb(1,2,3)")]),
|
||||||
])
|
])
|
||||||
|
|
||||||
|
def test_attr_values_entities(self):
|
||||||
|
"""Substitution of entities and charrefs in attribute values"""
|
||||||
|
# SF bug #1452246
|
||||||
|
self.check_events("""<a b=&lt; c=&lt;&gt; d=&lt-&gt; e='&lt; '
|
||||||
|
f="&xxx;" g='&#32;&#33;' h='&#500;' i='x?a=b&c=d;'>""",
|
||||||
|
[("starttag", "a", [("b", "<"),
|
||||||
|
("c", "<>"),
|
||||||
|
("d", "&lt-&gt;"),
|
||||||
|
("e", "< "),
|
||||||
|
("f", "&xxx;"),
|
||||||
|
("g", " !"),
|
||||||
|
("h", "&#500;"),
|
||||||
|
("i", "x?a=b&c=d;"), ])])
|
||||||
|
|
||||||
def test_attr_funky_names(self):
|
def test_attr_funky_names(self):
|
||||||
self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
|
self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
|
||||||
("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
|
("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
|
||||||
|
|
|
||||||
|
|
@ -489,6 +489,9 @@ Extension Modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Patch #1462498: sgmllib now handles entity and character references
|
||||||
|
in attribute values.
|
||||||
|
|
||||||
- Added the sqlite3 package. This is based on pysqlite2.1.3, and provides
|
- Added the sqlite3 package. This is based on pysqlite2.1.3, and provides
|
||||||
a DB-API interface in the standard library. You'll need sqlite 3.2.2 or
|
a DB-API interface in the standard library. You'll need sqlite 3.2.2 or
|
||||||
later to build this - if you have an earlier version, the C extension
|
later to build this - if you have an earlier version, the C extension
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue