#2830: add html.escape() helper and move cgi.escape() uses in the standard library to it. It defaults to quote=True and also escapes single quotes, which makes casual use safer. The cgi.escape() interface is not touched, but emits a (silent) PendingDeprecationWarning.

This commit is contained in:
Georg Brandl 2010-10-15 15:57:45 +00:00
parent 70543acfa1
commit 1f7fffb308
11 changed files with 94 additions and 28 deletions

View file

@ -293,7 +293,7 @@ following WSGI-application::
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
import sys, os import sys, os
from cgi import escape from html import escape
from flup.server.fcgi import WSGIServer from flup.server.fcgi import WSGIServer
def app(environ, start_response): def app(environ, start_response):

View file

@ -328,9 +328,9 @@ algorithms implemented in this module in other circumstances.
attribute value delimited by double quotes, as in ``<a href="...">``. Note attribute value delimited by double quotes, as in ``<a href="...">``. Note
that single quotes are never translated. that single quotes are never translated.
If the value to be quoted might include single- or double-quote characters, .. deprecated:: 3.2
or both, consider using the :func:`~xml.sax.saxutils.quoteattr` function in the This function is unsafe because *quote* is false by default, and therefore
:mod:`xml.sax.saxutils` module instead. deprecated. Use :func:`html.escape` instead.
.. _cgi-security: .. _cgi-security:
@ -508,8 +508,8 @@ Common problems and solutions
.. rubric:: Footnotes .. rubric:: Footnotes
.. [#] Note that some recent versions of the HTML specification do state what order the .. [#] Note that some recent versions of the HTML specification do state what
field values should be supplied in, but knowing whether a request was order the field values should be supplied in, but knowing whether a request
received from a conforming browser, or even from a browser at all, is tedious was received from a conforming browser, or even from a browser at all, is
and error-prone. tedious and error-prone.

18
Doc/library/html.rst Normal file
View file

@ -0,0 +1,18 @@
:mod:`html` --- HyperText Markup Language support
=================================================
.. module:: html
:synopsis: Helpers for manipulating HTML.
.. versionadded:: 3.2
This module defines utilities to manipulate HTML.
.. function:: escape(s, quote=True)
Convert the characters ``&``, ``<`` and ``>`` in string *s* to HTML-safe
sequences. Use this if you need to display text that might contain such
characters in HTML. If the optional flag *quote* is true, the characters
(``"``) and (``'``) are also translated; this helps for inclusion in an HTML
attribute value delimited by quotes, as in ``<a href="...">``.

View file

@ -20,6 +20,7 @@ definition of the Python bindings for the DOM and SAX interfaces.
.. toctree:: .. toctree::
html.rst
html.parser.rst html.parser.rst
html.entities.rst html.entities.rst
pyexpat.rst pyexpat.rst

View file

@ -31,13 +31,13 @@ __version__ = "2.6"
# Imports # Imports
# ======= # =======
from operator import attrgetter
from io import StringIO from io import StringIO
import sys import sys
import os import os
import urllib.parse import urllib.parse
import email.parser import email.parser
from warnings import warn from warnings import warn
import html
__all__ = ["MiniFieldStorage", "FieldStorage", __all__ = ["MiniFieldStorage", "FieldStorage",
"parse", "parse_qs", "parse_qsl", "parse_multipart", "parse", "parse_qs", "parse_qsl", "parse_multipart",
@ -800,8 +800,8 @@ def print_exception(type=None, value=None, tb=None, limit=None):
list = traceback.format_tb(tb, limit) + \ list = traceback.format_tb(tb, limit) + \
traceback.format_exception_only(type, value) traceback.format_exception_only(type, value)
print("<PRE>%s<B>%s</B></PRE>" % ( print("<PRE>%s<B>%s</B></PRE>" % (
escape("".join(list[:-1])), html.escape("".join(list[:-1])),
escape(list[-1]), html.escape(list[-1]),
)) ))
del tb del tb
@ -812,7 +812,7 @@ def print_environ(environ=os.environ):
print("<H3>Shell Environment:</H3>") print("<H3>Shell Environment:</H3>")
print("<DL>") print("<DL>")
for key in keys: for key in keys:
print("<DT>", escape(key), "<DD>", escape(environ[key])) print("<DT>", html.escape(key), "<DD>", html.escape(environ[key]))
print("</DL>") print("</DL>")
print() print()
@ -825,10 +825,10 @@ def print_form(form):
print("<P>No form fields.") print("<P>No form fields.")
print("<DL>") print("<DL>")
for key in keys: for key in keys:
print("<DT>" + escape(key) + ":", end=' ') print("<DT>" + html.escape(key) + ":", end=' ')
value = form[key] value = form[key]
print("<i>" + escape(repr(type(value))) + "</i>") print("<i>" + html.escape(repr(type(value))) + "</i>")
print("<DD>" + escape(repr(value))) print("<DD>" + html.escape(repr(value)))
print("</DL>") print("</DL>")
print() print()
@ -839,9 +839,9 @@ def print_directory():
try: try:
pwd = os.getcwd() pwd = os.getcwd()
except os.error as msg: except os.error as msg:
print("os.error:", escape(str(msg))) print("os.error:", html.escape(str(msg)))
else: else:
print(escape(pwd)) print(html.escape(pwd))
print() print()
def print_arguments(): def print_arguments():
@ -899,9 +899,9 @@ environment as well. Here are some common variable names:
# ========= # =========
def escape(s, quote=None): def escape(s, quote=None):
'''Replace special characters "&", "<" and ">" to HTML-safe sequences. """Deprecated API."""
If the optional flag quote is true, the quotation mark character (") warn("cgi.escape is deprecated, use html.escape instead",
is also translated.''' PendingDeprecationWarning, stacklevel=2)
s = s.replace("&", "&amp;") # Must be done first! s = s.replace("&", "&amp;") # Must be done first!
s = s.replace("<", "&lt;") s = s.replace("<", "&lt;")
s = s.replace(">", "&gt;") s = s.replace(">", "&gt;")
@ -909,6 +909,7 @@ def escape(s, quote=None):
s = s.replace('"', "&quot;") s = s.replace('"', "&quot;")
return s return s
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"): def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
import re import re
return re.match(_vb_pattern, s) return re.match(_vb_pattern, s)

View file

@ -1 +1,20 @@
# This directory is a Python package. """
General functions for HTML manipulation.
"""
# Translation tables for str.translate(): code point -> replacement text.
# "&" needs no special ordering here because translate() makes a single pass
# over the input and never re-scans the replacement text.
_escape_map = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;'}
_escape_map_full = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;',
                    ord('"'): '&quot;', ord('\''): '&#x27;'}

# NB: this is a candidate for a bytes/string polymorphic interface

def escape(s, quote=True):
    """
    Replace special characters "&", "<" and ">" to HTML-safe sequences.
    If the optional flag quote is true (the default), the quotation mark
    characters, both double quote (") and single quote ('), are also
    translated.
    """
    if quote:
        return s.translate(_escape_map_full)
    return s.translate(_escape_map)

View file

@ -84,7 +84,7 @@ __version__ = "0.6"
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"] __all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
import cgi import html
import email.message import email.message
import email.parser import email.parser
import http.client import http.client
@ -705,7 +705,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
return None return None
list.sort(key=lambda a: a.lower()) list.sort(key=lambda a: a.lower())
r = [] r = []
displaypath = cgi.escape(urllib.parse.unquote(self.path)) displaypath = html.escape(urllib.parse.unquote(self.path))
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">') r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath) r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath) r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
@ -721,7 +721,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
displayname = name + "@" displayname = name + "@"
# Note: a link to a directory displays with @ and links with / # Note: a link to a directory displays with @ and links with /
r.append('<li><a href="%s">%s</a>\n' r.append('<li><a href="%s">%s</a>\n'
% (urllib.parse.quote(linkname), cgi.escape(displayname))) % (urllib.parse.quote(linkname), html.escape(displayname)))
r.append("</ul>\n<hr>\n</body>\n</html>\n") r.append("</ul>\n<hr>\n</body>\n</html>\n")
enc = sys.getfilesystemencoding() enc = sys.getfilesystemencoding()
encoded = ''.join(r).encode(enc) encoded = ''.join(r).encode(enc)

View file

@ -568,8 +568,8 @@ class Test_touch_import(support.TestCase):
def test_from_import(self): def test_from_import(self):
node = parse('bar()') node = parse('bar()')
fixer_util.touch_import("cgi", "escape", node) fixer_util.touch_import("html", "escape", node)
self.assertEqual(str(node), 'from cgi import escape\nbar()\n\n') self.assertEqual(str(node), 'from html import escape\nbar()\n\n')
def test_name_import(self): def test_name_import(self):
node = parse('bar()') node = parse('bar()')

24
Lib/test/test_html.py Normal file
View file

@ -0,0 +1,24 @@
"""
Tests for the html module functions.
"""
import html
import unittest
from test.support import run_unittest
class HtmlTests(unittest.TestCase):
def test_escape(self):
self.assertEqual(
html.escape('\'<script>"&foo;"</script>\''),
'&#x27;&lt;script&gt;&quot;&amp;foo;&quot;&lt;/script&gt;&#x27;')
self.assertEqual(
html.escape('\'<script>"&foo;"</script>\'', False),
'\'&lt;script&gt;"&amp;foo;"&lt;/script&gt;\'')
def test_main():
run_unittest(HtmlTests)
if __name__ == '__main__':
test_main()

View file

@ -12,7 +12,7 @@
# except if the test is specific to the Python implementation. # except if the test is specific to the Python implementation.
import sys import sys
import cgi import html
import unittest import unittest
from test import support from test import support
@ -1328,7 +1328,7 @@ XINCLUDE["default.xml"] = """\
<p>Example.</p> <p>Example.</p>
<xi:include href="{}"/> <xi:include href="{}"/>
</document> </document>
""".format(cgi.escape(SIMPLE_XMLFILE, True)) """.format(html.escape(SIMPLE_XMLFILE, True))
def xinclude_loader(href, parse="xml", encoding=None): def xinclude_loader(href, parse="xml", encoding=None):
try: try:

View file

@ -24,6 +24,9 @@ Core and Builtins
Library Library
------- -------
- Issue #2830: Add the ``html.escape()`` function, which quotes all problematic
characters by default. Deprecate ``cgi.escape()``.
- Issue 9409: Fix the regex to match all kinds of filenames, for interactive - Issue 9409: Fix the regex to match all kinds of filenames, for interactive
debugging in doctests. debugging in doctests.