mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
#2830: add html.escape() helper and move cgi.escape() uses in the standard library to it. It defaults to quote=True and also escapes single quotes, which makes casual use safer. The cgi.escape() interface is not touched, but emits a (silent) PendingDeprecationWarning.
This commit is contained in:
parent
70543acfa1
commit
1f7fffb308
11 changed files with 94 additions and 28 deletions
|
@ -293,7 +293,7 @@ following WSGI-application::
|
||||||
# -*- coding: UTF-8 -*-
|
# -*- coding: UTF-8 -*-
|
||||||
|
|
||||||
import sys, os
|
import sys, os
|
||||||
from cgi import escape
|
from html import escape
|
||||||
from flup.server.fcgi import WSGIServer
|
from flup.server.fcgi import WSGIServer
|
||||||
|
|
||||||
def app(environ, start_response):
|
def app(environ, start_response):
|
||||||
|
|
|
@ -328,9 +328,9 @@ algorithms implemented in this module in other circumstances.
|
||||||
attribute value delimited by double quotes, as in ``<a href="...">``. Note
|
attribute value delimited by double quotes, as in ``<a href="...">``. Note
|
||||||
that single quotes are never translated.
|
that single quotes are never translated.
|
||||||
|
|
||||||
If the value to be quoted might include single- or double-quote characters,
|
.. deprecated:: 3.2
|
||||||
or both, consider using the :func:`~xml.sax.saxutils.quoteattr` function in the
|
This function is unsafe because *quote* is false by default, and therefore
|
||||||
:mod:`xml.sax.saxutils` module instead.
|
deprecated. Use :func:`html.escape` instead.
|
||||||
|
|
||||||
|
|
||||||
.. _cgi-security:
|
.. _cgi-security:
|
||||||
|
@ -508,8 +508,8 @@ Common problems and solutions
|
||||||
|
|
||||||
.. rubric:: Footnotes
|
.. rubric:: Footnotes
|
||||||
|
|
||||||
.. [#] Note that some recent versions of the HTML specification do state what order the
|
.. [#] Note that some recent versions of the HTML specification do state what
|
||||||
field values should be supplied in, but knowing whether a request was
|
order the field values should be supplied in, but knowing whether a request
|
||||||
received from a conforming browser, or even from a browser at all, is tedious
|
was received from a conforming browser, or even from a browser at all, is
|
||||||
and error-prone.
|
tedious and error-prone.
|
||||||
|
|
||||||
|
|
18
Doc/library/html.rst
Normal file
18
Doc/library/html.rst
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
:mod:`html` --- HyperText Markup Language support
|
||||||
|
=================================================
|
||||||
|
|
||||||
|
.. module:: html
|
||||||
|
:synopsis: Helpers for manipulating HTML.
|
||||||
|
|
||||||
|
.. versionadded:: 3.2
|
||||||
|
|
||||||
|
|
||||||
|
This module defines utilities to manipulate HTML.
|
||||||
|
|
||||||
|
.. function:: escape(s, quote=True)
|
||||||
|
|
||||||
|
Convert the characters ``&``, ``<`` and ``>`` in string *s* to HTML-safe
|
||||||
|
sequences. Use this if you need to display text that might contain such
|
||||||
|
characters in HTML. If the optional flag *quote* is true, the characters
|
||||||
|
(``"``) and (``'``) are also translated; this helps for inclusion in an HTML
|
||||||
|
attribute value delimited by quotes, as in ``<a href="...">``.
|
|
@ -20,6 +20,7 @@ definition of the Python bindings for the DOM and SAX interfaces.
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
|
|
||||||
|
html.rst
|
||||||
html.parser.rst
|
html.parser.rst
|
||||||
html.entities.rst
|
html.entities.rst
|
||||||
pyexpat.rst
|
pyexpat.rst
|
||||||
|
|
25
Lib/cgi.py
25
Lib/cgi.py
|
@ -31,13 +31,13 @@ __version__ = "2.6"
|
||||||
# Imports
|
# Imports
|
||||||
# =======
|
# =======
|
||||||
|
|
||||||
from operator import attrgetter
|
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import email.parser
|
import email.parser
|
||||||
from warnings import warn
|
from warnings import warn
|
||||||
|
import html
|
||||||
|
|
||||||
__all__ = ["MiniFieldStorage", "FieldStorage",
|
__all__ = ["MiniFieldStorage", "FieldStorage",
|
||||||
"parse", "parse_qs", "parse_qsl", "parse_multipart",
|
"parse", "parse_qs", "parse_qsl", "parse_multipart",
|
||||||
|
@ -800,8 +800,8 @@ def print_exception(type=None, value=None, tb=None, limit=None):
|
||||||
list = traceback.format_tb(tb, limit) + \
|
list = traceback.format_tb(tb, limit) + \
|
||||||
traceback.format_exception_only(type, value)
|
traceback.format_exception_only(type, value)
|
||||||
print("<PRE>%s<B>%s</B></PRE>" % (
|
print("<PRE>%s<B>%s</B></PRE>" % (
|
||||||
escape("".join(list[:-1])),
|
html.escape("".join(list[:-1])),
|
||||||
escape(list[-1]),
|
html.escape(list[-1]),
|
||||||
))
|
))
|
||||||
del tb
|
del tb
|
||||||
|
|
||||||
|
@ -812,7 +812,7 @@ def print_environ(environ=os.environ):
|
||||||
print("<H3>Shell Environment:</H3>")
|
print("<H3>Shell Environment:</H3>")
|
||||||
print("<DL>")
|
print("<DL>")
|
||||||
for key in keys:
|
for key in keys:
|
||||||
print("<DT>", escape(key), "<DD>", escape(environ[key]))
|
print("<DT>", html.escape(key), "<DD>", html.escape(environ[key]))
|
||||||
print("</DL>")
|
print("</DL>")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
@ -825,10 +825,10 @@ def print_form(form):
|
||||||
print("<P>No form fields.")
|
print("<P>No form fields.")
|
||||||
print("<DL>")
|
print("<DL>")
|
||||||
for key in keys:
|
for key in keys:
|
||||||
print("<DT>" + escape(key) + ":", end=' ')
|
print("<DT>" + html.escape(key) + ":", end=' ')
|
||||||
value = form[key]
|
value = form[key]
|
||||||
print("<i>" + escape(repr(type(value))) + "</i>")
|
print("<i>" + html.escape(repr(type(value))) + "</i>")
|
||||||
print("<DD>" + escape(repr(value)))
|
print("<DD>" + html.escape(repr(value)))
|
||||||
print("</DL>")
|
print("</DL>")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
@ -839,9 +839,9 @@ def print_directory():
|
||||||
try:
|
try:
|
||||||
pwd = os.getcwd()
|
pwd = os.getcwd()
|
||||||
except os.error as msg:
|
except os.error as msg:
|
||||||
print("os.error:", escape(str(msg)))
|
print("os.error:", html.escape(str(msg)))
|
||||||
else:
|
else:
|
||||||
print(escape(pwd))
|
print(html.escape(pwd))
|
||||||
print()
|
print()
|
||||||
|
|
||||||
def print_arguments():
|
def print_arguments():
|
||||||
|
@ -899,9 +899,9 @@ environment as well. Here are some common variable names:
|
||||||
# =========
|
# =========
|
||||||
|
|
||||||
def escape(s, quote=None):
|
def escape(s, quote=None):
|
||||||
'''Replace special characters "&", "<" and ">" to HTML-safe sequences.
|
"""Deprecated API."""
|
||||||
If the optional flag quote is true, the quotation mark character (")
|
warn("cgi.escape is deprecated, use html.escape instead",
|
||||||
is also translated.'''
|
PendingDeprecationWarning, stacklevel=2)
|
||||||
s = s.replace("&", "&amp;") # Must be done first!
|
s = s.replace("&", "&amp;") # Must be done first!
|
||||||
s = s.replace("<", "&lt;")
|
s = s.replace("<", "&lt;")
|
||||||
s = s.replace(">", "&gt;")
|
s = s.replace(">", "&gt;")
|
||||||
|
@ -909,6 +909,7 @@ def escape(s, quote=None):
|
||||||
s = s.replace('"', "&quot;")
|
s = s.replace('"', "&quot;")
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
|
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
|
||||||
import re
|
import re
|
||||||
return re.match(_vb_pattern, s)
|
return re.match(_vb_pattern, s)
|
||||||
|
|
|
@ -1 +1,20 @@
|
||||||
# This directory is a Python package.
|
"""
|
||||||
|
General functions for HTML manipulation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
_escape_map = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;'}
|
||||||
|
_escape_map_full = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;',
|
||||||
|
ord('"'): '&quot;', ord('\''): '&#x27;'}
|
||||||
|
|
||||||
|
# NB: this is a candidate for a bytes/string polymorphic interface
|
||||||
|
|
||||||
|
def escape(s, quote=True):
|
||||||
|
"""
|
||||||
|
Replace special characters "&", "<" and ">" to HTML-safe sequences.
|
||||||
|
If the optional flag quote is true (the default), the quotation mark
|
||||||
|
character (") and the apostrophe (') are also translated.
|
||||||
|
"""
|
||||||
|
if quote:
|
||||||
|
return s.translate(_escape_map_full)
|
||||||
|
return s.translate(_escape_map)
|
||||||
|
|
|
@ -84,7 +84,7 @@ __version__ = "0.6"
|
||||||
|
|
||||||
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
|
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
|
||||||
|
|
||||||
import cgi
|
import html
|
||||||
import email.message
|
import email.message
|
||||||
import email.parser
|
import email.parser
|
||||||
import http.client
|
import http.client
|
||||||
|
@ -705,7 +705,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||||
return None
|
return None
|
||||||
list.sort(key=lambda a: a.lower())
|
list.sort(key=lambda a: a.lower())
|
||||||
r = []
|
r = []
|
||||||
displaypath = cgi.escape(urllib.parse.unquote(self.path))
|
displaypath = html.escape(urllib.parse.unquote(self.path))
|
||||||
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
|
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
|
||||||
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
|
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
|
||||||
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
|
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
|
||||||
|
@ -721,7 +721,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||||
displayname = name + "@"
|
displayname = name + "@"
|
||||||
# Note: a link to a directory displays with @ and links with /
|
# Note: a link to a directory displays with @ and links with /
|
||||||
r.append('<li><a href="%s">%s</a>\n'
|
r.append('<li><a href="%s">%s</a>\n'
|
||||||
% (urllib.parse.quote(linkname), cgi.escape(displayname)))
|
% (urllib.parse.quote(linkname), html.escape(displayname)))
|
||||||
r.append("</ul>\n<hr>\n</body>\n</html>\n")
|
r.append("</ul>\n<hr>\n</body>\n</html>\n")
|
||||||
enc = sys.getfilesystemencoding()
|
enc = sys.getfilesystemencoding()
|
||||||
encoded = ''.join(r).encode(enc)
|
encoded = ''.join(r).encode(enc)
|
||||||
|
|
|
@ -568,8 +568,8 @@ class Test_touch_import(support.TestCase):
|
||||||
|
|
||||||
def test_from_import(self):
|
def test_from_import(self):
|
||||||
node = parse('bar()')
|
node = parse('bar()')
|
||||||
fixer_util.touch_import("cgi", "escape", node)
|
fixer_util.touch_import("html", "escape", node)
|
||||||
self.assertEqual(str(node), 'from cgi import escape\nbar()\n\n')
|
self.assertEqual(str(node), 'from html import escape\nbar()\n\n')
|
||||||
|
|
||||||
def test_name_import(self):
|
def test_name_import(self):
|
||||||
node = parse('bar()')
|
node = parse('bar()')
|
||||||
|
|
24
Lib/test/test_html.py
Normal file
24
Lib/test/test_html.py
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
"""
|
||||||
|
Tests for the html module functions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import html
|
||||||
|
import unittest
|
||||||
|
from test.support import run_unittest
|
||||||
|
|
||||||
|
|
||||||
|
class HtmlTests(unittest.TestCase):
|
||||||
|
def test_escape(self):
|
||||||
|
self.assertEqual(
|
||||||
|
html.escape('\'<script>"&foo;"</script>\''),
|
||||||
|
'&#x27;&lt;script&gt;&quot;&amp;foo;&quot;&lt;/script&gt;&#x27;')
|
||||||
|
self.assertEqual(
|
||||||
|
html.escape('\'<script>"&foo;"</script>\'', False),
|
||||||
|
'\'&lt;script&gt;"&amp;foo;"&lt;/script&gt;\'')
|
||||||
|
|
||||||
|
|
||||||
|
def test_main():
|
||||||
|
run_unittest(HtmlTests)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
test_main()
|
|
@ -12,7 +12,7 @@
|
||||||
# except if the test is specific to the Python implementation.
|
# except if the test is specific to the Python implementation.
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import cgi
|
import html
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from test import support
|
from test import support
|
||||||
|
@ -1328,7 +1328,7 @@ XINCLUDE["default.xml"] = """\
|
||||||
<p>Example.</p>
|
<p>Example.</p>
|
||||||
<xi:include href="{}"/>
|
<xi:include href="{}"/>
|
||||||
</document>
|
</document>
|
||||||
""".format(cgi.escape(SIMPLE_XMLFILE, True))
|
""".format(html.escape(SIMPLE_XMLFILE, True))
|
||||||
|
|
||||||
def xinclude_loader(href, parse="xml", encoding=None):
|
def xinclude_loader(href, parse="xml", encoding=None):
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -24,6 +24,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #2830: Add the ``html.escape()`` function, which quotes all problematic
|
||||||
|
characters by default. Deprecate ``cgi.escape()``.
|
||||||
|
|
||||||
- Issue 9409: Fix the regex to match all kind of filenames, for interactive
|
- Issue 9409: Fix the regex to match all kind of filenames, for interactive
|
||||||
debugging in doctests.
|
debugging in doctests.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue