mirror of
https://github.com/python/cpython.git
synced 2025-10-10 00:43:41 +00:00
bpo-30458: Disallow control chars in http URLs. (GH-12755)
Disallow control chars in http URLs in urllib.urlopen. This addresses a potential security problem for applications that do not sanity check their URLs where http request headers could be injected.
This commit is contained in:
parent
5f38b8407b
commit
c4e671eec2
4 changed files with 71 additions and 2 deletions
|
@ -137,6 +137,16 @@ _MAXHEADERS = 100
|
|||
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
|
||||
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
|
||||
|
||||
# These characters are not allowed within HTTP URL paths.
|
||||
# See https://tools.ietf.org/html/rfc3986#section-3.3 and the
|
||||
# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
|
||||
# Prevents CVE-2019-9740. Includes control characters such as \r\n.
|
||||
# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
|
||||
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
|
||||
# Arguably only these _should_ be allowed:
|
||||
# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
|
||||
# We are more lenient for assumed real world compatibility purposes.
|
||||
|
||||
# We always set the Content-Length header for these methods because some
|
||||
# servers will otherwise respond with a 411
|
||||
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
|
||||
|
@ -1079,6 +1089,10 @@ class HTTPConnection:
|
|||
self._method = method
|
||||
if not url:
|
||||
url = '/'
|
||||
# Prevent CVE-2019-9740.
|
||||
if match := _contains_disallowed_url_pchar_re.search(url):
|
||||
raise ValueError(f"URL can't contain control characters. {url!r} "
|
||||
f"(found at least {match.group()!r})")
|
||||
request = '%s %s %s' % (method, url, self._http_vsn_str)
|
||||
|
||||
# Non-ASCII characters should have been eliminated earlier
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue