mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
bpo-43882 - urllib.parse should sanitize urls containing ASCII newline and tabs. (GH-25595)
* issue43882 - urllib.parse should sanitize urls containing ASCII newline and tabs. Co-authored-by: Gregory P. Smith <greg@krypto.org> Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
14fc2bdfab
commit
76cd81d603
4 changed files with 54 additions and 0 deletions
|
@ -78,6 +78,9 @@ scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
|
|||
'0123456789'
|
||||
'+-.')
|
||||
|
||||
# Unsafe bytes to be removed per WHATWG spec
|
||||
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
|
||||
|
||||
# XXX: Consider replacing with functools.lru_cache
|
||||
MAX_CACHE_SIZE = 20
|
||||
_parse_cache = {}
|
||||
|
@ -469,6 +472,9 @@ def urlsplit(url, scheme='', allow_fragments=True):
|
|||
else:
|
||||
scheme, url = url[:i].lower(), url[i+1:]
|
||||
|
||||
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
|
||||
url = url.replace(b, "")
|
||||
|
||||
if url[:2] == '//':
|
||||
netloc, url = _splitnetloc(url, 2)
|
||||
if (('[' in netloc and ']' not in netloc) or
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue