mirror of
https://github.com/python/cpython.git
synced 2025-08-30 21:48:47 +00:00
bpo-36216: Add check for characters in netloc that normalize to separators (GH-12201)
This commit is contained in:
parent
1f58f4fa6a
commit
16e6f7dee7
4 changed files with 61 additions and 0 deletions
|
@ -396,6 +396,21 @@ def _splitnetloc(url, start=0):
|
|||
delim = min(delim, wdelim) # use earliest delim position
|
||||
return url[start:delim], url[delim:] # return (domain, rest)
|
||||
|
||||
def _checknetloc(netloc):
|
||||
if not netloc or netloc.isascii():
|
||||
return
|
||||
# looking for characters like \u2100 that expand to 'a/c'
|
||||
# IDNA uses NFKC equivalence, so normalize for this check
|
||||
import unicodedata
|
||||
netloc2 = unicodedata.normalize('NFKC', netloc)
|
||||
if netloc == netloc2:
|
||||
return
|
||||
_, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay
|
||||
for c in '/?#@:':
|
||||
if c in netloc2:
|
||||
raise ValueError("netloc '" + netloc2 + "' contains invalid " +
|
||||
"characters under NFKC normalization")
|
||||
|
||||
def urlsplit(url, scheme='', allow_fragments=True):
|
||||
"""Parse a URL into 5 components:
|
||||
<scheme>://<netloc>/<path>?<query>#<fragment>
|
||||
|
@ -424,6 +439,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
|
|||
url, fragment = url.split('#', 1)
|
||||
if '?' in url:
|
||||
url, query = url.split('?', 1)
|
||||
_checknetloc(netloc)
|
||||
v = SplitResult('http', netloc, url, query, fragment)
|
||||
_parse_cache[key] = v
|
||||
return _coerce_result(v)
|
||||
|
@ -447,6 +463,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
|
|||
url, fragment = url.split('#', 1)
|
||||
if '?' in url:
|
||||
url, query = url.split('?', 1)
|
||||
_checknetloc(netloc)
|
||||
v = SplitResult(scheme, netloc, url, query, fragment)
|
||||
_parse_cache[key] = v
|
||||
return _coerce_result(v)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue