mirror of
https://github.com/python/cpython.git
synced 2025-10-17 12:18:23 +00:00
gh-76960: Fix urljoin() and urldefrag() for URIs with empty components (GH-123273)
* urljoin() with relative reference "?" sets empty query and removes fragment. * Preserve empty components (authority, params, query, fragment) in urljoin(). * Preserve empty components (authority, params, query) in urldefrag(). Also refactor the code and get rid of double _coerce_args() and _coerce_result() calls in urljoin(), urldefrag(), urlparse() and urlunparse().
This commit is contained in:
parent
e5a567b0a7
commit
fc897fcc01
3 changed files with 140 additions and 52 deletions
|
@ -392,20 +392,23 @@ def urlparse(url, scheme='', allow_fragments=True):
|
|||
Note that % escapes are not expanded.
|
||||
"""
|
||||
url, scheme, _coerce_result = _coerce_args(url, scheme)
|
||||
splitresult = urlsplit(url, scheme, allow_fragments)
|
||||
scheme, netloc, url, query, fragment = splitresult
|
||||
if scheme in uses_params and ';' in url:
|
||||
url, params = _splitparams(url)
|
||||
else:
|
||||
params = ''
|
||||
result = ParseResult(scheme, netloc, url, params, query, fragment)
|
||||
scheme, netloc, url, params, query, fragment = _urlparse(url, scheme, allow_fragments)
|
||||
result = ParseResult(scheme or '', netloc or '', url, params or '', query or '', fragment or '')
|
||||
return _coerce_result(result)
|
||||
|
||||
def _splitparams(url):
|
||||
def _urlparse(url, scheme=None, allow_fragments=True):
|
||||
scheme, netloc, url, query, fragment = _urlsplit(url, scheme, allow_fragments)
|
||||
if (scheme or '') in uses_params and ';' in url:
|
||||
url, params = _splitparams(url, allow_none=True)
|
||||
else:
|
||||
params = None
|
||||
return (scheme, netloc, url, params, query, fragment)
|
||||
|
||||
def _splitparams(url, allow_none=False):
|
||||
if '/' in url:
|
||||
i = url.find(';', url.rfind('/'))
|
||||
if i < 0:
|
||||
return url, ''
|
||||
return url, None if allow_none else ''
|
||||
else:
|
||||
i = url.find(';')
|
||||
return url[:i], url[i+1:]
|
||||
|
@ -472,17 +475,23 @@ def urlsplit(url, scheme='', allow_fragments=True):
|
|||
"""
|
||||
|
||||
url, scheme, _coerce_result = _coerce_args(url, scheme)
|
||||
scheme, netloc, url, query, fragment = _urlsplit(url, scheme, allow_fragments)
|
||||
v = SplitResult(scheme or '', netloc or '', url, query or '', fragment or '')
|
||||
return _coerce_result(v)
|
||||
|
||||
def _urlsplit(url, scheme=None, allow_fragments=True):
|
||||
# Only lstrip url as some applications rely on preserving trailing space.
|
||||
# (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
|
||||
url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE)
|
||||
scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
|
||||
|
||||
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
|
||||
url = url.replace(b, "")
|
||||
scheme = scheme.replace(b, "")
|
||||
if scheme is not None:
|
||||
scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
|
||||
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
|
||||
scheme = scheme.replace(b, "")
|
||||
|
||||
allow_fragments = bool(allow_fragments)
|
||||
netloc = query = fragment = ''
|
||||
netloc = query = fragment = None
|
||||
i = url.find(':')
|
||||
if i > 0 and url[0].isascii() and url[0].isalpha():
|
||||
for c in url[:i]:
|
||||
|
@ -503,8 +512,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
|
|||
if '?' in url:
|
||||
url, query = url.split('?', 1)
|
||||
_checknetloc(netloc)
|
||||
v = SplitResult(scheme, netloc, url, query, fragment)
|
||||
return _coerce_result(v)
|
||||
return (scheme, netloc, url, query, fragment)
|
||||
|
||||
def urlunparse(components):
|
||||
"""Put a parsed URL back together again. This may result in a
|
||||
|
@ -513,9 +521,15 @@ def urlunparse(components):
|
|||
(the draft states that these are equivalent)."""
|
||||
scheme, netloc, url, params, query, fragment, _coerce_result = (
|
||||
_coerce_args(*components))
|
||||
if not netloc:
|
||||
if scheme and scheme in uses_netloc and (not url or url[:1] == '/'):
|
||||
netloc = ''
|
||||
else:
|
||||
netloc = None
|
||||
if params:
|
||||
url = "%s;%s" % (url, params)
|
||||
return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment)))
|
||||
return _coerce_result(_urlunsplit(scheme or None, netloc, url,
|
||||
query or None, fragment or None))
|
||||
|
||||
def urlunsplit(components):
|
||||
"""Combine the elements of a tuple as returned by urlsplit() into a
|
||||
|
@ -525,20 +539,27 @@ def urlunsplit(components):
|
|||
empty query; the RFC states that these are equivalent)."""
|
||||
scheme, netloc, url, query, fragment, _coerce_result = (
|
||||
_coerce_args(*components))
|
||||
if netloc:
|
||||
if not netloc:
|
||||
if scheme and scheme in uses_netloc and (not url or url[:1] == '/'):
|
||||
netloc = ''
|
||||
else:
|
||||
netloc = None
|
||||
return _coerce_result(_urlunsplit(scheme or None, netloc, url,
|
||||
query or None, fragment or None))
|
||||
|
||||
def _urlunsplit(scheme, netloc, url, query, fragment):
|
||||
if netloc is not None:
|
||||
if url and url[:1] != '/': url = '/' + url
|
||||
url = '//' + netloc + url
|
||||
elif url[:2] == '//':
|
||||
url = '//' + url
|
||||
elif scheme and scheme in uses_netloc and (not url or url[:1] == '/'):
|
||||
url = '//' + url
|
||||
if scheme:
|
||||
url = scheme + ':' + url
|
||||
if query:
|
||||
if query is not None:
|
||||
url = url + '?' + query
|
||||
if fragment:
|
||||
if fragment is not None:
|
||||
url = url + '#' + fragment
|
||||
return _coerce_result(url)
|
||||
return url
|
||||
|
||||
def urljoin(base, url, allow_fragments=True):
|
||||
"""Join a base URL and a possibly relative URL to form an absolute
|
||||
|
@ -549,26 +570,29 @@ def urljoin(base, url, allow_fragments=True):
|
|||
return base
|
||||
|
||||
base, url, _coerce_result = _coerce_args(base, url)
|
||||
bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
|
||||
urlparse(base, '', allow_fragments)
|
||||
scheme, netloc, path, params, query, fragment = \
|
||||
urlparse(url, bscheme, allow_fragments)
|
||||
bscheme, bnetloc, bpath, bquery, bfragment = \
|
||||
_urlsplit(base, None, allow_fragments)
|
||||
scheme, netloc, path, query, fragment = \
|
||||
_urlsplit(url, None, allow_fragments)
|
||||
|
||||
if scheme is None:
|
||||
scheme = bscheme
|
||||
if scheme != bscheme or scheme not in uses_relative:
|
||||
return _coerce_result(url)
|
||||
if scheme in uses_netloc:
|
||||
if netloc:
|
||||
return _coerce_result(urlunparse((scheme, netloc, path,
|
||||
params, query, fragment)))
|
||||
return _coerce_result(_urlunsplit(scheme, netloc, path,
|
||||
query, fragment))
|
||||
netloc = bnetloc
|
||||
|
||||
if not path and not params:
|
||||
if not path:
|
||||
path = bpath
|
||||
params = bparams
|
||||
if not query:
|
||||
if query is None:
|
||||
query = bquery
|
||||
return _coerce_result(urlunparse((scheme, netloc, path,
|
||||
params, query, fragment)))
|
||||
if fragment is None:
|
||||
fragment = bfragment
|
||||
return _coerce_result(_urlunsplit(scheme, netloc, path,
|
||||
query, fragment))
|
||||
|
||||
base_parts = bpath.split('/')
|
||||
if base_parts[-1] != '':
|
||||
|
@ -605,8 +629,8 @@ def urljoin(base, url, allow_fragments=True):
|
|||
# then we need to append the trailing '/'
|
||||
resolved_path.append('')
|
||||
|
||||
return _coerce_result(urlunparse((scheme, netloc, '/'.join(
|
||||
resolved_path) or '/', params, query, fragment)))
|
||||
return _coerce_result(_urlunsplit(scheme, netloc, '/'.join(
|
||||
resolved_path) or '/', query, fragment))
|
||||
|
||||
|
||||
def urldefrag(url):
|
||||
|
@ -618,12 +642,12 @@ def urldefrag(url):
|
|||
"""
|
||||
url, _coerce_result = _coerce_args(url)
|
||||
if '#' in url:
|
||||
s, n, p, a, q, frag = urlparse(url)
|
||||
defrag = urlunparse((s, n, p, a, q, ''))
|
||||
s, n, p, q, frag = _urlsplit(url)
|
||||
defrag = _urlunsplit(s, n, p, q, None)
|
||||
else:
|
||||
frag = ''
|
||||
defrag = url
|
||||
return _coerce_result(DefragResult(defrag, frag))
|
||||
return _coerce_result(DefragResult(defrag, frag or ''))
|
||||
|
||||
_hexdig = '0123456789ABCDEFabcdef'
|
||||
_hextobyte = None
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue