bpo-27657: Fix urlparse() with numeric paths (#661)

* bpo-27657: Fix urlparse() with numeric paths.
  Revert the parsing decision from bpo-754016 in favor of the documented consensus in bpo-16932 on how to treat strings without a // to designate the netloc.
* bpo-22891: Remove urlsplit() optimization for 'http' prefixed inputs.
2025-11-20 02:50:14 +00:00 · 2019-10-18 09:07:20 -04:00 · 2019-10-18 09:07:20 -04:00 · 5a88d50ff0
commit 5a88d50ff0
parent fbe3c76c7c
3 changed files with 9 additions and 25 deletions
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -431,31 +431,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
-        if url[:i] == 'http': # optimize the common case
-            url = url[i+1:]
-            if url[:2] == '//':
-                netloc, url = _splitnetloc(url, 2)
-                if (('[' in netloc and ']' not in netloc) or
-                        (']' in netloc and '[' not in netloc)):
-                    raise ValueError("Invalid IPv6 URL")
-            if allow_fragments and '#' in url:
-                url, fragment = url.split('#', 1)
-            if '?' in url:
-                url, query = url.split('?', 1)
-            _checknetloc(netloc)
-            v = SplitResult('http', netloc, url, query, fragment)
-            _parse_cache[key] = v
-            return _coerce_result(v)
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
-            # make sure "url" is not actually a port number (in which case
-            # "scheme" is really part of the path)
-            rest = url[i+1:]
-            if not rest or any(c not in '0123456789' for c in rest):
-                # not a port number
-                scheme, url = url[:i].lower(), rest
+            scheme, url = url[:i].lower(), url[i+1:]

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)