Fix Issue754016 - urlparse goes wrong with IP:port without scheme

2025-11-24 20:30:18 +00:00 · 2010-08-04 04:45:31 +00:00 · 2010-08-04 04:45:31 +00:00 · 0b5019fe23
commit 0b5019fe23
parent 75a292e5be
3 changed files with 44 additions and 5 deletions
--- a/Doc/library/urlparse.rst
+++ b/Doc/library/urlparse.rst
@ -58,6 +58,24 @@ The :mod:`urlparse` module defines the following functions:
      >>> o.geturl()
      'http://www.cwi.nl:80/%7Eguido/Python.html'
   If the scheme value is not specified, urlparse, following the syntax
   specifications from RFC 1808, expects the netloc value to start with '//'.
   Otherwise, it is not possible to distinguish between the netloc and path
   components, and urlparse classifies the indistinguishable component as a
   path, as in a relative URL.
       >>> from urlparse import urlparse
       >>> urlparse('//www.cwi.nl:80/%7Eguido/Python.html')
       ParseResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html',
                  params='', query='', fragment='')
       >>> urlparse('www.cwi.nl:80/%7Eguido/Python.html')
       ParseResult(scheme='', netloc='', path='www.cwi.nl:80/%7Eguido/Python.html',
                  params='', query='', fragment='')
       >>> urlparse('help/Python.html')
       ParseResult(scheme='', netloc='', path='help/Python.html', params='',
                  query='', fragment='')
   If the *scheme* argument is specified, it gives the default addressing
   scheme, to be used only if the URL does not specify one.  The default value for
   this argument is the empty string.
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@ -478,6 +478,26 @@ class UrlParseTestCase(unittest.TestCase):
        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
                         ('x-newscheme','foo.com','/stuff','','',''))
    def test_withoutscheme(self):
        # Issue 754016: urlparse goes wrong with IP:port without scheme.
        # Per RFC 1808 a netloc must be introduced by '//'; urlparse expects
        # the same, and otherwise classifies that portion of the URL as path.
        expectations = [
            # No scheme, no '//': everything is path.
            ("path",
                ('','','path','','','')),
            # No scheme but a leading '//': host:port is the netloc.
            ("//www.python.org:80",
                ('','www.python.org:80','','','','')),
            # Explicit scheme followed by '//' netloc.
            ("http://www.python.org:80",
                ('http','www.python.org:80','','','','')),
        ]
        for url, expected in expectations:
            self.assertEqual(urlparse.urlparse(url), expected)
    def test_portseparator(self):
        # Issue 754016: a ':' can be a port separator rather than the
        # scheme separator; check that the two are told apart.
        parse = urlparse.urlparse

        # ':' followed by digits and no '//': the whole string stays in path.
        result = parse("path:80")
        self.assertEqual(result, ('','','path:80','','',''))

        # A trailing ':' still terminates a scheme.
        result = parse("http:")
        self.assertEqual(result, ('http','','','','',''))
        result = parse("https:")
        self.assertEqual(result, ('https','','','','',''))

        # Scheme separator and port separator in the same URL.
        result = parse("http://www.python.org:80")
        self.assertEqual(result, ('http','www.python.org:80','','','',''))
 def test_main():
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@ -187,11 +187,12 @@ def urlsplit(url, scheme='', allow_fragments=True):
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return v
-        for c in url[:i]:
+        if url.endswith(':') or not url[i+1].isdigit():
-            if c not in scheme_chars:
+            for c in url[:i]:
-                break
+                if c not in scheme_chars:
-        else:
+                    break
-            scheme, url = url[:i].lower(), url[i+1:]
+            else:
                scheme, url = url[:i].lower(), url[i+1:]
    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)