mirror of
https://github.com/python/cpython.git
synced 2025-08-10 03:49:18 +00:00
Fix Issue754016 - urlparse goes wrong with IP:port without scheme
This commit is contained in:
parent
75a292e5be
commit
0b5019fe23
3 changed files with 44 additions and 5 deletions
|
@ -58,6 +58,24 @@ The :mod:`urlparse` module defines the following functions:
|
||||||
>>> o.geturl()
|
>>> o.geturl()
|
||||||
'http://www.cwi.nl:80/%7Eguido/Python.html'
|
'http://www.cwi.nl:80/%7Eguido/Python.html'
|
||||||
|
|
||||||
|
|
||||||
|
If the scheme value is not specified, urlparse, following the syntax
|
||||||
|
specifications from RFC 1808, expects the netloc value to start with '//'.
|
||||||
|
Otherwise, it is not possible to distinguish between the net_loc and path
|
||||||
|
components, and urlparse classifies the ambiguous component as the path, as in
|
||||||
|
a relative URL.
|
||||||
|
|
||||||
|
>>> from urlparse import urlparse
|
||||||
|
>>> urlparse('//www.cwi.nl:80/%7Eguido/Python.html')
|
||||||
|
ParseResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html',
|
||||||
|
params='', query='', fragment='')
|
||||||
|
>>> urlparse('www.cwi.nl:80/%7Eguido/Python.html')
|
||||||
|
ParseResult(scheme='', netloc='', path='www.cwi.nl:80/%7Eguido/Python.html',
|
||||||
|
params='', query='', fragment='')
|
||||||
|
>>> urlparse('help/Python.html')
|
||||||
|
ParseResult(scheme='', netloc='', path='help/Python.html', params='',
|
||||||
|
query='', fragment='')
|
||||||
|
|
||||||
If the *scheme* argument is specified, it gives the default addressing
|
If the *scheme* argument is specified, it gives the default addressing
|
||||||
scheme, to be used only if the URL does not specify one. The default value for
|
scheme, to be used only if the URL does not specify one. The default value for
|
||||||
this argument is the empty string.
|
this argument is the empty string.
|
||||||
|
|
|
@ -478,6 +478,26 @@ class UrlParseTestCase(unittest.TestCase):
|
||||||
self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
|
self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
|
||||||
('x-newscheme','foo.com','/stuff','','',''))
|
('x-newscheme','foo.com','/stuff','','',''))
|
||||||
|
|
||||||
|
def test_withoutscheme(self):
|
||||||
|
# Test urlparse without scheme
|
||||||
|
# Issue 754016: urlparse goes wrong with IP:port without scheme
|
||||||
|
# RFC 1808 specifies that netloc should start with '//'; urlparse expects
|
||||||
|
# the same, otherwise it classifies that portion of the URL as path.
|
||||||
|
self.assertEqual(urlparse.urlparse("path"),
|
||||||
|
('','','path','','',''))
|
||||||
|
self.assertEqual(urlparse.urlparse("//www.python.org:80"),
|
||||||
|
('','www.python.org:80','','','',''))
|
||||||
|
self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
|
||||||
|
('http','www.python.org:80','','','',''))
|
||||||
|
|
||||||
|
def test_portseparator(self):
|
||||||
|
# Issue 754016: distinguish the port separator ':' from the scheme separator
|
||||||
|
self.assertEqual(urlparse.urlparse("path:80"),
|
||||||
|
('','','path:80','','',''))
|
||||||
|
self.assertEqual(urlparse.urlparse("http:"),('http','','','','',''))
|
||||||
|
self.assertEqual(urlparse.urlparse("https:"),('https','','','','',''))
|
||||||
|
self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
|
||||||
|
('http','www.python.org:80','','','',''))
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
|
|
|
@ -187,11 +187,12 @@ def urlsplit(url, scheme='', allow_fragments=True):
|
||||||
v = SplitResult(scheme, netloc, url, query, fragment)
|
v = SplitResult(scheme, netloc, url, query, fragment)
|
||||||
_parse_cache[key] = v
|
_parse_cache[key] = v
|
||||||
return v
|
return v
|
||||||
for c in url[:i]:
|
if url.endswith(':') or not url[i+1].isdigit():
|
||||||
if c not in scheme_chars:
|
for c in url[:i]:
|
||||||
break
|
if c not in scheme_chars:
|
||||||
else:
|
break
|
||||||
scheme, url = url[:i].lower(), url[i+1:]
|
else:
|
||||||
|
scheme, url = url[:i].lower(), url[i+1:]
|
||||||
|
|
||||||
if url[:2] == '//':
|
if url[:2] == '//':
|
||||||
netloc, url = _splitnetloc(url, 2)
|
netloc, url = _splitnetloc(url, 2)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue