mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Fix Issue754016 - urlparse goes wrong with IP:port without scheme
This commit is contained in:
parent
4aa0d4d2d0
commit
84c7d9f87b
3 changed files with 44 additions and 5 deletions
|
@ -48,6 +48,23 @@ The :mod:`urllib.parse` module defines the following functions:
|
||||||
>>> o.geturl()
|
>>> o.geturl()
|
||||||
'http://www.cwi.nl:80/%7Eguido/Python.html'
|
'http://www.cwi.nl:80/%7Eguido/Python.html'
|
||||||
|
|
||||||
|
If the scheme value is not specified, urlparse, following the syntax
|
||||||
|
specifications from RFC 1808, expects the netloc value to start with '//'.
|
||||||
|
Otherwise, it is not possible to distinguish between net_loc and path
|
||||||
|
components, so urlparse classifies the indistinguishable component as a path, as in
|
||||||
|
a relative url.
|
||||||
|
|
||||||
|
>>> from urllib.parse import urlparse
|
||||||
|
>>> urlparse('//www.cwi.nl:80/%7Eguido/Python.html')
|
||||||
|
ParseResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html',
|
||||||
|
params='', query='', fragment='')
|
||||||
|
>>> urlparse('www.cwi.nl:80/%7Eguido/Python.html')
|
||||||
|
ParseResult(scheme='', netloc='', path='www.cwi.nl:80/%7Eguido/Python.html',
|
||||||
|
params='', query='', fragment='')
|
||||||
|
>>> urlparse('help/Python.html')
|
||||||
|
ParseResult(scheme='', netloc='', path='help/Python.html', params='',
|
||||||
|
query='', fragment='')
|
||||||
|
|
||||||
If the *scheme* argument is specified, it gives the default addressing
|
If the *scheme* argument is specified, it gives the default addressing
|
||||||
scheme, to be used only if the URL does not specify one. The default value for
|
scheme, to be used only if the URL does not specify one. The default value for
|
||||||
this argument is the empty string.
|
this argument is the empty string.
|
||||||
|
|
|
@ -461,6 +461,27 @@ class UrlParseTestCase(unittest.TestCase):
|
||||||
self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
|
self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
|
||||||
('http', 'example.com', '', '', 'blahblah=/foo', ''))
|
('http', 'example.com', '', '', 'blahblah=/foo', ''))
|
||||||
|
|
||||||
|
def test_withoutscheme(self):
|
||||||
|
# Test urlparse without scheme
|
||||||
|
# Issue 754016: urlparse goes wrong with IP:port without scheme
|
||||||
|
# RFC 1808 specifies that netloc should start with //; urlparse expects
|
||||||
|
# the same, otherwise it classifies the portion of url as path.
|
||||||
|
self.assertEqual(urllib.parse.urlparse("path"),
|
||||||
|
('','','path','','',''))
|
||||||
|
self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
|
||||||
|
('','www.python.org:80','','','',''))
|
||||||
|
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
|
||||||
|
('http','www.python.org:80','','','',''))
|
||||||
|
|
||||||
|
def test_portseparator(self):
|
||||||
|
# Issue 754016: distinguish the port separator ':' from the scheme separator
|
||||||
|
self.assertEqual(urllib.parse.urlparse("path:80"),
|
||||||
|
('','','path:80','','',''))
|
||||||
|
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
|
||||||
|
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
|
||||||
|
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
|
||||||
|
('http','www.python.org:80','','','',''))
|
||||||
|
|
||||||
def test_usingsys(self):
|
def test_usingsys(self):
|
||||||
# Issue 3314: sys module is used in the error
|
# Issue 3314: sys module is used in the error
|
||||||
self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
|
self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
|
||||||
|
|
|
@ -192,6 +192,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
|
||||||
v = SplitResult(scheme, netloc, url, query, fragment)
|
v = SplitResult(scheme, netloc, url, query, fragment)
|
||||||
_parse_cache[key] = v
|
_parse_cache[key] = v
|
||||||
return v
|
return v
|
||||||
|
if url.endswith(':') or not url[i+1].isdigit():
|
||||||
for c in url[:i]:
|
for c in url[:i]:
|
||||||
if c not in scheme_chars:
|
if c not in scheme_chars:
|
||||||
break
|
break
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue