mirror of
https://github.com/python/cpython.git
synced 2025-09-29 19:56:59 +00:00
The current regex based splitting produces a wrong result. For example:: http://abc#@def Web browsers parse that URL as ``http://abc/#@def``, that is, the host is ``abc``, the path is ``/``, and the fragment is ``#@def``. (cherry picked from commit90e01e50ef
) (cherry picked from commit536c1f1246
)
This commit is contained in:
parent
00134f64d9
commit
b0fba8874a
4 changed files with 58 additions and 13 deletions
|
@ -755,28 +755,35 @@ class UrlParseTestCase(unittest.TestCase):
|
||||||
def test_parse_fragments(self):
|
def test_parse_fragments(self):
|
||||||
# Exercise the allow_fragments parameter of urlparse() and urlsplit()
|
# Exercise the allow_fragments parameter of urlparse() and urlsplit()
|
||||||
tests = (
|
tests = (
|
||||||
("http:#frag", "path"),
|
("http:#frag", "path", "frag"),
|
||||||
("//example.net#frag", "path"),
|
("//example.net#frag", "path", "frag"),
|
||||||
("index.html#frag", "path"),
|
("index.html#frag", "path", "frag"),
|
||||||
(";a=b#frag", "params"),
|
(";a=b#frag", "params", "frag"),
|
||||||
("?a=b#frag", "query"),
|
("?a=b#frag", "query", "frag"),
|
||||||
("#frag", "path"),
|
("#frag", "path", "frag"),
|
||||||
|
("abc#@frag", "path", "@frag"),
|
||||||
|
("//abc#@frag", "path", "@frag"),
|
||||||
|
("//abc:80#@frag", "path", "@frag"),
|
||||||
|
("//abc#@frag:80", "path", "@frag:80"),
|
||||||
)
|
)
|
||||||
for url, attr in tests:
|
for url, attr, expected_frag in tests:
|
||||||
for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
|
for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
|
||||||
if attr == "params" and func is urllib.parse.urlsplit:
|
if attr == "params" and func is urllib.parse.urlsplit:
|
||||||
attr = "path"
|
attr = "path"
|
||||||
with self.subTest(url=url, function=func):
|
with self.subTest(url=url, function=func):
|
||||||
result = func(url, allow_fragments=False)
|
result = func(url, allow_fragments=False)
|
||||||
self.assertEqual(result.fragment, "")
|
self.assertEqual(result.fragment, "")
|
||||||
self.assertTrue(getattr(result, attr).endswith("#frag"))
|
self.assertTrue(
|
||||||
|
getattr(result, attr).endswith("#" + expected_frag))
|
||||||
self.assertEqual(func(url, "", False).fragment, "")
|
self.assertEqual(func(url, "", False).fragment, "")
|
||||||
|
|
||||||
result = func(url, allow_fragments=True)
|
result = func(url, allow_fragments=True)
|
||||||
self.assertEqual(result.fragment, "frag")
|
self.assertEqual(result.fragment, expected_frag)
|
||||||
self.assertFalse(getattr(result, attr).endswith("frag"))
|
self.assertFalse(
|
||||||
self.assertEqual(func(url, "", True).fragment, "frag")
|
getattr(result, attr).endswith(expected_frag))
|
||||||
self.assertEqual(func(url).fragment, "frag")
|
self.assertEqual(func(url, "", True).fragment,
|
||||||
|
expected_frag)
|
||||||
|
self.assertEqual(func(url).fragment, expected_frag)
|
||||||
|
|
||||||
def test_mixed_types_rejected(self):
|
def test_mixed_types_rejected(self):
|
||||||
# Several functions that process either strings or ASCII encoded bytes
|
# Several functions that process either strings or ASCII encoded bytes
|
||||||
|
@ -983,6 +990,26 @@ class Utility_Tests(unittest.TestCase):
|
||||||
self.assertEqual(splithost('/foo/bar/baz.html'),
|
self.assertEqual(splithost('/foo/bar/baz.html'),
|
||||||
(None, '/foo/bar/baz.html'))
|
(None, '/foo/bar/baz.html'))
|
||||||
|
|
||||||
|
# bpo-30500: # starts a fragment.
|
||||||
|
self.assertEqual(splithost('//127.0.0.1#@host.com'),
|
||||||
|
('127.0.0.1', '/#@host.com'))
|
||||||
|
self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
|
||||||
|
('127.0.0.1', '/#@host.com:80'))
|
||||||
|
self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
|
||||||
|
('127.0.0.1:80', '/#@host.com'))
|
||||||
|
|
||||||
|
# Empty host is returned as empty string.
|
||||||
|
self.assertEqual(splithost("///file"),
|
||||||
|
('', '/file'))
|
||||||
|
|
||||||
|
# Trailing semicolon, question mark and hash symbol are kept.
|
||||||
|
self.assertEqual(splithost("//example.net/file;"),
|
||||||
|
('example.net', '/file;'))
|
||||||
|
self.assertEqual(splithost("//example.net/file?"),
|
||||||
|
('example.net', '/file?'))
|
||||||
|
self.assertEqual(splithost("//example.net/file#"),
|
||||||
|
('example.net', '/file#'))
|
||||||
|
|
||||||
def test_splituser(self):
|
def test_splituser(self):
|
||||||
splituser = urllib.parse.splituser
|
splituser = urllib.parse.splituser
|
||||||
self.assertEqual(splituser('User:Pass@www.python.org:080'),
|
self.assertEqual(splituser('User:Pass@www.python.org:080'),
|
||||||
|
|
|
@ -944,7 +944,7 @@ def splithost(url):
|
||||||
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
|
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
|
||||||
global _hostprog
|
global _hostprog
|
||||||
if _hostprog is None:
|
if _hostprog is None:
|
||||||
_hostprog = re.compile('//([^/?]*)(.*)', re.DOTALL)
|
_hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
|
||||||
|
|
||||||
match = _hostprog.match(url)
|
match = _hostprog.match(url)
|
||||||
if match:
|
if match:
|
||||||
|
|
|
@ -1081,6 +1081,7 @@ Max Neunhöffer
|
||||||
Anthon van der Neut
|
Anthon van der Neut
|
||||||
George Neville-Neil
|
George Neville-Neil
|
||||||
Hieu Nguyen
|
Hieu Nguyen
|
||||||
|
Nam Nguyen
|
||||||
Johannes Nicolai
|
Johannes Nicolai
|
||||||
Samuel Nicolary
|
Samuel Nicolary
|
||||||
Jonathan Niehof
|
Jonathan Niehof
|
||||||
|
|
17
Misc/NEWS
17
Misc/NEWS
|
@ -2,6 +2,23 @@
|
||||||
Python News
|
Python News
|
||||||
+++++++++++
|
+++++++++++
|
||||||
|
|
||||||
|
What's New in Python 3.6.3 release candidate 1?
|
||||||
|
===============================================
|
||||||
|
|
||||||
|
*Release date: XXXX-XX-XX*
|
||||||
|
|
||||||
|
Core and Builtins
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
Library
|
||||||
|
-------
|
||||||
|
|
||||||
|
- [Security] bpo-30500: Fix urllib.parse.splithost() to correctly parse
|
||||||
|
fragments. For example, ``splithost('//127.0.0.1#@evil.com/')`` now
|
||||||
|
correctly returns the ``127.0.0.1`` host, instead of treating ``@evil.com``
|
||||||
|
as the host in an authentification (``login@host``).
|
||||||
|
|
||||||
|
|
||||||
What's New in Python 3.6.2 release candidate 1?
|
What's New in Python 3.6.2 release candidate 1?
|
||||||
===============================================
|
===============================================
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue