Make a new urllib package.

It consists of code from urllib, urllib2, urlparse, and robotparser.
The old modules have all been removed.  The new package has five
submodules: urllib.parse, urllib.request, urllib.response,
urllib.error, and urllib.robotparser.  The urllib.request.urlopen()
function uses the url opener from urllib2.

Note that the unit tests have not been renamed for the
beta, but they will be renamed in the future.

Joint work with Senthil Kumaran.
This commit is contained in:
Jeremy Hylton 2008-06-18 20:49:58 +00:00
parent a656d2cd89
commit 1afc169616
40 changed files with 3190 additions and 3536 deletions

View file

@ -2,7 +2,7 @@
from test import support
import unittest
import urlparse
import urllib.parse
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
@ -10,19 +10,19 @@ RFC2396_BASE = "http://a/b/c/d;p?q"
class UrlParseTestCase(unittest.TestCase):
def checkRoundtrips(self, url, parsed, split):
result = urlparse.urlparse(url)
result = urllib.parse.urlparse(url)
self.assertEqual(result, parsed)
t = (result.scheme, result.netloc, result.path,
result.params, result.query, result.fragment)
self.assertEqual(t, parsed)
# put it back together and it should be the same
result2 = urlparse.urlunparse(result)
result2 = urllib.parse.urlunparse(result)
self.assertEqual(result2, url)
self.assertEqual(result2, result.geturl())
# the result of geturl() is a fixpoint; we can always parse it
# again to get the same result:
result3 = urlparse.urlparse(result.geturl())
result3 = urllib.parse.urlparse(result.geturl())
self.assertEqual(result3.geturl(), result.geturl())
self.assertEqual(result3, result)
self.assertEqual(result3.scheme, result.scheme)
@ -37,17 +37,17 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(result3.port, result.port)
# check the roundtrip using urlsplit() as well
result = urlparse.urlsplit(url)
result = urllib.parse.urlsplit(url)
self.assertEqual(result, split)
t = (result.scheme, result.netloc, result.path,
result.query, result.fragment)
self.assertEqual(t, split)
result2 = urlparse.urlunsplit(result)
result2 = urllib.parse.urlunsplit(result)
self.assertEqual(result2, url)
self.assertEqual(result2, result.geturl())
# check the fixpoint property of re-parsing the result of geturl()
result3 = urlparse.urlsplit(result.geturl())
result3 = urllib.parse.urlsplit(result.geturl())
self.assertEqual(result3.geturl(), result.geturl())
self.assertEqual(result3, result)
self.assertEqual(result3.scheme, result.scheme)
@ -83,7 +83,7 @@ class UrlParseTestCase(unittest.TestCase):
self.checkRoundtrips(url, parsed, split)
def test_http_roundtrips(self):
# urlparse.urlsplit treats 'http:' as an optimized special case,
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
# so we test both 'http:' and 'https:' in all the following.
# Three cheers for white box knowledge!
testcases = [
@ -111,13 +111,13 @@ class UrlParseTestCase(unittest.TestCase):
self.checkRoundtrips(url, parsed, split)
def checkJoin(self, base, relurl, expected):
self.assertEqual(urlparse.urljoin(base, relurl), expected,
self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
(base, relurl, expected))
def test_unparse_parse(self):
for u in ['Python', './Python']:
self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
def test_RFC1808(self):
# "normal" cases from RFC 1808:
@ -223,11 +223,11 @@ class UrlParseTestCase(unittest.TestCase):
(RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
(RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
]:
self.assertEqual(urlparse.urldefrag(url), (defrag, frag))
self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag))
def test_urlsplit_attributes(self):
url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
p = urlparse.urlsplit(url)
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
self.assertEqual(p.path, "/doc/")
@ -242,7 +242,7 @@ class UrlParseTestCase(unittest.TestCase):
#self.assertEqual(p.geturl(), url)
url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
p = urlparse.urlsplit(url)
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
self.assertEqual(p.path, "/doc/")
@ -259,7 +259,7 @@ class UrlParseTestCase(unittest.TestCase):
# and request email addresses as usernames.
url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
p = urlparse.urlsplit(url)
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
self.assertEqual(p.path, "/doc/")
@ -274,11 +274,11 @@ class UrlParseTestCase(unittest.TestCase):
def test_attributes_bad_port(self):
"""Check handling of non-integer ports."""
p = urlparse.urlsplit("http://www.example.net:foo")
p = urllib.parse.urlsplit("http://www.example.net:foo")
self.assertEqual(p.netloc, "www.example.net:foo")
self.assertRaises(ValueError, lambda: p.port)
p = urlparse.urlparse("http://www.example.net:foo")
p = urllib.parse.urlparse("http://www.example.net:foo")
self.assertEqual(p.netloc, "www.example.net:foo")
self.assertRaises(ValueError, lambda: p.port)
@ -289,7 +289,7 @@ class UrlParseTestCase(unittest.TestCase):
# scheme://netloc syntax, the netloc and related attributes
# should be left empty.
uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
p = urlparse.urlsplit(uri)
p = urllib.parse.urlsplit(uri)
self.assertEqual(p.netloc, "")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
@ -297,7 +297,7 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), uri)
p = urlparse.urlparse(uri)
p = urllib.parse.urlparse(uri)
self.assertEqual(p.netloc, "")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
@ -307,7 +307,7 @@ class UrlParseTestCase(unittest.TestCase):
def test_noslash(self):
# Issue 1637: http://foo.com?query is legal
self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
('http', 'example.com', '', '', 'blahblah=/foo', ''))
def test_main():