Make a new urllib package .

It consists of code from urllib, urllib2, urlparse, and robotparser.
The old modules have all been removed.  The new package has five
submodules: urllib.parse, urllib.request, urllib.response,
urllib.error, and urllib.robotparser.  The urllib.request.urlopen()
function uses the url opener from urllib2.

Note that the unittests have not been renamed for the
beta, but they will be renamed in the future.

Joint work with Senthil Kumaran.
This commit is contained in:
Jeremy Hylton 2008-06-18 20:49:58 +00:00
parent a656d2cd89
commit 1afc169616
40 changed files with 3190 additions and 3536 deletions

View file

@ -5,8 +5,8 @@ import os
import io
import socket
import urllib2
from urllib2 import Request, OpenerDirector
import urllib.request
from urllib.request import Request, OpenerDirector
# XXX
# Request
@ -17,10 +17,10 @@ class TrivialTests(unittest.TestCase):
def test_trivial(self):
# A couple trivial tests
self.assertRaises(ValueError, urllib2.urlopen, 'bogus url')
self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')
# XXX Name hacking to get this to work on Windows.
fname = os.path.abspath(urllib2.__file__).replace('\\', '/')
fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
if fname[1:2] == ":":
fname = fname[2:]
# And more hacking to get it to work on MacOS. This assumes
@ -29,18 +29,21 @@ class TrivialTests(unittest.TestCase):
fname = '/' + fname.replace(':', '/')
file_url = "file://%s" % fname
f = urllib2.urlopen(file_url)
f = urllib.request.urlopen(file_url)
buf = f.read()
f.close()
def test_parse_http_list(self):
tests = [('a,b,c', ['a', 'b', 'c']),
('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
tests = [
('a,b,c', ['a', 'b', 'c']),
('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
('a, b, "c", "d", "e,f", g, h',
['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
('a="b\\"c", d="e\\,f", g="h\\\\i"',
['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
for string, list in tests:
self.assertEquals(urllib2.parse_http_list(string), list)
self.assertEquals(urllib.request.parse_http_list(string), list)
def test_request_headers_dict():
@ -107,7 +110,7 @@ def test_request_headers_methods():
def test_password_manager(self):
"""
>>> mgr = urllib2.HTTPPasswordMgr()
>>> mgr = urllib.request.HTTPPasswordMgr()
>>> add = mgr.add_password
>>> add("Some Realm", "http://example.com/", "joe", "password")
>>> add("Some Realm", "http://example.com/ni", "ni", "ni")
@ -172,7 +175,7 @@ def test_password_manager(self):
def test_password_manager_default_port(self):
"""
>>> mgr = urllib2.HTTPPasswordMgr()
>>> mgr = urllib.request.HTTPPasswordMgr()
>>> add = mgr.add_password
The point to note here is that we can't guess the default port if there's
@ -288,7 +291,7 @@ class MockHandler:
res = MockResponse(200, "OK", {}, "")
return self.parent.error("http", args[0], res, code, "", {})
elif action == "raise":
raise urllib2.URLError("blah")
raise urllib.error.URLError("blah")
assert False
def close(self): pass
def add_parent(self, parent):
@ -337,7 +340,7 @@ def build_test_opener(*handler_instances):
opener.add_handler(h)
return opener
class MockHTTPHandler(urllib2.BaseHandler):
class MockHTTPHandler(urllib.request.BaseHandler):
# useful for testing redirections and auth
# sends supplied headers and code as first response
# sends 200 OK as second response
@ -392,7 +395,7 @@ class OpenerDirectorTests(unittest.TestCase):
# TypeError in real code; here, returning self from these mock
# methods would either cause no exception, or AttributeError.
from urllib2 import URLError
from urllib.error import URLError
o = OpenerDirector()
meth_spec = [
@ -400,7 +403,7 @@ class OpenerDirectorTests(unittest.TestCase):
[("redirect_request", "return self")],
]
handlers = add_ordered_mock_handlers(o, meth_spec)
o.add_handler(urllib2.UnknownHandler())
o.add_handler(urllib.request.UnknownHandler())
for scheme in "do", "proxy", "redirect":
self.assertRaises(URLError, o.open, scheme+"://example.com/")
@ -458,7 +461,7 @@ class OpenerDirectorTests(unittest.TestCase):
handlers = add_ordered_mock_handlers(o, meth_spec)
req = Request("http://example.com/")
self.assertRaises(urllib2.URLError, o.open, req)
self.assertRaises(urllib.error.URLError, o.open, req)
self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])
## def test_error(self):
@ -529,8 +532,7 @@ class OpenerDirectorTests(unittest.TestCase):
def sanepathname2url(path):
import urllib
urlpath = urllib.pathname2url(path)
urlpath = urllib.request.pathname2url(path)
if os.name == "nt" and urlpath.startswith("///"):
urlpath = urlpath[2:]
# XXX don't ask me about the mac...
@ -545,7 +547,7 @@ class HandlerTests(unittest.TestCase):
self.filename, self.filetype = filename, filetype
return io.StringIO(self.data), len(self.data)
class NullFTPHandler(urllib2.FTPHandler):
class NullFTPHandler(urllib.request.FTPHandler):
def __init__(self, data): self.data = data
def connect_ftp(self, user, passwd, host, port, dirs,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
@ -587,7 +589,7 @@ class HandlerTests(unittest.TestCase):
def test_file(self):
import email.utils, socket
h = urllib2.FileHandler()
h = urllib.request.FileHandler()
o = h.parent = MockOpener()
TESTFN = support.TESTFN
@ -644,12 +646,12 @@ class HandlerTests(unittest.TestCase):
finally:
f.close()
self.assertRaises(urllib2.URLError,
self.assertRaises(urllib.error.URLError,
h.file_open, Request(url))
finally:
os.remove(TESTFN)
h = urllib2.FileHandler()
h = urllib.request.FileHandler()
o = h.parent = MockOpener()
# XXXX why does // mean ftp (and /// mean not ftp!), and where
# is file: scheme specified? I think this is really a bug, and
@ -668,7 +670,7 @@ class HandlerTests(unittest.TestCase):
try:
h.file_open(req)
# XXXX remove OSError when bug fixed
except (urllib2.URLError, OSError):
except (urllib.error.URLError, OSError):
self.assert_(not ftp)
else:
self.assert_(o.req is req)
@ -685,6 +687,7 @@ class HandlerTests(unittest.TestCase):
return ''
class MockHTTPClass:
def __init__(self):
self.level = 0
self.req_headers = []
self.data = None
self.raise_on_endheaders = False
@ -707,7 +710,7 @@ class HandlerTests(unittest.TestCase):
def getresponse(self):
return MockHTTPResponse(MockFile(), {}, 200, "OK")
h = urllib2.AbstractHTTPHandler()
h = urllib.request.AbstractHTTPHandler()
o = h.parent = MockOpener()
url = "http://example.com/"
@ -737,7 +740,7 @@ class HandlerTests(unittest.TestCase):
# check socket.error converted to URLError
http.raise_on_endheaders = True
self.assertRaises(urllib2.URLError, h.do_open, http, req)
self.assertRaises(urllib.error.URLError, h.do_open, http, req)
# check adding of standard headers
o.addheaders = [("Spam", "eggs")]
@ -768,7 +771,7 @@ class HandlerTests(unittest.TestCase):
self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
def test_errors(self):
h = urllib2.HTTPErrorProcessor()
h = urllib.request.HTTPErrorProcessor()
o = h.parent = MockOpener()
url = "http://example.com/"
@ -794,7 +797,7 @@ class HandlerTests(unittest.TestCase):
def test_cookies(self):
cj = MockCookieJar()
h = urllib2.HTTPCookieProcessor(cj)
h = urllib.request.HTTPCookieProcessor(cj)
o = h.parent = MockOpener()
req = Request("http://example.com/")
@ -810,7 +813,7 @@ class HandlerTests(unittest.TestCase):
def test_redirect(self):
from_url = "http://example.com/a.html"
to_url = "http://example.com/b.html"
h = urllib2.HTTPRedirectHandler()
h = urllib.request.HTTPRedirectHandler()
o = h.parent = MockOpener()
# ordinary redirect behaviour
@ -825,7 +828,7 @@ class HandlerTests(unittest.TestCase):
try:
method(req, MockFile(), code, "Blah",
MockHeaders({"location": to_url}))
except urllib2.HTTPError:
except urllib.error.HTTPError:
# 307 in response to POST requires user OK
self.assert_(code == 307 and data is not None)
self.assertEqual(o.req.get_full_url(), to_url)
@ -860,9 +863,9 @@ class HandlerTests(unittest.TestCase):
while 1:
redirect(h, req, "http://example.com/")
count = count + 1
except urllib2.HTTPError:
except urllib.error.HTTPError:
# don't stop until max_repeats, because cookies may introduce state
self.assertEqual(count, urllib2.HTTPRedirectHandler.max_repeats)
self.assertEqual(count, urllib.request.HTTPRedirectHandler.max_repeats)
# detect endless non-repeating chain of redirects
req = Request(from_url, origin_req_host="example.com")
@ -871,9 +874,9 @@ class HandlerTests(unittest.TestCase):
while 1:
redirect(h, req, "http://example.com/%d" % count)
count = count + 1
except urllib2.HTTPError:
except urllib.error.HTTPError:
self.assertEqual(count,
urllib2.HTTPRedirectHandler.max_redirections)
urllib.request.HTTPRedirectHandler.max_redirections)
def test_cookie_redirect(self):
# cookies shouldn't leak into redirected requests
@ -883,16 +886,16 @@ class HandlerTests(unittest.TestCase):
cj = CookieJar()
interact_netscape(cj, "http://www.example.com/", "spam=eggs")
hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
hdeh = urllib2.HTTPDefaultErrorHandler()
hrh = urllib2.HTTPRedirectHandler()
cp = urllib2.HTTPCookieProcessor(cj)
hdeh = urllib.request.HTTPDefaultErrorHandler()
hrh = urllib.request.HTTPRedirectHandler()
cp = urllib.request.HTTPCookieProcessor(cj)
o = build_test_opener(hh, hdeh, hrh, cp)
o.open("http://www.example.com/")
self.assert_(not hh.req.has_header("Cookie"))
def test_proxy(self):
o = OpenerDirector()
ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
o.add_handler(ph)
meth_spec = [
[("http_open", "return response")]
@ -910,7 +913,7 @@ class HandlerTests(unittest.TestCase):
def test_basic_auth(self, quote_char='"'):
opener = OpenerDirector()
password_manager = MockPasswordManager()
auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
realm = "ACME Widget Store"
http_handler = MockHTTPHandler(
401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
@ -928,10 +931,10 @@ class HandlerTests(unittest.TestCase):
def test_proxy_basic_auth(self):
opener = OpenerDirector()
ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
opener.add_handler(ph)
password_manager = MockPasswordManager()
auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
realm = "ACME Networks"
http_handler = MockHTTPHandler(
407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
@ -958,15 +961,15 @@ class HandlerTests(unittest.TestCase):
self.recorded = []
def record(self, info):
self.recorded.append(info)
class TestDigestAuthHandler(urllib2.HTTPDigestAuthHandler):
class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
def http_error_401(self, *args, **kwds):
self.parent.record("digest")
urllib2.HTTPDigestAuthHandler.http_error_401(self,
urllib.request.HTTPDigestAuthHandler.http_error_401(self,
*args, **kwds)
class TestBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
def http_error_401(self, *args, **kwds):
self.parent.record("basic")
urllib2.HTTPBasicAuthHandler.http_error_401(self,
urllib.request.HTTPBasicAuthHandler.http_error_401(self,
*args, **kwds)
opener = RecordingOpenerDirector()
@ -1030,13 +1033,13 @@ class HandlerTests(unittest.TestCase):
class MiscTests(unittest.TestCase):
def test_build_opener(self):
class MyHTTPHandler(urllib2.HTTPHandler): pass
class FooHandler(urllib2.BaseHandler):
class MyHTTPHandler(urllib.request.HTTPHandler): pass
class FooHandler(urllib.request.BaseHandler):
def foo_open(self): pass
class BarHandler(urllib2.BaseHandler):
class BarHandler(urllib.request.BaseHandler):
def bar_open(self): pass
build_opener = urllib2.build_opener
build_opener = urllib.request.build_opener
o = build_opener(FooHandler, BarHandler)
self.opener_has_handler(o, FooHandler)
@ -1054,14 +1057,14 @@ class MiscTests(unittest.TestCase):
# a particular case of overriding: default handlers can be passed
# in explicitly
o = build_opener()
self.opener_has_handler(o, urllib2.HTTPHandler)
o = build_opener(urllib2.HTTPHandler)
self.opener_has_handler(o, urllib2.HTTPHandler)
o = build_opener(urllib2.HTTPHandler())
self.opener_has_handler(o, urllib2.HTTPHandler)
self.opener_has_handler(o, urllib.request.HTTPHandler)
o = build_opener(urllib.request.HTTPHandler)
self.opener_has_handler(o, urllib.request.HTTPHandler)
o = build_opener(urllib.request.HTTPHandler())
self.opener_has_handler(o, urllib.request.HTTPHandler)
# Issue2670: multiple handlers sharing the same base class
class MyOtherHTTPHandler(urllib2.HTTPHandler): pass
class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass
o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
self.opener_has_handler(o, MyHTTPHandler)
self.opener_has_handler(o, MyOtherHTTPHandler)
@ -1077,7 +1080,7 @@ class MiscTests(unittest.TestCase):
def test_main(verbose=None):
from test import test_urllib2
support.run_doctest(test_urllib2, verbose)
support.run_doctest(urllib2, verbose)
support.run_doctest(urllib.request, verbose)
tests = (TrivialTests,
OpenerDirectorTests,
HandlerTests,