Make a new urllib package .

It consists of code from urllib, urllib2, urlparse, and robotparser.
The old modules have all been removed.  The new package has five
submodules: urllib.parse, urllib.request, urllib.response,
urllib.error, and urllib.robotparser.  The urllib.request.urlopen()
function uses the url opener from urllib2.

Note that the unittests have not been renamed for the
beta, but they will be renamed in the future.

Joint work with Senthil Kumaran.
This commit is contained in:
Jeremy Hylton 2008-06-18 20:49:58 +00:00
parent a656d2cd89
commit 1afc169616
40 changed files with 3190 additions and 3536 deletions

View file

@ -70,7 +70,7 @@ import io
import socket
import email.parser
import email.message
from urlparse import urlsplit
from urllib.parse import urlsplit
import warnings
__all__ = ["HTTPResponse", "HTTPConnection",

View file

@ -28,7 +28,10 @@ http://wwwsearch.sf.net/):
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
import re, urlparse, copy, time, urllib
import copy
import re
import time
import urllib.parse, urllib.request
try:
import threading as _threading
except ImportError:
@ -580,7 +583,7 @@ def request_host(request):
"""
url = request.get_full_url()
host = urlparse.urlparse(url)[1]
host = urllib.parse.urlparse(url)[1]
if host == "":
host = request.get_header("Host", "")
@ -602,13 +605,11 @@ def eff_request_host(request):
def request_path(request):
"""request-URI, as defined by RFC 2965."""
url = request.get_full_url()
#scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
#req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
path, parameters, query, frag = urlparse.urlparse(url)[2:]
path, parameters, query, frag = urllib.parse.urlparse(url)[2:]
if parameters:
path = "%s;%s" % (path, parameters)
path = escape_path(path)
req_path = urlparse.urlunparse(("", "", path, "", query, frag))
req_path = urllib.parse.urlunparse(("", "", path, "", query, frag))
if not req_path.startswith("/"):
# fix bad RFC 2396 absoluteURI
req_path = "/"+req_path
@ -644,7 +645,7 @@ def escape_path(path):
# And here, kind of: draft-fielding-uri-rfc2396bis-03
# (And in draft IRI specification: draft-duerst-iri-05)
# (And here, for new URI schemes: RFC 2718)
path = urllib.quote(path, HTTP_PATH_SAFE)
path = urllib.parse.quote(path, HTTP_PATH_SAFE)
path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
return path
@ -1197,8 +1198,7 @@ class CookieJar:
"""Collection of HTTP cookies.
You may not need to know about this class: try
urllib2.build_opener(HTTPCookieProcessor).open(url).
urllib.request.build_opener(HTTPCookieProcessor).open(url).
"""
non_word_re = re.compile(r"\W")

View file

@ -93,7 +93,7 @@ import cgi
import time
import socket # For gethostbyaddr()
import shutil
import urllib
import urllib.parse
import select
import mimetypes
import posixpath
@ -683,7 +683,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
return None
list.sort(key=lambda a: a.lower())
r = []
displaypath = cgi.escape(urllib.unquote(self.path))
displaypath = cgi.escape(urllib.parse.unquote(self.path))
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
@ -699,7 +699,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
displayname = name + "@"
# Note: a link to a directory displays with @ and links with /
r.append('<li><a href="%s">%s</a>\n'
% (urllib.quote(linkname), cgi.escape(displayname)))
% (urllib.parse.quote(linkname), cgi.escape(displayname)))
r.append("</ul>\n<hr>\n</body>\n</html>\n")
enc = sys.getfilesystemencoding()
encoded = ''.join(r).encode(enc)
@ -723,7 +723,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
# abandon query parameters
path = path.split('?',1)[0]
path = path.split('#',1)[0]
path = posixpath.normpath(urllib.unquote(path))
path = posixpath.normpath(urllib.parse.unquote(path))
words = path.split('/')
words = filter(None, words)
path = os.getcwd()
@ -947,7 +947,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
env['SERVER_PROTOCOL'] = self.protocol_version
env['SERVER_PORT'] = str(self.server.server_port)
env['REQUEST_METHOD'] = self.command
uqrest = urllib.unquote(rest)
uqrest = urllib.parse.unquote(rest)
env['PATH_INFO'] = uqrest
env['PATH_TRANSLATED'] = self.translate_path(uqrest)
env['SCRIPT_NAME'] = scriptname