Make a new urllib package.

It consists of code from urllib, urllib2, urlparse, and robotparser. The old modules have all been removed. The new package has five submodules: urllib.parse, urllib.request, urllib.response, urllib.error, and urllib.robotparser. The urllib.request.urlopen() function uses the URL opener from urllib2. Note that the unit tests have not been renamed for the beta, but they will be renamed in the future. Joint work with Senthil Kumaran.
2025-11-25 12:44:13 +00:00 · 2008-06-18 20:49:58 +00:00 · 2008-06-18 20:49:58 +00:00 · 1afc169616
commit 1afc169616
parent a656d2cd89
40 changed files with 3190 additions and 3536 deletions
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@ -70,7 +70,7 @@ import io
 import socket
 import email.parser
 import email.message
-from urlparse import urlsplit
+from urllib.parse import urlsplit
 import warnings

 __all__ = ["HTTPResponse", "HTTPConnection",
--- a/Lib/http/cookiejar.py
+++ b/Lib/http/cookiejar.py
@ -28,7 +28,10 @@ http://wwwsearch.sf.net/):
 __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']

-import re, urlparse, copy, time, urllib
+import copy
+import re
+import time
+import urllib.parse, urllib.request
 try:
    import threading as _threading
 except ImportError:
@ -580,7 +583,7 @@ def request_host(request):

    """
    url = request.get_full_url()
-    host = urlparse.urlparse(url)[1]
+    host = urllib.parse.urlparse(url)[1]
    if host == "":
        host = request.get_header("Host", "")

@ -602,13 +605,11 @@ def eff_request_host(request):
 def request_path(request):
    """request-URI, as defined by RFC 2965."""
    url = request.get_full_url()
-    #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
-    #req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
-    path, parameters, query, frag = urlparse.urlparse(url)[2:]
+    path, parameters, query, frag = urllib.parse.urlparse(url)[2:]
    if parameters:
        path = "%s;%s" % (path, parameters)
    path = escape_path(path)
-    req_path = urlparse.urlunparse(("", "", path, "", query, frag))
+    req_path = urllib.parse.urlunparse(("", "", path, "", query, frag))
    if not req_path.startswith("/"):
        # fix bad RFC 2396 absoluteURI
        req_path = "/"+req_path
@ -644,7 +645,7 @@ def escape_path(path):
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
-    path = urllib.quote(path, HTTP_PATH_SAFE)
+    path = urllib.parse.quote(path, HTTP_PATH_SAFE)
    path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
    return path

@ -1197,8 +1198,7 @@ class CookieJar:
    """Collection of HTTP cookies.

    You may not need to know about this class: try
-    urllib2.build_opener(HTTPCookieProcessor).open(url).
-
+    urllib.request.build_opener(HTTPCookieProcessor).open(url).
    """

    non_word_re = re.compile(r"\W")
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@ -93,7 +93,7 @@ import cgi
 import time
 import socket # For gethostbyaddr()
 import shutil
-import urllib
+import urllib.parse
 import select
 import mimetypes
 import posixpath
@ -683,7 +683,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
            return None
        list.sort(key=lambda a: a.lower())
        r = []
-        displaypath = cgi.escape(urllib.unquote(self.path))
+        displaypath = cgi.escape(urllib.parse.unquote(self.path))
        r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
        r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
@ -699,7 +699,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a>\n'
-                    % (urllib.quote(linkname), cgi.escape(displayname)))
+                    % (urllib.parse.quote(linkname), cgi.escape(displayname)))
        r.append("</ul>\n<hr>\n</body>\n</html>\n")
        enc = sys.getfilesystemencoding()
        encoded = ''.join(r).encode(enc)
@ -723,7 +723,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
        # abandon query parameters
        path = path.split('?',1)[0]
        path = path.split('#',1)[0]
-        path = posixpath.normpath(urllib.unquote(path))
+        path = posixpath.normpath(urllib.parse.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        path = os.getcwd()
@ -947,7 +947,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
-        uqrest = urllib.unquote(rest)
+        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname