mirror of
https://github.com/python/cpython.git
synced 2025-07-19 17:25:54 +00:00

svn+ssh://pythondev@svn.python.org/python/trunk ........ r53012 | walter.doerwald | 2006-12-12 22:55:31 +0100 (Tue, 12 Dec 2006) | 2 lines Fix typo. ........ r53023 | brett.cannon | 2006-12-13 23:31:37 +0100 (Wed, 13 Dec 2006) | 2 lines Remove an unneeded import of 'warnings'. ........ r53025 | brett.cannon | 2006-12-14 00:02:38 +0100 (Thu, 14 Dec 2006) | 2 lines Remove unneeded imports of 'warnings'. ........ r53026 | brett.cannon | 2006-12-14 00:09:53 +0100 (Thu, 14 Dec 2006) | 4 lines Add test.test_support.guard_warnings_filter . This function returns a context manager that protects warnings.filter from being modified once the context is exited. ........ r53029 | george.yoshida | 2006-12-14 03:22:44 +0100 (Thu, 14 Dec 2006) | 2 lines Note that guard_warnings_filter was added in 2.6 ........ r53031 | vinay.sajip | 2006-12-14 09:53:55 +0100 (Thu, 14 Dec 2006) | 1 line Added news on recent changes to logging ........ r53032 | andrew.kuchling | 2006-12-14 19:57:53 +0100 (Thu, 14 Dec 2006) | 1 line [Patch #1599256 from David Watson] check that os.fsync is available before using it ........ r53042 | kurt.kaiser | 2006-12-15 06:13:11 +0100 (Fri, 15 Dec 2006) | 6 lines 1. Avoid hang when encountering a duplicate in a completion list. Bug 1571112. 2. Duplicate some old entries from Python's NEWS to IDLE's NEWS.txt M AutoCompleteWindow.py M NEWS.txt ........ r53048 | andrew.kuchling | 2006-12-18 18:12:31 +0100 (Mon, 18 Dec 2006) | 1 line [Bug #1618083] Add missing word; make a few grammar fixes ........ r53050 | andrew.kuchling | 2006-12-18 18:16:05 +0100 (Mon, 18 Dec 2006) | 1 line Bump version ........ r53051 | andrew.kuchling | 2006-12-18 18:22:07 +0100 (Mon, 18 Dec 2006) | 1 line [Bug #1616726] Fix description of generator.close(); if you raise some random exception, the exception is raised and doesn't trigger a RuntimeError ........ r53052 | andrew.kuchling | 2006-12-18 18:38:14 +0100 (Mon, 18 Dec 2006) | 1 line Describe new methods in Queue module ........ 
r53053 | andrew.kuchling | 2006-12-18 20:22:24 +0100 (Mon, 18 Dec 2006) | 1 line [Patch #1615868 by Lars Gustaebel] Use Py_off_t to fix BZ2File.seek() for offsets > 2Gb ........ r53057 | andrew.kuchling | 2006-12-18 22:29:07 +0100 (Mon, 18 Dec 2006) | 1 line Fix markup ........ r53063 | thomas.wouters | 2006-12-19 09:17:50 +0100 (Tue, 19 Dec 2006) | 5 lines Make sre's SubPattern objects accept slice objects like it already accepts simple slices. ........ r53065 | andrew.kuchling | 2006-12-19 15:13:05 +0100 (Tue, 19 Dec 2006) | 6 lines [Patch #1618455 by Ben Maurer] Improve speed of HMAC by using str.translate() instead of a more general XOR that has to construct a list. Slightly modified from Maurer's patch: the _strxor() function is no longer necessary at all. ........ r53066 | andrew.kuchling | 2006-12-19 15:28:23 +0100 (Tue, 19 Dec 2006) | 9 lines [Bug #1613651] Document socket.recv_into, socket.recvfrom_into Also, the text for recvfrom told you to read recv() for an explanation of the 'flags' argument, but recv() just pointed you at the man page. Copied the man-page text to recvfrom(), recvfrom_into, recv_into to avoid the pointless redirection. I don't have LaTeX on this machine; hope my markup is OK. ........ r53067 | andrew.kuchling | 2006-12-19 15:29:04 +0100 (Tue, 19 Dec 2006) | 1 line Comment typo ........ r53068 | andrew.kuchling | 2006-12-19 16:11:41 +0100 (Tue, 19 Dec 2006) | 1 line [Patch #1617413 from Dug Song] Fix HTTP Basic authentication via HTTPS ........ r53071 | andrew.kuchling | 2006-12-19 16:18:12 +0100 (Tue, 19 Dec 2006) | 1 line [Patch #1600491 from Jim Jewett] Describe how to build help files on Windows ........ r53073 | andrew.kuchling | 2006-12-19 16:43:10 +0100 (Tue, 19 Dec 2006) | 6 lines [Patch #1587139 by kxroberto] Protect lock acquisition/release with try...finally to ensure the lock is always released. This could use the 'with' statement, but the patch uses 'finally'. 2.5 backport candidate. ........ 
r53074 | vinay.sajip | 2006-12-19 19:29:11 +0100 (Tue, 19 Dec 2006) | 1 line Updated documentation for findCaller() to indicate that a 3-tuple is now returned, rather than a 2-tuple. ........ r53090 | georg.brandl | 2006-12-19 23:06:46 +0100 (Tue, 19 Dec 2006) | 3 lines Patch #1484695: The tarfile module now raises a HeaderError exception if a buffer given to frombuf() is invalid. ........ r53099 | raymond.hettinger | 2006-12-20 07:42:06 +0100 (Wed, 20 Dec 2006) | 5 lines Bug #1590891: random.randrange don't return correct value for big number Needs to be backported. ........ r53106 | georg.brandl | 2006-12-20 12:55:16 +0100 (Wed, 20 Dec 2006) | 3 lines Testcase for patch #1484695. ........ r53110 | andrew.kuchling | 2006-12-20 20:48:20 +0100 (Wed, 20 Dec 2006) | 17 lines [Apply length-checking.diff from bug #1599254] Add length checking to single-file mailbox formats: before doing a flush() on a mailbox, seek to the end and verify its length is unchanged, raising ExternalClashError if the file's length has changed. This fix avoids potential data loss if some other process appends to the mailbox file after the table of contents has been generated; instead of overwriting the modified file, you'll get the exception. I also noticed that the self._lookup() call in self.flush() wasn't necessary (everything that sets self._pending to True also calls self.lookup()), and replaced it by an assertion. 2.5 backport candidate. ........ r53112 | andrew.kuchling | 2006-12-20 20:57:10 +0100 (Wed, 20 Dec 2006) | 1 line [Bug #1619674] Make sum() use the term iterable, not sequence ........ r53113 | andrew.kuchling | 2006-12-20 20:58:11 +0100 (Wed, 20 Dec 2006) | 1 line Two grammar fixes ........ r53115 | andrew.kuchling | 2006-12-20 21:11:12 +0100 (Wed, 20 Dec 2006) | 5 lines Some other built-in functions are described with 'sequence' arguments that should really be 'iterable'; this commit changes them. Did I miss any? Did I introduce any errors? ........ 
r53117 | andrew.kuchling | 2006-12-20 21:20:42 +0100 (Wed, 20 Dec 2006) | 1 line [Bug #1619680] in_dll() arguments are documented in the wrong order ........ r53120 | neal.norwitz | 2006-12-21 05:38:00 +0100 (Thu, 21 Dec 2006) | 1 line Lars asked for permission on on python-dev for work on tarfile.py ........ r53125 | andrew.kuchling | 2006-12-21 14:40:29 +0100 (Thu, 21 Dec 2006) | 1 line Mention the os.SEEK_* constants ........ r53129 | walter.doerwald | 2006-12-21 19:06:30 +0100 (Thu, 21 Dec 2006) | 2 lines Fix typo. ........ r53131 | thomas.heller | 2006-12-21 19:30:56 +0100 (Thu, 21 Dec 2006) | 3 lines Fix wrong markup of an argument in a method signature. Will backport. ........ r53137 | andrew.kuchling | 2006-12-22 01:50:56 +0100 (Fri, 22 Dec 2006) | 1 line Typo fix ........ r53139 | andrew.kuchling | 2006-12-22 14:25:02 +0100 (Fri, 22 Dec 2006) | 1 line [Bug #737202; fix from Titus Brown] Make CGIHTTPServer work for scripts in sub-directories ........ r53141 | andrew.kuchling | 2006-12-22 16:04:45 +0100 (Fri, 22 Dec 2006) | 6 lines [Bug #802128] Make the mode argument of dumbdbm actually work the way it's described, and add a test for it. 2.5 bugfix candidate, maybe; arguably this patch changes the API of dumbdbm and shouldn't be added in a point-release. ........ r53142 | andrew.kuchling | 2006-12-22 16:16:58 +0100 (Fri, 22 Dec 2006) | 6 lines [Bug #802128 continued] Modify mode depending on the process umask. Is there really no other way to read the umask than to set it? Hope this works on Windows... ........ r53145 | andrew.kuchling | 2006-12-22 17:43:26 +0100 (Fri, 22 Dec 2006) | 1 line [Bug #776202] Apply Walter Doerwald's patch to use text mode for encoded files ........ r53146 | andrew.kuchling | 2006-12-22 19:41:42 +0100 (Fri, 22 Dec 2006) | 9 lines [Patch #783050 from Patrick Lynch] The emulation of forkpty() is incorrect; the master should close the slave fd. 
Added a test to test_pty.py that reads from the master_fd after doing a pty.fork(); without the fix it hangs forever instead of raising an exception. (<crossing fingers for the buildbots>) 2.5 backport candidate. ........ r53147 | andrew.kuchling | 2006-12-22 20:06:16 +0100 (Fri, 22 Dec 2006) | 1 line [Patch #827559 from Chris Gonnerman] Make SimpleHTTPServer redirect when a directory URL is missing the trailing slash; this lets relative links work. ........ r53149 | andrew.kuchling | 2006-12-22 20:21:27 +0100 (Fri, 22 Dec 2006) | 1 line Darn; this test works when you run test_pty.py directly, but fails when regrtest runs it (the os.read() raises os.error). I can't figure out the cause, so am commenting out the test. ........ r53150 | andrew.kuchling | 2006-12-22 22:48:19 +0100 (Fri, 22 Dec 2006) | 1 line Frak; this test also fails ........ r53153 | lars.gustaebel | 2006-12-23 17:40:13 +0100 (Sat, 23 Dec 2006) | 5 lines Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell() work correctly together with readline(). Will backport to 2.5. ........ r53155 | lars.gustaebel | 2006-12-23 18:57:23 +0100 (Sat, 23 Dec 2006) | 5 lines Patch #1262036: Prevent TarFiles from being added to themselves under certain conditions. Will backport to 2.5. ........ r53159 | andrew.kuchling | 2006-12-27 04:25:31 +0100 (Wed, 27 Dec 2006) | 4 lines [Part of patch #1182394] Move the HMAC blocksize to be a class-level constant; this allows changing it in a subclass. To accommodate this, copy() now uses __class__. Also add some text to a comment. ........ r53160 | andrew.kuchling | 2006-12-27 04:31:24 +0100 (Wed, 27 Dec 2006) | 1 line [Rest of patch #1182394] Add ._current() method so that we can use the written-in-C .hexdigest() method ........ r53161 | lars.gustaebel | 2006-12-27 11:30:46 +0100 (Wed, 27 Dec 2006) | 4 lines Patch #1504073: Fix tarfile.open() for mode "r" with a fileobj argument. Will backport to 2.5. ........ 
r53165 | neal.norwitz | 2006-12-28 05:39:20 +0100 (Thu, 28 Dec 2006) | 1 line Remove a stray (old) macro name left around (I guess) ........ r53188 | neal.norwitz | 2006-12-29 04:01:53 +0100 (Fri, 29 Dec 2006) | 1 line SF bug #1623890, fix argument name in docstring ........ r53200 | raymond.hettinger | 2006-12-30 05:01:17 +0100 (Sat, 30 Dec 2006) | 1 line For sets with cyclical reprs, emit an ellipsis instead of infinitely recursing. ........ r53232 | brett.cannon | 2007-01-04 01:23:49 +0100 (Thu, 04 Jan 2007) | 3 lines Add EnvironmentVarGuard to test.test_support. Provides a context manager to temporarily set or unset environment variables. ........ r53235 | neal.norwitz | 2007-01-04 07:25:31 +0100 (Thu, 04 Jan 2007) | 1 line SF #1627373, fix typo in CarbonEvt. ........ r53244 | raymond.hettinger | 2007-01-04 18:53:34 +0100 (Thu, 04 Jan 2007) | 1 line Fix stability of heapq's nlargest() and nsmallest(). ........ r53249 | martin.v.loewis | 2007-01-04 22:06:12 +0100 (Thu, 04 Jan 2007) | 3 lines Bug #1566280: Explicitly invoke threading._shutdown from Py_Main, to avoid relying on atexit. Will backport to 2.5. ........ r53252 | gregory.p.smith | 2007-01-05 02:59:42 +0100 (Fri, 05 Jan 2007) | 3 lines Support linking of the bsddb module against BerkeleyDB 4.5.x (will backport to 2.5) ........ r53253 | gregory.p.smith | 2007-01-05 03:06:17 +0100 (Fri, 05 Jan 2007) | 2 lines bump module version to match supported berkeleydb version ........ r53255 | neal.norwitz | 2007-01-05 06:25:22 +0100 (Fri, 05 Jan 2007) | 6 lines Prevent crash on shutdown which can occur if we are finalizing and the module dict has been cleared already and some object raises a warning (like in a __del__). Will backport. ........ r53258 | gregory.p.smith | 2007-01-05 08:21:35 +0100 (Fri, 05 Jan 2007) | 2 lines typo fix ........ r53260 | neal.norwitz | 2007-01-05 09:06:43 +0100 (Fri, 05 Jan 2007) | 1 line Add Collin Winter for access to update PEP 3107 ........ 
r53262 | andrew.kuchling | 2007-01-05 15:22:17 +0100 (Fri, 05 Jan 2007) | 1 line [Bug #1622533] Make docstrings raw strings because they contain control characters (\0, \1) ........ r53264 | andrew.kuchling | 2007-01-05 16:51:24 +0100 (Fri, 05 Jan 2007) | 1 line [Patch #1520904] Fix bsddb tests to write to the temp directory instead of the Lib/bsddb/test directory ........ r53279 | brett.cannon | 2007-01-05 22:45:09 +0100 (Fri, 05 Jan 2007) | 3 lines Silence a warning from gcc 4.0.1 by specifying a function's parameter list is 'void' instead of just a set of empty parentheses. ........ r53285 | raymond.hettinger | 2007-01-06 02:14:41 +0100 (Sat, 06 Jan 2007) | 2 lines SF# 1409443: Expand comment to cover the interaction between f->f_lasti and the PREDICT macros. ........ r53286 | anthony.baxter | 2007-01-06 05:45:54 +0100 (Sat, 06 Jan 2007) | 1 line update to (c) years to include 2007 ........ r53291 | neal.norwitz | 2007-01-06 22:24:35 +0100 (Sat, 06 Jan 2007) | 1 line Add Josiah to SF for maintaining asyncore/asynchat ........ r53293 | peter.astrand | 2007-01-07 09:53:46 +0100 (Sun, 07 Jan 2007) | 1 line Re-implemented fix for #1531862 once again, in a way that works with Python 2.2. Fixes bug #1603424. ........ r53295 | peter.astrand | 2007-01-07 15:34:16 +0100 (Sun, 07 Jan 2007) | 1 line Avoid O(N**2) bottleneck in _communicate_(). Fixes #1598181. ........ r53300 | raymond.hettinger | 2007-01-08 19:09:20 +0100 (Mon, 08 Jan 2007) | 1 line Fix zero-length corner case for iterating over a mutating deque. ........ r53301 | vinay.sajip | 2007-01-08 19:50:32 +0100 (Mon, 08 Jan 2007) | 4 lines Bare except clause removed from SMTPHandler.emit(). Now, only ImportError is trapped. Bare except clause removed from SocketHandler.createSocket(). Now, only socket.error is trapped. (SF #411881) ........ r53302 | vinay.sajip | 2007-01-08 19:51:46 +0100 (Mon, 08 Jan 2007) | 2 lines Bare except clause removed from LogRecord.__init__. 
Now, only ValueError, TypeError and AttributeError are trapped. (SF #411881) ........ r53303 | vinay.sajip | 2007-01-08 19:52:36 +0100 (Mon, 08 Jan 2007) | 1 line Added entries about removal of some bare except clauses from logging. ........
1531 lines
53 KiB
Python
1531 lines
53 KiB
Python
"""Open an arbitrary URL.
|
|
|
|
See the following document for more info on URLs:
|
|
"Names and Addresses, URIs, URLs, URNs, URCs", at
|
|
http://www.w3.org/pub/WWW/Addressing/Overview.html
|
|
|
|
See also the HTTP spec (from which the error codes are derived):
|
|
"HTTP - Hypertext Transfer Protocol", at
|
|
http://www.w3.org/pub/WWW/Protocols/
|
|
|
|
Related standards and specs:
|
|
- RFC1808: the "relative URL" spec. (authoritative status)
|
|
- RFC1738 - the "URL standard". (authoritative status)
|
|
- RFC1630 - the "URI spec". (informational status)
|
|
|
|
The object returned by URLopener().open(file) will differ per
|
|
protocol. All you know is that it has methods read(), readline(),
|
|
readlines(), fileno(), close() and info(). The read*(), fileno()
|
|
and close() methods work like those of open files.
|
|
The info() method returns a mimetools.Message object which can be
|
|
used to query various info about the object, if available.
|
|
(mimetools.Message objects are queried with the getheader() method.)
|
|
"""
|
|
|
|
import string
|
|
import socket
|
|
import os
|
|
import time
|
|
import sys
|
|
from urlparse import urljoin as basejoin
|
|
|
|
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
|
|
"urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
|
|
"urlencode", "url2pathname", "pathname2url", "splittag",
|
|
"localhost", "thishost", "ftperrors", "basejoin", "unwrap",
|
|
"splittype", "splithost", "splituser", "splitpasswd", "splitport",
|
|
"splitnport", "splitquery", "splitattr", "splitvalue",
|
|
"splitgophertype", "getproxies"]
|
|
|
|
__version__ = '1.17' # XXX This version is not always updated :-(
|
|
|
|
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
|
|
|
|
# Helper for non-unix systems: pick the platform-specific converters when
# one exists, otherwise fall back to plain URL (un)quoting.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def pathname2url(pathname):
        """Convert a file system path to a relative 'file' scheme URL.

        This conversion is OS-specific and not recommended for general use.
        On this platform it simply quotes the path.
        """
        return quote(pathname)

    def url2pathname(pathname):
        """Convert a relative 'file' scheme URL to a file system path.

        This conversion is OS-specific and not recommended for general use.
        On this platform it simply unquotes the URL.
        """
        return unquote(pathname)
|
|
|
|
# This really consists of two pieces:
|
|
# (1) a class which handles opening of all sorts of URLs
|
|
# (plus assorted utilities etc.)
|
|
# (2) a set of functions for parsing URLs
|
|
# XXX Should these be separated out into different modules?
|
|
|
|
|
|
# Shortcut for basic usage: a lazily created opener shared by the
# module-level convenience functions.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    When *proxies* is given, a fresh FancyURLopener using those proxies
    serves this call only.  Otherwise the shared module-level opener is
    used, and created first if it does not exist yet.  *data* is forwarded
    to the opener's open() only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        # One-off opener; deliberately not stored in the shared slot.
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
|
|
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* through the shared module-level opener.

    The shared opener is created on first use.  All arguments are passed
    straight to its retrieve() method, whose result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
|
|
def urlcleanup():
    """Call cleanup() on the shared module-level opener, if one exists."""
    opener = _urlopener
    if opener:
        opener.cleanup()
|
|
|
|
# Raised when the downloaded size does not match the Content-Length header.
class ContentTooShortError(IOError):
    """IOError that also carries what was retrieved before the shortfall.

    message -- text passed on to IOError
    content -- stored unchanged on the instance as .content
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
|
|
|
|
ftpcache = {}
|
|
class URLopener:
|
|
"""Class to open URLs.
|
|
This is a class rather than just a subroutine because we may need
|
|
more than one set of global protocol-specific options.
|
|
Note -- this is a base class for those who don't want the
|
|
automatic handling of errors type 302 (relocated) and 401
|
|
(authorization needed)."""
|
|
|
|
__tempfiles = None
|
|
|
|
version = "Python-urllib/%s" % __version__
|
|
|
|
# Constructor
|
|
def __init__(self, proxies=None, **x509):
|
|
if proxies is None:
|
|
proxies = getproxies()
|
|
assert hasattr(proxies, 'keys'), "proxies must be a mapping"
|
|
self.proxies = proxies
|
|
self.key_file = x509.get('key_file')
|
|
self.cert_file = x509.get('cert_file')
|
|
self.addheaders = [('User-Agent', self.version)]
|
|
self.__tempfiles = []
|
|
self.__unlink = os.unlink # See cleanup()
|
|
self.tempcache = None
|
|
# Undocumented feature: if you assign {} to tempcache,
|
|
# it is used to cache files retrieved with
|
|
# self.retrieve(). This is not enabled by default
|
|
# since it does not work for changing documents (and I
|
|
# haven't got the logic to check expiration headers
|
|
# yet).
|
|
self.ftpcache = ftpcache
|
|
# Undocumented feature: you can use a different
|
|
# ftp cache by assigning to the .ftpcache member;
|
|
# in case you want logically independent URL openers
|
|
# XXX This is not threadsafe. Bah.
|
|
|
|
def __del__(self):
|
|
self.close()
|
|
|
|
def close(self):
|
|
self.cleanup()
|
|
|
|
def cleanup(self):
|
|
# This code sometimes runs when the rest of this module
|
|
# has already been deleted, so it can't use any globals
|
|
# or import anything.
|
|
if self.__tempfiles:
|
|
for file in self.__tempfiles:
|
|
try:
|
|
self.__unlink(file)
|
|
except OSError:
|
|
pass
|
|
del self.__tempfiles[:]
|
|
if self.tempcache:
|
|
self.tempcache.clear()
|
|
|
|
def addheader(self, *args):
|
|
"""Add a header to be used by the HTTP interface only
|
|
e.g. u.addheader('Accept', 'sound/basic')"""
|
|
self.addheaders.append(args)
|
|
|
|
# External interface
|
|
def open(self, fullurl, data=None):
|
|
"""Use URLopener().open(file) instead of open(file, 'r')."""
|
|
fullurl = unwrap(toBytes(fullurl))
|
|
if self.tempcache and fullurl in self.tempcache:
|
|
filename, headers = self.tempcache[fullurl]
|
|
fp = open(filename, 'rb')
|
|
return addinfourl(fp, headers, fullurl)
|
|
urltype, url = splittype(fullurl)
|
|
if not urltype:
|
|
urltype = 'file'
|
|
if urltype in self.proxies:
|
|
proxy = self.proxies[urltype]
|
|
urltype, proxyhost = splittype(proxy)
|
|
host, selector = splithost(proxyhost)
|
|
url = (host, fullurl) # Signal special case to open_*()
|
|
else:
|
|
proxy = None
|
|
name = 'open_' + urltype
|
|
self.type = urltype
|
|
name = name.replace('-', '_')
|
|
if not hasattr(self, name):
|
|
if proxy:
|
|
return self.open_unknown_proxy(proxy, fullurl, data)
|
|
else:
|
|
return self.open_unknown(fullurl, data)
|
|
try:
|
|
if data is None:
|
|
return getattr(self, name)(url)
|
|
else:
|
|
return getattr(self, name)(url, data)
|
|
except socket.error, msg:
|
|
raise IOError, ('socket error', msg), sys.exc_info()[2]
|
|
|
|
def open_unknown(self, fullurl, data=None):
|
|
"""Overridable interface to open unknown URL type."""
|
|
type, url = splittype(fullurl)
|
|
raise IOError, ('url error', 'unknown url type', type)
|
|
|
|
def open_unknown_proxy(self, proxy, fullurl, data=None):
|
|
"""Overridable interface to open unknown URL type."""
|
|
type, url = splittype(fullurl)
|
|
raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
|
|
|
|
# External interface
|
|
def retrieve(self, url, filename=None, reporthook=None, data=None):
|
|
"""retrieve(url) returns (filename, headers) for a local object
|
|
or (tempfilename, headers) for a remote object."""
|
|
url = unwrap(toBytes(url))
|
|
if self.tempcache and url in self.tempcache:
|
|
return self.tempcache[url]
|
|
type, url1 = splittype(url)
|
|
if filename is None and (not type or type == 'file'):
|
|
try:
|
|
fp = self.open_local_file(url1)
|
|
hdrs = fp.info()
|
|
del fp
|
|
return url2pathname(splithost(url1)[1]), hdrs
|
|
except IOError, msg:
|
|
pass
|
|
fp = self.open(url, data)
|
|
headers = fp.info()
|
|
if filename:
|
|
tfp = open(filename, 'wb')
|
|
else:
|
|
import tempfile
|
|
garbage, path = splittype(url)
|
|
garbage, path = splithost(path or "")
|
|
path, garbage = splitquery(path or "")
|
|
path, garbage = splitattr(path or "")
|
|
suffix = os.path.splitext(path)[1]
|
|
(fd, filename) = tempfile.mkstemp(suffix)
|
|
self.__tempfiles.append(filename)
|
|
tfp = os.fdopen(fd, 'wb')
|
|
result = filename, headers
|
|
if self.tempcache is not None:
|
|
self.tempcache[url] = result
|
|
bs = 1024*8
|
|
size = -1
|
|
read = 0
|
|
blocknum = 0
|
|
if reporthook:
|
|
if "content-length" in headers:
|
|
size = int(headers["Content-Length"])
|
|
reporthook(blocknum, bs, size)
|
|
while 1:
|
|
block = fp.read(bs)
|
|
if block == "":
|
|
break
|
|
read += len(block)
|
|
tfp.write(block)
|
|
blocknum += 1
|
|
if reporthook:
|
|
reporthook(blocknum, bs, size)
|
|
fp.close()
|
|
tfp.close()
|
|
del fp
|
|
del tfp
|
|
|
|
# raise exception if actual size does not match content-length header
|
|
if size >= 0 and read < size:
|
|
raise ContentTooShortError("retrieval incomplete: got only %i out "
|
|
"of %i bytes" % (read, size), result)
|
|
|
|
return result
|
|
|
|
# Each method named open_<type> knows how to open that type of URL
|
|
|
|
def open_http(self, url, data=None):
|
|
"""Use HTTP protocol."""
|
|
import httplib
|
|
user_passwd = None
|
|
proxy_passwd= None
|
|
if isinstance(url, str):
|
|
host, selector = splithost(url)
|
|
if host:
|
|
user_passwd, host = splituser(host)
|
|
host = unquote(host)
|
|
realhost = host
|
|
else:
|
|
host, selector = url
|
|
# check whether the proxy contains authorization information
|
|
proxy_passwd, host = splituser(host)
|
|
# now we proceed with the url we want to obtain
|
|
urltype, rest = splittype(selector)
|
|
url = rest
|
|
user_passwd = None
|
|
if urltype.lower() != 'http':
|
|
realhost = None
|
|
else:
|
|
realhost, rest = splithost(rest)
|
|
if realhost:
|
|
user_passwd, realhost = splituser(realhost)
|
|
if user_passwd:
|
|
selector = "%s://%s%s" % (urltype, realhost, rest)
|
|
if proxy_bypass(realhost):
|
|
host = realhost
|
|
|
|
#print "proxy via http:", host, selector
|
|
if not host: raise IOError, ('http error', 'no host given')
|
|
|
|
if proxy_passwd:
|
|
import base64
|
|
proxy_auth = base64.b64encode(proxy_passwd).strip()
|
|
else:
|
|
proxy_auth = None
|
|
|
|
if user_passwd:
|
|
import base64
|
|
auth = base64.b64encode(user_passwd).strip()
|
|
else:
|
|
auth = None
|
|
h = httplib.HTTP(host)
|
|
if data is not None:
|
|
h.putrequest('POST', selector)
|
|
h.putheader('Content-Type', 'application/x-www-form-urlencoded')
|
|
h.putheader('Content-Length', '%d' % len(data))
|
|
else:
|
|
h.putrequest('GET', selector)
|
|
if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
|
|
if auth: h.putheader('Authorization', 'Basic %s' % auth)
|
|
if realhost: h.putheader('Host', realhost)
|
|
for args in self.addheaders: h.putheader(*args)
|
|
h.endheaders()
|
|
if data is not None:
|
|
h.send(data)
|
|
errcode, errmsg, headers = h.getreply()
|
|
fp = h.getfile()
|
|
if errcode == 200:
|
|
return addinfourl(fp, headers, "http:" + url)
|
|
else:
|
|
if data is None:
|
|
return self.http_error(url, fp, errcode, errmsg, headers)
|
|
else:
|
|
return self.http_error(url, fp, errcode, errmsg, headers, data)
|
|
|
|
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
|
|
"""Handle http errors.
|
|
Derived class can override this, or provide specific handlers
|
|
named http_error_DDD where DDD is the 3-digit error code."""
|
|
# First check if there's a specific handler for this error
|
|
name = 'http_error_%d' % errcode
|
|
if hasattr(self, name):
|
|
method = getattr(self, name)
|
|
if data is None:
|
|
result = method(url, fp, errcode, errmsg, headers)
|
|
else:
|
|
result = method(url, fp, errcode, errmsg, headers, data)
|
|
if result: return result
|
|
return self.http_error_default(url, fp, errcode, errmsg, headers)
|
|
|
|
def http_error_default(self, url, fp, errcode, errmsg, headers):
|
|
"""Default error handler: close the connection and raise IOError."""
|
|
void = fp.read()
|
|
fp.close()
|
|
raise IOError, ('http error', errcode, errmsg, headers)
|
|
|
|
if hasattr(socket, "ssl"):
|
|
def open_https(self, url, data=None):
|
|
"""Use HTTPS protocol."""
|
|
import httplib
|
|
user_passwd = None
|
|
proxy_passwd = None
|
|
if isinstance(url, str):
|
|
host, selector = splithost(url)
|
|
if host:
|
|
user_passwd, host = splituser(host)
|
|
host = unquote(host)
|
|
realhost = host
|
|
else:
|
|
host, selector = url
|
|
# here, we determine, whether the proxy contains authorization information
|
|
proxy_passwd, host = splituser(host)
|
|
urltype, rest = splittype(selector)
|
|
url = rest
|
|
user_passwd = None
|
|
if urltype.lower() != 'https':
|
|
realhost = None
|
|
else:
|
|
realhost, rest = splithost(rest)
|
|
if realhost:
|
|
user_passwd, realhost = splituser(realhost)
|
|
if user_passwd:
|
|
selector = "%s://%s%s" % (urltype, realhost, rest)
|
|
#print "proxy via https:", host, selector
|
|
if not host: raise IOError, ('https error', 'no host given')
|
|
if proxy_passwd:
|
|
import base64
|
|
proxy_auth = base64.b64encode(proxy_passwd).strip()
|
|
else:
|
|
proxy_auth = None
|
|
if user_passwd:
|
|
import base64
|
|
auth = base64.b64encode(user_passwd).strip()
|
|
else:
|
|
auth = None
|
|
h = httplib.HTTPS(host, 0,
|
|
key_file=self.key_file,
|
|
cert_file=self.cert_file)
|
|
if data is not None:
|
|
h.putrequest('POST', selector)
|
|
h.putheader('Content-Type',
|
|
'application/x-www-form-urlencoded')
|
|
h.putheader('Content-Length', '%d' % len(data))
|
|
else:
|
|
h.putrequest('GET', selector)
|
|
if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
|
|
if auth: h.putheader('Authorization', 'Basic %s' % auth)
|
|
if realhost: h.putheader('Host', realhost)
|
|
for args in self.addheaders: h.putheader(*args)
|
|
h.endheaders()
|
|
if data is not None:
|
|
h.send(data)
|
|
errcode, errmsg, headers = h.getreply()
|
|
fp = h.getfile()
|
|
if errcode == 200:
|
|
return addinfourl(fp, headers, "https:" + url)
|
|
else:
|
|
if data is None:
|
|
return self.http_error(url, fp, errcode, errmsg, headers)
|
|
else:
|
|
return self.http_error(url, fp, errcode, errmsg, headers,
|
|
data)
|
|
|
|
def open_gopher(self, url):
|
|
"""Use Gopher protocol."""
|
|
if not isinstance(url, str):
|
|
raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented')
|
|
import gopherlib
|
|
host, selector = splithost(url)
|
|
if not host: raise IOError, ('gopher error', 'no host given')
|
|
host = unquote(host)
|
|
type, selector = splitgophertype(selector)
|
|
selector, query = splitquery(selector)
|
|
selector = unquote(selector)
|
|
if query:
|
|
query = unquote(query)
|
|
fp = gopherlib.send_query(selector, query, host)
|
|
else:
|
|
fp = gopherlib.send_selector(selector, host)
|
|
return addinfourl(fp, noheaders(), "gopher:" + url)
|
|
|
|
def open_file(self, url):
|
|
"""Use local file or FTP depending on form of URL."""
|
|
if not isinstance(url, str):
|
|
raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
|
|
if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
|
|
return self.open_ftp(url)
|
|
else:
|
|
return self.open_local_file(url)
|
|
|
|
def open_local_file(self, url):
|
|
"""Use local file."""
|
|
import mimetypes, mimetools, email.Utils
|
|
try:
|
|
from cStringIO import StringIO
|
|
except ImportError:
|
|
from StringIO import StringIO
|
|
host, file = splithost(url)
|
|
localname = url2pathname(file)
|
|
try:
|
|
stats = os.stat(localname)
|
|
except OSError, e:
|
|
raise IOError(e.errno, e.strerror, e.filename)
|
|
size = stats.st_size
|
|
modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
|
|
mtype = mimetypes.guess_type(url)[0]
|
|
headers = mimetools.Message(StringIO(
|
|
'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
|
|
(mtype or 'text/plain', size, modified)))
|
|
if not host:
|
|
urlfile = file
|
|
if file[:1] == '/':
|
|
urlfile = 'file://' + file
|
|
return addinfourl(open(localname, 'rb'),
|
|
headers, urlfile)
|
|
host, port = splitport(host)
|
|
if not port \
|
|
and socket.gethostbyname(host) in (localhost(), thishost()):
|
|
urlfile = file
|
|
if file[:1] == '/':
|
|
urlfile = 'file://' + file
|
|
return addinfourl(open(localname, 'rb'),
|
|
headers, urlfile)
|
|
raise IOError, ('local file error', 'not on local host')
|
|
|
|
def open_ftp(self, url):
|
|
"""Use FTP protocol."""
|
|
if not isinstance(url, str):
|
|
raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
|
|
import mimetypes, mimetools
|
|
try:
|
|
from cStringIO import StringIO
|
|
except ImportError:
|
|
from StringIO import StringIO
|
|
host, path = splithost(url)
|
|
if not host: raise IOError, ('ftp error', 'no host given')
|
|
host, port = splitport(host)
|
|
user, host = splituser(host)
|
|
if user: user, passwd = splitpasswd(user)
|
|
else: passwd = None
|
|
host = unquote(host)
|
|
user = unquote(user or '')
|
|
passwd = unquote(passwd or '')
|
|
host = socket.gethostbyname(host)
|
|
if not port:
|
|
import ftplib
|
|
port = ftplib.FTP_PORT
|
|
else:
|
|
port = int(port)
|
|
path, attrs = splitattr(path)
|
|
path = unquote(path)
|
|
dirs = path.split('/')
|
|
dirs, file = dirs[:-1], dirs[-1]
|
|
if dirs and not dirs[0]: dirs = dirs[1:]
|
|
if dirs and not dirs[0]: dirs[0] = '/'
|
|
key = user, host, port, '/'.join(dirs)
|
|
# XXX thread unsafe!
|
|
if len(self.ftpcache) > MAXFTPCACHE:
|
|
# Prune the cache, rather arbitrarily
|
|
for k in self.ftpcache.keys():
|
|
if k != key:
|
|
v = self.ftpcache[k]
|
|
del self.ftpcache[k]
|
|
v.close()
|
|
try:
|
|
if not key in self.ftpcache:
|
|
self.ftpcache[key] = \
|
|
ftpwrapper(user, passwd, host, port, dirs)
|
|
if not file: type = 'D'
|
|
else: type = 'I'
|
|
for attr in attrs:
|
|
attr, value = splitvalue(attr)
|
|
if attr.lower() == 'type' and \
|
|
value in ('a', 'A', 'i', 'I', 'd', 'D'):
|
|
type = value.upper()
|
|
(fp, retrlen) = self.ftpcache[key].retrfile(file, type)
|
|
mtype = mimetypes.guess_type("ftp:" + url)[0]
|
|
headers = ""
|
|
if mtype:
|
|
headers += "Content-Type: %s\n" % mtype
|
|
if retrlen is not None and retrlen >= 0:
|
|
headers += "Content-Length: %d\n" % retrlen
|
|
headers = mimetools.Message(StringIO(headers))
|
|
return addinfourl(fp, headers, "ftp:" + url)
|
|
except ftperrors(), msg:
|
|
raise IOError, ('ftp error', msg), sys.exc_info()[2]
|
|
|
|
def open_data(self, url, data=None):
    """Use "data" URL.

    The part of *url* before the first comma is taken as the media
    type (default 'text/plain;charset=US-ASCII'); a trailing
    ";base64" marks the payload as base64, otherwise the payload is
    percent-decoded. *data* (POSTed data) is ignored.

    Returns an addinfourl object over an in-memory message with Date,
    Content-type and Content-Length headers followed by the payload.
    Raises IOError for a non-string url or a url without a comma.
    """
    if not isinstance(url, str):
        raise IOError('data error', 'proxy support for data protocol currently not implemented')
    # ignore POSTed data
    #
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    try:
        [type, data] = url.split(',', 1)
    except ValueError:
        raise IOError('data error', 'bad data URL')
    if not type:
        type = 'text/plain;charset=US-ASCII'
    # A final ";token" without '=' is the encoding, not a parameter.
    semi = type.rfind(';')
    if semi >= 0 and '=' not in type[semi:]:
        encoding = type[semi+1:]
        type = type[:semi]
    else:
        encoding = ''
    msg = []
    # '%T' is not a portable strftime directive; spell it out as
    # '%H:%M:%S' so the Date header is produced on every platform.
    msg.append('Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                          time.gmtime(time.time())))
    msg.append('Content-type: %s' % type)
    if encoding == 'base64':
        import base64
        data = base64.decodestring(data)
    else:
        data = unquote(data)
    msg.append('Content-Length: %d' % len(data))
    msg.append('')
    msg.append(data)
    msg = '\n'.join(msg)
    f = StringIO(msg)
    # Parsing the headers leaves f positioned at the start of the payload.
    headers = mimetools.Message(f, 0)
    return addinfourl(f, headers, url)
|
|
|
|
|
|
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds handlers for HTTP redirects (301, 302, 303, 307) and for
    Basic authentication challenges (401, 407) on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # 'realm@host' -> (user, passwd)
        self.tries = 0          # redirects followed in the current chain
        self.maxtries = 10      # redirect limit; a false value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless self.maxtries redirects have been
        chained, in which case the response is handed to
        http_error_500 (when defined) or http_error_default with a
        synthetic 500 status.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        finally:
            # Reset even when following the redirect raises, so a
            # failed chain does not count against later requests.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the Location (or URI) header.

        Returns None when neither header is present. *data* is not
        forwarded to the new request.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Consume and discard the body of the redirect response.
        fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.

        This function supports Basic authentication only. Anything
        else is handed to URLopener.http_error_default.
        """
        # NOTE(review): URLopener.http_error_default is defined outside
        # this class; it presumably raises. The explicit returns keep
        # control flow correct even if it does not.
        if 'www-authenticate' not in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        # Dispatch on the URL type, e.g. retry_http_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.

        This function supports Basic authentication only. Anything
        else is handed to URLopener.http_error_default.
        """
        # NOTE(review): see http_error_401 about the explicit returns.
        if 'proxy-authenticate' not in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        # Dispatch on the URL type, e.g. retry_proxy_http_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy.

        Rewrites self.proxies['http'] to embed user:passwd and reopens
        the URL. Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # those evidently failed, so the cached entry is discarded.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy.

        Rewrites self.proxies['https'] to embed user:passwd and reopens
        the URL. Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Non-zero i: credentials were already present and failed.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd embedded in the host.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        # Non-zero i: credentials were already present and failed.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd embedded in the host.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        # Non-zero i: credentials were already present and failed.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for *realm* at *host*.

        Cached credentials are returned unless *clear_cache* is true,
        in which case the cached entry is dropped and the user is
        prompted again. Non-empty answers are stored in the cache.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Prompts on stdout/stdin for a user name and uses getpass for
        the password. Returns (None, None) on KeyboardInterrupt.
        """
        import getpass
        import sys
        try:
            sys.stdout.write("Enter username for %s at %s: " % (realm, host))
            sys.stdout.flush()
            # readline() keeps the line terminator; strip it so it does
            # not become part of the user name.
            user = sys.stdin.readline().rstrip('\r\n')
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
|
|
|
|
|
|
# Utility functions
|
|
|
|
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed once; the result is cached in the
    module-level _localhost.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
|
|
|
|
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The address of socket.gethostname() is resolved once and cached in
    the module-level _thishost.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
|
|
|
|
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on the first call; ftplib.all_errors is cached
    in the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
|
|
|
|
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The message is built once from an empty StringIO and cached in the
    module-level _noheaders; every call returns that same object.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    empty = mimetools.Message(StringIO(), 0)
    empty.fp.close()    # Recycle file descriptor
    _noheaders = empty
    return _noheaders
|
|
|
|
|
|
# Utility classes
|
|
|
|
class ftpwrapper:
|
|
"""Class used by open_ftp() for cache of open FTP connections."""
|
|
|
|
def __init__(self, user, passwd, host, port, dirs):
|
|
self.user = user
|
|
self.passwd = passwd
|
|
self.host = host
|
|
self.port = port
|
|
self.dirs = dirs
|
|
self.init()
|
|
|
|
def init(self):
|
|
import ftplib
|
|
self.busy = 0
|
|
self.ftp = ftplib.FTP()
|
|
self.ftp.connect(self.host, self.port)
|
|
self.ftp.login(self.user, self.passwd)
|
|
for dir in self.dirs:
|
|
self.ftp.cwd(dir)
|
|
|
|
def retrfile(self, file, type):
|
|
import ftplib
|
|
self.endtransfer()
|
|
if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
|
|
else: cmd = 'TYPE ' + type; isdir = 0
|
|
try:
|
|
self.ftp.voidcmd(cmd)
|
|
except ftplib.all_errors:
|
|
self.init()
|
|
self.ftp.voidcmd(cmd)
|
|
conn = None
|
|
if file and not isdir:
|
|
# Try to retrieve as a file
|
|
try:
|
|
cmd = 'RETR ' + file
|
|
conn = self.ftp.ntransfercmd(cmd)
|
|
except ftplib.error_perm, reason:
|
|
if str(reason)[:3] != '550':
|
|
raise IOError, ('ftp error', reason), sys.exc_info()[2]
|
|
if not conn:
|
|
# Set transfer mode to ASCII!
|
|
self.ftp.voidcmd('TYPE A')
|
|
# Try a directory listing
|
|
if file: cmd = 'LIST ' + file
|
|
else: cmd = 'LIST'
|
|
conn = self.ftp.ntransfercmd(cmd)
|
|
self.busy = 1
|
|
# Pass back both a suitably decorated object and a retrieval length
|
|
return (addclosehook(conn[0].makefile('rb'),
|
|
self.endtransfer), conn[1])
|
|
def endtransfer(self):
|
|
if not self.busy:
|
|
return
|
|
self.busy = 0
|
|
try:
|
|
self.ftp.voidresp()
|
|
except ftperrors():
|
|
pass
|
|
|
|
def close(self):
|
|
self.endtransfer()
|
|
try:
|
|
self.ftp.close()
|
|
except ftperrors():
|
|
pass
|
|
|
|
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its read, readline and,
    when present, readlines, fileno, __iter__ and next as instance
    attributes.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            # No descriptor available: fileno() answers None.
            self.fileno = lambda: None
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
|
|
|
|
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook(*hookargs) is called once, after the file is closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the file, then run and forget the hook."""
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        self.closehook = None
        self.hookargs = None
|
|
|
|
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
|
|
|
|
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
|
|
|
|
|
|
# Utilities to parse URLs (most of these return None for missing parts):
|
|
# unwrap('<URL:type://host/path>') --> 'type://host/path'
|
|
# splittype('type:opaquestring') --> 'type', 'opaquestring'
|
|
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
|
|
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
|
|
# splitpasswd('user:passwd') -> 'user', 'passwd'
|
|
# splitport('host:port') --> 'host', 'port'
|
|
# splitquery('/path?query') --> '/path', 'query'
|
|
# splittag('/path#tag') --> '/path', 'tag'
|
|
# splitattr('/path;attr1=value1;attr2=value2;...') ->
|
|
# '/path', ['attr1=value1', 'attr2=value2', ...]
|
|
# splitvalue('attr=value') --> 'attr', 'value'
|
|
# splitgophertype('/Xselector') --> 'X', 'selector'
|
|
# unquote('abc%20def') -> 'abc def'
|
|
# quote('abc def') -> 'abc%20def'
|
|
|
|
try:
|
|
unicode
|
|
except NameError:
|
|
def _is_unicode(x):
|
|
return 0
|
|
else:
|
|
def _is_unicode(x):
|
|
return isinstance(x, unicode)
|
|
|
|
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Unicode input is encoded as ASCII; anything else is returned
    unchanged. Raises UnicodeError when the URL contains non-ASCII
    characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
|
|
|
|
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, one pair of enclosing angle
    brackets and a leading 'URL:' marker, in that order.
    """
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
|
|
|
|
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased. Without a leading 'scheme:'
    the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    rest = url[len(scheme) + 1:]
    return scheme.lower(), rest
|
|
|
|
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    When url does not start with '//' the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.group(1, 2)
|
|
|
|
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split is made at the last '@'. Both parts are passed through
    unquote() and returned as a two-element list; without an '@' the
    result is (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
|
|
|
|
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is made at the first ':'. Without a ':' the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
|
|
|
|
# splitport('host:port') --> 'host', 'port'
|
|
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits. When host does not end
    in ':' followed by digits the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
|
|
|
|
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number.
    """
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.group(1, 2)
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            # Not a number: leave nport as None.
            pass
    return host, nport
|
|
|
|
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is made at the last '?'. Without a '?' the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
|
|
|
|
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is made at the last '#'. Without a '#' the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
|
|
|
|
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
|
|
|
|
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '='. Without an '=' the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
|
|
|
|
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    When selector does not consist of '/' followed by at least one
    more character, the result is (None, selector).
    """
    if selector.startswith('/') and len(selector) > 1:
        gtype = selector[1]
        rest = selector[2:]
        return gtype, rest
    return None, selector
|
|
|
|
# Map every two-digit hex string, in any mix of upper and lower case,
# to the character it encodes. Building the table from all digit pairs
# (rather than only '%02x' and '%02X' renderings) is what lets
# mixed-case escapes such as '%aB' be decoded.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%XX' escape (hex digits in any case) is replaced by the
    character it encodes. A '%' not followed by two hex digits is
    left unchanged.
    """
    parts = s.split('%')
    decoded = [parts[0]]
    for item in parts[1:]:
        try:
            decoded.append(_hextochr[item[:2]] + item[2:])
        except KeyError:
            # Not a valid escape: put the '%' back untouched.
            decoded.append('%' + item)
        except UnicodeDecodeError:
            # Concatenating a non-ASCII byte with a unicode remainder
            # failed; use the unicode character instead.
            decoded.append(unichr(int(item[:2], 16)) + item[2:])
    return "".join(decoded)
|
|
|
|
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first replaced by a space.
    """
    return unquote(s.replace('+', ' '))
|
|
|
|
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters in *safe* and in always_safe are left alone; every
    other character is replaced by its '%XX' escape (upper-case hex).
    """
    cachekey = (safe, always_safe)
    safe_map = _safemaps.get(cachekey)
    if safe_map is None:
        # First use of this 'safe' set: build the translation table
        # for all 256 character codes and remember it.
        keep = safe + always_safe
        safe_map = {}
        for code in range(256):
            ch = chr(code)
            if ch in keep:
                safe_map[ch] = ch
            else:
                safe_map[ch] = '%%%02X' % code
        _safemaps[cachekey] = safe_map
    return ''.join([safe_map[ch] for ch in s])
|
|
|
|
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Apart from spaces, quoting is done by quote(s, safe).
    """
    if ' ' not in s:
        return quote(s, safe)
    # Shield the spaces from quote(), then turn them into '+'.
    quoted = quote(s, safe + ' ')
    return quoted.replace(' ', '+')
|
|
|
|
def urlencode(query,doseq=0):
|
|
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
|
|
|
|
If any values in the query arg are sequences and doseq is true, each
|
|
sequence element is converted to a separate parameter.
|
|
|
|
If the query arg is a sequence of two-element tuples, the order of the
|
|
parameters in the output will match the order of parameters in the
|
|
input.
|
|
"""
|
|
|
|
if hasattr(query,"items"):
|
|
# mapping objects
|
|
query = query.items()
|
|
else:
|
|
# it's a bother at times that strings and string-like objects are
|
|
# sequences...
|
|
try:
|
|
# non-sequence items should not work with len()
|
|
# non-empty strings will fail this
|
|
if len(query) and not isinstance(query[0], tuple):
|
|
raise TypeError
|
|
# zero-length sequences of all types will get here and succeed,
|
|
# but that's a minor nit - since the original implementation
|
|
# allowed empty dicts that type of behavior probably should be
|
|
# preserved for consistency
|
|
except TypeError:
|
|
ty,va,tb = sys.exc_info()
|
|
raise TypeError, "not a valid non-string sequence or mapping object", tb
|
|
|
|
l = []
|
|
if not doseq:
|
|
# preserve old behavior
|
|
for k, v in query:
|
|
k = quote_plus(str(k))
|
|
v = quote_plus(str(v))
|
|
l.append(k + '=' + v)
|
|
else:
|
|
for k, v in query:
|
|
k = quote_plus(str(k))
|
|
if isinstance(v, str):
|
|
v = quote_plus(v)
|
|
l.append(k + '=' + v)
|
|
elif _is_unicode(v):
|
|
# is there a reasonable way to convert to ASCII?
|
|
# encode generates a string, but "replace" or "ignore"
|
|
# lose information and "strict" can raise UnicodeError
|
|
v = quote_plus(v.encode("ASCII","replace"))
|
|
l.append(k + '=' + v)
|
|
else:
|
|
try:
|
|
# is this a sufficient test for sequence-ness?
|
|
x = len(v)
|
|
except TypeError:
|
|
# not a sequence
|
|
v = quote_plus(str(v))
|
|
l.append(k + '=' + v)
|
|
else:
|
|
# loop over the sequence
|
|
for elt in v:
|
|
l.append(k + '=' + quote_plus(str(elt)))
|
|
return '&'.join(l)
|
|
|
|
# Proxy handling
|
|
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared case-insensitively; variables with an
    empty value are ignored.
    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        name = name.lower()
        if name.endswith(suffix):
            scheme = name[:-len(suffix)]
            proxies[scheme] = value
    return proxies
|
|
|
|
# Pick the platform-specific getproxies() and proxy_bypass().
if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        Returns an empty dictionary when the ic module is missing or
        cannot be initialised.
        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

    def proxy_bypass(x):
        """Never bypass the proxy on this platform."""
        return 0

    def getproxies():
        """Return proxies from the environment or, failing that, Internet Config."""
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        Returns an empty dictionary when _winreg is unavailable or the
        registry values cannot be read; entries parsed before an error
        are kept.
        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        import re
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # Release the key on the error paths as well.
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.
        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass(host):
        """Return 1 when *host* matches the registry's ProxyOverride list, else 0.

        The host name, its IP address and its fully qualified name are
        each tested against every override entry; entries may use the
        glob characters '*' and '?', and '<local>' stands for the local
        machine's names and addresses.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                         'ProxyOverride')[0])
            finally:
                # Release the key whether or not the queries succeeded.
                internetSettings.Close()
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        overrides = []
        for entry in proxyOverride.split(';'):
            if entry == '<local>':
                overrides.extend(['localhost',
                                  '127.0.0.1',
                                  socket.gethostname(),
                                  socket.gethostbyname(
                                      socket.gethostname())])
            else:
                overrides.append(entry)
        # now check if we match one of the registry values.
        for test in overrides:
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        """Never bypass the proxy on this platform."""
        return 0
|
|
|
|
# Test and time quote() and unquote()
|
|
def test1():
    """Round-trip all 256 character codes through quote() and unquote().

    Prints 'Wrong!' on a mismatch, then the input, the quoted form,
    the unquoted form and the elapsed time.
    """
    s = ''.join([chr(i) for i in range(256)]) * 4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print('Wrong!')
    print(repr(s))
    print(repr(qs))
    print(repr(uqs))
    print('%s sec' % round(t1 - t0, 3))
|
|
|
|
|
|
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line; used as the urlretrieve() hook in test()."""
    # Report during remote transfers
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
|
|
|
|
# Test program
|
|
def test(args=None):
    """Retrieve each URL in *args* and print its headers and contents.

    When *args* is empty or omitted, a built-in list of sample URLs is
    used, plus an https URL when URLopener supports https. Temporary
    files are removed with urlcleanup() when done, also on errors.
    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/pub/python/README',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('%s %s %s' % ('-'*10, url, '-'*10))
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys():
                    print('%s: %s' % (k, h[k]))
                print('======')
            # Close the file explicitly instead of relying on the
            # reference count dropping to zero.
            fp = open(fn, 'rb')
            try:
                data = fp.read()
            finally:
                fp.close()
            # Strip carriage returns before printing.
            if '\r' in data:
                data = data.replace('\r', '')
            print(data)
            fn, h = None, None
        print('-'*40)
    finally:
        urlcleanup()
|
|
|
|
def main():
|
|
import getopt, sys
|
|
try:
|
|
opts, args = getopt.getopt(sys.argv[1:], "th")
|
|
except getopt.error, msg:
|
|
print msg
|
|
print "Use -h for help"
|
|
return
|
|
t = 0
|
|
for o, a in opts:
|
|
if o == '-t':
|
|
t = t + 1
|
|
if o == '-h':
|
|
print "Usage: python urllib.py [-t] [url ...]"
|
|
print "-t runs self-test;",
|
|
print "otherwise, contents of urls are printed"
|
|
return
|
|
if t:
|
|
if t > 1:
|
|
test1()
|
|
test(args)
|
|
else:
|
|
if not args:
|
|
print "Use -h for help"
|
|
for url in args:
|
|
print urlopen(url).read(),
|
|
|
|
# Run test program when run as a script
|
|
if __name__ == '__main__':
|
|
main()
|