Issue #10050: urlretrieve now uses the newer urlopen. The reporthook of urlretrieve takes the block number, the block read size, and the file size.

This commit is contained in:
Senthil Kumaran 2012-03-13 19:29:33 -07:00
parent a2251aadaa
commit e24f96a059
5 changed files with 119 additions and 42 deletions

View file

@ -94,6 +94,9 @@ import socket
import sys
import time
import collections
import tempfile
import contextlib
from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
@ -156,17 +159,78 @@ def install_opener(opener):
global _opener
_opener = opener
# TODO(jhylton): Make this work with the same global opener.
# Module-level slot for a legacy opener object; urlcleanup() calls its
# cleanup() method whenever this is set to something truthy.
_urlopener = None
# Paths of the temporary files urlretrieve() creates when it is called
# without a filename; urlcleanup() unlinks them and empties this list.
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve a URL into a location on disk.

    Args:
        url: The URL to fetch. Required.
        filename: Optional destination path. When omitted, the payload is
            written to a newly created temporary file whose path is recorded
            in ``_url_tempfiles``. When omitted and *url* is a ``file:`` URL,
            nothing is copied and the local path is returned as-is.
        reporthook: Optional callable invoked as
            ``reporthook(block_number, bytes_read_in_block, total_size)``.
            It is called once before any data is read (block 0, 0 bytes) and
            once after every block written. ``total_size`` is -1 when the
            response carries no Content-Length header.
        data: Optional URL-encoded request body, passed through to urlopen().

    Returns:
        A ``(path, headers)`` tuple: the path of the file holding the data
        and the headers object returned by the response's ``info()``.

    Raises:
        ContentTooShortError: Fewer bytes were received than the
            Content-Length header announced. The partial ``(path, headers)``
            result is attached to the exception.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            # delete=False: the file has to outlive this call, so its path
            # is remembered in the module-level list instead.
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # Initial notification before any data has been transferred.
            if reporthook:
                reporthook(blocknum, 0, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, len(block), size)

    # Checked only after both the response and the output file are closed,
    # so a short read never leaves a handle open.
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
def urlcleanup():
    """Delete temporary files recorded by urlretrieve() and drop the opener.

    Calls ``cleanup()`` on the module-level ``_urlopener`` when one is set,
    unlinks every path recorded in ``_url_tempfiles`` (ignoring files that
    can no longer be removed), empties that list, and resets the installed
    global opener ``_opener`` to None.
    """
    global _opener

    if _urlopener:
        _urlopener.cleanup()

    for temp_file in _url_tempfiles:
        try:
            os.unlink(temp_file)
        except EnvironmentError:
            # Best effort: the file may already be gone or be undeletable;
            # neither should stop the rest of the cleanup.
            pass

    # Emptied in place: the name is not declared global here, so it must be
    # mutated rather than rebound.
    del _url_tempfiles[:]

    if _opener:
        _opener = None