mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Issue #10050: urlretrieve now uses the newer urlopen. The reporthook of urlretrieve takes a block number, the block read size, and the file size.
This commit is contained in:
parent
a2251aadaa
commit
e24f96a059
5 changed files with 119 additions and 42 deletions
|
@ -94,6 +94,9 @@ import socket
|
|||
import sys
|
||||
import time
|
||||
import collections
|
||||
import tempfile
|
||||
import contextlib
|
||||
|
||||
|
||||
from urllib.error import URLError, HTTPError, ContentTooShortError
|
||||
from urllib.parse import (
|
||||
|
@ -156,17 +159,78 @@ def install_opener(opener):
|
|||
global _opener
|
||||
_opener = opener
|
||||
|
||||
# Legacy module-level opener slot.  urlretrieve() no longer creates or uses
# it, but the name stays defined so existing references keep resolving.
_urlopener = None

# Paths of temporary files created by urlretrieve(); urlcleanup() removes them.
_url_tempfiles = []


def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target. It is called once before the
    first read as (0, 0, size) and then once after every block that
    is written; size is -1 when the response carries no
    Content-Length header. The data argument should be valid URL
    encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.

    Raises ContentTooShortError if fewer bytes were received than
    the Content-Length header announced.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            # delete=False: the caller needs the file after it is closed.
            # Remember the path so urlcleanup() can remove it later.
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024 * 8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            if reporthook:
                reporthook(blocknum, 0, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, len(block), size)

    # Both the response and the output file are closed at this point.
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
|
||||
|
||||
def urlcleanup():
    """Remove temporary files left by urlretrieve() and reset the opener.

    Calls cleanup() on the legacy module-level ``_urlopener`` if one is
    set, deletes every path recorded in ``_url_tempfiles`` (files that are
    already gone or cannot be removed are skipped), empties that list, and
    clears the installed global opener.
    """
    global _opener

    if _urlopener:
        _urlopener.cleanup()

    for temp_file in _url_tempfiles:
        try:
            os.unlink(temp_file)
        except OSError:
            # Best effort: the file may already have been removed.
            pass

    # Empty the list in place so other references to it stay valid.
    del _url_tempfiles[:]

    _opener = None
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue