mirror of
https://github.com/python/cpython.git
synced 2025-11-13 15:40:05 +00:00
Patch [ 1062060 ] fix for 1016880 urllib.urlretrieve silently truncates dwnld
This commit is contained in:
parent
568973181a
commit
b925602f16
3 changed files with 48 additions and 2 deletions
|
|
@ -142,6 +142,25 @@ If the \var{url} uses the \file{http:} scheme identifier, the optional
|
||||||
(normally the request type is \code{GET}). The \var{data} argument
|
(normally the request type is \code{GET}). The \var{data} argument
|
||||||
must be in standard \mimetype{application/x-www-form-urlencoded} format;
|
must be in standard \mimetype{application/x-www-form-urlencoded} format;
|
||||||
see the \function{urlencode()} function below.
|
see the \function{urlencode()} function below.
|
||||||
|
|
||||||
|
\versionchanged[
|
||||||
|
\function{urlretrieve()} will raise \exception{ContentTooShortError}
|
||||||
|
when it detects that the amount of data available
|
||||||
|
was less than the expected amount (which is the size reported by a
|
||||||
|
\var{Content-Length} header). This can occur, for example, when the
|
||||||
|
download is interrupted.
|
||||||
|
|
||||||
|
The \var{Content-Length} is treated as a lower bound: if there's more data
|
||||||
|
to read, urlretrieve reads more data, but if less data is available,
|
||||||
|
it raises the exception.
|
||||||
|
|
||||||
|
You can still retrieve the downloaded data in this case; it is stored
|
||||||
|
in the \member{content} attribute of the exception instance.
|
||||||
|
|
||||||
|
If no \var{Content-Length} header was supplied, urlretrieve cannot
|
||||||
|
check the size of the data it has downloaded, and just returns it.
|
||||||
|
In this case you just have to assume that the download was successful]{2.5}
|
||||||
|
|
||||||
\end{funcdesc}
|
\end{funcdesc}
|
||||||
|
|
||||||
\begin{datadesc}{_urlopener}
|
\begin{datadesc}{_urlopener}
|
||||||
|
|
@ -283,6 +302,15 @@ subclass may override this method to support more appropriate behavior
|
||||||
if needed.}
|
if needed.}
|
||||||
\end{classdesc}
|
\end{classdesc}
|
||||||
|
|
||||||
|
\begin{excclassdesc}{ContentTooShortError}{msg\optional{, content}}
|
||||||
|
This exception is raised when the \function{urlretrieve()} function
|
||||||
|
detects that the amount of the downloaded data is less than the
|
||||||
|
expected amount (given by the \var{Content-Length} header). The
|
||||||
|
\member{content} attribute stores the downloaded (and supposedly
|
||||||
|
truncated) data.
|
||||||
|
\versionadded{2.5}
|
||||||
|
\end{excclassdesc}
|
||||||
|
|
||||||
Restrictions:
|
Restrictions:
|
||||||
|
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
|
|
@ -317,7 +345,7 @@ Web client using these functions without using threads.
|
||||||
\item
|
\item
|
||||||
The data returned by \function{urlopen()} or \function{urlretrieve()}
|
The data returned by \function{urlopen()} or \function{urlretrieve()}
|
||||||
is the raw data returned by the server. This may be binary data
|
is the raw data returned by the server. This may be binary data
|
||||||
(e.g. an image), plain text or (for example) HTML\index{HTML}. The
|
(such as an image), plain text or (for example) HTML\index{HTML}. The
|
||||||
HTTP\indexii{HTTP}{protocol} protocol provides type information in the
|
HTTP\indexii{HTTP}{protocol} protocol provides type information in the
|
||||||
reply header, which can be inspected by looking at the
|
reply header, which can be inspected by looking at the
|
||||||
\mailheader{Content-Type} header. For the
|
\mailheader{Content-Type} header. For the
|
||||||
|
|
|
||||||
|
|
@ -86,6 +86,11 @@ def urlcleanup():
|
||||||
if _urlopener:
|
if _urlopener:
|
||||||
_urlopener.cleanup()
|
_urlopener.cleanup()
|
||||||
|
|
||||||
|
# exception raised when downloaded size does not match content-length
|
||||||
|
class ContentTooShortError(IOError):
|
||||||
|
def __init__(self, message, content):
|
||||||
|
IOError.__init__(self, message)
|
||||||
|
self.content = content
|
||||||
|
|
||||||
ftpcache = {}
|
ftpcache = {}
|
||||||
class URLopener:
|
class URLopener:
|
||||||
|
|
@ -228,24 +233,33 @@ class URLopener:
|
||||||
self.tempcache[url] = result
|
self.tempcache[url] = result
|
||||||
bs = 1024*8
|
bs = 1024*8
|
||||||
size = -1
|
size = -1
|
||||||
|
read = 0
|
||||||
blocknum = 1
|
blocknum = 1
|
||||||
if reporthook:
|
if reporthook:
|
||||||
if "content-length" in headers:
|
if "content-length" in headers:
|
||||||
size = int(headers["Content-Length"])
|
size = int(headers["Content-Length"])
|
||||||
reporthook(0, bs, size)
|
reporthook(0, bs, size)
|
||||||
block = fp.read(bs)
|
block = fp.read(bs)
|
||||||
|
read += len(block)
|
||||||
if reporthook:
|
if reporthook:
|
||||||
reporthook(1, bs, size)
|
reporthook(1, bs, size)
|
||||||
while block:
|
while block:
|
||||||
tfp.write(block)
|
tfp.write(block)
|
||||||
block = fp.read(bs)
|
block = fp.read(bs)
|
||||||
blocknum = blocknum + 1
|
read += len(block)
|
||||||
|
blocknum += 1
|
||||||
if reporthook:
|
if reporthook:
|
||||||
reporthook(blocknum, bs, size)
|
reporthook(blocknum, bs, size)
|
||||||
fp.close()
|
fp.close()
|
||||||
tfp.close()
|
tfp.close()
|
||||||
del fp
|
del fp
|
||||||
del tfp
|
del tfp
|
||||||
|
|
||||||
|
# raise exception if actual size does not match content-length header
|
||||||
|
if size >= 0 and read < size:
|
||||||
|
raise ContentTooShortError("retrieval incomplete: got only %i out "
|
||||||
|
"of %i bytes" % (read, size), result)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Each method named open_<type> knows how to open that type of URL
|
# Each method named open_<type> knows how to open that type of URL
|
||||||
|
|
|
||||||
|
|
@ -193,6 +193,10 @@ Extension Modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Patch #1062060: urllib.urlretrieve() now raises a new exception, named
|
||||||
|
ContentTooShortError, when the actually downloaded size is smaller than
|
||||||
|
the size given by the Content-Length header.
|
||||||
|
|
||||||
- Bug #1121494: distutils.dir_utils.mkpath now accepts Unicode strings.
|
- Bug #1121494: distutils.dir_utils.mkpath now accepts Unicode strings.
|
||||||
|
|
||||||
- Bug #1178484: Return complete lines from codec stream readers
|
- Bug #1178484: Return complete lines from codec stream readers
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue