mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Issue #16423: urllib.request now has support for `data:` URLs.
Patch by Mathias Panzenböck.
This commit is contained in:
parent
a833e0d8ae
commit
df204be922
5 changed files with 137 additions and 5 deletions
|
@ -121,7 +121,7 @@ The :mod:`urllib.request` module defines the following functions:
|
||||||
instances of them or subclasses of them: :class:`ProxyHandler`,
|
instances of them or subclasses of them: :class:`ProxyHandler`,
|
||||||
:class:`UnknownHandler`, :class:`HTTPHandler`, :class:`HTTPDefaultErrorHandler`,
|
:class:`UnknownHandler`, :class:`HTTPHandler`, :class:`HTTPDefaultErrorHandler`,
|
||||||
:class:`HTTPRedirectHandler`, :class:`FTPHandler`, :class:`FileHandler`,
|
:class:`HTTPRedirectHandler`, :class:`FTPHandler`, :class:`FileHandler`,
|
||||||
:class:`HTTPErrorProcessor`.
|
:class:`HTTPErrorProcessor`, :class:`DataHandler`.
|
||||||
|
|
||||||
If the Python installation has SSL support (i.e., if the :mod:`ssl` module
|
If the Python installation has SSL support (i.e., if the :mod:`ssl` module
|
||||||
can be imported), :class:`HTTPSHandler` will also be added.
|
can be imported), :class:`HTTPSHandler` will also be added.
|
||||||
|
@ -346,6 +346,11 @@ The following classes are provided:
|
||||||
|
|
||||||
Open local files.
|
Open local files.
|
||||||
|
|
||||||
|
.. class:: DataHandler()
|
||||||
|
|
||||||
|
Open data URLs.
|
||||||
|
|
||||||
|
.. versionadded:: 3.4
|
||||||
|
|
||||||
.. class:: FTPHandler()
|
.. class:: FTPHandler()
|
||||||
|
|
||||||
|
@ -972,6 +977,21 @@ FileHandler Objects
|
||||||
hostname is given, an :exc:`URLError` is raised.
|
hostname is given, an :exc:`URLError` is raised.
|
||||||
|
|
||||||
|
|
||||||
|
.. _data-handler-objects:
|
||||||
|
|
||||||
|
DataHandler Objects
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
.. method:: DataHandler.data_open(req)
|
||||||
|
|
||||||
|
Read a data URL. This kind of URL contains the content encoded in the URL
|
||||||
|
itself. The data URL syntax is specified in :rfc:`2397`. This implementation
|
||||||
|
ignores whitespace in base64 encoded data URLs so the URL may be wrapped
|
||||||
|
in whatever source file it comes from. But even though some browsers don't
|
||||||
|
mind missing padding at the end of a base64 encoded data URL, this
|
||||||
|
implementation will raise a :exc:`ValueError` in that case.
|
||||||
|
|
||||||
|
|
||||||
.. _ftp-handler-objects:
|
.. _ftp-handler-objects:
|
||||||
|
|
||||||
FTPHandler Objects
|
FTPHandler Objects
|
||||||
|
@ -1374,7 +1394,9 @@ some point in the future.
|
||||||
pair: FTP; protocol
|
pair: FTP; protocol
|
||||||
|
|
||||||
* Currently, only the following protocols are supported: HTTP (versions 0.9 and
|
* Currently, only the following protocols are supported: HTTP (versions 0.9 and
|
||||||
1.0), FTP, and local files.
|
1.0), FTP, local files, and data URLs.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.4 Added support for data URLs.
|
||||||
|
|
||||||
* The caching feature of :func:`urlretrieve` has been disabled until someone
|
* The caching feature of :func:`urlretrieve` has been disabled until someone
|
||||||
finds the time to hack proper processing of Expiration time headers.
|
finds the time to hack proper processing of Expiration time headers.
|
||||||
|
|
|
@ -337,6 +337,79 @@ Content-Type: text/html; charset=iso-8859-1
|
||||||
with support.check_warnings(('',DeprecationWarning)):
|
with support.check_warnings(('',DeprecationWarning)):
|
||||||
urllib.request.URLopener()
|
urllib.request.URLopener()
|
||||||
|
|
||||||
|
class urlopen_DataTests(unittest.TestCase):
|
||||||
|
"""Test urlopen() opening a data URL."""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
# text containing URL special- and unicode-characters
|
||||||
|
self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
|
||||||
|
# 2x1 pixel RGB PNG image with one black and one white pixel
|
||||||
|
self.image = (
|
||||||
|
b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
|
||||||
|
b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
|
||||||
|
b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
|
||||||
|
b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')
|
||||||
|
|
||||||
|
self.text_url = (
|
||||||
|
"data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
|
||||||
|
"D%26%20%C3%B6%20%C3%84%20")
|
||||||
|
self.text_url_base64 = (
|
||||||
|
"data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
|
||||||
|
"sJT0mIPYgxCA%3D")
|
||||||
|
# base64 encoded data URL that contains ignorable spaces,
|
||||||
|
# such as "\n", " ", "%0A", and "%20".
|
||||||
|
self.image_url = (
|
||||||
|
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
|
||||||
|
"QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
|
||||||
|
"vHgAAAABJRU5ErkJggg%3D%3D%0A%20")
|
||||||
|
|
||||||
|
self.text_url_resp = urllib.request.urlopen(self.text_url)
|
||||||
|
self.text_url_base64_resp = urllib.request.urlopen(
|
||||||
|
self.text_url_base64)
|
||||||
|
self.image_url_resp = urllib.request.urlopen(self.image_url)
|
||||||
|
|
||||||
|
def test_interface(self):
|
||||||
|
# Make sure object returned by urlopen() has the specified methods
|
||||||
|
for attr in ("read", "readline", "readlines",
|
||||||
|
"close", "info", "geturl", "getcode", "__iter__"):
|
||||||
|
self.assertTrue(hasattr(self.text_url_resp, attr),
|
||||||
|
"object returned by urlopen() lacks %s attribute" %
|
||||||
|
attr)
|
||||||
|
|
||||||
|
def test_info(self):
|
||||||
|
self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
|
||||||
|
self.assertEqual(self.text_url_base64_resp.info().get_params(),
|
||||||
|
[('text/plain', ''), ('charset', 'ISO-8859-1')])
|
||||||
|
self.assertEqual(self.image_url_resp.info()['content-length'],
|
||||||
|
str(len(self.image)))
|
||||||
|
self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
|
||||||
|
[('text/plain', ''), ('charset', 'US-ASCII')])
|
||||||
|
|
||||||
|
def test_geturl(self):
|
||||||
|
self.assertEqual(self.text_url_resp.geturl(), self.text_url)
|
||||||
|
self.assertEqual(self.text_url_base64_resp.geturl(),
|
||||||
|
self.text_url_base64)
|
||||||
|
self.assertEqual(self.image_url_resp.geturl(), self.image_url)
|
||||||
|
|
||||||
|
def test_read_text(self):
|
||||||
|
self.assertEqual(self.text_url_resp.read().decode(
|
||||||
|
dict(self.text_url_resp.info().get_params())['charset']), self.text)
|
||||||
|
|
||||||
|
def test_read_text_base64(self):
|
||||||
|
self.assertEqual(self.text_url_base64_resp.read().decode(
|
||||||
|
dict(self.text_url_base64_resp.info().get_params())['charset']),
|
||||||
|
self.text)
|
||||||
|
|
||||||
|
def test_read_image(self):
|
||||||
|
self.assertEqual(self.image_url_resp.read(), self.image)
|
||||||
|
|
||||||
|
def test_missing_comma(self):
|
||||||
|
self.assertRaises(ValueError,urllib.request.urlopen,'data:text/plain')
|
||||||
|
|
||||||
|
def test_invalid_base64_data(self):
|
||||||
|
# missing padding character
|
||||||
|
self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
|
||||||
|
|
||||||
class urlretrieve_FileTests(unittest.TestCase):
|
class urlretrieve_FileTests(unittest.TestCase):
|
||||||
"""Test urllib.urlretrieve() on local files"""
|
"""Test urllib.urlretrieve() on local files"""
|
||||||
|
|
||||||
|
@ -1313,6 +1386,7 @@ def test_main():
|
||||||
support.run_unittest(
|
support.run_unittest(
|
||||||
urlopen_FileTests,
|
urlopen_FileTests,
|
||||||
urlopen_HttpTests,
|
urlopen_HttpTests,
|
||||||
|
urlopen_DataTests,
|
||||||
urlretrieve_FileTests,
|
urlretrieve_FileTests,
|
||||||
urlretrieve_HttpTests,
|
urlretrieve_HttpTests,
|
||||||
ProxyTests,
|
ProxyTests,
|
||||||
|
|
|
@ -103,7 +103,8 @@ from urllib.error import URLError, HTTPError, ContentTooShortError
|
||||||
from urllib.parse import (
|
from urllib.parse import (
|
||||||
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
|
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
|
||||||
splittype, splithost, splitport, splituser, splitpasswd,
|
splittype, splithost, splitport, splituser, splitpasswd,
|
||||||
splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
|
splitattr, splitquery, splitvalue, splittag, to_bytes,
|
||||||
|
unquote_to_bytes, urlunparse)
|
||||||
from urllib.response import addinfourl, addclosehook
|
from urllib.response import addinfourl, addclosehook
|
||||||
|
|
||||||
# check for SSL
|
# check for SSL
|
||||||
|
@ -121,7 +122,7 @@ __all__ = [
|
||||||
'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
|
'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
|
||||||
'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
|
'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
|
||||||
'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
|
'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
|
||||||
'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
|
'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
|
||||||
'UnknownHandler', 'HTTPErrorProcessor',
|
'UnknownHandler', 'HTTPErrorProcessor',
|
||||||
# Functions
|
# Functions
|
||||||
'urlopen', 'install_opener', 'build_opener',
|
'urlopen', 'install_opener', 'build_opener',
|
||||||
|
@ -535,7 +536,8 @@ def build_opener(*handlers):
|
||||||
opener = OpenerDirector()
|
opener = OpenerDirector()
|
||||||
default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
|
default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
|
||||||
HTTPDefaultErrorHandler, HTTPRedirectHandler,
|
HTTPDefaultErrorHandler, HTTPRedirectHandler,
|
||||||
FTPHandler, FileHandler, HTTPErrorProcessor]
|
FTPHandler, FileHandler, HTTPErrorProcessor,
|
||||||
|
DataHandler]
|
||||||
if hasattr(http.client, "HTTPSConnection"):
|
if hasattr(http.client, "HTTPSConnection"):
|
||||||
default_classes.append(HTTPSHandler)
|
default_classes.append(HTTPSHandler)
|
||||||
skip = set()
|
skip = set()
|
||||||
|
@ -1541,6 +1543,36 @@ class CacheFTPHandler(FTPHandler):
|
||||||
self.cache.clear()
|
self.cache.clear()
|
||||||
self.timeout.clear()
|
self.timeout.clear()
|
||||||
|
|
||||||
|
class DataHandler(BaseHandler):
|
||||||
|
def data_open(self, req):
|
||||||
|
# data URLs as specified in RFC 2397.
|
||||||
|
#
|
||||||
|
# ignores POSTed data
|
||||||
|
#
|
||||||
|
# syntax:
|
||||||
|
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
||||||
|
# mediatype := [ type "/" subtype ] *( ";" parameter )
|
||||||
|
# data := *urlchar
|
||||||
|
# parameter := attribute "=" value
|
||||||
|
url = req.full_url
|
||||||
|
|
||||||
|
scheme, data = url.split(":",1)
|
||||||
|
mediatype, data = data.split(",",1)
|
||||||
|
|
||||||
|
# even base64 encoded data URLs might be quoted so unquote in any case:
|
||||||
|
data = unquote_to_bytes(data)
|
||||||
|
if mediatype.endswith(";base64"):
|
||||||
|
data = base64.decodebytes(data)
|
||||||
|
mediatype = mediatype[:-7]
|
||||||
|
|
||||||
|
if not mediatype:
|
||||||
|
mediatype = "text/plain;charset=US-ASCII"
|
||||||
|
|
||||||
|
headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" %
|
||||||
|
(mediatype, len(data)))
|
||||||
|
|
||||||
|
return addinfourl(io.BytesIO(data), headers, url)
|
||||||
|
|
||||||
|
|
||||||
# Code move from the old urllib module
|
# Code move from the old urllib module
|
||||||
|
|
||||||
|
|
|
@ -884,6 +884,7 @@ Mike Pall
|
||||||
Todd R. Palmer
|
Todd R. Palmer
|
||||||
Juan David Ibáñez Palomar
|
Juan David Ibáñez Palomar
|
||||||
Jan Palus
|
Jan Palus
|
||||||
|
Mathias Panzenböck
|
||||||
M. Papillon
|
M. Papillon
|
||||||
Peter Parente
|
Peter Parente
|
||||||
Alexandre Parenteau
|
Alexandre Parenteau
|
||||||
|
|
|
@ -138,6 +138,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #16423: urllib.request now has support for ``data:`` URLs. Patch by
|
||||||
|
Mathias Panzenböck.
|
||||||
|
|
||||||
- Issue #4473: Add a POP3.stls() to switch a clear-text POP3 session into
|
- Issue #4473: Add a POP3.stls() to switch a clear-text POP3 session into
|
||||||
an encrypted POP3 session, on supported servers. Patch by Lorenzo Catucci.
|
an encrypted POP3 session, on supported servers. Patch by Lorenzo Catucci.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue