mirror of
https://github.com/python/cpython.git
synced 2025-10-12 01:43:12 +00:00
#11684: Complete parser bytes interface by adding BytesHeaderParser
Patch by Steffen Daode Nurpmeso.
This commit is contained in:
parent
f400ab40e4
commit
b35c850a3f
5 changed files with 46 additions and 8 deletions
|
@ -94,12 +94,14 @@ Parser class API
|
||||||
The :class:`Parser` class, imported from the :mod:`email.parser` module,
|
The :class:`Parser` class, imported from the :mod:`email.parser` module,
|
||||||
provides an API that can be used to parse a message when the complete contents
|
provides an API that can be used to parse a message when the complete contents
|
||||||
of the message are available in a string or file. The :mod:`email.parser`
|
of the message are available in a string or file. The :mod:`email.parser`
|
||||||
module also provides a second class, called :class:`HeaderParser` which can be
|
module also provides header-only parsers, called :class:`HeaderParser` and
|
||||||
used if you're only interested in the headers of the message.
|
:class:`BytesHeaderParser`, which can be used if you're only interested in the
|
||||||
:class:`HeaderParser` can be much faster in these situations, since it does not
|
headers of the message. :class:`HeaderParser` and :class:`BytesHeaderParser`
|
||||||
attempt to parse the message body, instead setting the payload to the raw body
|
can be much faster in these situations, since they do not attempt to parse the
|
||||||
as a string. :class:`HeaderParser` has the same API as the :class:`Parser`
|
message body, instead setting the payload to the raw body as a string. They
|
||||||
class.
|
have the same API as the :class:`Parser` and :class:`BytesParser` classes.
|
||||||
|
|
||||||
|
.. versionadded:: 3.3 BytesHeaderParser
|
||||||
|
|
||||||
|
|
||||||
.. class:: Parser(_class=email.message.Message)
|
.. class:: Parser(_class=email.message.Message)
|
||||||
|
|
|
@ -297,10 +297,12 @@ class Generator:
|
||||||
# message/rfc822. Such messages are generated by, for example,
|
# message/rfc822. Such messages are generated by, for example,
|
||||||
# Groupwise when forwarding unadorned messages. (Issue 7970.) So
|
# Groupwise when forwarding unadorned messages. (Issue 7970.) So
|
||||||
# in that case we just emit the string body.
|
# in that case we just emit the string body.
|
||||||
payload = msg.get_payload()
|
payload = msg._payload
|
||||||
if isinstance(payload, list):
|
if isinstance(payload, list):
|
||||||
g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
|
g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
|
||||||
payload = s.getvalue()
|
payload = s.getvalue()
|
||||||
|
else:
|
||||||
|
payload = self._encode(payload)
|
||||||
self._fp.write(payload)
|
self._fp.write(payload)
|
||||||
|
|
||||||
# This used to be a module level function; we use a classmethod for this
|
# This used to be a module level function; we use a classmethod for this
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
"""A parser of RFC 2822 and MIME email messages."""
|
"""A parser of RFC 2822 and MIME email messages."""
|
||||||
|
|
||||||
__all__ = ['Parser', 'HeaderParser']
|
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
|
||||||
|
|
||||||
import warnings
|
import warnings
|
||||||
from io import StringIO, TextIOWrapper
|
from io import StringIO, TextIOWrapper
|
||||||
|
@ -114,3 +114,11 @@ class BytesParser:
|
||||||
"""
|
"""
|
||||||
text = text.decode('ASCII', errors='surrogateescape')
|
text = text.decode('ASCII', errors='surrogateescape')
|
||||||
return self.parser.parsestr(text, headersonly)
|
return self.parser.parsestr(text, headersonly)
|
||||||
|
|
||||||
|
|
||||||
|
class BytesHeaderParser(BytesParser):
|
||||||
|
def parse(self, fp, headersonly=True):
|
||||||
|
return BytesParser.parse(self, fp, headersonly=True)
|
||||||
|
|
||||||
|
def parsebytes(self, text, headersonly=True):
|
||||||
|
return BytesParser.parsebytes(self, text, headersonly=True)
|
||||||
|
|
|
@ -177,6 +177,17 @@ class TestMessageAPI(TestEmailBase):
|
||||||
gen.flatten(msg, False)
|
gen.flatten(msg, False)
|
||||||
self.assertEqual(out.getvalue(), msgdata)
|
self.assertEqual(out.getvalue(), msgdata)
|
||||||
|
|
||||||
|
def test_byte_message_rfc822_only(self):
|
||||||
|
# Make sure new bytes header parser also passes this.
|
||||||
|
with openfile('msg_46.txt', 'rb') as fp:
|
||||||
|
msgdata = fp.read()
|
||||||
|
parser = email.parser.BytesHeaderParser()
|
||||||
|
msg = parser.parsebytes(msgdata)
|
||||||
|
out = BytesIO()
|
||||||
|
gen = email.generator.BytesGenerator(out)
|
||||||
|
gen.flatten(msg)
|
||||||
|
self.assertEqual(out.getvalue(), msgdata)
|
||||||
|
|
||||||
def test_get_decoded_payload(self):
|
def test_get_decoded_payload(self):
|
||||||
eq = self.assertEqual
|
eq = self.assertEqual
|
||||||
msg = self._msgobj('msg_10.txt')
|
msg = self._msgobj('msg_10.txt')
|
||||||
|
@ -2749,6 +2760,7 @@ Do you like this message?
|
||||||
|
|
||||||
|
|
||||||
class TestParsers(TestEmailBase):
|
class TestParsers(TestEmailBase):
|
||||||
|
|
||||||
def test_header_parser(self):
|
def test_header_parser(self):
|
||||||
eq = self.assertEqual
|
eq = self.assertEqual
|
||||||
# Parse only the headers of a complex multipart MIME document
|
# Parse only the headers of a complex multipart MIME document
|
||||||
|
@ -2760,6 +2772,18 @@ class TestParsers(TestEmailBase):
|
||||||
self.assertFalse(msg.is_multipart())
|
self.assertFalse(msg.is_multipart())
|
||||||
self.assertTrue(isinstance(msg.get_payload(), str))
|
self.assertTrue(isinstance(msg.get_payload(), str))
|
||||||
|
|
||||||
|
def test_bytes_header_parser(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
# Parse only the headers of a complex multipart MIME document
|
||||||
|
with openfile('msg_02.txt', 'rb') as fp:
|
||||||
|
msg = email.parser.BytesHeaderParser().parse(fp)
|
||||||
|
eq(msg['from'], 'ppp-request@zzz.org')
|
||||||
|
eq(msg['to'], 'ppp@zzz.org')
|
||||||
|
eq(msg.get_content_type(), 'multipart/mixed')
|
||||||
|
self.assertFalse(msg.is_multipart())
|
||||||
|
self.assertTrue(isinstance(msg.get_payload(), str))
|
||||||
|
self.assertTrue(isinstance(msg.get_payload(decode=True), bytes))
|
||||||
|
|
||||||
def test_whitespace_continuation(self):
|
def test_whitespace_continuation(self):
|
||||||
eq = self.assertEqual
|
eq = self.assertEqual
|
||||||
# This message contains a line after the Subject: header that has only
|
# This message contains a line after the Subject: header that has only
|
||||||
|
|
|
@ -103,6 +103,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #11684: complete email.parser bytes API by adding BytesHeaderParser.
|
||||||
|
|
||||||
- The bz2 module now handles 4GiB+ input buffers correctly.
|
- The bz2 module now handles 4GiB+ input buffers correctly.
|
||||||
|
|
||||||
- Issue #9233: Fix json.loads('{}') to return a dict (instead of a list), when
|
- Issue #9233: Fix json.loads('{}') to return a dict (instead of a list), when
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue