mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
#22233: Only split headers on \r and/or \n, per email RFCs.
Original patch by Martin Panter, new policy fixes by me.
This commit is contained in:
parent
6b46ec7733
commit
dc1650ca06
6 changed files with 104 additions and 19 deletions
|
@ -27,6 +27,7 @@ from email import errors
|
||||||
from email import message
|
from email import message
|
||||||
from email._policybase import compat32
|
from email._policybase import compat32
|
||||||
from collections import deque
|
from collections import deque
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
NLCRE = re.compile('\r\n|\r|\n')
|
NLCRE = re.compile('\r\n|\r|\n')
|
||||||
NLCRE_bol = re.compile('(\r\n|\r|\n)')
|
NLCRE_bol = re.compile('(\r\n|\r|\n)')
|
||||||
|
@ -51,8 +52,9 @@ class BufferedSubFile(object):
|
||||||
simple abstraction -- it parses until EOF closes the current message.
|
simple abstraction -- it parses until EOF closes the current message.
|
||||||
"""
|
"""
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# Chunks of the last partial line pushed into this object.
|
# Text stream of the last partial line pushed into this object.
|
||||||
self._partial = []
|
# See issue 22233 for why this is a text stream and not a list.
|
||||||
|
self._partial = StringIO(newline='')
|
||||||
# A deque of full, pushed lines
|
# A deque of full, pushed lines
|
||||||
self._lines = deque()
|
self._lines = deque()
|
||||||
# The stack of false-EOF checking predicates.
|
# The stack of false-EOF checking predicates.
|
||||||
|
@ -68,8 +70,10 @@ class BufferedSubFile(object):
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
# Don't forget any trailing partial line.
|
# Don't forget any trailing partial line.
|
||||||
self.pushlines(''.join(self._partial).splitlines(True))
|
self._partial.seek(0)
|
||||||
self._partial = []
|
self.pushlines(self._partial.readlines())
|
||||||
|
self._partial.seek(0)
|
||||||
|
self._partial.truncate()
|
||||||
self._closed = True
|
self._closed = True
|
||||||
|
|
||||||
def readline(self):
|
def readline(self):
|
||||||
|
@ -97,26 +101,23 @@ class BufferedSubFile(object):
|
||||||
|
|
||||||
def push(self, data):
|
def push(self, data):
|
||||||
"""Push some new data into this object."""
|
"""Push some new data into this object."""
|
||||||
# Crack into lines, but preserve the linesep characters on the end of each
|
self._partial.write(data)
|
||||||
parts = data.splitlines(True)
|
if '\n' not in data and '\r' not in data:
|
||||||
|
# No new complete lines, wait for more.
|
||||||
if not parts or not parts[0].endswith(('\n', '\r')):
|
|
||||||
# No new complete lines, so just accumulate partials
|
|
||||||
self._partial += parts
|
|
||||||
return
|
return
|
||||||
|
|
||||||
if self._partial:
|
# Crack into lines, preserving the linesep characters.
|
||||||
# If there are previous leftovers, complete them now
|
self._partial.seek(0)
|
||||||
self._partial.append(parts[0])
|
parts = self._partial.readlines()
|
||||||
parts[0:1] = ''.join(self._partial).splitlines(True)
|
self._partial.seek(0)
|
||||||
del self._partial[:]
|
self._partial.truncate()
|
||||||
|
|
||||||
# If the last element of the list does not end in a newline, then treat
|
# If the last element of the list does not end in a newline, then treat
|
||||||
# it as a partial line. We only check for '\n' here because a line
|
# it as a partial line. We only check for '\n' here because a line
|
||||||
# ending with '\r' might be a line that was split in the middle of a
|
# ending with '\r' might be a line that was split in the middle of a
|
||||||
# '\r\n' sequence (see bugs 1555570 and 1721862).
|
# '\r\n' sequence (see bugs 1555570 and 1721862).
|
||||||
if not parts[-1].endswith('\n'):
|
if not parts[-1].endswith('\n'):
|
||||||
self._partial = [parts.pop()]
|
self._partial.write(parts.pop())
|
||||||
self.pushlines(parts)
|
self.pushlines(parts)
|
||||||
|
|
||||||
def pushlines(self, lines):
|
def pushlines(self, lines):
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
code that adds all the email6 features.
|
code that adds all the email6 features.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
|
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
|
||||||
from email.utils import _has_surrogates
|
from email.utils import _has_surrogates
|
||||||
from email.headerregistry import HeaderRegistry as HeaderRegistry
|
from email.headerregistry import HeaderRegistry as HeaderRegistry
|
||||||
|
@ -18,6 +19,8 @@ __all__ = [
|
||||||
'HTTP',
|
'HTTP',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
linesep_splitter = re.compile(r'\n|\r')
|
||||||
|
|
||||||
@_extend_docstrings
|
@_extend_docstrings
|
||||||
class EmailPolicy(Policy):
|
class EmailPolicy(Policy):
|
||||||
|
|
||||||
|
@ -135,6 +138,8 @@ class EmailPolicy(Policy):
|
||||||
if hasattr(value, 'name') and value.name.lower() == name.lower():
|
if hasattr(value, 'name') and value.name.lower() == name.lower():
|
||||||
return (name, value)
|
return (name, value)
|
||||||
if isinstance(value, str) and len(value.splitlines())>1:
|
if isinstance(value, str) and len(value.splitlines())>1:
|
||||||
|
# XXX this error message isn't quite right when we use splitlines
|
||||||
|
# (see issue 22233), but I'm not sure what should happen here.
|
||||||
raise ValueError("Header values may not contain linefeed "
|
raise ValueError("Header values may not contain linefeed "
|
||||||
"or carriage return characters")
|
"or carriage return characters")
|
||||||
return (name, self.header_factory(name, value))
|
return (name, self.header_factory(name, value))
|
||||||
|
@ -150,7 +155,9 @@ class EmailPolicy(Policy):
|
||||||
"""
|
"""
|
||||||
if hasattr(value, 'name'):
|
if hasattr(value, 'name'):
|
||||||
return value
|
return value
|
||||||
return self.header_factory(name, ''.join(value.splitlines()))
|
# We can't use splitlines here because it splits on more than \r and \n.
|
||||||
|
value = ''.join(linesep_splitter.split(value))
|
||||||
|
return self.header_factory(name, value)
|
||||||
|
|
||||||
def fold(self, name, value):
|
def fold(self, name, value):
|
||||||
"""+
|
"""+
|
||||||
|
|
|
@ -3444,10 +3444,12 @@ class TestFeedParsers(TestEmailBase):
|
||||||
self.assertEqual(m.keys(), ['a', 'b'])
|
self.assertEqual(m.keys(), ['a', 'b'])
|
||||||
m = self.parse(['a:\r', '\nb:\n'])
|
m = self.parse(['a:\r', '\nb:\n'])
|
||||||
self.assertEqual(m.keys(), ['a', 'b'])
|
self.assertEqual(m.keys(), ['a', 'b'])
|
||||||
|
|
||||||
|
# Only CR and LF should break header fields
|
||||||
m = self.parse(['a:\x85b:\u2028c:\n'])
|
m = self.parse(['a:\x85b:\u2028c:\n'])
|
||||||
self.assertEqual(m.items(), [('a', '\x85'), ('b', '\u2028'), ('c', '')])
|
self.assertEqual(m.items(), [('a', '\x85b:\u2028c:')])
|
||||||
m = self.parse(['a:\r', 'b:\x85', 'c:\n'])
|
m = self.parse(['a:\r', 'b:\x85', 'c:\n'])
|
||||||
self.assertEqual(m.items(), [('a', ''), ('b', '\x85'), ('c', '')])
|
self.assertEqual(m.items(), [('a', ''), ('b', '\x85c:')])
|
||||||
|
|
||||||
def test_long_lines(self):
|
def test_long_lines(self):
|
||||||
# Expected peak memory use on 32-bit platform: 6*N*M bytes.
|
# Expected peak memory use on 32-bit platform: 6*N*M bytes.
|
||||||
|
|
|
@ -2,6 +2,7 @@ import io
|
||||||
import email
|
import email
|
||||||
import unittest
|
import unittest
|
||||||
from email.message import Message
|
from email.message import Message
|
||||||
|
from email.policy import default
|
||||||
from test.test_email import TestEmailBase
|
from test.test_email import TestEmailBase
|
||||||
|
|
||||||
|
|
||||||
|
@ -32,5 +33,45 @@ class TestCustomMessage(TestEmailBase):
|
||||||
# XXX add tests for other functions that take Message arg.
|
# XXX add tests for other functions that take Message arg.
|
||||||
|
|
||||||
|
|
||||||
|
class TestParserBase:
|
||||||
|
|
||||||
|
def test_only_split_on_cr_lf(self):
|
||||||
|
# The unicode line splitter splits on unicode linebreaks, which are
|
||||||
|
# more numerous than allowed by the email RFCs; make sure we are only
|
||||||
|
# splitting on those two.
|
||||||
|
msg = self.parser(
|
||||||
|
"Next-Line: not\x85broken\r\n"
|
||||||
|
"Null: not\x00broken\r\n"
|
||||||
|
"Vertical-Tab: not\vbroken\r\n"
|
||||||
|
"Form-Feed: not\fbroken\r\n"
|
||||||
|
"File-Separator: not\x1Cbroken\r\n"
|
||||||
|
"Group-Separator: not\x1Dbroken\r\n"
|
||||||
|
"Record-Separator: not\x1Ebroken\r\n"
|
||||||
|
"Line-Separator: not\u2028broken\r\n"
|
||||||
|
"Paragraph-Separator: not\u2029broken\r\n"
|
||||||
|
"\r\n",
|
||||||
|
policy=default,
|
||||||
|
)
|
||||||
|
self.assertEqual(msg.items(), [
|
||||||
|
("Next-Line", "not\x85broken"),
|
||||||
|
("Null", "not\x00broken"),
|
||||||
|
("Vertical-Tab", "not\vbroken"),
|
||||||
|
("Form-Feed", "not\fbroken"),
|
||||||
|
("File-Separator", "not\x1Cbroken"),
|
||||||
|
("Group-Separator", "not\x1Dbroken"),
|
||||||
|
("Record-Separator", "not\x1Ebroken"),
|
||||||
|
("Line-Separator", "not\u2028broken"),
|
||||||
|
("Paragraph-Separator", "not\u2029broken"),
|
||||||
|
])
|
||||||
|
self.assertEqual(msg.get_payload(), "")
|
||||||
|
|
||||||
|
class TestParser(TestParserBase, TestEmailBase):
|
||||||
|
parser = staticmethod(email.message_from_string)
|
||||||
|
|
||||||
|
class TestBytesParser(TestParserBase, TestEmailBase):
|
||||||
|
def parser(self, s, *args, **kw):
|
||||||
|
return email.message_from_bytes(s.encode(), *args, **kw)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -283,6 +283,36 @@ class HeaderTests(TestCase):
|
||||||
self.assertEqual(resp.getheader('First'), 'val')
|
self.assertEqual(resp.getheader('First'), 'val')
|
||||||
self.assertEqual(resp.getheader('Second'), 'val')
|
self.assertEqual(resp.getheader('Second'), 'val')
|
||||||
|
|
||||||
|
def test_parse_all_octets(self):
|
||||||
|
# Ensure no valid header field octet breaks the parser
|
||||||
|
body = (
|
||||||
|
b'HTTP/1.1 200 OK\r\n'
|
||||||
|
b"!#$%&'*+-.^_`|~: value\r\n" # Special token characters
|
||||||
|
b'VCHAR: ' + bytes(range(0x21, 0x7E + 1)) + b'\r\n'
|
||||||
|
b'obs-text: ' + bytes(range(0x80, 0xFF + 1)) + b'\r\n'
|
||||||
|
b'obs-fold: text\r\n'
|
||||||
|
b' folded with space\r\n'
|
||||||
|
b'\tfolded with tab\r\n'
|
||||||
|
b'Content-Length: 0\r\n'
|
||||||
|
b'\r\n'
|
||||||
|
)
|
||||||
|
sock = FakeSocket(body)
|
||||||
|
resp = client.HTTPResponse(sock)
|
||||||
|
resp.begin()
|
||||||
|
self.assertEqual(resp.getheader('Content-Length'), '0')
|
||||||
|
self.assertEqual(resp.msg['Content-Length'], '0')
|
||||||
|
self.assertEqual(resp.getheader("!#$%&'*+-.^_`|~"), 'value')
|
||||||
|
self.assertEqual(resp.msg["!#$%&'*+-.^_`|~"], 'value')
|
||||||
|
vchar = ''.join(map(chr, range(0x21, 0x7E + 1)))
|
||||||
|
self.assertEqual(resp.getheader('VCHAR'), vchar)
|
||||||
|
self.assertEqual(resp.msg['VCHAR'], vchar)
|
||||||
|
self.assertIsNotNone(resp.getheader('obs-text'))
|
||||||
|
self.assertIn('obs-text', resp.msg)
|
||||||
|
for folded in (resp.getheader('obs-fold'), resp.msg['obs-fold']):
|
||||||
|
self.assertTrue(folded.startswith('text'))
|
||||||
|
self.assertIn(' folded with space', folded)
|
||||||
|
self.assertTrue(folded.endswith('folded with tab'))
|
||||||
|
|
||||||
def test_invalid_headers(self):
|
def test_invalid_headers(self):
|
||||||
conn = client.HTTPConnection('example.com')
|
conn = client.HTTPConnection('example.com')
|
||||||
conn.sock = FakeSocket('')
|
conn.sock = FakeSocket('')
|
||||||
|
|
|
@ -62,6 +62,10 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #22233: Break email header lines *only* on the RFC specified CR and LF
|
||||||
|
characters, not on arbitrary unicode line breaks. This also fixes a bug in
|
||||||
|
HTTP header parsing.
|
||||||
|
|
||||||
- Issue 27988: Fix email iter_attachments incorrect mutation of payload list.
|
- Issue 27988: Fix email iter_attachments incorrect mutation of payload list.
|
||||||
|
|
||||||
- Issue #27691: Fix ssl module's parsing of GEN_RID subject alternative name
|
- Issue #27691: Fix ssl module's parsing of GEN_RID subject alternative name
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue