mirror of
https://github.com/python/cpython.git
synced 2025-09-27 10:50:04 +00:00
gh-121284: Fix email address header folding with parsed encoded-word (GH-122754)
Email generators using email.policy.default may convert an RFC 2047 encoded-word to unencoded form during header refolding. In a structured header, this could allow 'specials' characters to appear outside a quoted-string, leading to invalid address headers and enabling spoofing. This change ensures that a parsed encoded-word containing specials is kept as an encoded-word while the header is refolded. [Better fix from @bitdancer.] --------- Co-authored-by: R David Murray <rdmurray@bitdance.com> Co-authored-by: Petr Viktorin <encukou@gmail.com>
This commit is contained in:
parent
ab6333f7f5
commit
295b53df2a
3 changed files with 37 additions and 5 deletions
|
@ -1053,7 +1053,7 @@ def get_fws(value):
|
||||||
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
|
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
|
||||||
return fws, newvalue
|
return fws, newvalue
|
||||||
|
|
||||||
def get_encoded_word(value):
|
def get_encoded_word(value, terminal_type='vtext'):
|
||||||
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
|
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -1092,7 +1092,7 @@ def get_encoded_word(value):
|
||||||
ew.append(token)
|
ew.append(token)
|
||||||
continue
|
continue
|
||||||
chars, *remainder = _wsp_splitter(text, 1)
|
chars, *remainder = _wsp_splitter(text, 1)
|
||||||
vtext = ValueTerminal(chars, 'vtext')
|
vtext = ValueTerminal(chars, terminal_type)
|
||||||
_validate_xtext(vtext)
|
_validate_xtext(vtext)
|
||||||
ew.append(vtext)
|
ew.append(vtext)
|
||||||
text = ''.join(remainder)
|
text = ''.join(remainder)
|
||||||
|
@ -1134,7 +1134,7 @@ def get_unstructured(value):
|
||||||
valid_ew = True
|
valid_ew = True
|
||||||
if value.startswith('=?'):
|
if value.startswith('=?'):
|
||||||
try:
|
try:
|
||||||
token, value = get_encoded_word(value)
|
token, value = get_encoded_word(value, 'utext')
|
||||||
except _InvalidEwError:
|
except _InvalidEwError:
|
||||||
valid_ew = False
|
valid_ew = False
|
||||||
except errors.HeaderParseError:
|
except errors.HeaderParseError:
|
||||||
|
@ -1163,7 +1163,7 @@ def get_unstructured(value):
|
||||||
# the parser to go in an infinite loop.
|
# the parser to go in an infinite loop.
|
||||||
if valid_ew and rfc2047_matcher.search(tok):
|
if valid_ew and rfc2047_matcher.search(tok):
|
||||||
tok, *remainder = value.partition('=?')
|
tok, *remainder = value.partition('=?')
|
||||||
vtext = ValueTerminal(tok, 'vtext')
|
vtext = ValueTerminal(tok, 'utext')
|
||||||
_validate_xtext(vtext)
|
_validate_xtext(vtext)
|
||||||
unstructured.append(vtext)
|
unstructured.append(vtext)
|
||||||
value = ''.join(remainder)
|
value = ''.join(remainder)
|
||||||
|
@ -2813,7 +2813,7 @@ def _refold_parse_tree(parse_tree, *, policy):
|
||||||
continue
|
continue
|
||||||
tstr = str(part)
|
tstr = str(part)
|
||||||
if not want_encoding:
|
if not want_encoding:
|
||||||
if part.token_type == 'ptext':
|
if part.token_type in ('ptext', 'vtext'):
|
||||||
# Encode if tstr contains special characters.
|
# Encode if tstr contains special characters.
|
||||||
want_encoding = not SPECIALSNL.isdisjoint(tstr)
|
want_encoding = not SPECIALSNL.isdisjoint(tstr)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -3076,6 +3076,31 @@ class TestFolding(TestEmailBase):
|
||||||
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
|
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
|
||||||
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')
|
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')
|
||||||
|
|
||||||
|
def test_address_list_with_specials_in_encoded_word(self):
|
||||||
|
# An encoded-word parsed from a structured header must remain
|
||||||
|
# encoded when it contains specials. Regression for gh-121284.
|
||||||
|
policy = self.policy.clone(max_line_length=40)
|
||||||
|
cases = [
|
||||||
|
# (to, folded)
|
||||||
|
('=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= <to@example.com>',
|
||||||
|
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
|
||||||
|
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
|
||||||
|
('=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= <to@example.com>',
|
||||||
|
'This long name does not need\n'
|
||||||
|
' encoded-word <to@example.com>\n'),
|
||||||
|
('"A véry long name with, comma" <to@example.com>',
|
||||||
|
# (This isn't the best fold point, but it's not invalid.)
|
||||||
|
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
|
||||||
|
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
|
||||||
|
('"A véry long name containing a, comma" <to@example.com>',
|
||||||
|
'A =?utf-8?q?v=C3=A9ry?= long name\n'
|
||||||
|
' containing =?utf-8?q?a=2C?= comma\n'
|
||||||
|
' <to@example.com>\n'),
|
||||||
|
]
|
||||||
|
for (to, folded) in cases:
|
||||||
|
with self.subTest(to=to):
|
||||||
|
self._test(parser.get_address_list(to)[0], folded, policy=policy)
|
||||||
|
|
||||||
def test_address_list_with_list_separator_after_fold(self):
|
def test_address_list_with_list_separator_after_fold(self):
|
||||||
a = 'x' * 66 + '@example.com'
|
a = 'x' * 66 + '@example.com'
|
||||||
to = f'{a}, "Hübsch Kaktus" <beautiful@example.com>'
|
to = f'{a}, "Hübsch Kaktus" <beautiful@example.com>'
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
Fix a bug in the folding of RFC 2047 encoded-words when flattening an email message
|
||||||
|
using a modern email policy. Previously when an encoded-word was too long
|
||||||
|
for a line, it would be decoded, split across lines, and re-encoded. But commas
|
||||||
|
and other special characters in the original text could be left unencoded and
|
||||||
|
unquoted. This could theoretically be used to spoof header lines using
|
||||||
|
a carefully constructed encoded-word if the resulting rendered email was
|
||||||
|
transmitted or re-parsed.
|
Loading…
Add table
Add a link
Reference in a new issue