gh-80222: Fix email address header folding with long quoted-string (#122753)

Email generators using email.policy.default could incorrectly omit the
quote ('"') characters from a quoted-string during header refolding,
leading to invalid address headers and enabling header spoofing. This
change restores the quote characters on a bare-quoted-string as the
header is refolded, and escapes backslash and quote chars in the string.
This commit is contained in:
Mike Edmunds 2025-01-18 16:50:52 -08:00 committed by GitHub
parent 61b35f74aa
commit 5aaf416858
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 53 additions and 3 deletions

View file

@ -95,8 +95,16 @@ EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
# The newline characters.
NLSET = {'\n', '\r'}
# SPECIALS extended with the newline characters.
SPECIALSNL = SPECIALS | NLSET
# Maps each character that must become a quoted-pair to its escaped form.
_QUOTED_PAIR_TABLE = str.maketrans({'\\': '\\\\', '"': '\\"'})


def make_quoted_pairs(value):
    """Escape dquote and backslash for use within a quoted-string.

    *value* is converted with str() first.  Every backslash and every
    double quote in the result is prefixed with a backslash; all other
    characters are returned unchanged.  The surrounding quotes are not
    added here.
    """
    # A single translation pass handles each input character exactly once,
    # so the backslash inserted in front of a quote is never re-escaped and
    # the result does not depend on the order of two replace() calls.
    return str(value).translate(_QUOTED_PAIR_TABLE)
def quote_string(value):
    """Return *value* rendered as a quoted-string.

    Backslashes and double quotes in str(value) are escaped by
    make_quoted_pairs(), and the result is wrapped in double quotes.
    """
    escaped = make_quoted_pairs(value)
    return f'"{escaped}"'
# Match a RFC 2047 word, looks like =?utf-8?q?someword?= # Match a RFC 2047 word, looks like =?utf-8?q?someword?=
rfc2047_matcher = re.compile(r''' rfc2047_matcher = re.compile(r'''
@ -2905,6 +2913,15 @@ def _refold_parse_tree(parse_tree, *, policy):
if not hasattr(part, 'encode'): if not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts. # It's not a terminal, try folding the subparts.
newparts = list(part) newparts = list(part)
if part.token_type == 'bare-quoted-string':
# To fold a quoted string we need to create a list of terminal
# tokens that will render the leading and trailing quotes
# and use quoted pairs in the value as appropriate.
newparts = (
[ValueTerminal('"', 'ptext')] +
[ValueTerminal(make_quoted_pairs(p), 'ptext')
for p in newparts] +
[ValueTerminal('"', 'ptext')])
if not part.as_ew_allowed: if not part.as_ew_allowed:
wrap_as_ew_blocked += 1 wrap_as_ew_blocked += 1
newparts.append(end_ew_not_allowed) newparts.append(end_ew_not_allowed)

View file

@ -3082,13 +3082,40 @@ class TestFolding(TestEmailBase):
self._test(parser.get_address_list(to)[0], self._test(parser.get_address_list(to)[0],
f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>\n') f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>\n')
a = '.' * 79 a = '.' * 79 # ('.' is a special, so must be in quoted-string.)
to = f'"{a}" <xyz@example.com>, "Hübsch Kaktus" <beautiful@example.com>' to = f'"{a}" <xyz@example.com>, "Hübsch Kaktus" <beautiful@example.com>'
self._test(parser.get_address_list(to)[0], self._test(parser.get_address_list(to)[0],
f'{a}\n' f'"{a}"\n'
' <xyz@example.com>, =?utf-8?q?H=C3=BCbsch?= Kaktus ' ' <xyz@example.com>, =?utf-8?q?H=C3=BCbsch?= Kaktus '
'<beautiful@example.com>\n') '<beautiful@example.com>\n')
def test_address_list_with_specials_in_long_quoted_string(self):
    # Regression for gh-80222: a quoted-string that has to be folded must
    # keep its surrounding quotes and its backslash escapes.
    narrow_policy = self.policy.clone(max_line_length=40)
    # Each entry pairs an unfolded address with its expected folded form.
    expectations = (
        ('"Exfiltrator <spy@example.org> (unclosed comment?" <to@example.com>',
         '"Exfiltrator <spy@example.org> (unclosed\n'
         ' comment?" <to@example.com>\n'),
        ('"Escaped \\" chars \\\\ in quoted-string stay escaped" <to@example.com>',
         '"Escaped \\" chars \\\\ in quoted-string\n'
         ' stay escaped" <to@example.com>\n'),
        ('This long display name does not need quotes <to@example.com>',
         'This long display name does not need\n'
         ' quotes <to@example.com>\n'),
        ('"Quotes are not required but are retained here" <to@example.com>',
         '"Quotes are not required but are\n'
         ' retained here" <to@example.com>\n'),
        ('"A quoted-string, it can be a valid local-part"@example.com',
         '"A quoted-string, it can be a valid\n'
         ' local-part"@example.com\n'),
        ('"local-part-with-specials@but-no-fws.cannot-fold"@example.com',
         '"local-part-with-specials@but-no-fws.cannot-fold"@example.com\n'),
    )
    for address, expected in expectations:
        with self.subTest(to=address):
            parsed = parser.get_address_list(address)[0]
            self._test(parsed, expected, policy=narrow_policy)
# XXX Need tests with comments on various sides of a unicode token,
# and with unicode tokens in the comments. Spaces inside the quotes
# currently don't do the right thing.

View file

@ -0,0 +1,6 @@
Fix a bug in the folding of quoted strings when flattening an email message using
a modern email policy. Previously, when a quoted string was folded so that
it spanned more than one line, the surrounding quotes and internal escapes
would be omitted. This could theoretically be used to spoof header lines
using a carefully constructed quoted string if the resulting rendered email
was transmitted or re-parsed.