[3.12] gh-106669: Revert "gh-102988: Detect email address parsing errors ... (GH-105127)" (GH-106733) (#106941)

This reverts commit 18dfbd0357.
Adds a regression test from the issue.

See https://github.com/python/cpython/issues/106669.
(cherry picked from commit a31dea1feb)
This commit is contained in:
Gregory P. Smith 2023-07-20 21:05:46 -07:00 committed by GitHub
parent c1fd76e138
commit 656f62454b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 30 additions and 167 deletions

View file

@ -65,11 +65,6 @@ of the new API.
*email address* parts. Returns a tuple of that information, unless the parse *email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned. fails, in which case a 2-tuple of ``('', '')`` is returned.
.. versionchanged:: 3.12
For security reasons, addresses that were ambiguous and could parse into
multiple different addresses now cause ``('', '')`` to be returned
instead of only one of the *potential* addresses.
.. function:: formataddr(pair, charset='utf-8') .. function:: formataddr(pair, charset='utf-8')
@ -92,7 +87,7 @@ of the new API.
This method returns a list of 2-tuples of the form returned by ``parseaddr()``. This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by *fieldvalues* is a sequence of header field values as might be returned by
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
example that gets all the recipients of a message: example that gets all the recipients of a message::
from email.utils import getaddresses from email.utils import getaddresses
@ -102,25 +97,6 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', []) resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
is returned in its place. Other errors in parsing the list of
addresses such as a fieldvalue seemingly parsing into multiple
addresses may result in a list containing a single empty 2-tuple
``[('', '')]`` being returned rather than returning potentially
invalid output.
Example malformed input parsing:
.. doctest::
>>> from email.utils import getaddresses
>>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
[('', '')]
.. versionchanged:: 3.12
The 2-tuple of ``('', '')`` in the returned values when parsing
fails were added as to address a security issue.
.. function:: parsedate(date) .. function:: parsedate(date)

View file

@ -570,14 +570,6 @@ dis
:data:`~dis.hasarg` collection instead. :data:`~dis.hasarg` collection instead.
(Contributed by Irit Katriel in :gh:`94216`.) (Contributed by Irit Katriel in :gh:`94216`.)
email
-----
* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
``('', '')`` 2-tuples in more situations where invalid email addresses are
encountered instead of potentially inaccurate values.
(Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.)
fractions fractions
--------- ---------

View file

@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'):
return address return address
def _pre_parse_validation(email_header_fields):
accepted_values = []
for v in email_header_fields:
s = v.replace('\\(', '').replace('\\)', '')
if s.count('(') != s.count(')'):
v = "('', '')"
accepted_values.append(v)
return accepted_values
def _post_parse_validation(parsed_email_header_tuples):
accepted_values = []
# The parser would have parsed a correctly formatted domain-literal
# The existence of an [ after parsing indicates a parsing failure
for v in parsed_email_header_tuples:
if '[' in v[1]:
v = ('', '')
accepted_values.append(v)
return accepted_values
def getaddresses(fieldvalues): def getaddresses(fieldvalues):
"""Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
all = COMMASPACE.join(str(v) for v in fieldvalues)
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
its place.
If the resulting list of parsed address is not the same as the number of
fieldvalues in the input list a parsing error has occurred. A list
containing a single empty 2-tuple [('', '')] is returned in its place.
This is done to avoid invalid output.
"""
fieldvalues = [str(v) for v in fieldvalues]
fieldvalues = _pre_parse_validation(fieldvalues)
all = COMMASPACE.join(v for v in fieldvalues)
a = _AddressList(all) a = _AddressList(all)
result = _post_parse_validation(a.addresslist) return a.addresslist
n = 0
for v in fieldvalues:
n += v.count(',') + 1
if len(result) != n:
return [('', '')]
return result
def _format_timetuple_and_zone(timetuple, zone): def _format_timetuple_and_zone(timetuple, zone):
@ -254,18 +212,9 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', ''). which case return a 2-tuple of ('', '').
""" """
if isinstance(addr, list): addrs = _AddressList(addr).addresslist
addr = addr[0] if not addrs:
return '', ''
if not isinstance(addr, str):
return ('', '')
addr = _pre_parse_validation([addr])[0]
addrs = _post_parse_validation(_AddressList(addr).addresslist)
if not addrs or len(addrs) > 1:
return ('', '')
return addrs[0] return addrs[0]

View file

@ -3319,90 +3319,32 @@ Foo
[('Al Person', 'aperson@dom.ain'), [('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')]) ('Bud Person', 'bperson@dom.ain')])
def test_getaddresses_parsing_errors(self): def test_getaddresses_comma_in_name(self):
"""Test for parsing errors from CVE-2023-27043""" """GH-106669 regression test."""
eq = self.assertEqual self.assertEqual(
eq(utils.getaddresses(['alice@example.org(<bob@example.com>']), utils.getaddresses(
[('', '')]) [
eq(utils.getaddresses(['alice@example.org)<bob@example.com>']), '"Bud, Person" <bperson@dom.ain>',
[('', '')]) 'aperson@dom.ain (Al Person)',
eq(utils.getaddresses(['alice@example.org<<bob@example.com>']), '"Mariusz Felisiak" <to@example.com>',
[('', '')]) ]
eq(utils.getaddresses(['alice@example.org><bob@example.com>']), ),
[('', '')]) [
eq(utils.getaddresses(['alice@example.org@<bob@example.com>']), ('Bud, Person', 'bperson@dom.ain'),
[('', '')]) ('Al Person', 'aperson@dom.ain'),
eq(utils.getaddresses(['alice@example.org,<bob@example.com>']), ('Mariusz Felisiak', 'to@example.com'),
[('', 'alice@example.org'), ('', 'bob@example.com')]) ],
eq(utils.getaddresses(['alice@example.org;<bob@example.com>']), )
[('', '')])
eq(utils.getaddresses(['alice@example.org:<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org.<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org"<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org[<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org]<bob@example.com>']),
[('', '')])
def test_parseaddr_parsing_errors(self):
"""Test for parsing errors from CVE-2023-27043"""
eq = self.assertEqual
eq(utils.parseaddr(['alice@example.org(<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org)<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org<<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org><bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org@<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org,<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org;<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org:<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org.<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org"<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org[<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org]<bob@example.com>']),
('', ''))
def test_getaddresses_nasty(self): def test_getaddresses_nasty(self):
eq = self.assertEqual eq = self.assertEqual
eq(utils.getaddresses(['foo: ;']), [('', '')]) eq(utils.getaddresses(['foo: ;']), [('', '')])
eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) eq(utils.getaddresses(
['[]*-- =~$']),
[('', ''), ('', ''), ('', '*--')])
eq(utils.getaddresses( eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
eq(utils.getaddresses(
[r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
[('Pete (A nice ) chap his account his host)', 'pete@silly.test')])
eq(utils.getaddresses(
['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
[('', '')])
eq(utils.getaddresses(
['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']),
[('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')])
eq(utils.getaddresses(
['John Doe <jdoe@machine(comment). example>']),
[('John Doe (comment)', 'jdoe@machine.example')])
eq(utils.getaddresses(
['"Mary Smith: Personal Account" <smith@home.example>']),
[('Mary Smith: Personal Account', 'smith@home.example')])
eq(utils.getaddresses(
['Undisclosed recipients:;']),
[('', '')])
eq(utils.getaddresses(
[r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']),
[('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')])
def test_getaddresses_embedded_comment(self): def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment""" """Test proper handling of a nested comment"""

View file

@ -0,0 +1,4 @@
Reverted the :mod:`email.utils` security improvement change released in
3.12beta4 that unintentionally caused :func:`email.utils.getaddresses` to fail
to parse email addresses with a comma in the quoted name field.
See :gh:`106669`.