mirror of
https://github.com/python/cpython.git
synced 2025-08-15 22:30:42 +00:00
[3.8] bpo-37764: Fix infinite loop when parsing unstructured email headers. (GH-15239) (GH-15686)
Fixes a case in which email._header_value_parser.get_unstructured hangs (loops forever) on some invalid headers. This covers the cases in which the header contains either:
- an encoded word without trailing whitespace
- an invalid encoded word
https://bugs.python.org/issue37764
This fix should also be backported to 3.7 and 3.8
https://bugs.python.org/issue37764
(cherry picked from commit c5b242f87f)
Co-authored-by: Ashwin Ramaswami <aramaswamis@gmail.com>
This commit is contained in:
parent
6d7a786d2e
commit
6ad0a2c45f
5 changed files with 55 additions and 3 deletions
|
@ -935,6 +935,10 @@ class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
|
|||
return ''
|
||||
|
||||
|
||||
class _InvalidEwError(errors.HeaderParseError):
|
||||
"""Invalid encoded word found while parsing headers."""
|
||||
|
||||
|
||||
# XXX these need to become classes and used as instances so
|
||||
# that a program can't change them in a parse tree and screw
|
||||
# up other parse trees. Maybe should have tests for that, too.
|
||||
|
@ -1039,7 +1043,10 @@ def get_encoded_word(value):
|
|||
raise errors.HeaderParseError(
|
||||
"expected encoded word but found {}".format(value))
|
||||
remstr = ''.join(remainder)
|
||||
if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits:
|
||||
if (len(remstr) > 1 and
|
||||
remstr[0] in hexdigits and
|
||||
remstr[1] in hexdigits and
|
||||
tok.count('?') < 2):
|
||||
# The ? after the CTE was followed by an encoded word escape (=XX).
|
||||
rest, *remainder = remstr.split('?=', 1)
|
||||
tok = tok + '?=' + rest
|
||||
|
@ -1051,7 +1058,7 @@ def get_encoded_word(value):
|
|||
try:
|
||||
text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
|
||||
except ValueError:
|
||||
raise errors.HeaderParseError(
|
||||
raise _InvalidEwError(
|
||||
"encoded word format invalid: '{}'".format(ew.cte))
|
||||
ew.charset = charset
|
||||
ew.lang = lang
|
||||
|
@ -1101,9 +1108,12 @@ def get_unstructured(value):
|
|||
token, value = get_fws(value)
|
||||
unstructured.append(token)
|
||||
continue
|
||||
valid_ew = True
|
||||
if value.startswith('=?'):
|
||||
try:
|
||||
token, value = get_encoded_word(value)
|
||||
except _InvalidEwError:
|
||||
valid_ew = False
|
||||
except errors.HeaderParseError:
|
||||
# XXX: Need to figure out how to register defects when
|
||||
# appropriate here.
|
||||
|
@ -1125,7 +1135,10 @@ def get_unstructured(value):
|
|||
# Split in the middle of an atom if there is a rfc2047 encoded word
|
||||
# which does not have WSP on both sides. The defect will be registered
|
||||
# the next time through the loop.
|
||||
if rfc2047_matcher.search(tok):
|
||||
# This needs to only be performed when the encoded word is valid;
|
||||
# otherwise, performing it on an invalid encoded word can cause
|
||||
# the parser to go in an infinite loop.
|
||||
if valid_ew and rfc2047_matcher.search(tok):
|
||||
tok, *remainder = value.partition('=?')
|
||||
vtext = ValueTerminal(tok, 'vtext')
|
||||
_validate_xtext(vtext)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue