gh-127794: Validate email header names according to RFC 5322 (#127820)

`email.message.Message` objects now validate header names specified via `__setitem__`
or `add_header` according to RFC 5322, §2.2 [1].

In particular, callers should expect a ValueError to be raised for invalid header names.

[1]: https://datatracker.ietf.org/doc/html/rfc5322#section-2.2

---------

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Co-authored-by: R. David Murray <rdmurray@bitdance.com>
This commit is contained in:
Srinivas Reddy Thatiparthy (తాటిపర్తి శ్రీనివాస్ రెడ్డి) 2025-03-30 17:59:29 +05:30 committed by GitHub
parent 55150a79ca
commit c432d0147b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 71 additions and 1 deletions

View file

@ -4,6 +4,7 @@ Allows fine grained feature control of how the package parses and emits data.
""" """
import abc import abc
import re
from email import header from email import header
from email import charset as _charset from email import charset as _charset
from email.utils import _has_surrogates from email.utils import _has_surrogates
@ -14,6 +15,14 @@ __all__ = [
'compat32', 'compat32',
] ]
# Field-name characters permitted by RFC 5322, section 2.2: printable
# US-ASCII (octal 041-176, i.e. "!" through "~") except the colon (072).
# Equivalent to re.compile(r"[!-9;-~]+\Z").  The pattern is anchored with
# \Z rather than $ because $ also matches just before a trailing newline,
# which would let a name such as "Subject\n" pass validation.
valid_header_name_re = re.compile("[\041-\071\073-\176]+\\Z")


def validate_header_name(name):
    """Validate a header field name according to RFC 5322, section 2.2.

    Return None if *name* is acceptable.  Raise ValueError if *name* is
    empty or contains any character other than printable US-ASCII
    characters excluding the colon.
    """
    # fullmatch() requires the pattern to consume the entire string, so a
    # valid prefix followed by invalid characters is rejected.
    if not valid_header_name_re.fullmatch(name):
        raise ValueError(
            f"Header field name contains invalid characters: {name!r}")
class _PolicyBase: class _PolicyBase:
@ -314,6 +323,7 @@ class Compat32(Policy):
"""+ """+
The name and value are returned unmodified. The name and value are returned unmodified.
""" """
validate_header_name(name)
return (name, value) return (name, value)
def header_fetch_parse(self, name, value): def header_fetch_parse(self, name, value):

View file

@ -4,7 +4,13 @@ code that adds all the email6 features.
import re import re
import sys import sys
from email._policybase import Policy, Compat32, compat32, _extend_docstrings from email._policybase import (
Compat32,
Policy,
_extend_docstrings,
compat32,
validate_header_name
)
from email.utils import _has_surrogates from email.utils import _has_surrogates
from email.headerregistry import HeaderRegistry as HeaderRegistry from email.headerregistry import HeaderRegistry as HeaderRegistry
from email.contentmanager import raw_data_manager from email.contentmanager import raw_data_manager
@ -138,6 +144,7 @@ class EmailPolicy(Policy):
CR or LF characters. CR or LF characters.
""" """
validate_header_name(name)
if hasattr(value, 'name') and value.name.lower() == name.lower(): if hasattr(value, 'name') and value.name.lower() == name.lower():
return (name, value) return (name, value)
if isinstance(value, str) and len(value.splitlines())>1: if isinstance(value, str) and len(value.splitlines())>1:

View file

@ -728,6 +728,31 @@ class TestMessageAPI(TestEmailBase):
"attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt", "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
msg['Content-Disposition']) msg['Content-Disposition'])
def test_invalid_header_names(self):
    # Each entry is (header name, description).  The description is passed
    # as the header value and is reported by subTest() when a case fails,
    # so it should say accurately why the name is invalid.
    invalid_headers = [
        ('Invalid Header', 'contains space'),
        ('Tab\tHeader', 'contains tab'),
        ('Colon:Header', 'contains colon'),
        ('', 'Empty name'),
        (' LeadingSpace', 'starts with space'),
        ('TrailingSpace ', 'ends with space'),
        # U+007F (DEL) is within ASCII, but it is a control character.
        ('Header\x7F', 'contains DEL control character'),
        # U+0080 is the first code point outside the ASCII range.
        ('Header\x80', 'contains non-ASCII character'),
    ]
    # Exercise every combination of policy and header-setting method.
    for policy in (email.policy.default, email.policy.compat32):
        for setter in (Message.__setitem__, Message.add_header):
            for name, value in invalid_headers:
                self.do_test_invalid_header_names(
                    policy, setter, name, value)
def do_test_invalid_header_names(self, policy, setter, name, value):
    # Helper for test_invalid_header_names: set one header through *setter*
    # on a fresh Message created with *policy* and check that it is rejected.
    #
    # The lookaheads require the words "invalid", "header" and "name" to
    # appear in the error message, case-insensitively and in any order.
    expected = r'(?i)(?=.*invalid)(?=.*header)(?=.*name)'
    with self.subTest(policy=policy, setter=setter, name=name, value=value):
        message = Message(policy=policy)
        with self.assertRaisesRegex(ValueError, expected) as caught:
            setter(message, name, value)
        # The message must also quote the offending name via its repr().
        self.assertIn(repr(name), str(caught.exception))
def test_binary_quopri_payload(self): def test_binary_quopri_payload(self):
for charset in ('latin-1', 'ascii'): for charset in ('latin-1', 'ascii'):
msg = Message() msg = Message()

View file

@ -1004,6 +1004,30 @@ class TestEmailMessage(TestEmailMessageBase, TestEmailBase):
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
def test_invalid_header_names(self):
    # Each entry is (header name, description).  The description is passed
    # as the header value and is reported by subTest() when a case fails,
    # so it should say accurately why the name is invalid.
    invalid_headers = [
        ('Invalid Header', 'contains space'),
        ('Tab\tHeader', 'contains tab'),
        ('Colon:Header', 'contains colon'),
        ('', 'Empty name'),
        (' LeadingSpace', 'starts with space'),
        ('TrailingSpace ', 'ends with space'),
        # U+007F (DEL) is within ASCII, but it is a control character.
        ('Header\x7F', 'contains DEL control character'),
        # U+0080 is the first code point outside the ASCII range.
        ('Header\x80', 'contains non-ASCII character'),
    ]
    # Exercise every combination of policy and header-setting method.
    for email_policy in (policy.default, policy.compat32):
        for setter in (EmailMessage.__setitem__, EmailMessage.add_header):
            for name, value in invalid_headers:
                self.do_test_invalid_header_names(
                    email_policy, setter, name, value)
def do_test_invalid_header_names(self, policy, setter, name, value):
    # Helper for test_invalid_header_names: set one header through *setter*
    # on a fresh EmailMessage created with *policy* and check that it is
    # rejected.
    #
    # The lookaheads require the words "invalid", "header" and "name" to
    # appear in the error message, case-insensitively and in any order.
    expected = r'(?i)(?=.*invalid)(?=.*header)(?=.*name)'
    with self.subTest(policy=policy, setter=setter, name=name, value=value):
        message = EmailMessage(policy=policy)
        with self.assertRaisesRegex(ValueError, expected) as caught:
            setter(message, name, value)
        # The message must also quote the offending name via its repr().
        self.assertIn(repr(name), str(caught.exception))
def test_get_body_malformed(self): def test_get_body_malformed(self):
"""test for bpo-42892""" """test for bpo-42892"""
msg = textwrap.dedent("""\ msg = textwrap.dedent("""\

View file

@ -0,0 +1,4 @@
When headers are added to :class:`email.message.Message` objects, either through
:meth:`email.message.Message.__setitem__` or :meth:`email.message.Message.add_header`,
the field name is now validated according to :rfc:`RFC 5322, Section 2.2 <5322#section-2.2>`
and a :exc:`ValueError` is raised if the field name contains any invalid characters.