mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
#23745: handle duplicate MIME parameter names in new parser.
This mimics get_param's error handling for the most part. It is slightly better in some regards, as get_param can produce some really weird results for duplicate *0* parts. It departs from get_param slightly in that, if we have a mix of non-extended and extended pieces for the same parameter name, the new parser assumes they were all supposed to be extended and concatenates all the values, whereas get_param always picks the non-extended parameter value. All of this error recovery amounts to pretty much arbitrary decisions...
This commit is contained in:
parent
a3a100b594
commit
7d0325d6c8
3 changed files with 139 additions and 7 deletions
|
@ -71,6 +71,7 @@ import re
|
|||
import urllib # For urllib.parse.unquote
|
||||
from string import hexdigits
|
||||
from collections import OrderedDict
|
||||
from operator import itemgetter
|
||||
from email import _encoded_words as _ew
|
||||
from email import errors
|
||||
from email import utils
|
||||
|
@ -1098,15 +1099,34 @@ class MimeParameters(TokenList):
|
|||
params[name] = []
|
||||
params[name].append((token.section_number, token))
|
||||
for name, parts in params.items():
|
||||
parts = sorted(parts)
|
||||
# XXX: there might be more recovery we could do here if, for
|
||||
# example, this is really a case of a duplicate attribute name.
|
||||
parts = sorted(parts, key=itemgetter(0))
|
||||
first_param = parts[0][1]
|
||||
charset = first_param.charset
|
||||
# Our arbitrary error recovery is to ignore duplicate parameters,
|
||||
# to use appearance order if there are duplicate rfc 2231 parts,
|
||||
# and to ignore gaps. This mimics the error recovery of get_param.
|
||||
if not first_param.extended and len(parts) > 1:
|
||||
if parts[1][0] == 0:
|
||||
parts[1][1].defects.append(errors.InvalidHeaderDefect(
|
||||
'duplicate parameter name; duplicate(s) ignored'))
|
||||
parts = parts[:1]
|
||||
# Else assume the *0* was missing...note that this is different
|
||||
# from get_param, but we registered a defect for this earlier.
|
||||
value_parts = []
|
||||
charset = parts[0][1].charset
|
||||
for i, (section_number, param) in enumerate(parts):
|
||||
i = 0
|
||||
for section_number, param in parts:
|
||||
if section_number != i:
|
||||
param.defects.append(errors.InvalidHeaderDefect(
|
||||
"inconsistent multipart parameter numbering"))
|
||||
# We could get fancier here and look for a complete
|
||||
# duplicate extended parameter and ignore the second one
|
||||
# seen. But we're not doing that. The old code didn't.
|
||||
if not param.extended:
|
||||
param.defects.append(errors.InvalidHeaderDefect(
|
||||
'duplicate parameter name; duplicate ignored'))
|
||||
continue
|
||||
else:
|
||||
param.defects.append(errors.InvalidHeaderDefect(
|
||||
"inconsistent RFC2231 parameter numbering"))
|
||||
i += 1
|
||||
value = param.param_value
|
||||
if param.extended:
|
||||
try:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue