bpo-29979: Rewrite cgi.parse_multipart to make it consistent with FieldStorage (#991)

This commit is contained in:
Pierre Quentel 2017-05-08 14:08:34 +02:00 committed by Senthil Kumaran
parent f34c685020
commit cc3fa204d3
5 changed files with 38 additions and 102 deletions

View file

@ -198,105 +198,28 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
DeprecationWarning, 2)
return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
def parse_multipart(fp, pdict):
def parse_multipart(fp, pdict, encoding="utf-8"):
"""Parse multipart input.
Arguments:
fp : input file
pdict: dictionary containing other parameters of content-type header
encoding: request encoding
Returns a dictionary just like parse_qs(): keys are the field names, each
value is a list of values for that field. This is easy to use but not
much good if you are expecting megabytes to be uploaded -- in that case,
use the FieldStorage class instead which is much more flexible. Note
that content-type is the raw, unparsed contents of the content-type
header.
XXX This does not parse nested multipart parts -- use FieldStorage for
that.
XXX This should really be subsumed by FieldStorage altogether -- no
point in having two implementations of the same parsing algorithm.
Also, FieldStorage protects itself better against certain DoS attacks
by limiting the size of the data read in one chunk. The API here
does not support that kind of protection. This also affects parse()
since it can call parse_multipart().
value is a list of values for that field. For non-file fields, the value
is a list of strings.
"""
import http.client
boundary = b""
if 'boundary' in pdict:
boundary = pdict['boundary']
if not valid_boundary(boundary):
raise ValueError('Invalid boundary in multipart form: %r'
% (boundary,))
nextpart = b"--" + boundary
lastpart = b"--" + boundary + b"--"
partdict = {}
terminator = b""
while terminator != lastpart:
bytes = -1
data = None
if terminator:
# At start of next part. Read headers first.
headers = http.client.parse_headers(fp)
clength = headers.get('content-length')
if clength:
try:
bytes = int(clength)
except ValueError:
pass
if bytes > 0:
if maxlen and bytes > maxlen:
raise ValueError('Maximum content length exceeded')
data = fp.read(bytes)
else:
data = b""
# Read lines until end of part.
lines = []
while 1:
line = fp.readline()
if not line:
terminator = lastpart # End outer loop
break
if line.startswith(b"--"):
terminator = line.rstrip()
if terminator in (nextpart, lastpart):
break
lines.append(line)
# Done with part.
if data is None:
continue
if bytes < 0:
if lines:
# Strip final line terminator
line = lines[-1]
if line[-2:] == b"\r\n":
line = line[:-2]
elif line[-1:] == b"\n":
line = line[:-1]
lines[-1] = line
data = b"".join(lines)
line = headers['content-disposition']
if not line:
continue
key, params = parse_header(line)
if key != 'form-data':
continue
if 'name' in params:
name = params['name']
else:
continue
if name in partdict:
partdict[name].append(data)
else:
partdict[name] = [data]
return partdict
# RFC 2026, Section 5.1 : The "multipart" boundary delimiters are always
# represented as 7bit US-ASCII.
boundary = pdict['boundary'].decode('ascii')
ctype = "multipart/form-data; boundary={}".format(boundary)
headers = Message()
headers.set_type(ctype)
headers['Content-Length'] = pdict['CONTENT-LENGTH']
fs = FieldStorage(fp, headers=headers, encoding=encoding,
environ={'REQUEST_METHOD': 'POST'})
return {k: fs.getlist(k) for k in fs}
def _parseparam(s):
while s[:1] == ';':