[3.9] bpo-42967: only use '&' as a query string separator (GH-24297) (#24528)

(cherry picked from commit fcbe0cb04d)

* [3.9] bpo-42967: only use '&' as a query string separator (GH-24297)

bpo-42967: [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl().

urllib.parse will only us "&" as query string separator by default instead of both ";" and "&" as allowed in earlier versions. An optional argument seperator with default value "&" is added to specify the separator.

Co-authored-by: Éric Araujo <merwok@netwok.org>
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
This commit is contained in:
Senthil Kumaran 2021-02-15 10:03:31 -08:00 committed by GitHub
parent 242f6c9ffe
commit c9f07813ab
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 176 additions and 46 deletions

View file

@ -662,7 +662,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace', max_num_fields=None):
encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
"""Parse a query given as a string argument.
Arguments:
@ -686,12 +686,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
max_num_fields: int. If set, then throws a ValueError if there
are more than n fields read by parse_qsl().
separator: str. The symbol to use for separating the query arguments.
Defaults to &.
Returns a dictionary.
"""
parsed_result = {}
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
encoding=encoding, errors=errors,
max_num_fields=max_num_fields)
max_num_fields=max_num_fields, separator=separator)
for name, value in pairs:
if name in parsed_result:
parsed_result[name].append(value)
@ -701,7 +704,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace', max_num_fields=None):
encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
"""Parse a query given as a string argument.
Arguments:
@ -724,19 +727,25 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
max_num_fields: int. If set, then throws a ValueError
if there are more than n fields read by parse_qsl().
separator: str. The symbol to use for separating the query arguments.
Defaults to &.
Returns a list, as G-d intended.
"""
qs, _coerce_result = _coerce_args(qs)
if not separator or (not isinstance(separator, (str, bytes))):
raise ValueError("Separator must be of type string or bytes.")
# If max_num_fields is defined then check that the number of fields
# is less than max_num_fields. This prevents a memory exhaustion DOS
# attack via post bodies with many fields.
if max_num_fields is not None:
num_fields = 1 + qs.count('&') + qs.count(';')
num_fields = 1 + qs.count(separator)
if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded')
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
pairs = [s1 for s1 in qs.split(separator)]
r = []
for name_value in pairs:
if not name_value and not strict_parsing: