Issue 600362: Relocated parse_qs() and parse_qsl(), from the cgi module

to the urlparse one.  Added a DeprecationWarning in the old module, it
will be deprecated in the future.  Docs and tests updated.
This commit is contained in:
Facundo Batista 2008-09-03 22:49:01 +00:00
parent 849f79a5d6
commit c469d4c3aa
7 changed files with 159 additions and 131 deletions

View file

@ -37,6 +37,7 @@ import sys
import os
import urllib.parse
import email.parser
from warnings import warn
__all__ = ["MiniFieldStorage", "FieldStorage",
"parse", "parse_qs", "parse_qsl", "parse_multipart",
@ -153,75 +154,23 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
else:
qs = ""
environ['QUERY_STRING'] = qs # XXX Shouldn't, really
return parse_qs(qs, keep_blank_values, strict_parsing)
return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
# parse query string function called from urlparse,
# this is done in order to maintain backward compatiblity.
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
"""Parse a query given as a string argument.
Arguments:
qs: URL-encoded query string to be parsed
keep_blank_values: flag indicating whether blank values in
URL encoded queries should be treated as blank strings.
A true value indicates that blanks should be retained as
blank strings. The default false value indicates that
blank values are to be ignored and treated as if they were
not included.
strict_parsing: flag indicating what to do with parsing errors.
If false (the default), errors are silently ignored.
If true, errors raise a ValueError exception.
"""
dict = {}
for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
if name in dict:
dict[name].append(value)
else:
dict[name] = [value]
return dict
"""Parse a query given as a string argument."""
warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
DeprecationWarning)
return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
"""Parse a query given as a string argument.
Arguments:
qs: URL-encoded query string to be parsed
keep_blank_values: flag indicating whether blank values in
URL encoded queries should be treated as blank strings. A
true value indicates that blanks should be retained as blank
strings. The default false value indicates that blank values
are to be ignored and treated as if they were not included.
strict_parsing: flag indicating what to do with parsing errors. If
false (the default), errors are silently ignored. If true,
errors raise a ValueError exception.
Returns a list, as G-d intended.
"""
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
r = []
for name_value in pairs:
if not name_value and not strict_parsing:
continue
nv = name_value.split('=', 1)
if len(nv) != 2:
if strict_parsing:
raise ValueError("bad query field: %r" % (name_value,))
# Handle case of a control-name with no equal sign
if keep_blank_values:
nv.append('')
else:
continue
if len(nv[1]) or keep_blank_values:
name = urllib.parse.unquote(nv[0].replace('+', ' '))
value = urllib.parse.unquote(nv[1].replace('+', ' '))
r.append((name, value))
return r
"""Parse a query given as a string argument."""
warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qs instead",
DeprecationWarning)
return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
def parse_multipart(fp, pdict):
"""Parse multipart input.
@ -624,8 +573,8 @@ class FieldStorage:
if self.qs_on_post:
qs += '&' + self.qs_on_post
self.list = list = []
for key, value in parse_qsl(qs, self.keep_blank_values,
self.strict_parsing):
for key, value in urllib.parse.parse_qsl(qs, self.keep_blank_values,
self.strict_parsing):
list.append(MiniFieldStorage(key, value))
self.skip_lines()
@ -638,8 +587,8 @@ class FieldStorage:
raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
self.list = []
if self.qs_on_post:
for key, value in parse_qsl(self.qs_on_post, self.keep_blank_values,
self.strict_parsing):
for key, value in urllib.parse.parse_qsl(self.qs_on_post,
self.keep_blank_values, self.strict_parsing):
self.list.append(MiniFieldStorage(key, value))
FieldStorageClass = None

View file

@ -53,25 +53,6 @@ def do_test(buf, method):
except Exception as err:
return ComparableException(err)
# A list of test cases. Each test case is a a two-tuple that contains
# a string with the query and a dictionary with the expected result.
parse_qsl_test_cases = [
("", []),
("&", []),
("&&", []),
("=", [('', '')]),
("=a", [('', 'a')]),
("a", [('a', '')]),
("a=", [('a', '')]),
("a=", [('a', '')]),
("&a=b", [('a', 'b')]),
("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
("a=1&a=2", [('a', '1'), ('a', '2')]),
("a=%26&b=%3D", [('a', '&'), ('b', '=')]),
("a=%C3%BC&b=%CA%83", [('a', '\xfc'), ('b', '\u0283')]),
]
parse_strict_test_cases = [
("", ValueError("bad query field: ''")),
("&", ValueError("bad query field: ''")),
@ -141,11 +122,6 @@ def gen_result(data, environ):
class CgiTests(unittest.TestCase):
def test_qsl(self):
for orig, expect in parse_qsl_test_cases:
result = cgi.parse_qsl(orig, keep_blank_values=True)
self.assertEqual(result, expect, "Error parsing %s" % repr(orig))
def test_strict(self):
for orig, expect in parse_strict_test_cases:
# Test basic parsing

View file

@ -8,6 +8,23 @@ RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = "http://a/b/c/d;p?q"
# A list of test cases. Each test case is a a two-tuple that contains
# a string with the query and a dictionary with the expected result.
parse_qsl_test_cases = [
("", []),
("&", []),
("&&", []),
("=", [('', '')]),
("=a", [('', 'a')]),
("a", [('a', '')]),
("a=", [('a', '')]),
("a=", [('a', '')]),
("&a=b", [('a', 'b')]),
("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
("a=1&a=2", [('a', '1'), ('a', '2')]),
]
class UrlParseTestCase(unittest.TestCase):
def checkRoundtrips(self, url, parsed, split):
@ -61,6 +78,12 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(result3.hostname, result.hostname)
self.assertEqual(result3.port, result.port)
def test_qsl(self):
for orig, expect in parse_qsl_test_cases:
result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
self.assertEqual(result, expect, "Error parsing %s" % repr(orig))
def test_roundtrips(self):
testcases = [
('file:///tmp/junk.txt',

View file

@ -8,7 +8,7 @@ import sys
import collections
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit",
"urlsplit", "urlunsplit", "parse_qs", "parse_qsl",
"quote", "quote_plus", "quote_from_bytes",
"unquote", "unquote_plus", "unquote_to_bytes"]
@ -329,6 +329,72 @@ def unquote(string, encoding='utf-8', errors='replace'):
res[-1] = b''.join(pct_sequence).decode(encoding, errors)
return ''.join(res)
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
"""Parse a query given as a string argument.
Arguments:
qs: URL-encoded query string to be parsed
keep_blank_values: flag indicating whether blank values in
URL encoded queries should be treated as blank strings.
A true value indicates that blanks should be retained as
blank strings. The default false value indicates that
blank values are to be ignored and treated as if they were
not included.
strict_parsing: flag indicating what to do with parsing errors.
If false (the default), errors are silently ignored.
If true, errors raise a ValueError exception.
"""
dict = {}
for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
if name in dict:
dict[name].append(value)
else:
dict[name] = [value]
return dict
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
"""Parse a query given as a string argument.
Arguments:
qs: URL-encoded query string to be parsed
keep_blank_values: flag indicating whether blank values in
URL encoded queries should be treated as blank strings. A
true value indicates that blanks should be retained as blank
strings. The default false value indicates that blank values
are to be ignored and treated as if they were not included.
strict_parsing: flag indicating what to do with parsing errors. If
false (the default), errors are silently ignored. If true,
errors raise a ValueError exception.
Returns a list, as G-d intended.
"""
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
r = []
for name_value in pairs:
if not name_value and not strict_parsing:
continue
nv = name_value.split('=', 1)
if len(nv) != 2:
if strict_parsing:
raise ValueError("bad query field: %r" % (name_value,))
# Handle case of a control-name with no equal sign
if keep_blank_values:
nv.append('')
else:
continue
if len(nv[1]) or keep_blank_values:
name = unquote(nv[0].replace('+', ' '))
value = unquote(nv[1].replace('+', ' '))
r.append((name, value))
return r
def unquote_plus(string, encoding='utf-8', errors='replace'):
"""Like unquote(), but also replace plus signs by spaces, as required for
unquoting HTML form values.