Merged revisions 82510 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r82510 | senthil.kumaran | 2010-07-03 23:18:22 +0530 (Sat, 03 Jul 2010) | 4 lines

  Fix Issue5468 - urlencode to handle bytes and other alternate encodings.
  (Extensive tests provided). Patch by Dan Mahn.
........
This commit is contained in:
Senthil Kumaran 2010-07-03 17:55:41 +00:00
parent 8e42fb7ada
commit fe1ad15b4b
4 changed files with 165 additions and 24 deletions

View file

@ -797,6 +797,116 @@ class urlencode_Tests(unittest.TestCase):
self.assertEqual("a=a&a=b",
urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
def test_urlencode_encoding(self):
# ASCII encoding. Expect %3F with errors="replace'
given = (('\u00a0', '\u00c1'),)
expect = '%3F=%3F'
result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
self.assertEqual(expect, result)
# Default is UTF-8 encoding.
given = (('\u00a0', '\u00c1'),)
expect = '%C2%A0=%C3%81'
result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
# Latin-1 encoding.
given = (('\u00a0', '\u00c1'),)
expect = '%A0=%C1'
result = urllib.parse.urlencode(given, encoding="latin-1")
self.assertEqual(expect, result)
def test_urlencode_encoding_doseq(self):
# ASCII Encoding. Expect %3F with errors="replace'
given = (('\u00a0', '\u00c1'),)
expect = '%3F=%3F'
result = urllib.parse.urlencode(given, doseq=True,
encoding="ASCII", errors="replace")
self.assertEqual(expect, result)
# ASCII Encoding. On a sequence of values.
given = (("\u00a0", (1, "\u00c1")),)
expect = '%3F=1&%3F=%3F'
result = urllib.parse.urlencode(given, True,
encoding="ASCII", errors="replace")
self.assertEqual(expect, result)
# Utf-8
given = (("\u00a0", "\u00c1"),)
expect = '%C2%A0=%C3%81'
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
given = (("\u00a0", (42, "\u00c1")),)
expect = '%C2%A0=42&%C2%A0=%C3%81'
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
# latin-1
given = (("\u00a0", "\u00c1"),)
expect = '%A0=%C1'
result = urllib.parse.urlencode(given, True, encoding="latin-1")
self.assertEqual(expect, result)
given = (("\u00a0", (42, "\u00c1")),)
expect = '%A0=42&%A0=%C1'
result = urllib.parse.urlencode(given, True, encoding="latin-1")
self.assertEqual(expect, result)
def test_urlencode_bytes(self):
given = ((b'\xa0\x24', b'\xc1\x24'),)
expect = '%A0%24=%C1%24'
result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
# Sequence of values
given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
expect = '%A0%24=42&%A0%24=%C1%24'
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
def test_urlencode_encoding_safe_parameter(self):
# Send '$' (\x24) as safe character
# Default utf-8 encoding
given = ((b'\xa0\x24', b'\xc1\x24'),)
result = urllib.parse.urlencode(given, safe=":$")
expect = '%A0$=%C1$'
self.assertEqual(expect, result)
given = ((b'\xa0\x24', b'\xc1\x24'),)
result = urllib.parse.urlencode(given, doseq=True, safe=":$")
expect = '%A0$=%C1$'
self.assertEqual(expect, result)
# Safe parameter in sequence
given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
expect = '%A0$=%C1$&%A0$=13&%A0$=42'
result = urllib.parse.urlencode(given, True, safe=":$")
self.assertEqual(expect, result)
# Test all above in latin-1 encoding
given = ((b'\xa0\x24', b'\xc1\x24'),)
result = urllib.parse.urlencode(given, safe=":$",
encoding="latin-1")
expect = '%A0$=%C1$'
self.assertEqual(expect, result)
given = ((b'\xa0\x24', b'\xc1\x24'),)
expect = '%A0$=%C1$'
result = urllib.parse.urlencode(given, doseq=True, safe=":$",
encoding="latin-1")
given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
expect = '%A0$=%C1$&%A0$=13&%A0$=42'
result = urllib.parse.urlencode(given, True, safe=":$",
encoding="latin-1")
self.assertEqual(expect, result)
class Pathname_Tests(unittest.TestCase):
"""Test pathname2url() and url2pathname()"""

View file

@ -533,7 +533,7 @@ def quote_from_bytes(bs, safe='/'):
_safe_quoters[cachekey] = quoter
return ''.join([quoter[char] for char in bs])
def urlencode(query, doseq=False):
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
If any values in the query arg are sequences and doseq is true, each
@ -542,6 +542,10 @@ def urlencode(query, doseq=False):
If the query arg is a sequence of two-element tuples, the order of the
parameters in the output will match the order of parameters in the
input.
The components of the query arg may each be either a string or a bytes
type. When a component is a string, the safe, encoding and errors
parameters are passed to quote_plus for encoding.
"""
if hasattr(query, "items"):
@ -566,14 +570,28 @@ def urlencode(query, doseq=False):
l = []
if not doseq:
for k, v in query:
k = quote_plus(str(k))
v = quote_plus(str(v))
if isinstance(k, bytes):
k = quote_plus(k, safe)
else:
k = quote_plus(str(k), safe, encoding, errors)
if isinstance(v, bytes):
v = quote_plus(v, safe)
else:
v = quote_plus(str(v), safe, encoding, errors)
l.append(k + '=' + v)
else:
for k, v in query:
k = quote_plus(str(k))
if isinstance(v, str):
v = quote_plus(v)
if isinstance(k, bytes):
k = quote_plus(k, safe)
else:
k = quote_plus(str(k), safe, encoding, errors)
if isinstance(v, bytes):
v = quote_plus(v, safe)
l.append(k + '=' + v)
elif isinstance(v, str):
v = quote_plus(v, safe, encoding, errors)
l.append(k + '=' + v)
else:
try:
@ -581,12 +599,16 @@ def urlencode(query, doseq=False):
x = len(v)
except TypeError:
# not a sequence
v = quote_plus(str(v))
v = quote_plus(str(v), safe, encoding, errors)
l.append(k + '=' + v)
else:
# loop over the sequence
for elt in v:
l.append(k + '=' + quote_plus(str(elt)))
if isinstance(elt, bytes):
elt = quote_plus(elt, safe)
else:
elt = quote_plus(str(elt), safe, encoding, errors)
l.append(k + '=' + elt)
return '&'.join(l)
# Utilities to parse URLs (most of these return None for missing parts):