Fixing Issue1712522 - urllib.quote to support Unicode. The defaults are
encoding='utf-8' and errors='strict'.
This commit is contained in:
Senthil Kumaran 2010-07-18 02:27:10 +00:00
parent 5d10d33cd5
commit 5dba6dfe6a
4 changed files with 152 additions and 7 deletions

View file

@ -1193,7 +1193,7 @@ for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
_safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
_safe_quoters = {}
def quote(s, safe='/'):
def quote(s, safe='/', encoding=None, errors=None):
"""quote('abc def') -> 'abc%20def'
Each part of a URL, e.g. the path info, the query, etc., has a
@ -1213,10 +1213,28 @@ def quote(s, safe='/'):
is reserved, but in typical usage the quote function is being
called on a path where the existing slash characters are used as
reserved characters.
string and safe may be either str or unicode objects.
The optional encoding and errors parameters specify how to deal with the
non-ASCII characters, as accepted by the unicode.encode method.
By default, encoding='utf-8' (characters are encoded with UTF-8), and
errors='strict' (unsupported characters raise a UnicodeEncodeError).
"""
# fastpath
if not s:
return s
if encoding is not None or isinstance(s, unicode):
if encoding is None:
encoding = 'utf-8'
if errors is None:
errors = 'strict'
s = s.encode(encoding, errors)
if isinstance(safe, unicode):
# Normalize 'safe' by converting to str and removing non-ASCII chars
safe = safe.encode('ascii', 'ignore')
cachekey = (safe, always_safe)
try:
(quoter, safe) = _safe_quoters[cachekey]
@ -1230,12 +1248,12 @@ def quote(s, safe='/'):
return s
return ''.join(map(quoter, s))
def quote_plus(s, safe=''):
def quote_plus(s, safe='', encoding=None, errors=None):
"""Quote the query fragment of a URL; replacing ' ' with '+'"""
if ' ' in s:
s = quote(s, safe + ' ')
s = quote(s, safe + ' ', encoding, errors)
return s.replace(' ', '+')
return quote(s, safe)
return quote(s, safe, encoding, errors)
def urlencode(query, doseq=0):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.