Mirror of https://github.com/python/cpython.git
Issue #1285086: Get rid of the refcounting hack and speed up urllib.unquote().

commit 923baea9f9
parent 2556c8388c
3 changed files with 58 additions and 18 deletions
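The change replaces the old string-concatenation loop in unquote() with list appends joined at the end, and adds an explicit branch for unicode input. As a rough sketch of the behaviour the patched function is expected to keep (Python 2.7; not output captured from this commit):

import urllib

# Percent escapes are decoded through the _hextochr lookup table.
print repr(urllib.unquote('abc%20def'))    # 'abc def'
# Unicode input now goes through the new _is_unicode() branch.
print repr(urllib.unquote(u'abc%20def'))   # u'abc def'
# No '%' at all: the len(bits) == 1 fast path returns the string unchanged.
print repr(urllib.unquote('plain'))        # 'plain'
# A malformed escape hits the KeyError branch and is kept literally.
print repr(urllib.unquote('%zz'))          # '%zz'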
Lib/urllib.py

@@ -28,6 +28,7 @@ import os
 import time
 import sys
 import base64
+import re
 
 from urlparse import urljoin as basejoin
 
@@ -1198,22 +1199,35 @@ def splitvalue(attr):
 _hexdig = '0123456789ABCDEFabcdef'
 _hextochr = dict((a + b, chr(int(a + b, 16)))
                  for a in _hexdig for b in _hexdig)
+_asciire = re.compile('([\x00-\x7f]+)')
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
-    res = s.split('%')
+    if _is_unicode(s):
+        if '%' not in s:
+            return s
+        bits = _asciire.split(s)
+        res = [bits[0]]
+        append = res.append
+        for i in range(1, len(bits), 2):
+            append(unquote(str(bits[i])).decode('latin1'))
+            append(bits[i + 1])
+        return ''.join(res)
+
+    bits = s.split('%')
     # fastpath
-    if len(res) == 1:
+    if len(bits) == 1:
         return s
-    s = res[0]
-    for item in res[1:]:
+    res = [bits[0]]
+    append = res.append
+    for item in bits[1:]:
         try:
-            s += _hextochr[item[:2]] + item[2:]
+            append(_hextochr[item[:2]])
+            append(item[2:])
         except KeyError:
-            s += '%' + item
-        except UnicodeDecodeError:
-            s += unichr(int(item[:2], 16)) + item[2:]
-    return s
+            append('%')
+            append(item)
+    return ''.join(res)
 
 def unquote_plus(s):
     """unquote('%7e/abc+def') -> '~/abc def'"""
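Since the point of the change is a speed-up, a micro-benchmark is the natural check. The sketch below reassembles the removed concatenation-based path from the '-' lines above under the hypothetical name unquote_old() and times it against the patched urllib.unquote(); it is illustrative only, and absolute numbers depend on the interpreter and machine.

import timeit
import urllib

def unquote_old(s):
    # Pre-patch implementation, reassembled from the removed lines above;
    # the module-global _hextochr is reached here as urllib._hextochr.
    res = s.split('%')
    if len(res) == 1:
        return s
    s = res[0]
    for item in res[1:]:
        try:
            s += urllib._hextochr[item[:2]] + item[2:]
        except KeyError:
            s += '%' + item
        except UnicodeDecodeError:
            s += unichr(int(item[:2], 16)) + item[2:]
    return s

sample = '%41%42%43' * 300   # many escapes: the worst case for repeated +=

print timeit.timeit(lambda: unquote_old(sample), number=1000)
print timeit.timeit(lambda: urllib.unquote(sample), number=1000)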
Lib/urlparse.py

@@ -28,6 +28,8 @@ test_urlparse.py provides a good indicator of parsing behavior.
 
 """
 
+import re
+
 __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
            "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"]
 
@@ -311,6 +313,15 @@ def urldefrag(url):
     else:
         return url, ''
 
+try:
+    unicode
+except NameError:
+    def _is_unicode(x):
+        return 0
+else:
+    def _is_unicode(x):
+        return isinstance(x, unicode)
+
 # unquote method for parse_qs and parse_qsl
 # Cannot use directly from urllib as it would create a circular reference
 # because urllib uses urlparse methods (urljoin). If you update this function,
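The helper added here mirrors the one urllib.py already defines, so the unquote() copy below can detect unicode input without assuming the unicode builtin exists. A minimal sketch of how it behaves on Python 2 (illustrative, not part of the commit):

import urlparse

print urlparse._is_unicode(u'abc%20def')   # True: unicode strings take the decode branch below
print urlparse._is_unicode('abc%20def')    # False: byte strings stay on the plain str path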
@@ -319,22 +330,35 @@ def urldefrag(url):
 _hexdig = '0123456789ABCDEFabcdef'
 _hextochr = dict((a+b, chr(int(a+b,16)))
                  for a in _hexdig for b in _hexdig)
+_asciire = re.compile('([\x00-\x7f]+)')
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
-    res = s.split('%')
+    if _is_unicode(s):
+        if '%' not in s:
+            return s
+        bits = _asciire.split(s)
+        res = [bits[0]]
+        append = res.append
+        for i in range(1, len(bits), 2):
+            append(unquote(str(bits[i])).decode('latin1'))
+            append(bits[i + 1])
+        return ''.join(res)
+
+    bits = s.split('%')
     # fastpath
-    if len(res) == 1:
+    if len(bits) == 1:
         return s
-    s = res[0]
-    for item in res[1:]:
+    res = [bits[0]]
+    append = res.append
+    for item in bits[1:]:
         try:
-            s += _hextochr[item[:2]] + item[2:]
+            append(_hextochr[item[:2]])
+            append(item[2:])
         except KeyError:
-            s += '%' + item
-        except UnicodeDecodeError:
-            s += unichr(int(item[:2], 16)) + item[2:]
-    return s
+            append('%')
+            append(item)
+    return ''.join(res)
 
 def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
     """Parse a query given as a string argument.
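Because parse_qs() and parse_qsl() call this private copy of unquote(), query-string parsing picks up the same fast paths. A short illustrative example (expected Python 2.7 behaviour, not output captured from this commit):

import urlparse

# parse_qsl() replaces '+' with a space before each name and value is unquoted.
print urlparse.parse_qsl('a=b+c&d=%7E')   # [('a', 'b c'), ('d', '~')]
print urlparse.parse_qs('x=1&x=2')        # {'x': ['1', '2']}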
Misc/NEWS

@@ -214,6 +214,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #1285086: Get rid of the refcounting hack and speed up urllib.unquote().
+
 - Issue #17368: Fix an off-by-one error in the Python JSON decoder that caused
   a failure while decoding empty object literals when object_pairs_hook was
   specified.