mirror of
https://github.com/python/cpython.git
synced 2025-10-17 20:28:43 +00:00
Easy optimizations of urlparse for the common case of parsing an http URL.
1. Use dict.get instead of try/except KeyError. 2. If the URL scheme is 'http', avoid the series of 'if var in [someseq]:' tests and inline all of the code instead. 3. Bind find = string.find to a local name.
This commit is contained in:
parent
f6ae743cb5
commit
b85c8479eb
1 changed files with 33 additions and 8 deletions
|
@ -45,15 +45,40 @@ def clear_cache():
|
||||||
# (e.g. netloc is a single string) and we don't expand % escapes.
|
# (e.g. netloc is a single string) and we don't expand % escapes.
|
||||||
def urlparse(url, scheme = '', allow_fragments = 1):
|
def urlparse(url, scheme = '', allow_fragments = 1):
|
||||||
key = url, scheme, allow_fragments
|
key = url, scheme, allow_fragments
|
||||||
try:
|
cached = _parse_cache.get(key, None)
|
||||||
return _parse_cache[key]
|
if cached:
|
||||||
except KeyError:
|
return cached
|
||||||
pass
|
|
||||||
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
|
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
|
||||||
clear_cache()
|
clear_cache()
|
||||||
|
find = string.find
|
||||||
netloc = path = params = query = fragment = ''
|
netloc = path = params = query = fragment = ''
|
||||||
i = string.find(url, ':')
|
i = find(url, ':')
|
||||||
if i > 0:
|
if i > 0:
|
||||||
|
if url[:i] == 'http': # optimize the common case
|
||||||
|
scheme = string.lower(url[:i])
|
||||||
|
url = url[i+1:]
|
||||||
|
if url[:2] == '//':
|
||||||
|
i = find(url, '/', 2)
|
||||||
|
if i < 0:
|
||||||
|
i = len(url)
|
||||||
|
netloc = url[2:i]
|
||||||
|
url = url[i:]
|
||||||
|
if allow_fragments:
|
||||||
|
i = string.rfind(url, '#')
|
||||||
|
if i >= 0:
|
||||||
|
url = url[:i]
|
||||||
|
fragment = url[i+1:]
|
||||||
|
i = find(url, '?')
|
||||||
|
if i >= 0:
|
||||||
|
url = url[:i]
|
||||||
|
query = url[i+1:]
|
||||||
|
i = find(url, ';')
|
||||||
|
if i >= 0:
|
||||||
|
url = url[:i]
|
||||||
|
params = url[i+1:]
|
||||||
|
tuple = scheme, netloc, url, params, query, fragment
|
||||||
|
_parse_cache[key] = tuple
|
||||||
|
return tuple
|
||||||
for c in url[:i]:
|
for c in url[:i]:
|
||||||
if c not in scheme_chars:
|
if c not in scheme_chars:
|
||||||
break
|
break
|
||||||
|
@ -61,7 +86,7 @@ def urlparse(url, scheme = '', allow_fragments = 1):
|
||||||
scheme, url = string.lower(url[:i]), url[i+1:]
|
scheme, url = string.lower(url[:i]), url[i+1:]
|
||||||
if scheme in uses_netloc:
|
if scheme in uses_netloc:
|
||||||
if url[:2] == '//':
|
if url[:2] == '//':
|
||||||
i = string.find(url, '/', 2)
|
i = find(url, '/', 2)
|
||||||
if i < 0:
|
if i < 0:
|
||||||
i = len(url)
|
i = len(url)
|
||||||
netloc, url = url[2:i], url[i:]
|
netloc, url = url[2:i], url[i:]
|
||||||
|
@ -70,11 +95,11 @@ def urlparse(url, scheme = '', allow_fragments = 1):
|
||||||
if i >= 0:
|
if i >= 0:
|
||||||
url, fragment = url[:i], url[i+1:]
|
url, fragment = url[:i], url[i+1:]
|
||||||
if scheme in uses_query:
|
if scheme in uses_query:
|
||||||
i = string.find(url, '?')
|
i = find(url, '?')
|
||||||
if i >= 0:
|
if i >= 0:
|
||||||
url, query = url[:i], url[i+1:]
|
url, query = url[:i], url[i+1:]
|
||||||
if scheme in uses_params:
|
if scheme in uses_params:
|
||||||
i = string.find(url, ';')
|
i = find(url, ';')
|
||||||
if i >= 0:
|
if i >= 0:
|
||||||
url, params = url[:i], url[i+1:]
|
url, params = url[:i], url[i+1:]
|
||||||
tuple = scheme, netloc, url, params, query, fragment
|
tuple = scheme, netloc, url, params, query, fragment
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue