mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
gh-95865: Further reduce quote_from_bytes memory consumption on large input values (#96860)
Based on Dennis Sweeney's chunking idea.
This commit is contained in:
parent
04f4977f50
commit
e61ca22431
3 changed files with 16 additions and 1 deletions
|
@ -985,6 +985,10 @@ class UrlParseTestCase(unittest.TestCase):
|
||||||
self.assertEqual(result, 'archaeological%20arcana')
|
self.assertEqual(result, 'archaeological%20arcana')
|
||||||
result = urllib.parse.quote_from_bytes(b'')
|
result = urllib.parse.quote_from_bytes(b'')
|
||||||
self.assertEqual(result, '')
|
self.assertEqual(result, '')
|
||||||
|
result = urllib.parse.quote_from_bytes(b'A'*10_000)
|
||||||
|
self.assertEqual(result, 'A'*10_000)
|
||||||
|
result = urllib.parse.quote_from_bytes(b'z\x01/ '*253_183)
|
||||||
|
self.assertEqual(result, 'z%01/%20'*253_183)
|
||||||
|
|
||||||
def test_unquote_to_bytes(self):
|
def test_unquote_to_bytes(self):
|
||||||
result = urllib.parse.unquote_to_bytes('abc%20def')
|
result = urllib.parse.unquote_to_bytes('abc%20def')
|
||||||
|
|
|
@ -29,6 +29,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
|
||||||
|
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import functools
|
import functools
|
||||||
|
import math
|
||||||
import re
|
import re
|
||||||
import types
|
import types
|
||||||
import warnings
|
import warnings
|
||||||
|
@ -906,7 +907,14 @@ def quote_from_bytes(bs, safe='/'):
|
||||||
if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
|
if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
|
||||||
return bs.decode()
|
return bs.decode()
|
||||||
quoter = _byte_quoter_factory(safe)
|
quoter = _byte_quoter_factory(safe)
|
||||||
return ''.join(map(quoter, bs))
|
if (bs_len := len(bs)) < 200_000:
|
||||||
|
return ''.join(map(quoter, bs))
|
||||||
|
else:
|
||||||
|
# Quoting in chunks of about sqrt(len(bs)) bytes and then joining the chunks saves memory - https://github.com/python/cpython/issues/95865
|
||||||
|
chunk_size = math.isqrt(bs_len)
|
||||||
|
chunks = [''.join(map(quoter, bs[i:i+chunk_size]))
|
||||||
|
for i in range(0, bs_len, chunk_size)]
|
||||||
|
return ''.join(chunks)
|
||||||
|
|
||||||
def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
|
def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
|
||||||
quote_via=quote_plus):
|
quote_via=quote_plus):
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Reduce :func:`urllib.parse.quote_from_bytes` memory use on large values.
|
||||||
|
|
||||||
|
Contributed by Dennis Sweeney.
|
Loading…
Add table
Add a link
Reference in a new issue