gh-95865: Further reduce quote_from_bytes memory consumption (#96860)

This further reduces memory consumption on large input values.  Based on Dennis Sweeney's chunking idea.
This commit is contained in:
Gregory P. Smith 2022-09-19 16:06:25 -07:00 committed by GitHub
parent 04f4977f50
commit e61ca22431
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 1 deletion

View file

@@ -985,6 +985,10 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(result, 'archaeological%20arcana') self.assertEqual(result, 'archaeological%20arcana')
result = urllib.parse.quote_from_bytes(b'') result = urllib.parse.quote_from_bytes(b'')
self.assertEqual(result, '') self.assertEqual(result, '')
result = urllib.parse.quote_from_bytes(b'A'*10_000)
self.assertEqual(result, 'A'*10_000)
result = urllib.parse.quote_from_bytes(b'z\x01/ '*253_183)
self.assertEqual(result, 'z%01/%20'*253_183)
def test_unquote_to_bytes(self): def test_unquote_to_bytes(self):
result = urllib.parse.unquote_to_bytes('abc%20def') result = urllib.parse.unquote_to_bytes('abc%20def')

View file

@@ -29,6 +29,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
from collections import namedtuple from collections import namedtuple
import functools import functools
import math
import re import re
import types import types
import warnings import warnings
@@ -906,7 +907,14 @@ def quote_from_bytes(bs, safe='/'):
if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
return bs.decode() return bs.decode()
quoter = _byte_quoter_factory(safe) quoter = _byte_quoter_factory(safe)
return ''.join(map(quoter, bs)) if (bs_len := len(bs)) < 200_000:
return ''.join(map(quoter, bs))
else:
# This saves memory - https://github.com/python/cpython/issues/95865
chunk_size = math.isqrt(bs_len)
chunks = [''.join(map(quoter, bs[i:i+chunk_size]))
for i in range(0, bs_len, chunk_size)]
return ''.join(chunks)
def urlencode(query, doseq=False, safe='', encoding=None, errors=None, def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
quote_via=quote_plus): quote_via=quote_plus):

View file

@@ -0,0 +1,3 @@
Reduce :func:`urllib.parse.quote_from_bytes` memory use on large values.
Contributed by Dennis Sweeney.