gh-95865: Further reduce quote_from_bytes memory consumption (#96860)

on large input values.  Based on Dennis Sweeney's chunking idea.
This commit is contained in:
Gregory P. Smith 2022-09-19 16:06:25 -07:00 committed by GitHub
parent 04f4977f50
commit e61ca22431
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 1 deletion

View file

@@ -29,6 +29,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
from collections import namedtuple
import functools
import math
import re
import types
import warnings
@@ -906,7 +907,14 @@ def quote_from_bytes(bs, safe='/'):
if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
return bs.decode()
quoter = _byte_quoter_factory(safe)
return ''.join(map(quoter, bs))
if (bs_len := len(bs)) < 200_000:
return ''.join(map(quoter, bs))
else:
# This saves memory - https://github.com/python/cpython/issues/95865
chunk_size = math.isqrt(bs_len)
chunks = [''.join(map(quoter, bs[i:i+chunk_size]))
for i in range(0, bs_len, chunk_size)]
return ''.join(chunks)
def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
quote_via=quote_plus):