Fixed #36705 -- Avoided string concatenation in utils.

Repeated string concatenation performs poorly on PyPy.
Thanks Seokchan Yoon for the report.
This commit is contained in:
Kasyap Pentamaraju 2025-11-03 22:57:53 +05:30 committed by Jacob Walls
parent 2768747526
commit 1c7db70e79
5 changed files with 30 additions and 22 deletions

View file

@ -4,6 +4,7 @@ import html
import json
import re
import warnings
from collections import deque
from collections.abc import Mapping
from html.parser import HTMLParser
from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit
@ -429,7 +430,7 @@ class Urlizer:
# Strip all opening wrapping punctuation.
middle = word.lstrip(self.wrapping_punctuation_openings)
lead = word[: len(word) - len(middle)]
trail = ""
trail = deque()
# Continue trimming until middle remains unchanged.
trimmed_something = True
@ -442,7 +443,7 @@ class Urlizer:
rstripped = middle.rstrip(closing)
if rstripped != middle:
strip = counts[closing] - counts[opening]
trail = middle[-strip:]
trail.appendleft(middle[-strip:])
middle = middle[:-strip]
trimmed_something = True
counts[closing] -= strip
@ -453,7 +454,7 @@ class Urlizer:
else:
rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon)
if rstripped != middle:
trail = middle[len(rstripped) :] + trail
trail.appendleft(middle[len(rstripped) :])
middle = rstripped
trimmed_something = True
@ -470,13 +471,14 @@ class Urlizer:
# entity.
recent_semicolon = middle[trail_start:].index(";")
middle_semicolon_index = recent_semicolon + trail_start + 1
trail = middle[middle_semicolon_index:] + trail
trail.appendleft(middle[middle_semicolon_index:])
middle = rstripped + middle[trail_start:middle_semicolon_index]
else:
trail = middle[trail_start:] + trail
trail.appendleft(middle[trail_start:])
middle = rstripped
trimmed_something = True
trail = "".join(trail)
return lead, middle, trail
@staticmethod

View file

@ -169,11 +169,11 @@ def int_to_base36(i):
raise ValueError("Negative base36 conversion input.")
if i < 36:
return char_set[i]
b36 = ""
b36_parts = []
while i != 0:
i, n = divmod(i, 36)
b36 = char_set[n] + b36
return b36
b36_parts.append(char_set[n])
return "".join(reversed(b36_parts))
def urlsafe_base64_encode(s):

View file

@ -91,15 +91,15 @@ def format(
# grouping is a single value
intervals = [grouping, 0]
active_interval = intervals.pop(0)
int_part_gd = ""
int_part_gd = []
cnt = 0
for digit in int_part[::-1]:
if cnt and cnt == active_interval:
if intervals:
active_interval = intervals.pop(0) or active_interval
int_part_gd += thousand_sep[::-1]
int_part_gd.append(thousand_sep[::-1])
cnt = 0
int_part_gd += digit
int_part_gd.append(digit)
cnt += 1
int_part = int_part_gd[::-1]
int_part = "".join(int_part_gd)[::-1]
return sign + int_part + dec_part

View file

@ -103,7 +103,7 @@ class TruncateHTMLParser(HTMLParser):
def __init__(self, *, length, replacement, convert_charrefs=True):
super().__init__(convert_charrefs=convert_charrefs)
self.tags = deque()
self.output = ""
self.output = []
self.remaining = length
self.replacement = replacement
@ -119,13 +119,13 @@ class TruncateHTMLParser(HTMLParser):
self.handle_endtag(tag)
def handle_starttag(self, tag, attrs):
self.output += self.get_starttag_text()
self.output.append(self.get_starttag_text())
if tag not in self.void_elements:
self.tags.appendleft(tag)
def handle_endtag(self, tag):
if tag not in self.void_elements:
self.output += f"</{tag}>"
self.output.append(f"</{tag}>")
try:
self.tags.remove(tag)
except ValueError:
@ -136,16 +136,16 @@ class TruncateHTMLParser(HTMLParser):
data_len = len(data)
if self.remaining < data_len:
self.remaining = 0
self.output += add_truncation_text(output, self.replacement)
self.output.append(add_truncation_text(output, self.replacement))
raise self.TruncationCompleted
self.remaining -= data_len
self.output += output
self.output.append(output)
def feed(self, data):
try:
super().feed(data)
except self.TruncationCompleted:
self.output += "".join([f"</{tag}>" for tag in self.tags])
self.output.extend([f"</{tag}>" for tag in self.tags])
self.tags.clear()
self.reset()
else:
@ -166,9 +166,9 @@ class TruncateCharsHTMLParser(TruncateHTMLParser):
def process(self, data):
self.processed_chars += len(data)
if (self.processed_chars == self.length) and (
len(self.output) + len(data) == len(self.rawdata)
sum(len(p) for p in self.output) + len(data) == len(self.rawdata)
):
self.output += data
self.output.append(data)
raise self.TruncationCompleted
output = escape("".join(data[: self.remaining]))
return data, output
@ -213,7 +213,7 @@ class Truncator(SimpleLazyObject):
parser = TruncateCharsHTMLParser(length=length, replacement=truncate)
parser.feed(text)
parser.close()
return parser.output
return "".join(parser.output)
return self._text_chars(length, truncate, text)
def _text_chars(self, length, truncate, text):
@ -250,7 +250,7 @@ class Truncator(SimpleLazyObject):
parser = TruncateWordsHTMLParser(length=length, replacement=truncate)
parser.feed(self._wrapped)
parser.close()
return parser.output
return "".join(parser.output)
return self._text_words(length, truncate)
def _text_words(self, length, truncate):

View file

@ -426,6 +426,12 @@ Django is compatible with versions of PyPy corresponding to the supported
Python versions, but you will need to check the compatibility of other
libraries you rely on.
That said, a lot of a web framework's work is done by concatenating
strings, and repeated string concatenation can be slow on PyPy (see
`this PyPy blog post
<https://pypy.org/posts/2023/01/string-concatenation-quadratic.html>`_).
This may cause performance issues, depending on how you use Django.
C implementations of Python libraries
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~