Fixed #36705 -- Avoided string concatenation in utils.

Repeated string concatenation performs poorly on PyPy.
Thanks Seokchan Yoon for the report.
This commit is contained in:
Kasyap Pentamaraju 2025-11-03 22:57:53 +05:30 committed by Jacob Walls
parent 2768747526
commit 1c7db70e79
5 changed files with 30 additions and 22 deletions

View file

@ -4,6 +4,7 @@ import html
import json import json
import re import re
import warnings import warnings
from collections import deque
from collections.abc import Mapping from collections.abc import Mapping
from html.parser import HTMLParser from html.parser import HTMLParser
from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit
@ -429,7 +430,7 @@ class Urlizer:
# Strip all opening wrapping punctuation. # Strip all opening wrapping punctuation.
middle = word.lstrip(self.wrapping_punctuation_openings) middle = word.lstrip(self.wrapping_punctuation_openings)
lead = word[: len(word) - len(middle)] lead = word[: len(word) - len(middle)]
trail = "" trail = deque()
# Continue trimming until middle remains unchanged. # Continue trimming until middle remains unchanged.
trimmed_something = True trimmed_something = True
@ -442,7 +443,7 @@ class Urlizer:
rstripped = middle.rstrip(closing) rstripped = middle.rstrip(closing)
if rstripped != middle: if rstripped != middle:
strip = counts[closing] - counts[opening] strip = counts[closing] - counts[opening]
trail = middle[-strip:] trail.appendleft(middle[-strip:])
middle = middle[:-strip] middle = middle[:-strip]
trimmed_something = True trimmed_something = True
counts[closing] -= strip counts[closing] -= strip
@ -453,7 +454,7 @@ class Urlizer:
else: else:
rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon) rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon)
if rstripped != middle: if rstripped != middle:
trail = middle[len(rstripped) :] + trail trail.appendleft(middle[len(rstripped) :])
middle = rstripped middle = rstripped
trimmed_something = True trimmed_something = True
@ -470,13 +471,14 @@ class Urlizer:
# entity. # entity.
recent_semicolon = middle[trail_start:].index(";") recent_semicolon = middle[trail_start:].index(";")
middle_semicolon_index = recent_semicolon + trail_start + 1 middle_semicolon_index = recent_semicolon + trail_start + 1
trail = middle[middle_semicolon_index:] + trail trail.appendleft(middle[middle_semicolon_index:])
middle = rstripped + middle[trail_start:middle_semicolon_index] middle = rstripped + middle[trail_start:middle_semicolon_index]
else: else:
trail = middle[trail_start:] + trail trail.appendleft(middle[trail_start:])
middle = rstripped middle = rstripped
trimmed_something = True trimmed_something = True
trail = "".join(trail)
return lead, middle, trail return lead, middle, trail
@staticmethod @staticmethod

View file

@ -169,11 +169,11 @@ def int_to_base36(i):
raise ValueError("Negative base36 conversion input.") raise ValueError("Negative base36 conversion input.")
if i < 36: if i < 36:
return char_set[i] return char_set[i]
b36 = "" b36_parts = []
while i != 0: while i != 0:
i, n = divmod(i, 36) i, n = divmod(i, 36)
b36 = char_set[n] + b36 b36_parts.append(char_set[n])
return b36 return "".join(reversed(b36_parts))
def urlsafe_base64_encode(s): def urlsafe_base64_encode(s):

View file

@ -91,15 +91,15 @@ def format(
# grouping is a single value # grouping is a single value
intervals = [grouping, 0] intervals = [grouping, 0]
active_interval = intervals.pop(0) active_interval = intervals.pop(0)
int_part_gd = "" int_part_gd = []
cnt = 0 cnt = 0
for digit in int_part[::-1]: for digit in int_part[::-1]:
if cnt and cnt == active_interval: if cnt and cnt == active_interval:
if intervals: if intervals:
active_interval = intervals.pop(0) or active_interval active_interval = intervals.pop(0) or active_interval
int_part_gd += thousand_sep[::-1] int_part_gd.append(thousand_sep[::-1])
cnt = 0 cnt = 0
int_part_gd += digit int_part_gd.append(digit)
cnt += 1 cnt += 1
int_part = int_part_gd[::-1] int_part = "".join(int_part_gd)[::-1]
return sign + int_part + dec_part return sign + int_part + dec_part

View file

@ -103,7 +103,7 @@ class TruncateHTMLParser(HTMLParser):
def __init__(self, *, length, replacement, convert_charrefs=True): def __init__(self, *, length, replacement, convert_charrefs=True):
super().__init__(convert_charrefs=convert_charrefs) super().__init__(convert_charrefs=convert_charrefs)
self.tags = deque() self.tags = deque()
self.output = "" self.output = []
self.remaining = length self.remaining = length
self.replacement = replacement self.replacement = replacement
@ -119,13 +119,13 @@ class TruncateHTMLParser(HTMLParser):
self.handle_endtag(tag) self.handle_endtag(tag)
def handle_starttag(self, tag, attrs): def handle_starttag(self, tag, attrs):
self.output += self.get_starttag_text() self.output.append(self.get_starttag_text())
if tag not in self.void_elements: if tag not in self.void_elements:
self.tags.appendleft(tag) self.tags.appendleft(tag)
def handle_endtag(self, tag): def handle_endtag(self, tag):
if tag not in self.void_elements: if tag not in self.void_elements:
self.output += f"</{tag}>" self.output.append(f"</{tag}>")
try: try:
self.tags.remove(tag) self.tags.remove(tag)
except ValueError: except ValueError:
@ -136,16 +136,16 @@ class TruncateHTMLParser(HTMLParser):
data_len = len(data) data_len = len(data)
if self.remaining < data_len: if self.remaining < data_len:
self.remaining = 0 self.remaining = 0
self.output += add_truncation_text(output, self.replacement) self.output.append(add_truncation_text(output, self.replacement))
raise self.TruncationCompleted raise self.TruncationCompleted
self.remaining -= data_len self.remaining -= data_len
self.output += output self.output.append(output)
def feed(self, data): def feed(self, data):
try: try:
super().feed(data) super().feed(data)
except self.TruncationCompleted: except self.TruncationCompleted:
self.output += "".join([f"</{tag}>" for tag in self.tags]) self.output.extend([f"</{tag}>" for tag in self.tags])
self.tags.clear() self.tags.clear()
self.reset() self.reset()
else: else:
@ -166,9 +166,9 @@ class TruncateCharsHTMLParser(TruncateHTMLParser):
def process(self, data): def process(self, data):
self.processed_chars += len(data) self.processed_chars += len(data)
if (self.processed_chars == self.length) and ( if (self.processed_chars == self.length) and (
len(self.output) + len(data) == len(self.rawdata) sum(len(p) for p in self.output) + len(data) == len(self.rawdata)
): ):
self.output += data self.output.append(data)
raise self.TruncationCompleted raise self.TruncationCompleted
output = escape("".join(data[: self.remaining])) output = escape("".join(data[: self.remaining]))
return data, output return data, output
@ -213,7 +213,7 @@ class Truncator(SimpleLazyObject):
parser = TruncateCharsHTMLParser(length=length, replacement=truncate) parser = TruncateCharsHTMLParser(length=length, replacement=truncate)
parser.feed(text) parser.feed(text)
parser.close() parser.close()
return parser.output return "".join(parser.output)
return self._text_chars(length, truncate, text) return self._text_chars(length, truncate, text)
def _text_chars(self, length, truncate, text): def _text_chars(self, length, truncate, text):
@ -250,7 +250,7 @@ class Truncator(SimpleLazyObject):
parser = TruncateWordsHTMLParser(length=length, replacement=truncate) parser = TruncateWordsHTMLParser(length=length, replacement=truncate)
parser.feed(self._wrapped) parser.feed(self._wrapped)
parser.close() parser.close()
return parser.output return "".join(parser.output)
return self._text_words(length, truncate) return self._text_words(length, truncate)
def _text_words(self, length, truncate): def _text_words(self, length, truncate):

View file

@ -426,6 +426,12 @@ Django is compatible with versions of PyPy corresponding to the supported
Python versions, but you will need to check the compatibility of other Python versions, but you will need to check the compatibility of other
libraries you rely on. libraries you rely on.
That said, a lot of a web framework's work is done by concatenating
strings, and repeated string concatenation can be slow on PyPy (see
`this PyPy blog post
<https://pypy.org/posts/2023/01/string-concatenation-quadratic.html>`_).
This may cause performance issues, depending on your workload.
C implementations of Python libraries C implementations of Python libraries
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~