From 1c7db70e79dce82f50d5958da64ab8e2807a31df Mon Sep 17 00:00:00 2001 From: Kasyap Pentamaraju Date: Mon, 3 Nov 2025 22:57:53 +0530 Subject: [PATCH] Fixed #36705 -- Avoided string concatenation in utils. Repeated string concatenation performs poorly on PyPy. Thanks Seokchan Yoon for the report. --- django/utils/html.py | 12 +++++++----- django/utils/http.py | 6 +++--- django/utils/numberformat.py | 8 ++++---- django/utils/text.py | 20 ++++++++++---------- docs/topics/performance.txt | 6 ++++++ 5 files changed, 30 insertions(+), 22 deletions(-) diff --git a/django/utils/html.py b/django/utils/html.py index b04d6bc5ee..059767d394 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -4,6 +4,7 @@ import html import json import re import warnings +from collections import deque from collections.abc import Mapping from html.parser import HTMLParser from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit @@ -429,7 +430,7 @@ class Urlizer: # Strip all opening wrapping punctuation. middle = word.lstrip(self.wrapping_punctuation_openings) lead = word[: len(word) - len(middle)] - trail = "" + trail = deque() # Continue trimming until middle remains unchanged. trimmed_something = True @@ -442,7 +443,7 @@ class Urlizer: rstripped = middle.rstrip(closing) if rstripped != middle: strip = counts[closing] - counts[opening] - trail = middle[-strip:] + trail.appendleft(middle[-strip:]) middle = middle[:-strip] trimmed_something = True counts[closing] -= strip @@ -453,7 +454,7 @@ class Urlizer: else: rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon) if rstripped != middle: - trail = middle[len(rstripped) :] + trail + trail.appendleft(middle[len(rstripped) :]) middle = rstripped trimmed_something = True @@ -470,13 +471,14 @@ class Urlizer: # entity. 
recent_semicolon = middle[trail_start:].index(";") middle_semicolon_index = recent_semicolon + trail_start + 1 - trail = middle[middle_semicolon_index:] + trail + trail.appendleft(middle[middle_semicolon_index:]) middle = rstripped + middle[trail_start:middle_semicolon_index] else: - trail = middle[trail_start:] + trail + trail.appendleft(middle[trail_start:]) middle = rstripped trimmed_something = True + trail = "".join(trail) return lead, middle, trail @staticmethod diff --git a/django/utils/http.py b/django/utils/http.py index fe0b21f150..21d5822bf2 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -169,11 +169,11 @@ def int_to_base36(i): raise ValueError("Negative base36 conversion input.") if i < 36: return char_set[i] - b36 = "" + b36_parts = [] while i != 0: i, n = divmod(i, 36) - b36 = char_set[n] + b36 - return b36 + b36_parts.append(char_set[n]) + return "".join(reversed(b36_parts)) def urlsafe_base64_encode(s): diff --git a/django/utils/numberformat.py b/django/utils/numberformat.py index cf8b2d219c..f621ff64b4 100644 --- a/django/utils/numberformat.py +++ b/django/utils/numberformat.py @@ -91,15 +91,15 @@ def format( # grouping is a single value intervals = [grouping, 0] active_interval = intervals.pop(0) - int_part_gd = "" + int_part_gd = [] cnt = 0 for digit in int_part[::-1]: if cnt and cnt == active_interval: if intervals: active_interval = intervals.pop(0) or active_interval - int_part_gd += thousand_sep[::-1] + int_part_gd.append(thousand_sep[::-1]) cnt = 0 - int_part_gd += digit + int_part_gd.append(digit) cnt += 1 - int_part = int_part_gd[::-1] + int_part = "".join(int_part_gd)[::-1] return sign + int_part + dec_part diff --git a/django/utils/text.py b/django/utils/text.py index bad1da6729..baf44265a4 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -103,7 +103,7 @@ class TruncateHTMLParser(HTMLParser): def __init__(self, *, length, replacement, convert_charrefs=True): 
super().__init__(convert_charrefs=convert_charrefs) self.tags = deque() - self.output = "" + self.output = [] self.remaining = length self.replacement = replacement @@ -119,13 +119,13 @@ class TruncateHTMLParser(HTMLParser): self.handle_endtag(tag) def handle_starttag(self, tag, attrs): - self.output += self.get_starttag_text() + self.output.append(self.get_starttag_text()) if tag not in self.void_elements: self.tags.appendleft(tag) def handle_endtag(self, tag): if tag not in self.void_elements: - self.output += f"</{tag}>" + self.output.append(f"</{tag}>") try: self.tags.remove(tag) except ValueError: @@ -136,16 +136,16 @@ class TruncateHTMLParser(HTMLParser): data_len = len(data) if self.remaining < data_len: self.remaining = 0 - self.output += add_truncation_text(output, self.replacement) + self.output.append(add_truncation_text(output, self.replacement)) raise self.TruncationCompleted self.remaining -= data_len - self.output += output + self.output.append(output) def feed(self, data): try: super().feed(data) except self.TruncationCompleted: - self.output += "".join([f"</{tag}>" for tag in self.tags]) + self.output.extend([f"</{tag}>" for tag in self.tags]) self.tags.clear() self.reset() else: @@ -166,9 +166,9 @@ class TruncateCharsHTMLParser(TruncateHTMLParser): def process(self, data): self.processed_chars += len(data) if (self.processed_chars == self.length) and ( - len(self.output) + len(data) == len(self.rawdata) + sum(len(p) for p in self.output) + len(data) == len(self.rawdata) ): - self.output += data + self.output.append(data) raise self.TruncationCompleted output = escape("".join(data[: self.remaining])) return data, output @@ -213,7 +213,7 @@ class Truncator(SimpleLazyObject): parser = TruncateCharsHTMLParser(length=length, replacement=truncate) parser.feed(text) parser.close() - return parser.output + return "".join(parser.output) return self._text_chars(length, truncate, text) def _text_chars(self, length, truncate, text): @@ -250,7 +250,7 @@ class Truncator(SimpleLazyObject): 
parser = TruncateWordsHTMLParser(length=length, replacement=truncate) parser.feed(self._wrapped) parser.close() - return parser.output + return "".join(parser.output) return self._text_words(length, truncate) def _text_words(self, length, truncate): diff --git a/docs/topics/performance.txt b/docs/topics/performance.txt index dcffb1a683..2935f21f67 100644 --- a/docs/topics/performance.txt +++ b/docs/topics/performance.txt @@ -426,6 +426,12 @@ Django is compatible with versions of PyPy corresponding to the supported Python versions, but you will need to check the compatibility of other libraries you rely on. +That said, a lot of a web framework's work is done by concatenating +strings, and PyPy has an issue with that (see +`this PyPy blog +<https://www.pypy.org/posts/2023/01/string-concatenation-quadratic.html>`_). +This may cause performance issues, depending on your use. + C implementations of Python libraries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~