Fixed #19496 -- Added truncatechars_html filter.

Thanks esevece for the suggestion and Nick Sandford and Martin Warne for the initial work on the patch.
2025-09-22 18:22:40 +00:00 · 2013-01-10 17:27:20 +08:00 · 2013-01-10 17:27:20 +08:00 · f94f466cd3
commit f94f466cd3
parent 390001ce52
5 changed files with 93 additions and 23 deletions
--- a/django/utils/text.py
+++ b/django/utils/text.py
@ -24,6 +24,7 @@ capfirst = allow_lazy(capfirst, six.text_type)

 # Set up regular expressions
 re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U | re.S)
+re_chars = re.compile(r'<.*?>|(.)', re.U | re.S)
 re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)
 re_newlines = re.compile(r'\r\n|\r')  # Used in normalize_newlines
 re_camel_case = re.compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))')
@ -82,7 +83,7 @@ class Truncator(SimpleLazyObject):
            return text
        return '%s%s' % (text, truncate)

-    def chars(self, num, truncate=None):
+    def chars(self, num, truncate=None, html=False):
        """
        Returns the text truncated to be no longer than the specified number
        of characters.
@ -101,7 +102,15 @@ class Truncator(SimpleLazyObject):
                truncate_len -= 1
                if truncate_len == 0:
                    break
+        if html:
+            return self._truncate_html(length, truncate, text, truncate_len, False)
+        return self._text_chars(length, truncate, text, truncate_len)
+    chars = allow_lazy(chars)

+    def _text_chars(self, length, truncate, text, truncate_len):
+        """
+        Truncates a string after a certain number of chars.
+        """
        s_len = 0
        end_index = None
        for i, char in enumerate(text):
@ -119,7 +128,6 @@ class Truncator(SimpleLazyObject):

        # Return the original string since no truncation was necessary
        return text
-    chars = allow_lazy(chars)

    def words(self, num, truncate=None, html=False):
        """
@ -129,7 +137,7 @@ class Truncator(SimpleLazyObject):
        """
        length = int(num)
        if html:
-            return self._html_words(length, truncate)
+            return self._truncate_html(length, truncate, self._wrapped, length, True)
        return self._text_words(length, truncate)
    words = allow_lazy(words)

@ -145,40 +153,45 @@ class Truncator(SimpleLazyObject):
            return self.add_truncation_text(' '.join(words), truncate)
        return ' '.join(words)

-    def _html_words(self, length, truncate):
+    def _truncate_html(self, length, truncate, text, truncate_len, words):
        """
-        Truncates HTML to a certain number of words (not counting tags and
-        comments). Closes opened tags if they were correctly closed in the
-        given HTML.
+        Truncates HTML to a certain number of chars (not counting tags and
+        comments), or, if words is True, then to a certain number of words.
+        Closes opened tags if they were correctly closed in the given HTML.

        Newlines in the HTML are preserved.
        """
-        if length <= 0:
+        if words and length <= 0:
            return ''
+
        html4_singlets = (
            'br', 'col', 'link', 'base', 'img',
            'param', 'area', 'hr', 'input'
        )
-        # Count non-HTML words and keep note of open tags
+
+        # Count non-HTML chars/words and keep note of open tags
        pos = 0
        end_text_pos = 0
-        words = 0
+        current_len = 0
        open_tags = []
-        while words <= length:
-            m = re_words.search(self._wrapped, pos)
+
+        regex = re_words if words else re_chars
+
+        while current_len <= length:
+            m = regex.search(text, pos)
            if not m:
                # Checked through whole string
                break
            pos = m.end(0)
            if m.group(1):
-                # It's an actual non-HTML word
-                words += 1
-                if words == length:
+                # It's an actual non-HTML word or char
+                current_len += 1
+                if current_len == truncate_len:
                    end_text_pos = pos
                continue
            # Check for tag
            tag = re_tag.match(m.group(0))
-            if not tag or end_text_pos:
+            if not tag or current_len >= truncate_len:
                # Don't worry about non tags or tags after our truncate point
                continue
            closing_tag, tagname, self_closing = tag.groups()
@ -199,10 +212,10 @@ class Truncator(SimpleLazyObject):
            else:
                # Add it to the start of the open tags list
                open_tags.insert(0, tagname)
-        if words <= length:
-            # Don't try to close tags if we don't need to truncate
-            return self._wrapped
-        out = self._wrapped[:end_text_pos]
+
+        if current_len <= length:
+            return text
+        out = text[:end_text_pos]
        truncate_text = self.add_truncation_text('', truncate)
        if truncate_text:
            out += truncate_text