Fixed #20568 -- truncatewords_html no longer splits words containing HTML entities.

Thanks yann0 at hotmail.com for the report.
2025-09-25 11:39:43 +00:00 · 2013-07-18 10:45:34 +02:00 · 2013-07-18 10:45:34 +02:00 · 40b95a24ae
commit 40b95a24ae
parent 58d555caf5
3 changed files with 13 additions and 1 deletions
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -22,7 +22,7 @@ capfirst = lambda x: x and force_text(x)[0].upper() + force_text(x)[1:]
 capfirst = allow_lazy(capfirst, six.text_type)

 # Set up regular expressions
-re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U|re.S)
+re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U|re.S)
 re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)