Fixed #20568 -- truncatewords_html no longer splits words containing HTML entities.

Thanks yann0 at hotmail.com for the report.
This commit is contained in:
Jaap Roes 2013-07-18 10:45:34 +02:00 committed by Tim Graham
parent 58d555caf5
commit 40b95a24ae
3 changed files with 13 additions and 1 deletions

View file

@ -22,7 +22,7 @@ capfirst = lambda x: x and force_text(x)[0].upper() + force_text(x)[1:]
capfirst = allow_lazy(capfirst, six.text_type)
# Set up regular expressions
re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U|re.S)
re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U|re.S)
re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)