mirror of
https://github.com/django/django.git
synced 2025-08-30 23:37:50 +00:00
Fixed #19237 -- Used HTML parser to strip tags
The regex method used until now for the strip_tags utility is fast, but subject to flaws and security issues. Consensus and good practice lead us to use a slower but safer method.
This commit is contained in:
parent
01948e384f
commit
dc51ec8bc2
2 changed files with 32 additions and 4 deletions
|
@ -5,6 +5,7 @@ import os
|
|||
|
||||
from django.utils import html
|
||||
from django.utils._os import upath
|
||||
from django.utils.encoding import force_text
|
||||
from django.utils.unittest import TestCase
|
||||
|
||||
|
||||
|
@ -63,10 +64,12 @@ class TestUtilsHtml(TestCase):
|
|||
def test_strip_tags(self):
|
||||
f = html.strip_tags
|
||||
items = (
|
||||
('<p>See: 'é is an apostrophe followed by e acute</p>',
|
||||
'See: 'é is an apostrophe followed by e acute'),
|
||||
('<adf>a', 'a'),
|
||||
('</adf>a', 'a'),
|
||||
('<asdf><asdf>e', 'e'),
|
||||
('<f', '<f'),
|
||||
('hi, <f x', 'hi, <f x'),
|
||||
('</fe', '</fe'),
|
||||
('<x>b<y>', 'b'),
|
||||
('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'),
|
||||
|
@ -81,8 +84,9 @@ class TestUtilsHtml(TestCase):
|
|||
for filename in ('strip_tags1.html', 'strip_tags2.txt'):
|
||||
path = os.path.join(os.path.dirname(upath(__file__)), 'files', filename)
|
||||
with open(path, 'r') as fp:
|
||||
content = force_text(fp.read())
|
||||
start = datetime.now()
|
||||
stripped = html.strip_tags(fp.read())
|
||||
stripped = html.strip_tags(content)
|
||||
elapsed = datetime.now() - start
|
||||
self.assertEqual(elapsed.seconds, 0)
|
||||
self.assertIn("Please try again.", stripped)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue