refactor: use HTML5 parser for BeautifulSoup (#891)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Juro Oravec 2025-01-07 21:27:17 +01:00 committed by GitHub
parent 203d29f511
commit 81c02ddaa7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 15 additions and 12 deletions

View file

@ -394,7 +394,7 @@ CSS_PLACEHOLDER_NAME_B = CSS_PLACEHOLDER_NAME.encode()
JS_PLACEHOLDER_NAME = "JS_PLACEHOLDER"
JS_PLACEHOLDER_NAME_B = JS_PLACEHOLDER_NAME.encode()
CSS_DEPENDENCY_PLACEHOLDER = f'<link name="{CSS_PLACEHOLDER_NAME}"/>'
CSS_DEPENDENCY_PLACEHOLDER = f'<link name="{CSS_PLACEHOLDER_NAME}">'
JS_DEPENDENCY_PLACEHOLDER = f'<script name="{JS_PLACEHOLDER_NAME}"></script>'
COMPONENT_DEPS_COMMENT = "<!-- _RENDERED {data} -->"
@ -415,7 +415,7 @@ MAYBE_COMP_CSS_ID = r"(?: data-djc-css-\w{6})?"
PLACEHOLDER_REGEX = re.compile(
r"{css_placeholder}|{js_placeholder}".format(
css_placeholder=f'<link{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID} name="{CSS_PLACEHOLDER_NAME}"/>',
css_placeholder=f'<link{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID} name="{CSS_PLACEHOLDER_NAME}"/?>',
js_placeholder=f'<script{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID} name="{JS_PLACEHOLDER_NAME}"></script>',
).encode()
)

View file

@ -1,7 +1,7 @@
from abc import ABC, abstractmethod
from typing import Any, List, Optional, Sequence
from bs4 import BeautifulSoup, CData, Comment, Doctype, Tag
from bs4 import BeautifulSoup, CData, Comment, Doctype, NavigableString, Tag
class HTMLNode(ABC):
@ -63,8 +63,11 @@ class SoupNode(HTMLNode):
return f"<!-- {self.node} -->"
elif isinstance(self.node, Doctype):
return f"<!DOCTYPE {self.node}>"
else:
elif isinstance(self.node, NavigableString):
return str(self.node)
else:
# See https://github.com/EmilStenstrom/django-components/pull/861#discussion_r1898516210
return self.node.encode(formatter="html5").decode()
def name(self) -> str:
return self.node.name

View file

@ -153,8 +153,8 @@ class ContextTests(BaseTestCase):
template = Template(template_str)
rendered = template.render(Context())
self.assertInHTML('<h1 data-djc-id-a1bc45="">Uniquely named variable = unique_val</h1>', rendered)
self.assertInHTML('<h1 data-djc-id-a1bc46="">Uniquely named variable = unique_from_slot</h1>', rendered)
self.assertInHTML("<h1 data-djc-id-a1bc45>Uniquely named variable = unique_val</h1>", rendered)
self.assertInHTML("<h1 data-djc-id-a1bc46>Uniquely named variable = unique_from_slot</h1>", rendered)
@parametrize_context_behavior(["django", "isolated"])
def test_nested_component_context_shadows_outer_context_with_unfilled_slots_and_component_tag(

View file

@ -61,7 +61,7 @@ class RenderDependenciesTests(BaseTestCase):
rendered_raw = template.render(Context({}))
# Placeholders
self.assertEqual(rendered_raw.count('<link name="CSS_PLACEHOLDER"/>'), 1)
self.assertEqual(rendered_raw.count('<link name="CSS_PLACEHOLDER">'), 1)
self.assertEqual(rendered_raw.count('<script name="JS_PLACEHOLDER"></script>'), 1)
self.assertEqual(rendered_raw.count("<script"), 1)
@ -510,7 +510,7 @@ class MiddlewareTests(BaseTestCase):
assert_dependencies(rendered1)
self.assertEqual(
rendered1.count('Variable: <strong data-djc-id-a1bc41 data-djc-id-a1bc42="">value</strong>'),
rendered1.count("Variable: <strong data-djc-id-a1bc41 data-djc-id-a1bc42>value</strong>"),
1,
)
@ -520,7 +520,7 @@ class MiddlewareTests(BaseTestCase):
)
assert_dependencies(rendered2)
self.assertEqual(
rendered2.count('Variable: <strong data-djc-id-a1bc43 data-djc-id-a1bc44="">value</strong>'),
rendered2.count("Variable: <strong data-djc-id-a1bc43 data-djc-id-a1bc44>value</strong>"),
1,
)
@ -531,6 +531,6 @@ class MiddlewareTests(BaseTestCase):
assert_dependencies(rendered3)
self.assertEqual(
rendered3.count('Variable: <strong data-djc-id-a1bc45 data-djc-id-a1bc46="">value</strong>'),
rendered3.count("Variable: <strong data-djc-id-a1bc45 data-djc-id-a1bc46>value</strong>"),
1,
)

View file

@ -484,8 +484,8 @@ class DynamicExprTests(BaseTestCase):
(
"<!-- _RENDERED SimpleComponent_5c8766,a1bc41,, -->\n"
"<div data-djc-id-a1bc41><!-- _RENDERED SimpleComponent_5c8766,a1bc40,, -->\n"
'<div data-djc-id-a1bc40="">3</div>\n'
'<div data-djc-id-a1bc40="">True</div>\n'
"<div data-djc-id-a1bc40>3</div>\n"
"<div data-djc-id-a1bc40>True</div>\n"
"</div>\n"
"<div data-djc-id-a1bc41>True</div>"
),