refactor: replace bs4 and perf optimizations (#927)

This commit is contained in:
Juro Oravec 2025-01-24 10:30:41 +01:00 committed by GitHub
parent d407a8cd13
commit 0b65761fce
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 2078 additions and 418 deletions

View file

@ -1,5 +1,15 @@
# Release notes
## v0.126
#### Refactor
- Replaced [BeautifulSoup4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) with a custom HTML parser.
- The heuristic for inserting JS and CSS dependencies into the default place has changed.
- JS is still inserted at the end of the `<body>`, and CSS at the end of `<head>`.
- However, we find end of `<body>` by searching for **last** occurrence of `</body>`
- And for the end of `<head>` we search for the **first** occurrence of `</head>`
## v0.125
⚠️ Attention ⚠️ - We migrated from `EmilStenstrom/django-components` to `django-components/django-components`.

View file

@ -20,7 +20,7 @@ Import as
<a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1170" target="_blank">See source code</a>
<a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1118" target="_blank">See source code</a>
@ -43,7 +43,7 @@ If you insert this tag multiple times, ALL CSS links will be duplicately inserte
<a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1192" target="_blank">See source code</a>
<a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1140" target="_blank">See source code</a>
@ -67,7 +67,7 @@ If you insert this tag multiple times, ALL JS scripts will be duplicately insert
<a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1516" target="_blank">See source code</a>
<a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1257" target="_blank">See source code</a>

View file

@ -29,7 +29,6 @@ classifiers = [
]
dependencies = [
'Django>=4.2',
'beautifulsoup4>=4.12',
]
license = {text = "MIT"}

View file

@ -10,5 +10,4 @@ mypy
playwright
requests
types-requests
whitenoise
beautifulsoup4
whitenoise

View file

@ -6,8 +6,6 @@
#
asgiref==3.8.1
# via django
beautifulsoup4==4.12.3
# via -r requirements-dev.in
black==24.10.0
# via -r requirements-dev.in
cachetools==5.5.0
@ -93,8 +91,6 @@ pyyaml==6.0.2
# via pre-commit
requests==2.32.3
# via -r requirements-dev.in
soupsieve==2.6
# via beautifulsoup4
sqlparse==0.5.2
# via django
tox==4.24.1

View file

@ -20,7 +20,6 @@
# - pymdown-extensions
# - black
# - django>=4.2
# - beautifulsoup4>=4.12
#
asgiref==3.8.1
@ -29,8 +28,6 @@ babel==2.16.0
# via
# mkdocs-git-revision-date-localized-plugin
# mkdocs-material
beautifulsoup4==4.12.3
# via hatch.envs.docs
black==24.10.0
# via hatch.envs.docs
bracex==2.5.post1
@ -213,8 +210,6 @@ six==1.17.0
# via python-dateutil
smmap==5.0.2
# via gitdb
soupsieve==2.6
# via beautifulsoup4
sqlparse==0.5.3
# via django
tinycss2==1.4.0

View file

@ -53,8 +53,10 @@ from django_components.dependencies import (
cache_component_js,
cache_component_js_vars,
postprocess_component_html,
set_component_attrs_for_js_and_css,
)
from django_components.node import BaseNode
from django_components.perfutil.component import component_post_render
from django_components.slots import (
ComponentSlotContext,
Slot,
@ -992,10 +994,11 @@ class Component(
# By adding the current input to the stack, we temporarily allow users
# to access the provided context, slots, etc. Also required so users can
# call `self.inject()` from within `get_context_data()`.
render_id = gen_id()
self._render_stack.append(
RenderStackItem(
input=RenderInput(
id=gen_id(),
id=render_id,
context=context,
slots=slots,
args=args,
@ -1026,8 +1029,17 @@ class Component(
is_filled = SlotIsFilled(slots_untyped)
self._render_stack[-1].is_filled = is_filled
# If any slot fills were defined within the template, we want to scope them
# to the CSS of the parent component. Thus we keep track of the parent component.
if context.get(_COMPONENT_SLOT_CTX_CONTEXT_KEY, None):
parent_comp_ctx: ComponentSlotContext = context[_COMPONENT_SLOT_CTX_CONTEXT_KEY]
parent_id = parent_comp_ctx.component_id
else:
parent_id = None
component_slot_ctx = ComponentSlotContext(
component_name=self.name,
component_id=render_id,
template_name=template.name,
fills=slots_untyped,
is_dynamic_component=getattr(self, "_is_dynamic_component", False),
@ -1062,22 +1074,49 @@ class Component(
new_output = self.on_render_after(context, template, html_content)
html_content = new_output if new_output is not None else html_content
output = postprocess_component_html(
component_cls=self.__class__,
component_id=self.id,
html_content=html_content,
css_input_hash=css_input_hash,
js_input_hash=js_input_hash,
type=type,
render_dependencies=render_dependencies,
)
# After rendering is done, remove the current state from the stack, which means
# properties like `self.context` will no longer return the current state.
self._render_stack.pop()
context.render_context.pop()
return output
# Internal component HTML post-processing:
# - Add the HTML attributes to work with JS and CSS variables
# - Resolve component's JS / CSS into <script> and <link> (if render_dependencies=True)
#
# However, to ensure that we run an HTML parser only once over the HTML content,
# we have to wrap it in this callback. This callback runs only once we know whether
# there are any extra HTML attributes that should be applied to this component's root elements.
#
# This makes it possible for multiple components to resolve to the same HTML element.
# E.g. if CompA renders CompB, and CompB renders a <div>, then the <div> element will have
# IDs of both CompA and CompB.
# ```html
# <div djc-id-a1b3cf djc-id-f3d3cf>...</div>
# ```
def post_processor(root_attributes: Optional[List[str]] = None) -> Tuple[str, Dict[str, List[str]]]:
nonlocal html_content
updated_html, child_components = set_component_attrs_for_js_and_css(
html_content=html_content,
component_id=render_id,
css_input_hash=css_input_hash,
css_scope_id=None, # TODO - Implement
root_attributes=root_attributes,
)
updated_html = postprocess_component_html(
component_cls=self.__class__,
component_id=render_id,
html_content=updated_html,
css_input_hash=css_input_hash,
js_input_hash=js_input_hash,
type=type,
render_dependencies=render_dependencies,
)
return updated_html, child_components
return component_post_render(post_processor, render_id, parent_id)
def _normalize_slot_fills(
self,
@ -1208,6 +1247,14 @@ class Component(
validate_typed_dict(data, data_type, f"Component '{self.name}'", "data")
# Perf
# Each component may use different start and end tags. We represent this
# as individual subclasses of `ComponentNode`. However, multiple components
# may use the same start & end tag combination, e.g. `{% component %}` and `{% endcomponent %}`.
# So we cache the already-created subclasses to be reused.
component_node_subclasses_by_name: Dict[str, Tuple[Type["ComponentNode"], ComponentRegistry]] = {}
class ComponentNode(BaseNode):
"""
Renders one of the components that was previously registered with
@ -1336,15 +1383,31 @@ class ComponentNode(BaseNode):
start_tag: str,
end_tag: str,
) -> "ComponentNode":
# Set the component-specific start and end tags by subclassing the base node
# Set the component-specific start and end tags by subclassing the BaseNode
subcls_name = cls.__name__ + "_" + name
subcls: Type[ComponentNode] = type(subcls_name, (cls,), {"tag": start_tag, "end_tag": end_tag})
# We try to reuse the same subclass for the same start tag, so we can
# avoid creating a new subclass for each time `{% component %}` is called.
if start_tag not in component_node_subclasses_by_name:
subcls: Type[ComponentNode] = type(subcls_name, (cls,), {"tag": start_tag, "end_tag": end_tag})
component_node_subclasses_by_name[start_tag] = (subcls, registry)
cached_subcls, cached_registry = component_node_subclasses_by_name[start_tag]
if cached_registry is not registry:
raise RuntimeError(
f"Detected two Components from different registries using the same start tag '{start_tag}'"
)
elif cached_subcls.end_tag != end_tag:
raise RuntimeError(
f"Detected two Components using the same start tag '{start_tag}' but with different end tags"
)
# Call `BaseNode.parse()` as if with the context of subcls.
node: ComponentNode = super(cls, subcls).parse( # type: ignore[attr-defined]
node: ComponentNode = super(cls, cached_subcls).parse( # type: ignore[attr-defined]
parser,
token,
registry=registry,
registry=cached_registry,
name=name,
)
return node

View file

@ -1,6 +1,8 @@
import inspect
from typing import Any, Dict, Optional, Type, Union, cast
from django.template import Context, Template
from django_components import Component, ComponentRegistry, NotRegistered, types
from django_components.component_registry import all_registries
@ -110,7 +112,21 @@ class DynamicComponent(Component):
comp_class = self._resolve_component(comp_name_or_class, registry)
# NOTE: Slots are passed at component instantiation
return {
"comp_class": comp_class,
"args": args,
"kwargs": kwargs,
}
# NOTE: The inner component is rendered in `on_render_before`, so that the `Context` object
# is already configured as if the inner component was rendered inside the template.
# E.g. the `_COMPONENT_SLOT_CTX_CONTEXT_KEY` is set, which means that the child component
# will know that it's a child of this component.
def on_render_before(self, context: Context, template: Template) -> Context:
comp_class = context["comp_class"]
args = context["args"]
kwargs = context["kwargs"]
comp = comp_class(
registered_name=self.registered_name,
outer_context=self.outer_context,
@ -128,11 +144,10 @@ class DynamicComponent(Component):
render_dependencies=self.input.render_dependencies,
)
return {
"output": output,
}
context["output"] = output
return context
template: types.django_html = """{{ output }}"""
template: types.django_html = """{{ output|safe }}"""
def _resolve_component(
self,

View file

@ -35,7 +35,7 @@ from django.utils.decorators import sync_and_async_middleware
from django.utils.safestring import SafeString, mark_safe
from django_components.node import BaseNode
from django_components.util.html import SoupNode
from django_components.util.html_parser import set_html_attributes
from django_components.util.misc import get_import_path, is_nonempty_str
if TYPE_CHECKING:
@ -272,33 +272,63 @@ def wrap_component_css(comp_cls: Type["Component"], content: str) -> SafeString:
#########################################################
def _link_dependencies_with_component_html(
component_id: str,
html_content: str,
def set_component_attrs_for_js_and_css(
html_content: Union[str, SafeString],
component_id: Optional[str],
css_input_hash: Optional[str],
) -> str:
elems = SoupNode.from_fragment(html_content)
css_scope_id: Optional[str],
root_attributes: Optional[List[str]] = None,
) -> Tuple[Union[str, SafeString], Dict[str, List[str]]]:
# These are the attributes that we want to set on the root element.
all_root_attributes = [*root_attributes] if root_attributes else []
# Insert component ID
for elem in elems:
# Ignore comments, text, doctype, etc.
if not elem.is_element():
continue
# Component ID is used for executing JS script, e.g. `data-djc-id-a1b2c3`
#
# NOTE: We use `data-djc-css-a1b2c3` and `data-djc-id-a1b2c3` instead of
# `data-djc-css="a1b2c3"` and `data-djc-id="a1b2c3"`, to allow
# multiple values to be associated with the same element, which may happen if
# one component renders another.
if component_id:
all_root_attributes.append(f"data-djc-id-{component_id}")
# Component ID is used for executing JS script, e.g. `data-djc-id-a1b2c3`
# Attribute by which we bind the CSS variables to the component's CSS,
# e.g. `data-djc-css-a1b2c3`
if css_input_hash:
all_root_attributes.append(f"data-djc-css-{css_input_hash}")
# These attributes are set on all tags
all_attributes = []
# We apply the CSS scoping attribute to both root and non-root tags.
#
# This is the HTML part of Vue-like CSS scoping.
# That is, for each HTML element that the component renders, we add a `data-djc-scope-a1b2c3` attribute.
# And we stop when we come across a nested component.
if css_scope_id:
all_attributes.append(f"data-djc-scope-{css_scope_id}")
is_safestring = isinstance(html_content, SafeString)
updated_html, child_components = set_html_attributes(
html_content,
root_attributes=all_root_attributes,
all_attributes=all_attributes,
# Setting this means that set_html_attributes will check for HTML elements with this
# attribute, and return a dictionary of {attribute_value: [attributes_set_on_this_tag]}.
#
# NOTE: We use `data-djc-css-a1b2c3` and `data-djc-id-a1b2c3` instead of
# `data-djc-css="a1b2c3"` and `data-djc-id="a1b2c3"`, to allow
# multiple values to be associated with the same element, which may happen if
# One component renders another.
elem.set_attr(f"data-djc-id-{component_id}", True)
# So if HTML contains tag <template djc-render-id="123"></template>,
# and we set on that tag `data-djc-id-123`, then we will get
# {
# "123": ["data-djc-id-123"],
# }
#
# This is a minor optimization. Without this, when we're rendering components in
# component_post_render(), we'd have to parse each `<template djc-render-id="123"></template>`
# to find the HTML attribute that were set on it.
watch_on_attribute="djc-render-id",
)
updated_html = mark_safe(updated_html) if is_safestring else updated_html
# Attribute by which we bind the CSS variables to the component's CSS,
# e.g. `data-djc-css-a1b2c3`
if css_input_hash:
elem.set_attr(f"data-djc-css-{css_input_hash}", True)
return SoupNode.to_html_multiroot(elems)
return updated_html, child_components
def _insert_component_comment(
@ -332,18 +362,13 @@ def _insert_component_comment(
def postprocess_component_html(
component_cls: Type["Component"],
component_id: str,
html_content: str,
html_content: Union[str, SafeString],
css_input_hash: Optional[str],
js_input_hash: Optional[str],
type: RenderType,
render_dependencies: bool,
) -> str:
# Make the HTML work with JS and CSS dependencies
html_content = _link_dependencies_with_component_html(
component_id=component_id,
html_content=html_content,
css_input_hash=css_input_hash,
)
) -> Union[str, SafeString]:
is_safestring = isinstance(html_content, SafeString)
# NOTE: To better understand the next section, consider this:
#
@ -375,6 +400,8 @@ def postprocess_component_html(
if render_dependencies:
output = _render_dependencies(output, type)
output = mark_safe(output) if is_safestring else output
return output
@ -417,8 +444,8 @@ MAYBE_COMP_CSS_ID = r"(?: data-djc-css-\w{6})?"
PLACEHOLDER_REGEX = re.compile(
r"{css_placeholder}|{js_placeholder}".format(
css_placeholder=f'<link{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID} name="{CSS_PLACEHOLDER_NAME}"/?>',
js_placeholder=f'<script{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID} name="{JS_PLACEHOLDER_NAME}"></script>',
css_placeholder=f'<link name="{CSS_PLACEHOLDER_NAME}"{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID}/?>',
js_placeholder=f'<script name="{JS_PLACEHOLDER_NAME}"{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID}></script>',
).encode()
)
@ -734,6 +761,10 @@ def _process_dep_declarations(content: bytes, type: RenderType) -> Tuple[bytes,
return (content, final_script_tags.encode("utf-8"), final_css_tags.encode("utf-8"))
href_pattern = re.compile(r'href="([^"]+)"')
src_pattern = re.compile(r'src="([^"]+)"')
# Detect duplicates by URLs, extract URLs, and sort by URLs
def _postprocess_media_tags(
script_type: ScriptType,
@ -743,15 +774,21 @@ def _postprocess_media_tags(
tags_by_url: Dict[str, str] = {}
for tag in tags:
node = SoupNode.from_fragment(tag.strip())[0]
# <script src="..."> vs <link href="...">
attr = "src" if script_type == "js" else "href"
maybe_url = node.get_attr(attr, None)
# Extract the URL from <script src="..."> or <link href="...">
if script_type == "js":
attr = "src"
attr_pattern = src_pattern
else:
attr = "href"
attr_pattern = href_pattern
maybe_url_match = attr_pattern.search(tag.strip())
maybe_url = maybe_url_match.group(1) if maybe_url_match else None
if not is_nonempty_str(maybe_url):
raise RuntimeError(
f"One of entries for `Component.Media.{script_type}` media is missing a "
f"value for attribute '{attr}'. If there is content inlined inside the `<{node.name()}>` tags, "
f"value for attribute '{attr}'. If there is content inlined inside the `<{attr}>` tags, "
f"you must move the content to a `.{script_type}` file and reference it via '{attr}'.\nGot:\n{tag}"
)
@ -908,6 +945,9 @@ def _gen_exec_script(
return exec_script
head_or_body_end_tag_re = re.compile(r"<\/(?:head|body)\s*>", re.DOTALL)
def _insert_js_css_to_default_locations(
html_content: str,
js_content: Optional[str],
@ -917,37 +957,50 @@ def _insert_js_css_to_default_locations(
This function tries to insert the JS and CSS content into the default locations.
JS is inserted at the end of `<body>`, and CSS is inserted at the end of `<head>`.
"""
elems = SoupNode.from_fragment(html_content)
if not elems:
We find these tags by looking for the first `</head>` and last `</body>` tags.
"""
if css_content is None and js_content is None:
return None
did_modify_html = False
if css_content is not None:
for elem in elems:
if not elem.is_element():
continue
head = elem.find_tag("head")
if head:
css_elems = SoupNode.from_fragment(css_content)
head.append_children(css_elems)
did_modify_html = True
first_end_head_tag_index = None
last_end_body_tag_index = None
if js_content is not None:
for elem in elems:
if not elem.is_element():
continue
body = elem.find_tag("body")
if body:
js_elems = SoupNode.from_fragment(js_content)
body.append_children(js_elems)
did_modify_html = True
# First check the content for the first `</head>` and last `</body>` tags
for match in head_or_body_end_tag_re.finditer(html_content):
tag_name = match[0][2:6]
# We target the first `</head>`, thus, after we set it, we skip the rest
if tag_name == "head":
if css_content is not None and first_end_head_tag_index is None:
first_end_head_tag_index = match.start()
# But for `</body>`, we want the last occurrence, so we insert the content only
# after the loop.
elif tag_name == "body":
if js_content is not None:
last_end_body_tag_index = match.start()
else:
raise ValueError(f"Unexpected tag name '{tag_name}'")
# Then do two string insertions. First the CSS, because we assume that <head> is before <body>.
index_offset = 0
updated_html = html_content
if css_content is not None and first_end_head_tag_index is not None:
updated_html = updated_html[:first_end_head_tag_index] + css_content + updated_html[first_end_head_tag_index:]
index_offset = len(css_content)
did_modify_html = True
if js_content is not None and last_end_body_tag_index is not None:
js_index = last_end_body_tag_index + index_offset
updated_html = updated_html[:js_index] + js_content + updated_html[js_index:]
did_modify_html = True
if did_modify_html:
transformed = SoupNode.to_html_multiroot(elems)
return transformed
return updated_html
else:
return None # No changes made

View file

@ -0,0 +1,169 @@
import re
from collections import deque
from typing import Callable, Deque, Dict, List, Optional, Tuple
from django.utils.safestring import mark_safe
# Function that accepts a list of extra HTML attributes to be set on the component's root elements
# and returns the component's HTML content and a dictionary of child components' IDs
# and their root elements' HTML attributes.
#
# In other words, we use this to "delay" the actual rendering of the component's HTML content,
# until we know what HTML attributes to apply to the root elements.
ComponentRenderer = Callable[[Optional[List[str]]], Tuple[str, Dict[str, List[str]]]]
# Render-time cache for component rendering
# See Component._post_render()
#
# Maps a component's render ID to its deferred renderer. An entry is stored by
# `component_post_render()` for every component, and popped again once the root
# component assembles the final HTML.
component_renderer_cache: Dict[str, ComponentRenderer] = {}
# Maps a child component's render ID to the extra HTML attributes that its parent
# set on the child's placeholder. The entry is popped when the child's renderer
# is called, and the attributes are passed to that renderer.
child_component_attrs: Dict[str, List[str]] = {}
# Matches a whole placeholder element, e.g. `<template djc-render-id="a1b3cf"></template>`,
# optionally with other attributes before or after `djc-render-id`.
nested_comp_pattern = re.compile(r'<template [^>]*?djc-render-id="\w{6}"[^>]*?></template>')
# Extracts the 6-character render ID from a matched placeholder (named group `render_id`).
render_id_pattern = re.compile(r'djc-render-id="(?P<render_id>\w{6})"')
# When a component is rendered, we want to apply HTML attributes like `data-djc-id-a1b3cf`
# to all root elements. However, we have to approach it smartly, to minimize the HTML parsing.
#
# If we naively first rendered the child components, and then the parent component, then we would
# have to parse the child's HTML twice (once for itself, and once as part of the parent).
# When we have a deeply nested component structure, this can add up to a lot of parsing.
# See https://github.com/django-components/django-components/issues/14#issuecomment-2596096632.
#
# Imagine we first render the child components. Once rendered, child's HTML gets embedded into
# the HTML of the parent. So by the time we get to the root, we will have to parse the full HTML
# document, even if the root component is only a small part of the document.
#
# So instead, when a nested component is rendered, we put there only a placeholder, and store the
# actual HTML content in `component_renderer_cache`.
#
# ```django
# <div>
# <h2>...</h2>
# <template djc-render-id="a1b3cf"></template>
# <span>...</span>
# <template djc-render-id="f3d3cf"></template>
# </div>
# ```
#
# The full flow is as follows:
# 1. When a component is nested in another, the child component is rendered, but it returns
# only a placeholder like `<template djc-render-id="a1b3cf"></template>`.
# The actual HTML output is stored in `component_renderer_cache`.
# 2. The parent of the child component is rendered normally.
# 3. If the placeholder for the child component is at root of the parent component,
# then the placeholder may be tagged with extra attributes, e.g. `data-djc-id-a1b3cf`.
# `<template djc-render-id="a1b3cf" data-djc-id-a1b3cf></template>`.
# 4. When the parent is done rendering, we go back to step 1., the parent component
# either returns the actual HTML, or a placeholder.
# 5. Only once we get to the root component, that has no further parents, is when we finally
# start putting it all together.
# 6. We start at the root component. We search the root component's output HTML for placeholders.
# Each placeholder has the attribute `djc-render-id` that links to its actual content.
# 7. For each found placeholder, we replace it with the actual content.
# But as part of step 7), we also:
# - If any of the child placeholders had extra attributes, we cache these, so we can access them
# once we get to rendering the child component.
# - And if the parent component had any extra attributes set by its parent, we apply these
# to the root elements.
# 8. Lastly, we merge all the parts together, and return the final HTML.
def component_post_render(
    renderer: ComponentRenderer,
    render_id: str,
    parent_id: Optional[str],
) -> str:
    """
    Register a component's deferred renderer and, for the root component,
    assemble the final HTML.

    Args:
        renderer: Callback that accepts the extra HTML attributes to set on the
            component's root elements, and returns the component's HTML together
            with a dict of `{child_render_id: [attributes set on its placeholder]}`.
        render_id: Render ID of the component being rendered.
        parent_id: Render ID of the parent component, or `None` if this is
            the root component.

    Returns:
        For a nested component (`parent_id` is not `None`), a placeholder
        `<template djc-render-id="..."></template>` marked as safe.
        For the root component, the fully assembled HTML marked as safe.

    Raises:
        KeyError: If a placeholder refers to a render ID that has no renderer
            stored in `component_renderer_cache`.
    """
    # Instead of rendering the component's HTML content immediately, we store it,
    # so we can render the component only once we know if there are any HTML attributes
    # to be applied to the resulting HTML.
    component_renderer_cache[render_id] = renderer

    if parent_id is not None:
        # Case: Nested component
        # If component is nested, return a placeholder
        return mark_safe(f'<template djc-render-id="{render_id}"></template>')

    # Case: Root component - Construct the final HTML by recursively replacing placeholders
    #
    # We process the component's HTML from root-downwards, going depth-first.
    # So if we have a structure:
    # <div>
    #   <h2>...</h2>
    #   <template djc-render-id="a1b3cf"></template>
    #   <span>...</span>
    #   <template djc-render-id="f3d3cf"></template>
    # </div>
    #
    # Then we split the HTML at placeholders into pairs of (content, placeholder_id):
    # - ("<div><h2>...</h2>", "a1b3cf")
    # - ("<span>...</span>", "f3d3cf")
    # - ("</div>", None)
    #
    # Then we process each pair:
    # 1. Append the content to the output
    # 2. If the placeholder ID is not None, fetch the renderer by its placeholder ID (e.g. "a1b3cf")
    # 3. Call the renderer with any extra attributes that the parent set on the placeholder.
    # 4. Split the rendered content by placeholders, and put the pairs at the FRONT of the queue,
    #    repeating this whole process until we've processed all nested components.
    content_parts: List[str] = []
    process_queue: Deque[Tuple[str, Optional[str]]] = deque()

    process_queue.append(("", render_id))

    while process_queue:
        curr_content_before_component, curr_comp_id = process_queue.popleft()

        # Process content before the component
        if curr_content_before_component:
            content_parts.append(curr_content_before_component)

        # The entry was only a remaining text, no more components to process, we're done
        if curr_comp_id is None:
            continue

        # Generate component's content, applying the extra HTML attributes set by the parent component
        curr_comp_renderer = component_renderer_cache.pop(curr_comp_id)
        # NOTE: This may be undefined, because this is set only for components that
        # are also root elements in their parent's HTML
        curr_comp_attrs = child_component_attrs.pop(curr_comp_id, None)
        curr_comp_content, curr_child_component_attrs = curr_comp_renderer(curr_comp_attrs)

        # Exclude the `data-djc-scope-...` attribute from being applied to the child component's HTML.
        #
        # The dict is keyed by the child's render ID, and the attributes live in the VALUES,
        # so the filtering has to be done on each list of attributes, not on the keys.
        # We build new lists, so the dict returned by the renderer is left untouched.
        for child_id, child_attrs in curr_child_component_attrs.items():
            child_component_attrs[child_id] = [
                attr for attr in child_attrs if not attr.startswith("data-djc-scope-")
            ]

        # Process the component's content
        last_index = 0
        parts_to_process: List[Tuple[str, Optional[str]]] = []

        # Split component's content by placeholders, and put the pairs of (content, placeholder_id) into the queue
        for match in nested_comp_pattern.finditer(curr_comp_content):
            part_before_component = curr_comp_content[last_index : match.start()]  # noqa: E203
            last_index = match.end()
            comp_part = match[0]

            # Extract the placeholder ID from `<template djc-render-id="a1b3cf"></template>`
            curr_child_id_match = render_id_pattern.search(comp_part)
            if curr_child_id_match is None:
                raise ValueError(f"No placeholder ID found in {comp_part}")
            curr_child_id = curr_child_id_match.group("render_id")
            parts_to_process.append((part_before_component, curr_child_id))

        # Append any remaining text
        if last_index < len(curr_comp_content):
            parts_to_process.append((curr_comp_content[last_index:], None))

        # `extendleft()` inserts items one by one at the front, which reverses their order.
        # Hence we reverse the list first, so the parts end up in document order
        # and are processed before the parent's remaining content (depth-first).
        process_queue.extendleft(reversed(parts_to_process))

    output = "".join(content_parts)
    return mark_safe(output)

View file

@ -140,6 +140,7 @@ class SlotIsFilled(dict):
@dataclass
class ComponentSlotContext:
component_name: str
component_id: str
template_name: str
is_dynamic_component: bool
default_slot: Optional[str]

View file

@ -1,111 +0,0 @@
from abc import ABC, abstractmethod
from typing import Any, List, Optional, Sequence
from bs4 import BeautifulSoup, CData, Comment, Doctype, NavigableString, Tag
class HTMLNode(ABC):
    """
    Interface for an HTML manipulation library. This allows us to potentially swap
    between different libraries.

    Each instance wraps a single node of a parsed HTML fragment.
    """

    @classmethod
    @abstractmethod
    def from_fragment(cls, html: str) -> Sequence["HTMLNode"]:
        """Parse the HTML string and return the nodes found in it."""
        ...

    @abstractmethod
    def to_html(self) -> str:
        """Serialize this node back to an HTML string."""
        ...

    @abstractmethod
    def name(self) -> str:
        """Get tag name"""
        ...

    @abstractmethod
    def find_tag(self, tag: str) -> Optional["HTMLNode"]:
        """Return a node matching the tag name `tag`, or `None` if there is no match."""
        ...

    @abstractmethod
    def append_children(self, children: Sequence[Any]) -> None:
        """Append the given nodes as children of this node."""
        ...

    @abstractmethod
    def get_attr(self, attr: str, default: Any = None) -> Any:
        """Return the value of the HTML attribute `attr`, or `default`."""
        ...

    @abstractmethod
    def set_attr(self, attr: str, value: Any) -> None:
        """Set the HTML attribute `attr` on this node to `value`."""
        ...

    @abstractmethod
    def is_element(self) -> bool:
        """Returns `False` if the node is a text, comment, or doctype node. `True` otherwise."""
        ...

    @classmethod
    def to_html_multiroot(cls, elems: Sequence["HTMLNode"]) -> str:
        """Serialize a sequence of sibling nodes by concatenating their `to_html()` outputs."""
        return "".join([elem.to_html() for elem in elems])
class SoupNode(HTMLNode):
    """`HTMLNode` implementation that wraps a BeautifulSoup node."""

    def __init__(self, node: Tag):
        self.node = node

    @classmethod
    def from_fragment(cls, html: str) -> List["SoupNode"]:
        """Parse `html` with `html.parser` and wrap each top-level node of the fragment."""
        parsed = BeautifulSoup(html, "html.parser")
        wrapped: List["SoupNode"] = []
        for top_level in parsed.contents:
            wrapped.append(cls(top_level))
        return wrapped

    def to_html(self) -> str:
        """Serialize the wrapped node, re-adding the markup around special string nodes."""
        raw = self.node
        # NOTE: The specific string types are checked before `NavigableString`.
        # Presumably they subclass it in bs4 (verify), so this order must be kept.
        if isinstance(raw, CData):
            return f"<![CDATA[{raw}]]>"
        if isinstance(raw, Comment):
            return f"<!-- {raw} -->"
        if isinstance(raw, Doctype):
            return f"<!DOCTYPE {raw}>"
        if isinstance(raw, NavigableString):
            return str(raw)
        # See https://github.com/EmilStenstrom/django-components/pull/861#discussion_r1898516210
        encoded = raw.encode(formatter="html5")
        return encoded.decode()

    def name(self) -> str:
        """Get tag name"""
        return self.node.name

    def find_tag(self, tag: str) -> Optional["SoupNode"]:
        """Return this node if it is a `tag` element, else the first `select_one(tag)` match or `None`."""
        raw = self.node
        if isinstance(raw, Tag) and raw.name == tag:
            return self
        found = raw.select_one(tag)
        return SoupNode(found) if found else None

    def append_children(self, children: Sequence["SoupNode"]) -> None:
        """Append the wrapped nodes of `children` to this node. No-op if this node is not a tag."""
        if not isinstance(self.node, Tag):
            return
        for child in children:
            self.node.append(child.node)

    def get_attr(self, attr: str, default: Any = None) -> Any:
        """Return the attribute value (list values are joined by a space), or `default`."""
        if not isinstance(self.node, Tag):
            return default
        found = self.node.get(attr, default)
        return " ".join(found) if isinstance(found, list) else found

    def set_attr(self, attr: str, value: Any) -> None:
        """Set, or for `value=False` remove, an attribute. No-op if this node is not a tag."""
        if not isinstance(self.node, Tag):
            return

        if value is False:
            # Remove the attribute
            self.node.attrs.pop(attr, None)
            return

        # `True` sets a boolean attribute without a value
        self.node[attr] = None if value is True else value

    def is_element(self) -> bool:
        """Return `True` only if the wrapped node is a `Tag`."""
        return isinstance(self.node, Tag)

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,7 @@ if calling `Component.render()` or `render_dependencies()` behave as expected.
For checking the OUTPUT of the dependencies, see `test_dependency_rendering.py`.
"""
import re
from unittest.mock import Mock
from django.http import HttpResponseNotModified
@ -13,7 +14,6 @@ from django.template import Context, Template
from django_components import Component, registry, render_dependencies, types
from django_components.components.dynamic import DynamicComponent
from django_components.middleware import ComponentDependencyMiddleware
from django_components.util.html import SoupNode
from .django_test_setup import setup_test_config
from .testutils import BaseTestCase, create_and_process_template_response
@ -223,9 +223,8 @@ class RenderDependenciesTests(BaseTestCase):
count=1,
)
# Nodes: [Doctype, whitespace, <html>]
nodes = SoupNode.from_fragment(rendered.strip())
rendered_body = nodes[2].find_tag("body").to_html() # type: ignore[union-attr]
body_re = re.compile(r"<body>(.*?)</body>", re.DOTALL)
rendered_body = body_re.search(rendered).group(1) # type: ignore[union-attr]
self.assertInHTML(
"""<script src="django_components/django_components.min.js">""",
@ -275,9 +274,8 @@ class RenderDependenciesTests(BaseTestCase):
count=1,
)
# Nodes: [Doctype, whitespace, <html>]
nodes = SoupNode.from_fragment(rendered.strip())
rendered_head = nodes[2].find_tag("head").to_html() # type: ignore[union-attr]
head_re = re.compile(r"<head>(.*?)</head>", re.DOTALL)
rendered_head = head_re.search(rendered).group(1) # type: ignore[union-attr]
self.assertInHTML(
"""<script src="django_components/django_components.min.js">""",
@ -518,6 +516,7 @@ class MiddlewareTests(BaseTestCase):
template,
context=Context({"component_name": "test-component"}),
)
assert_dependencies(rendered2)
self.assertEqual(
rendered2.count("Variable: <strong data-djc-id-a1bc43 data-djc-id-a1bc44>value</strong>"),

View file

@ -95,9 +95,7 @@ class DynamicExprTests(BaseTestCase):
bool_var="{{ is_active }}"
list_var="{{ list|slice:':-1' }}"
/ %}
""".replace(
"\n", " "
)
"""
)
template = Template(template_str)
@ -116,9 +114,14 @@ class DynamicExprTests(BaseTestCase):
self.assertEqual(captured["bool_var"], True)
self.assertEqual(captured["list_var"], [{"a": 1}, {"a": 2}])
self.assertEqual(
rendered.strip(),
"<!-- _RENDERED SimpleComponent_5b8d97,a1bc3f,, -->\n<div data-djc-id-a1bc3f>lorem</div>\n<div data-djc-id-a1bc3f>True</div>\n<div data-djc-id-a1bc3f>[{'a': 1}, {'a': 2}]</div>", # noqa: E501
self.assertHTMLEqual(
rendered,
"""
<!-- _RENDERED SimpleComponent_5b8d97,a1bc3f,, -->
<div data-djc-id-a1bc3f>lorem</div>
<div data-djc-id-a1bc3f>True</div>
<div data-djc-id-a1bc3f>[{'a': 1}, {'a': 2}]</div>
""",
)
@parametrize_context_behavior(["django", "isolated"])
@ -164,9 +167,7 @@ class DynamicExprTests(BaseTestCase):
list_var="{% noop list %}"
dict_var="{% noop dict %}"
/ %}
""".replace(
"\n", " "
)
"""
)
template = Template(template_str)
@ -186,15 +187,15 @@ class DynamicExprTests(BaseTestCase):
self.assertEqual(captured["dict_var"], {"a": 3})
self.assertEqual(captured["list_var"], [{"a": 1}, {"a": 2}])
self.assertEqual(
rendered.strip(),
(
"<!-- _RENDERED SimpleComponent_743413,a1bc3f,, -->\n"
"<div data-djc-id-a1bc3f>lorem ipsum dolor</div>\n"
"<div data-djc-id-a1bc3f>True</div>\n"
"<div data-djc-id-a1bc3f>[{'a': 1}, {'a': 2}]</div>\n"
"<div data-djc-id-a1bc3f>{'a': 3}</div>"
),
self.assertHTMLEqual(
rendered,
"""
<!-- _RENDERED SimpleComponent_743413,a1bc3f,, -->
<div data-djc-id-a1bc3f>lorem ipsum dolor</div>
<div data-djc-id-a1bc3f>True</div>
<div data-djc-id-a1bc3f>[{'a': 1}, {'a': 2}]</div>
<div data-djc-id-a1bc3f>{'a': 3}</div>
""",
)
@parametrize_context_behavior(["django", "isolated"])
@ -240,9 +241,7 @@ class DynamicExprTests(BaseTestCase):
bool_var="{# noop is_active #}"
list_var=" {# noop list #} "
/ %}
""".replace(
"\n", " "
)
"""
)
template = Template(template_str)
@ -262,14 +261,15 @@ class DynamicExprTests(BaseTestCase):
self.assertEqual(captured["bool_var"], "")
self.assertEqual(captured["list_var"], " ")
# NOTE: This is a whitespace-sensitive test, so we check the exact output
self.assertEqual(
rendered.strip(),
(
"<!-- _RENDERED SimpleComponent_e258c0,a1bc3f,, -->\n"
"<div data-djc-id-a1bc3f></div>\n"
"<div data-djc-id-a1bc3f> abc</div>\n"
"<div data-djc-id-a1bc3f></div>\n"
"<div data-djc-id-a1bc3f> </div>"
" <div data-djc-id-a1bc3f></div>\n"
" <div data-djc-id-a1bc3f> abc</div>\n"
" <div data-djc-id-a1bc3f></div>\n"
" <div data-djc-id-a1bc3f> </div>"
),
)
@ -315,14 +315,12 @@ class DynamicExprTests(BaseTestCase):
{% load component_tags %}
{% component 'test'
" {% lorem var_a w %} "
" {% lorem var_a w %} {{ list|slice:':-1' }} "
" {% lorem var_a w %} {{ list|slice:':-1'|safe }} "
bool_var=" {% noop is_active %} "
list_var=" {% noop list %} "
dict_var=" {% noop dict %} "
/ %}
""".replace(
"\n", " "
)
"""
)
template = Template(template_str)
@ -342,15 +340,16 @@ class DynamicExprTests(BaseTestCase):
self.assertEqual(captured["dict_var"], " {'a': 3} ")
self.assertEqual(captured["list_var"], " [{'a': 1}, {'a': 2}] ")
# NOTE: This is a whitespace-sensitive test, so we check the exact output
self.assertEqual(
rendered.strip(),
(
"<!-- _RENDERED SimpleComponent_6c8e94,a1bc3f,, -->\n"
"<div data-djc-id-a1bc3f> lorem ipsum dolor </div>\n"
"<div data-djc-id-a1bc3f> lorem ipsum dolor [{'a': 1}] </div>\n"
"<div data-djc-id-a1bc3f> True </div>\n"
"<div data-djc-id-a1bc3f> [{'a': 1}, {'a': 2}] </div>\n"
"<div data-djc-id-a1bc3f> {'a': 3} </div>"
" <div data-djc-id-a1bc3f> lorem ipsum dolor </div>\n"
" <div data-djc-id-a1bc3f> lorem ipsum dolor [{'a': 1}] </div>\n"
" <div data-djc-id-a1bc3f> True </div>\n"
" <div data-djc-id-a1bc3f> [{'a': 1}, {'a': 2}] </div>\n"
" <div data-djc-id-a1bc3f> {'a': 3} </div>"
),
)
@ -383,9 +382,7 @@ class DynamicExprTests(BaseTestCase):
"""
{% load component_tags %}
{% component 'test' '"' "{%}" bool_var="{% noop is_active %}" / %}
""".replace(
"\n", " "
)
"""
)
template = Template(template_str)
@ -393,14 +390,14 @@ class DynamicExprTests(BaseTestCase):
Context({"is_active": True}),
)
self.assertEqual(
rendered.strip(),
(
"<!-- _RENDERED SimpleComponent_c7a5c3,a1bc3f,, -->\n"
'<div data-djc-id-a1bc3f>"</div>\n'
"<div data-djc-id-a1bc3f>{%}</div>\n"
"<div data-djc-id-a1bc3f>True</div>"
),
self.assertHTMLEqual(
rendered,
"""
<!-- _RENDERED SimpleComponent_c7a5c3,a1bc3f,, -->
<div data-djc-id-a1bc3f>"</div>
<div data-djc-id-a1bc3f>{%}</div>
<div data-djc-id-a1bc3f>True</div>
""",
)
@parametrize_context_behavior(["django", "isolated"])
@ -432,9 +429,7 @@ class DynamicExprTests(BaseTestCase):
"{% component 'test' '{{ var_a }}' bool_var=is_active / %}"
bool_var="{% noop is_active %}"
/ %}
""".replace(
"\n", " "
)
"""
)
template = Template(template_str)
@ -447,16 +442,17 @@ class DynamicExprTests(BaseTestCase):
),
)
self.assertEqual(
rendered.strip(),
(
"<!-- _RENDERED SimpleComponent_5c8766,a1bc41,, -->\n"
"<div data-djc-id-a1bc41><!-- _RENDERED SimpleComponent_5c8766,a1bc40,, -->\n"
"<div data-djc-id-a1bc40>3</div>\n"
"<div data-djc-id-a1bc40>True</div>\n"
"</div>\n"
"<div data-djc-id-a1bc41>True</div>"
),
self.assertHTMLEqual(
rendered,
"""
<!-- _RENDERED SimpleComponent_5c8766,a1bc41,, -->
<div data-djc-id-a1bc41>
<!-- _RENDERED SimpleComponent_5c8766,a1bc40,, -->
<div data-djc-id-a1bc40>3</div>
<div data-djc-id-a1bc40>True</div>
</div>
<div data-djc-id-a1bc41>True</div>
"""
)
@ -498,9 +494,7 @@ class SpreadOperatorTests(BaseTestCase):
..."{{ list|first }}"
x=123
/ %}
""".replace(
"\n", " "
)
"""
)
template = Template(template_str)
@ -738,9 +732,7 @@ class SpreadOperatorTests(BaseTestCase):
x=123
..."{{ list|first }}"
/ %}
""".replace(
"\n", " "
)
"""
)
template1 = Template(template_str1)
@ -761,9 +753,7 @@ class SpreadOperatorTests(BaseTestCase):
}
attrs:style="OVERWRITTEN"
/ %}
""".replace(
"\n", " "
)
"""
)
template2 = Template(template_str2)
@ -792,9 +782,7 @@ class SpreadOperatorTests(BaseTestCase):
var_a
...
/ %}
""".replace(
"\n", " "
)
"""
)
with self.assertRaisesMessage(TemplateSyntaxError, "Spread syntax '...' is missing a value"):
@ -820,9 +808,7 @@ class SpreadOperatorTests(BaseTestCase):
...var_a
...var_b
/ %}
""".replace(
"\n", " "
)
"""
)
template = Template(template_str)
@ -855,9 +841,7 @@ class SpreadOperatorTests(BaseTestCase):
{% component 'test'
...var_b
/ %}
""".replace(
"\n", " "
)
"""
)
template = Template(template_str)

View file

@ -1,127 +0,0 @@
from django.test import TestCase
from django_components.util.html import SoupNode
from .django_test_setup import setup_test_config
setup_test_config({"autodiscover": False})
class HtmlTests(TestCase):
def test_beautifulsoup_impl(self):
nodes = SoupNode.from_fragment(
"""
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
<!-- I'M COMMENT -->
<button>
Click me!
</button>
""".strip()
)
# Items: <div>, whitespace, comment, whitespace, <button>
self.assertEqual(len(nodes), 5)
self.assertHTMLEqual(
nodes[0].to_html(),
"""
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
""",
)
self.assertHTMLEqual(
nodes[2].to_html(),
"<!-- I&#x27;M COMMENT -->",
)
self.assertHTMLEqual(
nodes[4].to_html(),
"""
<button>
Click me!
</button>
""",
)
self.assertEqual(nodes[0].name(), "div")
self.assertEqual(nodes[4].name(), "button")
self.assertEqual(nodes[0].is_element(), True)
self.assertEqual(nodes[2].is_element(), False)
self.assertEqual(nodes[4].is_element(), True)
self.assertEqual(nodes[0].get_attr("class"), "abc xyz")
self.assertEqual(nodes[4].get_attr("class"), None)
nodes[0].set_attr("class", "123 456")
nodes[4].set_attr("class", "abc def")
self.assertEqual(nodes[0].get_attr("class"), "123 456")
self.assertEqual(nodes[4].get_attr("class"), "abc def")
self.assertHTMLEqual(
nodes[0].to_html(),
"""
<div class="123 456" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
""",
)
self.assertHTMLEqual(
nodes[4].to_html(),
"""
<button class="abc def">
Click me!
</button>
""",
)
# Setting attr to `True` will set it to boolean attribute,
# while setting it to `False` will remove the attribute.
nodes[4].set_attr("disabled", True)
self.assertHTMLEqual(
nodes[4].to_html(),
"""
<button class="abc def" disabled>
Click me!
</button>
""",
)
nodes[4].set_attr("disabled", False)
self.assertHTMLEqual(
nodes[4].to_html(),
"""
<button class="abc def">
Click me!
</button>
""",
)
# Return self
self.assertEqual(nodes[0].node, nodes[0].find_tag("div").node) # type: ignore[union-attr]
# Return descendant
li = nodes[0].find_tag("li")
self.assertHTMLEqual(li.to_html(), "<li>Hi</li>") # type: ignore[union-attr]
# Return None when not found
self.assertEqual(nodes[0].find_tag("main"), None)
# Insert children
li.append_children([nodes[4]]) # type: ignore[union-attr]
self.assertHTMLEqual(
li.to_html(), # type: ignore[union-attr]
"""
<li>
Hi
<button class="abc def">
Click me!
</button>
</li>
""",
)

477
tests/test_html_parser.py Normal file
View file

@ -0,0 +1,477 @@
from django.test import TestCase
from typing import List
from django_components.util.html_parser import HTMLTag, _parse_html as parse_html, set_html_attributes
from .django_test_setup import setup_test_config
setup_test_config({"autodiscover": False})
# This same set of tests is also found in djc_html_parser, to ensure that
# this implementation can be replaced with the djc_html_parser's Rust-based implementation
class TestHTMLParser(TestCase):
def test_basic_transformation(self):
    """The root tag gets the root and shared attributes; its child only the shared one."""
    source = "<div><p>Hello</p></div>"
    transformed, _ = set_html_attributes(
        source,
        root_attributes=["data-root"],
        all_attributes=["data-all"],
    )
    assert transformed == "<div data-root data-all><p data-all>Hello</p></div>"
def test_multiple_roots(self):
    """Each of two sibling top-level tags is treated as a root."""
    source = "<div>First</div><span>Second</span>"
    transformed, _ = set_html_attributes(
        source,
        root_attributes=["data-root"],
        all_attributes=["data-all"],
    )
    wanted = "<div data-root data-all>First</div><span data-root data-all>Second</span>"
    assert transformed == wanted
def test_complex_html(self):
# Two top-level tags (<div> and <footer>) with nested children, pre-existing
# attributes, an entity and a self-closing <img/>.
# Expected: only the two top-level tags gain `data-root`; every tag gains
# `data-all` and `data-v-123`, appended after its existing attributes.
html = """
<div class="container" id="main">
<header class="flex">
<h1 title="Main Title">Hello & Welcome</h1>
<nav data-existing="true">
<a href="/home">Home</a>
<a href="/about" class="active">About</a>
</nav>
</header>
<main>
<article data-existing="true">
<h2>Article 1</h2>
<p>Some text with <strong>bold</strong> and <em>emphasis</em></p>
<img src="test.jpg" alt="Test Image"/>
</article>
</main>
</div>
<footer id="footer">
<p>&copy; 2024</p>
</footer>
"""
result, _ = set_html_attributes(html, ["data-root"], ["data-all", "data-v-123"])
expected = """
<div class="container" id="main" data-root data-all data-v-123>
<header class="flex" data-all data-v-123>
<h1 title="Main Title" data-all data-v-123>Hello & Welcome</h1>
<nav data-existing="true" data-all data-v-123>
<a href="/home" data-all data-v-123>Home</a>
<a href="/about" class="active" data-all data-v-123>About</a>
</nav>
</header>
<main data-all data-v-123>
<article data-existing="true" data-all data-v-123>
<h2 data-all data-v-123>Article 1</h2>
<p data-all data-v-123>Some text with <strong data-all data-v-123>bold</strong> and <em data-all data-v-123>emphasis</em></p>
<img src="test.jpg" alt="Test Image" data-all data-v-123/>
</article>
</main>
</div>
<footer id="footer" data-root data-all data-v-123>
<p data-all data-v-123>&copy; 2024</p>
</footer>
""" # noqa: E501
# Exact string comparison: text, entities and whitespace must come back unchanged.
assert result == expected
def test_void_elements(self):
    """Void tags, self-closed or not, get the new attributes inside the tag itself."""
    scenarios = (
        ('<meta charset="utf-8">', '<meta charset="utf-8" data-root data-v-123>'),
        ('<meta charset="utf-8"/>', '<meta charset="utf-8" data-root data-v-123/>'),
        ("<div><br><hr></div>", "<div data-root data-v-123><br data-v-123><hr data-v-123></div>"),
        ('<img src="test.jpg" alt="Test">', '<img src="test.jpg" alt="Test" data-root data-v-123>'),
    )
    for markup, wanted in scenarios:
        transformed, _ = set_html_attributes(markup, ["data-root"], ["data-v-123"])
        assert transformed == wanted
def test_html_head_with_meta(self):
# <head> is the single root; it contains unclosed void tags (<meta>, <link>)
# next to a regular <title>. Expected: <head> gains both attributes, and every
# child gains only `data-v-123`.
html = """
<head>
<meta charset="utf-8">
<title>Test Page</title>
<link rel="stylesheet" href="style.css">
<meta name="description" content="Test">
</head>"""
result, _ = set_html_attributes(html, ["data-root"], ["data-v-123"])
expected = """
<head data-root data-v-123>
<meta charset="utf-8" data-v-123>
<title data-v-123>Test Page</title>
<link rel="stylesheet" href="style.css" data-v-123>
<meta name="description" content="Test" data-v-123>
</head>"""
assert result == expected
def test_watch_attribute(self):
# With `watch_on_attribute="data-id"`, the second return value is expected to map
# each tag's `data-id` value to the list of attributes that were added to that tag.
# The <p> has no `data-id`, so it is transformed but not captured.
html = """
<div data-id="123">
<p>Regular element</p>
<span data-id="456">Nested element</span>
<img data-id="789" src="test.jpg"/>
</div>"""
result, captured = set_html_attributes(html, ["data-root"], ["data-v-123"], watch_on_attribute="data-id")
expected = """
<div data-id="123" data-root data-v-123>
<p data-v-123>Regular element</p>
<span data-id="456" data-v-123>Nested element</span>
<img data-id="789" src="test.jpg" data-v-123/>
</div>"""
assert result == expected
# Verify attribute capturing
assert len(captured) == 3
# Root element should have both root and all attributes
assert captured["123"] == ["data-root", "data-v-123"]
# Non-root elements should only have all attributes
assert captured["456"] == ["data-v-123"]
assert captured["789"] == ["data-v-123"]
def test_whitespace_preservation(self):
# Whitespace inside and between tags must come back exactly as given;
# the only difference allowed is the added attributes.
html = """<div>
<p> Hello World </p>
<span> Text with spaces </span>
</div>"""
result, _ = set_html_attributes(html, ["data-root"], ["data-all"])
expected = """<div data-root data-all>
<p data-all> Hello World </p>
<span data-all> Text with spaces </span>
</div>"""
assert result == expected
# This checks that the parser works irrespective of the main use case
class TestHTMLParserInternal(TestCase):
def test_parse_simple_tag(self):
    """A lone element reaches the callback once and the markup is returned untouched."""
    seen = []

    def collect(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        seen.append(tag)

    markup = "<div>Hello</div>"
    output = parse_html(markup, collect)

    self.assertEqual(output, markup)
    self.assertEqual(len(seen), 1)
    self.assertEqual(seen[0].name, "div")
def test_parse_nested_tags(self):
    """The inner tag is reported before its parent, each with its own stack depth."""
    visits = []

    def record(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        depth = len(tag_stack)
        visits.append((tag.name, depth))

    markup = "<div><p>Hello</p></div>"
    output = parse_html(markup, record)

    self.assertEqual(output, markup)
    self.assertEqual(len(visits), 2)
    # Inner <p> comes first (stack depth 2), then the outer <div> (stack depth 1)
    self.assertEqual(visits[0], ("p", 2))
    self.assertEqual(visits[1], ("div", 1))
def test_parse_attributes(self):
    """Quoted attributes are exposed in source order as key/value pairs."""
    seen = []

    def collect(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        seen.append(tag)

    markup = '<div class="container" id="main">Hello</div>'
    output = parse_html(markup, collect)

    self.assertEqual(output, markup)
    self.assertEqual(len(seen), 1)
    self.assertEqual(len(seen[0].attrs), 2)
    wanted_pairs = [("class", "container"), ("id", "main")]
    for position, (key, value) in enumerate(wanted_pairs):
        self.assertEqual(seen[0].attrs[position].key, key)
        self.assertEqual(seen[0].attrs[position].value, value)
def test_void_elements(self):
    """A self-closing ``<img ... />`` is reported to the callback with its attribute parsed."""
    seen = []

    def collect(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        seen.append(tag)

    markup = '<img src="test.jpg" />'
    output = parse_html(markup, collect)

    self.assertEqual(output, markup)
    self.assertEqual(len(seen), 1)
    self.assertEqual(seen[0].name, "img")
    self.assertEqual(seen[0].attrs[0].key, "src")
    self.assertEqual(seen[0].attrs[0].value, "test.jpg")
def test_add_attr(self):
    """``add_attr`` emits a quoted key/value pair and a bare attribute when the value is None."""

    def add_two(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.add_attr("data-test", "value", quoted=True)
        tag.add_attr("hidden", None, quoted=False)

    output = parse_html("<div>Content</div>", add_two)
    self.assertEqual(output, '<div data-test="value" hidden>Content</div>')
def test_rename_attr(self):
    """``rename_attr`` changes the attribute key and keeps its value."""

    def to_class_name(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.rename_attr("class", "className")

    output = parse_html('<div class="test">Content</div>', to_class_name)
    self.assertEqual(output, '<div className="test">Content</div>')
def test_delete_attr(self):
    """``delete_attr`` drops the attribute; the whitespace that preceded it stays in the output."""

    def drop_id(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.delete_attr("id")

    output = parse_html('<div class="test" id="main">Content</div>', drop_id)
    self.assertEqual(output, '<div class="test" >Content</div>')
def test_clear_attrs(self):
    """``clear_attrs`` removes every attribute, leaving a single space after the tag name."""

    def strip_all(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.clear_attrs()

    output = parse_html('<div class="test" id="main" data-value="123">Content</div>', strip_all)
    self.assertEqual(output, "<div >Content</div>")
def test_add_after_clearing_attrs(self):
    """An attribute added after ``clear_attrs`` is the only one left on the tag."""

    def reset_then_add(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.clear_attrs()
        tag.add_attr("data-test", "value", quoted=True)

    output = parse_html('<div class="test" id="main" data-value="123">Content</div>', reset_then_add)
    self.assertEqual(output, '<div data-test="value">Content</div>')
def test_insert_content(self):
    """``insert_content`` at index 0 lands before the content, at index -1 after it."""

    def surround_text(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.insert_content("Start ", 0)
        tag.insert_content(" End", -1)

    output = parse_html("<div>Content</div>", surround_text)
    self.assertEqual(output, "<div>Start Content End</div>")
def test_clear_content(self):
    """``clear_content`` empties the tag but keeps its opening and closing parts."""

    def empty_out(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.clear_content()

    output = parse_html("<div>Original content</div>", empty_out)
    self.assertEqual(output, "<div></div>")
def test_replace_content(self):
    """``replace_content`` swaps the tag's inner content for the given string."""

    def swap_text(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.replace_content("New content")

    output = parse_html("<div>Original content</div>", swap_text)
    self.assertEqual(output, "<div>New content</div>")
def test_prepend_append(self):
    """``prepend`` / ``append`` place text outside the tag, before and after it."""

    def add_around(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.prepend("Before ")
        tag.append(" after")

    output = parse_html("<div>Content</div>", add_around)
    self.assertEqual(output, "Before <div>Content</div> after")
def test_wrap(self):
    """``wrap`` encloses the whole tag between the given opening and closing strings."""

    def wrap_in_section(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.wrap('<section class="wrapper">', "</section>")

    output = parse_html("<div>Content</div>", wrap_in_section)
    self.assertEqual(output, '<section class="wrapper"><div>Content</div></section>')
def test_unwrap(self):
    """``unwrap`` removes the tag itself while keeping its content in place."""

    def unwrap_spans(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        # Only the <span> is unwrapped; the surrounding <div> is left alone
        if tag.name != "span":
            return
        tag.unwrap()

    output = parse_html("<div><span>Content</span></div>", unwrap_spans)
    self.assertEqual(output, "<div>Content</div>")
def test_rename_tag(self):
    """``rename_tag`` changes the name in both the opening and the closing tag."""

    def to_article(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        tag.rename_tag("article")

    output = parse_html("<div>Content</div>", to_article)
    self.assertEqual(output, "<article>Content</article>")
def test_get_attr_has_attr(self):
    """``has_attr`` / ``get_attr`` report present and missing attributes without altering the HTML."""
    markup = '<div class="test">Content</div>'

    def check_lookups(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
        # Present attribute
        assert tag.has_attr("class")
        # Missing attribute
        assert not tag.has_attr("id")
        class_attr = tag.get_attr("class")
        assert class_attr is not None and class_attr.value == "test"
        assert tag.get_attr("id") is None

    output = parse_html(markup, check_lookups)
    self.assertEqual(output, markup)
def test_tag_manipulation_complex(self):
# Applies most HTMLTag mutators in a single pass and checks the combined output:
# the <div> has its attributes edited, content inserted at both ends and is wrapped
# in a <section>; the inner <p> is stripped, refilled, renamed and finally unwrapped,
# so only its replacement text (with the prepended/appended strings) remains.
def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
if tag.name == "div":
# Test add_attr
tag.add_attr("data-new", "value", quoted=True)
# Test rename_attr
tag.rename_attr("class", "className")
# Test delete_attr
tag.delete_attr("id")
# Test insert_content
tag.insert_content("<span>Start</span>", 0)
tag.insert_content("<span>End</span>", -1)
# Test wrap
tag.wrap("<section>", "</section>")
elif tag.name == "p":
# Test get_attr and has_attr
assert tag.has_attr("class")
attr = tag.get_attr("class")
assert attr is not None and attr.value == "inner"
# Test clear_attrs
tag.clear_attrs()
# Test clear_content and replace_content
tag.clear_content()
tag.replace_content("New content")
# Test prepend and append
tag.prepend("Before ")
tag.append(" after")
# Test rename_tag
tag.rename_tag("article")
# Test unwrap
tag.unwrap()
html = '<div class="test" id="main"><p class="inner">Original content</p></div>'
expected = '<section><div className="test" data-new="value"><span>Start</span>Before New content after<span>End</span></div></section>' # noqa: E501
result = parse_html(html, on_tag)
self.assertEqual(result, expected)
def test_complex_html(self):
# Full-document test: doctype, comments, a <script> whose body contains tag-like
# text (including a literal "</script>" inside a JS comment and a string), a CDATA
# section, void elements and valueless attributes. The callback records every tag
# it receives and mutates <body>, <div> and <p>; everything else must come back
# exactly as given.
processed_tags = []
def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
processed_tags.append(tag)
if tag.name == "body":
# Test attribute manipulation
tag.add_attr("data-modified", "true", quoted=True)
tag.rename_attr("class", "className")
elif tag.name == "div":
# Test content manipulation
tag.insert_content("<!-- Modified -->", 0)
tag.wrap('<div class="wrapper">', "</div>")
elif tag.name == "p":
# Test attribute without value
tag.add_attr("hidden", None, quoted=False)
html = """<!DOCTYPE html>
<html lang="en" data-theme="light">
<!-- Header section -->
<head>
<meta charset="UTF-8"/>
<title>Complex Test</title>
<link rel="stylesheet" href="style.css">
<script type="text/javascript">
// Single line comment with tags: <div></div>
/* Multi-line comment
</script>
*/
const template = `<div>${value}</div>`;
console.log('</script>');
</script>
</head>
<body class="main" id="content" data-loaded>
<![CDATA[
Some CDATA content with <tags> that should be preserved
]]>
<div class="container" style="display: flex">
<img src="test.jpg" alt="Test Image"/>
<p>Hello <strong>World</strong>!</p>
<input type="text" disabled value="test"/>
</div>
</body>
</html>"""
expected = """<!DOCTYPE html>
<html lang="en" data-theme="light">
<!-- Header section -->
<head>
<meta charset="UTF-8"/>
<title>Complex Test</title>
<link rel="stylesheet" href="style.css">
<script type="text/javascript">
// Single line comment with tags: <div></div>
/* Multi-line comment
</script>
*/
const template = `<div>${value}</div>`;
console.log('</script>');
</script>
</head>
<body className="main" id="content" data-loaded data-modified="true">
<![CDATA[
Some CDATA content with <tags> that should be preserved
]]>
<div class="wrapper"><div class="container" style="display: flex"><!-- Modified -->
<img src="test.jpg" alt="Test Image"/>
<p hidden>Hello <strong>World</strong>!</p>
<input type="text" disabled value="test"/>
</div></div>
</body>
</html>"""
result = parse_html(html, on_tag)
self.assertEqual(result, expected)
# Verify the structure of processed tags
self.assertEqual(len(processed_tags), 12) # The 12 tags in the markup, void elements included; tag-like text inside <script>, comments and CDATA is not counted
# Verify specific tag attributes
html_tag = next(tag for tag in processed_tags if tag.name == "html")
self.assertEqual(len(html_tag.attrs), 2)
self.assertEqual(html_tag.attrs[0].key, "lang")
self.assertEqual(html_tag.attrs[0].value, "en")
self.assertEqual(html_tag.attrs[1].key, "data-theme")
self.assertEqual(html_tag.attrs[1].value, "light")
# Verify void elements
img_tag = next(tag for tag in processed_tags if tag.name == "img")
self.assertEqual(len(img_tag.attrs), 2)
self.assertEqual(img_tag.attrs[0].key, "src")
self.assertEqual(img_tag.attrs[0].value, "test.jpg")
# Verify attribute without value
body_tag = next(tag for tag in processed_tags if tag.name == "body")
data_loaded_attr = next(attr for attr in body_tag.attrs if attr.key == "data-loaded")
self.assertIsNone(data_loaded_attr.value)
# Verify modified attributes
self.assertTrue(any(attr.key == "data-modified" and attr.value == "true" for attr in body_tag.attrs))
self.assertTrue(any(attr.key == "className" and attr.value == "main" for attr in body_tag.attrs))
# Verify p tag modifications
p_tag = next(tag for tag in processed_tags if tag.name == "p")
self.assertTrue(any(attr.key == "hidden" and attr.value is None for attr in p_tag.attrs))

View file

@ -36,6 +36,9 @@ class BaseTestCase(SimpleTestCase):
if template_cache:
template_cache.clear()
from django_components.component import component_node_subclasses_by_name
component_node_subclasses_by_name.clear()
# Mock the `generate` function used inside `gen_id` so it returns deterministic IDs
def _start_gen_id_patch(self):
# Random number so that the generated IDs are "hex-looking", e.g. a1bc3d
@ -182,6 +185,9 @@ def parametrize_context_behavior(cases: List[ContextBehParam], settings: Optiona
if template_cache: # May be None if the cache was not initialized
template_cache.clear()
from django_components.component import component_node_subclasses_by_name
component_node_subclasses_by_name.clear()
case_has_data = not isinstance(case, str)
if isinstance(case, str):