refactor: replace bs4 and perf optimizations (#927)

This commit is contained in:
Juro Oravec 2025-01-24 10:30:41 +01:00 committed by GitHub
parent d407a8cd13
commit 0b65761fce
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 2078 additions and 418 deletions

View file

@ -1,5 +1,15 @@
# Release notes # Release notes
## v0.126
#### Refactor
- Replaced [BeautifulSoup4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) with a custom HTML parser.
- The heuristic for inserting JS and CSS dependencies into the default place has changed.
- JS is still inserted at the end of the `<body>`, and CSS at the end of `<head>`.
- However, we find the end of `<body>` by searching for the **last** occurrence of `</body>`.
- And for the end of `<head>`, we search for the **first** occurrence of `</head>`.
## v0.125 ## v0.125
⚠️ Attention ⚠️ - We migrated from `EmilStenstrom/django-components` to `django-components/django-components`. ⚠️ Attention ⚠️ - We migrated from `EmilStenstrom/django-components` to `django-components/django-components`.

View file

@ -20,7 +20,7 @@ Import as
<a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1170" target="_blank">See source code</a> <a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1118" target="_blank">See source code</a>
@ -43,7 +43,7 @@ If you insert this tag multiple times, ALL CSS links will be duplicately inserte
<a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1192" target="_blank">See source code</a> <a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1140" target="_blank">See source code</a>
@ -67,7 +67,7 @@ If you insert this tag multiple times, ALL JS scripts will be duplicately insert
<a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1516" target="_blank">See source code</a> <a href="https://github.com/django-components/django-components/tree/master/src/django_components/templatetags/component_tags.py#L1257" target="_blank">See source code</a>

View file

@ -29,7 +29,6 @@ classifiers = [
] ]
dependencies = [ dependencies = [
'Django>=4.2', 'Django>=4.2',
'beautifulsoup4>=4.12',
] ]
license = {text = "MIT"} license = {text = "MIT"}

View file

@ -11,4 +11,3 @@ playwright
requests requests
types-requests types-requests
whitenoise whitenoise
beautifulsoup4

View file

@ -6,8 +6,6 @@
# #
asgiref==3.8.1 asgiref==3.8.1
# via django # via django
beautifulsoup4==4.12.3
# via -r requirements-dev.in
black==24.10.0 black==24.10.0
# via -r requirements-dev.in # via -r requirements-dev.in
cachetools==5.5.0 cachetools==5.5.0
@ -93,8 +91,6 @@ pyyaml==6.0.2
# via pre-commit # via pre-commit
requests==2.32.3 requests==2.32.3
# via -r requirements-dev.in # via -r requirements-dev.in
soupsieve==2.6
# via beautifulsoup4
sqlparse==0.5.2 sqlparse==0.5.2
# via django # via django
tox==4.24.1 tox==4.24.1

View file

@ -20,7 +20,6 @@
# - pymdown-extensions # - pymdown-extensions
# - black # - black
# - django>=4.2 # - django>=4.2
# - beautifulsoup4>=4.12
# #
asgiref==3.8.1 asgiref==3.8.1
@ -29,8 +28,6 @@ babel==2.16.0
# via # via
# mkdocs-git-revision-date-localized-plugin # mkdocs-git-revision-date-localized-plugin
# mkdocs-material # mkdocs-material
beautifulsoup4==4.12.3
# via hatch.envs.docs
black==24.10.0 black==24.10.0
# via hatch.envs.docs # via hatch.envs.docs
bracex==2.5.post1 bracex==2.5.post1
@ -213,8 +210,6 @@ six==1.17.0
# via python-dateutil # via python-dateutil
smmap==5.0.2 smmap==5.0.2
# via gitdb # via gitdb
soupsieve==2.6
# via beautifulsoup4
sqlparse==0.5.3 sqlparse==0.5.3
# via django # via django
tinycss2==1.4.0 tinycss2==1.4.0

View file

@ -53,8 +53,10 @@ from django_components.dependencies import (
cache_component_js, cache_component_js,
cache_component_js_vars, cache_component_js_vars,
postprocess_component_html, postprocess_component_html,
set_component_attrs_for_js_and_css,
) )
from django_components.node import BaseNode from django_components.node import BaseNode
from django_components.perfutil.component import component_post_render
from django_components.slots import ( from django_components.slots import (
ComponentSlotContext, ComponentSlotContext,
Slot, Slot,
@ -992,10 +994,11 @@ class Component(
# By adding the current input to the stack, we temporarily allow users # By adding the current input to the stack, we temporarily allow users
# to access the provided context, slots, etc. Also required so users can # to access the provided context, slots, etc. Also required so users can
# call `self.inject()` from within `get_context_data()`. # call `self.inject()` from within `get_context_data()`.
render_id = gen_id()
self._render_stack.append( self._render_stack.append(
RenderStackItem( RenderStackItem(
input=RenderInput( input=RenderInput(
id=gen_id(), id=render_id,
context=context, context=context,
slots=slots, slots=slots,
args=args, args=args,
@ -1026,8 +1029,17 @@ class Component(
is_filled = SlotIsFilled(slots_untyped) is_filled = SlotIsFilled(slots_untyped)
self._render_stack[-1].is_filled = is_filled self._render_stack[-1].is_filled = is_filled
# If any slot fills were defined within the template, we want to scope them
# to the CSS of the parent component. Thus we keep track of the parent component.
if context.get(_COMPONENT_SLOT_CTX_CONTEXT_KEY, None):
parent_comp_ctx: ComponentSlotContext = context[_COMPONENT_SLOT_CTX_CONTEXT_KEY]
parent_id = parent_comp_ctx.component_id
else:
parent_id = None
component_slot_ctx = ComponentSlotContext( component_slot_ctx = ComponentSlotContext(
component_name=self.name, component_name=self.name,
component_id=render_id,
template_name=template.name, template_name=template.name,
fills=slots_untyped, fills=slots_untyped,
is_dynamic_component=getattr(self, "_is_dynamic_component", False), is_dynamic_component=getattr(self, "_is_dynamic_component", False),
@ -1062,22 +1074,49 @@ class Component(
new_output = self.on_render_after(context, template, html_content) new_output = self.on_render_after(context, template, html_content)
html_content = new_output if new_output is not None else html_content html_content = new_output if new_output is not None else html_content
output = postprocess_component_html(
component_cls=self.__class__,
component_id=self.id,
html_content=html_content,
css_input_hash=css_input_hash,
js_input_hash=js_input_hash,
type=type,
render_dependencies=render_dependencies,
)
# After rendering is done, remove the current state from the stack, which means # After rendering is done, remove the current state from the stack, which means
# properties like `self.context` will no longer return the current state. # properties like `self.context` will no longer return the current state.
self._render_stack.pop() self._render_stack.pop()
context.render_context.pop() context.render_context.pop()
return output # Internal component HTML post-processing:
# - Add the HTML attributes to work with JS and CSS variables
# - Resolve component's JS / CSS into <script> and <link> (if render_dependencies=True)
#
# However, to ensure that we run an HTML parser only once over the HTML content,
# we have to wrap it in this callback. This callback runs only once we know whether
# there are any extra HTML attributes that should be applied to this component's root elements.
#
# This makes it possible for multiple components to resolve to the same HTML element.
# E.g. if CompA renders CompB, and CompB renders a <div>, then the <div> element will have
# IDs of both CompA and CompB.
# ```html
# <div data-djc-id-a1b3cf data-djc-id-f3d3cf>...</div>
# ```
def post_processor(root_attributes: Optional[List[str]] = None) -> Tuple[str, Dict[str, List[str]]]:
nonlocal html_content
updated_html, child_components = set_component_attrs_for_js_and_css(
html_content=html_content,
component_id=render_id,
css_input_hash=css_input_hash,
css_scope_id=None, # TODO - Implement
root_attributes=root_attributes,
)
updated_html = postprocess_component_html(
component_cls=self.__class__,
component_id=render_id,
html_content=updated_html,
css_input_hash=css_input_hash,
js_input_hash=js_input_hash,
type=type,
render_dependencies=render_dependencies,
)
return updated_html, child_components
return component_post_render(post_processor, render_id, parent_id)
def _normalize_slot_fills( def _normalize_slot_fills(
self, self,
@ -1208,6 +1247,14 @@ class Component(
validate_typed_dict(data, data_type, f"Component '{self.name}'", "data") validate_typed_dict(data, data_type, f"Component '{self.name}'", "data")
# Perf
# Each component may use different start and end tags. We represent this
# as individual subclasses of `ComponentNode`. However, multiple components
# may use the same start & end tag combination, e.g. `{% component %}` and `{% endcomponent %}`.
# So we cache the already-created subclasses to be reused.
component_node_subclasses_by_name: Dict[str, Tuple[Type["ComponentNode"], ComponentRegistry]] = {}
class ComponentNode(BaseNode): class ComponentNode(BaseNode):
""" """
Renders one of the components that was previously registered with Renders one of the components that was previously registered with
@ -1336,15 +1383,31 @@ class ComponentNode(BaseNode):
start_tag: str, start_tag: str,
end_tag: str, end_tag: str,
) -> "ComponentNode": ) -> "ComponentNode":
# Set the component-specific start and end tags by subclassing the base node # Set the component-specific start and end tags by subclassing the BaseNode
subcls_name = cls.__name__ + "_" + name subcls_name = cls.__name__ + "_" + name
subcls: Type[ComponentNode] = type(subcls_name, (cls,), {"tag": start_tag, "end_tag": end_tag})
# We try to reuse the same subclass for the same start tag, so we can
# avoid creating a new subclass for each time `{% component %}` is called.
if start_tag not in component_node_subclasses_by_name:
subcls: Type[ComponentNode] = type(subcls_name, (cls,), {"tag": start_tag, "end_tag": end_tag})
component_node_subclasses_by_name[start_tag] = (subcls, registry)
cached_subcls, cached_registry = component_node_subclasses_by_name[start_tag]
if cached_registry is not registry:
raise RuntimeError(
f"Detected two Components from different registries using the same start tag '{start_tag}'"
)
elif cached_subcls.end_tag != end_tag:
raise RuntimeError(
f"Detected two Components using the same start tag '{start_tag}' but with different end tags"
)
# Call `BaseNode.parse()` as if with the context of subcls. # Call `BaseNode.parse()` as if with the context of subcls.
node: ComponentNode = super(cls, subcls).parse( # type: ignore[attr-defined] node: ComponentNode = super(cls, cached_subcls).parse( # type: ignore[attr-defined]
parser, parser,
token, token,
registry=registry, registry=cached_registry,
name=name, name=name,
) )
return node return node

View file

@ -1,6 +1,8 @@
import inspect import inspect
from typing import Any, Dict, Optional, Type, Union, cast from typing import Any, Dict, Optional, Type, Union, cast
from django.template import Context, Template
from django_components import Component, ComponentRegistry, NotRegistered, types from django_components import Component, ComponentRegistry, NotRegistered, types
from django_components.component_registry import all_registries from django_components.component_registry import all_registries
@ -110,7 +112,21 @@ class DynamicComponent(Component):
comp_class = self._resolve_component(comp_name_or_class, registry) comp_class = self._resolve_component(comp_name_or_class, registry)
# NOTE: Slots are passed at component instantiation return {
"comp_class": comp_class,
"args": args,
"kwargs": kwargs,
}
# NOTE: The inner component is rendered in `on_render_before`, so that the `Context` object
# is already configured as if the inner component was rendered inside the template.
# E.g. the `_COMPONENT_SLOT_CTX_CONTEXT_KEY` is set, which means that the child component
# will know that it's a child of this component.
def on_render_before(self, context: Context, template: Template) -> Context:
comp_class = context["comp_class"]
args = context["args"]
kwargs = context["kwargs"]
comp = comp_class( comp = comp_class(
registered_name=self.registered_name, registered_name=self.registered_name,
outer_context=self.outer_context, outer_context=self.outer_context,
@ -128,11 +144,10 @@ class DynamicComponent(Component):
render_dependencies=self.input.render_dependencies, render_dependencies=self.input.render_dependencies,
) )
return { context["output"] = output
"output": output, return context
}
template: types.django_html = """{{ output }}""" template: types.django_html = """{{ output|safe }}"""
def _resolve_component( def _resolve_component(
self, self,

View file

@ -35,7 +35,7 @@ from django.utils.decorators import sync_and_async_middleware
from django.utils.safestring import SafeString, mark_safe from django.utils.safestring import SafeString, mark_safe
from django_components.node import BaseNode from django_components.node import BaseNode
from django_components.util.html import SoupNode from django_components.util.html_parser import set_html_attributes
from django_components.util.misc import get_import_path, is_nonempty_str from django_components.util.misc import get_import_path, is_nonempty_str
if TYPE_CHECKING: if TYPE_CHECKING:
@ -272,33 +272,63 @@ def wrap_component_css(comp_cls: Type["Component"], content: str) -> SafeString:
######################################################### #########################################################
def _link_dependencies_with_component_html( def set_component_attrs_for_js_and_css(
component_id: str, html_content: Union[str, SafeString],
html_content: str, component_id: Optional[str],
css_input_hash: Optional[str], css_input_hash: Optional[str],
) -> str: css_scope_id: Optional[str],
elems = SoupNode.from_fragment(html_content) root_attributes: Optional[List[str]] = None,
) -> Tuple[Union[str, SafeString], Dict[str, List[str]]]:
# These are the attributes that we want to set on the root element.
all_root_attributes = [*root_attributes] if root_attributes else []
# Insert component ID # Component ID is used for executing JS script, e.g. `data-djc-id-a1b2c3`
for elem in elems: #
# Ignore comments, text, doctype, etc. # NOTE: We use `data-djc-css-a1b2c3` and `data-djc-id-a1b2c3` instead of
if not elem.is_element(): # `data-djc-css="a1b2c3"` and `data-djc-id="a1b2c3"`, to allow
continue # multiple values to be associated with the same element, which may happen if
# one component renders another.
if component_id:
all_root_attributes.append(f"data-djc-id-{component_id}")
# Component ID is used for executing JS script, e.g. `data-djc-id-a1b2c3` # Attribute by which we bind the CSS variables to the component's CSS,
# e.g. `data-djc-css-a1b2c3`
if css_input_hash:
all_root_attributes.append(f"data-djc-css-{css_input_hash}")
# These attributes are set on all tags
all_attributes = []
# We apply the CSS scoping attribute to both root and non-root tags.
#
# This is the HTML part of Vue-like CSS scoping.
# That is, for each HTML element that the component renders, we add a `data-djc-scope-a1b2c3` attribute.
# And we stop when we come across a nested component.
if css_scope_id:
all_attributes.append(f"data-djc-scope-{css_scope_id}")
is_safestring = isinstance(html_content, SafeString)
updated_html, child_components = set_html_attributes(
html_content,
root_attributes=all_root_attributes,
all_attributes=all_attributes,
# Setting this means that set_html_attributes will check for HTML elements with this
# attribute, and return a dictionary of {attribute_value: [attributes_set_on_this_tag]}.
# #
# NOTE: We use `data-djc-css-a1b2c3` and `data-djc-id-a1b2c3` instead of # So if HTML contains tag <template djc-render-id="123"></template>,
# `data-djc-css="a1b2c3"` and `data-djc-id="a1b2c3"`, to allow # and we set on that tag `data-djc-id-123`, then we will get
# multiple values to be associated with the same element, which may happen if # {
# One component renders another. # "123": ["data-djc-id-123"],
elem.set_attr(f"data-djc-id-{component_id}", True) # }
#
# This is a minor optimization. Without this, when we're rendering components in
# component_post_render(), we'd have to parse each `<template djc-render-id="123"></template>`
# to find the HTML attribute that were set on it.
watch_on_attribute="djc-render-id",
)
updated_html = mark_safe(updated_html) if is_safestring else updated_html
# Attribute by which we bind the CSS variables to the component's CSS, return updated_html, child_components
# e.g. `data-djc-css-a1b2c3`
if css_input_hash:
elem.set_attr(f"data-djc-css-{css_input_hash}", True)
return SoupNode.to_html_multiroot(elems)
def _insert_component_comment( def _insert_component_comment(
@ -332,18 +362,13 @@ def _insert_component_comment(
def postprocess_component_html( def postprocess_component_html(
component_cls: Type["Component"], component_cls: Type["Component"],
component_id: str, component_id: str,
html_content: str, html_content: Union[str, SafeString],
css_input_hash: Optional[str], css_input_hash: Optional[str],
js_input_hash: Optional[str], js_input_hash: Optional[str],
type: RenderType, type: RenderType,
render_dependencies: bool, render_dependencies: bool,
) -> str: ) -> Union[str, SafeString]:
# Make the HTML work with JS and CSS dependencies is_safestring = isinstance(html_content, SafeString)
html_content = _link_dependencies_with_component_html(
component_id=component_id,
html_content=html_content,
css_input_hash=css_input_hash,
)
# NOTE: To better understand the next section, consider this: # NOTE: To better understand the next section, consider this:
# #
@ -375,6 +400,8 @@ def postprocess_component_html(
if render_dependencies: if render_dependencies:
output = _render_dependencies(output, type) output = _render_dependencies(output, type)
output = mark_safe(output) if is_safestring else output
return output return output
@ -417,8 +444,8 @@ MAYBE_COMP_CSS_ID = r"(?: data-djc-css-\w{6})?"
PLACEHOLDER_REGEX = re.compile( PLACEHOLDER_REGEX = re.compile(
r"{css_placeholder}|{js_placeholder}".format( r"{css_placeholder}|{js_placeholder}".format(
css_placeholder=f'<link{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID} name="{CSS_PLACEHOLDER_NAME}"/?>', css_placeholder=f'<link name="{CSS_PLACEHOLDER_NAME}"{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID}/?>',
js_placeholder=f'<script{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID} name="{JS_PLACEHOLDER_NAME}"></script>', js_placeholder=f'<script name="{JS_PLACEHOLDER_NAME}"{MAYBE_COMP_CSS_ID}{MAYBE_COMP_ID}></script>',
).encode() ).encode()
) )
@ -734,6 +761,10 @@ def _process_dep_declarations(content: bytes, type: RenderType) -> Tuple[bytes,
return (content, final_script_tags.encode("utf-8"), final_css_tags.encode("utf-8")) return (content, final_script_tags.encode("utf-8"), final_css_tags.encode("utf-8"))
href_pattern = re.compile(r'href="([^"]+)"')
src_pattern = re.compile(r'src="([^"]+)"')
# Detect duplicates by URLs, extract URLs, and sort by URLs # Detect duplicates by URLs, extract URLs, and sort by URLs
def _postprocess_media_tags( def _postprocess_media_tags(
script_type: ScriptType, script_type: ScriptType,
@ -743,15 +774,21 @@ def _postprocess_media_tags(
tags_by_url: Dict[str, str] = {} tags_by_url: Dict[str, str] = {}
for tag in tags: for tag in tags:
node = SoupNode.from_fragment(tag.strip())[0] # Extract the URL from <script src="..."> or <link href="...">
# <script src="..."> vs <link href="..."> if script_type == "js":
attr = "src" if script_type == "js" else "href" attr = "src"
maybe_url = node.get_attr(attr, None) attr_pattern = src_pattern
else:
attr = "href"
attr_pattern = href_pattern
maybe_url_match = attr_pattern.search(tag.strip())
maybe_url = maybe_url_match.group(1) if maybe_url_match else None
if not is_nonempty_str(maybe_url): if not is_nonempty_str(maybe_url):
raise RuntimeError( raise RuntimeError(
f"One of entries for `Component.Media.{script_type}` media is missing a " f"One of entries for `Component.Media.{script_type}` media is missing a "
f"value for attribute '{attr}'. If there is content inlined inside the `<{node.name()}>` tags, " f"value for attribute '{attr}'. If there is content inlined inside the `<{attr}>` tags, "
f"you must move the content to a `.{script_type}` file and reference it via '{attr}'.\nGot:\n{tag}" f"you must move the content to a `.{script_type}` file and reference it via '{attr}'.\nGot:\n{tag}"
) )
@ -908,6 +945,9 @@ def _gen_exec_script(
return exec_script return exec_script
head_or_body_end_tag_re = re.compile(r"<\/(?:head|body)\s*>", re.DOTALL)
def _insert_js_css_to_default_locations( def _insert_js_css_to_default_locations(
html_content: str, html_content: str,
js_content: Optional[str], js_content: Optional[str],
@ -917,37 +957,50 @@ def _insert_js_css_to_default_locations(
This function tries to insert the JS and CSS content into the default locations. This function tries to insert the JS and CSS content into the default locations.
JS is inserted at the end of `<body>`, and CSS is inserted at the end of `<head>`. JS is inserted at the end of `<body>`, and CSS is inserted at the end of `<head>`.
"""
elems = SoupNode.from_fragment(html_content)
if not elems: We find these tags by looking for the first `</head>` and last `</body>` tags.
"""
if css_content is None and js_content is None:
return None return None
did_modify_html = False did_modify_html = False
if css_content is not None: first_end_head_tag_index = None
for elem in elems: last_end_body_tag_index = None
if not elem.is_element():
continue
head = elem.find_tag("head")
if head:
css_elems = SoupNode.from_fragment(css_content)
head.append_children(css_elems)
did_modify_html = True
if js_content is not None: # First check the content for the first `</head>` and last `</body>` tags
for elem in elems: for match in head_or_body_end_tag_re.finditer(html_content):
if not elem.is_element(): tag_name = match[0][2:6]
continue
body = elem.find_tag("body") # We target the first `</head>`, thus, after we set it, we skip the rest
if body: if tag_name == "head":
js_elems = SoupNode.from_fragment(js_content) if css_content is not None and first_end_head_tag_index is None:
body.append_children(js_elems) first_end_head_tag_index = match.start()
did_modify_html = True
# But for `</body>`, we want the last occurrence, so we insert the content only
# after the loop.
elif tag_name == "body":
if js_content is not None:
last_end_body_tag_index = match.start()
else:
raise ValueError(f"Unexpected tag name '{tag_name}'")
# Then do two string insertions. First the CSS, because we assume that <head> is before <body>.
index_offset = 0
updated_html = html_content
if css_content is not None and first_end_head_tag_index is not None:
updated_html = updated_html[:first_end_head_tag_index] + css_content + updated_html[first_end_head_tag_index:]
index_offset = len(css_content)
did_modify_html = True
if js_content is not None and last_end_body_tag_index is not None:
js_index = last_end_body_tag_index + index_offset
updated_html = updated_html[:js_index] + js_content + updated_html[js_index:]
did_modify_html = True
if did_modify_html: if did_modify_html:
transformed = SoupNode.to_html_multiroot(elems) return updated_html
return transformed
else: else:
return None # No changes made return None # No changes made

View file

@ -0,0 +1,169 @@
import re
from collections import deque
from typing import Callable, Deque, Dict, List, Optional, Tuple
from django.utils.safestring import mark_safe
# Function that accepts a list of extra HTML attributes to be set on the component's root elements
# and returns the component's HTML content and a dictionary of child components' IDs
# and their root elements' HTML attributes.
#
# In other words, we use this to "delay" the actual rendering of the component's HTML content,
# until we know what HTML attributes to apply to the root elements.
ComponentRenderer = Callable[[Optional[List[str]]], Tuple[str, Dict[str, List[str]]]]

# Render-time cache for component rendering.
# Maps a component's render ID to its deferred renderer. Entries are added and
# consumed by `component_post_render()`.
component_renderer_cache: Dict[str, ComponentRenderer] = {}

# Maps a child component's render ID to the extra HTML attributes that were set on
# its placeholder. It is filled from the second value returned by each renderer, and
# the entry is handed to the child's own renderer once that child is processed.
child_component_attrs: Dict[str, List[str]] = {}

# Matches a whole placeholder element, e.g. `<template djc-render-id="a1b3cf"></template>`,
# including any other attributes present on the `<template>` tag.
nested_comp_pattern = re.compile(r'<template [^>]*?djc-render-id="\w{6}"[^>]*?></template>')
# Captures the six-character render ID of a placeholder as the named group `render_id`.
render_id_pattern = re.compile(r'djc-render-id="(?P<render_id>\w{6})"')
# When a component is rendered, we want to apply HTML attributes like `data-djc-id-a1b3cf`
# to all root elements. However, we have to approach it smartly, to minimize the HTML parsing.
#
# If we naively first rendered the child components, and then the parent component, then we would
# have to parse the child's HTML twice (once for itself, and once as part of the parent).
# When we have a deeply nested component structure, this can add up to a lot of parsing.
# See https://github.com/django-components/django-components/issues/14#issuecomment-2596096632.
#
# Imagine we first render the child components. Once rendered, child's HTML gets embedded into
# the HTML of the parent. So by the time we get to the root, we will have to parse the full HTML
# document, even if the root component is only a small part of the document.
#
# So instead, when a nested component is rendered, we put there only a placeholder, and store the
# actual HTML content in `component_renderer_cache`.
#
# ```django
# <div>
# <h2>...</h2>
# <template djc-render-id="a1b3cf"></template>
# <span>...</span>
# <template djc-render-id="f3d3cf"></template>
# </div>
# ```
#
# The full flow is as follows:
# 1. When a component is nested in another, the child component is rendered, but it returns
# only a placeholder like `<template djc-render-id="a1b3cf"></template>`.
# The actual HTML output is stored in `component_renderer_cache`.
# 2. The parent of the child component is rendered normally.
# 3. If the placeholder for the child component is at root of the parent component,
# then the placeholder may be tagged with extra attributes, e.g. `data-djc-id-a1b3cf`.
# `<template djc-render-id="a1b3cf" data-djc-id-a1b3cf></template>`.
# 4. When the parent is done rendering, we go back to step 1., the parent component
# either returns the actual HTML, or a placeholder.
# 5. Only once we get to the root component, that has no further parents, is when we finally
# start putting it all together.
# 6. We start at the root component. We search the root component's output HTML for placeholders.
# Each placeholder has the attribute `djc-render-id` that links to its actual content.
# 7. For each found placeholder, we replace it with the actual content.
# But as part of step 7), we also:
# - If any of the child placeholders had extra attributes, we cache these, so we can access them
# once we get to rendering the child component.
# - And if the parent component had any extra attributes set by its parent, we apply these
# to the root elements.
# 8. Lastly, we merge all the parts together, and return the final HTML.
def component_post_render(
    renderer: ComponentRenderer,
    render_id: str,
    parent_id: Optional[str],
) -> str:
    """
    Store a component's deferred renderer and, for a root component, build the final HTML.

    Args:
        renderer: Callback that accepts the extra HTML attributes for the component's
            root elements (or `None`) and returns the component's HTML together with
            a mapping of child render IDs to the attributes set on their placeholders.
        render_id: ID of the render that `renderer` belongs to.
        parent_id: Render ID of the parent component, or `None` for a root component.

    Returns:
        For a nested component, the placeholder
        `<template djc-render-id="..."></template>`. For a root component, the HTML
        with all placeholders replaced by the output of their renderers.
        Both are passed through `mark_safe()`.

    Raises:
        KeyError: If a placeholder refers to a render ID that has no stored renderer.
        ValueError: If a matched placeholder does not contain a render ID.
    """
    # Instead of rendering the component's HTML content immediately, we store it,
    # so we can render the component only once we know if there are any HTML attributes
    # to be applied to the resulting HTML.
    component_renderer_cache[render_id] = renderer

    if parent_id is not None:
        # Case: Nested component
        # If component is nested, return a placeholder
        return mark_safe(f'<template djc-render-id="{render_id}"></template>')

    # Case: Root component - Construct the final HTML by recursively replacing placeholders
    #
    # We first generate the component's HTML content, by calling the renderer.
    #
    # Then we process the component's HTML from root-downwards, going depth-first.
    # So if we have a structure:
    # <div>
    #   <h2>...</h2>
    #   <template djc-render-id="a1b3cf"></template>
    #   <span>...</span>
    #   <template djc-render-id="f3d3cf"></template>
    # </div>
    #
    # Then we first split up the current HTML into parts, splitting at placeholders:
    # - <div><h2>...</h2>
    # - PLACEHOLDER djc-render-id="a1b3cf"
    # - <span>...</span>
    # - PLACEHOLDER djc-render-id="f3d3cf"
    # - </div>
    #
    # And put the pairs of (content, placeholder_id) into a queue:
    # - ("<div><h2>...</h2>", "a1b3cf")
    # - ("<span>...</span>", "f3d3cf")
    # - ("</div>", None)
    #
    # Then we process each part:
    # 1. Append the content to the output
    # 2. If the placeholder ID is not None, then we fetch the renderer by its placeholder ID (e.g. "a1b3cf")
    # 3. If there were any extra attributes set by the parent component, we apply these to the renderer.
    # 4. We split the content by placeholders, and put the pairs of (content, placeholder_id) into the queue,
    #    repeating this whole process until we've processed all nested components.
    content_parts: List[str] = []
    process_queue: Deque[Tuple[str, Optional[str]]] = deque()

    process_queue.append(("", render_id))

    while process_queue:
        curr_content_before_component, curr_comp_id = process_queue.popleft()

        # Process content before the component
        if curr_content_before_component:
            content_parts.append(curr_content_before_component)

        # The entry was only a remaining text, no more components to process, we're done
        if curr_comp_id is None:
            continue

        # Generate component's content, applying the extra HTML attributes set by the parent component
        curr_comp_renderer = component_renderer_cache.pop(curr_comp_id)
        # NOTE: This may be undefined, because this is set only for components that
        # are also root elements in their parent's HTML
        curr_comp_attrs = child_component_attrs.pop(curr_comp_id, None)
        curr_comp_content, curr_child_component_attrs = curr_comp_renderer(curr_comp_attrs)

        # Exclude the `data-djc-scope-...` attributes from being applied to the child component's HTML.
        # NOTE: The mapping is keyed by the child's render ID and the attributes live in the values,
        # so the filter has to be applied to each list of attributes, not to the keys.
        for child_id, child_attrs in curr_child_component_attrs.items():
            child_component_attrs[child_id] = [
                attr for attr in child_attrs if not attr.startswith("data-djc-scope-")
            ]

        # Process the component's content
        last_index = 0
        parts_to_process: List[Tuple[str, Optional[str]]] = []

        # Split component's content by placeholders, and put the pairs of (content, placeholder_id) into the queue
        for match in nested_comp_pattern.finditer(curr_comp_content):
            part_before_component = curr_comp_content[last_index : match.start()]  # noqa: E203
            last_index = match.end()
            comp_part = match[0]

            # Extract the placeholder ID from `<template djc-render-id="a1b3cf"></template>`
            curr_child_id_match = render_id_pattern.search(comp_part)
            if curr_child_id_match is None:
                raise ValueError(f"No placeholder ID found in {comp_part}")
            curr_child_id = curr_child_id_match.group("render_id")
            parts_to_process.append((part_before_component, curr_child_id))

        # Append any remaining text
        if last_index < len(curr_comp_content):
            parts_to_process.append((curr_comp_content[last_index:], None))

        # `extendleft()` inserts items one by one at the front, which reverses their order,
        # so we pre-reverse the parts to keep them in document order (depth-first processing).
        process_queue.extendleft(reversed(parts_to_process))

    output = "".join(content_parts)
    return mark_safe(output)

View file

@ -140,6 +140,7 @@ class SlotIsFilled(dict):
@dataclass @dataclass
class ComponentSlotContext: class ComponentSlotContext:
component_name: str component_name: str
component_id: str
template_name: str template_name: str
is_dynamic_component: bool is_dynamic_component: bool
default_slot: Optional[str] default_slot: Optional[str]

View file

@ -1,111 +0,0 @@
from abc import ABC, abstractmethod
from typing import Any, List, Optional, Sequence
from bs4 import BeautifulSoup, CData, Comment, Doctype, NavigableString, Tag
class HTMLNode(ABC):
    """
    Interface for an HTML manipulation library. This allows us to potentially swap
    between different libraries.
    """

    @classmethod
    @abstractmethod
    def from_fragment(cls, html: str) -> Sequence["HTMLNode"]: ...  # noqa: E704

    @abstractmethod
    def to_html(self) -> str: ...  # noqa: E704

    @abstractmethod
    def name(self) -> str:
        """Get tag name"""
        ...

    @abstractmethod
    def find_tag(self, tag: str) -> Optional["HTMLNode"]: ...  # noqa: E704

    @abstractmethod
    def append_children(self, children: Sequence[Any]) -> None: ...  # noqa: E704

    @abstractmethod
    def get_attr(self, attr: str, default: Any = None) -> Any: ...  # noqa: E704

    @abstractmethod
    def set_attr(self, attr: str, value: Any) -> None: ...  # noqa: E704

    @abstractmethod
    def is_element(self) -> bool:
        """Returns `False` if the node is a text, comment, or doctype node. `True` otherwise."""
        # NOTE: The docstring must be the first statement of the method body. Placed after
        # a one-line `def ...: ...`, it is only a stray class-level string expression.
        ...

    @classmethod
    def to_html_multiroot(cls, elems: Sequence["HTMLNode"]) -> str:
        """Serialize each node with `to_html()` and concatenate the results in order."""
        return "".join([elem.to_html() for elem in elems])
class SoupNode(HTMLNode):
    """BeautifulSoup implementation of HTMLNode."""

    def __init__(self, node: Tag):
        # NOTE: Despite the annotation, `from_fragment()` wraps every top-level item
        # of the parsed fragment, so `node` may also be a text, comment, CDATA or
        # doctype node. Methods below therefore check `isinstance(self.node, Tag)`.
        self.node = node

    @classmethod
    def from_fragment(cls, html: str) -> List["SoupNode"]:
        """Parse `html` with the `html.parser` backend and wrap each top-level node."""
        soup = BeautifulSoup(html, "html.parser")
        # Get top-level elements in the fragment
        return [cls(elem) for elem in soup.contents]

    def to_html(self) -> str:
        """Serialize the wrapped node, re-adding the markup delimiters for non-element nodes."""
        if isinstance(self.node, CData):
            return f"<![CDATA[{self.node}]]>"
        elif isinstance(self.node, Comment):
            return f"<!-- {self.node} -->"
        elif isinstance(self.node, Doctype):
            return f"<!DOCTYPE {self.node}>"
        elif isinstance(self.node, NavigableString):
            return str(self.node)
        else:
            # See https://github.com/EmilStenstrom/django-components/pull/861#discussion_r1898516210
            return self.node.encode(formatter="html5").decode()

    def name(self) -> str:
        """Get tag name"""
        return self.node.name

    def find_tag(self, tag: str) -> Optional["SoupNode"]:
        """
        Return this node if it is a `tag` element, otherwise the first match of
        `select_one(tag)` among its descendants, or `None` if there is no match.

        Non-element nodes (text, comment, doctype) cannot be searched, so they
        yield `None`, in line with how the other methods treat such nodes.
        """
        if not isinstance(self.node, Tag):
            return None
        if self.node.name == tag:
            return self
        match = self.node.select_one(tag)
        if match:
            return SoupNode(match)
        return None

    def append_children(self, children: Sequence["SoupNode"]) -> None:
        """Append the wrapped nodes of `children` to this element. No-op for non-element nodes."""
        if isinstance(self.node, Tag):
            for child in children:
                self.node.append(child.node)

    def get_attr(self, attr: str, default: Any = None) -> Any:
        """
        Return the value of attribute `attr`, or `default` if it is missing or
        this is not an element. List values are joined into a space-separated string.
        """
        if isinstance(self.node, Tag):
            res = self.node.get(attr, default)
            if isinstance(res, list):
                return " ".join(res)
            return res
        return default

    def set_attr(self, attr: str, value: Any) -> None:
        """
        Set attribute `attr` on this element. `True` sets a valueless (boolean)
        attribute, `False` removes the attribute, anything else is stored as-is.
        No-op for non-element nodes.
        """
        if not isinstance(self.node, Tag):
            return
        if value is True:
            # Set boolean attributes without a value
            self.node[attr] = None
        elif value is False:
            # Remove the attribute
            self.node.attrs.pop(attr, None)
        else:
            self.node[attr] = value

    def is_element(self) -> bool:
        """Returns `False` if the node is a text, comment, or doctype node. `True` otherwise."""
        return isinstance(self.node, Tag)

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,7 @@ if calling `Component.render()` or `render_dependencies()` behave as expected.
For checking the OUTPUT of the dependencies, see `test_dependency_rendering.py`. For checking the OUTPUT of the dependencies, see `test_dependency_rendering.py`.
""" """
import re
from unittest.mock import Mock from unittest.mock import Mock
from django.http import HttpResponseNotModified from django.http import HttpResponseNotModified
@ -13,7 +14,6 @@ from django.template import Context, Template
from django_components import Component, registry, render_dependencies, types from django_components import Component, registry, render_dependencies, types
from django_components.components.dynamic import DynamicComponent from django_components.components.dynamic import DynamicComponent
from django_components.middleware import ComponentDependencyMiddleware from django_components.middleware import ComponentDependencyMiddleware
from django_components.util.html import SoupNode
from .django_test_setup import setup_test_config from .django_test_setup import setup_test_config
from .testutils import BaseTestCase, create_and_process_template_response from .testutils import BaseTestCase, create_and_process_template_response
@ -223,9 +223,8 @@ class RenderDependenciesTests(BaseTestCase):
count=1, count=1,
) )
# Nodes: [Doctype, whitespace, <html>] body_re = re.compile(r"<body>(.*?)</body>", re.DOTALL)
nodes = SoupNode.from_fragment(rendered.strip()) rendered_body = body_re.search(rendered).group(1) # type: ignore[union-attr]
rendered_body = nodes[2].find_tag("body").to_html() # type: ignore[union-attr]
self.assertInHTML( self.assertInHTML(
"""<script src="django_components/django_components.min.js">""", """<script src="django_components/django_components.min.js">""",
@ -275,9 +274,8 @@ class RenderDependenciesTests(BaseTestCase):
count=1, count=1,
) )
# Nodes: [Doctype, whitespace, <html>] head_re = re.compile(r"<head>(.*?)</head>", re.DOTALL)
nodes = SoupNode.from_fragment(rendered.strip()) rendered_head = head_re.search(rendered).group(1) # type: ignore[union-attr]
rendered_head = nodes[2].find_tag("head").to_html() # type: ignore[union-attr]
self.assertInHTML( self.assertInHTML(
"""<script src="django_components/django_components.min.js">""", """<script src="django_components/django_components.min.js">""",
@ -518,6 +516,7 @@ class MiddlewareTests(BaseTestCase):
template, template,
context=Context({"component_name": "test-component"}), context=Context({"component_name": "test-component"}),
) )
assert_dependencies(rendered2) assert_dependencies(rendered2)
self.assertEqual( self.assertEqual(
rendered2.count("Variable: <strong data-djc-id-a1bc43 data-djc-id-a1bc44>value</strong>"), rendered2.count("Variable: <strong data-djc-id-a1bc43 data-djc-id-a1bc44>value</strong>"),

View file

@ -95,9 +95,7 @@ class DynamicExprTests(BaseTestCase):
bool_var="{{ is_active }}" bool_var="{{ is_active }}"
list_var="{{ list|slice:':-1' }}" list_var="{{ list|slice:':-1' }}"
/ %} / %}
""".replace( """
"\n", " "
)
) )
template = Template(template_str) template = Template(template_str)
@ -116,9 +114,14 @@ class DynamicExprTests(BaseTestCase):
self.assertEqual(captured["bool_var"], True) self.assertEqual(captured["bool_var"], True)
self.assertEqual(captured["list_var"], [{"a": 1}, {"a": 2}]) self.assertEqual(captured["list_var"], [{"a": 1}, {"a": 2}])
self.assertEqual( self.assertHTMLEqual(
rendered.strip(), rendered,
"<!-- _RENDERED SimpleComponent_5b8d97,a1bc3f,, -->\n<div data-djc-id-a1bc3f>lorem</div>\n<div data-djc-id-a1bc3f>True</div>\n<div data-djc-id-a1bc3f>[{'a': 1}, {'a': 2}]</div>", # noqa: E501 """
<!-- _RENDERED SimpleComponent_5b8d97,a1bc3f,, -->
<div data-djc-id-a1bc3f>lorem</div>
<div data-djc-id-a1bc3f>True</div>
<div data-djc-id-a1bc3f>[{'a': 1}, {'a': 2}]</div>
""",
) )
@parametrize_context_behavior(["django", "isolated"]) @parametrize_context_behavior(["django", "isolated"])
@ -164,9 +167,7 @@ class DynamicExprTests(BaseTestCase):
list_var="{% noop list %}" list_var="{% noop list %}"
dict_var="{% noop dict %}" dict_var="{% noop dict %}"
/ %} / %}
""".replace( """
"\n", " "
)
) )
template = Template(template_str) template = Template(template_str)
@ -186,15 +187,15 @@ class DynamicExprTests(BaseTestCase):
self.assertEqual(captured["dict_var"], {"a": 3}) self.assertEqual(captured["dict_var"], {"a": 3})
self.assertEqual(captured["list_var"], [{"a": 1}, {"a": 2}]) self.assertEqual(captured["list_var"], [{"a": 1}, {"a": 2}])
self.assertEqual( self.assertHTMLEqual(
rendered.strip(), rendered,
( """
"<!-- _RENDERED SimpleComponent_743413,a1bc3f,, -->\n" <!-- _RENDERED SimpleComponent_743413,a1bc3f,, -->
"<div data-djc-id-a1bc3f>lorem ipsum dolor</div>\n" <div data-djc-id-a1bc3f>lorem ipsum dolor</div>
"<div data-djc-id-a1bc3f>True</div>\n" <div data-djc-id-a1bc3f>True</div>
"<div data-djc-id-a1bc3f>[{'a': 1}, {'a': 2}]</div>\n" <div data-djc-id-a1bc3f>[{'a': 1}, {'a': 2}]</div>
"<div data-djc-id-a1bc3f>{'a': 3}</div>" <div data-djc-id-a1bc3f>{'a': 3}</div>
), """,
) )
@parametrize_context_behavior(["django", "isolated"]) @parametrize_context_behavior(["django", "isolated"])
@ -240,9 +241,7 @@ class DynamicExprTests(BaseTestCase):
bool_var="{# noop is_active #}" bool_var="{# noop is_active #}"
list_var=" {# noop list #} " list_var=" {# noop list #} "
/ %} / %}
""".replace( """
"\n", " "
)
) )
template = Template(template_str) template = Template(template_str)
@ -262,14 +261,15 @@ class DynamicExprTests(BaseTestCase):
self.assertEqual(captured["bool_var"], "") self.assertEqual(captured["bool_var"], "")
self.assertEqual(captured["list_var"], " ") self.assertEqual(captured["list_var"], " ")
# NOTE: This is whitespace-sensitive test, so we check exact output
self.assertEqual( self.assertEqual(
rendered.strip(), rendered.strip(),
( (
"<!-- _RENDERED SimpleComponent_e258c0,a1bc3f,, -->\n" "<!-- _RENDERED SimpleComponent_e258c0,a1bc3f,, -->\n"
"<div data-djc-id-a1bc3f></div>\n" " <div data-djc-id-a1bc3f></div>\n"
"<div data-djc-id-a1bc3f> abc</div>\n" " <div data-djc-id-a1bc3f> abc</div>\n"
"<div data-djc-id-a1bc3f></div>\n" " <div data-djc-id-a1bc3f></div>\n"
"<div data-djc-id-a1bc3f> </div>" " <div data-djc-id-a1bc3f> </div>"
), ),
) )
@ -315,14 +315,12 @@ class DynamicExprTests(BaseTestCase):
{% load component_tags %} {% load component_tags %}
{% component 'test' {% component 'test'
" {% lorem var_a w %} " " {% lorem var_a w %} "
" {% lorem var_a w %} {{ list|slice:':-1' }} " " {% lorem var_a w %} {{ list|slice:':-1'|safe }} "
bool_var=" {% noop is_active %} " bool_var=" {% noop is_active %} "
list_var=" {% noop list %} " list_var=" {% noop list %} "
dict_var=" {% noop dict %} " dict_var=" {% noop dict %} "
/ %} / %}
""".replace( """
"\n", " "
)
) )
template = Template(template_str) template = Template(template_str)
@ -342,15 +340,16 @@ class DynamicExprTests(BaseTestCase):
self.assertEqual(captured["dict_var"], " {'a': 3} ") self.assertEqual(captured["dict_var"], " {'a': 3} ")
self.assertEqual(captured["list_var"], " [{'a': 1}, {'a': 2}] ") self.assertEqual(captured["list_var"], " [{'a': 1}, {'a': 2}] ")
# NOTE: This is whitespace-sensitive test, so we check exact output
self.assertEqual( self.assertEqual(
rendered.strip(), rendered.strip(),
( (
"<!-- _RENDERED SimpleComponent_6c8e94,a1bc3f,, -->\n" "<!-- _RENDERED SimpleComponent_6c8e94,a1bc3f,, -->\n"
"<div data-djc-id-a1bc3f> lorem ipsum dolor </div>\n" " <div data-djc-id-a1bc3f> lorem ipsum dolor </div>\n"
"<div data-djc-id-a1bc3f> lorem ipsum dolor [{'a': 1}] </div>\n" " <div data-djc-id-a1bc3f> lorem ipsum dolor [{'a': 1}] </div>\n"
"<div data-djc-id-a1bc3f> True </div>\n" " <div data-djc-id-a1bc3f> True </div>\n"
"<div data-djc-id-a1bc3f> [{'a': 1}, {'a': 2}] </div>\n" " <div data-djc-id-a1bc3f> [{'a': 1}, {'a': 2}] </div>\n"
"<div data-djc-id-a1bc3f> {'a': 3} </div>" " <div data-djc-id-a1bc3f> {'a': 3} </div>"
), ),
) )
@ -383,9 +382,7 @@ class DynamicExprTests(BaseTestCase):
""" """
{% load component_tags %} {% load component_tags %}
{% component 'test' '"' "{%}" bool_var="{% noop is_active %}" / %} {% component 'test' '"' "{%}" bool_var="{% noop is_active %}" / %}
""".replace( """
"\n", " "
)
) )
template = Template(template_str) template = Template(template_str)
@ -393,14 +390,14 @@ class DynamicExprTests(BaseTestCase):
Context({"is_active": True}), Context({"is_active": True}),
) )
self.assertEqual( self.assertHTMLEqual(
rendered.strip(), rendered,
( """
"<!-- _RENDERED SimpleComponent_c7a5c3,a1bc3f,, -->\n" <!-- _RENDERED SimpleComponent_c7a5c3,a1bc3f,, -->
'<div data-djc-id-a1bc3f>"</div>\n' <div data-djc-id-a1bc3f>"</div>
"<div data-djc-id-a1bc3f>{%}</div>\n" <div data-djc-id-a1bc3f>{%}</div>
"<div data-djc-id-a1bc3f>True</div>" <div data-djc-id-a1bc3f>True</div>
), """,
) )
@parametrize_context_behavior(["django", "isolated"]) @parametrize_context_behavior(["django", "isolated"])
@ -432,9 +429,7 @@ class DynamicExprTests(BaseTestCase):
"{% component 'test' '{{ var_a }}' bool_var=is_active / %}" "{% component 'test' '{{ var_a }}' bool_var=is_active / %}"
bool_var="{% noop is_active %}" bool_var="{% noop is_active %}"
/ %} / %}
""".replace( """
"\n", " "
)
) )
template = Template(template_str) template = Template(template_str)
@ -447,16 +442,17 @@ class DynamicExprTests(BaseTestCase):
), ),
) )
self.assertEqual( self.assertHTMLEqual(
rendered.strip(), rendered,
( """
"<!-- _RENDERED SimpleComponent_5c8766,a1bc41,, -->\n" <!-- _RENDERED SimpleComponent_5c8766,a1bc41,, -->
"<div data-djc-id-a1bc41><!-- _RENDERED SimpleComponent_5c8766,a1bc40,, -->\n" <div data-djc-id-a1bc41>
"<div data-djc-id-a1bc40>3</div>\n" <!-- _RENDERED SimpleComponent_5c8766,a1bc40,, -->
"<div data-djc-id-a1bc40>True</div>\n" <div data-djc-id-a1bc40>3</div>
"</div>\n" <div data-djc-id-a1bc40>True</div>
"<div data-djc-id-a1bc41>True</div>" </div>
), <div data-djc-id-a1bc41>True</div>
"""
) )
@ -498,9 +494,7 @@ class SpreadOperatorTests(BaseTestCase):
..."{{ list|first }}" ..."{{ list|first }}"
x=123 x=123
/ %} / %}
""".replace( """
"\n", " "
)
) )
template = Template(template_str) template = Template(template_str)
@ -738,9 +732,7 @@ class SpreadOperatorTests(BaseTestCase):
x=123 x=123
..."{{ list|first }}" ..."{{ list|first }}"
/ %} / %}
""".replace( """
"\n", " "
)
) )
template1 = Template(template_str1) template1 = Template(template_str1)
@ -761,9 +753,7 @@ class SpreadOperatorTests(BaseTestCase):
} }
attrs:style="OVERWRITTEN" attrs:style="OVERWRITTEN"
/ %} / %}
""".replace( """
"\n", " "
)
) )
template2 = Template(template_str2) template2 = Template(template_str2)
@ -792,9 +782,7 @@ class SpreadOperatorTests(BaseTestCase):
var_a var_a
... ...
/ %} / %}
""".replace( """
"\n", " "
)
) )
with self.assertRaisesMessage(TemplateSyntaxError, "Spread syntax '...' is missing a value"): with self.assertRaisesMessage(TemplateSyntaxError, "Spread syntax '...' is missing a value"):
@ -820,9 +808,7 @@ class SpreadOperatorTests(BaseTestCase):
...var_a ...var_a
...var_b ...var_b
/ %} / %}
""".replace( """
"\n", " "
)
) )
template = Template(template_str) template = Template(template_str)
@ -855,9 +841,7 @@ class SpreadOperatorTests(BaseTestCase):
{% component 'test' {% component 'test'
...var_b ...var_b
/ %} / %}
""".replace( """
"\n", " "
)
) )
template = Template(template_str) template = Template(template_str)

View file

@ -1,127 +0,0 @@
from django.test import TestCase
from django_components.util.html import SoupNode
from .django_test_setup import setup_test_config
setup_test_config({"autodiscover": False})
class HtmlTests(TestCase):
    """Exercises the `SoupNode` wrapper: parsing, serialization, attributes, lookup and insertion."""

    def test_beautifulsoup_impl(self):
        fragment = """
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
<!-- I'M COMMENT -->
<button>
Click me!
</button>
""".strip()
        nodes = SoupNode.from_fragment(fragment)

        # Items: <div>, whitespace, comment, whitespace, <button>
        self.assertEqual(len(nodes), 5)
        div_node, _, comment_node, _, button_node = nodes

        # --- Serialization ---
        self.assertHTMLEqual(
            div_node.to_html(),
            """
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
""",
        )
        self.assertHTMLEqual(
            comment_node.to_html(),
            "<!-- I&#x27;M COMMENT -->",
        )
        self.assertHTMLEqual(
            button_node.to_html(),
            """
<button>
Click me!
</button>
""",
        )

        # --- Tag names and node kinds ---
        self.assertEqual(div_node.name(), "div")
        self.assertEqual(button_node.name(), "button")
        self.assertEqual(div_node.is_element(), True)
        self.assertEqual(comment_node.is_element(), False)
        self.assertEqual(button_node.is_element(), True)

        # --- Reading and overwriting attributes ---
        self.assertEqual(div_node.get_attr("class"), "abc xyz")
        self.assertEqual(button_node.get_attr("class"), None)

        div_node.set_attr("class", "123 456")
        button_node.set_attr("class", "abc def")

        self.assertEqual(div_node.get_attr("class"), "123 456")
        self.assertEqual(button_node.get_attr("class"), "abc def")

        self.assertHTMLEqual(
            div_node.to_html(),
            """
<div class="123 456" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
""",
        )
        self.assertHTMLEqual(
            button_node.to_html(),
            """
<button class="abc def">
Click me!
</button>
""",
        )

        # Setting attr to `True` will set it to boolean attribute,
        # while setting it to `False` will remove the attribute.
        button_node.set_attr("disabled", True)
        self.assertHTMLEqual(
            button_node.to_html(),
            """
<button class="abc def" disabled>
Click me!
</button>
""",
        )
        button_node.set_attr("disabled", False)
        self.assertHTMLEqual(
            button_node.to_html(),
            """
<button class="abc def">
Click me!
</button>
""",
        )

        # --- Lookup ---
        # Return self
        self.assertEqual(div_node.node, div_node.find_tag("div").node)  # type: ignore[union-attr]
        # Return descendant
        list_item = div_node.find_tag("li")
        self.assertHTMLEqual(list_item.to_html(), "<li>Hi</li>")  # type: ignore[union-attr]
        # Return None when not found
        self.assertEqual(div_node.find_tag("main"), None)

        # --- Insert children ---
        list_item.append_children([button_node])  # type: ignore[union-attr]
        self.assertHTMLEqual(
            list_item.to_html(),  # type: ignore[union-attr]
            """
<li>
Hi
<button class="abc def">
Click me!
</button>
</li>
""",
        )

477
tests/test_html_parser.py Normal file
View file

@ -0,0 +1,477 @@
from django.test import TestCase
from typing import List
from django_components.util.html_parser import HTMLTag, _parse_html as parse_html, set_html_attributes
from .django_test_setup import setup_test_config
setup_test_config({"autodiscover": False})
# This same set of tests is also found in djc_html_parser, to ensure that
# this implementation can be replaced with the djc_html_parser's Rust-based implementation
class TestHTMLParser(TestCase):
    """
    Tests for `set_html_attributes()`.

    Comparisons use `self.assertEqual` rather than bare `assert`, so that failures
    print a diff of the (often multi-line) HTML and the checks are not stripped
    when Python runs with `-O`.
    """

    def test_basic_transformation(self):
        html = "<div><p>Hello</p></div>"
        result, _ = set_html_attributes(html, root_attributes=["data-root"], all_attributes=["data-all"])
        expected = "<div data-root data-all><p data-all>Hello</p></div>"
        self.assertEqual(result, expected)

    def test_multiple_roots(self):
        html = "<div>First</div><span>Second</span>"
        result, _ = set_html_attributes(html, root_attributes=["data-root"], all_attributes=["data-all"])
        expected = "<div data-root data-all>First</div><span data-root data-all>Second</span>"
        self.assertEqual(result, expected)

    def test_complex_html(self):
        html = """
<div class="container" id="main">
<header class="flex">
<h1 title="Main Title">Hello & Welcome</h1>
<nav data-existing="true">
<a href="/home">Home</a>
<a href="/about" class="active">About</a>
</nav>
</header>
<main>
<article data-existing="true">
<h2>Article 1</h2>
<p>Some text with <strong>bold</strong> and <em>emphasis</em></p>
<img src="test.jpg" alt="Test Image"/>
</article>
</main>
</div>
<footer id="footer">
<p>&copy; 2024</p>
</footer>
"""
        result, _ = set_html_attributes(html, ["data-root"], ["data-all", "data-v-123"])
        expected = """
<div class="container" id="main" data-root data-all data-v-123>
<header class="flex" data-all data-v-123>
<h1 title="Main Title" data-all data-v-123>Hello & Welcome</h1>
<nav data-existing="true" data-all data-v-123>
<a href="/home" data-all data-v-123>Home</a>
<a href="/about" class="active" data-all data-v-123>About</a>
</nav>
</header>
<main data-all data-v-123>
<article data-existing="true" data-all data-v-123>
<h2 data-all data-v-123>Article 1</h2>
<p data-all data-v-123>Some text with <strong data-all data-v-123>bold</strong> and <em data-all data-v-123>emphasis</em></p>
<img src="test.jpg" alt="Test Image" data-all data-v-123/>
</article>
</main>
</div>
<footer id="footer" data-root data-all data-v-123>
<p data-all data-v-123>&copy; 2024</p>
</footer>
"""  # noqa: E501
        self.assertEqual(result, expected)

    def test_void_elements(self):
        test_cases = [
            ('<meta charset="utf-8">', '<meta charset="utf-8" data-root data-v-123>'),
            ('<meta charset="utf-8"/>', '<meta charset="utf-8" data-root data-v-123/>'),
            ("<div><br><hr></div>", "<div data-root data-v-123><br data-v-123><hr data-v-123></div>"),
            ('<img src="test.jpg" alt="Test">', '<img src="test.jpg" alt="Test" data-root data-v-123>'),
        ]
        for input_html, expected in test_cases:
            # `subTest` reports which input failed and keeps checking the remaining cases
            with self.subTest(input_html=input_html):
                result, _ = set_html_attributes(input_html, ["data-root"], ["data-v-123"])
                self.assertEqual(result, expected)

    def test_html_head_with_meta(self):
        html = """
<head>
<meta charset="utf-8">
<title>Test Page</title>
<link rel="stylesheet" href="style.css">
<meta name="description" content="Test">
</head>"""
        result, _ = set_html_attributes(html, ["data-root"], ["data-v-123"])
        expected = """
<head data-root data-v-123>
<meta charset="utf-8" data-v-123>
<title data-v-123>Test Page</title>
<link rel="stylesheet" href="style.css" data-v-123>
<meta name="description" content="Test" data-v-123>
</head>"""
        self.assertEqual(result, expected)

    def test_watch_attribute(self):
        html = """
<div data-id="123">
<p>Regular element</p>
<span data-id="456">Nested element</span>
<img data-id="789" src="test.jpg"/>
</div>"""
        result, captured = set_html_attributes(html, ["data-root"], ["data-v-123"], watch_on_attribute="data-id")
        expected = """
<div data-id="123" data-root data-v-123>
<p data-v-123>Regular element</p>
<span data-id="456" data-v-123>Nested element</span>
<img data-id="789" src="test.jpg" data-v-123/>
</div>"""
        self.assertEqual(result, expected)

        # Verify attribute capturing
        self.assertEqual(len(captured), 3)
        # Root element should have both root and all attributes
        self.assertEqual(captured["123"], ["data-root", "data-v-123"])
        # Non-root elements should only have all attributes
        self.assertEqual(captured["456"], ["data-v-123"])
        self.assertEqual(captured["789"], ["data-v-123"])

    def test_whitespace_preservation(self):
        html = """<div>
<p> Hello World </p>
<span> Text with spaces </span>
</div>"""
        result, _ = set_html_attributes(html, ["data-root"], ["data-all"])
        expected = """<div data-root data-all>
<p data-all> Hello World </p>
<span data-all> Text with spaces </span>
</div>"""
        self.assertEqual(result, expected)
# This checks that the parser works irrespective of the main use case
class TestHTMLParserInternal(TestCase):
    """
    Tests for the low-level `parse_html()` function and the `HTMLTag` manipulation
    API exposed to its `on_tag` callback.
    """

    def test_parse_simple_tag(self):
        processed_tags = []

        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            processed_tags.append(tag)

        html = "<div>Hello</div>"
        result = parse_html(html, on_tag)

        self.assertEqual(result, html)
        self.assertEqual(len(processed_tags), 1)
        self.assertEqual(processed_tags[0].name, "div")

    def test_parse_nested_tags(self):
        processed_tags = []

        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            processed_tags.append((tag.name, len(tag_stack)))

        html = "<div><p>Hello</p></div>"
        result = parse_html(html, on_tag)

        self.assertEqual(result, html)
        self.assertEqual(len(processed_tags), 2)
        self.assertEqual(processed_tags[0], ("p", 2))  # p tag with stack depth 2
        self.assertEqual(processed_tags[1], ("div", 1))  # div tag with stack depth 1

    def test_parse_attributes(self):
        processed_tags = []

        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            processed_tags.append(tag)

        html = '<div class="container" id="main">Hello</div>'
        result = parse_html(html, on_tag)

        self.assertEqual(result, html)
        self.assertEqual(len(processed_tags), 1)
        self.assertEqual(len(processed_tags[0].attrs), 2)
        self.assertEqual(processed_tags[0].attrs[0].key, "class")
        self.assertEqual(processed_tags[0].attrs[0].value, "container")
        self.assertEqual(processed_tags[0].attrs[1].key, "id")
        self.assertEqual(processed_tags[0].attrs[1].value, "main")

    def test_void_elements(self):
        processed_tags = []

        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            processed_tags.append(tag)

        html = '<img src="test.jpg" />'
        result = parse_html(html, on_tag)

        self.assertEqual(result, html)
        self.assertEqual(len(processed_tags), 1)
        self.assertEqual(processed_tags[0].name, "img")
        self.assertEqual(processed_tags[0].attrs[0].key, "src")
        self.assertEqual(processed_tags[0].attrs[0].value, "test.jpg")

    def test_add_attr(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.add_attr("data-test", "value", quoted=True)
            tag.add_attr("hidden", None, quoted=False)

        html = "<div>Content</div>"
        result = parse_html(html, on_tag)

        self.assertEqual(result, '<div data-test="value" hidden>Content</div>')

    def test_rename_attr(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.rename_attr("class", "className")

        html = '<div class="test">Content</div>'
        result = parse_html(html, on_tag)

        self.assertEqual(result, '<div className="test">Content</div>')

    def test_delete_attr(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.delete_attr("id")

        html = '<div class="test" id="main">Content</div>'
        result = parse_html(html, on_tag)

        self.assertEqual(result, '<div class="test" >Content</div>')

    def test_clear_attrs(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.clear_attrs()

        html = '<div class="test" id="main" data-value="123">Content</div>'
        result = parse_html(html, on_tag)

        self.assertEqual(result, "<div >Content</div>")

    def test_add_after_clearing_attrs(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.clear_attrs()
            tag.add_attr("data-test", "value", quoted=True)

        html = '<div class="test" id="main" data-value="123">Content</div>'
        result = parse_html(html, on_tag)

        self.assertEqual(result, '<div data-test="value">Content</div>')

    def test_insert_content(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.insert_content("Start ", 0)
            tag.insert_content(" End", -1)

        html = "<div>Content</div>"
        result = parse_html(html, on_tag)

        self.assertEqual(result, "<div>Start Content End</div>")

    def test_clear_content(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.clear_content()

        html = "<div>Original content</div>"
        result = parse_html(html, on_tag)

        self.assertEqual(result, "<div></div>")

    def test_replace_content(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.replace_content("New content")

        html = "<div>Original content</div>"
        result = parse_html(html, on_tag)

        self.assertEqual(result, "<div>New content</div>")

    def test_prepend_append(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.prepend("Before ")
            tag.append(" after")

        html = "<div>Content</div>"
        result = parse_html(html, on_tag)

        self.assertEqual(result, "Before <div>Content</div> after")

    def test_wrap(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.wrap('<section class="wrapper">', "</section>")

        html = "<div>Content</div>"
        result = parse_html(html, on_tag)

        self.assertEqual(result, '<section class="wrapper"><div>Content</div></section>')

    def test_unwrap(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            if tag.name == "span":
                tag.unwrap()

        html = "<div><span>Content</span></div>"
        result = parse_html(html, on_tag)

        self.assertEqual(result, "<div>Content</div>")

    def test_rename_tag(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            tag.rename_tag("article")

        html = "<div>Content</div>"
        result = parse_html(html, on_tag)

        self.assertEqual(result, "<article>Content</article>")

    def test_get_attr_has_attr(self):
        visited: List[str] = []

        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            visited.append(tag.name)
            assert tag.has_attr("class")
            assert not tag.has_attr("id")
            attr = tag.get_attr("class")
            assert attr is not None and attr.value == "test"
            assert tag.get_attr("id") is None

        html = '<div class="test">Content</div>'
        result = parse_html(html, on_tag)

        self.assertEqual(result, html)
        # The output is unchanged by design, so it cannot prove the callback ran.
        # Check explicitly, otherwise the assertions above could be skipped silently.
        self.assertEqual(visited, ["div"])

    def test_tag_manipulation_complex(self):
        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            if tag.name == "div":
                # Test add_attr
                tag.add_attr("data-new", "value", quoted=True)
                # Test rename_attr
                tag.rename_attr("class", "className")
                # Test delete_attr
                tag.delete_attr("id")
                # Test insert_content
                tag.insert_content("<span>Start</span>", 0)
                tag.insert_content("<span>End</span>", -1)
                # Test wrap
                tag.wrap("<section>", "</section>")
            elif tag.name == "p":
                # Test get_attr and has_attr
                assert tag.has_attr("class")
                attr = tag.get_attr("class")
                assert attr is not None and attr.value == "inner"
                # Test clear_attrs
                tag.clear_attrs()
                # Test clear_content and replace_content
                tag.clear_content()
                tag.replace_content("New content")
                # Test prepend and append
                tag.prepend("Before ")
                tag.append(" after")
                # Test rename_tag
                tag.rename_tag("article")
                # Test unwrap
                tag.unwrap()

        html = '<div class="test" id="main"><p class="inner">Original content</p></div>'
        expected = '<section><div className="test" data-new="value"><span>Start</span>Before New content after<span>End</span></div></section>'  # noqa: E501
        result = parse_html(html, on_tag)

        self.assertEqual(result, expected)

    def test_complex_html(self):
        processed_tags = []

        def on_tag(tag: HTMLTag, tag_stack: List[HTMLTag]) -> None:
            processed_tags.append(tag)
            if tag.name == "body":
                # Test attribute manipulation
                tag.add_attr("data-modified", "true", quoted=True)
                tag.rename_attr("class", "className")
            elif tag.name == "div":
                # Test content manipulation
                tag.insert_content("<!-- Modified -->", 0)
                tag.wrap('<div class="wrapper">', "</div>")
            elif tag.name == "p":
                # Test attribute without value
                tag.add_attr("hidden", None, quoted=False)

        html = """<!DOCTYPE html>
<html lang="en" data-theme="light">
<!-- Header section -->
<head>
<meta charset="UTF-8"/>
<title>Complex Test</title>
<link rel="stylesheet" href="style.css">
<script type="text/javascript">
// Single line comment with tags: <div></div>
/* Multi-line comment
</script>
*/
const template = `<div>${value}</div>`;
console.log('</script>');
</script>
</head>
<body class="main" id="content" data-loaded>
<![CDATA[
Some CDATA content with <tags> that should be preserved
]]>
<div class="container" style="display: flex">
<img src="test.jpg" alt="Test Image"/>
<p>Hello <strong>World</strong>!</p>
<input type="text" disabled value="test"/>
</div>
</body>
</html>"""

        expected = """<!DOCTYPE html>
<html lang="en" data-theme="light">
<!-- Header section -->
<head>
<meta charset="UTF-8"/>
<title>Complex Test</title>
<link rel="stylesheet" href="style.css">
<script type="text/javascript">
// Single line comment with tags: <div></div>
/* Multi-line comment
</script>
*/
const template = `<div>${value}</div>`;
console.log('</script>');
</script>
</head>
<body className="main" id="content" data-loaded data-modified="true">
<![CDATA[
Some CDATA content with <tags> that should be preserved
]]>
<div class="wrapper"><div class="container" style="display: flex"><!-- Modified -->
<img src="test.jpg" alt="Test Image"/>
<p hidden>Hello <strong>World</strong>!</p>
<input type="text" disabled value="test"/>
</div></div>
</body>
</html>"""

        result = parse_html(html, on_tag)
        self.assertEqual(result, expected)

        # Verify the structure of processed tags.
        # The callback fires once per element, void elements included:
        # html, head, meta, title, link, script, body, div, img, p, strong, input.
        # The doctype, comment and CDATA sections are not counted.
        self.assertEqual(len(processed_tags), 12)

        # Verify specific tag attributes
        html_tag = next(tag for tag in processed_tags if tag.name == "html")
        self.assertEqual(len(html_tag.attrs), 2)
        self.assertEqual(html_tag.attrs[0].key, "lang")
        self.assertEqual(html_tag.attrs[0].value, "en")
        self.assertEqual(html_tag.attrs[1].key, "data-theme")
        self.assertEqual(html_tag.attrs[1].value, "light")

        # Verify void elements
        img_tag = next(tag for tag in processed_tags if tag.name == "img")
        self.assertEqual(len(img_tag.attrs), 2)
        self.assertEqual(img_tag.attrs[0].key, "src")
        self.assertEqual(img_tag.attrs[0].value, "test.jpg")

        # Verify attribute without value
        body_tag = next(tag for tag in processed_tags if tag.name == "body")
        data_loaded_attr = next(attr for attr in body_tag.attrs if attr.key == "data-loaded")
        self.assertIsNone(data_loaded_attr.value)

        # Verify modified attributes
        self.assertTrue(any(attr.key == "data-modified" and attr.value == "true" for attr in body_tag.attrs))
        self.assertTrue(any(attr.key == "className" and attr.value == "main" for attr in body_tag.attrs))

        # Verify p tag modifications
        p_tag = next(tag for tag in processed_tags if tag.name == "p")
        self.assertTrue(any(attr.key == "hidden" and attr.value is None for attr in p_tag.attrs))

View file

@ -36,6 +36,9 @@ class BaseTestCase(SimpleTestCase):
if template_cache: if template_cache:
template_cache.clear() template_cache.clear()
from django_components.component import component_node_subclasses_by_name
component_node_subclasses_by_name.clear()
# Mock the `generate` function used inside `gen_id` so it returns deterministic IDs # Mock the `generate` function used inside `gen_id` so it returns deterministic IDs
def _start_gen_id_patch(self): def _start_gen_id_patch(self):
# Random number so that the generated IDs are "hex-looking", e.g. a1bc3d # Random number so that the generated IDs are "hex-looking", e.g. a1bc3d
@ -182,6 +185,9 @@ def parametrize_context_behavior(cases: List[ContextBehParam], settings: Optiona
if template_cache: # May be None if the cache was not initialized if template_cache: # May be None if the cache was not initialized
template_cache.clear() template_cache.clear()
from django_components.component import component_node_subclasses_by_name
component_node_subclasses_by_name.clear()
case_has_data = not isinstance(case, str) case_has_data = not isinstance(case, str)
if isinstance(case, str): if isinstance(case, str):