refactor: replace selectolax with beautifulsoup (#823)

* refactor: replace selectolax with beautifulsoup

* refactor: add tests for html parser impl

* refactor: add missing import

* refactor: fix tests

* refactor: fix linter issues

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Juro Oravec 2024-12-08 08:42:48 +01:00 committed by GitHub
parent c61847d30d
commit 1cd545b986
9 changed files with 391 additions and 351 deletions

View file

@@ -29,7 +29,7 @@ classifiers = [
]
dependencies = [
'Django>=4.2',
'selectolax>=0.3.24',
'beautifulsoup4>=4.12',
]
license = {text = "MIT"}

View file

@@ -11,4 +11,4 @@ playwright
requests
types-requests
whitenoise
selectolax
beautifulsoup4

View file

@@ -6,7 +6,9 @@
#
asgiref==3.8.1
# via django
black==24.8.0
beautifulsoup4==4.12.3
# via -r requirements-dev.in
black==24.10.0
# via -r requirements-dev.in
cachetools==5.5.0
# via tox
@@ -16,15 +18,15 @@ cfgv==3.4.0
# via pre-commit
chardet==5.2.0
# via tox
charset-normalizer==3.3.2
charset-normalizer==3.4.0
# via requests
click==8.1.7
# via black
colorama==0.4.6
# via tox
distlib==0.3.8
distlib==0.3.9
# via virtualenv
django==5.1.1
django==5.1.4
# via -r requirements-dev.in
filelock==3.16.1
# via
@@ -38,7 +40,7 @@ flake8-pyproject==1.2.3
# via -r requirements-dev.in
greenlet==3.1.1
# via playwright
identify==2.5.33
identify==2.6.3
# via pre-commit
idna==3.10
# via requests
@@ -54,9 +56,9 @@ mypy-extensions==1.0.0
# via
# black
# mypy
nodeenv==1.8.0
nodeenv==1.9.1
# via pre-commit
packaging==24.1
packaging==24.2
# via
# black
# pyproject-api
@@ -69,7 +71,7 @@ platformdirs==4.3.6
# black
# tox
# virtualenv
playwright==1.48.0
playwright==1.49.0
# via -r requirements-dev.in
pluggy==1.5.0
# via
@@ -77,7 +79,7 @@ pluggy==1.5.0
# tox
pre-commit==4.0.1
# via -r requirements-dev.in
pycodestyle==2.12.0
pycodestyle==2.12.1
# via flake8
pyee==12.0.0
# via playwright
@@ -87,19 +89,19 @@ pyproject-api==1.8.0
# via tox
pytest==8.3.4
# via -r requirements-dev.in
pyyaml==6.0.1
pyyaml==6.0.2
# via pre-commit
requests==2.32.3
# via -r requirements-dev.in
selectolax==0.3.26
# via -r requirements-dev.in
sqlparse==0.5.0
soupsieve==2.6
# via beautifulsoup4
sqlparse==0.5.2
# via django
tox==4.23.2
# via -r requirements-dev.in
types-requests==2.32.0.20241016
# via -r requirements-dev.in
typing-extensions==4.10.0
typing-extensions==4.12.2
# via
# mypy
# pyee
@@ -107,12 +109,9 @@ urllib3==2.2.3
# via
# requests
# types-requests
virtualenv==20.26.6
virtualenv==20.28.0
# via
# pre-commit
# tox
whitenoise==6.7.0
whitenoise==6.8.2
# via -r requirements-dev.in
# The following packages are considered to be unsafe in a requirements file:
# setuptools

View file

@@ -20,7 +20,7 @@
# - pymdown-extensions
# - black
# - django>=4.2
# - selectolax>=0.3.24
# - beautifulsoup4>=4.12
#
asgiref==3.8.1
@@ -29,6 +29,8 @@ babel==2.16.0
# via
# mkdocs-git-revision-date-localized-plugin
# mkdocs-material
beautifulsoup4==4.12.3
# via hatch.envs.docs
black==24.10.0
# via hatch.envs.docs
bracex==2.5.post1
@@ -207,12 +209,12 @@ regex==2024.11.6
# via mkdocs-material
requests==2.32.3
# via mkdocs-material
selectolax==0.3.26
# via hatch.envs.docs
six==1.16.0
# via python-dateutil
smmap==5.0.1
# via gitdb
soupsieve==2.6
# via beautifulsoup4
sqlparse==0.5.2
# via django
tinycss2==1.4.0

View file

@@ -33,11 +33,10 @@ from django.templatetags.static import static
from django.urls import path, reverse
from django.utils.decorators import sync_and_async_middleware
from django.utils.safestring import SafeString, mark_safe
from selectolax.lexbor import LexborHTMLParser
import django_components.types as types
from django_components.util.html import parse_document_or_nodes, parse_multiroot_html, parse_node
from django_components.util.misc import escape_js_string_literal, get_import_path
from django_components.util.html import SoupNode
from django_components.util.misc import _escape_js, get_import_path
if TYPE_CHECKING:
from django_components.component import Component
@@ -362,26 +361,14 @@ def render_dependencies(content: TContent, type: RenderType = "document") -> TCo
# then try to insert the JS scripts at the end of <body> and CSS sheets at the end
# of <head>
if type == "document" and (not did_find_js_placeholder or not did_find_css_placeholder):
tree = parse_document_or_nodes(content_.decode())
maybe_transformed = _insert_js_css_to_default_locations(
content_.decode(),
css_content=None if did_find_css_placeholder else css_dependencies.decode(),
js_content=None if did_find_js_placeholder else js_dependencies.decode(),
)
if isinstance(tree, LexborHTMLParser):
did_modify_html = False
if not did_find_css_placeholder and tree.head:
css_elems = parse_multiroot_html(css_dependencies.decode())
for css_elem in css_elems:
tree.head.insert_child(css_elem) # type: ignore # TODO: Update to selectolax 0.3.25
did_modify_html = True
if not did_find_js_placeholder and tree.body:
js_elems = parse_multiroot_html(js_dependencies.decode())
for js_elem in js_elems:
tree.body.insert_child(js_elem) # type: ignore # TODO: Update to selectolax 0.3.25
did_modify_html = True
transformed = cast(str, tree.html)
if did_modify_html:
content_ = transformed.encode()
if maybe_transformed is not None:
content_ = maybe_transformed.encode()
# Return the same type as we were given
output = content_.decode() if isinstance(content, str) else content_
@@ -567,15 +554,15 @@ def _postprocess_media_tags(
tags_by_url: Dict[str, str] = {}
for tag in tags:
node = parse_node(tag)
node = SoupNode.from_fragment(tag.strip())[0]
# <script src="..."> vs <link href="...">
attr = "src" if script_type == "js" else "href"
maybe_url = node.attrs.get(attr, None)
maybe_url = node.get_attr(attr, None)
if not _is_nonempty_str(maybe_url):
raise RuntimeError(
f"One of entries for `Component.Media.{script_type}` media is missing a "
f"value for attribute '{attr}'. If there is content inlined inside the `<{node.tag}>` tags, "
f"value for attribute '{attr}'. If there is content inlined inside the `<{node.name()}>` tags, "
f"you must move the content to a `.{script_type}` file and reference it via '{attr}'.\nGot:\n{tag}"
)
@@ -739,11 +726,48 @@ def _gen_exec_script(
return exec_script
def _escape_js(js: str, eval: bool = True) -> str:
escaped_js = escape_js_string_literal(js)
# `unescapeJs` is the function we call in the browser to parse the escaped JS
escaped_js = f"Components.unescapeJs(`{escaped_js}`)"
return f"eval({escaped_js})" if eval else escaped_js
def _insert_js_css_to_default_locations(
html_content: str,
js_content: Optional[str],
css_content: Optional[str],
) -> Optional[str]:
"""
This function tries to insert the JS and CSS content into the default locations.
JS is inserted at the end of `<body>`, and CSS is inserted at the end of `<head>`.
"""
elems = SoupNode.from_fragment(html_content)
if not elems:
return None
did_modify_html = False
if css_content is not None:
for elem in elems:
if not elem.is_element():
continue
head = elem.find_tag("head")
if head:
css_elems = SoupNode.from_fragment(css_content)
head.append_children(css_elems)
did_modify_html = True
if js_content is not None:
for elem in elems:
if not elem.is_element():
continue
body = elem.find_tag("body")
if body:
js_elems = SoupNode.from_fragment(js_content)
body.append_children(js_elems)
did_modify_html = True
if did_modify_html:
transformed = SoupNode.to_html_multiroot(elems)
return transformed
else:
return None # No changes made
#########################################################
@@ -802,27 +826,27 @@ class ComponentDependencyMiddleware:
"""
def __init__(self, get_response: "Callable[[HttpRequest], HttpResponse]") -> None:
self.get_response = get_response
self._get_response = get_response
# NOTE: Required to work with async
if iscoroutinefunction(self.get_response):
if iscoroutinefunction(self._get_response):
markcoroutinefunction(self)
def __call__(self, request: HttpRequest) -> HttpResponseBase:
if iscoroutinefunction(self):
return self.__acall__(request)
response = self.get_response(request)
response = self.process_response(response)
response = self._get_response(request)
response = self._process_response(response)
return response
# NOTE: Required to work with async
async def __acall__(self, request: HttpRequest) -> HttpResponseBase:
response = await self.get_response(request)
response = self.process_response(response)
response = await self._get_response(request)
response = self._process_response(response)
return response
def process_response(self, response: HttpResponse) -> HttpResponse:
def _process_response(self, response: HttpResponse) -> HttpResponse:
if not isinstance(response, StreamingHttpResponse) and response.get("Content-Type", "").startswith(
"text/html"
):
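
An aside on the flow above: in "document" mode, when `render_dependencies` does not find the `{% component_js_dependencies %}` / `{% component_css_dependencies %}` placeholders, it now calls `_insert_js_css_to_default_locations`, which appends CSS at the end of `<head>` and JS at the end of `<body>`. A minimal sketch of that behavior follows; the document and tag strings are invented, and the import path assumes the helper lives in `django_components.dependencies`:

from django_components.dependencies import _insert_js_css_to_default_locations

document = "<html><head><title>Hi</title></head><body><p>Hello</p></body></html>"

result = _insert_js_css_to_default_locations(
    document,
    js_content='<script src="/static/app.js"></script>',
    css_content='<link rel="stylesheet" href="/static/app.css">',
)
# `result` is the serialized document with the <link> appended inside <head>
# and the <script> appended inside <body>. It is None when nothing was inserted
# (no parsed elements, no matching <head>/<body>, or both contents being None).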

View file

@@ -1,100 +1,108 @@
from typing import List, Union
from abc import ABC, abstractmethod
from typing import Any, List, Optional, Sequence
from selectolax.lexbor import LexborHTMLParser, LexborNode
from bs4 import BeautifulSoup, CData, Comment, Doctype, Tag
def parse_node(html: str) -> LexborNode:
class HTMLNode(ABC):
"""
Use this when you know the given HTML is a single node like
`<div> Hi </div>`
Interface for an HTML manipulation library. This allows us to potentially swap
between different libraries.
"""
tree = LexborHTMLParser(html)
# NOTE: The parser automatically places <style> tags inside <head>
# while <script> tags are inside <body>.
return tree.body.child or tree.head.child # type: ignore[union-attr, return-value]
@classmethod
@abstractmethod
def from_fragment(cls, html: str) -> Sequence["HTMLNode"]: ... # noqa: E704
@abstractmethod
def to_html(self) -> str: ... # noqa: E704
@abstractmethod
def name(self) -> str:
"""Get tag name"""
...
@abstractmethod
def find_tag(self, tag: str) -> Optional["HTMLNode"]: ... # noqa: E704
@abstractmethod
def append_children(self, children: Sequence[Any]) -> None: ... # noqa: E704
@abstractmethod
def get_attr(self, attr: str, default: Any = None) -> Any: ... # noqa: E704
@abstractmethod
def set_attr(self, attr: str, value: Any) -> None: ... # noqa: E704
@abstractmethod
def is_element(self) -> bool: ... # noqa: E704
"""Returns `False` if the node is a text, comment, or doctype node. `True` otherwise."""
@classmethod
def to_html_multiroot(cls, elems: Sequence["HTMLNode"]) -> str:
return "".join([elem.to_html() for elem in elems])
def parse_document_or_nodes(html: str) -> Union[List[LexborNode], LexborHTMLParser]:
"""
Use this if you do NOT know whether the given HTML is a full document
with `<html>`, `<head>`, and `<body>` tags, or an HTML fragment.
"""
html = html.strip()
tree = LexborHTMLParser(html)
is_fragment = is_html_parser_fragment(html, tree)
class SoupNode(HTMLNode):
"""BeautifulSoup implementation of HTMLNode."""
if is_fragment:
nodes = parse_multiroot_html(html)
return nodes
else:
return tree
def __init__(self, node: Tag):
self.node = node
@classmethod
def from_fragment(cls, html: str) -> List["SoupNode"]:
soup = BeautifulSoup(html, "html.parser")
# Get top-level elements in the fragment
return [cls(elem) for elem in soup.contents]
def parse_multiroot_html(html: str) -> List[LexborNode]:
"""
Use this when you know the given HTML is a multiple nodes like
def to_html(self) -> str:
if isinstance(self.node, CData):
return f"<![CDATA[{self.node}]]>"
elif isinstance(self.node, Comment):
return f"<!-- {self.node} -->"
elif isinstance(self.node, Doctype):
return f"<!DOCTYPE {self.node}>"
else:
return str(self.node)
`<div> Hi </div> <span> Hello </span>`
"""
# NOTE: HTML / XML MUST have a single root. So, to support multiple
# top-level elements, we wrap them in a dummy singular root.
parser = LexborHTMLParser(f"<root>{html}</root>")
def name(self) -> str:
return self.node.name
# Get all contents of the root
root_elem = parser.css_first("root")
elems = [*root_elem.iter()] if root_elem else []
return elems
def find_tag(self, tag: str) -> Optional["SoupNode"]:
if isinstance(self.node, Tag) and self.node.name == tag:
return self
else:
match = self.node.select_one(tag)
if match:
return SoupNode(match)
return None
def append_children(self, children: Sequence["SoupNode"]) -> None:
if isinstance(self.node, Tag):
for child in children:
self.node.append(child.node)
def is_html_parser_fragment(html: str, tree: LexborHTMLParser) -> bool:
# If we pass only an HTML fragment to the parser, like `<div>123</div>`, then
# the parser automatically wraps it in `<html>`, `<head>`, and `<body>` tags.
#
# <html>
# <head>
# </head>
# <body>
# <div>123</div>
# </body>
# </html>
#
# But also, as described in Lexbor (https://github.com/lexbor/lexbor/issues/183#issuecomment-1611975340),
# if the parser first comes across HTML tags that could go into the `<head>`,
# it will put them there, and then put the rest in `<body>`.
#
# So `<link href="..." /><div></div>` will be parsed as
#
# <html>
# <head>
# <link href="..." />
# </head>
# <body>
# <div>123</div>
# </body>
# </html>
#
# BUT, if we're dealing with a fragment, we want to parse it correctly as
# a multi-root fragment:
#
# <link href="..." />
# <div>123</div>
#
# The way do so is that we:
# 1. Take the original HTML string
# 2. Subtract the content of parsed `<head>` from the START of the original HTML
# 3. Subtract the content of parsed `<body>` from the END of the original HTML
# 4. Then, if we have an HTML fragment, we should be left with empty string (maybe whitespace?).
# 5. But if we have an HTML document, then the "space between" should contain text,
# because we didn't account for the length of `<html>`, `<head>`, `<body>` tags.
#
# TODO: Replace with fragment parser?
# See https://github.com/rushter/selectolax/issues/74#issuecomment-2404470344
parsed_head_html: str = tree.head.html # type: ignore
parsed_body_html: str = tree.body.html # type: ignore
head_content = parsed_head_html[len("<head>") : -len("</head>")] # noqa: E203
body_content = parsed_body_html[len("<body>") : -len("</body>")] # noqa: E203
between_content = html[len(head_content) : -len(body_content)].strip() # noqa: E203
def get_attr(self, attr: str, default: Any = None) -> Any:
if isinstance(self.node, Tag):
res = self.node.get(attr, default)
if isinstance(res, list):
return " ".join(res)
return res
return default
is_fragment = not html or not between_content
return is_fragment
def set_attr(self, attr: str, value: Any) -> None:
if not isinstance(self.node, Tag):
return
if value is True:
# Set boolean attributes without a value
self.node[attr] = None
elif value is False:
# Remove the attribute
self.node.attrs.pop(attr, None)
else:
self.node[attr] = value
def is_element(self) -> bool:
return isinstance(self.node, Tag)
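
To make the `HTMLNode` interface concrete, here is a minimal usage sketch of `SoupNode` built only from the methods defined above; the HTML strings and variable names are invented for illustration, and the module is internal to django_components:

from django_components.util.html import SoupNode

# Each top-level node of a fragment (elements, comments, text) becomes one SoupNode.
nodes = SoupNode.from_fragment('<div class="card">Hi</div><!-- note --><span>Bye</span>')

div = nodes[0]
print(div.is_element())       # True (comment and text nodes return False)
print(div.get_attr("class"))  # "card" (list-valued attributes are joined with spaces)

div.set_attr("hidden", True)   # True renders a boolean attribute without a value
div.set_attr("hidden", False)  # False removes the attribute again

span = nodes[2].find_tag("span")  # returns the node itself or a matching descendant
span.append_children(SoupNode.from_fragment("<b>!</b>"))

print(SoupNode.to_html_multiroot(nodes))  # serialize all roots back into a single string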

View file

@@ -77,3 +77,10 @@ def get_last_index(lst: List, key: Callable[[Any], bool]) -> Optional[int]:
if key(item):
return len(lst) - 1 - index
return None
def _escape_js(js: str, eval: bool = True) -> str:
escaped_js = escape_js_string_literal(js)
# `unescapeJs` is the function we call in the browser to parse the escaped JS
escaped_js = f"Components.unescapeJs(`{escaped_js}`)"
return f"eval({escaped_js})" if eval else escaped_js

View file

@@ -2,11 +2,11 @@ from unittest.mock import Mock
from django.http import HttpResponseNotModified
from django.template import Context, Template
from selectolax.lexbor import LexborHTMLParser
from django_components import Component, registry, render_dependencies, types
from django_components.components.dynamic import DynamicComponent
from django_components.middleware import ComponentDependencyMiddleware
from django_components.util.html import SoupNode
from .django_test_setup import setup_test_config
from .testutils import BaseTestCase, create_and_process_template_response
@@ -224,7 +224,9 @@ class RenderDependenciesTests(BaseTestCase):
count=1,
)
rendered_body = LexborHTMLParser(rendered).body.html # type: ignore[union-attr]
# Nodes: [Doctype, whitespace, <html>]
nodes = SoupNode.from_fragment(rendered.strip())
rendered_body = nodes[2].find_tag("body").to_html() # type: ignore[union-attr]
self.assertInHTML(
"""<script src="django_components/django_components.min.js">""",
@@ -274,7 +276,9 @@
count=1,
)
rendered_head = LexborHTMLParser(rendered).head.html # type: ignore[union-attr]
# Nodes: [Doctype, whitespace, <html>]
nodes = SoupNode.from_fragment(rendered.strip())
rendered_head = nodes[2].find_tag("head").to_html() # type: ignore[union-attr]
self.assertInHTML(
"""<script src="django_components/django_components.min.js">""",
@@ -287,6 +291,142 @@
count=1,
)
# NOTE: Some HTML parser libraries like selectolax or lxml try to "correct" the given HTML.
# We want to avoid this behavior, so user gets the exact same HTML back.
def test_does_not_try_to_add_close_tags(self):
registry.register(name="test", component=SimpleComponent)
template_str: types.django_html = """
<thead>
"""
rendered_raw = Template(template_str).render(Context({"formset": [1]}))
rendered = render_dependencies(rendered_raw, type="fragment")
self.assertHTMLEqual(rendered, "<thead>")
def test_does_not_modify_html_when_no_component_used(self):
registry.register(name="test", component=SimpleComponent)
template_str: types.django_html = """
<table class="table-auto border-collapse divide-y divide-x divide-slate-300 w-full">
<!-- Table head -->
<thead>
<tr class="py-0 my-0 h-7">
<!-- Empty row -->
<th class="min-w-12">#</th>
</tr>
</thead>
<!-- Table body -->
<tbody id="items" class="divide-y divide-slate-300">
{% for form in formset %}
{% with row_number=forloop.counter %}
<tr class=" hover:bg-gray-200 py-0 {% cycle 'bg-white' 'bg-gray-50' %} divide-x "
aria-rowindex="{{ row_number }}">
<!-- row num -->
<td class="whitespace-nowrap w-fit text-center px-4 w-px"
aria-colindex="1">
{{ row_number }}
</td>
</tr>
{% endwith %}
{% endfor %}
</tbody>
</table>
"""
rendered_raw = Template(template_str).render(Context({"formset": [1]}))
rendered = render_dependencies(rendered_raw, type="fragment")
expected = """
<table class="table-auto border-collapse divide-y divide-x divide-slate-300 w-full">
<!-- Table head -->
<thead>
<tr class="py-0 my-0 h-7">
<!-- Empty row -->
<th class="min-w-12">#</th>
</tr>
</thead>
<!-- Table body -->
<tbody id="items" class="divide-y divide-slate-300">
<tr class=" hover:bg-gray-200 py-0 bg-white divide-x "
aria-rowindex="1">
<!-- row num -->
<td class="whitespace-nowrap w-fit text-center px-4 w-px"
aria-colindex="1">
1
</td>
</tr>
</tbody>
</table>
"""
self.assertHTMLEqual(expected, rendered)
# Explanation: The component is used in the template, but the template doesn't use
# {% component_js_dependencies %} or {% component_css_dependencies %} tags,
# nor defines a `<head>` or `<body>` tag. In which case, the dependencies are not rendered.
def test_does_not_modify_html_when_component_used_but_nowhere_to_insert(self):
registry.register(name="test", component=SimpleComponent)
template_str: types.django_html = """
{% load component_tags %}
<table class="table-auto border-collapse divide-y divide-x divide-slate-300 w-full">
<!-- Table head -->
<thead>
<tr class="py-0 my-0 h-7">
<!-- Empty row -->
<th class="min-w-12">#</th>
</tr>
</thead>
<!-- Table body -->
<tbody id="items" class="divide-y divide-slate-300">
{% for form in formset %}
{% with row_number=forloop.counter %}
<tr class=" hover:bg-gray-200 py-0 {% cycle 'bg-white' 'bg-gray-50' %} divide-x "
aria-rowindex="{{ row_number }}">
<!-- row num -->
<td class="whitespace-nowrap w-fit text-center px-4 w-px"
aria-colindex="1">
{{ row_number }}
{% component "test" variable="hi" / %}
</td>
</tr>
{% endwith %}
{% endfor %}
</tbody>
</table>
"""
rendered_raw = Template(template_str).render(Context({"formset": [1]}))
rendered = render_dependencies(rendered_raw, type="fragment")
expected = """
<table class="table-auto border-collapse divide-y divide-x divide-slate-300 w-full">
<!-- Table head -->
<thead>
<tr class="py-0 my-0 h-7">
<!-- Empty row -->
<th class="min-w-12">#</th>
</tr>
</thead>
<!-- Table body -->
<tbody id="items" class="divide-y divide-slate-300">
<tr class=" hover:bg-gray-200 py-0 bg-white divide-x "
aria-rowindex="1">
<!-- row num -->
<td class="whitespace-nowrap w-fit text-center px-4 w-px"
aria-colindex="1">
1
Variable: <strong>hi</strong>
</td>
</tr>
</tbody>
</table>
"""
self.assertHTMLEqual(expected, rendered)
class MiddlewareTests(BaseTestCase):
def test_middleware_response_without_content_type(self):

View file

@@ -1,14 +1,6 @@
from typing import List, cast
from django.test import TestCase
from selectolax.lexbor import LexborHTMLParser, LexborNode
from django_components.util.html import (
is_html_parser_fragment,
parse_document_or_nodes,
parse_multiroot_html,
parse_node,
)
from django_components.util.html import SoupNode
from .django_test_setup import setup_test_config
@@ -16,50 +8,26 @@ setup_test_config({"autodiscover": False})
class HtmlTests(TestCase):
def test_parse_node(self):
node = parse_node(
def test_beautifulsoup_impl(self):
nodes = SoupNode.from_fragment(
"""
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
"""
)
node.attrs["id"] = "my-id" # type: ignore[index]
node.css("li")[0].attrs["class"] = "item" # type: ignore[index]
self.assertHTMLEqual(
node.html,
"""
<div class="abc xyz" data-id="123" id="my-id">
<ul>
<li class="item">Hi</li>
</ul>
</div>
""",
<!-- I'M COMMENT -->
<button>
Click me!
</button>
""".strip()
)
def test_parse_multiroot_html(self):
html = """
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
<main id="123" class="one">
<div>
42
</div>
</main>
<span>
Hello
</span>
"""
nodes = parse_multiroot_html(html)
# Items: <div>, whitespace, comment, whitespace, <button>
self.assertEqual(len(nodes), 5)
self.assertHTMLEqual(
nodes[0].html,
nodes[0].to_html(),
"""
<div class="abc xyz" data-id="123">
<ul>
@@ -69,87 +37,37 @@ """,
""",
)
self.assertHTMLEqual(
nodes[1].html,
"""
<main id="123" class="one">
<div>
42
</div>
</main>
""",
nodes[2].to_html(),
"<!-- I&#x27;M COMMENT -->",
)
self.assertHTMLEqual(
nodes[2].html,
nodes[4].to_html(),
"""
<span>
Hello
</span>
<button>
Click me!
</button>
""",
)
def test_is_html_parser_fragment(self):
fragment_html = """
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
<main id="123" class="one">
<div>
42
</div>
</main>
<span>
Hello
</span>
"""
fragment_tree = LexborHTMLParser(fragment_html)
fragment_result = is_html_parser_fragment(fragment_html, fragment_tree)
self.assertEqual(nodes[0].name(), "div")
self.assertEqual(nodes[4].name(), "button")
self.assertEqual(fragment_result, True)
self.assertEqual(nodes[0].is_element(), True)
self.assertEqual(nodes[2].is_element(), False)
self.assertEqual(nodes[4].is_element(), True)
doc_html = """
<!doctype html>
<html>
<head>
<link href="https://..." />
</head>
<body>
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
</body>
</html>
"""
doc_tree = LexborHTMLParser(doc_html)
doc_result = is_html_parser_fragment(doc_html, doc_tree)
self.assertEqual(nodes[0].get_attr("class"), "abc xyz")
self.assertEqual(nodes[4].get_attr("class"), None)
self.assertEqual(doc_result, False)
def test_parse_document_or_nodes__fragment(self):
fragment_html = """
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
<main id="123" class="one">
<div>
42
</div>
</main>
<span>
Hello
</span>
"""
fragment_result = cast(List[LexborNode], parse_document_or_nodes(fragment_html))
nodes[0].set_attr("class", "123 456")
nodes[4].set_attr("class", "abc def")
self.assertEqual(nodes[0].get_attr("class"), "123 456")
self.assertEqual(nodes[4].get_attr("class"), "abc def")
self.assertHTMLEqual(
fragment_result[0].html,
nodes[0].to_html(),
"""
<div class="abc xyz" data-id="123">
<div class="123 456" data-id="123">
<ul>
<li>Hi</li>
</ul>
@@ -157,111 +75,53 @@ """,
""",
)
self.assertHTMLEqual(
fragment_result[1].html,
nodes[4].to_html(),
"""
<main id="123" class="one">
<div>
42
</div>
</main>
""",
)
self.assertHTMLEqual(
fragment_result[2].html,
"""
<span>
Hello
</span>
<button class="abc def">
Click me!
</button>
""",
)
def test_parse_document_or_nodes__mixed(self):
fragment_html = """
<link href="" />
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
<main id="123" class="one">
<div>
42
</div>
</main>
<span>
Hello
</span>
"""
fragment_result = cast(List[LexborNode], parse_document_or_nodes(fragment_html))
# Setting attr to `True` will set it to boolean attribute,
# while setting it to `False` will remove the attribute.
nodes[4].set_attr("disabled", True)
self.assertHTMLEqual(
fragment_result[0].html,
nodes[4].to_html(),
"""
<link href="" />
<button class="abc def" disabled>
Click me!
</button>
""",
)
nodes[4].set_attr("disabled", False)
self.assertHTMLEqual(
fragment_result[1].html,
nodes[4].to_html(),
"""
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
""",
)
self.assertHTMLEqual(
fragment_result[2].html,
"""
<main id="123" class="one">
<div>
42
</div>
</main>
""",
)
self.assertHTMLEqual(
fragment_result[3].html,
"""
<span>
Hello
</span>
<button class="abc def">
Click me!
</button>
""",
)
def test_parse_document_or_nodes__doc(self):
doc_html = """
<!doctype html>
<html>
<head>
<link href="https://..." />
</head>
<body>
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
</body>
</html>
"""
fragment_result = cast(LexborHTMLParser, parse_document_or_nodes(doc_html))
# Return self
self.assertEqual(nodes[0].node, nodes[0].find_tag("div").node) # type: ignore[union-attr]
# Return descendant
li = nodes[0].find_tag("li")
self.assertHTMLEqual(li.to_html(), "<li>Hi</li>") # type: ignore[union-attr]
# Return None when not found
self.assertEqual(nodes[0].find_tag("main"), None)
# Insert children
li.append_children([nodes[4]]) # type: ignore[union-attr]
self.assertHTMLEqual(
fragment_result.html,
li.to_html(), # type: ignore[union-attr]
"""
<!doctype html>
<html>
<head>
<link href="https://..." />
</head>
<body>
<div class="abc xyz" data-id="123">
<ul>
<li>Hi</li>
</ul>
</div>
</body>
</html>
<li>
Hi
<button class="abc def">
Click me!
</button>
</li>
""",
)