mirror of
https://github.com/django-components/django-components.git
synced 2025-09-26 15:39:08 +00:00
refactor: replace selectolax with beautifulsoup (#823)
* refactor: replace selectolax with beautifulsoup * refactor: add tests for html parser impl * refactor: add missing import * refactor: fix tests * refactor: fix linter issues * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
c61847d30d
commit
1cd545b986
9 changed files with 391 additions and 351 deletions
|
@ -1,14 +1,6 @@
|
|||
from typing import List, cast
|
||||
|
||||
from django.test import TestCase
|
||||
from selectolax.lexbor import LexborHTMLParser, LexborNode
|
||||
|
||||
from django_components.util.html import (
|
||||
is_html_parser_fragment,
|
||||
parse_document_or_nodes,
|
||||
parse_multiroot_html,
|
||||
parse_node,
|
||||
)
|
||||
from django_components.util.html import SoupNode
|
||||
|
||||
from .django_test_setup import setup_test_config
|
||||
|
||||
|
@ -16,50 +8,26 @@ setup_test_config({"autodiscover": False})
|
|||
|
||||
|
||||
class HtmlTests(TestCase):
|
||||
def test_parse_node(self):
|
||||
node = parse_node(
|
||||
def test_beautifulsoup_impl(self):
|
||||
nodes = SoupNode.from_fragment(
|
||||
"""
|
||||
<div class="abc xyz" data-id="123">
|
||||
<ul>
|
||||
<li>Hi</li>
|
||||
</ul>
|
||||
</div>
|
||||
"""
|
||||
)
|
||||
node.attrs["id"] = "my-id" # type: ignore[index]
|
||||
node.css("li")[0].attrs["class"] = "item" # type: ignore[index]
|
||||
|
||||
self.assertHTMLEqual(
|
||||
node.html,
|
||||
"""
|
||||
<div class="abc xyz" data-id="123" id="my-id">
|
||||
<ul>
|
||||
<li class="item">Hi</li>
|
||||
</ul>
|
||||
</div>
|
||||
""",
|
||||
<!-- I'M COMMENT -->
|
||||
<button>
|
||||
Click me!
|
||||
</button>
|
||||
""".strip()
|
||||
)
|
||||
|
||||
def test_parse_multiroot_html(self):
|
||||
html = """
|
||||
<div class="abc xyz" data-id="123">
|
||||
<ul>
|
||||
<li>Hi</li>
|
||||
</ul>
|
||||
</div>
|
||||
<main id="123" class="one">
|
||||
<div>
|
||||
42
|
||||
</div>
|
||||
</main>
|
||||
<span>
|
||||
Hello
|
||||
</span>
|
||||
"""
|
||||
nodes = parse_multiroot_html(html)
|
||||
# Items: <div>, whitespace, comment, whitespace, <button>
|
||||
self.assertEqual(len(nodes), 5)
|
||||
|
||||
self.assertHTMLEqual(
|
||||
nodes[0].html,
|
||||
nodes[0].to_html(),
|
||||
"""
|
||||
<div class="abc xyz" data-id="123">
|
||||
<ul>
|
||||
|
@ -69,87 +37,37 @@ class HtmlTests(TestCase):
|
|||
""",
|
||||
)
|
||||
self.assertHTMLEqual(
|
||||
nodes[1].html,
|
||||
"""
|
||||
<main id="123" class="one">
|
||||
<div>
|
||||
42
|
||||
</div>
|
||||
</main>
|
||||
""",
|
||||
nodes[2].to_html(),
|
||||
"<!-- I'M COMMENT -->",
|
||||
)
|
||||
self.assertHTMLEqual(
|
||||
nodes[2].html,
|
||||
nodes[4].to_html(),
|
||||
"""
|
||||
<span>
|
||||
Hello
|
||||
</span>
|
||||
<button>
|
||||
Click me!
|
||||
</button>
|
||||
""",
|
||||
)
|
||||
|
||||
def test_is_html_parser_fragment(self):
|
||||
fragment_html = """
|
||||
<div class="abc xyz" data-id="123">
|
||||
<ul>
|
||||
<li>Hi</li>
|
||||
</ul>
|
||||
</div>
|
||||
<main id="123" class="one">
|
||||
<div>
|
||||
42
|
||||
</div>
|
||||
</main>
|
||||
<span>
|
||||
Hello
|
||||
</span>
|
||||
"""
|
||||
fragment_tree = LexborHTMLParser(fragment_html)
|
||||
fragment_result = is_html_parser_fragment(fragment_html, fragment_tree)
|
||||
self.assertEqual(nodes[0].name(), "div")
|
||||
self.assertEqual(nodes[4].name(), "button")
|
||||
|
||||
self.assertEqual(fragment_result, True)
|
||||
self.assertEqual(nodes[0].is_element(), True)
|
||||
self.assertEqual(nodes[2].is_element(), False)
|
||||
self.assertEqual(nodes[4].is_element(), True)
|
||||
|
||||
doc_html = """
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="https://..." />
|
||||
</head>
|
||||
<body>
|
||||
<div class="abc xyz" data-id="123">
|
||||
<ul>
|
||||
<li>Hi</li>
|
||||
</ul>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
doc_tree = LexborHTMLParser(doc_html)
|
||||
doc_result = is_html_parser_fragment(doc_html, doc_tree)
|
||||
self.assertEqual(nodes[0].get_attr("class"), "abc xyz")
|
||||
self.assertEqual(nodes[4].get_attr("class"), None)
|
||||
|
||||
self.assertEqual(doc_result, False)
|
||||
|
||||
def test_parse_document_or_nodes__fragment(self):
|
||||
fragment_html = """
|
||||
<div class="abc xyz" data-id="123">
|
||||
<ul>
|
||||
<li>Hi</li>
|
||||
</ul>
|
||||
</div>
|
||||
<main id="123" class="one">
|
||||
<div>
|
||||
42
|
||||
</div>
|
||||
</main>
|
||||
<span>
|
||||
Hello
|
||||
</span>
|
||||
"""
|
||||
fragment_result = cast(List[LexborNode], parse_document_or_nodes(fragment_html))
|
||||
nodes[0].set_attr("class", "123 456")
|
||||
nodes[4].set_attr("class", "abc def")
|
||||
self.assertEqual(nodes[0].get_attr("class"), "123 456")
|
||||
self.assertEqual(nodes[4].get_attr("class"), "abc def")
|
||||
|
||||
self.assertHTMLEqual(
|
||||
fragment_result[0].html,
|
||||
nodes[0].to_html(),
|
||||
"""
|
||||
<div class="abc xyz" data-id="123">
|
||||
<div class="123 456" data-id="123">
|
||||
<ul>
|
||||
<li>Hi</li>
|
||||
</ul>
|
||||
|
@ -157,111 +75,53 @@ class HtmlTests(TestCase):
|
|||
""",
|
||||
)
|
||||
self.assertHTMLEqual(
|
||||
fragment_result[1].html,
|
||||
nodes[4].to_html(),
|
||||
"""
|
||||
<main id="123" class="one">
|
||||
<div>
|
||||
42
|
||||
</div>
|
||||
</main>
|
||||
""",
|
||||
)
|
||||
self.assertHTMLEqual(
|
||||
fragment_result[2].html,
|
||||
"""
|
||||
<span>
|
||||
Hello
|
||||
</span>
|
||||
<button class="abc def">
|
||||
Click me!
|
||||
</button>
|
||||
""",
|
||||
)
|
||||
|
||||
def test_parse_document_or_nodes__mixed(self):
|
||||
fragment_html = """
|
||||
<link href="" />
|
||||
<div class="abc xyz" data-id="123">
|
||||
<ul>
|
||||
<li>Hi</li>
|
||||
</ul>
|
||||
</div>
|
||||
<main id="123" class="one">
|
||||
<div>
|
||||
42
|
||||
</div>
|
||||
</main>
|
||||
<span>
|
||||
Hello
|
||||
</span>
|
||||
"""
|
||||
fragment_result = cast(List[LexborNode], parse_document_or_nodes(fragment_html))
|
||||
|
||||
# Setting attr to `True` will set it to boolean attribute,
|
||||
# while setting it to `False` will remove the attribute.
|
||||
nodes[4].set_attr("disabled", True)
|
||||
self.assertHTMLEqual(
|
||||
fragment_result[0].html,
|
||||
nodes[4].to_html(),
|
||||
"""
|
||||
<link href="" />
|
||||
<button class="abc def" disabled>
|
||||
Click me!
|
||||
</button>
|
||||
""",
|
||||
)
|
||||
nodes[4].set_attr("disabled", False)
|
||||
self.assertHTMLEqual(
|
||||
fragment_result[1].html,
|
||||
nodes[4].to_html(),
|
||||
"""
|
||||
<div class="abc xyz" data-id="123">
|
||||
<ul>
|
||||
<li>Hi</li>
|
||||
</ul>
|
||||
</div>
|
||||
""",
|
||||
)
|
||||
self.assertHTMLEqual(
|
||||
fragment_result[2].html,
|
||||
"""
|
||||
<main id="123" class="one">
|
||||
<div>
|
||||
42
|
||||
</div>
|
||||
</main>
|
||||
""",
|
||||
)
|
||||
self.assertHTMLEqual(
|
||||
fragment_result[3].html,
|
||||
"""
|
||||
<span>
|
||||
Hello
|
||||
</span>
|
||||
<button class="abc def">
|
||||
Click me!
|
||||
</button>
|
||||
""",
|
||||
)
|
||||
|
||||
def test_parse_document_or_nodes__doc(self):
|
||||
doc_html = """
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="https://..." />
|
||||
</head>
|
||||
<body>
|
||||
<div class="abc xyz" data-id="123">
|
||||
<ul>
|
||||
<li>Hi</li>
|
||||
</ul>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
fragment_result = cast(LexborHTMLParser, parse_document_or_nodes(doc_html))
|
||||
# Return self
|
||||
self.assertEqual(nodes[0].node, nodes[0].find_tag("div").node) # type: ignore[union-attr]
|
||||
# Return descendant
|
||||
li = nodes[0].find_tag("li")
|
||||
self.assertHTMLEqual(li.to_html(), "<li>Hi</li>") # type: ignore[union-attr]
|
||||
# Return None when not found
|
||||
self.assertEqual(nodes[0].find_tag("main"), None)
|
||||
|
||||
# Insert children
|
||||
li.append_children([nodes[4]]) # type: ignore[union-attr]
|
||||
self.assertHTMLEqual(
|
||||
fragment_result.html,
|
||||
li.to_html(), # type: ignore[union-attr]
|
||||
"""
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="https://..." />
|
||||
</head>
|
||||
<body>
|
||||
<div class="abc xyz" data-id="123">
|
||||
<ul>
|
||||
<li>Hi</li>
|
||||
</ul>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
<li>
|
||||
Hi
|
||||
<button class="abc def">
|
||||
Click me!
|
||||
</button>
|
||||
</li>
|
||||
""",
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue