refactor: Patch Template.compile_nodelist with custom template parser (#908)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-09-27 07:59:08 +00:00 · 2025-01-15 22:34:32 +01:00 · 2025-01-15 22:34:32 +01:00 · 7ed4fd88f9
commit 7ed4fd88f9
parent 8cd4b03286
9 changed files with 810 additions and 204 deletions
--- a/benchmarks/test_lexer_performance.py
+++ b/benchmarks/test_lexer_performance.py
@ -0,0 +1,195 @@
 # NOTE: This file is more of a playground than a proper test
 import timeit
 from typing import List, Tuple
 from django.template.base import DebugLexer, Lexer, Token
 from django_components.util.template_parser import parse_template
 def django_lexer(template: str) -> List[Token]:
    """Tokenize `template` with Django's standard `Lexer` and return the tokens as a list."""
    return list(Lexer(template).tokenize())
 def django_debug_lexer(template: str) -> List[Token]:
    """Tokenize `template` with Django's `DebugLexer` and return the tokens as a list."""
    return list(DebugLexer(template).tokenize())
 def run_benchmark(template: str, num_iterations: int = 5000) -> Tuple[float, float]:
    """
    Time Django's `DebugLexer` against the custom `parse_template` on one template.

    Each tokenizer is executed `num_iterations` times via `timeit`.

    Returns a `(django_debug_time, custom_time)` tuple with the total elapsed
    seconds of each run.
    """
    # NOTE: The plain `Lexer` (see `django_lexer`) is currently not part of the comparison.
    debug_lexer_timer = timeit.Timer(lambda: django_debug_lexer(template))
    custom_parser_timer = timeit.Timer(lambda: parse_template(template))

    django_debug_time = debug_lexer_timer.timeit(number=num_iterations)
    custom_time = custom_parser_timer.timeit(number=num_iterations)
    return django_debug_time, custom_time
 def print_benchmark_results(template: str, django_time: float, custom_time: float, num_iterations: int) -> None:
    """
    Print a formatted comparison of the two timings for one template.

    Args:
        template: The template source that was benchmarked (printed verbatim).
        django_time: Total seconds taken by the Django lexer.
        custom_time: Total seconds taken by the custom lexer.
        num_iterations: Number of iterations each timing covers.

    The last line reports how many times faster or slower the custom lexer is.
    The factor is always computed as `slower time / faster time`, so it is >= 1
    in both directions (e.g. "4.00x slower", never "0.25x slower").
    """
    custom_is_faster = custom_time < django_time
    faster_time, slower_time = sorted((django_time, custom_time))
    # Guard against a zero timing (possible with a coarse timer and a tiny workload),
    # which would otherwise raise ZeroDivisionError.
    if faster_time > 0:
        factor = slower_time / faster_time
    elif slower_time > 0:
        factor = float("inf")
    else:
        factor = 1.0

    print(f"\nTemplate: {template}")
    print(f"Iterations: {num_iterations}")
    print(f"Django Lexer: {django_time:.6f} seconds")
    print(f"Custom Lexer: {custom_time:.6f} seconds")
    print(f"Difference: {abs(django_time - custom_time):.6f} seconds")
    print(f"Custom lexer is {factor:.2f}x {'faster' if custom_is_faster else 'slower'}")
 if __name__ == "__main__":
    # Templates of increasing complexity; the later ones contain component tags
    # with nested template tags inside quoted attribute values.
    test_cases = [
        # Simple text
        "Hello World",
        # Simple variable
        "Hello {{ name }}",
        # Simple block
        "{% if condition %}Hello{% endif %}",
        # Complex nested template
        """
        {% extends "base.html" %}
        {% block content %}
            <h1>{{ title }}</h1>
            {% for item in items %}
                <div class="{{ item.class }}">
                    {{ item.name }}
                    {% if item.description %}
                        <p>{{ item.description }}</p>
                    {% endif %}
                </div>
            {% endfor %}
        {% endblock %}
        """,
        # Component with nested tags
        """
        {% component 'table'
            headers=headers
            rows=rows
            footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
            title="{% trans 'Data Table' %}"
        %}
        """,
        # Real world example
        """
        <div class="prose flex flex-col gap-8">
        {# Info section #}
        <div class="border-b border-neutral-300">
            <div class="flex justify-between items-start">
            <h3 class="mt-0">Project Info</h3>
                {% if editable %}
                {% component "Button"
                    href=project_edit_url
                    attrs:class="not-prose"
                    footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
                    title="{% trans 'Data Table' %}"
                %}
                    Edit Project
                {% endcomponent %}
                {% endif %}
            </div>
            <table>
            {% for key, value in project_info %}
                <tr>
                <td class="font-bold pr-4">
                    {{ key }}:
                </td>
                <td>
                {{ value }}
                </td>
                </tr>
            {% endfor %}
            </table>
        </div>
        {# Status Updates section #}
        {% component "ProjectStatusUpdates"
            project_id=project.pk
            status_updates=status_updates
            editable=editable
            footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
            title="{% trans 'Data Table' %}"
        / %}
        <div class="xl:grid xl:grid-cols-2 gap-10">
            {# Team section #}
            <div class="border-b border-neutral-300">
            <div class="flex justify-between items-start">
                <h3 class="mt-0">Dcode Team</h3>
                {% if editable %}
                    {% component "Button"
                        href=edit_project_roles_url
                        attrs:class="not-prose"
                        footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
                        title="{% trans 'Data Table' %}"
                    %}
                    Edit Team
                    {% endcomponent %}
                {% endif %}
            </div>
            {% component "ProjectUsers"
                project_id=project.pk
                roles_with_users=roles_with_users
                editable=False
                footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
                title="{% trans 'Data Table' %}"
            / %}
            </div>
            {# POCs section #}
            <div>
            <div class="flex justify-between items-start max-xl:mt-6">
                <h3 class="mt-0">Client POCs</h3>
                {% if editable %}
                {% component "Button"
                    href=edit_pocs_url
                    attrs:class="not-prose"
                    footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
                    title="{% trans 'Data Table' %}"
                %}
                    Edit POCs
                {% endcomponent %}
                {% endif %}
            </div>
            {% if poc_data %}
                <table>
                <tr>
                    <th>Name</th>
                    <th>Job Title</th>
                    <th>Hubspot Profile</th>
                </tr>
                {% for data in poc_data %}
                    <tr>
                    <td>{{ data.poc.contact.first_name }} {{ data.poc.contact.last_name }}</td>
                    <td>{{ data.poc.contact.job_title }}</td>
                    <td>
                        {% component "Icon"
                            href=data.hubspot_url
                            name="arrow-top-right-on-square"
                            variant="outline"
                            color="text-gray-400 hover:text-gray-500"
                            footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
                            title="{% trans 'Data Table' %}"
                        / %}
                    </td>
                    </tr>
                {% endfor %}
                </table>
            {% else %}
                <p class="text-sm italic">No entries</p>
            {% endif %}
            </div>
        </div>
        </div>
        """,
    ]
    # Use ONE iteration count for both running and reporting, so the printed
    # "Iterations" always matches what was actually measured.
    num_iterations = 5000
    for template in test_cases:
        django_time, custom_time = run_benchmark(template, num_iterations)
        print_benchmark_results(template, django_time, custom_time, num_iterations)
--- a/src/django_components/apps.py
+++ b/src/django_components/apps.py
@ -15,14 +15,14 @@ class ComponentsConfig(AppConfig):
    def ready(self) -> None:
        from django_components.app_settings import app_settings
        from django_components.autodiscovery import autodiscover, import_libraries
        from django_components.component import monkeypatch_template
        from django_components.component_registry import registry
        from django_components.components.dynamic import DynamicComponent
        from django_components.util.django_monkeypatch import monkeypatch_template_cls
        # NOTE: This monkeypatch is applied here, before Django processes any requests.
        #       To make django-components work with django-debug-toolbar-template-profiler
        #       See https://github.com/EmilStenstrom/django-components/discussions/819
-        monkeypatch_template(Template)
+        monkeypatch_template_cls(Template)
        # Import modules set in `COMPONENTS.libraries` setting
        import_libraries()
--- a/src/django_components/component.py
+++ b/src/django_components/component.py
@ -68,6 +68,7 @@ from django_components.slots import (
    resolve_fills,
 )
 from django_components.template import cached_template
 from django_components.util.django_monkeypatch import is_template_cls_patched
 from django_components.util.logger import trace_msg
 from django_components.util.misc import gen_id
 from django_components.util.template_tag import TagParams
@ -1272,54 +1273,6 @@ class ComponentNode(BaseNode):
        return output
 def monkeypatch_template(template_cls: Type[Template]) -> None:
    # Modify `Template.render` to set `isolated_context` kwarg of `push_state`
    # based on our custom `Template._dc_is_component_nested`.
    #
    # Part of fix for https://github.com/EmilStenstrom/django-components/issues/508
    #
    # NOTE 1: While we could've subclassed Template, then we would need to either
    # 1) ask the user to change the backend, so all templates are of our subclass, or
    # 2) copy the data from user's Template class instance to our subclass instance,
    # which could lead to doubly parsing the source, and could be problematic if users
    # used more exotic subclasses of Template.
    #
    # Instead, modifying only the `render` method of the already-existing class
    # should work well with any user-provided custom subclasses of Template, and it
    # doesn't require the source to be parsed multiple times. User can pass extra args/kwargs,
    # and can modify the rendering behavior by overriding the `_render` method.
    #
    # NOTE 2: Instead of setting `Template._dc_is_component_nested`, alternatively we could
    # have passed the value to `monkeypatch_template` directly. However, we intentionally
    # did NOT do that, so the monkey-patched method is more robust, and can be e.g. copied
    # to other classes.
    if hasattr(template_cls, "_dc_patched"):
        # Do not patch if done so already. This helps us avoid RecursionError
        return
    def _template_render(self: Template, context: Context, *args: Any, **kwargs: Any) -> str:
        #  ---------------- OUR CHANGES START ----------------
        # We parametrized `isolated_context`, which was `True` in the original method.
        if not hasattr(self, "_dc_is_component_nested"):
            # The attribute was never set on this template, so keep the original `True`.
            isolated_context = True
        else:
            # MUST be `True` for templates that are NOT imported with the `{% extends %}` tag,
            # and `False` otherwise.
            isolated_context = not self._dc_is_component_nested
        #  ---------------- OUR CHANGES END ----------------
        with context.render_context.push_state(self, isolated_context=isolated_context):
            if context.template is None:
                with context.bind_template(self):
                    context.template_name = self.name
                    return self._render(context, *args, **kwargs)
            else:
                return self._render(context, *args, **kwargs)
    # Install the replacement and mark the class, so the guard above skips repeated calls.
    template_cls.render = _template_render
    template_cls._dc_patched = True
@contextmanager
 def _maybe_bind_template(context: Context, template: Template) -> Generator[None, Any, None]:
    if context.template is None:
@ -1342,7 +1295,7 @@ def _prepare_template(
        # And https://github.com/EmilStenstrom/django-components/issues/634
        template = component._get_template(context)
-        if not getattr(template, "_dc_patched"):
+        if not is_template_cls_patched(template):
            raise RuntimeError(
                "Django-components received a Template instance which was not patched."
                "If you are using Django's Template class, check if you added django-components"
@ -1350,10 +1303,11 @@ def _prepare_template(
                "manually patch the class."
            )
-        # Set `Template._dc_is_component_nested` based on whether we're currently INSIDE
+        # Set `Template._djc_is_component_nested` based on whether we're currently INSIDE
        # the `{% extends %}` tag.
        # Part of fix for https://github.com/EmilStenstrom/django-components/issues/508
-        template._dc_is_component_nested = bool(context.render_context.get(BLOCK_CONTEXT_KEY))
+        # See django_monkeypatch.py
        template._djc_is_component_nested = bool(context.render_context.get(BLOCK_CONTEXT_KEY))
        with _maybe_bind_template(context, template):
            yield template
--- a/src/django_components/expression.py
+++ b/src/django_components/expression.py
@ -2,7 +2,9 @@ import re
 from typing import TYPE_CHECKING, Any, Dict, List
 from django.template import Context, Node, NodeList, TemplateSyntaxError
-from django.template.base import Lexer, Parser, VariableNode
+from django.template.base import Parser, VariableNode
 from django_components.util.template_parser import parse_template
 if TYPE_CHECKING:
    from django_components.util.template_tag import TagParam
@ -48,8 +50,7 @@ class DynamicFilterExpression:
        # Copy the Parser, and pass through the tags and filters available
        # in the current context. Thus, if user calls `{% load %}` inside
        # the expression, it won't spill outside.
-        lexer = Lexer(self.expr)
+        tokens = parse_template(self.expr)
        tokens = lexer.tokenize()
        expr_parser = Parser(tokens=tokens)
        expr_parser.tags = {**parser.tags}
        expr_parser.filters = {**parser.filters}
--- a/src/django_components/templatetags/component_tags.py
+++ b/src/django_components/templatetags/component_tags.py
@ -28,7 +28,7 @@ from django_components.slots import SLOT_DEFAULT_KEYWORD, SLOT_REQUIRED_KEYWORD,
 from django_components.tag_formatter import get_tag_formatter
 from django_components.util.logger import trace_msg
 from django_components.util.misc import gen_id
-from django_components.util.template_tag import TagSpec, fix_nested_tags, parse_template_tag, with_tag_spec
+from django_components.util.template_tag import TagSpec, parse_template_tag, with_tag_spec
 # NOTE: Variable name `register` is required by Django to recognize this as a template tag library
 # See https://docs.djangoproject.com/en/dev/howto/custom-template-tags
@ -492,7 +492,6 @@ def component(
    """
    tag_id = gen_id()
    fix_nested_tags(parser, token)
    bits = token.split_contents()
    # Let the TagFormatter pre-process the tokens
--- a/src/django_components/util/django_monkeypatch.py
+++ b/src/django_components/util/django_monkeypatch.py
@ -0,0 +1,110 @@
 from typing import Any, Type
 from django.template import Context, NodeList, Template
 from django.template.base import Parser
 from django_components.util.template_parser import parse_template
 # In some cases we can't work around Django's design, and need to patch the template class.
 def monkeypatch_template_cls(template_cls: Type[Template]) -> None:
    """
    Apply all django-components patches to `template_cls` and flag it as patched.

    Replaces `compile_nodelist` and `render` on the class, then sets
    `_djc_patched = True`, which is the flag `is_template_cls_patched()` reads.
    """
    for apply_patch in (monkeypatch_template_compile_nodelist, monkeypatch_template_render):
        apply_patch(template_cls)
    setattr(template_cls, "_djc_patched", True)
 # Patch `Template.compile_nodelist` to use our custom parser. Our parser makes it possible
 # to use template tags as inputs to the component tag:
 #
 # {% component "my-component" description="{% lorem 3 w %}" / %}
 #
 # NOTE: The replacement is kept close to Django's own implementation (see the
 # ORIGINAL / OUR CHANGES markers), so that it can be compared against upstream.
 def monkeypatch_template_compile_nodelist(template_cls: Type[Template]) -> None:
    def _compile_nodelist(self: Template) -> NodeList:
        """
        Parse and compile the template source into a nodelist. If debug
        is True and an exception occurs during parsing, the exception is
        annotated with contextual line information where it occurred in the
        template source.
        """
        #  ---------------- ORIGINAL (Django v5.1.3) ----------------
        # if self.engine.debug:
        #     lexer = DebugLexer(self.source)
        # else:
        #     lexer = Lexer(self.source)
        # tokens = lexer.tokenize()
        #  ---------------- OUR CHANGES START ----------------
        # Unlike the original, the same tokenizer is used regardless of `engine.debug`.
        tokens = parse_template(self.source)
        #  ---------------- OUR CHANGES END ----------------
        parser = Parser(
            tokens,
            self.engine.template_libraries,
            self.engine.template_builtins,
            self.origin,
        )
        try:
            #  ---------------- ADDED IN Django v5.1 - See https://github.com/django/django/commit/35bbb2c9c01882b1d77b0b8c737ac646144833d4  # noqa: E501
            nodelist = parser.parse()
            # `getattr` with a default, so parsers without `extra_data` don't fail here
            # (presumably Django versions before v5.1 - TODO confirm).
            self.extra_data = getattr(parser, "extra_data", {})
            #  ---------------- END OF ADDED IN Django v5.1 ----------------
            return nodelist
        except Exception as e:
            if self.engine.debug:
                # NOTE(review): Assumes the exception carries a `token` attribute
                # - confirm this holds for every exception that can reach here.
                e.template_debug = self.get_exception_info(e, e.token)  # type: ignore
            raise
    # Replace the method on the class itself, so all instances use the custom tokenizer.
    template_cls.compile_nodelist = _compile_nodelist
 def monkeypatch_template_render(template_cls: Type[Template]) -> None:
    # Modify `Template.render` to set `isolated_context` kwarg of `push_state`
    # based on our custom `Template._djc_is_component_nested`.
    #
    # Part of fix for https://github.com/EmilStenstrom/django-components/issues/508
    #
    # NOTE 1: While we could've subclassed Template, then we would need to either
    # 1) ask the user to change the backend, so all templates are of our subclass, or
    # 2) copy the data from user's Template class instance to our subclass instance,
    # which could lead to doubly parsing the source, and could be problematic if users
    # used more exotic subclasses of Template.
    #
    # Instead, modifying only the `render` method of the already-existing class
    # should work well with any user-provided custom subclasses of Template, and it
    # doesn't require the source to be parsed multiple times. User can pass extra args/kwargs,
    # and can modify the rendering behavior by overriding the `_render` method.
    #
    # NOTE 2: Instead of setting `Template._djc_is_component_nested`, alternatively we could
    # have passed the value to `monkeypatch_template_render` directly. However, we intentionally
    # did NOT do that, so the monkey-patched method is more robust, and can be e.g. copied
    # to other classes.
    if is_template_cls_patched(template_cls):
        # Do not patch if done so already. This helps us avoid RecursionError
        return
    def _template_render(self: Template, context: Context, *args: Any, **kwargs: Any) -> str:
        "Display stage -- can be called many times"
        #  ---------------- ORIGINAL (Django v5.1.3) ----------------
        # with context.render_context.push_state(self):
        #  ---------------- OUR CHANGES START ----------------
        # We parametrized `isolated_context`, which was `True` in the original method.
        if not hasattr(self, "_djc_is_component_nested"):
            # The attribute was never set on this template, so keep the original `True`.
            isolated_context = True
        else:
            # MUST be `True` for templates that are NOT imported with the `{% extends %}` tag,
            # and `False` otherwise.
            isolated_context = not self._djc_is_component_nested
        with context.render_context.push_state(self, isolated_context=isolated_context):
            #  ---------------- OUR CHANGES END ----------------
            if context.template is None:
                with context.bind_template(self):
                    context.template_name = self.name
                    return self._render(context, *args, **kwargs)
            else:
                return self._render(context, *args, **kwargs)
    # Replace the method on the class itself, so all instances pick up the new `render`.
    template_cls.render = _template_render
 def is_template_cls_patched(template_cls: Type[Template]) -> bool:
    """
    Return the `_djc_patched` flag of `template_cls`, or `False` when the flag is absent.

    The flag is looked up with `getattr`, so it is found whether it sits on the
    given object itself or is inherited from its class.
    """
    patched_flag = getattr(template_cls, "_djc_patched", False)
    return patched_flag
--- a/src/django_components/util/template_parser.py
+++ b/src/django_components/util/template_parser.py
@ -0,0 +1,227 @@
 """
 Parser for Django template.
 The parser reads a template file (usually HTML, but not necessarily), which may contain
 "template tags" like this:
 ```django
 {% component 'my_comp' key=val key2='val2 two' %}
 {% endcomponent %}
 {{ my_var }}
 {# I am comment #}
 ```
 and returns a list of Tokens:
 ```py
 [
    (TokenType.TEXT, '\n', (0, 1), 1),
    (TokenType.BLOCK, "component 'my_comp' key=val key2='val2 two'", (1, 50), 2),
    (TokenType.TEXT, '\n', (50, 51), 2),
    (TokenType.BLOCK, 'endcomponent', (51, 69), 3),
    (TokenType.TEXT, '\n\n', (69, 71), 3),
    (TokenType.VAR, 'my_var', (71, 83), 5),
    (TokenType.TEXT, '\n\n', (83, 85), 5),
    (TokenType.COMMENT, 'I am comment', (85, 103), 7),
    (TokenType.TEXT, '\n', (103, 104), 7),
 ]
 ```
 See `parse_template()` for details.
 """
 import re
 from functools import lru_cache
 from typing import List, Optional, Tuple
 from django.template.base import DebugLexer, Token, TokenType
 from django.template.exceptions import TemplateSyntaxError
 # NOTE: As of 0.125, the strategy is to use Django's lexer, and use our own parser
 #   only when necessary, for the shortest time possible.
 #
 #   Before I switched to this strategy, my initial parser was about 50x slower than Django's lexer.
 #   I (Juro) assume it was because I was walking character by character, instead of using a regex.
 #
 #   The overall speed should then depend on the number of broken tokens in the template.
 #
 #   Performance of the new strategy on a real-world example:
 #   - A template with about 110 lines and 6 components
 #   - Components spanning ~35 lines in total, so roughly 1/3 of the template
 #   - The custom parser is about 8x slower than Django's Debug lexer.
 #   - For a mid-sized project of 200 templates, it would take 7-8 seconds to load all the templates
 #     (from 1 second with Django's lexer).
 #     - However, thanks to django-components' lazy-loading, this should not be a problem.
 #
 #   How it works is that:
 #   1. We use Django's lexer to get the tokens.
 #   2. We check them one-by-one, and if we find a broken token, we switch to our parser to fix it.
 #   3. Once the broken token is fixed, we find its end position, and switch back to the Django lexer
 #      for the remaining text (step 1).
 def parse_template(text: str) -> List[Token]:
    """
    Tokenize a Django template, allowing nested tags inside quoted strings of `{% %}` tags.

    Django's `DebugLexer` does the bulk of the work. Any BLOCK token whose contents
    contain a quote character is treated as potentially "broken" (cut short at a `%}`
    that sits inside a string) and is re-parsed with `_detailed_tag_parser()`.
    Tokenization then resumes with `DebugLexer` right after the re-parsed tag.

    Args:
        text: The full template source.

    Returns:
        List of tokens, with `position` and `lineno` relative to the whole of `text`.

    Raises:
        TemplateSyntaxError: Propagated from `_detailed_tag_parser()` when a tag
            or a quoted string inside it is not terminated.
    """
    resolved_tokens: List[Token] = []
    index_start = 0
    index_end = len(text)
    # Number of lines that precede `text[index_start]`. The lexer is restarted on a slice
    # and numbers lines from 1 again, so this is added to every token's `lineno`.
    lineno_offset = 0
    while index_start < index_end:
        broken_token: Optional[Token] = None
        # Do fast tokenization with regex - This is about 50x faster than our custom tokenizer.
        # We use DebugLexer because we need to get the position of the tokens.
        # DebugLexer and Lexer have very similar speeds, Debug is about 33% slower.
        lexer = DebugLexer(text[index_start:index_end])
        tokens: List[Token] = lexer.tokenize()
        for token in tokens:
            # Translate slice-relative coordinates to coordinates within the whole text
            token.lineno += lineno_offset
            token.position = (token.position[0] + index_start, token.position[1] + index_start)
            if token.token_type == TokenType.BLOCK and ("'" in token.contents or '"' in token.contents):
                broken_token = token
                break
            resolved_tokens.append(token)

        # No broken token means the lexer consumed the rest of the text - we're done
        if broken_token is None:
            break

        # If we found a broken token, we switch to our slow parser
        broken_token_start = broken_token.position[0]
        fixed_token = _detailed_tag_parser(text[broken_token_start:], broken_token.lineno, broken_token_start)
        resolved_tokens.append(fixed_token)
        index_start = fixed_token.position[1]
        # `fixed_token.lineno` is already absolute, so the offset is ASSIGNED, not accumulated
        # (accumulating would shift line numbers further with every fixed tag).
        # Newlines are counted on the raw source span of the tag, because the token's
        # `contents` are stripped and so miss newlines right after `{%` or before `%}`.
        lineno_offset = (
            fixed_token.lineno - 1  # -1 because lines are 1-indexed
            + text.count("\n", broken_token_start, index_start)
        )  # fmt: skip
    return resolved_tokens
 # Handle parsing of `{% %}` tags, while allowing `%}` inside of strings
 def _detailed_tag_parser(text: str, lineno: int, start_index: int) -> Token:
    """
    Parse a single `{% ... %}` tag that starts at the beginning of `text`.

    Single- and double-quoted strings inside the tag are consumed as a whole
    (with backslash escapes allowed), so a `%}` inside a string does NOT close the tag.

    Args:
        text: Template source that starts exactly at the tag's opening `{%`.
        lineno: Line number assigned to the returned token.
        start_index: Offset of `text` within the whole template; added to the token's position.

    Returns:
        A BLOCK token with stripped contents and position
        `(start_index, start_index + <number of characters consumed>)`.

    Raises:
        TemplateSyntaxError: If a quoted string or the tag itself is never closed.
    """
    index = 0
    length = len(text)
    result_content: List[str] = []
    # Pre-compute common substrings
    QUOTE_CHARS = ("'", '"')
    QUOTE_OR_PERCENT = (*QUOTE_CHARS, "%")
    def take_char() -> str:
        # Consume and return the current character, or "" at the end of text
        nonlocal index
        if index >= length:
            return ""
        char = text[index]
        index += 1
        return char
    def peek_char(offset: int = 0) -> str:
        # Return the character `offset` positions ahead without consuming it, or "" past the end
        peek_index = index + offset
        if peek_index >= length:
            return ""
        return text[peek_index]
    # This is an optimized version that uses regex to find the next stop character
    # and ignores the stop characters if they are prefixed by a backslash, if allow_escapes is True.
    #
    # For the intuition, the original version is:
    #
    # ```py
    # def take_until_any(stop_chars: Tuple[str, ...], allow_escapes: bool = False) -> str:
    #     nonlocal index
    #     start = index
    #     while index < length:
    #         char = text[index]
    #         if allow_escapes and char == BACKSLASH and index + 1 < length:
    #             index += 2
    #             continue
    #         if char in stop_chars:
    #             break
    #         index += 1
    #     return text[start:index]
    # ```
    def take_until_any(stop_chars: Tuple[str, ...], allow_escapes: bool = False) -> str:
        nonlocal index
        stop_chars_str = "".join(stop_chars)
        pattern = _compile_take_until_pattern(stop_chars_str, allow_escapes)
        # Find match at current position
        match = pattern.match(text, index)
        if match:
            matched_text = match.group(0)
            index += len(matched_text)
            return matched_text
        return ""
    # Given that this function is called only when there's a broken token,
    # we know that the first two characters are always "{%"
    take_char()  # {
    take_char()  # %
    # Main parsing loop
    while index < length:
        char = peek_char()
        # Handle strings within `{% %}`
        if char in QUOTE_CHARS:
            quote_char = take_char()
            result_content.append(quote_char)
            # Take content until matching quote, allowing escaped quotes
            content = take_until_any((quote_char,), allow_escapes=True)
            result_content.append(content)
            # Handle the closing quote
            if peek_char() == quote_char:
                result_content.append(take_char())
            else:
                raise TemplateSyntaxError(f"Unexpected end of text - unterminated {quote_char} string")
            continue
        # Check for closing tag
        if char == "%":
            if peek_char(1) == "}":
                take_char()  # %
                take_char()  # }
                break
            # False alarm, just a lone `%` (e.g. `50%`). Consume ONLY this one character.
            # Taking everything up to the next quote here would swallow the tag's own `%}`
            # whenever no quote stands between this `%` and the end of the tag.
            result_content.append(take_char())
            continue
        # Take regular content until we hit a quote or potential closing tag
        content = take_until_any(QUOTE_OR_PERCENT)
        result_content.append(content)
    else:
        # The loop ran out of text without reaching the closing `%}`
        raise TemplateSyntaxError("Unexpected end of text - unterminated {% tag")
    result_str = "".join(result_content).strip()  # Django's Lexer.tokenize() strips the whitespace
    return Token(TokenType.BLOCK, result_str, (start_index, index + start_index), lineno)
 # Create a regex pattern that takes anything until any of the stop characters are found.
 #
 # If allow_escapes is True, also the stop characters are allowed, given that they are
 # prefixed by a backslash.
@lru_cache(maxsize=128)
 def _compile_take_until_pattern(stop_chars: str, allow_escapes: bool) -> re.Pattern:
    escaped_stops = "".join(re.escape(c) for c in stop_chars)
    if allow_escapes:
        # Match either escaped characters or anything until stop chars
        pattern = f"(?:\\\\.|[^{escaped_stops}])*"
    else:
        # Match anything until stop chars
        pattern = f"[^{escaped_stops}]*"
    return re.compile(pattern)
--- a/src/django_components/util/template_tag.py
+++ b/src/django_components/util/template_tag.py
@ -1,14 +1,14 @@
 import functools
 import inspect
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, Iterable, List, Mapping, NamedTuple, Optional, Set, Tuple, cast
+from typing import Any, Callable, Dict, Iterable, List, Mapping, NamedTuple, Optional, Set, Tuple
 from django.template import Context, NodeList
-from django.template.base import Parser, Token, TokenType
+from django.template.base import Parser, Token
 from django.template.exceptions import TemplateSyntaxError
 from django_components.expression import process_aggregate_kwargs
-from django_components.util.tag_parser import TagAttr, TagValue, parse_tag
+from django_components.util.tag_parser import TagAttr, parse_tag
@dataclass
@ -97,6 +97,17 @@ class TagSpec:
        # Set the signature on the function
        validator.__signature__ = self.signature  # type: ignore[attr-defined]
        # Call the validator with our args and kwargs, in such a way to
        # let the Python interpreter validate on repeated kwargs. E.g.
        #
        # ```
        # args, kwargs = validator(
        #     *call_args,
        #     **call_kwargs[0],
        #     **call_kwargs[1],
        #     ...
        # )
        # ```
        call_args = []
        call_kwargs = []
        for param in params:
@ -105,13 +116,12 @@ class TagSpec:
            else:
                call_kwargs.append({param.key: param.value})
        # Call the validator with our args and kwargs, in such a way to
        # let the Python interpreter validate on repeated kwargs.
        #
        # E.g. `args, kwargs = validator(*call_args, **call_kwargs[0], **call_kwargs[1])`
        #
        # NOTE: Although we use `exec()` here, it's safe, because we control the input -
-        #       we pass in only the list index.
+        #       we make dynamic only the list index.
        #
        #       We MUST use the indices, because we can trust neither the param keys nor the values,
        #       so we MUST NOT reference them directly in the exec script, otherwise we'd be at risk
        #       of injection attack.
        validator_call_script = "args, kwargs = validator(*call_args, "
        for kw_index, _ in enumerate(call_kwargs):
            validator_call_script += f"**call_kwargs[{kw_index}], "
@ -229,8 +239,6 @@ def parse_template_tag(
    token: Token,
    tag_spec: TagSpec,
 ) -> ParsedTag:
    fix_nested_tags(parser, token)
    _, attrs = parse_tag(token.contents, parser)
    # First token is tag name, e.g. `slot` in `{% slot <name> ... %}`
@ -340,138 +348,3 @@ def merge_repeated_kwargs(params: List[TagParam]) -> List[TagParam]:
            params_by_key[param.key].value += " " + str(param.value)
    return resolved_params
 def fix_nested_tags(parser: Parser, block_token: Token) -> None:
    """
    Extend `block_token` so it covers a template tag nested inside its last attribute.

    Nested tags MUST be wrapped in quotes, e.g.

    `{% component 'test' "{% lorem var_a w %}" %}`
    `{% component 'test' key="{% lorem var_a w %}" %}`

    Django ends the outer BLOCK token at the first `%}`, which belongs to the NESTED tag.
    When that is detected, this function pulls further tokens from `parser` and appends
    them to `block_token.contents` until the real closing `%}` of the outer tag is found.
    The text that follows the real closing `%}` is pushed back to `parser` as a TEXT token.

    Args:
        parser: Parser from which the follow-up tokens are consumed via `next_token()`
            and to which the leftover text is returned via `prepend_token()`.
        block_token: The (possibly truncated) BLOCK token. Its `contents` is modified in place.

    Raises:
        TemplateSyntaxError: If the follow-up tokens do not alternate between TEXT and
            non-TEXT tokens as expected, or if a token has an unknown type.
    """
    # We can parse the tag's tokens so we can find the last one, and so we consider
    # the unclosed `{%` only for the last bit.
    _, attrs = parse_tag(block_token.contents, parser)

    # If there are no attributes, then there are no nested tags
    if not attrs:
        return

    last_attr = attrs[-1]

    # TODO: Currently, using a nested template inside a list or dict
    #    e.g. `{% component ... key=["{% nested %}"] %}` is NOT supported.
    #    Hence why we leave if value is not "simple" (which means the value is list or dict).
    if last_attr.value.type != "simple":
        return

    last_attr_value = cast(TagValue, last_attr.value.entries[0])
    last_token = last_attr_value.parts[-1]

    # User probably forgot to wrap the nested tag in quotes, or this is the end of the input.
    # `{% component ... key={% nested %} %}`
    # `{% component ... key= %}`
    if not last_token.value:
        return

    # When our template tag contains a nested tag, e.g.:
    # `{% component 'test' "{% lorem var_a w %}" %}`
    #
    # Django parses this into:
    # `TokenType.BLOCK: 'component 'test'     "{% lorem var_a w'`
    #
    # Above you can see that the token ends at the end of the NESTED tag,
    # and includes `{%`. So that's what we use to identify if we need to fix
    # nested tags or not.
    has_unclosed_tag = (
        (last_token.value.count("{%") > last_token.value.count("%}"))
        # Moreover we need to also check for unclosed quotes for this edge case:
        # `{% component 'test' "{%}" %}`
        #
        # Which Django parses this into:
        # `TokenType.BLOCK: 'component 'test'  "{'`
        #
        # Here we cannot see any unclosed tags, but there is an unclosed double quote at the end.
        #
        # But we cannot naively search the full contents for unclosed quotes, but
        # only within the last 'bit'. Consider this:
        # `{% component 'test' '"' "{%}" %}`
        #
        # There are 3 double quotes, but if the contents get split at the first `%}`
        # then there will be a single unclosed double quote in the last bit.
        or (last_token.value in ("'{", '"{'))
    )

    # The quote is looked up right after the spread prefix (if any).
    # NOTE(review): this presumes `value` starts with the spread prefix - confirm against `parse_tag`.
    first_char_index = len(last_token.spread or "")
    # Slice instead of indexing, so that a value with nothing after the spread prefix
    # simply fails the check instead of raising IndexError.
    first_char = last_token.value[first_char_index : first_char_index + 1]
    has_unclosed_quote = not last_token.quoted and first_char in ('"', "'")

    needs_fixing = has_unclosed_tag and has_unclosed_quote

    if not needs_fixing:
        return

    # Restore the `%}` of the nested tag that Django consumed as the end of the outer tag.
    # We get here only when there is an unclosed quote, so no whitespace is inserted before it.
    block_token.contents += "%}"
    expects_text = True

    while True:
        # This is where we need to take parsing in our own hands, because Django parser parsed
        # only up to the first closing tag `%}`, but that closing tag corresponds to a nested tag,
        # and not to the end of the outer template tag.
        #
        # NOTE: If we run out of tokens, this will raise, and break out of the loop
        token = parser.next_token()

        # If there is a nested BLOCK `{% %}`, VAR `{{ }}`, or COMMENT `{# #}` tag inside the template tag,
        # then the way Django parses it results in alternating Tokens of TEXT and non-TEXT types.
        #
        # We use `expects_text` to know which type to handle.
        if expects_text:
            if token.token_type != TokenType.TEXT:
                raise TemplateSyntaxError(f"Template parser received TokenType '{token.token_type}' instead of 'TEXT'")

            expects_text = False

            # Once we come across a closing tag in the text, we know that's our original
            # end tag. Until then, append all the text to the block token and continue
            if "%}" not in token.contents:
                block_token.contents += token.contents
                continue

            # This is the ACTUAL end of the block template tag
            remaining_block_content, text_content = token.contents.split("%}", 1)
            block_token.contents += remaining_block_content

            # We put back into the Parser the remaining bit of the text.
            # NOTE: Looking at the implementation, `parser.prepend_token()` is the opposite
            # of `parser.next_token()`.
            parser.prepend_token(Token(TokenType.TEXT, contents=text_content))
            break

        # In this case we've come across a next block tag `{% %}` inside the template tag
        # This isn't the first occurrence, where the `{%` was ignored. And so, the content
        # between the `{% %}` is correctly captured, e.g.
        #
        # `{% firstof False 0 is_active %}`
        # gives
        # `TokenType.BLOCK: 'firstof False 0 is_active'`
        #
        # But we don't want to evaluate this as a standalone BLOCK tag, and instead append
        # it to the block tag that this nested block is part of
        else:
            if token.token_type == TokenType.TEXT:
                raise TemplateSyntaxError(
                    f"Template parser received TokenType '{token.token_type}' instead of 'BLOCK', 'VAR', 'COMMENT'"
                )

            if token.token_type == TokenType.BLOCK:
                block_token.contents += "{% " + token.contents + " %}"
            elif token.token_type == TokenType.VAR:
                block_token.contents += "{{ " + token.contents + " }}"
            elif token.token_type == TokenType.COMMENT:
                pass  # Comments are ignored
            else:
                raise TemplateSyntaxError(f"Unknown token type '{token.token_type}'")

            expects_text = True
            continue
--- a/tests/test_template_parser.py
+++ b/tests/test_template_parser.py
@ -0,0 +1,247 @@
 from django.template import Context
 from django.template.base import Template, Token, TokenType
 from django_components import Component, register, types
 from django_components.util.template_parser import parse_template
 from .django_test_setup import setup_test_config
 from .testutils import BaseTestCase
 setup_test_config({"autodiscover": False})
 def token2tuple(token: Token):
    """Flatten a token into `(token_type, contents, (start, end), lineno)` for easy comparison."""
    kind = token.token_type
    text = token.contents
    # Only the first two entries of `position` are kept, always as a tuple.
    span = (token.position[0], token.position[1])
    return (kind, text, span, token.lineno)
 class TemplateParserTests(BaseTestCase):
    def test_template_text(self):
        """Input without any template syntax is returned as a single TEXT token."""
        actual = list(map(token2tuple, parse_template("Hello world")))
        expected = [(TokenType.TEXT, "Hello world", (0, 11), 1)]
        self.assertEqual(actual, expected)
    def test_template_variable(self):
        """Text followed by `{{ ... }}` gives a TEXT token and then a VAR token."""
        parsed = parse_template("Hello {{ name }}")
        actual = list(map(token2tuple, parsed))
        expected = [
            (TokenType.TEXT, "Hello ", (0, 6), 1),
            (TokenType.VAR, "name", (6, 16), 1),
        ]
        self.assertEqual(actual, expected)
    # NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it
    def test_template_variable_unterminated(self):
        """An unclosed `{{` does not raise; the whole input is kept as one TEXT token."""
        parsed = parse_template("Hello {{ name")
        actual = list(map(token2tuple, parsed))
        expected = [(TokenType.TEXT, "Hello {{ name", (0, 13), 1)]
        self.assertEqual(actual, expected)
    def test_template_tag(self):
        """A lone `{% ... %}` tag becomes a single BLOCK token holding the tag's inner contents."""
        source = "{% component 'my_comp' key=val %}"
        actual = [token2tuple(tok) for tok in parse_template(source)]
        expected = [(TokenType.BLOCK, "component 'my_comp' key=val", (0, 33), 1)]
        self.assertEqual(actual, expected)
    # NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it
    def test_template_tag_unterminated(self):
        """An unclosed `{%` does not raise; the whole input is kept as one TEXT token."""
        parsed = parse_template("{% if true")
        actual = list(map(token2tuple, parsed))
        expected = [(TokenType.TEXT, "{% if true", (0, 10), 1)]
        self.assertEqual(actual, expected)
    def test_template_comment(self):
        """A `{# ... #}` comment between two texts gives TEXT, COMMENT, TEXT tokens."""
        source = "Hello{# this is a comment #}World"
        actual = [token2tuple(tok) for tok in parse_template(source)]
        expected = [
            (TokenType.TEXT, "Hello", (0, 5), 1),
            (TokenType.COMMENT, "this is a comment", (5, 28), 1),
            (TokenType.TEXT, "World", (28, 33), 1),
        ]
        self.assertEqual(actual, expected)
    # NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it
    def test_template_comment_unterminated(self):
        """An unclosed `{#` does not raise; the whole input is kept as one TEXT token."""
        parsed = parse_template("{# comment")
        actual = list(map(token2tuple, parsed))
        expected = [(TokenType.TEXT, "{# comment", (0, 10), 1)]
        self.assertEqual(actual, expected)
    def test_template_verbatim(self):
        """Tag-like and variable-like syntax between `verbatim` / `endverbatim` is emitted as TEXT tokens."""
        source = """{% verbatim %}
                {{ this_is_not_a_var }}
                {% this_is_not_a_tag %}
            {% endverbatim %}"""
        actual = [token2tuple(tok) for tok in parse_template(source)]
        expected = [
            (TokenType.BLOCK, "verbatim", (0, 14), 1),
            (TokenType.TEXT, "\n                ", (14, 31), 1),
            (TokenType.TEXT, "{{ this_is_not_a_var }}", (31, 54), 2),
            (TokenType.TEXT, "\n                ", (54, 71), 2),
            (TokenType.TEXT, "{% this_is_not_a_tag %}", (71, 94), 3),
            (TokenType.TEXT, "\n            ", (94, 107), 3),
            (TokenType.BLOCK, "endverbatim", (107, 124), 4),
        ]
        self.assertEqual(actual, expected)
    def test_template_verbatim_with_name(self):
        """A named verbatim block is closed only by the `endverbatim` tag that carries the same name."""
        source = """{% verbatim myblock %}
                {{ this_is_not_a_var }}
                {% verbatim %}
                {% endverbatim %}
                {% endverbatim blockname %}
            {% endverbatim myblock %}"""
        actual = [token2tuple(tok) for tok in parse_template(source)]
        expected = [
            (TokenType.BLOCK, "verbatim myblock", (0, 22), 1),
            (TokenType.TEXT, "\n                ", (22, 39), 1),
            (TokenType.TEXT, "{{ this_is_not_a_var }}", (39, 62), 2),
            (TokenType.TEXT, "\n                ", (62, 79), 2),
            (TokenType.TEXT, "{% verbatim %}", (79, 93), 3),
            (TokenType.TEXT, "\n                ", (93, 110), 3),
            (TokenType.TEXT, "{% endverbatim %}", (110, 127), 4),
            (TokenType.TEXT, "\n                ", (127, 144), 4),
            (TokenType.TEXT, "{% endverbatim blockname %}", (144, 171), 5),
            (TokenType.TEXT, "\n            ", (171, 184), 5),
            (TokenType.BLOCK, "endverbatim myblock", (184, 209), 6),
        ]
        self.assertEqual(actual, expected)
    def test_template_nested_tags(self):
        """A quoted tag nested inside a component tag stays part of the single outer BLOCK token."""
        source = """{% component 'test' "{% lorem var_a w %}" %}"""
        actual = [token2tuple(tok) for tok in parse_template(source)]
        expected = [
            (TokenType.BLOCK, "component 'test' \"{% lorem var_a w %}\"", (0, 44), 1),
        ]
        self.assertEqual(actual, expected)
    def test_brackets_and_percent_in_text(self):
        """Quoted `{%}`, a lone quote character and a nested tag all remain inside one BLOCK token."""
        source = '{% component \'test\' \'"\' "{%}" bool_var="{% noop is_active %}" / %}'
        actual = [token2tuple(tok) for tok in parse_template(source)]
        expected = [
            (TokenType.BLOCK, 'component \'test\' \'"\' "{%}" bool_var="{% noop is_active %}" /', (0, 66), 1),
        ]
        self.assertEqual(actual, expected)
    def test_template_mixed(self):
        """Text, variable, comment, block, nested-tag and verbatim syntax are tokenized together correctly."""
        source = """Hello {{ name }}
            {# greeting #}
            {% if show_greeting %}
                <h1>Welcome!</h1>
                {% component 'test' key="{% lorem var_a w %}" %}
                    {% verbatim %}
                        {% endcomponent %}
                    {% endverbatim %}
                {% endcomponent %}
            {% endif %}"""
        actual = [token2tuple(tok) for tok in parse_template(source)]
        expected = [
            (TokenType.TEXT, "Hello ", (0, 6), 1),
            (TokenType.VAR, "name", (6, 16), 1),
            (TokenType.TEXT, "\n            ", (16, 29), 1),
            (TokenType.COMMENT, "greeting", (29, 43), 2),
            (TokenType.TEXT, "\n            ", (43, 56), 2),
            (TokenType.BLOCK, "if show_greeting", (56, 78), 3),
            (TokenType.TEXT, "\n                <h1>Welcome!</h1>\n                ", (78, 129), 3),
            (TokenType.BLOCK, "component 'test' key=\"{% lorem var_a w %}\"", (129, 177), 5),
            (TokenType.TEXT, "\n                    ", (177, 198), 5),
            (TokenType.BLOCK, "verbatim", (198, 212), 6),
            (TokenType.TEXT, "\n                        ", (212, 237), 6),
            (TokenType.TEXT, "{% endcomponent %}", (237, 255), 7),
            (TokenType.TEXT, "\n                    ", (255, 276), 7),
            (TokenType.BLOCK, "endverbatim", (276, 293), 8),
            (TokenType.TEXT, "\n                ", (293, 310), 8),
            (TokenType.BLOCK, "endcomponent", (310, 328), 9),
            (TokenType.TEXT, "\n            ", (328, 341), 9),
            (TokenType.BLOCK, "endif", (341, 352), 10),
        ]
        self.assertEqual(actual, expected)
    # Check that a template that contains `{% %}` inside of a component tag is parsed correctly
    def test_component_mixed(self):
        """Render a template whose component tag has a nested tag as a kwarg and a verbatim block as its body."""

        @register("test")
        class Test(Component):
            template: types.django_html = """
                {% load component_tags %}
                Var: {{ var }}
                Slot: {% slot "content" default / %}
            """

            def get_context_data(self, var: str) -> dict:
                return {"var": var}

        outer_source: types.django_html = """
            {% load component_tags %}
            <div>
                Hello {{ name }}
                {# greeting #}
                {% if show_greeting %}
                    <h1>Welcome!</h1>
                    {% component 'test' var="{% lorem var_a w %}" %}
                        {% verbatim %}
                            {% endcomponent %}
                        {% endverbatim %}
                    {% endcomponent %}
                {% endif %}
            </div>
        """
        compiled = Template(outer_source)
        context = Context({"name": "John", "show_greeting": True, "var_a": 2})
        rendered = compiled.render(context)

        # The `{% endcomponent %}` inside the verbatim block is expected to come out as literal slot content.
        expected_html = """
            <div>
                Hello John
                <h1>Welcome!</h1>
                Var: lorem ipsum
                Slot: {% endcomponent %}
            </div>
            """
        self.assertHTMLEqual(rendered, expected_html)