diff --git a/benchmarks/test_lexer_performance.py b/benchmarks/test_lexer_performance.py
new file mode 100644
index 00000000..b95f7d63
--- /dev/null
+++ b/benchmarks/test_lexer_performance.py
@@ -0,0 +1,195 @@
+# NOTE: This file is more of a playground than a proper test
+
+import timeit
+from typing import List, Tuple
+
+from django.template.base import DebugLexer, Lexer, Token
+
+from django_components.util.template_parser import parse_template
+
+
+def django_lexer(template: str) -> List[Token]:
+    """Tokenize `template` with Django's standard `Lexer` and return the tokens as a list."""
+    tokens = Lexer(template).tokenize()
+    return list(tokens)
+
+
+def django_debug_lexer(template: str) -> List[Token]:
+    """Tokenize `template` with Django's `DebugLexer` and return the tokens as a list."""
+    tokens = DebugLexer(template).tokenize()
+    return list(tokens)
+
+
+def run_benchmark(template: str, num_iterations: int = 5000) -> Tuple[float, float]:
+    """Time Django's DebugLexer against `parse_template`; return `(django_debug_time, custom_time)`."""
+    # The plain (non-debug) Django lexer is not timed here; only DebugLexer vs. `parse_template`.
+    def time_it(tokenize) -> float:
+        return timeit.timeit(lambda: tokenize(template), number=num_iterations)
+
+    return time_it(django_debug_lexer), time_it(parse_template)
+
+
+def print_benchmark_results(template: str, django_time: float, custom_time: float, num_iterations: int) -> None:
+    """Print both timings, their absolute difference, and the speed ratio (always reported as >= 1x)."""
+    slow, fast = max(django_time, custom_time), min(django_time, custom_time)
+    ratio = slow / fast if fast > 0 else float("inf")  # guard against a zero timing
+    print(f"\nTemplate: {template}\nIterations: {num_iterations}")
+    print(f"Django Lexer: {django_time:.6f} seconds\nCustom Lexer: {custom_time:.6f} seconds")
+    print(f"Difference: {abs(django_time - custom_time):.6f} seconds")
+    print(f"Custom lexer is {ratio:.2f}x {'faster' if custom_time < django_time else 'slower'}")
+
+
+if __name__ == "__main__":
+ test_cases = [
+ # Simple text
+ "Hello World",
+ # Simple variable
+ "Hello {{ name }}",
+ # Simple block
+ "{% if condition %}Hello{% endif %}",
+ # Complex nested template
+ """
+ {% extends "base.html" %}
+ {% block content %}
+
{{ title }}
+ {% for item in items %}
+
+ {{ item.name }}
+ {% if item.description %}
+
{{ item.description }}
+ {% endif %}
+
+ {% endfor %}
+ {% endblock %}
+ """,
+ # Component with nested tags
+ """
+ {% component 'table'
+ headers=headers
+ rows=rows
+ footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
+ title="{% trans 'Data Table' %}"
+ %}
+ """,
+ # Real world example
+ """
+
+ {# Info section #}
+
+
+
Project Info
+
+ {% if editable %}
+ {% component "Button"
+ href=project_edit_url
+ attrs:class="not-prose"
+ footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
+ title="{% trans 'Data Table' %}"
+ %}
+ Edit Project
+ {% endcomponent %}
+ {% endif %}
+
+
+
+ {% for key, value in project_info %}
+
+
+ {{ key }}:
+ |
+
+ {{ value }}
+ |
+
+ {% endfor %}
+
+
+
+ {# Status Updates section #}
+ {% component "ProjectStatusUpdates"
+ project_id=project.pk
+ status_updates=status_updates
+ editable=editable
+ footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
+ title="{% trans 'Data Table' %}"
+ / %}
+
+ {# Team section #}
+
+
+
Dcode Team
+
+ {% if editable %}
+ {% component "Button"
+ href=edit_project_roles_url
+ attrs:class="not-prose"
+ footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
+ title="{% trans 'Data Table' %}"
+ %}
+ Edit Team
+ {% endcomponent %}
+ {% endif %}
+
+
+ {% component "ProjectUsers"
+ project_id=project.pk
+ roles_with_users=roles_with_users
+ editable=False
+ footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
+ title="{% trans 'Data Table' %}"
+ / %}
+
+
+ {# POCs section #}
+
+
+
Client POCs
+
+ {% if editable %}
+ {% component "Button"
+ href=edit_pocs_url
+ attrs:class="not-prose"
+ footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
+ title="{% trans 'Data Table' %}"
+ %}
+ Edit POCs
+ {% endcomponent %}
+ {% endif %}
+
+
+ {% if poc_data %}
+
+
+ Name |
+ Job Title |
+ Hubspot Profile |
+
+ {% for data in poc_data %}
+
+ {{ data.poc.contact.first_name }} {{ data.poc.contact.last_name }} |
+ {{ data.poc.contact.job_title }} |
+
+ {% component "Icon"
+ href=data.hubspot_url
+ name="arrow-top-right-on-square"
+ variant="outline"
+ color="text-gray-400 hover:text-gray-500"
+ footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
+ title="{% trans 'Data Table' %}"
+ / %}
+ |
+
+ {% endfor %}
+
+ {% else %}
+
No entries
+ {% endif %}
+
+
+
+ """,
+ ]
+ num_iterations = 5000  # one value for both the timing run and the report, so they cannot disagree
+ for template in test_cases:
+     django_time, custom_time = run_benchmark(template, num_iterations)
+     print_benchmark_results(template, django_time, custom_time, num_iterations)
diff --git a/src/django_components/apps.py b/src/django_components/apps.py
index ba32beba..e15b7fb5 100644
--- a/src/django_components/apps.py
+++ b/src/django_components/apps.py
@@ -15,14 +15,14 @@ class ComponentsConfig(AppConfig):
def ready(self) -> None:
from django_components.app_settings import app_settings
from django_components.autodiscovery import autodiscover, import_libraries
- from django_components.component import monkeypatch_template
from django_components.component_registry import registry
from django_components.components.dynamic import DynamicComponent
+ from django_components.util.django_monkeypatch import monkeypatch_template_cls
# NOTE: This monkeypatch is applied here, before Django processes any requests.
# To make django-components work with django-debug-toolbar-template-profiler
# See https://github.com/EmilStenstrom/django-components/discussions/819
- monkeypatch_template(Template)
+ monkeypatch_template_cls(Template)
# Import modules set in `COMPONENTS.libraries` setting
import_libraries()
diff --git a/src/django_components/component.py b/src/django_components/component.py
index 01188fae..4fdb2033 100644
--- a/src/django_components/component.py
+++ b/src/django_components/component.py
@@ -68,6 +68,7 @@ from django_components.slots import (
resolve_fills,
)
from django_components.template import cached_template
+from django_components.util.django_monkeypatch import is_template_cls_patched
from django_components.util.logger import trace_msg
from django_components.util.misc import gen_id
from django_components.util.template_tag import TagParams
@@ -1272,54 +1273,6 @@ class ComponentNode(BaseNode):
return output
-def monkeypatch_template(template_cls: Type[Template]) -> None:
- # Modify `Template.render` to set `isolated_context` kwarg of `push_state`
- # based on our custom `Template._dc_is_component_nested`.
- #
- # Part of fix for https://github.com/EmilStenstrom/django-components/issues/508
- #
- # NOTE 1: While we could've subclassed Template, then we would need to either
- # 1) ask the user to change the backend, so all templates are of our subclass, or
- # 2) copy the data from user's Template class instance to our subclass instance,
- # which could lead to doubly parsing the source, and could be problematic if users
- # used more exotic subclasses of Template.
- #
- # Instead, modifying only the `render` method of an already-existing instance
- # should work well with any user-provided custom subclasses of Template, and it
- # doesn't require the source to be parsed multiple times. User can pass extra args/kwargs,
- # and can modify the rendering behavior by overriding the `_render` method.
- #
- # NOTE 2: Instead of setting `Template._dc_is_component_nested`, alternatively we could
- # have passed the value to `monkeypatch_template` directly. However, we intentionally
- # did NOT do that, so the monkey-patched method is more robust, and can be e.g. copied
- # to other.
- if hasattr(template_cls, "_dc_patched"):
- # Do not patch if done so already. This helps us avoid RecursionError
- return
-
- def _template_render(self: Template, context: Context, *args: Any, **kwargs: Any) -> str:
- # ---------------- OUR CHANGES START ----------------
- # We parametrized `isolated_context`, which was `True` in the original method.
- if not hasattr(self, "_dc_is_component_nested"):
- isolated_context = True
- else:
- # MUST be `True` for templates that are NOT import with `{% extends %}` tag,
- # and `False` otherwise.
- isolated_context = not self._dc_is_component_nested
- # ---------------- OUR CHANGES END ----------------
-
- with context.render_context.push_state(self, isolated_context=isolated_context):
- if context.template is None:
- with context.bind_template(self):
- context.template_name = self.name
- return self._render(context, *args, **kwargs)
- else:
- return self._render(context, *args, **kwargs)
-
- template_cls.render = _template_render
- template_cls._dc_patched = True
-
-
@contextmanager
def _maybe_bind_template(context: Context, template: Template) -> Generator[None, Any, None]:
if context.template is None:
@@ -1342,7 +1295,7 @@ def _prepare_template(
# And https://github.com/EmilStenstrom/django-components/issues/634
template = component._get_template(context)
- if not getattr(template, "_dc_patched"):
+ if not is_template_cls_patched(template):
raise RuntimeError(
"Django-components received a Template instance which was not patched."
"If you are using Django's Template class, check if you added django-components"
@@ -1350,10 +1303,11 @@ def _prepare_template(
"manually patch the class."
)
- # Set `Template._dc_is_component_nested` based on whether we're currently INSIDE
+ # Set `Template._djc_is_component_nested` based on whether we're currently INSIDE
# the `{% extends %}` tag.
# Part of fix for https://github.com/EmilStenstrom/django-components/issues/508
- template._dc_is_component_nested = bool(context.render_context.get(BLOCK_CONTEXT_KEY))
+ # See django_monkeypatch.py
+ template._djc_is_component_nested = bool(context.render_context.get(BLOCK_CONTEXT_KEY))
with _maybe_bind_template(context, template):
yield template
diff --git a/src/django_components/expression.py b/src/django_components/expression.py
index 81b819ea..f2e28606 100644
--- a/src/django_components/expression.py
+++ b/src/django_components/expression.py
@@ -2,7 +2,9 @@ import re
from typing import TYPE_CHECKING, Any, Dict, List
from django.template import Context, Node, NodeList, TemplateSyntaxError
-from django.template.base import Lexer, Parser, VariableNode
+from django.template.base import Parser, VariableNode
+
+from django_components.util.template_parser import parse_template
if TYPE_CHECKING:
from django_components.util.template_tag import TagParam
@@ -48,8 +50,7 @@ class DynamicFilterExpression:
# Copy the Parser, and pass through the tags and filters available
# in the current context. Thus, if user calls `{% load %}` inside
# the expression, it won't spill outside.
- lexer = Lexer(self.expr)
- tokens = lexer.tokenize()
+ tokens = parse_template(self.expr)
expr_parser = Parser(tokens=tokens)
expr_parser.tags = {**parser.tags}
expr_parser.filters = {**parser.filters}
diff --git a/src/django_components/templatetags/component_tags.py b/src/django_components/templatetags/component_tags.py
index b14000c8..78938630 100644
--- a/src/django_components/templatetags/component_tags.py
+++ b/src/django_components/templatetags/component_tags.py
@@ -28,7 +28,7 @@ from django_components.slots import SLOT_DEFAULT_KEYWORD, SLOT_REQUIRED_KEYWORD,
from django_components.tag_formatter import get_tag_formatter
from django_components.util.logger import trace_msg
from django_components.util.misc import gen_id
-from django_components.util.template_tag import TagSpec, fix_nested_tags, parse_template_tag, with_tag_spec
+from django_components.util.template_tag import TagSpec, parse_template_tag, with_tag_spec
# NOTE: Variable name `register` is required by Django to recognize this as a template tag library
# See https://docs.djangoproject.com/en/dev/howto/custom-template-tags
@@ -492,7 +492,6 @@ def component(
"""
tag_id = gen_id()
- fix_nested_tags(parser, token)
bits = token.split_contents()
# Let the TagFormatter pre-process the tokens
diff --git a/src/django_components/util/django_monkeypatch.py b/src/django_components/util/django_monkeypatch.py
new file mode 100644
index 00000000..e81b8a4c
--- /dev/null
+++ b/src/django_components/util/django_monkeypatch.py
@@ -0,0 +1,110 @@
+from typing import Any, Type
+
+from django.template import Context, NodeList, Template
+from django.template.base import Parser
+
+from django_components.util.template_parser import parse_template
+
+
+# In some cases we can't work around Django's design, and need to patch the template class.
+def monkeypatch_template_cls(template_cls: Type[Template]) -> None:
+    for apply_patch in (monkeypatch_template_compile_nodelist, monkeypatch_template_render):
+        apply_patch(template_cls)  # each function swaps out one method on the class
+    template_cls._djc_patched = True  # marker read back by `is_template_cls_patched()`
+
+
+# Patch `Template.compile_nodelist` to use our custom parser. Our parser makes it possible
+# to use template tags as inputs to the component tag:
+#
+# {% component "my-component" description="{% lorem 3 w %}" / %}
+def monkeypatch_template_compile_nodelist(template_cls: Type[Template]) -> None:
+ def _compile_nodelist(self: Template) -> NodeList:
+ """
+ Parse and compile the template source into a nodelist, tokenizing it with
+ `parse_template()` instead of Django's lexers. If debug is True and an
+ exception occurs during parsing, the exception is annotated with contextual
+ line information where it occurred in the template source.
+ """
+ # ---------------- ORIGINAL (Django v5.1.3) ----------------
+ # if self.engine.debug:
+ # lexer = DebugLexer(self.source)
+ # else:
+ # lexer = Lexer(self.source)
+
+ # tokens = lexer.tokenize()
+ # ---------------- OUR CHANGES START ----------------
+ tokens = parse_template(self.source)  # NOTE: unlike the original, not switched on `self.engine.debug`
+ # ---------------- OUR CHANGES END ----------------
+ parser = Parser(
+ tokens,
+ self.engine.template_libraries,
+ self.engine.template_builtins,
+ self.origin,
+ )
+
+ try:
+ # ---------------- ADDED IN Django v5.1 - See https://github.com/django/django/commit/35bbb2c9c01882b1d77b0b8c737ac646144833d4 # noqa: E501
+ nodelist = parser.parse()
+ self.extra_data = getattr(parser, "extra_data", {})  # default `{}` tolerates parsers without `extra_data`
+ # ---------------- END OF ADDED IN Django v5.1 ----------------
+ return nodelist
+ except Exception as e:
+ if self.engine.debug:
+ e.template_debug = self.get_exception_info(e, e.token) # type: ignore
+ raise
+
+ template_cls.compile_nodelist = _compile_nodelist  # replaces the method on the class itself
+
+
+def monkeypatch_template_render(template_cls: Type[Template]) -> None:
+ # Modify `Template.render` to set `isolated_context` kwarg of `push_state`
+ # based on our custom `Template._djc_is_component_nested`.
+ #
+ # Part of fix for https://github.com/EmilStenstrom/django-components/issues/508
+ #
+ # NOTE 1: While we could've subclassed Template, then we would need to either
+ # 1) ask the user to change the backend, so all templates are of our subclass, or
+ # 2) copy the data from user's Template class instance to our subclass instance,
+ # which could lead to doubly parsing the source, and could be problematic if users
+ # used more exotic subclasses of Template.
+ #
+ # Instead, modifying only the `render` method of an already-existing class
+ # should work well with any user-provided custom subclasses of Template, and it
+ # doesn't require the source to be parsed multiple times. User can pass extra args/kwargs,
+ # and can modify the rendering behavior by overriding the `_render` method.
+ #
+ # NOTE 2: Instead of setting `Template._djc_is_component_nested`, alternatively we could
+ # have passed the value to `monkeypatch_template_render` directly. However, we intentionally
+ # did NOT do that, so the monkey-patched method is more robust, and can be e.g. copied
+ # to other.
+ if is_template_cls_patched(template_cls):
+ # Do not patch if done so already. This helps us avoid RecursionError
+ return
+
+ def _template_render(self: Template, context: Context, *args: Any, **kwargs: Any) -> str:
+ "Display stage -- can be called many times"
+ # ---------------- ORIGINAL (Django v5.1.3) ----------------
+ # with context.render_context.push_state(self):
+ # ---------------- OUR CHANGES START ----------------
+ # We parametrized `isolated_context`, which was `True` in the original method.
+ if not hasattr(self, "_djc_is_component_nested"):
+ isolated_context = True
+ else:
+ # MUST be `True` for templates that are NOT imported with `{% extends %}` tag,
+ # and `False` otherwise.
+ isolated_context = not self._djc_is_component_nested
+
+ with context.render_context.push_state(self, isolated_context=isolated_context):
+ # ---------------- OUR CHANGES END ----------------
+ if context.template is None:
+ with context.bind_template(self):
+ context.template_name = self.name
+ return self._render(context, *args, **kwargs)
+ else:
+ return self._render(context, *args, **kwargs)
+
+ template_cls.render = _template_render  # replaces the method on the class itself
+
+
+def is_template_cls_patched(template_cls: Any) -> bool:
+    return bool(getattr(template_cls, "_djc_patched", False))  # accepts a Template class or an instance of it
diff --git a/src/django_components/util/template_parser.py b/src/django_components/util/template_parser.py
new file mode 100644
index 00000000..c66cbcd9
--- /dev/null
+++ b/src/django_components/util/template_parser.py
@@ -0,0 +1,227 @@
+"""
+Parser for Django template.
+
+The parser reads a template file (usually HTML, but not necessarily), which may contain
+"template tags" like this:
+
+```django
+{% component 'my_comp' key=val key2='val2 two' %}
+{% endcomponent %}
+
+{{ my_var }}
+
+{# I am comment #}
+```
+
+and returns a list of Tokens:
+
+```py
+[
+ (TokenType.TEXT, '\n', (0, 1), 1),
+ (TokenType.BLOCK, "component 'my_comp' key=val key2='val2 two'", (1, 50), 2),
+ (TokenType.TEXT, '\n', (50, 51), 2),
+ (TokenType.BLOCK, 'endcomponent', (51, 69), 3),
+ (TokenType.TEXT, '\n\n', (69, 71), 3),
+ (TokenType.VAR, 'my_var', (71, 83), 5),
+ (TokenType.TEXT, '\n\n', (83, 85), 5),
+ (TokenType.COMMENT, 'I am comment', (85, 103), 7),
+ (TokenType.TEXT, '\n', (103, 104), 7),
+]
+```
+
+See `parse_template()` for details.
+"""
+
+import re
+from functools import lru_cache
+from typing import List, Optional, Tuple
+
+from django.template.base import DebugLexer, Token, TokenType
+from django.template.exceptions import TemplateSyntaxError
+
+
+# NOTE: As of 0.125, the strategy is to use Django's lexer, and use our own parser
+# only when necessary, for the shortest time possible.
+#
+# Before I switched to this strategy, my initial parser was about 50x slower than Django's lexer.
+# I (Juro) assume it was because I was walking character by character, instead of using a regex.
+#
+# The overall speed should then depend on the number of broken tokens in the template.
+#
+# Performance of the new strategy on a real-world example:
+# - A template with about 110 lines and 6 components
+# - Components spanning ~35 lines in total, so roughly 1/3 of the template
+# - The custom parser is about 8x slower than Django's Debug lexer.
+# - For a mid-sized project of 200 templates, it would take 7-8 seconds to load all the templates
+# (from 1 second with Django's lexer).
+# - However, thanks to django-component's lazy-loading, this should not be a problem.
+#
+# How it works is that:
+# 1. We use Django's lexer to get the tokens.
+# 2. We check them one-by-one, and if we find a broken token, we switch to our parser to fix it.
+# 3. Once the broken token is fixed, we find its end position, and switch back to the Django lexer
+# for the remaining text (step 1).
+def parse_template(text: str) -> List[Token]:
+    """Tokenize `text` into Django `Token`s, re-parsing quoted block tags so `%}` may appear inside strings."""
+
+    resolved_tokens: List[Token] = []
+    index_start, index_end = 0, len(text)
+    lineno_offset = 0  # added to the 1-based line numbers of each re-lexed slice
+
+    while index_start < index_end:
+        broken_token: Optional[Token] = None
+        # Do fast tokenization with regex - This is about 50x faster than our custom tokenizer.
+        # We use DebugLexer because we need to get the position of the tokens.
+        # DebugLexer and Lexer have very similar speeds, Debug is about 33% slower.
+        lexer = DebugLexer(text[index_start:index_end])
+        tokens: List[Token] = lexer.tokenize()
+
+        for token in tokens:
+            token.lineno += lineno_offset
+            token.position = (token.position[0] + index_start, token.position[1] + index_start)
+
+            if token.token_type == TokenType.BLOCK and ("'" in token.contents or '"' in token.contents):
+                broken_token = token
+                break
+            resolved_tokens.append(token)
+
+        # No broken token means the rest of the text was lexed fully. Otherwise switch to our slow parser.
+        if broken_token is None:
+            break
+
+        broken_token_start = broken_token.position[0]
+        fixed_token = _detailed_tag_parser(text[broken_token_start:], broken_token.lineno, broken_token_start)
+        resolved_tokens.append(fixed_token)
+        index_start = fixed_token.position[1]
+        # `fixed_token.lineno` is already absolute, so the offset is assigned, not accumulated. Newlines are
+        # counted on the raw tag text, because `contents` is stripped and may have lost some of them.
+        lineno_offset = (
+            fixed_token.lineno - 1  # -1 because lines are 1-indexed
+            + text[broken_token_start:index_start].count("\n")
+        )  # fmt: skip
+
+    return resolved_tokens
+
+
+# Handle parsing of `{% %}` tags, while allowing `%}` inside of strings
+def _detailed_tag_parser(text: str, lineno: int, start_index: int) -> Token:
+    """
+    Parse one `{% ... %}` tag at the start of `text`, treating quoted strings as opaque.
+
+    `start_index` is the offset of `text` in the whole template; it shifts the token position.
+    Raises `TemplateSyntaxError` on an unterminated string or an unterminated tag.
+    """
+    index = 0
+    length = len(text)
+    result_content: List[str] = []
+
+    # Pre-compute common substrings
+    QUOTE_CHARS = ("'", '"')
+    QUOTE_OR_PERCENT = (*QUOTE_CHARS, "%")
+
+    def take_char() -> str:
+        nonlocal index
+        if index >= length:
+            return ""
+        char = text[index]
+        index += 1
+        return char
+
+    def peek_char(offset: int = 0) -> str:
+        peek_index = index + offset
+        if peek_index >= length:
+            return ""
+        return text[peek_index]
+
+    # This is an optimized version that uses regex to find the next stop character
+    # and ignores the stop characters if they are prefixed by a backslash, if allow_escapes is True.
+    #
+    # For the intuition, the original version is:
+    #
+    # ```py
+    # def take_until_any(stop_chars: Tuple[str, ...], allow_escapes: bool = False) -> str:
+    #     nonlocal index
+    #     start = index
+    #     while index < length:
+    #         char = text[index]
+    #         if allow_escapes and char == BACKSLASH and index + 1 < length:
+    #             index += 2
+    #             continue
+    #         if char in stop_chars:
+    #             break
+    #         index += 1
+    #     return text[start:index]
+    # ```
+    def take_until_any(stop_chars: Tuple[str, ...], allow_escapes: bool = False) -> str:
+        nonlocal index
+        pattern = _compile_take_until_pattern("".join(stop_chars), allow_escapes)
+        # Find match at current position
+        match = pattern.match(text, index)
+        if match:
+            matched_text = match.group(0)
+            index += len(matched_text)
+            return matched_text
+        return ""
+
+    # This function is called only for a broken token, so the first two characters are always "{%"
+    take_char()  # {
+    take_char()  # %
+
+    while index < length:
+        char = peek_char()
+
+        # Handle strings within `{% %}`
+        if char in QUOTE_CHARS:
+            quote_char = take_char()
+            result_content.append(quote_char)
+
+            # Take content until matching quote, allowing escaped quotes
+            content = take_until_any((quote_char,), allow_escapes=True)
+            result_content.append(content)
+
+            # Handle the closing quote
+            if peek_char() == quote_char:
+                result_content.append(take_char())
+            else:
+                raise TemplateSyntaxError(f"Unexpected end of text - unterminated {quote_char} string")
+            continue
+
+        # Check for closing tag
+        if char == "%":
+            if peek_char(1) == "}":
+                take_char()  # %
+                take_char()  # }
+                break
+            else:
+                # False alarm - a lone `%` that does not close the tag. Consume only that one
+                # character, so that a `%}` further on is still recognized as the closing tag.
+                result_content.append(take_char())
+                continue
+
+        # Take regular content until we hit a quote or potential closing tag
+        content = take_until_any(QUOTE_OR_PERCENT)
+        result_content.append(content)
+
+    else:
+        raise TemplateSyntaxError("Unexpected end of text - unterminated {% tag")
+
+    result_str = "".join(result_content).strip()  # Django's Lexer.tokenize() strips the whitespace
+    return Token(TokenType.BLOCK, result_str, (start_index, index + start_index), lineno)
+
+
+# Create a regex pattern that takes anything until any of the stop characters are found.
+#
+# If allow_escapes is True, also the stop characters are allowed, given that they are
+# prefixed by a backslash.
+@lru_cache(maxsize=128)
+def _compile_take_until_pattern(stop_chars: str, allow_escapes: bool) -> re.Pattern:
+    # Negated character class matching any single character that is NOT a stop character.
+    non_stop = "[^" + "".join(map(re.escape, stop_chars)) + "]"
+
+    # With escapes allowed, a backslash followed by any character (except a newline, as `.`
+    # is used without DOTALL) is consumed as a pair, so an escaped stop character does not stop.
+    # The escaped-pair alternative is listed first so it wins over the plain character class.
+    if allow_escapes:
+        return re.compile(f"(?:\\\\.|{non_stop})*")
+
+    return re.compile(f"{non_stop}*")
diff --git a/src/django_components/util/template_tag.py b/src/django_components/util/template_tag.py
index 873c2da8..520e5e23 100644
--- a/src/django_components/util/template_tag.py
+++ b/src/django_components/util/template_tag.py
@@ -1,14 +1,14 @@
import functools
import inspect
from dataclasses import dataclass
-from typing import Any, Callable, Dict, Iterable, List, Mapping, NamedTuple, Optional, Set, Tuple, cast
+from typing import Any, Callable, Dict, Iterable, List, Mapping, NamedTuple, Optional, Set, Tuple
from django.template import Context, NodeList
-from django.template.base import Parser, Token, TokenType
+from django.template.base import Parser, Token
from django.template.exceptions import TemplateSyntaxError
from django_components.expression import process_aggregate_kwargs
-from django_components.util.tag_parser import TagAttr, TagValue, parse_tag
+from django_components.util.tag_parser import TagAttr, parse_tag
@dataclass
@@ -97,6 +97,17 @@ class TagSpec:
# Set the signature on the function
validator.__signature__ = self.signature # type: ignore[attr-defined]
+ # Call the validator with our args and kwargs, in such a way to
+ # let the Python interpreter validate on repeated kwargs. E.g.
+ #
+ # ```
+ # args, kwargs = validator(
+ # *call_args,
+ # **call_kwargs[0],
+ # **call_kwargs[1],
+ # ...
+ # )
+ # ```
call_args = []
call_kwargs = []
for param in params:
@@ -105,13 +116,12 @@ class TagSpec:
else:
call_kwargs.append({param.key: param.value})
- # Call the validator with our args and kwargs, in such a way to
- # let the Python interpreter validate on repeated kwargs.
- #
- # E.g. `args, kwargs = validator(*call_args, **call_kwargs[0], **call_kwargs[1])`
- #
# NOTE: Although we use `exec()` here, it's safe, because we control the input -
- # we pass in only the list index.
+ # we make dynamic only the list index.
+ #
+ # We MUST use the indices, because we can trust neither the param keys nor their values,
+ # so we MUST NOT reference them directly in the exec script, otherwise we'd be at risk
+ # of an injection attack.
validator_call_script = "args, kwargs = validator(*call_args, "
for kw_index, _ in enumerate(call_kwargs):
validator_call_script += f"**call_kwargs[{kw_index}], "
@@ -229,8 +239,6 @@ def parse_template_tag(
token: Token,
tag_spec: TagSpec,
) -> ParsedTag:
- fix_nested_tags(parser, token)
-
_, attrs = parse_tag(token.contents, parser)
# First token is tag name, e.g. `slot` in `{% slot ... %}`
@@ -340,138 +348,3 @@ def merge_repeated_kwargs(params: List[TagParam]) -> List[TagParam]:
params_by_key[param.key].value += " " + str(param.value)
return resolved_params
-
-
-def fix_nested_tags(parser: Parser, block_token: Token) -> None:
- # Since the nested tags MUST be wrapped in quotes, e.g.
- # `{% component 'test' "{% lorem var_a w %}" %}`
- # `{% component 'test' key="{% lorem var_a w %}" %}`
- #
- # We can parse the tag's tokens so we can find the last one, and so we consider
- # the unclosed `{%` only for the last bit.
- _, attrs = parse_tag(block_token.contents, parser)
-
- # If there are no attributes, then there are no nested tags
- if not attrs:
- return
-
- last_attr = attrs[-1]
-
- # TODO: Currently, using a nested template inside a list or dict
- # e.g. `{% component ... key=["{% nested %}"] %}` is NOT supported.
- # Hence why we leave if value is not "simple" (which means the value is list or dict).
- if last_attr.value.type != "simple":
- return
-
- last_attr_value = cast(TagValue, last_attr.value.entries[0])
- last_token = last_attr_value.parts[-1]
-
- # User probably forgot to wrap the nested tag in quotes, or this is the end of the input.
- # `{% component ... key={% nested %} %}`
- # `{% component ... key= %}`
- if not last_token.value:
- return
-
- # When our template tag contains a nested tag, e.g.:
- # `{% component 'test' "{% lorem var_a w %}" %}`
- #
- # Django parses this into:
- # `TokenType.BLOCK: 'component 'test' "{% lorem var_a w'`
- #
- # Above you can see that the token ends at the end of the NESTED tag,
- # and includes `{%`. So that's what we use to identify if we need to fix
- # nested tags or not.
- has_unclosed_tag = (
- (last_token.value.count("{%") > last_token.value.count("%}"))
- # Moreover we need to also check for unclosed quotes for this edge case:
- # `{% component 'test' "{%}" %}`
- #
- # Which Django parses this into:
- # `TokenType.BLOCK: 'component 'test' "{'`
- #
- # Here we cannot see any unclosed tags, but there is an unclosed double quote at the end.
- #
- # But we cannot naively search the full contents for unclosed quotes, but
- # only within the last 'bit'. Consider this:
- # `{% component 'test' '"' "{%}" %}`
- #
- or (last_token.value in ("'{", '"{'))
- )
-
- # There is 3 double quotes, but if the contents get split at the first `%}`
- # then there will be a single unclosed double quote in the last bit.
- first_char_index = len(last_token.spread or "")
- has_unclosed_quote = (
- not last_token.quoted
- and last_token.value
- and last_token.value[first_char_index] in ('"', "'")
- ) # fmt: skip
-
- needs_fixing = has_unclosed_tag and has_unclosed_quote
-
- if not needs_fixing:
- return
-
- block_token.contents += "%}" if has_unclosed_quote else " %}"
- expects_text = True
- while True:
- # This is where we need to take parsing in our own hands, because Django parser parsed
- # only up to the first closing tag `%}`, but that closing tag corresponds to a nested tag,
- # and not to the end of the outer template tag.
- #
- # NOTE: If we run out of tokens, this will raise, and break out of the loop
- token = parser.next_token()
-
- # If there is a nested BLOCK `{% %}`, VAR `{{ }}`, or COMMENT `{# #}` tag inside the template tag,
- # then the way Django parses it results in alternating Tokens of TEXT and non-TEXT types.
- #
- # We use `expects_text` to know which type to handle.
- if expects_text:
- if token.token_type != TokenType.TEXT:
- raise TemplateSyntaxError(f"Template parser received TokenType '{token.token_type}' instead of 'TEXT'")
-
- expects_text = False
-
- # Once we come across a closing tag in the text, we know that's our original
- # end tag. Until then, append all the text to the block token and continue
- if "%}" not in token.contents:
- block_token.contents += token.contents
- continue
-
- # This is the ACTUAL end of the block template tag
- remaining_block_content, text_content = token.contents.split("%}", 1)
- block_token.contents += remaining_block_content
-
- # We put back into the Parser the remaining bit of the text.
- # NOTE: Looking at the implementation, `parser.prepend_token()` is the opposite
- # of `parser.next_token()`.
- parser.prepend_token(Token(TokenType.TEXT, contents=text_content))
- break
-
- # In this case we've come across a next block tag `{% %}` inside the template tag
- # This isn't the first occurence, where the `{%` was ignored. And so, the content
- # between the `{% %}` is correctly captured, e.g.
- #
- # `{% firstof False 0 is_active %}`
- # gives
- # `TokenType.BLOCK: 'firstof False 0 is_active'`
- #
- # But we don't want to evaluate this as a standalone BLOCK tag, and instead append
- # it to the block tag that this nested block is part of
- else:
- if token.token_type == TokenType.TEXT:
- raise TemplateSyntaxError(
- f"Template parser received TokenType '{token.token_type}' instead of 'BLOCK', 'VAR', 'COMMENT'"
- )
-
- if token.token_type == TokenType.BLOCK:
- block_token.contents += "{% " + token.contents + " %}"
- elif token.token_type == TokenType.VAR:
- block_token.contents += "{{ " + token.contents + " }}"
- elif token.token_type == TokenType.COMMENT:
- pass # Comments are ignored
- else:
- raise TemplateSyntaxError(f"Unknown token type '{token.token_type}'")
-
- expects_text = True
- continue
diff --git a/tests/test_template_parser.py b/tests/test_template_parser.py
new file mode 100644
index 00000000..4380c936
--- /dev/null
+++ b/tests/test_template_parser.py
@@ -0,0 +1,247 @@
+from django.template import Context
+from django.template.base import Template, Token, TokenType
+
+from django_components import Component, register, types
+from django_components.util.template_parser import parse_template
+
+from .django_test_setup import setup_test_config
+from .testutils import BaseTestCase
+
+setup_test_config({"autodiscover": False})
+
+
+def token2tuple(token: Token):
+    """Flatten a `Token` into `(token_type, contents, (start, end), lineno)` so tokens can be compared."""
+    kind, text = token.token_type, token.contents
+    # Pick the two offsets one by one, so the result holds a fresh 2-tuple.
+    start, end = token.position[0], token.position[1]
+    flat = (kind, text, (start, end), token.lineno)
+    return flat
+
+
+class TemplateParserTests(BaseTestCase):
+ """
+ Check the tokens that `parse_template()` produces for different template strings.
+
+ Most tests convert the returned tokens with `token2tuple()` and compare them against
+ a hand-written list of `(token_type, contents, (start, end), lineno)` tuples.
+ """
+
+ # Plain text without any template syntax is expected as a single TEXT token.
+ def test_template_text(self):
+ tokens = parse_template("Hello world")
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ expected_tokens = [
+ (TokenType.TEXT, "Hello world", (0, 11), 1),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # `{{ ... }}` is expected as a VAR token: the contents hold only the expression,
+ # while the position spans the whole `{{ name }}` including the delimiters.
+ def test_template_variable(self):
+ tokens = parse_template("Hello {{ name }}")
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ expected_tokens = [
+ (TokenType.TEXT, "Hello ", (0, 6), 1),
+ (TokenType.VAR, "name", (6, 16), 1),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # An unclosed `{{` is expected to stay part of one TEXT token covering the whole input.
+ # NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it
+ def test_template_variable_unterminated(self):
+ tokens = parse_template("Hello {{ name")
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ expected_tokens = [
+ (TokenType.TEXT, "Hello {{ name", (0, 13), 1),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # `{% ... %}` is expected as a BLOCK token: the contents hold the tag body without
+ # the delimiters, while the position spans the whole tag.
+ def test_template_tag(self):
+ tokens = parse_template("{% component 'my_comp' key=val %}")
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ expected_tokens = [
+ (TokenType.BLOCK, "component 'my_comp' key=val", (0, 33), 1),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # An unclosed `{%` is expected to be returned as a single TEXT token.
+ # NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it
+ def test_template_tag_unterminated(self):
+ tokens = parse_template("{% if true")
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ expected_tokens = [
+ (TokenType.TEXT, "{% if true", (0, 10), 1),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # `{# ... #}` is expected as a COMMENT token placed between the surrounding TEXT tokens.
+ def test_template_comment(self):
+ tokens = parse_template("Hello{# this is a comment #}World")
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ expected_tokens = [
+ (TokenType.TEXT, "Hello", (0, 5), 1),
+ (TokenType.COMMENT, "this is a comment", (5, 28), 1),
+ (TokenType.TEXT, "World", (28, 33), 1),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # An unclosed `{#` is expected to be returned as a single TEXT token.
+ # NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it
+ def test_template_comment_unterminated(self):
+ tokens = parse_template("{# comment")
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ expected_tokens = [
+ (TokenType.TEXT, "{# comment", (0, 10), 1),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # Content between `{% verbatim %}` and `{% endverbatim %}` is expected as TEXT tokens,
+ # even where it looks like a variable or a tag.
+ def test_template_verbatim(self):
+ tokens = parse_template(
+ """{% verbatim %}
+ {{ this_is_not_a_var }}
+ {% this_is_not_a_tag %}
+ {% endverbatim %}"""
+ )
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ # Only the opening and the closing verbatim tags are BLOCK tokens.
+ # The last tuple item is the line on which the token starts.
+ expected_tokens = [
+ (TokenType.BLOCK, "verbatim", (0, 14), 1),
+ (TokenType.TEXT, "\n ", (14, 31), 1),
+ (TokenType.TEXT, "{{ this_is_not_a_var }}", (31, 54), 2),
+ (TokenType.TEXT, "\n ", (54, 71), 2),
+ (TokenType.TEXT, "{% this_is_not_a_tag %}", (71, 94), 3),
+ (TokenType.TEXT, "\n ", (94, 107), 3),
+ (TokenType.BLOCK, "endverbatim", (107, 124), 4),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # A named verbatim block (`verbatim myblock`) is expected to be closed only by
+ # `endverbatim myblock`. A nested `verbatim`, an unnamed `endverbatim` and an
+ # `endverbatim` with a different name are all expected as TEXT.
+ def test_template_verbatim_with_name(self):
+ tokens = parse_template(
+ """{% verbatim myblock %}
+ {{ this_is_not_a_var }}
+ {% verbatim %}
+ {% endverbatim %}
+ {% endverbatim blockname %}
+ {% endverbatim myblock %}"""
+ )
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ expected_tokens = [
+ (TokenType.BLOCK, "verbatim myblock", (0, 22), 1),
+ (TokenType.TEXT, "\n ", (22, 39), 1),
+ (TokenType.TEXT, "{{ this_is_not_a_var }}", (39, 62), 2),
+ (TokenType.TEXT, "\n ", (62, 79), 2),
+ (TokenType.TEXT, "{% verbatim %}", (79, 93), 3),
+ (TokenType.TEXT, "\n ", (93, 110), 3),
+ (TokenType.TEXT, "{% endverbatim %}", (110, 127), 4),
+ (TokenType.TEXT, "\n ", (127, 144), 4),
+ (TokenType.TEXT, "{% endverbatim blockname %}", (144, 171), 5),
+ (TokenType.TEXT, "\n ", (171, 184), 5),
+ (TokenType.BLOCK, "endverbatim myblock", (184, 209), 6),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # A `{% ... %}` placed inside a quoted argument of a tag is expected to stay part of
+ # the outer BLOCK token, instead of ending the outer tag at the first `%}`.
+ def test_template_nested_tags(self):
+ tokens = parse_template("""{% component 'test' "{% lorem var_a w %}" %}""")
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ expected_tokens = [
+ (TokenType.BLOCK, "component 'test' \"{% lorem var_a w %}\"", (0, 44), 1),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # Quote characters, a bare `{%}` and a nested tag inside quoted arguments are all
+ # expected to end up in the contents of the single outer BLOCK token.
+ def test_brackets_and_percent_in_text(self):
+ tokens = parse_template('{% component \'test\' \'"\' "{%}" bool_var="{% noop is_active %}" / %}')
+
+ token_tuples = [token2tuple(token) for token in tokens]
+
+ expected_tokens = [
+ (TokenType.BLOCK, 'component \'test\' \'"\' "{%}" bool_var="{% noop is_active %}" /', (0, 66), 1),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # Combines text, a variable, a comment, a tag with a nested tag, and a verbatim block
+ # in one multi-line template, checking positions and line numbers along the way.
+ def test_template_mixed(self):
+ tokens = parse_template(
+ """Hello {{ name }}
+ {# greeting #}
+ {% if show_greeting %}
+ Welcome!
+ {% component 'test' key="{% lorem var_a w %}" %}
+ {% verbatim %}
+ {% endcomponent %}
+ {% endverbatim %}
+ {% endcomponent %}
+ {% endif %}"""
+ )
+
+ token_tuples = [token2tuple(token) for token in tokens]
+ expected_tokens = [
+ (TokenType.TEXT, "Hello ", (0, 6), 1),
+ (TokenType.VAR, "name", (6, 16), 1),
+ (TokenType.TEXT, "\n ", (16, 29), 1),
+ (TokenType.COMMENT, "greeting", (29, 43), 2),
+ (TokenType.TEXT, "\n ", (43, 56), 2),
+ (TokenType.BLOCK, "if show_greeting", (56, 78), 3),
+ (TokenType.TEXT, "\n Welcome!
\n ", (78, 129), 3),
+ (TokenType.BLOCK, "component 'test' key=\"{% lorem var_a w %}\"", (129, 177), 5),
+ (TokenType.TEXT, "\n ", (177, 198), 5),
+ (TokenType.BLOCK, "verbatim", (198, 212), 6),
+ (TokenType.TEXT, "\n ", (212, 237), 6),
+ # This `endcomponent` sits inside the verbatim block, hence TEXT and not BLOCK
+ (TokenType.TEXT, "{% endcomponent %}", (237, 255), 7),
+ (TokenType.TEXT, "\n ", (255, 276), 7),
+ (TokenType.BLOCK, "endverbatim", (276, 293), 8),
+ (TokenType.TEXT, "\n ", (293, 310), 8),
+ (TokenType.BLOCK, "endcomponent", (310, 328), 9),
+ (TokenType.TEXT, "\n ", (328, 341), 9),
+ (TokenType.BLOCK, "endif", (341, 352), 10),
+ ]
+
+ self.assertEqual(token_tuples, expected_tokens)
+
+ # Check that a template that contains `{% %}` inside of a component tag is parsed correctly
+ def test_component_mixed(self):
+ # Component that prints the `var` input and its default slot, so both show up in the output
+ @register("test")
+ class Test(Component):
+ template: types.django_html = """
+ {% load component_tags %}
+ Var: {{ var }}
+ Slot: {% slot "content" default / %}
+ """
+
+ def get_context_data(self, var: str) -> dict:
+ return {"var": var}
+
+ template_str: types.django_html = """
+ {% load component_tags %}
+
+ Hello {{ name }}
+ {# greeting #}
+ {% if show_greeting %}
+
Welcome!
+ {% component 'test' var="{% lorem var_a w %}" %}
+ {% verbatim %}
+ {% endcomponent %}
+ {% endverbatim %}
+ {% endcomponent %}
+ {% endif %}
+
+ """
+ # Unlike the tests above, this one renders the template through Django's `Template`
+ # instead of calling `parse_template()` directly.
+ template = Template(template_str)
+ rendered = template.render(Context({"name": "John", "show_greeting": True, "var_a": 2}))
+
+ # Expected output: `var` holds the result of the nested `{% lorem var_a w %}` tag
+ # ("lorem ipsum" with `var_a=2`), and the slot is filled with the verbatim content,
+ # i.e. the literal text `{% endcomponent %}`.
+ self.assertHTMLEqual(
+ rendered,
+ """
+
+ Hello John
+
Welcome!
+ Var: lorem ipsum
+ Slot: {% endcomponent %}
+
+ """,
+ )