diff --git a/benchmarks/test_lexer_performance.py b/benchmarks/test_lexer_performance.py new file mode 100644 index 00000000..b95f7d63 --- /dev/null +++ b/benchmarks/test_lexer_performance.py @@ -0,0 +1,195 @@ +# NOTE: This file is more of a playground than a proper test + +import timeit +from typing import List, Tuple + +from django.template.base import DebugLexer, Lexer, Token + +from django_components.util.template_parser import parse_template + + +def django_lexer(template: str) -> List[Token]: + """Use Django's built-in lexer to tokenize a template.""" + lexer = Lexer(template) + return list(lexer.tokenize()) + + +def django_debug_lexer(template: str) -> List[Token]: + """Use Django's built-in lexer to tokenize a template.""" + lexer = DebugLexer(template) + return list(lexer.tokenize()) + + +def run_benchmark(template: str, num_iterations: int = 5000) -> Tuple[float, float]: + """Run performance comparison between Django and custom lexer.""" + # django_time = timeit.timeit(lambda: django_lexer(template), number=num_iterations) + django_debug_time = timeit.timeit(lambda: django_debug_lexer(template), number=num_iterations) + custom_time = timeit.timeit(lambda: parse_template(template), number=num_iterations) + # return django_time, django_debug_time + return django_debug_time, custom_time + + +def print_benchmark_results(template: str, django_time: float, custom_time: float, num_iterations: int) -> None: + """Print formatted benchmark results.""" + print(f"\nTemplate: {template}") + print(f"Iterations: {num_iterations}") + print(f"Django Lexer: {django_time:.6f} seconds") + print(f"Custom Lexer: {custom_time:.6f} seconds") + print(f"Difference: {abs(django_time - custom_time):.6f} seconds") + print(f"Custom lexer is {(django_time / custom_time):.2f}x {'faster' if custom_time < django_time else 'slower'}") + + +if __name__ == "__main__": + test_cases = [ + # Simple text + "Hello World", + # Simple variable + "Hello {{ name }}", + # Simple block + "{% if 
condition %}Hello{% endif %}", + # Complex nested template + """ + {% extends "base.html" %} + {% block content %} +

{{ title }}

+ {% for item in items %} +
+ {{ item.name }} + {% if item.description %} +

{{ item.description }}

+ {% endif %} +
+ {% endfor %} + {% endblock %} + """, + # Component with nested tags + """ + {% component 'table' + headers=headers + rows=rows + footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}" + title="{% trans 'Data Table' %}" + %} + """, + # Real world example + """ +
+ {# Info section #} +
+
+

Project Info

+ + {% if editable %} + {% component "Button" + href=project_edit_url + attrs:class="not-prose" + footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}" + title="{% trans 'Data Table' %}" + %} + Edit Project + {% endcomponent %} + {% endif %} +
+ + + {% for key, value in project_info %} + + + + + {% endfor %} +
+ {{ key }}: + + {{ value }} +
+
+ + {# Status Updates section #} + {% component "ProjectStatusUpdates" + project_id=project.pk + status_updates=status_updates + editable=editable + footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}" + title="{% trans 'Data Table' %}" + / %} +
+ {# Team section #} +
+
+

Dcode Team

+ + {% if editable %} + {% component "Button" + href=edit_project_roles_url + attrs:class="not-prose" + footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}" + title="{% trans 'Data Table' %}" + %} + Edit Team + {% endcomponent %} + {% endif %} +
+ + {% component "ProjectUsers" + project_id=project.pk + roles_with_users=roles_with_users + editable=False + footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}" + title="{% trans 'Data Table' %}" + / %} +
+ + {# POCs section #} +
+
+

Client POCs

+ + {% if editable %} + {% component "Button" + href=edit_pocs_url + attrs:class="not-prose" + footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}" + title="{% trans 'Data Table' %}" + %} + Edit POCs + {% endcomponent %} + {% endif %} +
+ + {% if poc_data %} + + + + + + + {% for data in poc_data %} + + + + + + {% endfor %} +
NameJob TitleHubspot Profile
{{ data.poc.contact.first_name }} {{ data.poc.contact.last_name }}{{ data.poc.contact.job_title }} + {% component "Icon" + href=data.hubspot_url + name="arrow-top-right-on-square" + variant="outline" + color="text-gray-400 hover:text-gray-500" + footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}" + title="{% trans 'Data Table' %}" + / %} +
+ {% else %} +

No entries

+ {% endif %} +
+
+
+ """, + ] + + for template in test_cases: + django_time, custom_time = run_benchmark(template) + print_benchmark_results(template, django_time, custom_time, 200) diff --git a/src/django_components/apps.py b/src/django_components/apps.py index ba32beba..e15b7fb5 100644 --- a/src/django_components/apps.py +++ b/src/django_components/apps.py @@ -15,14 +15,14 @@ class ComponentsConfig(AppConfig): def ready(self) -> None: from django_components.app_settings import app_settings from django_components.autodiscovery import autodiscover, import_libraries - from django_components.component import monkeypatch_template from django_components.component_registry import registry from django_components.components.dynamic import DynamicComponent + from django_components.util.django_monkeypatch import monkeypatch_template_cls # NOTE: This monkeypatch is applied here, before Django processes any requests. # To make django-components work with django-debug-toolbar-template-profiler # See https://github.com/EmilStenstrom/django-components/discussions/819 - monkeypatch_template(Template) + monkeypatch_template_cls(Template) # Import modules set in `COMPONENTS.libraries` setting import_libraries() diff --git a/src/django_components/component.py b/src/django_components/component.py index 01188fae..4fdb2033 100644 --- a/src/django_components/component.py +++ b/src/django_components/component.py @@ -68,6 +68,7 @@ from django_components.slots import ( resolve_fills, ) from django_components.template import cached_template +from django_components.util.django_monkeypatch import is_template_cls_patched from django_components.util.logger import trace_msg from django_components.util.misc import gen_id from django_components.util.template_tag import TagParams @@ -1272,54 +1273,6 @@ class ComponentNode(BaseNode): return output -def monkeypatch_template(template_cls: Type[Template]) -> None: - # Modify `Template.render` to set `isolated_context` kwarg of `push_state` - # based on our custom 
`Template._dc_is_component_nested`. - # - # Part of fix for https://github.com/EmilStenstrom/django-components/issues/508 - # - # NOTE 1: While we could've subclassed Template, then we would need to either - # 1) ask the user to change the backend, so all templates are of our subclass, or - # 2) copy the data from user's Template class instance to our subclass instance, - # which could lead to doubly parsing the source, and could be problematic if users - # used more exotic subclasses of Template. - # - # Instead, modifying only the `render` method of an already-existing instance - # should work well with any user-provided custom subclasses of Template, and it - # doesn't require the source to be parsed multiple times. User can pass extra args/kwargs, - # and can modify the rendering behavior by overriding the `_render` method. - # - # NOTE 2: Instead of setting `Template._dc_is_component_nested`, alternatively we could - # have passed the value to `monkeypatch_template` directly. However, we intentionally - # did NOT do that, so the monkey-patched method is more robust, and can be e.g. copied - # to other. - if hasattr(template_cls, "_dc_patched"): - # Do not patch if done so already. This helps us avoid RecursionError - return - - def _template_render(self: Template, context: Context, *args: Any, **kwargs: Any) -> str: - # ---------------- OUR CHANGES START ---------------- - # We parametrized `isolated_context`, which was `True` in the original method. - if not hasattr(self, "_dc_is_component_nested"): - isolated_context = True - else: - # MUST be `True` for templates that are NOT import with `{% extends %}` tag, - # and `False` otherwise. 
- isolated_context = not self._dc_is_component_nested - # ---------------- OUR CHANGES END ---------------- - - with context.render_context.push_state(self, isolated_context=isolated_context): - if context.template is None: - with context.bind_template(self): - context.template_name = self.name - return self._render(context, *args, **kwargs) - else: - return self._render(context, *args, **kwargs) - - template_cls.render = _template_render - template_cls._dc_patched = True - - @contextmanager def _maybe_bind_template(context: Context, template: Template) -> Generator[None, Any, None]: if context.template is None: @@ -1342,7 +1295,7 @@ def _prepare_template( # And https://github.com/EmilStenstrom/django-components/issues/634 template = component._get_template(context) - if not getattr(template, "_dc_patched"): + if not is_template_cls_patched(template): raise RuntimeError( "Django-components received a Template instance which was not patched." "If you are using Django's Template class, check if you added django-components" @@ -1350,10 +1303,11 @@ def _prepare_template( "manually patch the class." ) - # Set `Template._dc_is_component_nested` based on whether we're currently INSIDE + # Set `Template._djc_is_component_nested` based on whether we're currently INSIDE # the `{% extends %}` tag. 
# Part of fix for https://github.com/EmilStenstrom/django-components/issues/508 - template._dc_is_component_nested = bool(context.render_context.get(BLOCK_CONTEXT_KEY)) + # See django_monkeypatch.py + template._djc_is_component_nested = bool(context.render_context.get(BLOCK_CONTEXT_KEY)) with _maybe_bind_template(context, template): yield template diff --git a/src/django_components/expression.py b/src/django_components/expression.py index 81b819ea..f2e28606 100644 --- a/src/django_components/expression.py +++ b/src/django_components/expression.py @@ -2,7 +2,9 @@ import re from typing import TYPE_CHECKING, Any, Dict, List from django.template import Context, Node, NodeList, TemplateSyntaxError -from django.template.base import Lexer, Parser, VariableNode +from django.template.base import Parser, VariableNode + +from django_components.util.template_parser import parse_template if TYPE_CHECKING: from django_components.util.template_tag import TagParam @@ -48,8 +50,7 @@ class DynamicFilterExpression: # Copy the Parser, and pass through the tags and filters available # in the current context. Thus, if user calls `{% load %}` inside # the expression, it won't spill outside. 
- lexer = Lexer(self.expr) - tokens = lexer.tokenize() + tokens = parse_template(self.expr) expr_parser = Parser(tokens=tokens) expr_parser.tags = {**parser.tags} expr_parser.filters = {**parser.filters} diff --git a/src/django_components/templatetags/component_tags.py b/src/django_components/templatetags/component_tags.py index b14000c8..78938630 100644 --- a/src/django_components/templatetags/component_tags.py +++ b/src/django_components/templatetags/component_tags.py @@ -28,7 +28,7 @@ from django_components.slots import SLOT_DEFAULT_KEYWORD, SLOT_REQUIRED_KEYWORD, from django_components.tag_formatter import get_tag_formatter from django_components.util.logger import trace_msg from django_components.util.misc import gen_id -from django_components.util.template_tag import TagSpec, fix_nested_tags, parse_template_tag, with_tag_spec +from django_components.util.template_tag import TagSpec, parse_template_tag, with_tag_spec # NOTE: Variable name `register` is required by Django to recognize this as a template tag library # See https://docs.djangoproject.com/en/dev/howto/custom-template-tags @@ -492,7 +492,6 @@ def component( """ tag_id = gen_id() - fix_nested_tags(parser, token) bits = token.split_contents() # Let the TagFormatter pre-process the tokens diff --git a/src/django_components/util/django_monkeypatch.py b/src/django_components/util/django_monkeypatch.py new file mode 100644 index 00000000..e81b8a4c --- /dev/null +++ b/src/django_components/util/django_monkeypatch.py @@ -0,0 +1,110 @@ +from typing import Any, Type + +from django.template import Context, NodeList, Template +from django.template.base import Parser + +from django_components.util.template_parser import parse_template + + +# In some cases we can't work around Django's design, and need to patch the template class. 
def monkeypatch_template_cls(template_cls: Type[Template]) -> None:
    """
    Apply all django-components patches to the given Template class.

    Both `compile_nodelist` and `render` are replaced, and the class is then
    flagged with `_djc_patched` so that `is_template_cls_patched()` can detect it.
    """
    for apply_patch in (monkeypatch_template_compile_nodelist, monkeypatch_template_render):
        apply_patch(template_cls)
    template_cls._djc_patched = True


# Replaces `Template.compile_nodelist` so that the template source is tokenized
# by our own `parse_template()` instead of Django's Lexer / DebugLexer.
# That is what allows template tags to be used as inputs to the component tag:
#
# {% component "my-component" description="{% lorem 3 w %}" / %}
def monkeypatch_template_compile_nodelist(template_cls: Type[Template]) -> None:
    def _compile_nodelist(self: Template) -> NodeList:
        """
        Turn the template source into a nodelist.

        Mirrors Django's `Template.compile_nodelist` (v5.1.3), except for how the
        tokens are produced. When the engine runs in debug mode and parsing fails,
        the exception gets annotated with the template location it originated from.
        """
        # ---------------- ORIGINAL (Django v5.1.3) ----------------
        # if self.engine.debug:
        #     lexer = DebugLexer(self.source)
        # else:
        #     lexer = Lexer(self.source)

        # tokens = lexer.tokenize()
        # ---------------- OUR CHANGES START ----------------
        template_tokens = parse_template(self.source)
        # ---------------- OUR CHANGES END ----------------
        template_parser = Parser(
            template_tokens,
            self.engine.template_libraries,
            self.engine.template_builtins,
            self.origin,
        )

        try:
            # ---------------- ADDED IN Django v5.1 - See https://github.com/django/django/commit/35bbb2c9c01882b1d77b0b8c737ac646144833d4  # noqa: E501
            compiled = template_parser.parse()
            self.extra_data = getattr(template_parser, "extra_data", {})
            # ---------------- END OF ADDED IN Django v5.1 ----------------
        except Exception as exc:
            if self.engine.debug:
                exc.template_debug = self.get_exception_info(exc, exc.token)  # type: ignore
            raise
        return compiled

    template_cls.compile_nodelist = _compile_nodelist


def monkeypatch_template_render(template_cls: Type[Template]) -> None:
    # Replaces `Template.render` so that the `isolated_context` kwarg of `push_state`
    # is driven by our custom `Template._djc_is_component_nested` attribute.
    #
    # Part of fix for https://github.com/EmilStenstrom/django-components/issues/508
    #
    # NOTE 1: Subclassing Template was an option too, but it would force us to either
    #   1) ask users to switch the backend, so every template is of our subclass, or
    #   2) copy the data from the user's Template instance onto an instance of our subclass,
    #      which could mean parsing the source twice, and could misbehave with more exotic
    #      user-defined subclasses of Template.
    #
    #   Swapping out only the `render` method keeps working with any user-provided
    #   subclass of Template and does not need the source to be parsed repeatedly.
    #   Users can still pass extra args/kwargs, and can still customize rendering
    #   by overriding `_render`.
    #
    # NOTE 2: The value of `Template._djc_is_component_nested` could have been handed to
    #   `monkeypatch_template_render` directly instead. That was intentionally NOT done,
    #   so that the patched method stays self-contained and can be e.g. copied elsewhere.
    already_patched = is_template_cls_patched(template_cls)
    if already_patched:
        # Nothing to do if the class was patched before. This helps us avoid RecursionError
        return

    def _template_render(self: Template, context: Context, *args: Any, **kwargs: Any) -> str:
        """Display stage -- may run many times for a single Template instance."""
        # ---------------- ORIGINAL (Django v5.1.3) ----------------
        # with context.render_context.push_state(self):
        # ---------------- OUR CHANGES START ----------------
        # `isolated_context` was hard-coded to `True` in the original method.
        # Here it MUST be `True` for templates that are NOT imported with the `{% extends %}` tag
        # (which includes templates that never had the attribute set), and `False` otherwise.
        if hasattr(self, "_djc_is_component_nested"):
            isolated_context = not self._djc_is_component_nested
        else:
            isolated_context = True

        with context.render_context.push_state(self, isolated_context=isolated_context):
            # ---------------- OUR CHANGES END ----------------
            if context.template is not None:
                return self._render(context, *args, **kwargs)

            with context.bind_template(self):
                context.template_name = self.name
                return self._render(context, *args, **kwargs)

    template_cls.render = _template_render


def is_template_cls_patched(template_cls: Type[Template]) -> bool:
    """Return the `_djc_patched` flag of the given class, or `False` when the flag is absent."""
    return getattr(template_cls, "_djc_patched", False)
+# +# Before I switched to this strategy, my initial parser was about 50x slower than Django's lexer. +# I (Juro) assume it was because I was walking character by character, instead of using a regex. +# +# The overall speed should then depend on the number of broken tokens in the template. +# +# Performance of the new strategy on a real-world example: +# - A template with about 110 lines and 6 components +# - Components spanning ~35 lines in total, so roughly 1/3 of the template +# - The custom parser is about 8x slower than Django's Debug lexer. +# - For a mid-sized project of 200 templates, it would take 7-8 seconds to load all the templates +# (from 1 second with Django's lexer). +# - However, thanks to django-component's lazy-loading, this should not be a problem. +# +# How it works is that: +# 1. We use Django's lexer to get the tokens. +# 2. We check them one-by-one, and if we find a broken token, we switch to our parser to fix it. +# 3. Once the broken token is fixed, we find it's end position, and switch back to the Django lexer +# for the remaining text (step 1). +def parse_template(text: str) -> List[Token]: + resolved_tokens: List[Token] = [] + + index_start = 0 + index_end = len(text) + lineno_offset = 0 + + while index_start < index_end: + broken_token: Optional[Token] = None + # Do fast tokenization with regex - This is about 50x faster than our custom tokenizer. + # We use DebugLexer because we need to get the position of the tokens. + # DebugLexer and Lexer have very similar speeds, Debug is about 33% slower. 
+ lexer = DebugLexer(text[index_start:index_end]) + tokens: List[Token] = lexer.tokenize() + + for token in tokens: + token.lineno += lineno_offset + token.position = (token.position[0] + index_start, token.position[1] + index_start) + + if token.token_type == TokenType.BLOCK and ("'" in token.contents or '"' in token.contents): + broken_token = token + break + else: + resolved_tokens.append(token) + + # If we found a broken token, we switch to our slow parser + if broken_token is not None: + broken_token_start = broken_token.position[0] + fixed_token = _detailed_tag_parser(text[broken_token_start:], broken_token.lineno, broken_token_start) + + resolved_tokens.append(fixed_token) + index_start = fixed_token.position[1] + lineno_offset += ( + fixed_token.lineno - 1 # -1 because lines are 1-indexed + + fixed_token.contents.count("\n") + ) # fmt: skip + else: + break + + return resolved_tokens + + +# Handle parsing of `{% %}` tags, while allowing `%}` inside of strings +def _detailed_tag_parser(text: str, lineno: int, start_index: int) -> Token: + index = 0 + length = len(text) + result_content: List[str] = [] + + # Pre-compute common substrings + QUOTE_CHARS = ("'", '"') + QUOTE_OR_PERCENT = (*QUOTE_CHARS, "%") + + def take_char() -> str: + nonlocal index + if index >= length: + return "" + char = text[index] + index += 1 + return char + + def peek_char(offset: int = 0) -> str: + peek_index = index + offset + if peek_index >= length: + return "" + return text[peek_index] + + # This is an optimized version that uses regex to find the next stop character + # and ignores the stop characters if they are prefixed by a backslash, if allow_escapes is True. 
+ # + # For the intuition, the original version is: + # + # ```py + # def take_until_any(stop_chars: Tuple[str, ...], allow_escapes: bool = False) -> str: + # nonlocal index + # start = index + # while index < length: + # char = text[index] + # if allow_escapes and char == BACKSLASH and index + 1 < length: + # index += 2 + # continue + # if char in stop_chars: + # break + # index += 1 + # return text[start:index] + # ``` + def take_until_any(stop_chars: Tuple[str, ...], allow_escapes: bool = False) -> str: + nonlocal index + + stop_chars_str = "".join(stop_chars) + pattern = _compile_take_until_pattern(stop_chars_str, allow_escapes) + + # Find match at current position + match = pattern.match(text, index) + if match: + matched_text = match.group(0) + index += len(matched_text) + return matched_text + + return "" + + # Given that this function is called only when there's a broken token, + # we know that the first two characters are always "{%" + take_char() # { + take_char() # % + + # Main parsing loop + while index < length: + char = peek_char() + + # Handle strings within `{% %}` + if char in QUOTE_CHARS: + quote_char = take_char() + result_content.append(quote_char) + + # Take content until matching quote, allowing escaped quotes + content = take_until_any((quote_char,), allow_escapes=True) + result_content.append(content) + + # Handle the closing quote + if peek_char() == quote_char: + result_content.append(take_char()) + else: + raise TemplateSyntaxError(f"Unexpected end of text - unterminated {quote_char} string") + continue + + # Check for closing tag + if char == "%": + if peek_char(1) == "}": + take_char() # % + take_char() # } + break + else: + # False alarm, just a string + content = take_until_any(QUOTE_CHARS) + result_content.append(content) + continue + + # Take regular content until we hit a quote or potential closing tag + content = take_until_any(QUOTE_OR_PERCENT) + result_content.append(content) + + else: + raise TemplateSyntaxError("Unexpected end 
of text - unterminated {% tag") + + result_str = "".join(result_content).strip() # Django's Lexer.tokenize() strips the whitespace + return Token(TokenType.BLOCK, result_str, (start_index, index + start_index), lineno) + + +# Create a regex pattern that takes anything until any of the stop characters are found. +# +# If allow_escapes is True, also the stop characters are allowed, given that they are +# prefixed by a backslash. +@lru_cache(maxsize=128) +def _compile_take_until_pattern(stop_chars: str, allow_escapes: bool) -> re.Pattern: + escaped_stops = "".join(re.escape(c) for c in stop_chars) + + if allow_escapes: + # Match either escaped characters or anything until stop chars + pattern = f"(?:\\\\.|[^{escaped_stops}])*" + else: + # Match anything until stop chars + pattern = f"[^{escaped_stops}]*" + + return re.compile(pattern) diff --git a/src/django_components/util/template_tag.py b/src/django_components/util/template_tag.py index 873c2da8..520e5e23 100644 --- a/src/django_components/util/template_tag.py +++ b/src/django_components/util/template_tag.py @@ -1,14 +1,14 @@ import functools import inspect from dataclasses import dataclass -from typing import Any, Callable, Dict, Iterable, List, Mapping, NamedTuple, Optional, Set, Tuple, cast +from typing import Any, Callable, Dict, Iterable, List, Mapping, NamedTuple, Optional, Set, Tuple from django.template import Context, NodeList -from django.template.base import Parser, Token, TokenType +from django.template.base import Parser, Token from django.template.exceptions import TemplateSyntaxError from django_components.expression import process_aggregate_kwargs -from django_components.util.tag_parser import TagAttr, TagValue, parse_tag +from django_components.util.tag_parser import TagAttr, parse_tag @dataclass @@ -97,6 +97,17 @@ class TagSpec: # Set the signature on the function validator.__signature__ = self.signature # type: ignore[attr-defined] + # Call the validator with our args and kwargs, in such a way to 
+ # let the Python interpreter validate on repeated kwargs. E.g. + # + # ``` + # args, kwargs = validator( + # *call_args, + # **call_kwargs[0], + # **call_kwargs[1], + # ... + # ) + # ``` call_args = [] call_kwargs = [] for param in params: @@ -105,13 +116,12 @@ class TagSpec: else: call_kwargs.append({param.key: param.value}) - # Call the validator with our args and kwargs, in such a way to - # let the Python interpreter validate on repeated kwargs. - # - # E.g. `args, kwargs = validator(*call_args, **call_kwargs[0], **call_kwargs[1])` - # # NOTE: Although we use `exec()` here, it's safe, because we control the input - - # we pass in only the list index. + # we make dynamic only the list index. + # + # We MUST use the indices, because we can't trust neither the param keys nor values, + # so we MUST NOT reference them directly in the exec script, otherwise we'd be at risk + # of injection attack. validator_call_script = "args, kwargs = validator(*call_args, " for kw_index, _ in enumerate(call_kwargs): validator_call_script += f"**call_kwargs[{kw_index}], " @@ -229,8 +239,6 @@ def parse_template_tag( token: Token, tag_spec: TagSpec, ) -> ParsedTag: - fix_nested_tags(parser, token) - _, attrs = parse_tag(token.contents, parser) # First token is tag name, e.g. `slot` in `{% slot ... %}` @@ -340,138 +348,3 @@ def merge_repeated_kwargs(params: List[TagParam]) -> List[TagParam]: params_by_key[param.key].value += " " + str(param.value) return resolved_params - - -def fix_nested_tags(parser: Parser, block_token: Token) -> None: - # Since the nested tags MUST be wrapped in quotes, e.g. - # `{% component 'test' "{% lorem var_a w %}" %}` - # `{% component 'test' key="{% lorem var_a w %}" %}` - # - # We can parse the tag's tokens so we can find the last one, and so we consider - # the unclosed `{%` only for the last bit. 
- _, attrs = parse_tag(block_token.contents, parser) - - # If there are no attributes, then there are no nested tags - if not attrs: - return - - last_attr = attrs[-1] - - # TODO: Currently, using a nested template inside a list or dict - # e.g. `{% component ... key=["{% nested %}"] %}` is NOT supported. - # Hence why we leave if value is not "simple" (which means the value is list or dict). - if last_attr.value.type != "simple": - return - - last_attr_value = cast(TagValue, last_attr.value.entries[0]) - last_token = last_attr_value.parts[-1] - - # User probably forgot to wrap the nested tag in quotes, or this is the end of the input. - # `{% component ... key={% nested %} %}` - # `{% component ... key= %}` - if not last_token.value: - return - - # When our template tag contains a nested tag, e.g.: - # `{% component 'test' "{% lorem var_a w %}" %}` - # - # Django parses this into: - # `TokenType.BLOCK: 'component 'test' "{% lorem var_a w'` - # - # Above you can see that the token ends at the end of the NESTED tag, - # and includes `{%`. So that's what we use to identify if we need to fix - # nested tags or not. - has_unclosed_tag = ( - (last_token.value.count("{%") > last_token.value.count("%}")) - # Moreover we need to also check for unclosed quotes for this edge case: - # `{% component 'test' "{%}" %}` - # - # Which Django parses this into: - # `TokenType.BLOCK: 'component 'test' "{'` - # - # Here we cannot see any unclosed tags, but there is an unclosed double quote at the end. - # - # But we cannot naively search the full contents for unclosed quotes, but - # only within the last 'bit'. Consider this: - # `{% component 'test' '"' "{%}" %}` - # - or (last_token.value in ("'{", '"{')) - ) - - # There is 3 double quotes, but if the contents get split at the first `%}` - # then there will be a single unclosed double quote in the last bit. 
- first_char_index = len(last_token.spread or "") - has_unclosed_quote = ( - not last_token.quoted - and last_token.value - and last_token.value[first_char_index] in ('"', "'") - ) # fmt: skip - - needs_fixing = has_unclosed_tag and has_unclosed_quote - - if not needs_fixing: - return - - block_token.contents += "%}" if has_unclosed_quote else " %}" - expects_text = True - while True: - # This is where we need to take parsing in our own hands, because Django parser parsed - # only up to the first closing tag `%}`, but that closing tag corresponds to a nested tag, - # and not to the end of the outer template tag. - # - # NOTE: If we run out of tokens, this will raise, and break out of the loop - token = parser.next_token() - - # If there is a nested BLOCK `{% %}`, VAR `{{ }}`, or COMMENT `{# #}` tag inside the template tag, - # then the way Django parses it results in alternating Tokens of TEXT and non-TEXT types. - # - # We use `expects_text` to know which type to handle. - if expects_text: - if token.token_type != TokenType.TEXT: - raise TemplateSyntaxError(f"Template parser received TokenType '{token.token_type}' instead of 'TEXT'") - - expects_text = False - - # Once we come across a closing tag in the text, we know that's our original - # end tag. Until then, append all the text to the block token and continue - if "%}" not in token.contents: - block_token.contents += token.contents - continue - - # This is the ACTUAL end of the block template tag - remaining_block_content, text_content = token.contents.split("%}", 1) - block_token.contents += remaining_block_content - - # We put back into the Parser the remaining bit of the text. - # NOTE: Looking at the implementation, `parser.prepend_token()` is the opposite - # of `parser.next_token()`. 
- parser.prepend_token(Token(TokenType.TEXT, contents=text_content)) - break - - # In this case we've come across a next block tag `{% %}` inside the template tag - # This isn't the first occurence, where the `{%` was ignored. And so, the content - # between the `{% %}` is correctly captured, e.g. - # - # `{% firstof False 0 is_active %}` - # gives - # `TokenType.BLOCK: 'firstof False 0 is_active'` - # - # But we don't want to evaluate this as a standalone BLOCK tag, and instead append - # it to the block tag that this nested block is part of - else: - if token.token_type == TokenType.TEXT: - raise TemplateSyntaxError( - f"Template parser received TokenType '{token.token_type}' instead of 'BLOCK', 'VAR', 'COMMENT'" - ) - - if token.token_type == TokenType.BLOCK: - block_token.contents += "{% " + token.contents + " %}" - elif token.token_type == TokenType.VAR: - block_token.contents += "{{ " + token.contents + " }}" - elif token.token_type == TokenType.COMMENT: - pass # Comments are ignored - else: - raise TemplateSyntaxError(f"Unknown token type '{token.token_type}'") - - expects_text = True - continue diff --git a/tests/test_template_parser.py b/tests/test_template_parser.py new file mode 100644 index 00000000..4380c936 --- /dev/null +++ b/tests/test_template_parser.py @@ -0,0 +1,247 @@ +from django.template import Context +from django.template.base import Template, Token, TokenType + +from django_components import Component, register, types +from django_components.util.template_parser import parse_template + +from .django_test_setup import setup_test_config +from .testutils import BaseTestCase + +setup_test_config({"autodiscover": False}) + + +def token2tuple(token: Token): + return ( + token.token_type, + token.contents, + (token.position[0], token.position[1]), + token.lineno, + ) + + +class TemplateParserTests(BaseTestCase): + def test_template_text(self): + tokens = parse_template("Hello world") + + token_tuples = [token2tuple(token) for token in tokens] + 
expected_tokens = [ + (TokenType.TEXT, "Hello world", (0, 11), 1), + ] + + self.assertEqual(token_tuples, expected_tokens) + + def test_template_variable(self): + tokens = parse_template("Hello {{ name }}") + + token_tuples = [token2tuple(token) for token in tokens] + expected_tokens = [ + (TokenType.TEXT, "Hello ", (0, 6), 1), + (TokenType.VAR, "name", (6, 16), 1), + ] + + self.assertEqual(token_tuples, expected_tokens) + + # NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it + def test_template_variable_unterminated(self): + tokens = parse_template("Hello {{ name") + + token_tuples = [token2tuple(token) for token in tokens] + expected_tokens = [ + (TokenType.TEXT, "Hello {{ name", (0, 13), 1), + ] + + self.assertEqual(token_tuples, expected_tokens) + + def test_template_tag(self): + tokens = parse_template("{% component 'my_comp' key=val %}") + + token_tuples = [token2tuple(token) for token in tokens] + expected_tokens = [ + (TokenType.BLOCK, "component 'my_comp' key=val", (0, 33), 1), + ] + + self.assertEqual(token_tuples, expected_tokens) + + # NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it + def test_template_tag_unterminated(self): + tokens = parse_template("{% if true") + + token_tuples = [token2tuple(token) for token in tokens] + expected_tokens = [ + (TokenType.TEXT, "{% if true", (0, 10), 1), + ] + + self.assertEqual(token_tuples, expected_tokens) + + def test_template_comment(self): + tokens = parse_template("Hello{# this is a comment #}World") + + token_tuples = [token2tuple(token) for token in tokens] + expected_tokens = [ + (TokenType.TEXT, "Hello", (0, 5), 1), + (TokenType.COMMENT, "this is a comment", (5, 28), 1), + (TokenType.TEXT, "World", (28, 33), 1), + ] + + self.assertEqual(token_tuples, expected_tokens) + + # NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it + def test_template_comment_unterminated(self): + tokens = parse_template("{# comment") + 
+ token_tuples = [token2tuple(token) for token in tokens] + expected_tokens = [ + (TokenType.TEXT, "{# comment", (0, 10), 1), + ] + + self.assertEqual(token_tuples, expected_tokens) + + def test_template_verbatim(self): + tokens = parse_template( + """{% verbatim %} + {{ this_is_not_a_var }} + {% this_is_not_a_tag %} + {% endverbatim %}""" + ) + + token_tuples = [token2tuple(token) for token in tokens] + expected_tokens = [ + (TokenType.BLOCK, "verbatim", (0, 14), 1), + (TokenType.TEXT, "\n ", (14, 31), 1), + (TokenType.TEXT, "{{ this_is_not_a_var }}", (31, 54), 2), + (TokenType.TEXT, "\n ", (54, 71), 2), + (TokenType.TEXT, "{% this_is_not_a_tag %}", (71, 94), 3), + (TokenType.TEXT, "\n ", (94, 107), 3), + (TokenType.BLOCK, "endverbatim", (107, 124), 4), + ] + + self.assertEqual(token_tuples, expected_tokens) + + def test_template_verbatim_with_name(self): + tokens = parse_template( + """{% verbatim myblock %} + {{ this_is_not_a_var }} + {% verbatim %} + {% endverbatim %} + {% endverbatim blockname %} + {% endverbatim myblock %}""" + ) + + token_tuples = [token2tuple(token) for token in tokens] + expected_tokens = [ + (TokenType.BLOCK, "verbatim myblock", (0, 22), 1), + (TokenType.TEXT, "\n ", (22, 39), 1), + (TokenType.TEXT, "{{ this_is_not_a_var }}", (39, 62), 2), + (TokenType.TEXT, "\n ", (62, 79), 2), + (TokenType.TEXT, "{% verbatim %}", (79, 93), 3), + (TokenType.TEXT, "\n ", (93, 110), 3), + (TokenType.TEXT, "{% endverbatim %}", (110, 127), 4), + (TokenType.TEXT, "\n ", (127, 144), 4), + (TokenType.TEXT, "{% endverbatim blockname %}", (144, 171), 5), + (TokenType.TEXT, "\n ", (171, 184), 5), + (TokenType.BLOCK, "endverbatim myblock", (184, 209), 6), + ] + + self.assertEqual(token_tuples, expected_tokens) + + def test_template_nested_tags(self): + tokens = parse_template("""{% component 'test' "{% lorem var_a w %}" %}""") + + token_tuples = [token2tuple(token) for token in tokens] + expected_tokens = [ + (TokenType.BLOCK, "component 'test' \"{% lorem var_a w 
%}\"", (0, 44), 1), + ] + + self.assertEqual(token_tuples, expected_tokens) + + def test_brackets_and_percent_in_text(self): + tokens = parse_template('{% component \'test\' \'"\' "{%}" bool_var="{% noop is_active %}" / %}') + + token_tuples = [token2tuple(token) for token in tokens] + + expected_tokens = [ + (TokenType.BLOCK, 'component \'test\' \'"\' "{%}" bool_var="{% noop is_active %}" /', (0, 66), 1), + ] + + self.assertEqual(token_tuples, expected_tokens) + + def test_template_mixed(self): + tokens = parse_template( + """Hello {{ name }} + {# greeting #} + {% if show_greeting %} +

Welcome!

+ {% component 'test' key="{% lorem var_a w %}" %} + {% verbatim %} + {% endcomponent %} + {% endverbatim %} + {% endcomponent %} + {% endif %}""" + ) + + token_tuples = [token2tuple(token) for token in tokens] + expected_tokens = [ + (TokenType.TEXT, "Hello ", (0, 6), 1), + (TokenType.VAR, "name", (6, 16), 1), + (TokenType.TEXT, "\n ", (16, 29), 1), + (TokenType.COMMENT, "greeting", (29, 43), 2), + (TokenType.TEXT, "\n ", (43, 56), 2), + (TokenType.BLOCK, "if show_greeting", (56, 78), 3), + (TokenType.TEXT, "\n

Welcome!

\n ", (78, 129), 3), + (TokenType.BLOCK, "component 'test' key=\"{% lorem var_a w %}\"", (129, 177), 5), + (TokenType.TEXT, "\n ", (177, 198), 5), + (TokenType.BLOCK, "verbatim", (198, 212), 6), + (TokenType.TEXT, "\n ", (212, 237), 6), + (TokenType.TEXT, "{% endcomponent %}", (237, 255), 7), + (TokenType.TEXT, "\n ", (255, 276), 7), + (TokenType.BLOCK, "endverbatim", (276, 293), 8), + (TokenType.TEXT, "\n ", (293, 310), 8), + (TokenType.BLOCK, "endcomponent", (310, 328), 9), + (TokenType.TEXT, "\n ", (328, 341), 9), + (TokenType.BLOCK, "endif", (341, 352), 10), + ] + + self.assertEqual(token_tuples, expected_tokens) + + # Check that a template that contains `{% %}` inside of a component tag is parsed correctly + def test_component_mixed(self): + @register("test") + class Test(Component): + template: types.django_html = """ + {% load component_tags %} + Var: {{ var }} + Slot: {% slot "content" default / %} + """ + + def get_context_data(self, var: str) -> dict: + return {"var": var} + + template_str: types.django_html = """ + {% load component_tags %} +
+ Hello {{ name }} + {# greeting #} + {% if show_greeting %} +

Welcome!

+ {% component 'test' var="{% lorem var_a w %}" %} + {% verbatim %} + {% endcomponent %} + {% endverbatim %} + {% endcomponent %} + {% endif %} +
+ """ + template = Template(template_str) + rendered = template.render(Context({"name": "John", "show_greeting": True, "var_a": 2})) + + self.assertHTMLEqual( + rendered, + """ +
+ Hello John +

Welcome!

+ Var: lorem ipsum + Slot: {% endcomponent %} +
+ """, + )