refactor: Patch Template.compile_nodelist with custom template parser (#908)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Juro Oravec 2025-01-15 22:34:32 +01:00 committed by GitHub
parent 8cd4b03286
commit 7ed4fd88f9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 810 additions and 204 deletions

View file

@ -0,0 +1,195 @@
# NOTE: This file is more of a playground than a proper test
import timeit
from typing import List, Tuple
from django.template.base import DebugLexer, Lexer, Token
from django_components.util.template_parser import parse_template
def django_lexer(template: str) -> List[Token]:
    """Tokenize `template` with Django's plain `Lexer` and return the tokens as a list."""
    return list(Lexer(template).tokenize())
def django_debug_lexer(template: str) -> List[Token]:
    """Tokenize `template` with Django's `DebugLexer` and return the tokens as a list."""
    return list(DebugLexer(template).tokenize())
def run_benchmark(template: str, num_iterations: int = 5000) -> Tuple[float, float]:
    """
    Time Django's `DebugLexer` against the custom `parse_template` on one template.

    Each tokenizer is invoked `num_iterations` times through `timeit.timeit`.

    Returns a tuple `(django_debug_time, custom_time)` with the total time
    reported by `timeit` for each tokenizer.
    """

    def measure(tokenize) -> float:
        return timeit.timeit(lambda: tokenize(template), number=num_iterations)

    django_debug_time = measure(django_debug_lexer)
    custom_time = measure(parse_template)
    return django_debug_time, custom_time
def print_benchmark_results(template: str, django_time: float, custom_time: float, num_iterations: int) -> None:
    """
    Print a formatted comparison of the two timings for one template.

    Args:
        template: The template source that was benchmarked (echoed in the report).
        django_time: Total time measured for Django's lexer, in seconds.
        custom_time: Total time measured for the custom lexer, in seconds.
        num_iterations: Number of iterations the timings were collected over.

    The last line reports how many times faster or slower the custom lexer is.
    The factor is always expressed as `slower time / faster time`, so it reads
    naturally in both directions (e.g. "4.00x slower" rather than "0.25x slower").
    """
    if custom_time < django_time:
        verdict, slower_time, faster_time = "faster", django_time, custom_time
    else:
        verdict, slower_time, faster_time = "slower", custom_time, django_time

    # Guard against a zero timing (possible with very few iterations / coarse clocks),
    # which would otherwise raise ZeroDivisionError.
    if faster_time > 0:
        ratio = slower_time / faster_time
    else:
        ratio = 1.0 if slower_time == 0 else float("inf")

    print(f"\nTemplate: {template}")
    print(f"Iterations: {num_iterations}")
    print(f"Django Lexer: {django_time:.6f} seconds")
    print(f"Custom Lexer: {custom_time:.6f} seconds")
    print(f"Difference: {abs(django_time - custom_time):.6f} seconds")
    print(f"Custom lexer is {ratio:.2f}x {verdict}")
if __name__ == "__main__":
# Templates to benchmark. Each entry below is labelled by the comment above it;
# whitespace inside the triple-quoted templates is part of the benchmarked input.
test_cases = [
# Simple text
"Hello World",
# Simple variable
"Hello {{ name }}",
# Simple block
"{% if condition %}Hello{% endif %}",
# Complex nested template
"""
{% extends "base.html" %}
{% block content %}
<h1>{{ title }}</h1>
{% for item in items %}
<div class="{{ item.class }}">
{{ item.name }}
{% if item.description %}
<p>{{ item.description }}</p>
{% endif %}
</div>
{% endfor %}
{% endblock %}
""",
# Component with nested tags
"""
{% component 'table'
headers=headers
rows=rows
footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
title="{% trans 'Data Table' %}"
%}
""",
# Real world example
"""
<div class="prose flex flex-col gap-8">
{# Info section #}
<div class="border-b border-neutral-300">
<div class="flex justify-between items-start">
<h3 class="mt-0">Project Info</h3>
{% if editable %}
{% component "Button"
href=project_edit_url
attrs:class="not-prose"
footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
title="{% trans 'Data Table' %}"
%}
Edit Project
{% endcomponent %}
{% endif %}
</div>
<table>
{% for key, value in project_info %}
<tr>
<td class="font-bold pr-4">
{{ key }}:
</td>
<td>
{{ value }}
</td>
</tr>
{% endfor %}
</table>
</div>
{# Status Updates section #}
{% component "ProjectStatusUpdates"
project_id=project.pk
status_updates=status_updates
editable=editable
footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
title="{% trans 'Data Table' %}"
/ %}
<div class="xl:grid xl:grid-cols-2 gap-10">
{# Team section #}
<div class="border-b border-neutral-300">
<div class="flex justify-between items-start">
<h3 class="mt-0">Dcode Team</h3>
{% if editable %}
{% component "Button"
href=edit_project_roles_url
attrs:class="not-prose"
footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
title="{% trans 'Data Table' %}"
%}
Edit Team
{% endcomponent %}
{% endif %}
</div>
{% component "ProjectUsers"
project_id=project.pk
roles_with_users=roles_with_users
editable=False
footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
title="{% trans 'Data Table' %}"
/ %}
</div>
{# POCs section #}
<div>
<div class="flex justify-between items-start max-xl:mt-6">
<h3 class="mt-0">Client POCs</h3>
{% if editable %}
{% component "Button"
href=edit_pocs_url
attrs:class="not-prose"
footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
title="{% trans 'Data Table' %}"
%}
Edit POCs
{% endcomponent %}
{% endif %}
</div>
{% if poc_data %}
<table>
<tr>
<th>Name</th>
<th>Job Title</th>
<th>Hubspot Profile</th>
</tr>
{% for data in poc_data %}
<tr>
<td>{{ data.poc.contact.first_name }} {{ data.poc.contact.last_name }}</td>
<td>{{ data.poc.contact.job_title }}</td>
<td>
{% component "Icon"
href=data.hubspot_url
name="arrow-top-right-on-square"
variant="outline"
color="text-gray-400 hover:text-gray-500"
footer="{% slot 'footer' %}Total: {{ total }}{% endslot %}"
title="{% trans 'Data Table' %}"
/ %}
</td>
</tr>
{% endfor %}
</table>
{% else %}
<p class="text-sm italic">No entries</p>
{% endif %}
</div>
</div>
</div>
""",
]
# Use a single iteration count for both the measurement and the report, so the
# printed "Iterations" value always matches what was actually timed.
# 5000 is the count the benchmark was effectively running with (run_benchmark's default).
num_iterations = 5000
for template in test_cases:
    django_time, custom_time = run_benchmark(template, num_iterations)
    print_benchmark_results(template, django_time, custom_time, num_iterations)

View file

@ -15,14 +15,14 @@ class ComponentsConfig(AppConfig):
def ready(self) -> None: def ready(self) -> None:
from django_components.app_settings import app_settings from django_components.app_settings import app_settings
from django_components.autodiscovery import autodiscover, import_libraries from django_components.autodiscovery import autodiscover, import_libraries
from django_components.component import monkeypatch_template
from django_components.component_registry import registry from django_components.component_registry import registry
from django_components.components.dynamic import DynamicComponent from django_components.components.dynamic import DynamicComponent
from django_components.util.django_monkeypatch import monkeypatch_template_cls
# NOTE: This monkeypatch is applied here, before Django processes any requests. # NOTE: This monkeypatch is applied here, before Django processes any requests.
# To make django-components work with django-debug-toolbar-template-profiler # To make django-components work with django-debug-toolbar-template-profiler
# See https://github.com/EmilStenstrom/django-components/discussions/819 # See https://github.com/EmilStenstrom/django-components/discussions/819
monkeypatch_template(Template) monkeypatch_template_cls(Template)
# Import modules set in `COMPONENTS.libraries` setting # Import modules set in `COMPONENTS.libraries` setting
import_libraries() import_libraries()

View file

@ -68,6 +68,7 @@ from django_components.slots import (
resolve_fills, resolve_fills,
) )
from django_components.template import cached_template from django_components.template import cached_template
from django_components.util.django_monkeypatch import is_template_cls_patched
from django_components.util.logger import trace_msg from django_components.util.logger import trace_msg
from django_components.util.misc import gen_id from django_components.util.misc import gen_id
from django_components.util.template_tag import TagParams from django_components.util.template_tag import TagParams
@ -1272,54 +1273,6 @@ class ComponentNode(BaseNode):
return output return output
def monkeypatch_template(template_cls: Type[Template]) -> None:
# Modify `Template.render` to set `isolated_context` kwarg of `push_state`
# based on our custom `Template._dc_is_component_nested`.
#
# Part of fix for https://github.com/EmilStenstrom/django-components/issues/508
#
# NOTE 1: While we could've subclassed Template, then we would need to either
# 1) ask the user to change the backend, so all templates are of our subclass, or
# 2) copy the data from user's Template class instance to our subclass instance,
# which could lead to doubly parsing the source, and could be problematic if users
# used more exotic subclasses of Template.
#
# Instead, modifying only the `render` method of an already-existing instance
# should work well with any user-provided custom subclasses of Template, and it
# doesn't require the source to be parsed multiple times. User can pass extra args/kwargs,
# and can modify the rendering behavior by overriding the `_render` method.
#
# NOTE 2: Instead of setting `Template._dc_is_component_nested`, alternatively we could
# have passed the value to `monkeypatch_template` directly. However, we intentionally
# did NOT do that, so the monkey-patched method is more robust, and can be e.g. copied
# to other.
if hasattr(template_cls, "_dc_patched"):
# Do not patch if done so already. This helps us avoid RecursionError
return
def _template_render(self: Template, context: Context, *args: Any, **kwargs: Any) -> str:
# ---------------- OUR CHANGES START ----------------
# We parametrized `isolated_context`, which was `True` in the original method.
if not hasattr(self, "_dc_is_component_nested"):
isolated_context = True
else:
# MUST be `True` for templates that are NOT import with `{% extends %}` tag,
# and `False` otherwise.
isolated_context = not self._dc_is_component_nested
# ---------------- OUR CHANGES END ----------------
with context.render_context.push_state(self, isolated_context=isolated_context):
if context.template is None:
with context.bind_template(self):
context.template_name = self.name
return self._render(context, *args, **kwargs)
else:
return self._render(context, *args, **kwargs)
template_cls.render = _template_render
template_cls._dc_patched = True
@contextmanager @contextmanager
def _maybe_bind_template(context: Context, template: Template) -> Generator[None, Any, None]: def _maybe_bind_template(context: Context, template: Template) -> Generator[None, Any, None]:
if context.template is None: if context.template is None:
@ -1342,7 +1295,7 @@ def _prepare_template(
# And https://github.com/EmilStenstrom/django-components/issues/634 # And https://github.com/EmilStenstrom/django-components/issues/634
template = component._get_template(context) template = component._get_template(context)
if not getattr(template, "_dc_patched"): if not is_template_cls_patched(template):
raise RuntimeError( raise RuntimeError(
"Django-components received a Template instance which was not patched." "Django-components received a Template instance which was not patched."
"If you are using Django's Template class, check if you added django-components" "If you are using Django's Template class, check if you added django-components"
@ -1350,10 +1303,11 @@ def _prepare_template(
"manually patch the class." "manually patch the class."
) )
# Set `Template._dc_is_component_nested` based on whether we're currently INSIDE # Set `Template._djc_is_component_nested` based on whether we're currently INSIDE
# the `{% extends %}` tag. # the `{% extends %}` tag.
# Part of fix for https://github.com/EmilStenstrom/django-components/issues/508 # Part of fix for https://github.com/EmilStenstrom/django-components/issues/508
template._dc_is_component_nested = bool(context.render_context.get(BLOCK_CONTEXT_KEY)) # See django_monkeypatch.py
template._djc_is_component_nested = bool(context.render_context.get(BLOCK_CONTEXT_KEY))
with _maybe_bind_template(context, template): with _maybe_bind_template(context, template):
yield template yield template

View file

@ -2,7 +2,9 @@ import re
from typing import TYPE_CHECKING, Any, Dict, List from typing import TYPE_CHECKING, Any, Dict, List
from django.template import Context, Node, NodeList, TemplateSyntaxError from django.template import Context, Node, NodeList, TemplateSyntaxError
from django.template.base import Lexer, Parser, VariableNode from django.template.base import Parser, VariableNode
from django_components.util.template_parser import parse_template
if TYPE_CHECKING: if TYPE_CHECKING:
from django_components.util.template_tag import TagParam from django_components.util.template_tag import TagParam
@ -48,8 +50,7 @@ class DynamicFilterExpression:
# Copy the Parser, and pass through the tags and filters available # Copy the Parser, and pass through the tags and filters available
# in the current context. Thus, if user calls `{% load %}` inside # in the current context. Thus, if user calls `{% load %}` inside
# the expression, it won't spill outside. # the expression, it won't spill outside.
lexer = Lexer(self.expr) tokens = parse_template(self.expr)
tokens = lexer.tokenize()
expr_parser = Parser(tokens=tokens) expr_parser = Parser(tokens=tokens)
expr_parser.tags = {**parser.tags} expr_parser.tags = {**parser.tags}
expr_parser.filters = {**parser.filters} expr_parser.filters = {**parser.filters}

View file

@ -28,7 +28,7 @@ from django_components.slots import SLOT_DEFAULT_KEYWORD, SLOT_REQUIRED_KEYWORD,
from django_components.tag_formatter import get_tag_formatter from django_components.tag_formatter import get_tag_formatter
from django_components.util.logger import trace_msg from django_components.util.logger import trace_msg
from django_components.util.misc import gen_id from django_components.util.misc import gen_id
from django_components.util.template_tag import TagSpec, fix_nested_tags, parse_template_tag, with_tag_spec from django_components.util.template_tag import TagSpec, parse_template_tag, with_tag_spec
# NOTE: Variable name `register` is required by Django to recognize this as a template tag library # NOTE: Variable name `register` is required by Django to recognize this as a template tag library
# See https://docs.djangoproject.com/en/dev/howto/custom-template-tags # See https://docs.djangoproject.com/en/dev/howto/custom-template-tags
@ -492,7 +492,6 @@ def component(
""" """
tag_id = gen_id() tag_id = gen_id()
fix_nested_tags(parser, token)
bits = token.split_contents() bits = token.split_contents()
# Let the TagFormatter pre-process the tokens # Let the TagFormatter pre-process the tokens

View file

@ -0,0 +1,110 @@
from typing import Any, Type
from django.template import Context, NodeList, Template
from django.template.base import Parser
from django_components.util.template_parser import parse_template
# In some cases we can't work around Django's design, and need to patch the template class.
def monkeypatch_template_cls(template_cls: Type[Template]) -> None:
    """
    Apply every django-components patch to `template_cls` and flag it as patched.

    The patches replace `compile_nodelist` and `render` on the class. Afterwards
    `template_cls._djc_patched` is set to `True`, which is the marker that
    `is_template_cls_patched()` looks for.
    """
    patchers = (
        monkeypatch_template_compile_nodelist,
        monkeypatch_template_render,
    )
    for apply_patch in patchers:
        apply_patch(template_cls)

    template_cls._djc_patched = True
# Patch `Template.compile_nodelist` to use our custom parser. Our parser makes it possible
# to use template tags as inputs to the component tag:
#
# {% component "my-component" description="{% lorem 3 w %}" / %}
def monkeypatch_template_compile_nodelist(template_cls: Type[Template]) -> None:
    """
    Replace `template_cls.compile_nodelist` with a version that tokenizes the
    template source with `parse_template()` instead of Django's lexers.

    The replacement follows Django's own `Template.compile_nodelist` (v5.1.3);
    the only intended difference is how the tokens are produced.
    """

    def _compile_nodelist(self: Template) -> NodeList:
        """
        Parse and compile the template source into a nodelist. If debug
        is True and an exception occurs during parsing, the exception is
        annotated with contextual line information where it occurred in the
        template source.
        """
        # Django (v5.1.3) picks `DebugLexer` or `Lexer` here based on `self.engine.debug`
        # and calls `lexer.tokenize()`. We use our own tokenizer instead.
        tokens = parse_template(self.source)

        engine = self.engine
        parser = Parser(
            tokens,
            libraries=engine.template_libraries,
            builtins=engine.template_builtins,
            origin=self.origin,
        )

        try:
            nodelist = parser.parse()
            # `extra_data` was added in Django v5.1
            # See https://github.com/django/django/commit/35bbb2c9c01882b1d77b0b8c737ac646144833d4 # noqa: E501
            self.extra_data = getattr(parser, "extra_data", {})
        except Exception as e:
            if self.engine.debug:
                e.template_debug = self.get_exception_info(e, e.token)  # type: ignore
            raise

        return nodelist

    template_cls.compile_nodelist = _compile_nodelist
def monkeypatch_template_render(template_cls: Type[Template]) -> None:
    """
    Replace `template_cls.render` so that the `isolated_context` kwarg of
    `push_state` is driven by our custom `Template._djc_is_component_nested`.

    Part of fix for https://github.com/EmilStenstrom/django-components/issues/508

    NOTE 1: Subclassing Template was the alternative, but that would mean either
    1) asking the user to change the backend, so all templates are of our subclass, or
    2) copying the data from the user's Template instance to our subclass instance,
       which could lead to parsing the source twice, and could be problematic with
       more exotic subclasses of Template.

    Patching only `render` should work with any user-provided subclass of Template
    and does not require the source to be parsed multiple times. Users can still pass
    extra args/kwargs, and can customize rendering by overriding `_render`.

    NOTE 2: The flag is read from `Template._djc_is_component_nested` rather than being
    passed to this function. That is intentional - it keeps the patched method
    self-contained, so it can e.g. be copied onto other classes.
    """
    # Do not patch if done so already. This helps us avoid RecursionError
    if is_template_cls_patched(template_cls):
        return

    def _template_render(self: Template, context: Context, *args: Any, **kwargs: Any) -> str:
        "Display stage -- can be called many times"
        # Django (v5.1.3) calls `context.render_context.push_state(self)` here, which
        # always isolates the context. We parametrize it instead:
        # - no `_djc_is_component_nested` attribute -> isolated (same as Django)
        # - otherwise it MUST be isolated for templates that are NOT imported with
        #   the `{% extends %}` tag, and NOT isolated otherwise.
        isolated_context = not getattr(self, "_djc_is_component_nested", False)

        with context.render_context.push_state(self, isolated_context=isolated_context):
            if context.template is not None:
                return self._render(context, *args, **kwargs)

            with context.bind_template(self):
                context.template_name = self.name
                return self._render(context, *args, **kwargs)

    template_cls.render = _template_render
def is_template_cls_patched(template_cls: Type[Template]) -> bool:
    """Return the `_djc_patched` marker of `template_cls`, or `False` when it is absent."""
    try:
        return template_cls._djc_patched
    except AttributeError:
        return False

View file

@ -0,0 +1,227 @@
"""
Parser for Django template.
The parser reads a template file (usually HTML, but not necessarily), which may contain
"template tags" like this:
```django
{% component 'my_comp' key=val key2='val2 two' %}
{% endcomponent %}
{{ my_var }}
{# I am comment #}
```
and returns a list of Tokens:
```py
[
(TokenType.TEXT, '\n', (0, 1), 1),
(TokenType.BLOCK, "component 'my_comp' key=val key2='val2 two'", (1, 50), 2),
(TokenType.TEXT, '\n', (50, 51), 2),
(TokenType.BLOCK, 'endcomponent', (51, 69), 3),
(TokenType.TEXT, '\n\n', (69, 71), 3),
(TokenType.VAR, 'my_var', (71, 83), 5),
(TokenType.TEXT, '\n\n', (83, 85), 5),
(TokenType.COMMENT, 'I am comment', (85, 103), 7),
(TokenType.TEXT, '\n', (103, 104), 7),
]
```
See `parse_template()` for details.
"""
import re
from functools import lru_cache
from typing import List, Optional, Tuple
from django.template.base import DebugLexer, Token, TokenType
from django.template.exceptions import TemplateSyntaxError
# NOTE: As of 0.125, the strategy is to use Django's lexer, and use our own parser
# only when necessary, for the shortest time possible.
#
# Before I switched to this strategy, my initial parser was about 50x slower than Django's lexer.
# I (Juro) assume it was because I was walking character by character, instead of using a regex.
#
# The overall speed should then depend on the number of broken tokens in the template.
#
# Performance of the new strategy on a real-world example:
# - A template with about 110 lines and 6 components
# - Components spanning ~35 lines in total, so roughly 1/3 of the template
# - The custom parser is about 8x slower than Django's Debug lexer.
# - For a mid-sized project of 200 templates, it would take 7-8 seconds to load all the templates
# (from 1 second with Django's lexer).
# - However, thanks to django-components' lazy-loading, this should not be a problem.
#
# How it works is that:
# 1. We use Django's lexer to get the tokens.
# 2. We check them one-by-one, and if we find a broken token, we switch to our parser to fix it.
# 3. Once the broken token is fixed, we find its end position, and switch back to the Django lexer
# for the remaining text (step 1).
def parse_template(text: str) -> List[Token]:
    """
    Tokenize a Django template, allowing `%}` (and thus nested template tags)
    inside quoted strings of `{% %}` tags.

    Django's `DebugLexer` does the bulk of the work. Every BLOCK token whose contents
    contain a quote is treated as potentially "broken" (cut short at a `%}` that sits
    inside a string) and is re-parsed with `_detailed_tag_parser()`. Lexing then
    resumes with Django's lexer right after the repaired tag.

    Args:
        text: The template source.

    Returns:
        List of tokens. `position` and `lineno` of each token are relative to the
        whole `text`, not to the chunk that was being lexed when it was produced.

    Raises:
        TemplateSyntaxError: Propagated from `_detailed_tag_parser()` when a tag or
            a string inside a tag is not terminated.
    """
    resolved_tokens: List[Token] = []
    index_start = 0
    index_end = len(text)
    # Number of lines that precede the chunk currently being lexed.
    lineno_offset = 0

    while index_start < index_end:
        broken_token: Optional[Token] = None

        # Do fast tokenization with regex - This is about 50x faster than our custom tokenizer.
        # We use DebugLexer because we need to get the position of the tokens.
        # DebugLexer and Lexer have very similar speeds, Debug is about 33% slower.
        lexer = DebugLexer(text[index_start:index_end])
        tokens: List[Token] = lexer.tokenize()

        for token in tokens:
            # The lexer only saw a slice of the text, so shift to absolute coordinates.
            token.lineno += lineno_offset
            token.position = (token.position[0] + index_start, token.position[1] + index_start)

            if token.token_type == TokenType.BLOCK and ("'" in token.contents or '"' in token.contents):
                broken_token = token
                break

            resolved_tokens.append(token)

        # No (more) broken tokens - everything up to the end of the text is resolved.
        if broken_token is None:
            break

        # We found a broken token, so we switch to our slow parser
        broken_token_start = broken_token.position[0]
        fixed_token = _detailed_tag_parser(text[broken_token_start:], broken_token.lineno, broken_token_start)
        resolved_tokens.append(fixed_token)
        index_start = fixed_token.position[1]

        # The next chunk starts on the line where the fixed tag ends. `fixed_token.lineno`
        # is already absolute, so the offset is assigned (not accumulated), and the
        # newlines are counted in the raw tag text, because the token contents are
        # stripped and may be missing leading / trailing newlines.
        lineno_offset = (
            fixed_token.lineno - 1  # -1 because lines are 1-indexed
            + text.count("\n", broken_token_start, index_start)
        )  # fmt: skip

    return resolved_tokens
# Handle parsing of `{% %}` tags, while allowing `%}` inside of strings
def _detailed_tag_parser(text: str, lineno: int, start_index: int) -> Token:
    """
    Parse a single `{% ... %}` tag from the start of `text`, treating `%}` inside
    single- or double-quoted strings as regular content.

    Args:
        text: Template source starting at the opening `{%` of the tag.
        lineno: Line number to assign to the returned token.
        start_index: Offset of `text[0]` within the full template. Used to report
            the token position in coordinates of the full template.

    Returns:
        A BLOCK token whose contents are the tag body with surrounding whitespace
        stripped, and whose position spans from `start_index` to just past the
        closing `%}`.

    Raises:
        TemplateSyntaxError: If a quoted string or the tag itself is not terminated.
    """
    index = 0
    length = len(text)
    result_content: List[str] = []

    # Pre-compute common substrings
    QUOTE_CHARS = ("'", '"')
    QUOTE_OR_PERCENT = (*QUOTE_CHARS, "%")

    def take_char() -> str:
        nonlocal index
        if index >= length:
            return ""
        char = text[index]
        index += 1
        return char

    def peek_char(offset: int = 0) -> str:
        peek_index = index + offset
        if peek_index >= length:
            return ""
        return text[peek_index]

    # This is an optimized version that uses regex to find the next stop character
    # and ignores the stop characters if they are prefixed by a backslash, if allow_escapes is True.
    #
    # For the intuition, the original version is:
    #
    # ```py
    # def take_until_any(stop_chars: Tuple[str, ...], allow_escapes: bool = False) -> str:
    #     nonlocal index
    #     start = index
    #     while index < length:
    #         char = text[index]
    #         if allow_escapes and char == BACKSLASH and index + 1 < length:
    #             index += 2
    #             continue
    #         if char in stop_chars:
    #             break
    #         index += 1
    #     return text[start:index]
    # ```
    def take_until_any(stop_chars: Tuple[str, ...], allow_escapes: bool = False) -> str:
        nonlocal index
        stop_chars_str = "".join(stop_chars)
        pattern = _compile_take_until_pattern(stop_chars_str, allow_escapes)

        # Find match at current position
        match = pattern.match(text, index)
        if match:
            matched_text = match.group(0)
            index += len(matched_text)
            return matched_text
        return ""

    # Given that this function is called only when there's a broken token,
    # we know that the first two characters are always "{%"
    take_char()  # {
    take_char()  # %

    # Main parsing loop
    while index < length:
        char = peek_char()

        # Handle strings within `{% %}`
        if char in QUOTE_CHARS:
            quote_char = take_char()
            result_content.append(quote_char)

            # Take content until matching quote, allowing escaped quotes
            content = take_until_any((quote_char,), allow_escapes=True)
            result_content.append(content)

            # Handle the closing quote
            if peek_char() == quote_char:
                result_content.append(take_char())
            else:
                raise TemplateSyntaxError(f"Unexpected end of text - unterminated {quote_char} string")
            continue

        # Check for closing tag
        if char == "%":
            if peek_char(1) == "}":
                take_char()  # %
                take_char()  # }
                break

            # False alarm - a lone `%` that does not close the tag. Consume ONLY this
            # character; taking everything up to the next quote here could swallow
            # the real closing `%}` that follows.
            result_content.append(take_char())
            continue

        # Take regular content until we hit a quote or potential closing tag
        content = take_until_any(QUOTE_OR_PERCENT)
        result_content.append(content)
    else:
        # The loop ran out of text without ever reaching the closing `%}`
        raise TemplateSyntaxError("Unexpected end of text - unterminated {% tag")

    result_str = "".join(result_content).strip()  # Django's Lexer.tokenize() strips the whitespace
    return Token(TokenType.BLOCK, result_str, (start_index, index + start_index), lineno)
# Create a regex pattern that takes anything until any of the stop characters are found.
#
# If allow_escapes is True, also the stop characters are allowed, given that they are
# prefixed by a backslash.
@lru_cache(maxsize=128)
def _compile_take_until_pattern(stop_chars: str, allow_escapes: bool) -> re.Pattern:
    """
    Build (and cache) a regex that matches the longest run of characters
    containing none of `stop_chars`.

    With `allow_escapes=True`, a backslash followed by a character is consumed
    as a pair, so a stop character preceded by a backslash does not end the run.
    """
    not_a_stop_char = "[^" + "".join(map(re.escape, stop_chars)) + "]"

    if allow_escapes:
        # Either a backslash-escaped character, or any non-stop character
        regex = "(?:\\\\.|" + not_a_stop_char + ")*"
    else:
        regex = not_a_stop_char + "*"

    return re.compile(regex)

View file

@ -1,14 +1,14 @@
import functools import functools
import inspect import inspect
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Callable, Dict, Iterable, List, Mapping, NamedTuple, Optional, Set, Tuple, cast from typing import Any, Callable, Dict, Iterable, List, Mapping, NamedTuple, Optional, Set, Tuple
from django.template import Context, NodeList from django.template import Context, NodeList
from django.template.base import Parser, Token, TokenType from django.template.base import Parser, Token
from django.template.exceptions import TemplateSyntaxError from django.template.exceptions import TemplateSyntaxError
from django_components.expression import process_aggregate_kwargs from django_components.expression import process_aggregate_kwargs
from django_components.util.tag_parser import TagAttr, TagValue, parse_tag from django_components.util.tag_parser import TagAttr, parse_tag
@dataclass @dataclass
@ -97,6 +97,17 @@ class TagSpec:
# Set the signature on the function # Set the signature on the function
validator.__signature__ = self.signature # type: ignore[attr-defined] validator.__signature__ = self.signature # type: ignore[attr-defined]
# Call the validator with our args and kwargs, in such a way to
# let the Python interpreter validate on repeated kwargs. E.g.
#
# ```
# args, kwargs = validator(
# *call_args,
# **call_kwargs[0],
# **call_kwargs[1],
# ...
# )
# ```
call_args = [] call_args = []
call_kwargs = [] call_kwargs = []
for param in params: for param in params:
@ -105,13 +116,12 @@ class TagSpec:
else: else:
call_kwargs.append({param.key: param.value}) call_kwargs.append({param.key: param.value})
# Call the validator with our args and kwargs, in such a way to
# let the Python interpreter validate on repeated kwargs.
#
# E.g. `args, kwargs = validator(*call_args, **call_kwargs[0], **call_kwargs[1])`
#
# NOTE: Although we use `exec()` here, it's safe, because we control the input - # NOTE: Although we use `exec()` here, it's safe, because we control the input -
# we pass in only the list index. # we make dynamic only the list index.
#
# We MUST use the indices, because we can't trust neither the param keys nor values,
# so we MUST NOT reference them directly in the exec script, otherwise we'd be at risk
# of injection attack.
validator_call_script = "args, kwargs = validator(*call_args, " validator_call_script = "args, kwargs = validator(*call_args, "
for kw_index, _ in enumerate(call_kwargs): for kw_index, _ in enumerate(call_kwargs):
validator_call_script += f"**call_kwargs[{kw_index}], " validator_call_script += f"**call_kwargs[{kw_index}], "
@ -229,8 +239,6 @@ def parse_template_tag(
token: Token, token: Token,
tag_spec: TagSpec, tag_spec: TagSpec,
) -> ParsedTag: ) -> ParsedTag:
fix_nested_tags(parser, token)
_, attrs = parse_tag(token.contents, parser) _, attrs = parse_tag(token.contents, parser)
# First token is tag name, e.g. `slot` in `{% slot <name> ... %}` # First token is tag name, e.g. `slot` in `{% slot <name> ... %}`
@ -340,138 +348,3 @@ def merge_repeated_kwargs(params: List[TagParam]) -> List[TagParam]:
params_by_key[param.key].value += " " + str(param.value) params_by_key[param.key].value += " " + str(param.value)
return resolved_params return resolved_params
def fix_nested_tags(parser: Parser, block_token: Token) -> None:
# Since the nested tags MUST be wrapped in quotes, e.g.
# `{% component 'test' "{% lorem var_a w %}" %}`
# `{% component 'test' key="{% lorem var_a w %}" %}`
#
# We can parse the tag's tokens so we can find the last one, and so we consider
# the unclosed `{%` only for the last bit.
_, attrs = parse_tag(block_token.contents, parser)
# If there are no attributes, then there are no nested tags
if not attrs:
return
last_attr = attrs[-1]
# TODO: Currently, using a nested template inside a list or dict
# e.g. `{% component ... key=["{% nested %}"] %}` is NOT supported.
# Hence why we leave if value is not "simple" (which means the value is list or dict).
if last_attr.value.type != "simple":
return
last_attr_value = cast(TagValue, last_attr.value.entries[0])
last_token = last_attr_value.parts[-1]
# User probably forgot to wrap the nested tag in quotes, or this is the end of the input.
# `{% component ... key={% nested %} %}`
# `{% component ... key= %}`
if not last_token.value:
return
# When our template tag contains a nested tag, e.g.:
# `{% component 'test' "{% lorem var_a w %}" %}`
#
# Django parses this into:
# `TokenType.BLOCK: 'component 'test' "{% lorem var_a w'`
#
# Above you can see that the token ends at the end of the NESTED tag,
# and includes `{%`. So that's what we use to identify if we need to fix
# nested tags or not.
has_unclosed_tag = (
(last_token.value.count("{%") > last_token.value.count("%}"))
# Moreover we need to also check for unclosed quotes for this edge case:
# `{% component 'test' "{%}" %}`
#
# Which Django parses this into:
# `TokenType.BLOCK: 'component 'test' "{'`
#
# Here we cannot see any unclosed tags, but there is an unclosed double quote at the end.
#
# But we cannot naively search the full contents for unclosed quotes, but
# only within the last 'bit'. Consider this:
# `{% component 'test' '"' "{%}" %}`
#
or (last_token.value in ("'{", '"{'))
)
# There is 3 double quotes, but if the contents get split at the first `%}`
# then there will be a single unclosed double quote in the last bit.
first_char_index = len(last_token.spread or "")
has_unclosed_quote = (
not last_token.quoted
and last_token.value
and last_token.value[first_char_index] in ('"', "'")
) # fmt: skip
needs_fixing = has_unclosed_tag and has_unclosed_quote
if not needs_fixing:
return
block_token.contents += "%}" if has_unclosed_quote else " %}"
expects_text = True
while True:
# This is where we need to take parsing in our own hands, because Django parser parsed
# only up to the first closing tag `%}`, but that closing tag corresponds to a nested tag,
# and not to the end of the outer template tag.
#
# NOTE: If we run out of tokens, this will raise, and break out of the loop
token = parser.next_token()
# If there is a nested BLOCK `{% %}`, VAR `{{ }}`, or COMMENT `{# #}` tag inside the template tag,
# then the way Django parses it results in alternating Tokens of TEXT and non-TEXT types.
#
# We use `expects_text` to know which type to handle.
if expects_text:
if token.token_type != TokenType.TEXT:
raise TemplateSyntaxError(f"Template parser received TokenType '{token.token_type}' instead of 'TEXT'")
expects_text = False
# Once we come across a closing tag in the text, we know that's our original
# end tag. Until then, append all the text to the block token and continue
if "%}" not in token.contents:
block_token.contents += token.contents
continue
# This is the ACTUAL end of the block template tag
remaining_block_content, text_content = token.contents.split("%}", 1)
block_token.contents += remaining_block_content
# We put back into the Parser the remaining bit of the text.
# NOTE: Looking at the implementation, `parser.prepend_token()` is the opposite
# of `parser.next_token()`.
parser.prepend_token(Token(TokenType.TEXT, contents=text_content))
break
# In this case we've come across a next block tag `{% %}` inside the template tag
# This isn't the first occurence, where the `{%` was ignored. And so, the content
# between the `{% %}` is correctly captured, e.g.
#
# `{% firstof False 0 is_active %}`
# gives
# `TokenType.BLOCK: 'firstof False 0 is_active'`
#
# But we don't want to evaluate this as a standalone BLOCK tag, and instead append
# it to the block tag that this nested block is part of
else:
if token.token_type == TokenType.TEXT:
raise TemplateSyntaxError(
f"Template parser received TokenType '{token.token_type}' instead of 'BLOCK', 'VAR', 'COMMENT'"
)
if token.token_type == TokenType.BLOCK:
block_token.contents += "{% " + token.contents + " %}"
elif token.token_type == TokenType.VAR:
block_token.contents += "{{ " + token.contents + " }}"
elif token.token_type == TokenType.COMMENT:
pass # Comments are ignored
else:
raise TemplateSyntaxError(f"Unknown token type '{token.token_type}'")
expects_text = True
continue

View file

@ -0,0 +1,247 @@
from django.template import Context
from django.template.base import Template, Token, TokenType
from django_components import Component, register, types
from django_components.util.template_parser import parse_template
from .django_test_setup import setup_test_config
from .testutils import BaseTestCase
# Configure the test environment at import time, before any test class is defined.
# `autodiscover` is switched off here - presumably so that components are only
# registered explicitly by the tests (confirm against `setup_test_config`).
setup_test_config({"autodiscover": False})
# Flatten a token into a plain `(token_type, contents, (start, end), lineno)` tuple,
# so that lists of tokens can be compared against literal expectations with `assertEqual`.
# `start` and `end` are the first two items of `token.position`.
def token2tuple(token: Token):
    start, end = token.position[0], token.position[1]
    return (token.token_type, token.contents, (start, end), token.lineno)
class TemplateParserTests(BaseTestCase):
# A template without any template syntax is expected to yield exactly one TEXT token
# that spans the whole input.
def test_template_text(self):
    expected = [(TokenType.TEXT, "Hello world", (0, 11), 1)]
    actual = [token2tuple(tok) for tok in parse_template("Hello world")]
    self.assertEqual(actual, expected)
# A `{{ ... }}` expression is expected to be split off from the preceding text
# and reported as a VAR token whose contents are the bare expression.
def test_template_variable(self):
    expected = [
        (TokenType.TEXT, "Hello ", (0, 6), 1),
        (TokenType.VAR, "name", (6, 16), 1),
    ]
    actual = [token2tuple(tok) for tok in parse_template("Hello {{ name }}")]
    self.assertEqual(actual, expected)
# NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it
#
# An opening `{{` that is never closed is expected to stay part of a single TEXT token.
def test_template_variable_unterminated(self):
    source = "Hello {{ name"
    expected = [(TokenType.TEXT, "Hello {{ name", (0, 13), 1)]
    actual = list(map(token2tuple, parse_template(source)))
    self.assertEqual(actual, expected)
# A single `{% ... %}` tag is expected to become one BLOCK token whose contents
# are the text between the delimiters, without the surrounding whitespace.
def test_template_tag(self):
    source = "{% component 'my_comp' key=val %}"
    expected = [(TokenType.BLOCK, "component 'my_comp' key=val", (0, 33), 1)]
    actual = [token2tuple(tok) for tok in parse_template(source)]
    self.assertEqual(actual, expected)
# NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it
#
# An opening `{%` that is never closed is expected to be reported as plain TEXT.
def test_template_tag_unterminated(self):
    source = "{% if true"
    expected = [(TokenType.TEXT, "{% if true", (0, 10), 1)]
    actual = list(map(token2tuple, parse_template(source)))
    self.assertEqual(actual, expected)
# A `{# ... #}` comment in the middle of text is expected to produce three tokens:
# the text before it, a COMMENT token with the stripped comment body, and the text after it.
def test_template_comment(self):
    source = "Hello{# this is a comment #}World"
    expected = [
        (TokenType.TEXT, "Hello", (0, 5), 1),
        (TokenType.COMMENT, "this is a comment", (5, 28), 1),
        (TokenType.TEXT, "World", (28, 33), 1),
    ]
    actual = [token2tuple(tok) for tok in parse_template(source)]
    self.assertEqual(actual, expected)
# NOTE(Juro): IMO this should be a TemplateSyntaxError, but Django doesn't raise it
#
# An opening `{#` that is never closed is expected to be reported as plain TEXT.
def test_template_comment_unterminated(self):
    source = "{# comment"
    expected = [(TokenType.TEXT, "{# comment", (0, 10), 1)]
    actual = list(map(token2tuple, parse_template(source)))
    self.assertEqual(actual, expected)
# Everything between `{% verbatim %}` and `{% endverbatim %}` is expected to come back
# as TEXT tokens, even the parts that look like a variable (`{{ }}`) or a tag (`{% %}`).
# Only the opening and closing verbatim tags themselves are expected as BLOCK tokens.
def test_template_verbatim(self):
tokens = parse_template(
"""{% verbatim %}
{{ this_is_not_a_var }}
{% this_is_not_a_tag %}
{% endverbatim %}"""
)
token_tuples = [token2tuple(token) for token in tokens]
# Each entry is (token_type, contents, (start, end), lineno), as built by `token2tuple`.
expected_tokens = [
(TokenType.BLOCK, "verbatim", (0, 14), 1),
(TokenType.TEXT, "\n ", (14, 31), 1),
(TokenType.TEXT, "{{ this_is_not_a_var }}", (31, 54), 2),
(TokenType.TEXT, "\n ", (54, 71), 2),
(TokenType.TEXT, "{% this_is_not_a_tag %}", (71, 94), 3),
(TokenType.TEXT, "\n ", (94, 107), 3),
(TokenType.BLOCK, "endverbatim", (107, 124), 4),
]
self.assertEqual(token_tuples, expected_tokens)
# A named verbatim block (`{% verbatim myblock %}`) is expected to be closed only by the
# end tag carrying the same name (`{% endverbatim myblock %}`). The nested `{% verbatim %}`,
# the unnamed `{% endverbatim %}` and the differently named `{% endverbatim blockname %}`
# are all expected to be reported as TEXT.
def test_template_verbatim_with_name(self):
tokens = parse_template(
"""{% verbatim myblock %}
{{ this_is_not_a_var }}
{% verbatim %}
{% endverbatim %}
{% endverbatim blockname %}
{% endverbatim myblock %}"""
)
token_tuples = [token2tuple(token) for token in tokens]
# Each entry is (token_type, contents, (start, end), lineno), as built by `token2tuple`.
expected_tokens = [
(TokenType.BLOCK, "verbatim myblock", (0, 22), 1),
(TokenType.TEXT, "\n ", (22, 39), 1),
(TokenType.TEXT, "{{ this_is_not_a_var }}", (39, 62), 2),
(TokenType.TEXT, "\n ", (62, 79), 2),
(TokenType.TEXT, "{% verbatim %}", (79, 93), 3),
(TokenType.TEXT, "\n ", (93, 110), 3),
(TokenType.TEXT, "{% endverbatim %}", (110, 127), 4),
(TokenType.TEXT, "\n ", (127, 144), 4),
(TokenType.TEXT, "{% endverbatim blockname %}", (144, 171), 5),
(TokenType.TEXT, "\n ", (171, 184), 5),
(TokenType.BLOCK, "endverbatim myblock", (184, 209), 6),
]
self.assertEqual(token_tuples, expected_tokens)
# A `{% ... %}` tag that appears inside a quoted argument of another tag is expected
# to stay part of the outer tag, so the whole input yields a single BLOCK token.
def test_template_nested_tags(self):
    source = """{% component 'test' "{% lorem var_a w %}" %}"""
    expected = [
        (TokenType.BLOCK, "component 'test' \"{% lorem var_a w %}\"", (0, 44), 1),
    ]
    actual = [token2tuple(tok) for tok in parse_template(source)]
    self.assertEqual(actual, expected)
# Quote characters, a bare `{%}` and a nested `{% ... %}` inside quoted arguments are
# expected not to end the tag early: the whole input yields a single BLOCK token,
# including the trailing self-closing `/`.
def test_brackets_and_percent_in_text(self):
    source = '{% component \'test\' \'"\' "{%}" bool_var="{% noop is_active %}" / %}'
    expected = [
        (TokenType.BLOCK, 'component \'test\' \'"\' "{%}" bool_var="{% noop is_active %}" /', (0, 66), 1),
    ]
    actual = list(map(token2tuple, parse_template(source)))
    self.assertEqual(actual, expected)
# Exercises a template that combines plain text, a variable, a comment, block tags,
# a component tag with a nested `{% lorem ... %}` in a quoted argument, and a verbatim
# section. The `{% endcomponent %}` inside the verbatim section is expected as TEXT,
# while the one after `{% endverbatim %}` is expected as a BLOCK token.
def test_template_mixed(self):
tokens = parse_template(
"""Hello {{ name }}
{# greeting #}
{% if show_greeting %}
<h1>Welcome!</h1>
{% component 'test' key="{% lorem var_a w %}" %}
{% verbatim %}
{% endcomponent %}
{% endverbatim %}
{% endcomponent %}
{% endif %}"""
)
token_tuples = [token2tuple(token) for token in tokens]
# Each entry is (token_type, contents, (start, end), lineno), as built by `token2tuple`.
expected_tokens = [
(TokenType.TEXT, "Hello ", (0, 6), 1),
(TokenType.VAR, "name", (6, 16), 1),
(TokenType.TEXT, "\n ", (16, 29), 1),
(TokenType.COMMENT, "greeting", (29, 43), 2),
(TokenType.TEXT, "\n ", (43, 56), 2),
(TokenType.BLOCK, "if show_greeting", (56, 78), 3),
(TokenType.TEXT, "\n <h1>Welcome!</h1>\n ", (78, 129), 3),
(TokenType.BLOCK, "component 'test' key=\"{% lorem var_a w %}\"", (129, 177), 5),
(TokenType.TEXT, "\n ", (177, 198), 5),
(TokenType.BLOCK, "verbatim", (198, 212), 6),
(TokenType.TEXT, "\n ", (212, 237), 6),
(TokenType.TEXT, "{% endcomponent %}", (237, 255), 7),
(TokenType.TEXT, "\n ", (255, 276), 7),
(TokenType.BLOCK, "endverbatim", (276, 293), 8),
(TokenType.TEXT, "\n ", (293, 310), 8),
(TokenType.BLOCK, "endcomponent", (310, 328), 9),
(TokenType.TEXT, "\n ", (328, 341), 9),
(TokenType.BLOCK, "endif", (341, 352), 10),
]
self.assertEqual(token_tuples, expected_tokens)
# Check that a template that contains `{% %}` inside of a component tag is parsed correctly
#
# Unlike the token-level tests, this one renders a real Django `Template`:
# - the registered "test" component prints its `var` input and its default "content" slot;
# - the outer template passes `var="{% lorem var_a w %}"` and fills the slot with a
#   verbatim section that contains the literal text `{% endcomponent %}`.
def test_component_mixed(self):
@register("test")
class Test(Component):
template: types.django_html = """
{% load component_tags %}
Var: {{ var }}
Slot: {% slot "content" default / %}
"""
def get_context_data(self, var: str) -> dict:
return {"var": var}
template_str: types.django_html = """
{% load component_tags %}
<div>
Hello {{ name }}
{# greeting #}
{% if show_greeting %}
<h1>Welcome!</h1>
{% component 'test' var="{% lorem var_a w %}" %}
{% verbatim %}
{% endcomponent %}
{% endverbatim %}
{% endcomponent %}
{% endif %}
</div>
"""
template = Template(template_str)
rendered = template.render(Context({"name": "John", "show_greeting": True, "var_a": 2}))
# With `var_a` set to 2, the nested `{% lorem var_a w %}` is expected to render as
# "lorem ipsum", and the verbatim `{% endcomponent %}` is expected to appear literally
# in the slot output.
self.assertHTMLEqual(
rendered,
"""
<div>
Hello John
<h1>Welcome!</h1>
Var: lorem ipsum
Slot: {% endcomponent %}
</div>
""",
)