refactor: Add own LRU cache impl for template caching (#828)

Juro Oravec, 2024-12-13 09:29:19 +01:00, committed by GitHub
parent 894dee3cad
commit 87919e1163
5 changed files with 164 additions and 77 deletions

django_components/template.py

@@ -1,24 +1,16 @@
-from functools import lru_cache
 from typing import Any, Optional, Type, TypeVar
 
 from django.template import Origin, Template
 from django.template.base import UNKNOWN_SOURCE
 
 from django_components.app_settings import app_settings
-from django_components.util.cache import lazy_cache
+from django_components.util.cache import LRUCache
+from django_components.util.misc import get_import_path
 
 TTemplate = TypeVar("TTemplate", bound=Template)
 
-
-# Lazily initialize the cache. The cached function takes only the parts that can
-# affect how the template string is processed - Template class, template string, and engine
-@lazy_cache(lambda: lru_cache(maxsize=app_settings.TEMPLATE_CACHE_SIZE))
-def _create_template(
-    template_cls: Type[TTemplate],
-    template_string: str,
-    engine: Optional[Any] = None,
-) -> TTemplate:
-    return template_cls(template_string, engine=engine)
+# Lazily initialize the cache
+template_cache: Optional[LRUCache[Template]] = None
 
 
 # Central logic for creating Templates from string, so we can cache the results
@@ -62,13 +54,20 @@ def cached_template(
     )
     ```
     """  # noqa: E501
-    template = _create_template(template_cls or Template, template_string, engine)
+    global template_cache
+    if template_cache is None:
+        template_cache = LRUCache(maxsize=app_settings.TEMPLATE_CACHE_SIZE)
 
-    # Assign the origin and name separately, so the caching doesn't depend on them
-    # Since we might be accessing a template from cache, we want to define these only once
-    if not getattr(template, "_dc_cached", False):
-        template.origin = origin or Origin(UNKNOWN_SOURCE)
-        template.name = name
-        template._dc_cached = True
+    template_cls = template_cls or Template
+    template_cls_path = get_import_path(template_cls)
+    engine_cls_path = get_import_path(engine.__class__) if engine else None
+    cache_key = (template_cls_path, template_string, engine_cls_path)
+
+    maybe_cached_template = template_cache.get(cache_key)
+    if maybe_cached_template is None:
+        template = template_cls(template_string, origin=origin, name=name, engine=engine)
+        template_cache.set(cache_key, template)
+    else:
+        template = maybe_cached_template
 
     return template
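The cache key now captures everything that affects how the template string is compiled: the import path of the template class, the template string itself, and the engine class. `origin` and `name` are passed straight to the constructor, so cached instances no longer need the old `_dc_cached` patching. A rough usage sketch, not part of the commit — it assumes Django settings are already configured so `Template` can be instantiated, and the key in the comment is only indicative of what `get_import_path` produces:

    from django.template import Template
    from django_components.template import cached_template

    # First call compiles the template and stores it under a key like
    # ("django.template.base.Template", "Hello {{ name }}", None)
    template_1 = cached_template("Hello {{ name }}")

    # Same string + same template class -> cache hit, same instance returned
    template_2 = cached_template("Hello {{ name }}")
    assert template_2 is template_1

    # A subclass has a different import path, so it gets its own cache entry
    class MyTemplate(Template):
        pass

    assert cached_template("Hello {{ name }}", MyTemplate) is not template_1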

django_components/util/cache.py

@ -1,45 +1,111 @@
import functools
from typing import Any, Callable, TypeVar, cast
from collections.abc import Hashable
from typing import Dict, Generic, Optional, TypeVar, cast
TFunc = TypeVar("TFunc", bound=Callable)
T = TypeVar("T")
def lazy_cache(
make_cache: Callable[[], Callable[[Callable], Callable]],
) -> Callable[[TFunc], TFunc]:
"""
Decorator that caches the given function similarly to `functools.lru_cache`.
But the cache is instantiated only at first invocation.
class CacheNode(Generic[T]):
"""A node in the doubly linked list."""
`cache` argument is a function that generates the cache function,
e.g. `functools.lru_cache()`.
"""
_cached_fn = None
def __init__(self, key: Hashable, value: T):
self.key = key
self.value = value
self.prev: Optional["CacheNode"] = None
self.next: Optional["CacheNode"] = None
def decorator(fn: TFunc) -> TFunc:
@functools.wraps(fn)
def wrapper(*args: Any, **kwargs: Any) -> Any:
# Lazily initialize the cache
nonlocal _cached_fn
if not _cached_fn:
# E.g. `lambda: functools.lru_cache(maxsize=app_settings.TEMPLATE_CACHE_SIZE)`
cache = make_cache()
_cached_fn = cache(fn)
return _cached_fn(*args, **kwargs)
class LRUCache(Generic[T]):
"""A simple LRU Cache implementation."""
# Allow to access the LRU cache methods
# See https://stackoverflow.com/a/37654201/9788634
wrapper.cache_info = lambda: _cached_fn.cache_info() # type: ignore
wrapper.cache_clear = lambda: _cached_fn.cache_clear() # type: ignore
def __init__(self, maxsize: Optional[int] = None):
"""
Initialize the LRU cache.
# And allow to remove the cache instance (mostly for tests)
def cache_remove() -> None:
nonlocal _cached_fn
_cached_fn = None
:param maxsize: Maximum number of items the cache can hold. If None, the cache is unbounded.
"""
self.maxsize = maxsize
self.cache: Dict[Hashable, CacheNode[T]] = {} # Maps keys to nodes in the doubly linked list
# Dummy head and tail nodes to simplify operations
self.head = CacheNode[T]("", cast(T, None)) # Most recently used
self.tail = CacheNode[T]("", cast(T, None)) # Least recently used
self.head.next = self.tail
self.tail.prev = self.head
wrapper.cache_remove = cache_remove # type: ignore
def get(self, key: Hashable) -> Optional[T]:
"""
Retrieve the value associated with the key.
return cast(TFunc, wrapper)
:param key: Key to look up in the cache.
:return: Value associated with the key, or None if not found.
"""
if key in self.cache:
node = self.cache[key]
# Move the accessed node to the front (most recently used)
self._remove(node)
self._add_to_front(node)
return node.value
else:
return None # Key not found
return decorator
def has(self, key: Hashable) -> bool:
"""
Check if the key is in the cache.
:param key: Key to check.
:return: True if the key is in the cache, False otherwise.
"""
return key in self.cache
def set(self, key: Hashable, value: T) -> None:
"""
Insert or update the value associated with the key.
:param key: Key to insert or update.
:param value: Value to associate with the key.
"""
if key in self.cache:
node = self.cache[key]
# Update the value
node.value = value
# Move the node to the front (most recently used)
self._remove(node)
self._add_to_front(node)
else:
if self.maxsize is not None and len(self.cache) >= self.maxsize:
# Cache is full; remove the least recently used item
lru_node = self.tail.prev
if lru_node is None:
raise RuntimeError("LRUCache: Tail node is None")
self._remove(lru_node)
del self.cache[lru_node.key]
# Add the new node to the front
new_node = CacheNode[T](key, value)
self.cache[key] = new_node
self._add_to_front(new_node)
def clear(self) -> None:
"""Clear the cache."""
self.cache.clear()
self.head.next = self.tail
self.tail.prev = self.head
def _remove(self, node: CacheNode) -> None:
"""Remove a node from the doubly linked list."""
prev_node = node.prev
next_node = node.next
if prev_node is not None:
prev_node.next = next_node
if next_node is not None:
next_node.prev = prev_node
def _add_to_front(self, node: CacheNode) -> None:
"""Add a node right after the head (mark it as most recently used)."""
node.next = self.head.next
node.prev = self.head
if self.head.next:
self.head.next.prev = node
self.head.next = node
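Note that `get` also refreshes recency: a hit unlinks the node and re-inserts it right behind the dummy head, so eviction always pops the node just before the dummy tail, and every operation stays O(1). A standalone sketch of that behavior (illustrative only, not code from the commit):

    from django_components.util.cache import LRUCache

    cache = LRUCache[str](maxsize=2)
    cache.set("a", "first")
    cache.set("b", "second")

    # Reading "a" marks it as most recently used, so "b" becomes the LRU entry
    cache.get("a")
    cache.set("c", "third")  # evicts "b", not "a"

    assert cache.has("a") and cache.has("c")
    assert not cache.has("b")

    # With maxsize=None (the default) the cache is unbounded and never evicts
    unbounded = LRUCache[int]()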

tests/test_cache.py (new file, 42 lines)

@@ -0,0 +1,42 @@
+from django.test import TestCase
+
+from django_components.util.cache import LRUCache
+
+from .django_test_setup import setup_test_config
+
+setup_test_config({"autodiscover": False})
+
+
+class CacheTests(TestCase):
+    def test_cache(self):
+        cache = LRUCache[int](maxsize=3)
+
+        cache.set("a", 1)
+        cache.set("b", 2)
+        cache.set("c", 3)
+
+        self.assertEqual(cache.get("a"), 1)
+        self.assertEqual(cache.get("b"), 2)
+        self.assertEqual(cache.get("c"), 3)
+
+        cache.set("d", 4)
+
+        self.assertEqual(cache.get("a"), None)
+        self.assertEqual(cache.get("b"), 2)
+        self.assertEqual(cache.get("c"), 3)
+        self.assertEqual(cache.get("d"), 4)
+
+        cache.set("e", 5)
+        cache.set("f", 6)
+
+        self.assertEqual(cache.get("b"), None)
+        self.assertEqual(cache.get("c"), None)
+        self.assertEqual(cache.get("d"), 4)
+        self.assertEqual(cache.get("e"), 5)
+        self.assertEqual(cache.get("f"), 6)
+
+        cache.clear()
+
+        self.assertEqual(cache.get("d"), None)
+        self.assertEqual(cache.get("e"), None)
+        self.assertEqual(cache.get("f"), None)

@@ -1,5 +1,4 @@
 from django.template import Context, Template
-from django.test import override_settings
 
 from django_components import Component, cached_template, types
@@ -25,27 +24,6 @@ class TemplateCacheTest(BaseTestCase):
         template = cached_template("Variable: <strong>{{ variable }}</strong>", MyTemplate)
         self.assertIsInstance(template, MyTemplate)
 
-    @override_settings(COMPONENTS={"template_cache_size": 2})
-    def test_cache_discards_old_entries(self):
-        template_1 = cached_template("Variable: <strong>{{ variable }}</strong>")
-        template_1._test_id = "123"
-
-        template_2 = cached_template("Variable2")
-        template_2._test_id = "456"
-
-        # Templates 1 and 2 should still be available
-        template_1_copy = cached_template("Variable: <strong>{{ variable }}</strong>")
-        self.assertEqual(template_1_copy._test_id, "123")
-
-        template_2_copy = cached_template("Variable2")
-        self.assertEqual(template_2_copy._test_id, "456")
-
-        # But once we add the third template, template 1 should go
-        cached_template("Variable3")
-        template_1_copy2 = cached_template("Variable: <strong>{{ variable }}</strong>")
-        self.assertEqual(hasattr(template_1_copy2, "_test_id"), False)
-
     def test_component_template_is_cached(self):
         class SimpleComponent(Component):
             def get_template(self, context):

@@ -30,9 +30,11 @@ class BaseTestCase(SimpleTestCase):
         super().tearDown()
         registry.clear()
 
-        from django_components.template import _create_template
+        from django_components.template import template_cache
 
-        _create_template.cache_remove()  # type: ignore[attr-defined]
+        # NOTE: There are 1-2 tests which check Templates, so we need to clear the cache
+        if template_cache:
+            template_cache.clear()
 
     # Mock the `generate` function used inside `gen_id` so it returns deterministic IDs
     def _start_gen_id_patch(self):