removed unused files and deps

This commit is contained in:
Will Abbott 2024-10-04 19:16:27 +01:00
parent df016eb08e
commit e74a302ac3
6 changed files with 3 additions and 408 deletions

View file

@ -10,5 +10,4 @@ authors = ["Will Abbott <willabb83@gmail.com>"]
[tool.poetry.dependencies]
python = "^3.10"
Django = "^5.1"
beautifulsoup4 = "~4.12.2"
Django = "^5.1"

View file

@ -1,326 +0,0 @@
import random
import re
import warnings
from html.parser import HTMLParser
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
from bs4.builder._htmlparser import BeautifulSoupHTMLParser, HTMLParserTreeBuilder
from bs4.formatter import HTMLFormatter
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
class UnsortedAttributes(HTMLFormatter):
    """Formatter that keeps BS4 from re-ordering a tag's attributes on output."""

    def attributes(self, tag):
        """Yield each ``(name, value)`` pair of ``tag.attrs`` in stored order."""
        yield from tag.attrs.items()
class CottonHTMLParser(BeautifulSoupHTMLParser):
    """HTML parser that preserves attribute values exactly as the developer wrote them.

    ``handle_starttag`` is overridden so that a valueless ("boolean") attribute
    keeps ``None`` as its value, which lets later stages tell it apart from an
    attribute that was explicitly given an empty value.
    """

    def __init__(self, tree_builder, soup, on_duplicate_attribute):
        # Only the stdlib HTMLParser initialiser is run (no extra arguments);
        # the state used by the handlers is then set up by hand below.
        HTMLParser.__init__(self)
        self.soup = soup
        self.tree_builder = tree_builder
        # Policy for repeated attribute names: "ignore", "replace" / None,
        # or a callable taking (attr_dict, key, value).
        self.on_duplicate_attribute = on_duplicate_attribute
        self.IGNORE = "ignore"
        self.REPLACE = "replace"
        self._first_processing_instruction = None
        self._root_tag = None
        self.already_closed_empty_element = []

    def handle_starttag(self, name, attrs, handle_empty_element=True):
        """Handle an opening tag, e.g. '<tag>'"""
        collected = {}
        for attr_name, attr_value in attrs:
            # Cotton edit: a ``None`` value is deliberately NOT coerced to "",
            # so valueless / "boolean" attributes are permitted.
            if attr_name not in collected:
                collected[attr_name] = attr_value
                continue

            # The same attribute name appeared again on this tag.
            policy = self.on_duplicate_attribute
            if policy == self.IGNORE:
                continue
            if policy in (None, self.REPLACE):
                collected[attr_name] = attr_value
            else:
                policy(collected, attr_name, attr_value)

        line, column = self.getpos()
        tag = self.soup.handle_starttag(
            name, None, None, collected, sourceline=line, sourcepos=column
        )
        if tag and tag.is_empty_element and handle_empty_element:
            # Empty elements get their end tag emitted immediately.
            self.handle_endtag(name, check_already_closed=False)
            self.already_closed_empty_element.append(name)
        # Cotton edit: the root element is intentionally not validated here.
class CottonHTMLTreeBuilder(HTMLParserTreeBuilder):
    """Tree builder that parses markup with ``CottonHTMLParser``."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.parser_class = CottonHTMLParser
        # Remembered so it can be handed to every parser created in ``feed``.
        self.handle_duplicate_attributes = kwargs.get("on_duplicate_attribute")

    def feed(self, markup):
        """Parse ``markup`` with a freshly created parser, then close that parser."""
        html_parser = self.parser_class(self, self.soup, self.handle_duplicate_attributes)
        html_parser.feed(markup)
        html_parser.close()
class CottonBs4Compiler:
    """Compile cotton ``<c-*>`` markup into Django template syntax using BeautifulSoup.

    Django syntax (``{% ... %}``, ``{{ ... }}``) and HTML entities are swapped
    for inert placeholders before the markup is parsed, and restored after the
    component tags have been rewritten, so the parser never touches them.
    """

    DJANGO_SYNTAX_PLACEHOLDER_PREFIX = "__django_syntax__"
    COTTON_VERBATIM_PATTERN = re.compile(
        r"\{% cotton_verbatim %\}(.*?)\{% endcotton_verbatim %\}", re.DOTALL
    )
    DJANGO_TAG_PATTERN = re.compile(r"(\s?)(\{%.*?%\})(\s?)")
    DJANGO_VAR_PATTERN = re.compile(r"(\s?)(\{\{.*?\}\})(\s?)")
    HTML_ENTITY_PATTERN = re.compile(r"&[a-zA-Z]+;|&#[0-9]+;|&#x[a-fA-F0-9]+;")

    def __init__(self):
        # Entries recorded by _replace_syntax_with_placeholders, in match order.
        self.django_syntax_placeholders = []
        # Entities recorded by _replace_html_entities_with_placeholders, in match order.
        self.html_entity_placeholders = []

    def process(self, content: str):
        """Run ``content`` through every compilation stage, in order, and return the result."""
        processors = [
            self._replace_syntax_with_placeholders,
            self._replace_html_entities_with_placeholders,
            self._compile_cotton_to_django,
            self._replace_placeholders_with_syntax,
            self._replace_placeholders_with_html_entities,
            self._remove_duplicate_attribute_markers,
        ]
        for processor in processors:
            # noinspection PyArgumentList
            content = processor(content)
        return content

    @staticmethod
    def _render_soup(soup):
        """Serialise ``soup`` to text, keeping attributes in their original order."""
        return soup.encode(formatter=UnsortedAttributes()).decode("utf-8")

    def _replace_html_entities_with_placeholders(self, content):
        """Replace HTML entities with placeholders so they don't get touched by BS4."""
        # Start from a clean list on every run (mirrors the Django placeholder
        # list) so a reused compiler instance does not accumulate old entities.
        self.html_entity_placeholders = []

        def replace_entity(match):
            self.html_entity_placeholders.append(match.group(0))
            return f"__HTML_ENTITY_{len(self.html_entity_placeholders) - 1}__"

        return self.HTML_ENTITY_PATTERN.sub(replace_entity, content)

    def _replace_placeholders_with_html_entities(self, content: str):
        """Swap every ``__HTML_ENTITY_<n>__`` placeholder back to its recorded entity."""
        for i, entity in enumerate(self.html_entity_placeholders):
            content = content.replace(f"__HTML_ENTITY_{i}__", entity)
        return content

    def _replace_syntax_with_placeholders(self, content: str):
        """Replace {% ... %} and {{ ... }} with placeholders so they don't get touched
        or encoded by bs4. They are restored after bs4 has done its job."""
        self.django_syntax_placeholders = []

        def replace_cotton_verbatim(match):
            # {% cotton_verbatim %} protects its content through the bs4 parsing
            # process, for when cotton syntax must be printed as-is (e.g. in <pre>).
            self.django_syntax_placeholders.append(
                {"type": "verbatim", "content": match.group(1)}
            )
            return (
                f"{self.DJANGO_SYNTAX_PLACEHOLDER_PREFIX}{len(self.django_syntax_placeholders)}__"
            )

        def replace_django_syntax(match):
            # Remember whether the match had a space on either side so the
            # original spacing can be restored later.
            left_space, syntax, right_space = match.groups()
            self.django_syntax_placeholders.append(
                {
                    "type": "django",
                    "content": syntax,
                    "left_space": bool(left_space),
                    "right_space": bool(right_space),
                }
            )
            return (
                f" {self.DJANGO_SYNTAX_PLACEHOLDER_PREFIX}{len(self.django_syntax_placeholders)}__ "
            )

        # Order matters: verbatim blocks first, then {% ... %}, then {{ ... }}.
        content = self.COTTON_VERBATIM_PATTERN.sub(replace_cotton_verbatim, content)
        content = self.DJANGO_TAG_PATTERN.sub(replace_django_syntax, content)
        content = self.DJANGO_VAR_PATTERN.sub(replace_django_syntax, content)
        return content

    def _compile_cotton_to_django(self, content: str):
        """Convert cotton <c-* syntax to {%."""
        soup = self._make_soup(content)
        if cvars_el := soup.find("c-vars"):
            soup = self._wrap_with_cotton_vars_frame(soup, cvars_el)
        self._transform_components(soup)
        return self._render_soup(soup)

    def _replace_placeholders_with_syntax(self, content: str):
        """Replace placeholders with original syntax."""
        for i, placeholder in enumerate(self.django_syntax_placeholders, 1):
            token = f"{self.DJANGO_SYNTAX_PLACEHOLDER_PREFIX}{i}__"
            if placeholder["type"] == "verbatim":
                content = content.replace(token, placeholder["content"])
                continue

            # Build the pattern from the original whitespace: a padding space is
            # swallowed only on a side that had no space originally. This avoids
            # whitespace changes that can lead to unintended tag type mutations,
            # i.e. <div{% expr %}></div> --> <div__placeholder></div__placeholder>
            #      --> <div{% expr %}></div{% expr %}>
            left_group = "" if placeholder["left_space"] else r"( ?)"
            right_group = "" if placeholder["right_space"] else r"( ?)"
            # A callable replacement inserts the text literally. Passing the
            # string itself would make re.sub interpret backslashes in the
            # template source as escapes / group references.
            content = re.sub(
                f"{left_group}{token}{right_group}",
                lambda _match, original=placeholder["content"]: original,
                content,
            )
        return content

    def _remove_duplicate_attribute_markers(self, content: str):
        """Strip the ``__COTTON_DUPE_ATTR__<5 hex>`` masks added to duplicate attribute names."""
        return re.sub(r"__COTTON_DUPE_ATTR__[0-9A-F]{5}", "", content, flags=re.IGNORECASE)

    def _wrap_with_cotton_vars_frame(self, soup, cvars_el):
        """Wrap the document in ``{% vars ... %}`` / ``{% endvars %}`` built from ``<c-vars>``.

        The attributes of ``cvars_el`` become the arguments of the opening tag,
        the element itself is removed, and the remaining markup is re-parsed
        inside the wrapper. Only called when a ``<c-vars>`` tag is present.
        Returns the new soup.
        """
        cvars_attrs = []
        for k, v in cvars_el.attrs.items():
            if v is None:
                # Valueless attribute: emit the bare name.
                cvars_attrs.append(k)
                continue
            # BS4 stores multi-valued attributes (class, accesskey, dropzone)
            # as lists; join them back into the original string form.
            if isinstance(v, list):
                v = " ".join(v)
            cvars_attrs.append(f'{k}="{v}"')
        cvars_el.decompose()

        joined_attrs = " ".join(cvars_attrs)
        opening = f"{{% vars {joined_attrs} %}}".replace("\n", "")
        closing = "{% endvars %}"
        # Serialise what is left of the soup and wrap it in the vars block.
        wrapped_content = opening + self._render_soup(soup).strip() + closing
        return self._make_soup(wrapped_content)

    def _transform_components(self, soup):
        """Replace <c-[component path]> tags with ``{% c ... %}...{% endc %}``; return ``soup``."""
        for tag in soup.find_all(re.compile("^c-"), recursive=True):
            if tag.name == "c-slot":
                self._transform_named_slot(tag)
                continue

            component_key = tag.name[2:]
            opening_tag = f"{{% c {component_key} "
            # Attributes whose value contains template expressions ('{{' / '{%'
            # placeholders), new lines or '=' are emitted as {% attr %} blocks.
            complex_attrs = []

            for key, value in tag.attrs.items():
                if value is None:
                    # Valueless attribute.
                    opening_tag += f" {key}"
                    continue
                # BS4 stores multi-valued attributes (class, accesskey,
                # dropzone) as lists; join them back into a string.
                if isinstance(value, list):
                    value = " ".join(value)
                # Django template tags cannot have {{ or {% expressions in their
                # attribute values, nor new lines: treat both as "expression attrs".
                if self.DJANGO_SYNTAX_PLACEHOLDER_PREFIX in value or "\n" in value or "=" in value:
                    complex_attrs.append((key, value))
                    continue
                opening_tag += ' {}="{}"'.format(key, value)
            opening_tag += " %}"

            component_tag = opening_tag
            for key, value in complex_attrs:
                component_tag += f"{{% attr {key} %}}{value}{{% endattr %}}"

            if tag.contents:
                # Compile nested components in the tag body before embedding it.
                tag_soup = self._make_soup(tag.decode_contents(formatter=UnsortedAttributes()))
                self._transform_components(tag_soup)
                component_tag += self._render_soup(tag_soup)
            component_tag += "{% endc %}"

            # Replace the original tag with the compiled django syntax.
            tag.replace_with(self._make_soup(component_tag))
        return soup

    def _transform_named_slot(self, slot_tag):
        """Compile <c-slot> to {% slot %}"""
        slot_name = slot_tag.get("name", "").strip()
        inner_html = slot_tag.decode_contents(formatter=UnsortedAttributes())
        # Compile any components found inside the slot content.
        slot_soup = self._make_soup(inner_html)
        self._transform_components(slot_soup)
        cotton_slot_tag = f"{{% slot {slot_name} %}}{self._render_soup(slot_soup)}{{% endslot %}}"
        slot_tag.replace_with(self._make_soup(cotton_slot_tag))

    def _make_soup(self, content):
        """Parse ``content`` with the cotton tree builder, masking duplicate attributes."""
        return BeautifulSoup(
            content,
            "html.parser",
            builder=CottonHTMLTreeBuilder(on_duplicate_attribute=handle_duplicate_attributes),
        )
def handle_duplicate_attributes(tag_attrs, key, value):
    """Store a repeated attribute under a masked, unique key instead of merging it.

    BS4 cleans html and removes duplicate attributes. That would be fine if the
    target was html, but the target is Django Template Language, where
    expressions govern content including the attributes of any XML-like tag,
    so duplicate attributes per tag are perfectly valid:

        <a href="#" {% if something %} class="this" {% else %} class="that" {% endif %}>Hello</a>

    The duplicate key is therefore made unique within the tag by appending
    ``__COTTON_DUPE_ATTR__`` plus five random hex digits, so BS4 will not
    merge or replace the existing entry. The mask is removed again in post
    processing.

    Args:
        tag_attrs: The attribute dict of the tag being parsed; mutated in place.
        key: The attribute name that is already present in ``tag_attrs``.
        value: The value of the repeated attribute.

    Todo - This could be simplified with a custom formatter
    """
    hex_digits = "0123456789ABCDEF"
    while True:
        key_id = "".join(random.choice(hex_digits) for _ in range(5))
        masked_key = f"{key}__COTTON_DUPE_ATTR__{key_id}"
        # A random suffix can repeat; retry so an earlier masked duplicate is
        # never silently overwritten.
        if masked_key not in tag_attrs:
            break
    tag_attrs[masked_key] = value

View file

@ -1,43 +0,0 @@
# NOTE(review): PLATFORM is declared but not referenced by any instruction below.
ARG PLATFORM=linux/amd64

# ---- Tailwind Build Stage (build is quicker like this vs installing npm, node separately) ----
FROM node:16-slim AS build_tailwind
WORKDIR /css
# Install node dependencies first so this layer is cached until package*.json changes
COPY package*.json ./
RUN npm install
COPY . .
RUN ["npx", "tailwindcss", "-o", "./docs_project/static/app.css"]

# ---- Application Stage: official Python runtime as the base image ----
FROM python:3.12-slim-bookworm AS base

# Setup env
ENV PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_NO_CACHE_DIR=off \
    POETRY_NO_INTERACTION=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    DEBIAN_FRONTEND=noninteractive

# Set the working directory in the container
WORKDIR /app

# Install Poetry
RUN pip install "poetry==1.8.3"

# Copy the whole build context (project sources and dependency definitions) into the image
COPY . /app/

# Copy the compiled stylesheet from the build_tailwind stage
COPY --from=build_tailwind /css/docs_project/static/app.css ./docs_project/static/app.css

# Install project dependencies into the system interpreter (no virtualenv)
RUN poetry config virtualenvs.create false \
    && poetry install

# Collect static files at build time; SECRET_KEY is a dummy value for this step only
RUN SECRET_KEY=dummy STATIC_URL='/staticfiles/' python manage.py collectstatic --noinput --verbosity 2

EXPOSE 8000
ENV PORT=8000
CMD ["gunicorn", "docs_project.wsgi:application", "--bind", "0.0.0.0:8000"]

View file

@ -4,7 +4,6 @@ version = "0.9"
requires-python = ">=3.10, <4"
dependencies = [
"django~=5.1",
"beautifulsoup4~=4.12.2",
"heroicons~=2.6.0",
"gunicorn~=20.1.0",
"asgiref>=3.6.0,<4",
@ -24,7 +23,6 @@ authors = ["Will Abbott <willabb83@gmail.com>"]
[tool.poetry.dependencies]
python = "^3.10"
Django = "^5.1"
beautifulsoup4 = "~4.12.2"
heroicons = "2.6.0"
gunicorn = "20.1.0"
asgiref = ">=3.6.0,<4"

34
poetry.lock generated
View file

@ -45,27 +45,6 @@ files = [
[package.extras]
tzdata = ["tzdata"]
[[package]]
name = "beautifulsoup4"
version = "4.12.3"
description = "Screen-scraping library"
optional = false
python-versions = ">=3.6.0"
files = [
{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
{file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
]
[package.dependencies]
soupsieve = ">1.2"
[package.extras]
cchardet = ["cchardet"]
chardet = ["chardet"]
charset-normalizer = ["charset-normalizer"]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
name = "django"
version = "4.2.16"
@ -87,17 +66,6 @@ tzdata = {version = "*", markers = "sys_platform == \"win32\""}
argon2 = ["argon2-cffi (>=19.1.0)"]
bcrypt = ["bcrypt"]
[[package]]
name = "soupsieve"
version = "2.5"
description = "A modern CSS selector implementation for Beautiful Soup."
optional = false
python-versions = ">=3.8"
files = [
{file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"},
{file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"},
]
[[package]]
name = "sqlparse"
version = "0.5.1"
@ -138,4 +106,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = ">=3.8,<4"
content-hash = "b9a548f4d72374e4ff45169264c16916e1be93de9fdb4a0eb7d6547acba8c3df"
content-hash = "412d27424bee9618bceac4874a9fc9baa76cc71a6d1f7099799b66caa0c3f28a"

View file

@ -10,7 +10,7 @@ authors = [ "Will Abbott <willabb83@gmail.com>",]
license = "MIT"
readme = "README.md"
classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3.10", "Framework :: Django",]
keywords = [ "django", "cotton", "reusable", "app",]
keywords = [ "django", "components", "ui",]
exclude = [ "dev", "docs", "django_cotton/tests", "django_cotton/templates",]
[tool.black]
@ -22,5 +22,4 @@ profile = "black"
[tool.poetry.dependencies]
python = ">=3.8,<4"
beautifulsoup4 = "~4.12.2"
django = ">=4.2,<5.2"