mirror of
https://github.com/Textualize/rich.git
synced 2025-12-23 07:08:35 +00:00
Merge pull request #1 from ZZGG2/markup-validator
markup: add MarkupValidator (escape handling, nameless close) and tes…
This commit is contained in:
commit
5f9132097f
2 changed files with 191 additions and 0 deletions
113
rich/markup_validator.py
Normal file
113
rich/markup_validator.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
"""Simple markup validator using a stack to check tag pairing.
|
||||
|
||||
This validator only checks tag structure like [tag]...[/tag].
|
||||
It does not parse or validate tag attributes/styles — it extracts the
|
||||
tag name as the first token inside the brackets (so `[link=http://...]`
|
||||
has tag name `link`).
|
||||
|
||||
Usage:
|
||||
validator = MarkupValidator()
|
||||
valid = validator.validate("[b]bold[/b]")
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
from rich.errors import MarkupError
|
||||
|
||||
|
||||
_TAG_NAME_RE = re.compile(r"^/?\s*([A-Za-z0-9_:-]+)")
|
||||
|
||||
|
||||
class MarkupValidator:
|
||||
"""Validate simple bracket-style markup like `[tag]` and `[/tag]`.
|
||||
|
||||
Method `validate(text)` returns `True` when all tags are properly
|
||||
opened and closed with correct nesting. On failure it raises
|
||||
`rich.errors.MarkupError` with an explanatory message.
|
||||
"""
|
||||
|
||||
def validate(self, text: str) -> bool:
|
||||
"""Return True if the markup tags in `text` are well-formed.
|
||||
|
||||
Rules:
|
||||
- Opening tag: `[tag]` or `[tag attr=...]` pushes `tag` onto a stack.
|
||||
- Closing tag: `[/tag]` pops and must match last opened tag.
|
||||
- Nameless closing `[/]` pops the top of the stack.
|
||||
- Tag name is taken as the first token of the bracket content.
|
||||
- On any unmatched, missing, or malformed brackets/tags a
|
||||
`MarkupError` is raised describing the problem.
|
||||
"""
|
||||
stack: List[str] = []
|
||||
i = 0
|
||||
n = len(text)
|
||||
|
||||
def _is_escaped(s: str, idx: int) -> bool:
|
||||
"""Return True if character at idx is escaped by an odd number of backslashes."""
|
||||
# count consecutive backslashes immediately before idx
|
||||
bs = 0
|
||||
j = idx - 1
|
||||
while j >= 0 and s[j] == "\\":
|
||||
bs += 1
|
||||
j -= 1
|
||||
return (bs % 2) == 1
|
||||
|
||||
while i < n:
|
||||
ch = text[i]
|
||||
if ch == "[":
|
||||
# if this '[' is escaped (preceded by an odd number of backslashes),
|
||||
# treat it as literal text and ignore as a tag start
|
||||
if _is_escaped(text, i):
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# otherwise it starts a tag
|
||||
# find closing bracket
|
||||
j = text.find("]", i + 1)
|
||||
if j == -1:
|
||||
raise MarkupError("unclosed '[': missing ']' for an opening bracket")
|
||||
|
||||
content = text[i + 1 : j].strip()
|
||||
if not content:
|
||||
# empty brackets `[]` are invalid
|
||||
raise MarkupError("empty tag '[]' is invalid")
|
||||
|
||||
# determine name (handle closing tags) using regex
|
||||
is_closing = content.startswith("/")
|
||||
|
||||
if is_closing:
|
||||
# content after the slash (may be empty for nameless close)
|
||||
name_part = content[1:].lstrip()
|
||||
if not name_part:
|
||||
# nameless closing tag '[/]' pops the top of the stack
|
||||
if not stack:
|
||||
raise MarkupError("nameless closing tag '[/]' with no open tags to close")
|
||||
stack.pop()
|
||||
else:
|
||||
m = _TAG_NAME_RE.match(name_part)
|
||||
if not m:
|
||||
raise MarkupError(f"invalid closing tag '[/{name_part}]'")
|
||||
name = m.group(1)
|
||||
if not stack:
|
||||
raise MarkupError(f"closing tag '[/{name}]' with no matching opening tag")
|
||||
last = stack.pop()
|
||||
if last != name:
|
||||
raise MarkupError(f"mismatched closing tag '[/{name}]', expected '[/{last}]'")
|
||||
else:
|
||||
# opening tag: use only the first token as the tag name
|
||||
m = _TAG_NAME_RE.match(content)
|
||||
if not m:
|
||||
raise MarkupError(f"invalid opening tag '[{content}]'")
|
||||
name = m.group(1)
|
||||
stack.append(name)
|
||||
|
||||
i = j + 1
|
||||
else:
|
||||
i += 1
|
||||
|
||||
if stack:
|
||||
# unclosed tags remain on the stack
|
||||
last = stack[-1]
|
||||
raise MarkupError(f"unclosed tag '[{last}]'")
|
||||
return True
|
||||
78
tests/test_markup_validator.py
Normal file
78
tests/test_markup_validator.py
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
import pytest
|
||||
|
||||
from rich.markup_validator import MarkupValidator
|
||||
from rich.errors import MarkupError
|
||||
|
||||
|
||||
def test_simple_valid():
|
||||
v = MarkupValidator()
|
||||
assert v.validate("[b]bold[/b]")
|
||||
|
||||
|
||||
def test_nested_valid():
|
||||
v = MarkupValidator()
|
||||
assert v.validate("[b][i]inner[/i][/b]")
|
||||
|
||||
|
||||
def test_mismatch_tags():
|
||||
v = MarkupValidator()
|
||||
with pytest.raises(MarkupError) as exc:
|
||||
v.validate("[b]bad[/i]")
|
||||
assert "mismatched" in str(exc.value) or "expected" in str(exc.value)
|
||||
|
||||
|
||||
def test_unclosed_tag():
|
||||
v = MarkupValidator()
|
||||
with pytest.raises(MarkupError) as exc:
|
||||
v.validate("[b]open")
|
||||
assert "unclosed" in str(exc.value)
|
||||
|
||||
|
||||
def test_extra_closing():
|
||||
v = MarkupValidator()
|
||||
with pytest.raises(MarkupError):
|
||||
v.validate("text[/b]")
|
||||
|
||||
|
||||
def test_with_attributes():
|
||||
v = MarkupValidator()
|
||||
assert v.validate("[link=https://example.com]click[/link]")
|
||||
|
||||
|
||||
def test_empty_brackets_are_invalid():
|
||||
v = MarkupValidator()
|
||||
with pytest.raises(MarkupError):
|
||||
v.validate("[]")
|
||||
|
||||
|
||||
def test_space_in_opening_tag_uses_first_token():
|
||||
v = MarkupValidator()
|
||||
# '[bold red]' should push 'bold' and be closed by '[/bold]'
|
||||
assert v.validate("[bold red]text[/bold]")
|
||||
|
||||
|
||||
def test_nameless_closing_pops_top():
|
||||
v = MarkupValidator()
|
||||
# nameless closing pops the most recent tag ('b'), leaving 'a' unclosed
|
||||
with pytest.raises(MarkupError) as exc:
|
||||
v.validate("[a][b]x[/]")
|
||||
assert "unclosed" in str(exc.value)
|
||||
|
||||
|
||||
def test_nameless_closing_then_close_parent():
|
||||
v = MarkupValidator()
|
||||
# nameless close pops 'b', then closing [/a] closes 'a'
|
||||
assert v.validate("[a][b]x[/][/a]")
|
||||
|
||||
|
||||
def test_nameless_closing_with_empty_stack_is_invalid():
|
||||
v = MarkupValidator()
|
||||
with pytest.raises(MarkupError):
|
||||
v.validate("text[/]")
|
||||
|
||||
|
||||
def test_escaped_brackets():
|
||||
v = MarkupValidator()
|
||||
# the '[' is escaped, so it should not be treated as a tag start
|
||||
assert v.validate(r"This is \[bold] text")
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue