From 7e715d225b09abc943abb1a0bdae45c4e933724a Mon Sep 17 00:00:00 2001 From: Daeseok Bae Date: Thu, 20 Nov 2025 21:11:17 +0900 Subject: [PATCH] markup: add MarkupValidator (escape handling, nameless close) and tests; raise MarkupError on failures --- rich/markup_validator.py | 113 +++++++++++++++++++++++++++++++++ tests/test_markup_validator.py | 78 +++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 rich/markup_validator.py create mode 100644 tests/test_markup_validator.py diff --git a/rich/markup_validator.py b/rich/markup_validator.py new file mode 100644 index 00000000..6a2336dd --- /dev/null +++ b/rich/markup_validator.py @@ -0,0 +1,113 @@ +"""Simple markup validator using a stack to check tag pairing. + +This validator only checks tag structure like [tag]...[/tag]. +It does not parse or validate tag attributes/styles — it extracts the +tag name as the first token inside the brackets (so `[link=http://...]` +has tag name `link`). + +Usage: + validator = MarkupValidator() + valid = validator.validate("[b]bold[/b]") +""" +from __future__ import annotations + +import re +from typing import List + +from rich.errors import MarkupError + + +_TAG_NAME_RE = re.compile(r"^/?\s*([A-Za-z0-9_:-]+)") + + +class MarkupValidator: + """Validate simple bracket-style markup like `[tag]` and `[/tag]`. + + Method `validate(text)` returns `True` when all tags are properly + opened and closed with correct nesting. On failure it raises + `rich.errors.MarkupError` with an explanatory message. + """ + + def validate(self, text: str) -> bool: + """Return True if the markup tags in `text` are well-formed. + + Rules: + - Opening tag: `[tag]` or `[tag attr=...]` pushes `tag` onto a stack. + - Closing tag: `[/tag]` pops and must match last opened tag. + - Nameless closing `[/]` pops the top of the stack. + - Tag name is taken as the first token of the bracket content. + - On any unmatched, missing, or malformed brackets/tags a + `MarkupError` is raised describing the problem. + """ + stack: List[str] = [] + i = 0 + n = len(text) + + def _is_escaped(s: str, idx: int) -> bool: + """Return True if character at idx is escaped by an odd number of backslashes.""" + # count consecutive backslashes immediately before idx + bs = 0 + j = idx - 1 + while j >= 0 and s[j] == "\\": + bs += 1 + j -= 1 + return (bs % 2) == 1 + + while i < n: + ch = text[i] + if ch == "[": + # if this '[' is escaped (preceded by an odd number of backslashes), + # treat it as literal text and ignore as a tag start + if _is_escaped(text, i): + i += 1 + continue + + # otherwise it starts a tag + # find closing bracket + j = text.find("]", i + 1) + if j == -1: + raise MarkupError("unclosed '[': missing ']' for an opening bracket") + + content = text[i + 1 : j].strip() + if not content: + # empty brackets `[]` are invalid + raise MarkupError("empty tag '[]' is invalid") + + # determine name (handle closing tags) using regex + is_closing = content.startswith("/") + + if is_closing: + # content after the slash (may be empty for nameless close) + name_part = content[1:].lstrip() + if not name_part: + # nameless closing tag '[/]' pops the top of the stack + if not stack: + raise MarkupError("nameless closing tag '[/]' with no open tags to close") + stack.pop() + else: + m = _TAG_NAME_RE.match(name_part) + if not m: + raise MarkupError(f"invalid closing tag '[/{name_part}]'") + name = m.group(1) + if not stack: + raise MarkupError(f"closing tag '[/{name}]' with no matching opening tag") + last = stack.pop() + if last != name: + raise MarkupError(f"mismatched closing tag '[/{name}]', expected '[/{last}]'") + else: + # opening tag: use only the first token as the tag name + m = _TAG_NAME_RE.match(content) + if not m: + raise MarkupError(f"invalid opening tag '[{content}]'") + name = m.group(1) + stack.append(name) + + i = j + 1 + else: + i += 1 + + if stack: + # unclosed tags remain on the stack + last = stack[-1] + raise MarkupError(f"unclosed tag '[{last}]'") + return True diff --git a/tests/test_markup_validator.py b/tests/test_markup_validator.py new file mode 100644 index 00000000..aff4d7d6 --- /dev/null +++ b/tests/test_markup_validator.py @@ -0,0 +1,78 @@ +import pytest + +from rich.markup_validator import MarkupValidator +from rich.errors import MarkupError + + +def test_simple_valid(): + v = MarkupValidator() + assert v.validate("[b]bold[/b]") + + +def test_nested_valid(): + v = MarkupValidator() + assert v.validate("[b][i]inner[/i][/b]") + + +def test_mismatch_tags(): + v = MarkupValidator() + with pytest.raises(MarkupError) as exc: + v.validate("[b]bad[/i]") + assert "mismatched" in str(exc.value) or "expected" in str(exc.value) + + +def test_unclosed_tag(): + v = MarkupValidator() + with pytest.raises(MarkupError) as exc: + v.validate("[b]open") + assert "unclosed" in str(exc.value) + + +def test_extra_closing(): + v = MarkupValidator() + with pytest.raises(MarkupError): + v.validate("text[/b]") + + +def test_with_attributes(): + v = MarkupValidator() + assert v.validate("[link=https://example.com]click[/link]") + + +def test_empty_brackets_are_invalid(): + v = MarkupValidator() + with pytest.raises(MarkupError): + v.validate("[]") + + +def test_space_in_opening_tag_uses_first_token(): + v = MarkupValidator() + # '[bold red]' should push 'bold' and be closed by '[/bold]' + assert v.validate("[bold red]text[/bold]") + + +def test_nameless_closing_pops_top(): + v = MarkupValidator() + # nameless closing pops the most recent tag ('b'), leaving 'a' unclosed + with pytest.raises(MarkupError) as exc: + v.validate("[a][b]x[/]") + assert "unclosed" in str(exc.value) + + +def test_nameless_closing_then_close_parent(): + v = MarkupValidator() + # nameless close pops 'b', then closing [/a] closes 'a' + assert v.validate("[a][b]x[/][/a]") + + +def test_nameless_closing_with_empty_stack_is_invalid(): + v = MarkupValidator() + with pytest.raises(MarkupError): + v.validate("text[/]") + + +def test_escaped_brackets(): + v = MarkupValidator() + # the '[' is escaped, so it should not be treated as a tag start + assert v.validate(r"This is \[bold] text") +