From 7e715d225b09abc943abb1a0bdae45c4e933724a Mon Sep 17 00:00:00 2001
From: Daeseok Bae <tkdxm0208@gmail.com>
Date: Thu, 20 Nov 2025 21:11:17 +0900
Subject: [PATCH] markup: add MarkupValidator (escape handling, nameless close)
 and tests; raise MarkupError on failures

---
 rich/markup_validator.py       | 113 +++++++++++++++++++++++++++++++++
 tests/test_markup_validator.py |  78 +++++++++++++++++++++++
 2 files changed, 191 insertions(+)
 create mode 100644 rich/markup_validator.py
 create mode 100644 tests/test_markup_validator.py

diff --git a/rich/markup_validator.py b/rich/markup_validator.py
new file mode 100644
index 00000000..6a2336dd
--- /dev/null
+++ b/rich/markup_validator.py
@@ -0,0 +1,113 @@
+"""Simple markup validator using a stack to check tag pairing.
+
+This validator only checks tag structure like [tag]...[/tag].
+It does not parse or validate tag attributes/styles — it extracts the
+tag name as the first token inside the brackets (so `[link=http://...]`
+has tag name `link`).
+
+Usage:
+    validator = MarkupValidator()
+    valid = validator.validate("[b]bold[/b]")
+"""
+from __future__ import annotations
+
+import re
+from typing import List
+
+from rich.errors import MarkupError
+
+
+_TAG_NAME_RE = re.compile(r"([A-Za-z0-9_:-]+)")
+
+
+def _is_escaped(text: str, index: int) -> bool:
+    """Return True if `text[index]` is preceded by an odd number of backslashes."""
+    start = index
+    while start > 0 and text[start - 1] == "\\":
+        start -= 1
+    return (index - start) % 2 == 1
+
+
+class MarkupValidator:
+    """Validate simple bracket-style markup like `[tag]` and `[/tag]`.
+
+    Method `validate(text)` returns `True` when all tags are properly
+    opened and closed with correct nesting. On failure it raises
+    `rich.errors.MarkupError` with an explanatory message.
+    """
+
+    def validate(self, text: str) -> bool:
+        """Return True if the markup tags in `text` are well-formed.
+
+        Rules:
+        - Opening tag: `[tag]` or `[tag attr=...]` pushes `tag` onto a stack.
+        - Closing tag: `[/tag]` pops and must match last opened tag.
+        - Nameless closing `[/]` pops the top of the stack.
+        - Tag name is taken as the first token of the bracket content.
+        - A `[` preceded by an odd number of backslashes is literal text.
+        - On any unmatched, missing, or malformed brackets/tags a
+          `MarkupError` is raised describing the problem.
+
+        Args:
+            text: The markup string to check.
+
+        Returns:
+            Always `True`; every failure is reported by raising.
+
+        Raises:
+            MarkupError: If a bracket or tag is unclosed, empty, mismatched,
+                or otherwise malformed (including `[//tag]`).
+        """
+        stack: List[str] = []
+        start = text.find("[")
+        while start != -1:
+            if _is_escaped(text, start):
+                # escaped '[' is literal text; resume the scan just after it
+                start = text.find("[", start + 1)
+                continue
+
+            end = text.find("]", start + 1)
+            if end == -1:
+                raise MarkupError("unclosed '[': missing ']' for an opening bracket")
+
+            content = text[start + 1 : end].strip()
+            if not content:
+                # empty brackets `[]` are invalid
+                raise MarkupError("empty tag '[]' is invalid")
+
+            if content.startswith("/"):
+                self._close_tag(stack, content[1:].lstrip())
+            else:
+                # opening tag: use only the first token as the tag name
+                match = _TAG_NAME_RE.match(content)
+                if not match:
+                    raise MarkupError(f"invalid opening tag '[{content}]'")
+                stack.append(match.group(1))
+
+            # brackets inside the tag were consumed with it; continue after ']'
+            start = text.find("[", end + 1)
+
+        if stack:
+            # unclosed tags remain on the stack; report the innermost one
+            raise MarkupError(f"unclosed tag '[{stack[-1]}]'")
+        return True
+
+    @staticmethod
+    def _close_tag(stack: List[str], name_part: str) -> None:
+        """Pop the tag closed by `[/name_part]`, raising MarkupError on a bad close."""
+        if not name_part:
+            # nameless closing tag '[/]' pops the top of the stack
+            if not stack:
+                raise MarkupError("nameless closing tag '[/]' with no open tags to close")
+            stack.pop()
+            return
+        # match() anchors at the start, so a second slash ('[//b]') is rejected
+        match = _TAG_NAME_RE.match(name_part)
+        if not match:
+            raise MarkupError(f"invalid closing tag '[/{name_part}]'")
+        name = match.group(1)
+        if not stack:
+            raise MarkupError(f"closing tag '[/{name}]' with no matching opening tag")
+        last = stack.pop()
+        if last != name:
+            raise MarkupError(f"mismatched closing tag '[/{name}]', expected '[/{last}]'")
diff --git a/tests/test_markup_validator.py b/tests/test_markup_validator.py
new file mode 100644
index 00000000..aff4d7d6
--- /dev/null
+++ b/tests/test_markup_validator.py
@@ -0,0 +1,78 @@
+import pytest
+
+from rich.markup_validator import MarkupValidator
+from rich.errors import MarkupError
+
+
+def test_simple_valid():
+    """A single balanced tag pair validates."""
+    assert MarkupValidator().validate("[b]bold[/b]")
+
+
+def test_nested_valid():
+    """Properly nested tag pairs validate."""
+    assert MarkupValidator().validate("[b][i]inner[/i][/b]")
+
+
+def test_mismatch_tags():
+    """Closing a tag other than the innermost open one is rejected."""
+    validator = MarkupValidator()
+    with pytest.raises(MarkupError, match="mismatched|expected"):
+        validator.validate("[b]bad[/i]")
+
+
+def test_unclosed_tag():
+    """An opening tag that is never closed is reported as unclosed."""
+    validator = MarkupValidator()
+    with pytest.raises(MarkupError, match="unclosed"):
+        validator.validate("[b]open")
+
+
+def test_extra_closing():
+    validator = MarkupValidator()
+    with pytest.raises(MarkupError):  # '[/b]' arrives while nothing is open
+        validator.validate("text[/b]")
+
+
+def test_with_attributes():
+    """A tag carrying an `=value` attribute is closed by its bare name."""
+    assert MarkupValidator().validate("[link=https://example.com]click[/link]")
+
+
+def test_empty_brackets_are_invalid():
+    validator = MarkupValidator()
+    with pytest.raises(MarkupError):  # '[]' has no tag name at all
+        validator.validate("[]")
+
+
+def test_space_in_opening_tag_uses_first_token():
+    """Only the first token of an opening tag is used as its name."""
+    # '[bold red]' is tracked as 'bold', so '[/bold]' closes it
+    assert MarkupValidator().validate("[bold red]text[/bold]")
+
+
+def test_nameless_closing_pops_top():
+    """`[/]` closes only the most recently opened tag."""
+    validator = MarkupValidator()
+    # '[/]' pops 'b', so 'a' is still open when the text ends
+    with pytest.raises(MarkupError, match="unclosed"):
+        validator.validate("[a][b]x[/]")
+
+
+def test_nameless_closing_then_close_parent():
+    """A nameless close followed by the parent's named close validates."""
+    # '[/]' pops 'b'; '[/a]' then closes the remaining 'a'
+    assert MarkupValidator().validate("[a][b]x[/][/a]")
+
+
+def test_nameless_closing_with_empty_stack_is_invalid():
+    validator = MarkupValidator()
+    with pytest.raises(MarkupError):  # '[/]' has no open tag to pop
+        validator.validate("text[/]")
+
+
+def test_escaped_brackets():
+    """A backslash-escaped `[` is literal text, not a tag start."""
+    # the raw string keeps the backslash, so the '[' really is escaped
+    assert MarkupValidator().validate(r"This is \[bold] text")
+