feat: Add math rendering support to markdown

- Add MathRenderer class for converting LaTeX math to Unicode
- Integrate math rendering into markdown processing
- Support inline ($...$) and block ($$...$$) math expressions
- Add tests for math rendering functionality

Addresses #3159
2025-08-04 10:08:40 +00:00 · 2025-04-23 02:45:43 -04:00 · 2025-04-23 02:45:43 -04:00 · bf87b5ca50
commit bf87b5ca50
parent 0c6c75644f
3 changed files with 420 additions and 1 deletions
--- a/rich/markdown.py
+++ b/rich/markdown.py
@ -1,6 +1,7 @@
 from __future__ import annotations

 import sys
+import re
 from typing import ClassVar, Iterable

 from markdown_it import MarkdownIt
@ -25,6 +26,7 @@ from .segment import Segment
 from .style import Style, StyleStack
 from .syntax import Syntax
 from .text import Text, TextType
+from .math_render import MathRenderer 


 class MarkdownElement:
@ -513,6 +515,8 @@ class Markdown(JupyterMixin):
            enabled. Defaults to None.
        inline_code_theme: (Optional[str], optional): Pygments theme for inline code
            highlighting, or None for no highlighting. Defaults to None.
+        math_enabled (bool, optional): Enable math rendering. Defaults to True.
+        math_style (Style, optional): Style for math expressions. Defaults to None.
    """

    elements: ClassVar[dict[str, type[MarkdownElement]]] = {
@ -545,6 +549,8 @@ class Markdown(JupyterMixin):
        hyperlinks: bool = True,
        inline_code_lexer: str | None = None,
        inline_code_theme: str | None = None,
+        math_enabled: bool = True,  
+        math_style: Style | None = None,  
    ) -> None:
        parser = MarkdownIt().enable("strikethrough").enable("table")
        self.markup = markup
@ -556,6 +562,11 @@ class Markdown(JupyterMixin):
        self.inline_code_lexer = inline_code_lexer
        self.inline_code_theme = inline_code_theme or code_theme

+        # Math rendering support
+        self.math_enabled = math_enabled
+        self.math_style = math_style or Style(italic=True)
+        self.math_renderer = MathRenderer() if math_enabled else None
+
    def _flatten_tokens(self, tokens: Iterable[Token]) -> Iterable[Token]:
        """Flattens the token stream."""
        for token in tokens:
@ -565,11 +576,74 @@ class Markdown(JupyterMixin):
                yield from self._flatten_tokens(token.children)
            else:
                yield token
+    
+    def _process_math_expressions(self, markup: str) -> str:
+        """Replace LaTeX math spans in ``markup`` with their Unicode rendering.
+
+        Block math (``$$...$$``) and inline math (``$...$``) are found in one
+        left-to-right scan, so text consumed as block math can never be
+        matched a second time as inline math.
+
+        Args:
+            markup: Text that may contain math expressions.
+
+        Returns:
+            The text with each math span replaced by the plain-text rendering
+            of its expression. Block math is surrounded by blank lines.
+            ``markup`` is returned unchanged when math rendering is disabled.
+        """
+        if not self.math_enabled or self.math_renderer is None:
+            return markup
+
+        renderer = self.math_renderer
+        style = self.math_style
+
+        # A single alternation (block first) replaces two independent scans.
+        # Scanning separately let "$$ab$$" also match as inline "$ab$", and the
+        # overlapping replacements duplicated the expression in the output.
+        #
+        # Inline math must start and end with a non-space, non-"$" character,
+        # which keeps prices such as "$5 and $6" untouched; the optional group
+        # lets one-character expressions such as "$x$" match as well.
+        math_pattern = re.compile(
+            r"\$\$(?P<block>.*?)\$\$"
+            r"|\$(?P<inline>[^\s$](?:[^$]*?[^\s$])?)\$",
+            re.DOTALL,
+        )
+
+        def render_match(match: re.Match[str]) -> str:
+            """Return the replacement text for one block or inline match."""
+            block = match.group("block")
+            if block is not None:
+                rendered = renderer.render_to_text(block.strip(), style)
+                # Blank lines on both sides separate the block from the text
+                # around it.
+                return "\n\n" + str(rendered) + "\n\n"
+            rendered = renderer.render_to_text(match.group("inline"), style)
+            # NOTE(review): str() keeps only the characters of the rendered
+            # value, so ``style`` is not carried into the returned markup.
+            return str(rendered)
+
+        return math_pattern.sub(render_match, markup)

    def __rich_console__(
        self, console: Console, options: ConsoleOptions
    ) -> RenderResult:
        """Render markdown to the console."""
+        # Process math expressions if enabled
+        if self.math_enabled and self.math_renderer is not None:
+            self.markup = self._process_math_expressions(self.markup)
+            # Re-parse the markup after math processing
+            parser = MarkdownIt().enable("strikethrough").enable("table")
+            self.parsed = parser.parse(self.markup)
+        
        style = console.get_style(self.style, default="none")
        options = options.update(height=None)
        context = MarkdownContext(
@ -781,4 +855,4 @@ if __name__ == "__main__":  # pragma: no cover
        console = Console(
            force_terminal=args.force_color, width=args.width, record=True
        )
-        console.print(markdown)
+        console.print(markdown)
--- a/rich/math_render.py
+++ b/rich/math_render.py
@ -0,0 +1,269 @@
+from typing import Dict, List, Optional, Union, Tuple, Pattern
+import re
+from rich.text import Text
+from rich.style import Style
+
+
+class MathRenderer:
+    """Renders LaTeX-style math expressions as Unicode text.
+
+    Conversion is purely textual: known ``\\command`` names are replaced by
+    Unicode symbols, ``^``/``_`` scripts are mapped to Unicode superscript and
+    subscript characters, and ``\\frac{a}{b}`` is flattened to ``a/b``.
+    """
+
+    def __init__(self) -> None:
+        """Initialize the math renderer with symbol mappings."""
+        # Symbol mappings for LaTeX commands to Unicode
+        self.symbols: Dict[str, str] = {
+            # Greek letters
+            "\\alpha": "α",
+            "\\beta": "β",
+            "\\gamma": "γ",
+            "\\delta": "δ",
+            "\\epsilon": "ε",
+            "\\zeta": "ζ",
+            "\\eta": "η",
+            "\\theta": "θ",
+            "\\iota": "ι",
+            "\\kappa": "κ",
+            "\\lambda": "λ",
+            "\\mu": "μ",
+            "\\nu": "ν",
+            "\\xi": "ξ",
+            "\\pi": "π",
+            "\\rho": "ρ",
+            "\\sigma": "σ",
+            "\\tau": "τ",
+            "\\upsilon": "υ",
+            "\\phi": "φ",
+            "\\chi": "χ",
+            "\\psi": "ψ",
+            "\\omega": "ω",
+            # Uppercase Greek letters
+            "\\Alpha": "Α",
+            "\\Beta": "Β",
+            "\\Gamma": "Γ",
+            "\\Delta": "Δ",
+            "\\Epsilon": "Ε",
+            "\\Zeta": "Ζ",
+            "\\Eta": "Η",
+            "\\Theta": "Θ",
+            "\\Iota": "Ι",
+            "\\Kappa": "Κ",
+            "\\Lambda": "Λ",
+            "\\Mu": "Μ",
+            "\\Nu": "Ν",
+            "\\Xi": "Ξ",
+            "\\Pi": "Π",
+            "\\Rho": "Ρ",
+            "\\Sigma": "Σ",
+            "\\Tau": "Τ",
+            "\\Upsilon": "Υ",
+            "\\Phi": "Φ",
+            "\\Chi": "Χ",
+            "\\Psi": "Ψ",
+            "\\Omega": "Ω",
+            # Operators and symbols
+            "\\times": "×",
+            "\\div": "÷",
+            "\\pm": "±",
+            "\\mp": "∓",
+            "\\cdot": "·",
+            "\\ast": "∗",
+            "\\leq": "≤",
+            "\\geq": "≥",
+            "\\neq": "≠",
+            "\\approx": "≈",
+            "\\equiv": "≡",
+            "\\sum": "∑",
+            "\\prod": "∏",
+            "\\int": "∫",
+            "\\partial": "∂",
+            "\\infty": "∞",
+            "\\nabla": "∇",
+            "\\forall": "∀",
+            "\\exists": "∃",
+            "\\nexists": "∄",
+            "\\in": "∈",
+            "\\notin": "∉",
+            "\\subset": "⊂",
+            "\\supset": "⊃",
+            "\\cup": "∪",
+            "\\cap": "∩",
+            "\\emptyset": "∅",
+            "\\rightarrow": "→",
+            "\\leftarrow": "←",
+            "\\Rightarrow": "⇒",
+            "\\Leftarrow": "⇐",
+            # Additional symbols
+            "\\sqrt": "√",
+            "\\propto": "∝",
+            "\\angle": "∠",
+            "\\triangle": "△",
+            "\\square": "□",
+        }
+
+        # A backslash followed by a run of letters (a command name) or by any
+        # single character (an escaped character).
+        self.command_pattern: Pattern = re.compile(r"\\([a-zA-Z]+|.)")
+
+        # Characters that have a Unicode superscript form
+        self.superscript_map = {
+            "0": "⁰",
+            "1": "¹",
+            "2": "²",
+            "3": "³",
+            "4": "⁴",
+            "5": "⁵",
+            "6": "⁶",
+            "7": "⁷",
+            "8": "⁸",
+            "9": "⁹",
+            "+": "⁺",
+            "-": "⁻",
+            "=": "⁼",
+            "(": "⁽",
+            ")": "⁾",
+            "a": "ᵃ",
+            "b": "ᵇ",
+            "c": "ᶜ",
+            "d": "ᵈ",
+            "e": "ᵉ",
+            "f": "ᶠ",
+            "g": "ᵍ",
+            "h": "ʰ",
+            "i": "ⁱ",
+            "j": "ʲ",
+            "k": "ᵏ",
+            "l": "ˡ",
+            "m": "ᵐ",
+            "n": "ⁿ",
+            "o": "ᵒ",
+            "p": "ᵖ",
+            "r": "ʳ",
+            "s": "ˢ",
+            "t": "ᵗ",
+            "u": "ᵘ",
+            "v": "ᵛ",
+            "w": "ʷ",
+            "x": "ˣ",
+            "y": "ʸ",
+            "z": "ᶻ",
+        }
+
+        # Characters that have a Unicode subscript form
+        self.subscript_map = {
+            "0": "₀",
+            "1": "₁",
+            "2": "₂",
+            "3": "₃",
+            "4": "₄",
+            "5": "₅",
+            "6": "₆",
+            "7": "₇",
+            "8": "₈",
+            "9": "₉",
+            "+": "₊",
+            "-": "₋",
+            "=": "₌",
+            "(": "₍",
+            ")": "₎",
+            "a": "ₐ",
+            "e": "ₑ",
+            "h": "ₕ",
+            "i": "ᵢ",
+            "j": "ⱼ",
+            "k": "ₖ",
+            "l": "ₗ",
+            "m": "ₘ",
+            "n": "ₙ",
+            "o": "ₒ",
+            "p": "ₚ",
+            "r": "ᵣ",
+            "s": "ₛ",
+            "t": "ₜ",
+            "u": "ᵤ",
+            "v": "ᵥ",
+            "x": "ₓ",
+        }
+
+        # Compiled once here instead of on every render_expression() call.
+        # Scripts are either a braced group ("^{23}") or a single
+        # alphanumeric character ("^2"); the two forms are alternatives.
+        self._superscript_pattern = re.compile(r"\^\{([^}]+)\}|\^([a-zA-Z0-9])")
+        self._subscript_pattern = re.compile(r"_\{([^}]+)\}|_([a-zA-Z0-9])")
+        self._fraction_pattern = re.compile(r"\\frac\{([^}]+)\}\{([^}]+)\}")
+
+    def _convert_superscript(self, text: str) -> str:
+        """Convert text to superscript characters.
+
+        Characters without an entry in ``superscript_map`` are kept as they are.
+        """
+        return "".join(self.superscript_map.get(char, char) for char in text)
+
+    def _convert_subscript(self, text: str) -> str:
+        """Convert text to subscript characters.
+
+        Characters without an entry in ``subscript_map`` are kept as they are.
+        """
+        return "".join(self.subscript_map.get(char, char) for char in text)
+
+    def render_to_text(self, expression: str, style: Optional[Style] = None) -> Text:
+        """Render a LaTeX math expression as a Rich Text object.
+
+        Args:
+            expression: LaTeX math expression
+            style: Optional style to apply to the rendered expression
+
+        Returns:
+            Rich Text object containing the rendered expression
+        """
+        rendered_str = self.render_expression(expression)
+        # Never hand None to Text: substitute the empty style string when the
+        # caller gave no style.
+        return Text(rendered_str, style=style if style is not None else "")
+
+    def render_expression(self, expression: str) -> str:
+        """Convert a LaTeX math expression to Unicode text.
+
+        The steps run in this order: known commands are replaced by symbols,
+        spaces around tightly-binding operators are removed, superscripts and
+        subscripts are converted, and ``\\frac{a}{b}`` becomes ``a/b``.
+
+        Args:
+            expression: LaTeX math expression
+
+        Returns:
+            Unicode representation of the math expression
+        """
+        result = expression.strip()
+
+        def replace_command(match):
+            # Unknown commands (including "\frac", handled below) are kept
+            # verbatim.
+            command = match.group(0)
+            return self.symbols.get(command, command)
+
+        result = self.command_pattern.sub(replace_command, result)
+
+        # For operators that should not have spaces
+        unspaced_symbols = ["×", "÷", "±", "∓", "·", "∗", "^", "√"]
+
+        for symbol in unspaced_symbols:
+            # Remove spaces before and after these operators
+            result = result.replace(f" {symbol} ", symbol)
+            result = result.replace(f"{symbol} ", symbol)
+            result = result.replace(f" {symbol}", symbol)
+
+        def script_replacer(table, convert):
+            """Build a substitution callback for one script direction."""
+
+            def replace(match):
+                text = match.group(1) or match.group(2)
+                # Convert only when every non-space character has a script
+                # form. A partial conversion would drop the "^"/"_" marker and
+                # change the meaning (e.g. "x^q" read as the product "xq").
+                if all(char in table or char.isspace() for char in text):
+                    return convert(text)
+                return match.group(0)
+
+            return replace
+
+        # Process superscripts (^)
+        result = self._superscript_pattern.sub(
+            script_replacer(self.superscript_map, self._convert_superscript), result
+        )
+
+        # Process subscripts (_)
+        result = self._subscript_pattern.sub(
+            script_replacer(self.subscript_map, self._convert_subscript), result
+        )
+
+        # Handle fractions (very basic): operands are joined with "/" and are
+        # not parenthesized.
+        def replace_fraction(match):
+            numerator, denominator = match.group(1), match.group(2)
+            return f"{numerator}/{denominator}"
+
+        result = self._fraction_pattern.sub(replace_fraction, result)
+
+        return result
--- a/tests/test_mathrenderer.py
+++ b/tests/test_mathrenderer.py
@ -0,0 +1,76 @@
+import pytest
+from rich.math_render import MathRenderer
+from rich.markdown import Markdown
+from rich.console import Console
+from rich.text import Text
+from rich.style import Style
+from io import StringIO
+
+def test_math_renderer_symbols():
+    """LaTeX symbol commands are converted to their Unicode counterparts."""
+    math = MathRenderer()
+
+    expected_by_input = {
+        # Greek letters
+        "\\alpha": "α",
+        "\\beta": "β",
+        "\\gamma": "γ",
+        # Operators
+        "\\times": "×",
+        "\\leq": "≤",
+        "\\geq": "≥",
+        # Compound expression
+        "a\\times b": "a×b",
+    }
+
+    for latex, expected in expected_by_input.items():
+        assert math.render_expression(latex) == expected
+
+def test_math_renderer_superscripts():
+    """Single-character and braced superscripts become Unicode superscripts."""
+    math = MathRenderer()
+
+    cases = (
+        ("x^2", "²"),  # bare single character
+        ("x^{2}", "²"),  # braced single character
+        ("x^{23}", "²³"),  # braced multi-character group
+    )
+
+    for latex, fragment in cases:
+        assert fragment in math.render_expression(latex)
+
+def test_math_inline_markdown():
+    """Inline math inside a markdown paragraph is rendered in the output."""
+    buffer = StringIO()
+    console = Console(file=buffer, width=100)
+
+    console.print(Markdown("This is an inline expression $E = mc^2$ in text."))
+    output = buffer.getvalue()
+
+    # The rendered expression keeps its spacing, and the surrounding words
+    # are still present.
+    for fragment in ("E = mc²", "expression", "in text"):
+        assert fragment in output
+
+def test_space_normalization():
+    """Spaces are kept around ``=``/``+`` and removed around ``×``."""
+    math = MathRenderer()
+
+    cases = [
+        # Equals and plus preserve their surrounding spaces
+        ("E = mc^2", "E = mc²"),
+        ("y = x + 3", "y = x + 3"),
+        # Multiplication drops its surrounding spaces
+        ("a × b", "a×b"),
+        ("a\\times b", "a×b"),
+        # Both kinds of operator in one expression
+        ("F = m × a", "F = m×a"),
+    ]
+
+    for latex, expected in cases:
+        assert math.render_expression(latex) == expected
+
+def test_complex_expressions():
+    """Expressions combining several features render as expected."""
+    math = MathRenderer()
+
+    cases = [
+        # Equation with a superscript and several operators
+        ("f(x) = x^2 + 2\\times x + 1", "f(x) = x² + 2×x + 1"),
+        # Greek letters separated by spaced operators
+        ("\\alpha = \\beta + \\gamma", "α = β + γ"),
+    ]
+
+    for latex, expected in cases:
+        rendered = math.render_expression(latex)
+        assert rendered == expected