feat: Add math rendering support to markdown

- Add MathRenderer class for converting LaTeX math to Unicode
- Integrate math rendering into markdown processing
- Support inline ($...$) and block ($$...$$) math expressions
- Add tests for math rendering functionality

Addresses #3159
This commit is contained in:
IAdityaKaushal 2025-04-23 02:45:43 -04:00
parent 0c6c75644f
commit bf87b5ca50
3 changed files with 420 additions and 1 deletions

View file

@ -1,6 +1,7 @@
from __future__ import annotations
import sys
import re
from typing import ClassVar, Iterable
from markdown_it import MarkdownIt
@ -25,6 +26,7 @@ from .segment import Segment
from .style import Style, StyleStack
from .syntax import Syntax
from .text import Text, TextType
from .math_render import MathRenderer
class MarkdownElement:
@ -513,6 +515,8 @@ class Markdown(JupyterMixin):
enabled. Defaults to None.
inline_code_theme: (Optional[str], optional): Pygments theme for inline code
highlighting, or None for no highlighting. Defaults to None.
math_enabled (bool, optional): Enable math rendering. Defaults to True.
math_style (Style, optional): Style for math expressions. Defaults to None.
"""
elements: ClassVar[dict[str, type[MarkdownElement]]] = {
@ -545,6 +549,8 @@ class Markdown(JupyterMixin):
hyperlinks: bool = True,
inline_code_lexer: str | None = None,
inline_code_theme: str | None = None,
math_enabled: bool = True,
math_style: Style | None = None,
) -> None:
parser = MarkdownIt().enable("strikethrough").enable("table")
self.markup = markup
@ -556,6 +562,11 @@ class Markdown(JupyterMixin):
self.inline_code_lexer = inline_code_lexer
self.inline_code_theme = inline_code_theme or code_theme
# Math rendering support
self.math_enabled = math_enabled
self.math_style = math_style or Style(italic=True)
self.math_renderer = MathRenderer() if math_enabled else None
def _flatten_tokens(self, tokens: Iterable[Token]) -> Iterable[Token]:
"""Flattens the token stream."""
for token in tokens:
@ -565,11 +576,74 @@ class Markdown(JupyterMixin):
yield from self._flatten_tokens(token.children)
else:
yield token
def _process_math_expressions(self, markup: str) -> str:
"""Process LaTeX math expressions in the text.
Args:
markup: Text that may contain math expressions
Returns:
Text with math expressions processed for rendering
"""
if not self.math_enabled or self.math_renderer is None:
return markup
# Track positions where math expressions are found to replace later
replacements = []
# Process block math ($$...$$)
block_pattern = re.compile(r'\$\$(.*?)\$\$', re.DOTALL)
for match in block_pattern.finditer(markup):
expression = match.group(1).strip()
rendered_text = self.math_renderer.render_to_text(expression, self.math_style)
replacements.append((match.start(), match.end(), rendered_text, True))
# Process inline math ($...$) - avoid $ used for money
inline_pattern = re.compile(r'\$([^\s$][^$]*?[^\s$])\$')
for match in inline_pattern.finditer(markup):
expression = match.group(1)
rendered_text = self.math_renderer.render_to_text(expression, self.math_style)
replacements.append((match.start(), match.end(), rendered_text, False))
# Apply replacements in reverse order to maintain positions
if not replacements:
return markup
result = []
last_end = 0
# Sort replacements by position
replacements.sort(key=lambda x: x[0])
for start, end, rendered_text, is_block in replacements:
result.append(markup[last_end:start])
if is_block:
result.append("\n\n")
result.append(str(rendered_text))
result.append("\n\n")
else:
result.append(str(rendered_text))
last_end = end
result.append(markup[last_end:])
return "".join(result)
def __rich_console__(
self, console: Console, options: ConsoleOptions
) -> RenderResult:
"""Render markdown to the console."""
# Process math expressions if enabled
if self.math_enabled and self.math_renderer is not None:
self.markup = self._process_math_expressions(self.markup)
# Re-parse the markup after math processing
parser = MarkdownIt().enable("strikethrough").enable("table")
self.parsed = parser.parse(self.markup)
style = console.get_style(self.style, default="none")
options = options.update(height=None)
context = MarkdownContext(
@ -781,4 +855,4 @@ if __name__ == "__main__": # pragma: no cover
console = Console(
force_terminal=args.force_color, width=args.width, record=True
)
console.print(markdown)
console.print(markdown)

269
rich/math_render.py Normal file
View file

@ -0,0 +1,269 @@
from typing import Dict, List, Optional, Union, Tuple, Pattern
import re
from rich.text import Text
from rich.style import Style
class MathRenderer:
    """Renders LaTeX-style math expressions as Unicode text.

    The conversion is purely textual. Known commands are swapped for a
    Unicode symbol, ``^`` and ``_`` groups are mapped character by character
    to superscript and subscript glyphs where the lookup tables have one,
    and simple fractions are flattened to ``numerator/denominator``.
    Anything that is not recognised is left as written.
    """

    # Operators rendered tight against their operands: a single space on
    # either side of these is removed. Other operators (e.g. "=" and "+")
    # keep their spacing.
    _UNSPACED_SYMBOLS = ("×", "÷", "±", "∓", "·", "∗", "^", "√")

    # "^{...}" or "^" followed by one alphanumeric character.
    _SUPERSCRIPT_PATTERN = re.compile(r"\^\{([^}]+)\}|\^([a-zA-Z0-9])")
    # "_{...}" or "_" followed by one alphanumeric character.
    _SUBSCRIPT_PATTERN = re.compile(r"_\{([^}]+)\}|_([a-zA-Z0-9])")
    # "\frac{a}{b}" with brace-free numerator and denominator.
    _FRACTION_PATTERN = re.compile(r"\\frac\{([^}]+)\}\{([^}]+)\}")

    def __init__(self) -> None:
        """Initialize the math renderer with symbol mappings."""
        # LaTeX command (including the leading backslash) -> Unicode symbol.
        self.symbols: Dict[str, str] = {
            # Greek letters
            "\\alpha": "α",
            "\\beta": "β",
            "\\gamma": "γ",
            "\\delta": "δ",
            "\\epsilon": "ε",
            "\\zeta": "ζ",
            "\\eta": "η",
            "\\theta": "θ",
            "\\iota": "ι",
            "\\kappa": "κ",
            "\\lambda": "λ",
            "\\mu": "μ",
            "\\nu": "ν",
            "\\xi": "ξ",
            "\\pi": "π",
            "\\rho": "ρ",
            "\\sigma": "σ",
            "\\tau": "τ",
            "\\upsilon": "υ",
            "\\phi": "φ",
            "\\chi": "χ",
            "\\psi": "ψ",
            "\\omega": "ω",
            # Uppercase Greek letters
            "\\Alpha": "Α",
            "\\Beta": "Β",
            "\\Gamma": "Γ",
            "\\Delta": "Δ",
            "\\Epsilon": "Ε",
            "\\Zeta": "Ζ",
            "\\Eta": "Η",
            "\\Theta": "Θ",
            "\\Iota": "Ι",
            "\\Kappa": "Κ",
            "\\Lambda": "Λ",
            "\\Mu": "Μ",
            "\\Nu": "Ν",
            "\\Xi": "Ξ",
            "\\Pi": "Π",
            "\\Rho": "Ρ",
            "\\Sigma": "Σ",
            "\\Tau": "Τ",
            "\\Upsilon": "Υ",
            "\\Phi": "Φ",
            "\\Chi": "Χ",
            "\\Psi": "Ψ",
            "\\Omega": "Ω",
            # Operators and symbols
            "\\times": "×",
            "\\div": "÷",
            "\\pm": "±",
            "\\mp": "∓",
            "\\cdot": "·",
            "\\ast": "∗",
            "\\leq": "≤",
            "\\geq": "≥",
            "\\neq": "≠",
            "\\approx": "≈",
            "\\equiv": "≡",
            "\\sum": "∑",
            "\\prod": "∏",
            "\\int": "∫",
            "\\partial": "∂",
            "\\infty": "∞",
            "\\nabla": "∇",
            "\\forall": "∀",
            "\\exists": "∃",
            "\\nexists": "∄",
            "\\in": "∈",
            "\\notin": "∉",
            "\\subset": "⊂",
            "\\supset": "⊃",
            "\\cup": "∪",
            "\\cap": "∩",
            "\\emptyset": "∅",
            "\\rightarrow": "→",
            "\\leftarrow": "←",
            "\\Rightarrow": "⇒",
            "\\Leftarrow": "⇐",
            # Additional symbols
            "\\sqrt": "√",
            "\\propto": "∝",
            "\\angle": "∠",
            "\\triangle": "△",
            "\\square": "□",
        }

        # A backslash followed by a run of letters, or by any single character.
        self.command_pattern: Pattern = re.compile(r"\\([a-zA-Z]+|.)")

        # Plain character -> superscript glyph. Characters without an entry
        # (for example "q") are passed through unchanged.
        self.superscript_map = {
            "0": "⁰",
            "1": "¹",
            "2": "²",
            "3": "³",
            "4": "⁴",
            "5": "⁵",
            "6": "⁶",
            "7": "⁷",
            "8": "⁸",
            "9": "⁹",
            "+": "⁺",
            "-": "⁻",
            "=": "⁼",
            "(": "⁽",
            ")": "⁾",
            "a": "ᵃ",
            "b": "ᵇ",
            "c": "ᶜ",
            "d": "ᵈ",
            "e": "ᵉ",
            "f": "ᶠ",
            "g": "ᵍ",
            "h": "ʰ",
            "i": "ⁱ",
            "j": "ʲ",
            "k": "ᵏ",
            "l": "ˡ",
            "m": "ᵐ",
            "n": "ⁿ",
            "o": "ᵒ",
            "p": "ᵖ",
            "r": "ʳ",
            "s": "ˢ",
            "t": "ᵗ",
            "u": "ᵘ",
            "v": "ᵛ",
            "w": "ʷ",
            "x": "ˣ",
            "y": "ʸ",
            "z": "ᶻ",
        }

        # Plain character -> subscript glyph. Only the letters listed here
        # are mapped; every other character is passed through unchanged.
        self.subscript_map = {
            "0": "₀",
            "1": "₁",
            "2": "₂",
            "3": "₃",
            "4": "₄",
            "5": "₅",
            "6": "₆",
            "7": "₇",
            "8": "₈",
            "9": "₉",
            "+": "₊",
            "-": "₋",
            "=": "₌",
            "(": "₍",
            ")": "₎",
            "a": "ₐ",
            "e": "ₑ",
            "h": "ₕ",
            "i": "ᵢ",
            "j": "ⱼ",
            "k": "ₖ",
            "l": "ₗ",
            "m": "ₘ",
            "n": "ₙ",
            "o": "ₒ",
            "p": "ₚ",
            "r": "ᵣ",
            "s": "ₛ",
            "t": "ₜ",
            "u": "ᵤ",
            "v": "ᵥ",
            "x": "ₓ",
        }

    def _convert_superscript(self, text: str) -> str:
        """Convert text to superscript characters.

        Characters missing from ``superscript_map`` are kept as they are.
        """
        return "".join(self.superscript_map.get(char, char) for char in text)

    def _convert_subscript(self, text: str) -> str:
        """Convert text to subscript characters.

        Characters missing from ``subscript_map`` are kept as they are.
        """
        return "".join(self.subscript_map.get(char, char) for char in text)

    def render_to_text(
        self, expression: str, style: Optional["Style"] = None
    ) -> "Text":
        """Render a LaTeX math expression as a Rich Text object.

        Args:
            expression: LaTeX math expression.
            style: Optional style to apply to the rendered expression.

        Returns:
            Rich Text object containing the rendered expression.
        """
        return Text(self.render_expression(expression), style=style)

    def render_expression(self, expression: str) -> str:
        """Convert a LaTeX math expression to Unicode text.

        The steps run in a fixed order: command substitution, removal of
        spaces around tight operators, superscripts, subscripts, and finally
        fractions.

        Args:
            expression: LaTeX math expression.

        Returns:
            Unicode representation of the math expression.
        """
        result = expression.strip()

        def replace_command(match):
            # Unknown commands (including "\frac", handled further down)
            # are put back exactly as they were written.
            command = match.group(0)
            return self.symbols.get(command, command)

        result = self.command_pattern.sub(replace_command, result)

        # Pull tight operators up against their operands. The " x " form is
        # replaced first so a symbol spaced on both sides loses both spaces.
        for symbol in self._UNSPACED_SYMBOLS:
            result = result.replace(f" {symbol} ", symbol)
            result = result.replace(f"{symbol} ", symbol)
            result = result.replace(f" {symbol}", symbol)

        # Exactly one of the two groups is set: the braced body or the
        # single character that followed the marker.
        result = self._SUPERSCRIPT_PATTERN.sub(
            lambda match: self._convert_superscript(match.group(1) or match.group(2)),
            result,
        )
        result = self._SUBSCRIPT_PATTERN.sub(
            lambda match: self._convert_subscript(match.group(1) or match.group(2)),
            result,
        )

        # Fractions (very basic): nested braces are not supported.
        result = self._FRACTION_PATTERN.sub(
            lambda match: f"{match.group(1)}/{match.group(2)}", result
        )
        return result

View file

@ -0,0 +1,76 @@
import pytest
from rich.math_render import MathRenderer
from rich.markdown import Markdown
from rich.console import Console
from rich.text import Text
from rich.style import Style
from io import StringIO
def test_math_renderer_symbols():
    """Test that the math renderer correctly converts LaTeX symbols to Unicode."""
    renderer = MathRenderer()

    # Greek letters
    assert renderer.render_expression("\\alpha") == "α"
    assert renderer.render_expression("\\beta") == "β"
    assert renderer.render_expression("\\gamma") == "γ"

    # Operators: each command must produce a visible glyph, not an empty
    # string.
    assert renderer.render_expression("\\times") == "×"
    assert renderer.render_expression("\\leq") == "≤"
    assert renderer.render_expression("\\geq") == "≥"

    # Compound expression: the space after the multiplication sign is dropped.
    assert renderer.render_expression("a\\times b") == "a×b"
def test_math_renderer_superscripts():
    """Check that ``^`` exponents come out as Unicode superscript characters."""
    math = MathRenderer()

    # A single exponent gives the same glyph with or without braces.
    for source in ("x^2", "x^{2}"):
        assert "²" in math.render_expression(source)

    # Every character inside a braced group is raised.
    assert "²³" in math.render_expression("x^{23}")
def test_math_inline_markdown():
    """Check that an inline ``$...$`` expression is rendered inside markdown."""
    console = Console(file=StringIO(), width=100)
    console.print(Markdown("This is an inline expression $E = mc^2$ in text."))
    rendered = console.file.getvalue()

    # The expression keeps its spacing, and the prose on both sides of it
    # is still present in the output.
    for fragment in ("E = mc²", "expression", "in text"):
        assert fragment in rendered
def test_space_normalization():
    """Check which operators keep their surrounding spaces and which lose them."""
    math = MathRenderer()
    cases = (
        # "=" and "+" keep their spacing.
        ("E = mc^2", "E = mc²"),
        ("y = x + 3", "y = x + 3"),
        # Multiplication is pulled tight against its operands.
        ("a × b", "a×b"),
        ("a\\times b", "a×b"),
        # Both rules applied within one expression.
        ("F = m × a", "F = m×a"),
    )
    for source, expected in cases:
        assert math.render_expression(source) == expected
def test_complex_expressions():
    """Check expressions that combine several rendering rules."""
    math = MathRenderer()

    # Superscript, multiplication command and ordinary operators together.
    polynomial = math.render_expression("f(x) = x^2 + 2\\times x + 1")
    assert polynomial == "f(x) = x² + 2×x + 1"

    # Greek letters separated by spaced operators.
    greek = math.render_expression("\\alpha = \\beta + \\gamma")
    assert greek == "α = β + γ"