[syntax] better API and performance for stylized ranges

2025-08-04 18:18:22 +00:00 · 2022-06-17 16:31:26 +01:00 · 2022-06-17 16:31:26 +01:00 · 0b3379f528
commit 0b3379f528
parent 4d80b3a558
2 changed files with 85 additions and 103 deletions
--- a/rich/syntax.py
+++ b/rich/syntax.py
@ -1,5 +1,7 @@
 import os.path
 import platform
+import re
+import sys
 import textwrap
 from abc import ABC, abstractmethod
 from typing import (
@ -46,6 +48,11 @@ from .segment import Segment, Segments
 from .style import Style
 from .text import Text

+if sys.version_info < (3, 10):
+    from typing_extensions import TypeAlias
+else:
+    from typing import TypeAlias
+
 TokenType = Tuple[str, ...]

 WINDOWS = platform.system() == "Windows"
@ -205,16 +212,14 @@ class ANSISyntaxTheme(SyntaxTheme):
        return self._background_style


-class SyntaxPosition(NamedTuple):
-    """A position in a Syntax object. Lines start at 1, columns at 0"""
-
-    line_number: int
-    column_index: int
+SyntaxPosition: TypeAlias = Tuple[int, int]


 class SyntaxHighlightRange(NamedTuple):
    """
    A range to highlight in a Syntax object.
+    `start` and `end` are 2-integers tuples, where the first integer is the line number
+    (starting from 1) and the second integer is the column index (starting from 0).
    The default style, if none is provided, is to highlight the range in bold.
    """

@ -295,7 +300,7 @@ class Syntax(JupyterMixin):
        self.padding = padding

        self._theme = self.get_theme(theme)
-        self._stylized_ranges: Sequence[SyntaxHighlightRange] = ()
+        self._stylized_ranges: List[SyntaxHighlightRange] = []

    @classmethod
    def from_path(
@ -479,7 +484,7 @@ class Syntax(JupyterMixin):

                def line_tokenize() -> Iterable[Tuple[Any, str]]:
                    """Split tokens to one per line."""
-                    assert lexer  # required to make MyPy happy, despite the `if lexer is None/else` 🤷
+                    assert lexer  # required to make MyPy happy

                    for token_type, token in lexer.get_tokens(code):
                        while token:
@ -521,16 +526,21 @@ class Syntax(JupyterMixin):

        return text

-    def stylize_ranges(self, stylized_ranges: Sequence[SyntaxHighlightRange]) -> None:
+    def stylize_range(
+        self, style: Style, start: SyntaxPosition, end: SyntaxPosition
+    ) -> None:
        """
-        Adds custom stylized ranges to apply to the syntax display when it's rendered.
+        Adds a custom style on a part of the code, that will be applied to the syntax display when it's rendered.
+        Line numbers are 1-based, while column indexes are 0-based.

        Args:
-            stylized_ranges (Sequence[SyntaxHighlightRange]): A list of SyntaxHighlightRange objects.
-                SyntaxHighlightRange objects contain the start and end position
-                (each given as a line number and column index), as well as a style to apply to the range.
+            style (Style): The style to apply.
+            start (Tuple[int, int]): The start of the range, in the form `[line number, column index]`.
+            end (Tuple[int, int]): The end of the range, in the form `[line number, column index]`.
        """
-        self._stylized_ranges = stylized_ranges
+        self._stylized_ranges.append(
+            SyntaxHighlightRange(style=style, start=start, end=end)
+        )

    def _get_line_numbers_color(self, blend: float = 0.3) -> Color:
        background_style = self._theme.get_background_style() + self.background_style
@ -620,7 +630,7 @@ class Syntax(JupyterMixin):
            else self.code_width
        )

-        ends_on_nl, processed_code = self._get_processed_code(self.code)
+        ends_on_nl, processed_code = self._process_code(self.code)
        text = self.highlight(processed_code, self.line_range)

        if not self.line_numbers and not self.word_wrap and not self.line_range:
@ -742,16 +752,22 @@ class Syntax(JupyterMixin):
            code (str): Code to highlight.
            text (Text): Text instance to apply the style to.
        """
-        lines_length = _get_lines_length(code)
+        # N.B. using "\n" here is much faster than using metacharacters such as "^" or "\Z":
+        newlines_offsets = [
+            match.start() for match in re.finditer("\n", code, flags=re.MULTILINE)
+        ]
+        # Let's add outer boundaries at each side of the list:
+        newlines_offsets.insert(0, -1)
+        newlines_offsets.append(len(code))

        for stylized_range in self._stylized_ranges:
            start, end = _get_code_indexes_for_syntax_positions(
-                lines_length, (stylized_range.start, stylized_range.end)
+                newlines_offsets, (stylized_range.start, stylized_range.end)
            )
            if start is not None and end is not None:
                text.stylize(stylized_range.style, start, end)

-    def _get_processed_code(self, code: str) -> Tuple[bool, str]:
+    def _process_code(self, code: str) -> Tuple[bool, str]:
        """
        Applies various processing to a raw code string
        (normalises it so it always ends with a line return, dedents it if necessary, etc.)
@ -772,66 +788,33 @@ class Syntax(JupyterMixin):
        return ends_on_nl, processed_code


-def _get_lines_length(code: str) -> List[int]:
-    """
-    Returns the length of each line in the given code string.
-
-    Args:
-        code (str): The code string to get the line lengths from.
-    """
-
-    code_length = len(code)
-
-    lines_length = []
-    search_index = 0
-    while True:
-        new_line_index = code.find("\n", search_index)
-        if new_line_index == -1:
-            if search_index < code_length:
-                # add the last line length:
-                lines_length.append(code_length - search_index)
-            break
-        line_length = (
-            new_line_index if search_index == 0 else (new_line_index - search_index)
-        )
-        lines_length.append(line_length)
-        search_index = new_line_index + 1
-
-    return lines_length
-
-
 def _get_code_indexes_for_syntax_positions(
-    lines_length: Sequence[int], positions: Sequence[SyntaxPosition]
+    newlines_offsets: Sequence[int], positions: Sequence[SyntaxPosition]
 ) -> Sequence[Optional[int]]:
    """
    Returns the index of the code string for the given positions.

    Args:
-        lines_length (Sequence[int]): The lengths of each line that form the code.
+        newlines_offsets (Sequence[int]): The offset of each newline character found in the code snippet.
        positions (Sequence[SyntaxPosition]): The positions to search for.

    Returns:
        Sequence[Optional[int]]: For each position, the index of the code string for this position, or `None`
            if the position is out of range.
    """
-    lines_count = len(lines_length)
+    lines_count = len(newlines_offsets)

    def index_for_position(position: SyntaxPosition) -> Optional[int]:
-        if position.line_number > lines_count:
+        line_number, column_index = position
+        if line_number > lines_count:
            return None  # `line_number` is out of range
-        line_index = position.line_number - 1
-        if lines_length[line_index] < position.column_index:
-            return None  # `column_index` is out of range
-        return sum(
-            (
-                # number of characters of all previous lines...
-                sum(lines_length[0:line_index]),
-                # ...plus one "\n" character for each previous line...
-                line_index,
-                # ...plus the number of characters on this line before the column index
-                position.column_index,
-            )
+        line_index = line_number - 1
+        line_length = (
+            newlines_offsets[line_index + 1] - newlines_offsets[line_index] - 1
        )
+        if line_length < column_index:
+            return None  # `column_index` is out of range
+        return newlines_offsets[line_index] + column_index + 1

    return [index_for_position(position) for position in positions]

--- a/tests/test_syntax.py
+++ b/tests/test_syntax.py