[syntax] better API and performance for stylized ranges

This commit is contained in:
Olivier Philippon 2022-06-17 16:31:26 +01:00
parent 4d80b3a558
commit 0b3379f528
2 changed files with 85 additions and 103 deletions

View file

@ -1,5 +1,7 @@
import os.path
import platform
import re
import sys
import textwrap
from abc import ABC, abstractmethod
from typing import (
@ -46,6 +48,11 @@ from .segment import Segment, Segments
from .style import Style
from .text import Text
if sys.version_info < (3, 10):
from typing_extensions import TypeAlias
else:
from typing import TypeAlias
TokenType = Tuple[str, ...]
WINDOWS = platform.system() == "Windows"
@ -205,16 +212,14 @@ class ANSISyntaxTheme(SyntaxTheme):
return self._background_style
class SyntaxPosition(NamedTuple):
"""A position in a Syntax object. Lines start at 1, columns at 0"""
line_number: int
column_index: int
SyntaxPosition: TypeAlias = Tuple[int, int]
class SyntaxHighlightRange(NamedTuple):
"""
A range to highlight in a Syntax object.
`start` and `end` are 2-integers tuples, where the first integer is the line number
(starting from 1) and the second integer is the column index (starting from 0).
The default style, if none is provided, is to highlight the range in bold.
"""
@ -295,7 +300,7 @@ class Syntax(JupyterMixin):
self.padding = padding
self._theme = self.get_theme(theme)
self._stylized_ranges: Sequence[SyntaxHighlightRange] = ()
self._stylized_ranges: List[SyntaxHighlightRange] = []
@classmethod
def from_path(
@ -479,7 +484,7 @@ class Syntax(JupyterMixin):
def line_tokenize() -> Iterable[Tuple[Any, str]]:
"""Split tokens to one per line."""
assert lexer # required to make MyPy happy, despite the `if lexer is None/else` 🤷
assert lexer # required to make MyPy happy
for token_type, token in lexer.get_tokens(code):
while token:
@ -521,16 +526,21 @@ class Syntax(JupyterMixin):
return text
def stylize_ranges(self, stylized_ranges: Sequence[SyntaxHighlightRange]) -> None:
def stylize_range(
self, style: Style, start: SyntaxPosition, end: SyntaxPosition
) -> None:
"""
Adds custom stylized ranges to apply to the syntax display when it's rendered.
Adds a custom style on a part of the code, that will be applied to the syntax display when it's rendered.
Line numbers are 1-based, while column indexes are 0-based.
Args:
stylized_ranges (Sequence[SyntaxHighlightRange]): A list of SyntaxHighlightRange objects.
SyntaxHighlightRange objects contain the start and end position
(each given as a line number and column index), as well as a style to apply to the range.
style (Style): The style to apply.
start (Tuple[int, int]): The start of the range, in the form `[line number, column index]`.
end (Tuple[int, int]): The end of the range, in the form `[line number, column index]`.
"""
self._stylized_ranges = stylized_ranges
self._stylized_ranges.append(
SyntaxHighlightRange(style=style, start=start, end=end)
)
def _get_line_numbers_color(self, blend: float = 0.3) -> Color:
background_style = self._theme.get_background_style() + self.background_style
@ -620,7 +630,7 @@ class Syntax(JupyterMixin):
else self.code_width
)
ends_on_nl, processed_code = self._get_processed_code(self.code)
ends_on_nl, processed_code = self._process_code(self.code)
text = self.highlight(processed_code, self.line_range)
if not self.line_numbers and not self.word_wrap and not self.line_range:
@ -742,16 +752,22 @@ class Syntax(JupyterMixin):
code (str): Code to highlight.
text (Text): Text instance to apply the style to.
"""
lines_length = _get_lines_length(code)
# N.B. using "\n" here is much faster than using metacharacters such as "^" or "\Z":
newlines_offsets = [
match.start() for match in re.finditer("\n", code, flags=re.MULTILINE)
]
# Let's add outer boundaries at each side of the list:
newlines_offsets.insert(0, -1)
newlines_offsets.append(len(code))
for stylized_range in self._stylized_ranges:
start, end = _get_code_indexes_for_syntax_positions(
lines_length, (stylized_range.start, stylized_range.end)
newlines_offsets, (stylized_range.start, stylized_range.end)
)
if start is not None and end is not None:
text.stylize(stylized_range.style, start, end)
def _get_processed_code(self, code: str) -> Tuple[bool, str]:
def _process_code(self, code: str) -> Tuple[bool, str]:
"""
Applies various processing to a raw code string
(normalises it so it always ends with a line return, dedents it if necessary, etc.)
@ -772,66 +788,33 @@ class Syntax(JupyterMixin):
return ends_on_nl, processed_code
def _get_lines_length(code: str) -> List[int]:
"""
Returns the length of each line in the given code string.
Args:
code (str): The code string to get the line lengths from.
"""
code_length = len(code)
lines_length = []
search_index = 0
while True:
new_line_index = code.find("\n", search_index)
if new_line_index == -1:
if search_index < code_length:
# add the last line length:
lines_length.append(code_length - search_index)
break
line_length = (
new_line_index if search_index == 0 else (new_line_index - search_index)
)
lines_length.append(line_length)
search_index = new_line_index + 1
return lines_length
def _get_code_indexes_for_syntax_positions(
lines_length: Sequence[int], positions: Sequence[SyntaxPosition]
newlines_offsets: Sequence[int], positions: Sequence[SyntaxPosition]
) -> Sequence[Optional[int]]:
"""
Returns the index of the code string for the given positions.
Args:
lines_length (Sequence[int]): The lengths of each line that form the code.
newlines_offsets (Sequence[int]): The offset of each newline character found in the code snippet.
positions (Sequence[SyntaxPosition]): The positions to search for.
Returns:
Sequence[Optional[int]]: For each position, the index of the code string for this position, or `None`
if the position is out of range.
"""
lines_count = len(lines_length)
lines_count = len(newlines_offsets)
def index_for_position(position: SyntaxPosition) -> Optional[int]:
if position.line_number > lines_count:
line_number, column_index = position
if line_number > lines_count:
return None # `line_number` is out of range
line_index = position.line_number - 1
if lines_length[line_index] < position.column_index:
return None # `column_index` is out of range
return sum(
(
# number of characters of all previous lines...
sum(lines_length[0:line_index]),
# ...plus one "\n" character for each previous line...
line_index,
# ...plus the number of characters on this line before the column index
position.column_index,
)
line_index = line_number - 1
line_length = (
newlines_offsets[line_index + 1] - newlines_offsets[line_index] - 1
)
if line_length < column_index:
return None # `column_index` is out of range
return newlines_offsets[line_index] + column_index + 1
return [index_for_position(position) for position in positions]

File diff suppressed because one or more lines are too long