gh-127833: Use productionlist nodes to implement the grammar-snippet directive (#130376)

Co-authored-by: Petr Viktorin <encukou@gmail.com>
Co-authored-by: Blaise Pabon <blaise@gmail.com>
This commit is contained in:
Adam Turner 2025-03-20 15:35:20 +00:00 committed by GitHub
parent 86d5fa95cf
commit 443c0cd17c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 491 additions and 422 deletions

View file

@ -13,8 +13,8 @@ from sphinx.util.docutils import SphinxDirective
from sphinx.util.nodes import make_id from sphinx.util.nodes import make_id
if TYPE_CHECKING: if TYPE_CHECKING:
from collections.abc import Sequence from collections.abc import Iterable, Iterator, Sequence
from typing import Any from typing import Any, Final
from docutils.nodes import Node from docutils.nodes import Node
from sphinx.application import Sphinx from sphinx.application import Sphinx
@ -41,30 +41,7 @@ class GrammarSnippetBase(SphinxDirective):
# The option/argument handling is left to the individual classes. # The option/argument handling is left to the individual classes.
def make_grammar_snippet( grammar_re: Final = re.compile(
self, options: dict[str, Any], content: Sequence[str]
) -> list[nodes.paragraph]:
"""Create a literal block from options & content."""
group_name = options['group']
# Docutils elements have a `rawsource` attribute that is supposed to be
# set to the original ReST source.
# Sphinx does the following with it:
# - if it's empty, set it to `self.astext()`
# - if it matches `self.astext()` when generating the output,
# apply syntax highlighting (which is based on the plain-text content
# and thus discards internal formatting, like references).
# To get around this, we set it to this non-empty string:
rawsource = 'You should not see this.'
literal = nodes.literal_block(
rawsource,
'',
classes=['highlight'],
)
grammar_re = re.compile(
r""" r"""
(?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line (?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
(?=:) # ... followed by a colon (?=:) # ... followed by a colon
@ -78,61 +55,126 @@ class GrammarSnippetBase(SphinxDirective):
re.VERBOSE, re.VERBOSE,
) )
for line in content: def make_grammar_snippet(
last_pos = 0 self, options: dict[str, Any], content: Sequence[str]
for match in grammar_re.finditer(line): ) -> list[addnodes.productionlist]:
# Handle text between matches """Create a literal block from options & content."""
if match.start() > last_pos:
literal += nodes.Text(line[last_pos : match.start()])
last_pos = match.end()
# Handle matches group_name = options['group']
group_dict = { node_location = self.get_location()
name: content production_nodes = []
for name, content in match.groupdict().items() for rawsource, production_defs in self.production_definitions(content):
if content is not None production = self.make_production(
} rawsource,
match group_dict: production_defs,
case {'rule_name': name}: group_name=group_name,
literal += self.make_link_target_for_token( location=node_location,
group_name, name
) )
case {'rule_ref': ref_text}: production_nodes.append(production)
literal += token_xrefs(ref_text, group_name)
case {'single_quoted': name} | {'double_quoted': name}:
literal += snippet_string_node('', name)
case _:
raise ValueError('unhandled match')
literal += nodes.Text(line[last_pos:] + '\n')
node = nodes.paragraph( node = addnodes.productionlist(
'', '',
'', *production_nodes,
literal, support_smartquotes=False,
classes=['highlight'],
) )
self.set_source_info(node)
return [node] return [node]
def make_link_target_for_token( def production_definitions(
self, group_name: str, name: str self, lines: Iterable[str], /
) -> Iterator[tuple[str, list[tuple[str, str]]]]:
"""Yield pairs of rawsource and production content dicts."""
production_lines: list[str] = []
production_content: list[tuple[str, str]] = []
for line in lines:
# If this line is the start of a new rule (text in the column 1),
# emit the current production and start a new one.
if not line[:1].isspace():
rawsource = '\n'.join(production_lines)
production_lines.clear()
if production_content:
yield rawsource, production_content
production_content = []
# Append the current line for the raw source
production_lines.append(line)
# Parse the line into constituent parts
last_pos = 0
for match in self.grammar_re.finditer(line):
# Handle text between matches
if match.start() > last_pos:
unmatched_text = line[last_pos : match.start()]
production_content.append(('text', unmatched_text))
last_pos = match.end()
# Handle matches.
# After filtering None (non-matches), exactly one groupdict()
# entry should remain.
[(re_group_name, content)] = (
(re_group_name, content)
for re_group_name, content in match.groupdict().items()
if content is not None
)
production_content.append((re_group_name, content))
production_content.append(('text', line[last_pos:] + '\n'))
# Emit the final production
if production_content:
rawsource = '\n'.join(production_lines)
yield rawsource, production_content
def make_production(
self,
rawsource: str,
production_defs: list[tuple[str, str]],
*,
group_name: str,
location: str,
) -> addnodes.production:
"""Create a production node from a list of parts."""
production_node = addnodes.production(rawsource)
for re_group_name, content in production_defs:
match re_group_name:
case 'rule_name':
production_node += self.make_name_target(
name=content,
production_group=group_name,
location=location,
)
case 'rule_ref':
production_node += token_xrefs(content, group_name)
case 'single_quoted' | 'double_quoted':
production_node += snippet_string_node('', content)
case 'text':
production_node += nodes.Text(content)
case _:
raise ValueError(f'unhandled match: {re_group_name!r}')
return production_node
def make_name_target(
self,
*,
name: str,
production_group: str,
location: str,
) -> addnodes.literal_strong: ) -> addnodes.literal_strong:
"""Return a literal node which is a link target for the given token.""" """Make a link target for the given production."""
name_node = addnodes.literal_strong()
# Cargo-culted magic to make `name_node` a link target # Cargo-culted magic to make `name_node` a link target
# similar to Sphinx `production`. # similar to Sphinx `production`.
# This needs to be the same as what Sphinx does # This needs to be the same as what Sphinx does
# to avoid breaking existing links. # to avoid breaking existing links.
domain = self.env.domains['std']
obj_name = f"{group_name}:{name}" name_node = addnodes.literal_strong(name, name)
prefix = f'grammar-token-{group_name}' prefix = f'grammar-token-{production_group}'
node_id = make_id(self.env, self.state.document, prefix, name) node_id = make_id(self.env, self.state.document, prefix, name)
name_node['ids'].append(node_id) name_node['ids'].append(node_id)
self.state.document.note_implicit_target(name_node, name_node) self.state.document.note_implicit_target(name_node, name_node)
domain.note_object('token', obj_name, node_id, location=name_node) obj_name = f'{production_group}:{name}' if production_group else name
std = self.env.domains.standard_domain
text_node = nodes.Text(name) std.note_object('token', obj_name, node_id, location=location)
name_node += text_node
return name_node return name_node
@ -168,7 +210,7 @@ class GrammarSnippetDirective(GrammarSnippetBase):
optional_arguments = 1 optional_arguments = 1
final_argument_whitespace = True final_argument_whitespace = True
def run(self) -> list[nodes.paragraph]: def run(self) -> list[addnodes.productionlist]:
return self.make_grammar_snippet(self.options, self.content) return self.make_grammar_snippet(self.options, self.content)
@ -187,7 +229,7 @@ class CompatProductionList(GrammarSnippetBase):
final_argument_whitespace = True final_argument_whitespace = True
option_spec = {} option_spec = {}
def run(self) -> list[nodes.paragraph]: def run(self) -> list[addnodes.productionlist]:
# The "content" of a productionlist is actually the first and only # The "content" of a productionlist is actually the first and only
# argument. The first line is the group; the rest is the content lines. # argument. The first line is the group; the rest is the content lines.
lines = self.arguments[0].splitlines() lines = self.arguments[0].splitlines()

File diff suppressed because it is too large Load diff