mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 18:58:04 +00:00

Rebase of #6365 authored by @davidszotten. ## Summary This PR updates the AST structure for f-string elements. The main **motivation** behind this change is to have a dedicated node for the string part of an f-string. Previously, the existing `ExprStringLiteral` node was used for this purpose, which isn't exactly correct. The `ExprStringLiteral` node should include the quotes in its range, but the f-string literal element doesn't include the quotes as it's a specific part within an f-string. For example, ```python f"foo {x}" # ^^^^ # This is the literal part of an f-string ``` The new `FStringElement` enum helps here: it represents either the literal part or the expression part of an f-string. ### Rule Updates This means that there'll be two nodes representing a string depending on the context: one for a normal string literal and the other for a string literal within an f-string. The AST checker is updated to accommodate this change. The rules which work on string literals are updated to check the literal part of f-strings as well. #### Notes 1. The `Expr::is_literal_expr` method would check for `ExprStringLiteral` and return true if so. But now that we don't represent the literal part of an f-string using that node, this improves the method's behavior and confines it to actual expressions. We do have the `FStringElement::is_literal` method. 2. We avoid checking if we're in an f-string context before adding to `string_type_definitions` because the f-string literal is now a dedicated node and not part of `Expr`. 3. Annotations cannot use f-strings, so we avoid changing any rules which work on annotations and check for `ExprStringLiteral`. ## Test Plan - All references of `Expr::StringLiteral` were checked to see if any of the rules require updating to account for the f-string literal element node. - New test cases are added for rules which check against the literal part of an f-string.
- Check the ecosystem results and ensure they remain unchanged. ## Performance There's a performance penalty in the parser. The reason for this remains unknown as it seems that the generated assembly code is now different for the `__reduce154` function. The reduce function body is just popping the `ParenthesizedExpr` on top of the stack and pushing it with the new location. - The size of the `FStringElement` enum is the same as `Expr`, which is what it replaces in `FString::format_spec` - The size of `FStringExpressionElement` is the same as `ExprFormattedValue`, which is what it replaces I tried reducing the `Expr` enum from 80 bytes to 72 bytes but it hardly resulted in any performance gain. The difference can be seen here: - Original profile: https://share.firefox.dev/3Taa7ES - Profile after boxing some node fields: https://share.firefox.dev/3GsNXpD ### Backtracking I tried backtracking the changes to see if any isolated change produced this regression. The problem here is that the overall change is so small that there's only a single checkpoint where I can backtrack, and that checkpoint results in the same regression. This checkpoint is to revert to using `Expr` for the `FString::format_spec` field. After this point, the change would revert back to the original implementation. ## Review process The review process is similar to #7927. The first set of commits updates the node structure, parser, and related AST files. Then, further commits update the linter and formatter parts to account for the AST change. --------- Co-authored-by: David Szotten <davidszotten@gmail.com>
169 lines
5.2 KiB
Python
Executable file
169 lines
5.2 KiB
Python
Executable file
#! /usr/bin/python
|
|
|
|
"""See CONTRIBUTING.md"""
|
|
|
|
# %%
|
|
|
|
import re
|
|
from collections import defaultdict
|
|
from pathlib import Path
|
|
from subprocess import check_output
|
|
|
|
|
|
def rustfmt(code: str) -> str:
    """Format Rust source text by piping it through the `rustfmt` executable.

    `code` is sent to the process on stdin and the text that `rustfmt` writes
    to stdout (`--emit=stdout`) is returned. `check_output` raises
    `CalledProcessError` if the process exits with a non-zero status.
    """
    command = ["rustfmt", "--emit=stdout"]
    formatted = check_output(command, input=code, text=True)
    return formatted
|
|
|
|
|
|
# %%
|
|
# Read nodes

# Repository root, as reported by git for the current working directory.
toplevel = check_output(["git", "rev-parse", "--show-toplevel"], text=True)
root = Path(toplevel.strip())

nodes_file = (root / "crates" / "ruff_python_ast" / "src" / "node.rs").read_text()

# Everything between `pub enum AnyNode {` and the first `}` that follows it,
# one enum variant per line.
any_node_body = nodes_file.split("pub enum AnyNode {")[1].split("}")[0]
node_lines = any_node_body.strip().splitlines()

nodes = []
for node_line in node_lines:
    # Take the text inside the variant's parentheses, then drop any `path::`
    # prefix and any `<...>` generic suffix to get the bare node name.
    payload = node_line.split("(")[1].split(")")[0]
    node = payload.split("::")[-1].split("<")[0]
    # These nodes aren't used in the formatter as the formatting of them is handled
    # in one of the other nodes containing them.
    if node not in ("FStringLiteralElement", "FStringExpressionElement"):
        nodes.append(node)
print(nodes)
|
|
|
|
# %%
|
|
# Generate newtypes with dummy FormatNodeRule implementations

# Source directory of the formatter crate; the per-node files live in
# sub-directories of it and the aggregated impls go to `generated.rs`.
src = root / "crates" / "ruff_python_formatter" / "src"
out = src / "generated.rs"

# Maps a group key to the list of node names that belong to it.
nodes_grouped = defaultdict(list)

# Group key -> name of the module directory for that group.
# We rename because mod is a keyword in rust
groups = {
    "mod": "module",
    "expr": "expression",
    "stmt": "statement",
    "pattern": "pattern",
    "type_param": "type_param",
    "other": "other",
}
|
|
|
|
|
|
def group_for_node(node: str) -> str:
    """Return the key of `groups` that `node` belongs to.

    Each key is turned into a name prefix by title-casing it and removing
    underscores (e.g. `type_param` becomes `TypeParam`). The first key, in the
    iteration order of `groups`, whose prefix starts `node` is returned; when
    no key matches, the result is `"other"`.
    """
    matches = (
        candidate
        for candidate in groups
        if node.startswith(candidate.title().replace("_", ""))
    )
    return next(matches, "other")
|
|
|
|
|
|
def to_camel_case(node: str) -> str:
    """Convert a PascalCase name to lower-case words joined by underscores.

    Despite the function's name, the result is snake_case: an underscore is
    inserted before every ASCII capital letter, the text is lower-cased, and
    all leading underscores are removed (`ExprStringLiteral` becomes
    `expr_string_literal`).
    """
    with_separators = re.sub("([A-Z])", r"_\1", node)
    lowered = with_separators.lower()
    return lowered.lstrip("_")
|
|
|
|
|
|
# Bucket every node name under the group key chosen by `group_for_node`.
for node in nodes:
    nodes_grouped[group_for_node(node)].append(node)

for group, group_nodes in nodes_grouped.items():
    # These conflict with the manually written content of the mod.rs files
    # src.joinpath(groups[group]).mkdir(exist_ok=True)
    # mod_section = "\n".join(
    #     f"pub(crate) mod {to_camel_case(node)};" for node in group_nodes
    # )
    # src.joinpath(groups[group]).joinpath("mod.rs").write_text(rustfmt(mod_section))
    group_dir = src / groups[group]
    for node in group_nodes:
        node_path = group_dir / f"{to_camel_case(node)}.rs"
        # Don't override existing manual implementations
        if not node_path.exists():
            # Placeholder rule that emits the node verbatim. The template's
            # layout does not matter: the text goes through rustfmt before
            # it is written.
            code = f"""
use ruff_formatter::write;
use ruff_python_ast::{node};
use crate::verbatim_text;
use crate::prelude::*;

#[derive(Default)]
pub struct Format{node};

impl FormatNodeRule<{node}> for Format{node} {{
    fn fmt_fields(&self, item: &{node}, f: &mut PyFormatter) -> FormatResult<()> {{
        write!(f, [verbatim_text(item)])
    }}
}}
""".strip()  # noqa: E501

            node_path.write_text(rustfmt(code))
|
|
|
|
# %%
|
|
# Generate `FormatRule`, `AsFormat` and `IntoFormat`

# Header of `generated.rs`; the impls for each node are appended after it.
header = """//! This is a generated file. Don't modify it by hand! Run `crates/ruff_python_formatter/generate.py` to re-generate the file.
#![allow(unknown_lints, clippy::default_constructed_unit_structs)]

use crate::context::PyFormatContext;
use crate::{AsFormat, FormatNodeRule, IntoFormat, PyFormatter};
use ruff_formatter::{FormatOwnedWithRule, FormatRefWithRule, FormatResult, FormatRule};
use ruff_python_ast as ast;

"""  # noqa: E501

sections = [header]
for node in nodes:
    # Path of the node's formatting rule: `crate::<group module>::<file>::Format<Node>`.
    module = f"crate::{groups[group_for_node(node)]}::{to_camel_case(node)}"
    rule = f"{module}::Format{node}"
    sections.append(f"""
impl FormatRule<ast::{node}, PyFormatContext<'_>>
    for {rule}
{{
    #[inline]
    fn fmt(
        &self,
        node: &ast::{node},
        f: &mut PyFormatter,
    ) -> FormatResult<()> {{
        FormatNodeRule::<ast::{node}>::fmt(self, node, f)
    }}
}}
impl<'ast> AsFormat<PyFormatContext<'ast>> for ast::{node} {{
    type Format<'a> = FormatRefWithRule<
        'a,
        ast::{node},
        {rule},
        PyFormatContext<'ast>,
    >;
    fn format(&self) -> Self::Format<'_> {{
        FormatRefWithRule::new(
            self,
            {rule}::default(),
        )
    }}
}}
impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::{node} {{
    type Format = FormatOwnedWithRule<
        ast::{node},
        {rule},
        PyFormatContext<'ast>,
    >;
    fn into_format(self) -> Self::Format {{
        FormatOwnedWithRule::new(
            self,
            {rule}::default(),
        )
    }}
}}
""")  # noqa: E501

# The whole file is formatted in one rustfmt pass before it is written.
generated = "".join(sections)
out.write_text(rustfmt(generated))
|