mirror of
				https://github.com/astral-sh/ruff.git
				synced 2025-10-31 12:05:52 +00:00 
			
		
		
		
	 189e947808
			
		
	
	
		189e947808
		
			
		
	
	
	
	
		
			
			This PR splits the string formatting code in the formatter to be handled
by the respective nodes.
Previously, the string formatting was done through a single
`FormatString` interface. Now, the nodes themselves are responsible for
formatting.
The following changes were made:
1. Remove `StringLayout::ImplicitStringConcatenationInBinaryLike` and
inline the call to `FormatStringContinuation`. After the refactor, the
binary like formatting would delegate to `FormatString` which would then
delegate to `FormatStringContinuation`. This removes the intermediary
steps.
2. Add formatter implementation for `FStringPart` which delegates it to
the respective string literal or f-string node.
3. Add `ExprStringLiteralKind` which is either `String` or `Docstring`.
If it's a docstring variant, then the string expression would not be
implicitly concatenated. This is guaranteed by the
`DocstringStmt::try_from_expression` constructor.
4. Add `StringLiteralKind` which is either a `String`, `Docstring` or
`InImplicitlyConcatenatedFString`. The last variant is for when the
string literal is implicitly concatenated with an f-string
(`"foo" f"bar {x}"`).
5. Remove `FormatString`.
6. Extract the f-string quote detection as a standalone function which
is public to the crate. This is used to detect the quote to be used for
an f-string at the expression level (`ExprFString` or
`FormatStringContinuation`).
### Formatter ecosystem result
**This PR**
| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |
**main**
| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |
		
	
			
		
			
				
	
	
		
			175 lines
		
	
	
	
		
			5.3 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			175 lines
		
	
	
	
		
			5.3 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #! /usr/bin/python
 | |
| 
 | |
| """See CONTRIBUTING.md"""
 | |
| 
 | |
| # %%
 | |
| 
 | |
| import re
 | |
| from collections import defaultdict
 | |
| from pathlib import Path
 | |
| from subprocess import check_output
 | |
| 
 | |
| 
 | |
def rustfmt(code: str) -> str:
    """Pipe ``code`` through the ``rustfmt`` executable and return what it prints.

    The source is passed on stdin and ``--emit=stdout`` makes ``rustfmt`` write
    its result to stdout. ``check_output`` raises ``CalledProcessError`` when the
    process exits with a non-zero status.
    """
    command = ["rustfmt", "--emit=stdout"]
    return check_output(command, input=code, text=True)
 | |
| 
 | |
| 
 | |
| # %%
 | |
| # Read nodes
 | |
| 
 | |
# Ask git for the top-level directory of the checkout; every path below is
# resolved relative to it.
root = Path(check_output(["git", "rev-parse", "--show-toplevel"], text=True).strip())
nodes_file = root.joinpath("crates", "ruff_python_ast", "src", "node.rs").read_text()

# Keep only the text between `pub enum AnyNode {` and the first closing brace
# that follows it, one entry per line.
enum_body = nodes_file.split("pub enum AnyNode {")[1].split("}")[0]
node_lines = enum_body.strip().splitlines()

# `FString` and `StringLiteral` have a custom implementation while the formatting for
# `FStringLiteralElement` and `FStringExpressionElement` is handled by the `FString`
# implementation.
skipped_nodes = (
    "FString",
    "StringLiteral",
    "FStringLiteralElement",
    "FStringExpressionElement",
)

nodes = []
for node_line in node_lines:
    # Take what is inside the parentheses, then drop any `::` path prefix and
    # any `<...>` suffix to get the bare type name.
    payload = node_line.split("(")[1].split(")")[0]
    node = payload.split("::")[-1].split("<")[0]
    if node not in skipped_nodes:
        nodes.append(node)
print(nodes)
 | |
| 
 | |
| # %%
 | |
| # Generate newtypes with dummy FormatNodeRule implementations
 | |
| 
 | |
# Source directory of the formatter crate, and the generated file inside it.
src = root.joinpath("crates", "ruff_python_formatter", "src")
out = src.joinpath("generated.rs")

nodes_grouped = defaultdict(list)
# Maps a node-name prefix (in snake_case) to the directory/module name used for
# it under `src`. We rename because mod is a keyword in rust.
groups = dict(
    mod="module",
    expr="expression",
    stmt="statement",
    pattern="pattern",
    type_param="type_param",
    other="other",
)
 | |
| 
 | |
| 
 | |
def group_for_node(node: str) -> str:
    """Return the first key of ``groups`` whose PascalCase form is a prefix of ``node``.

    Each key is turned into a prefix by title-casing it and removing the
    underscores (``"type_param"`` becomes ``"TypeParam"``). Keys are tried in
    the dictionary's iteration order; ``"other"`` is returned when none match.
    """
    matching = (
        group for group in groups if node.startswith(group.title().replace("_", ""))
    )
    return next(matching, "other")
 | |
| 
 | |
| 
 | |
def to_camel_case(node: str) -> str:
    """Convert a PascalCase name to snake_case (e.g. ``ExprName`` -> ``expr_name``).

    Despite the function's name, the result is lowercase with underscores: an
    underscore is inserted before every ASCII uppercase letter, the text is
    lowercased, and any leading underscores are stripped.
    """
    underscored = re.sub("([A-Z])", r"_\1", node)
    return underscored.lower().lstrip("_")
 | |
| 
 | |
| 
 | |
# Bucket every node name under the group key it belongs to.
for node in nodes:
    group_key = group_for_node(node)
    nodes_grouped[group_key].append(node)

# Write a stub `Format<Node>` implementation for every node that does not have a
# file yet; the stub emits the node's source text verbatim.
for group, group_nodes in nodes_grouped.items():
    group_dir = src.joinpath(groups[group])
    # These conflict with the manually written content of the mod.rs files
    # src.joinpath(groups[group]).mkdir(exist_ok=True)
    # mod_section = "\n".join(
    #     f"pub(crate) mod {to_camel_case(node)};" for node in group_nodes
    # )
    # src.joinpath(groups[group]).joinpath("mod.rs").write_text(rustfmt(mod_section))
    for node in group_nodes:
        node_path = group_dir.joinpath(to_camel_case(node) + ".rs")
        # Don't override existing manual implementations
        if node_path.exists():
            continue

        code = f"""
            use ruff_formatter::write;
            use ruff_python_ast::{node};
            use crate::verbatim_text;
            use crate::prelude::*;

            #[derive(Default)]
            pub struct Format{node};

            impl FormatNodeRule<{node}> for Format{node} {{
                fn fmt_fields(&self, item: &{node}, f: &mut PyFormatter) -> FormatResult<()> {{
                    write!(f, [verbatim_text(item)])
                }}
            }}
            """.strip()  # noqa: E501

        # Run the stub through rustfmt before writing it to disk.
        node_path.write_text(rustfmt(code))
 | |
| 
 | |
| # %%
 | |
| # Generate `FormatRule`, `AsFormat` and `IntoFormat`
 | |
| 
 | |
# File header: the "generated file" notice, lint allowances and the imports
# shared by every impl appended below.
generated = """//! This is a generated file. Don't modify it by hand! Run `crates/ruff_python_formatter/generate.py` to re-generate the file.
#![allow(unknown_lints, clippy::default_constructed_unit_structs)]

use crate::context::PyFormatContext;
use crate::{AsFormat, FormatNodeRule, IntoFormat, PyFormatter};
use ruff_formatter::{FormatOwnedWithRule, FormatRefWithRule, FormatResult, FormatRule};
use ruff_python_ast as ast;

"""  # noqa: E501
for node in nodes:
    # Crate path of this node's rule type; it is repeated in all three impls,
    # so it is computed once per node.
    format_rule = (
        f"crate::{groups[group_for_node(node)]}::{to_camel_case(node)}::Format{node}"
    )
    text = f"""
        impl FormatRule<ast::{node}, PyFormatContext<'_>>
            for {format_rule}
        {{
            #[inline]
            fn fmt(
                &self,
                node: &ast::{node},
                f: &mut PyFormatter,
            ) -> FormatResult<()> {{
                FormatNodeRule::<ast::{node}>::fmt(self, node, f)
            }}
        }}
        impl<'ast> AsFormat<PyFormatContext<'ast>> for ast::{node} {{
            type Format<'a> = FormatRefWithRule<
                'a,
                ast::{node},
                {format_rule},
                PyFormatContext<'ast>,
            >;
            fn format(&self) -> Self::Format<'_> {{
                FormatRefWithRule::new(
                    self,
                    {format_rule}::default(),
                )
            }}
        }}
        impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::{node} {{
            type Format = FormatOwnedWithRule<
                ast::{node},
                {format_rule},
                PyFormatContext<'ast>,
            >;
            fn into_format(self) -> Self::Format {{
                FormatOwnedWithRule::new(
                    self,
                    {format_rule}::default(),
                )
            }}
        }}
    """  # noqa: E501
    generated += text

# Format the assembled source with rustfmt and write it to `generated.rs`.
out.write_text(rustfmt(generated))
 |