mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-30 23:27:38 +00:00
Update string nodes for implicit concatenation (#7927)
## Summary This PR updates the string nodes (`ExprStringLiteral`, `ExprBytesLiteral`, and `ExprFString`) to account for implicit string concatenation. ### Motivation In Python, implicitly concatenated strings are joined while parsing because the interpreter doesn't require the information for each part. While that's feasible for an interpreter, it falls short for a static analysis tool where having such information is more useful. Currently, various parts of the code use the lexer to get the individual string parts. One of the main challenges this solves is that of string formatting. Currently, the formatter relies on the lexer to get the individual string parts, and formats them, including the comments, accordingly. But, with PEP 701, f-strings can also contain comments. Without this change, it becomes very difficult to add support for f-string formatting. ### Implementation The initial proposal was made in this discussion: https://github.com/astral-sh/ruff/discussions/6183#discussioncomment-6591993. Various AST designs were explored for this task; they are available in the linked internal document[^1]. The selected variant was the one where the nodes were kept as they are, except that the `implicit_concatenated` field was removed and a new struct was added to the `Expr*` struct instead. This is a private struct that contains the actual implementation of how the AST is designed for both single and implicitly concatenated strings. This implementation is achieved through an enum with two variants, `Single` and `Concatenated`, to avoid allocating a vector even for single strings. There are various public methods available on the value struct to query certain information regarding the node. 
The nodes are structured in the following way: ``` ExprStringLiteral - "foo" "bar" |- StringLiteral - "foo" |- StringLiteral - "bar" ExprBytesLiteral - b"foo" b"bar" |- BytesLiteral - b"foo" |- BytesLiteral - b"bar" ExprFString - "foo" f"bar {x}" |- FStringPart::Literal - "foo" |- FStringPart::FString - f"bar {x}" |- StringLiteral - "bar " |- FormattedValue - "x" ``` [^1]: Internal document: https://www.notion.so/astral-sh/Implicit-String-Concatenation-e036345dc48943f89e416c087bf6f6d9?pvs=4 #### Visitor The way the nodes are structured is that the entire string, including all the parts that are implicitly concatenated, is a single node containing individual nodes for the parts. The previous section has a representation of that tree for all the string nodes. This means that new visitor methods are added to visit the individual parts of strings, bytes, and f-strings for `Visitor`, `PreorderVisitor`, and `Transformer`. ## Test Plan - `cargo insta test --workspace --all-features --unreferenced reject` - Verify that the ecosystem results are unchanged
This commit is contained in:
parent
2590aa30ae
commit
017e829115
121 changed files with 27666 additions and 25501 deletions
|
@ -1,7 +1,8 @@
|
|||
use crate::{
|
||||
Alias, Arguments, BoolOp, CmpOp, Comprehension, Decorator, ElifElseClause, ExceptHandler, Expr,
|
||||
Keyword, MatchCase, Mod, Operator, Parameter, ParameterWithDefault, Parameters, Pattern,
|
||||
PatternArguments, PatternKeyword, Singleton, Stmt, TypeParam, TypeParams, UnaryOp, WithItem,
|
||||
Alias, Arguments, BoolOp, BytesLiteral, CmpOp, Comprehension, Decorator, ElifElseClause,
|
||||
ExceptHandler, Expr, FString, Keyword, MatchCase, Mod, Operator, Parameter,
|
||||
ParameterWithDefault, Parameters, Pattern, PatternArguments, PatternKeyword, Singleton, Stmt,
|
||||
StringLiteral, TypeParam, TypeParams, UnaryOp, WithItem,
|
||||
};
|
||||
use crate::{AnyNodeRef, AstNode};
|
||||
|
||||
|
@ -152,6 +153,21 @@ pub trait PreorderVisitor<'a> {
|
|||
/// Visits an [`ElifElseClause`].
///
/// The default implementation delegates to `walk_elif_else_clause`.
fn visit_elif_else_clause(&mut self, elif_else_clause: &'a ElifElseClause) {
|
||||
walk_elif_else_clause(self, elif_else_clause);
|
||||
}
|
||||
|
||||
/// Visits a single [`FString`] node.
///
/// The default implementation delegates to [`walk_f_string`].
#[inline]
|
||||
fn visit_f_string(&mut self, f_string: &'a FString) {
|
||||
walk_f_string(self, f_string);
|
||||
}
|
||||
|
||||
/// Visits a single [`StringLiteral`] node.
///
/// The default implementation delegates to [`walk_string_literal`].
#[inline]
|
||||
fn visit_string_literal(&mut self, string_literal: &'a StringLiteral) {
|
||||
walk_string_literal(self, string_literal);
|
||||
}
|
||||
|
||||
/// Visits a single [`BytesLiteral`] node.
///
/// The default implementation delegates to [`walk_bytes_literal`].
#[inline]
|
||||
fn visit_bytes_literal(&mut self, bytes_literal: &'a BytesLiteral) {
|
||||
walk_bytes_literal(self, bytes_literal);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn walk_module<'a, V>(visitor: &mut V, module: &'a Mod)
|
||||
|
@ -530,6 +546,42 @@ where
|
|||
{
|
||||
}
|
||||
|
||||
/// Preorder walk of a single [`FString`] node.
///
/// Wraps `f_string` in an [`AnyNodeRef`] and passes it to `visitor.enter_node`.
/// The children are visited through `f_string.visit_preorder(visitor)` only when
/// the returned value reports `is_traverse()`; `visitor.leave_node` is called
/// afterwards whether or not the children were traversed.
#[inline]
|
||||
pub fn walk_f_string<'a, V>(visitor: &mut V, f_string: &'a FString)
|
||||
where
|
||||
V: PreorderVisitor<'a> + ?Sized,
|
||||
{
|
||||
let node = AnyNodeRef::from(f_string);
|
||||
if visitor.enter_node(node).is_traverse() {
|
||||
f_string.visit_preorder(visitor);
|
||||
}
|
||||
visitor.leave_node(node);
|
||||
}
|
||||
|
||||
/// Preorder walk of a single [`StringLiteral`] node.
///
/// Wraps `string_literal` in an [`AnyNodeRef`] and passes it to
/// `visitor.enter_node`. `string_literal.visit_preorder(visitor)` runs only when
/// the returned value reports `is_traverse()`; `visitor.leave_node` is called
/// afterwards in either case.
#[inline]
|
||||
pub fn walk_string_literal<'a, V>(visitor: &mut V, string_literal: &'a StringLiteral)
|
||||
where
|
||||
V: PreorderVisitor<'a> + ?Sized,
|
||||
{
|
||||
let node = AnyNodeRef::from(string_literal);
|
||||
if visitor.enter_node(node).is_traverse() {
|
||||
string_literal.visit_preorder(visitor);
|
||||
}
|
||||
visitor.leave_node(node);
|
||||
}
|
||||
|
||||
/// Preorder walk of a single [`BytesLiteral`] node.
///
/// Wraps `bytes_literal` in an [`AnyNodeRef`] and passes it to
/// `visitor.enter_node`. `bytes_literal.visit_preorder(visitor)` runs only when
/// the returned value reports `is_traverse()`; `visitor.leave_node` is called
/// afterwards in either case.
#[inline]
|
||||
pub fn walk_bytes_literal<'a, V>(visitor: &mut V, bytes_literal: &'a BytesLiteral)
|
||||
where
|
||||
V: PreorderVisitor<'a> + ?Sized,
|
||||
{
|
||||
let node = AnyNodeRef::from(bytes_literal);
|
||||
if visitor.enter_node(node).is_traverse() {
|
||||
bytes_literal.visit_preorder(visitor);
|
||||
}
|
||||
visitor.leave_node(node);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_alias<'a, V>(visitor: &mut V, alias: &'a Alias)
|
||||
where
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
use crate::{
|
||||
self as ast, Alias, Arguments, BoolOp, CmpOp, Comprehension, Decorator, ElifElseClause,
|
||||
ExceptHandler, Expr, ExprContext, Keyword, MatchCase, Operator, Parameter, Parameters, Pattern,
|
||||
PatternArguments, PatternKeyword, Stmt, TypeParam, TypeParamTypeVar, TypeParams, UnaryOp,
|
||||
WithItem,
|
||||
self as ast, Alias, Arguments, BoolOp, BytesLiteral, CmpOp, Comprehension, Decorator,
|
||||
ElifElseClause, ExceptHandler, Expr, ExprContext, FString, Keyword, MatchCase, Operator,
|
||||
Parameter, Parameters, Pattern, PatternArguments, PatternKeyword, Stmt, StringLiteral,
|
||||
TypeParam, TypeParamTypeVar, TypeParams, UnaryOp, WithItem,
|
||||
};
|
||||
|
||||
/// A trait for transforming ASTs. Visits all nodes in the AST recursively in evaluation-order.
|
||||
|
@ -85,6 +85,15 @@ pub trait Transformer {
|
|||
/// Visits an [`ElifElseClause`] through a mutable reference.
///
/// The default implementation delegates to `walk_elif_else_clause`.
fn visit_elif_else_clause(&self, elif_else_clause: &mut ElifElseClause) {
|
||||
walk_elif_else_clause(self, elif_else_clause);
|
||||
}
|
||||
/// Visits a single [`FString`] through a mutable reference.
///
/// The default implementation delegates to [`walk_f_string`].
fn visit_f_string(&self, f_string: &mut FString) {
|
||||
walk_f_string(self, f_string);
|
||||
}
|
||||
/// Visits a single [`StringLiteral`] through a mutable reference.
///
/// The default implementation delegates to [`walk_string_literal`].
fn visit_string_literal(&self, string_literal: &mut StringLiteral) {
|
||||
walk_string_literal(self, string_literal);
|
||||
}
|
||||
/// Visits a single [`BytesLiteral`] through a mutable reference.
///
/// The default implementation delegates to [`walk_bytes_literal`].
fn visit_bytes_literal(&self, bytes_literal: &mut BytesLiteral) {
|
||||
walk_bytes_literal(self, bytes_literal);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn walk_body<V: Transformer + ?Sized>(visitor: &V, body: &mut [Stmt]) {
|
||||
|
@ -462,14 +471,29 @@ pub fn walk_expr<V: Transformer + ?Sized>(visitor: &V, expr: &mut Expr) {
|
|||
visitor.visit_format_spec(expr);
|
||||
}
|
||||
}
|
||||
Expr::FString(ast::ExprFString { values, .. }) => {
|
||||
for expr in values {
|
||||
visitor.visit_expr(expr);
|
||||
Expr::FString(ast::ExprFString { value, .. }) => {
|
||||
for f_string_part in value.parts_mut() {
|
||||
match f_string_part {
|
||||
ast::FStringPart::Literal(string_literal) => {
|
||||
visitor.visit_string_literal(string_literal);
|
||||
}
|
||||
ast::FStringPart::FString(f_string) => {
|
||||
visitor.visit_f_string(f_string);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Expr::StringLiteral(_)
|
||||
| Expr::BytesLiteral(_)
|
||||
| Expr::NumberLiteral(_)
|
||||
Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) => {
|
||||
for string_literal in value.parts_mut() {
|
||||
visitor.visit_string_literal(string_literal);
|
||||
}
|
||||
}
|
||||
Expr::BytesLiteral(ast::ExprBytesLiteral { value, .. }) => {
|
||||
for bytes_literal in value.parts_mut() {
|
||||
visitor.visit_bytes_literal(bytes_literal);
|
||||
}
|
||||
}
|
||||
Expr::NumberLiteral(_)
|
||||
| Expr::BooleanLiteral(_)
|
||||
| Expr::NoneLiteral(_)
|
||||
| Expr::EllipsisLiteral(_) => {}
|
||||
|
@ -560,6 +584,12 @@ pub fn walk_except_handler<V: Transformer + ?Sized>(
|
|||
}
|
||||
}
|
||||
|
||||
/// Walks the contents of an [`FString`] for a [`Transformer`].
///
/// Iterates mutably over `f_string.values` and passes each element to
/// `visitor.visit_expr`, in order.
pub fn walk_f_string<V: Transformer + ?Sized>(visitor: &V, f_string: &mut FString) {
|
||||
for expr in &mut f_string.values {
|
||||
visitor.visit_expr(expr);
|
||||
}
|
||||
}
|
||||
|
||||
/// Walks a format spec expression by forwarding it to `visitor.visit_expr`.
pub fn walk_format_spec<V: Transformer + ?Sized>(visitor: &V, format_spec: &mut Expr) {
|
||||
visitor.visit_expr(format_spec);
}
|
||||
|
@ -730,3 +760,13 @@ pub fn walk_cmp_op<V: Transformer + ?Sized>(visitor: &V, cmp_op: &mut CmpOp) {}
|
|||
|
||||
/// Walk function for an [`Alias`]; the body is empty, so nothing is visited or
/// modified here.
// Both parameters are unused because of the empty body, hence the lint allowance.
#[allow(unused_variables)]
|
||||
pub fn walk_alias<V: Transformer + ?Sized>(visitor: &V, alias: &mut Alias) {}
|
||||
|
||||
/// Walk function for a [`StringLiteral`]; the body is empty, so nothing is
/// visited or modified here.
// Both parameters are unused because of the empty body, hence the lint allowance.
#[allow(unused_variables)]
|
||||
pub fn walk_string_literal<V: Transformer + ?Sized>(
|
||||
visitor: &V,
|
||||
string_literal: &mut StringLiteral,
|
||||
) {
|
||||
}
|
||||
|
||||
/// Walk function for a [`BytesLiteral`]; the body is empty, so nothing is
/// visited or modified here.
// Both parameters are unused because of the empty body, hence the lint allowance.
#[allow(unused_variables)]
|
||||
pub fn walk_bytes_literal<V: Transformer + ?Sized>(visitor: &V, bytes_literal: &mut BytesLiteral) {}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue