mirror of
https://github.com/astral-sh/ruff.git
synced 2025-10-06 00:20:37 +00:00
Disallow implicit concatenation of t-strings and other string types (#19485)
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / ecosystem (push) Blocked by required conditions
CI / Fuzz for new ty panics (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / check playground (push) Blocked by required conditions
CI / benchmarks-instrumented (push) Blocked by required conditions
CI / benchmarks-walltime (push) Blocked by required conditions
[ty Playground] Release / publish (push) Waiting to run
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / ecosystem (push) Blocked by required conditions
CI / Fuzz for new ty panics (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / check playground (push) Blocked by required conditions
CI / benchmarks-instrumented (push) Blocked by required conditions
CI / benchmarks-walltime (push) Blocked by required conditions
[ty Playground] Release / publish (push) Waiting to run
As of [this cpython PR](https://github.com/python/cpython/pull/135996), it is not allowed to concatenate t-strings with non-t-strings, implicitly or explicitly. Expressions such as `"foo" t"{bar}"` are now syntax errors. This PR updates some AST nodes and parsing to reflect this change.

The structural change is that `TStringPart` is no longer needed, since, as in the case of `BytesStringLiteral`, the only possibilities are that we have a single `TString` or a vector of such (representing an implicit concatenation of t-strings). This removes a level of nesting from many AST expressions (which is what all the snapshot changes reflect), and simplifies some logic in the implementation of visitors, for example.

The other change of note is in the parser. When we meet an implicit concatenation of string-like literals, we now count the number of t-string literals. If there is at least one t-string literal but the t-string literals do not account for every implicitly concatenated piece, then we emit a syntax error. To recover from this syntax error, we encode any t-string pieces as _invalid_ string literals (which means we flag them as invalid, record their range, and record the value as `""`). Note that if at least one of the pieces is an f-string we prefer to parse the entire string as an f-string; otherwise we parse it as a string. This logic is exactly the same as how we currently treat `BytesStringLiteral` parsing and error recovery — and carries with it the same pros and cons.

Finally, note that I have not implemented any changes in the implementation of the formatter. As far as I can tell, none are needed. I did change a few of the fixtures so that we are always concatenating t-strings with t-strings.
This commit is contained in:
parent
df5eba7583
commit
008bbfdf5a
75 changed files with 4509 additions and 6294 deletions
|
@ -1,4 +1,3 @@
|
|||
use std::cmp::Ordering;
|
||||
use std::ops::Deref;
|
||||
|
||||
use bitflags::bitflags;
|
||||
|
@ -1256,7 +1255,6 @@ impl<'src> Parser<'src> {
|
|||
// t'{there}'
|
||||
// t"""what's
|
||||
// happening?"""
|
||||
// "implicitly"t"concatenated"
|
||||
|
||||
// test_err template_strings_py313
|
||||
// # parse_options: {"target-version": "3.13"}
|
||||
|
@ -1264,7 +1262,6 @@ impl<'src> Parser<'src> {
|
|||
// t'{there}'
|
||||
// t"""what's
|
||||
// happening?"""
|
||||
// "implicitly"t"concatenated"
|
||||
let string_type = StringType::TString(
|
||||
self.parse_interpolated_string(InterpolatedStringKind::TString)
|
||||
.into(),
|
||||
|
@ -1281,7 +1278,7 @@ impl<'src> Parser<'src> {
|
|||
|
||||
match strings.len() {
|
||||
// This is not possible as the function was called by matching against a
|
||||
// `String` or `FStringStart` token.
|
||||
// `String`, `FStringStart`, or `TStringStart` token.
|
||||
0 => unreachable!("Expected to parse at least one string"),
|
||||
// We need an owned value, hence the `pop` here.
|
||||
1 => match strings.pop().unwrap() {
|
||||
|
@ -1322,58 +1319,84 @@ impl<'src> Parser<'src> {
|
|||
) -> Expr {
|
||||
assert!(strings.len() > 1);
|
||||
|
||||
let mut has_tstring = false;
|
||||
let mut has_fstring = false;
|
||||
let mut byte_literal_count = 0;
|
||||
let mut tstring_count = 0;
|
||||
for string in &strings {
|
||||
match string {
|
||||
StringType::FString(_) => has_fstring = true,
|
||||
StringType::TString(_) => has_tstring = true,
|
||||
StringType::TString(_) => tstring_count += 1,
|
||||
StringType::Bytes(_) => byte_literal_count += 1,
|
||||
StringType::Str(_) => {}
|
||||
}
|
||||
}
|
||||
let has_bytes = byte_literal_count > 0;
|
||||
let has_tstring = tstring_count > 0;
|
||||
|
||||
if has_bytes {
|
||||
match byte_literal_count.cmp(&strings.len()) {
|
||||
Ordering::Less => {
|
||||
// TODO(dhruvmanila): This is not an ideal recovery because the parser
|
||||
// replaces the byte literals with an invalid string literal node. Any
|
||||
// downstream tools can extract the raw bytes from the range.
|
||||
//
|
||||
// We could convert the node into a string and mark it as invalid
|
||||
// and it would be clever to mark whichever type is fewer in quantity.
|
||||
if byte_literal_count < strings.len() {
|
||||
// TODO(dhruvmanila): This is not an ideal recovery because the parser
|
||||
// replaces the byte literals with an invalid string literal node. Any
|
||||
// downstream tools can extract the raw bytes from the range.
|
||||
//
|
||||
// We could convert the node into a string and mark it as invalid
|
||||
// and it would be clever to mark whichever type is fewer in quantity.
|
||||
|
||||
// test_err mixed_bytes_and_non_bytes_literals
|
||||
// 'first' b'second'
|
||||
// f'first' b'second'
|
||||
// 'first' f'second' b'third'
|
||||
self.add_error(
|
||||
ParseErrorType::OtherError(
|
||||
"Bytes literal cannot be mixed with non-bytes literals".to_string(),
|
||||
),
|
||||
range,
|
||||
);
|
||||
}
|
||||
// Only construct a byte expression if all the literals are bytes
|
||||
// otherwise, we'll try either string, t-string, or f-string. This is to retain
|
||||
// as much information as possible.
|
||||
Ordering::Equal => {
|
||||
let mut values = Vec::with_capacity(strings.len());
|
||||
for string in strings {
|
||||
values.push(match string {
|
||||
StringType::Bytes(value) => value,
|
||||
_ => unreachable!("Expected `StringType::Bytes`"),
|
||||
});
|
||||
}
|
||||
return Expr::from(ast::ExprBytesLiteral {
|
||||
value: ast::BytesLiteralValue::concatenated(values),
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
// test_err mixed_bytes_and_non_bytes_literals
|
||||
// 'first' b'second'
|
||||
// f'first' b'second'
|
||||
// 'first' f'second' b'third'
|
||||
self.add_error(
|
||||
ParseErrorType::OtherError(
|
||||
"Bytes literal cannot be mixed with non-bytes literals".to_string(),
|
||||
),
|
||||
range,
|
||||
);
|
||||
}
|
||||
// Only construct a byte expression if all the literals are bytes
|
||||
// otherwise, we'll try either string, t-string, or f-string. This is to retain
|
||||
// as much information as possible.
|
||||
else {
|
||||
let mut values = Vec::with_capacity(strings.len());
|
||||
for string in strings {
|
||||
values.push(match string {
|
||||
StringType::Bytes(value) => value,
|
||||
_ => unreachable!("Expected `StringType::Bytes`"),
|
||||
});
|
||||
}
|
||||
Ordering::Greater => unreachable!(),
|
||||
return Expr::from(ast::ExprBytesLiteral {
|
||||
value: ast::BytesLiteralValue::concatenated(values),
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if has_tstring {
|
||||
if tstring_count < strings.len() {
|
||||
self.add_error(
|
||||
ParseErrorType::OtherError(
|
||||
"cannot mix t-string literals with string or bytes literals".to_string(),
|
||||
),
|
||||
range,
|
||||
);
|
||||
}
|
||||
// Only construct a t-string expression if all the literals are t-strings
|
||||
// otherwise, we'll try either string or f-string. This is to retain
|
||||
// as much information as possible.
|
||||
else {
|
||||
let mut values = Vec::with_capacity(strings.len());
|
||||
for string in strings {
|
||||
values.push(match string {
|
||||
StringType::TString(value) => value,
|
||||
_ => unreachable!("Expected `StringType::TString`"),
|
||||
});
|
||||
}
|
||||
return Expr::from(ast::ExprTString {
|
||||
value: ast::TStringValue::concatenated(values),
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1414,36 +1437,17 @@ impl<'src> Parser<'src> {
|
|||
});
|
||||
}
|
||||
|
||||
if has_tstring {
|
||||
let mut parts = Vec::with_capacity(strings.len());
|
||||
for string in strings {
|
||||
match string {
|
||||
StringType::TString(tstring) => parts.push(ast::TStringPart::TString(tstring)),
|
||||
StringType::FString(fstring) => {
|
||||
parts.push(ruff_python_ast::TStringPart::FString(fstring));
|
||||
}
|
||||
StringType::Str(string) => parts.push(ast::TStringPart::Literal(string)),
|
||||
StringType::Bytes(bytes) => parts.push(ast::TStringPart::Literal(
|
||||
ast::StringLiteral::invalid(bytes.range()),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
return Expr::from(ast::ExprTString {
|
||||
value: ast::TStringValue::concatenated(parts),
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
});
|
||||
}
|
||||
|
||||
let mut parts = Vec::with_capacity(strings.len());
|
||||
for string in strings {
|
||||
match string {
|
||||
StringType::FString(fstring) => parts.push(ast::FStringPart::FString(fstring)),
|
||||
StringType::TString(_) => {
|
||||
unreachable!("expected no tstring parts by this point")
|
||||
}
|
||||
StringType::Str(string) => parts.push(ast::FStringPart::Literal(string)),
|
||||
// Bytes and Template strings are invalid at this point
|
||||
// and stored as invalid string literal parts in the
|
||||
// f-string
|
||||
StringType::TString(tstring) => parts.push(ast::FStringPart::Literal(
|
||||
ast::StringLiteral::invalid(tstring.range()),
|
||||
)),
|
||||
StringType::Bytes(bytes) => parts.push(ast::FStringPart::Literal(
|
||||
ast::StringLiteral::invalid(bytes.range()),
|
||||
)),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue