Disallow newlines in format specifiers of single quoted f- or t-strings (#18708)

This commit is contained in:
Micha Reiser 2025-06-18 14:56:15 +02:00 committed by GitHub
parent 23261a38a0
commit 1188ffccc4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 521 additions and 513 deletions

View file

@ -65,28 +65,31 @@ pub enum InterpolatedStringErrorType {
LambdaWithoutParentheses,
/// Conversion flag does not immediately follow exclamation.
ConversionFlagNotImmediatelyAfterExclamation,
/// Newline inside of a format spec for a single quoted f- or t-string.
NewlineInFormatSpec,
}
// Renders each interpolated-string (f-/t-string) error kind as its diagnostic message text.
// NOTE(review): this span is a rendered diff hunk without +/- markers; the unqualified arms
// and the `Self::`-qualified arms look like the before/after versions of the same `match` —
// confirm against the committed file before reusing this code.
impl std::fmt::Display for InterpolatedStringErrorType {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use InterpolatedStringErrorType::{
ConversionFlagNotImmediatelyAfterExclamation, InvalidConversionFlag,
LambdaWithoutParentheses, SingleRbrace, UnclosedLbrace, UnterminatedString,
UnterminatedTripleQuotedString,
};
match self {
// `}}` inside a format string is the escape for one literal `}` in the output.
UnclosedLbrace => write!(f, "expecting '}}'"),
InvalidConversionFlag => write!(f, "invalid conversion character"),
SingleRbrace => write!(f, "single '}}' is not allowed"),
UnterminatedString => write!(f, "unterminated string"),
UnterminatedTripleQuotedString => write!(f, "unterminated triple-quoted string"),
LambdaWithoutParentheses => {
Self::UnclosedLbrace => write!(f, "expecting '}}'"),
Self::InvalidConversionFlag => write!(f, "invalid conversion character"),
Self::SingleRbrace => write!(f, "single '}}' is not allowed"),
Self::UnterminatedString => write!(f, "unterminated string"),
Self::UnterminatedTripleQuotedString => write!(f, "unterminated triple-quoted string"),
Self::LambdaWithoutParentheses => {
write!(f, "lambda expressions are not allowed without parentheses")
}
ConversionFlagNotImmediatelyAfterExclamation => write!(
Self::ConversionFlagNotImmediatelyAfterExclamation => write!(
f,
"conversion type must come right after the exclamation mark"
),
// Message for the `NewlineInFormatSpec` variant: a newline inside the format spec of a
// single-quoted f- or t-string.
Self::NewlineInFormatSpec => {
write!(
f,
"newlines are not allowed in format specifiers when using single quotes"
)
}
}
}
}
@ -430,31 +433,31 @@ impl LexicalErrorType {
impl std::fmt::Display for LexicalErrorType {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
LexicalErrorType::TStringError(error) => write!(f, "t-string: {error}"),
LexicalErrorType::InvalidByteLiteral => {
Self::StringError => write!(f, "Got unexpected string"),
Self::FStringError(error) => write!(f, "f-string: {error}"),
Self::TStringError(error) => write!(f, "t-string: {error}"),
Self::InvalidByteLiteral => {
write!(f, "bytes can only contain ASCII literal characters")
}
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
LexicalErrorType::IndentationError => {
Self::UnicodeError => write!(f, "Got unexpected unicode"),
Self::IndentationError => {
write!(f, "unindent does not match any outer indentation level")
}
LexicalErrorType::UnrecognizedToken { tok } => {
Self::UnrecognizedToken { tok } => {
write!(f, "Got unexpected token {tok}")
}
LexicalErrorType::LineContinuationError => {
Self::LineContinuationError => {
write!(f, "Expected a newline after line continuation character")
}
LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
LexicalErrorType::UnclosedStringError => {
Self::Eof => write!(f, "unexpected EOF while parsing"),
Self::OtherError(msg) => write!(f, "{msg}"),
Self::UnclosedStringError => {
write!(f, "missing closing quote in string literal")
}
LexicalErrorType::MissingUnicodeLbrace => {
Self::MissingUnicodeLbrace => {
write!(f, "Missing `{{` in Unicode escape sequence")
}
LexicalErrorType::MissingUnicodeRbrace => {
Self::MissingUnicodeRbrace => {
write!(f, "Missing `}}` in Unicode escape sequence")
}
}

View file

@ -826,19 +826,17 @@ impl<'src> Lexer<'src> {
)));
}
'\n' | '\r' if !interpolated_string.is_triple_quoted() => {
// If we encounter a newline while we're in a format spec, then
// we stop here and let the lexer emit the newline token.
//
// Relevant discussion: https://github.com/python/cpython/issues/110259
if in_format_spec {
break;
}
// https://github.com/astral-sh/ruff/issues/18632
self.interpolated_strings.pop();
let error_type = if in_format_spec {
InterpolatedStringErrorType::NewlineInFormatSpec
} else {
InterpolatedStringErrorType::UnterminatedString
};
return Some(self.push_error(LexicalError::new(
LexicalErrorType::from_interpolated_string_error(
InterpolatedStringErrorType::UnterminatedString,
string_kind,
),
LexicalErrorType::from_interpolated_string_error(error_type, string_kind),
self.token_range(),
)));
}
@ -1768,6 +1766,7 @@ mod tests {
}
}
#[track_caller]
fn lex_valid(source: &str, mode: Mode, start_offset: TextSize) -> LexerOutput {
let output = lex(source, mode, start_offset);
@ -1783,6 +1782,7 @@ mod tests {
output
}
#[track_caller]
fn lex_invalid(source: &str, mode: Mode) -> LexerOutput {
let output = lex(source, mode, TextSize::default());
@ -1794,14 +1794,17 @@ mod tests {
output
}
#[track_caller]
fn lex_source(source: &str) -> LexerOutput {
lex_valid(source, Mode::Module, TextSize::default())
}
#[track_caller]
fn lex_source_with_offset(source: &str, start_offset: TextSize) -> LexerOutput {
lex_valid(source, Mode::Module, start_offset)
}
#[track_caller]
fn lex_jupyter_source(source: &str) -> LexerOutput {
lex_valid(source, Mode::Ipython, TextSize::default())
}
@ -2394,6 +2397,13 @@ f'''__{
b
c
}__'''
";
assert_snapshot!(lex_source(source));
}
#[test]
fn test_fstring_newline_format_spec() {
let source = r"
f'__{
x:d
}__'
@ -2402,7 +2412,7 @@ f'__{
b
}__'
";
assert_snapshot!(lex_source(source));
assert_snapshot!(lex_invalid(source, Mode::Module));
}
#[test]
@ -2572,6 +2582,13 @@ t'''__{
b
c
}__'''
";
assert_snapshot!(lex_source(source));
}
#[test]
fn test_tstring_newline_format_spec() {
let source = r"
t'__{
x:d
}__'
@ -2580,7 +2597,7 @@ t'__{
b
}__'
";
assert_snapshot!(lex_source(source));
assert_snapshot!(lex_invalid(source, Mode::Module));
}
#[test]

View file

@ -0,0 +1,168 @@
---
source: crates/ruff_python_parser/src/lexer.rs
expression: "lex_invalid(source, Mode::Module)"
---
## Tokens
```
[
(
NonLogicalNewline,
0..1,
),
(
FStringStart,
1..3,
TokenFlags(
F_STRING,
),
),
(
InterpolatedStringMiddle(
"__",
),
3..5,
TokenFlags(
F_STRING,
),
),
(
Lbrace,
5..6,
),
(
NonLogicalNewline,
6..7,
),
(
Name(
Name("x"),
),
11..12,
),
(
Colon,
12..13,
),
(
Unknown,
13..14,
),
(
NonLogicalNewline,
14..15,
),
(
Rbrace,
15..16,
),
(
Name(
Name("__"),
),
16..18,
),
(
Unknown,
18..19,
),
(
Newline,
19..20,
),
(
FStringStart,
20..22,
TokenFlags(
F_STRING,
),
),
(
InterpolatedStringMiddle(
"__",
),
22..24,
TokenFlags(
F_STRING,
),
),
(
Lbrace,
24..25,
),
(
NonLogicalNewline,
25..26,
),
(
Name(
Name("x"),
),
30..31,
),
(
Colon,
31..32,
),
(
Unknown,
32..33,
),
(
NonLogicalNewline,
33..34,
),
(
Name(
Name("b"),
),
42..43,
),
(
NonLogicalNewline,
43..44,
),
(
Rbrace,
44..45,
),
(
Name(
Name("__"),
),
45..47,
),
(
Unknown,
47..48,
),
(
Newline,
48..49,
),
]
```
## Errors
```
[
LexicalError {
error: FStringError(
NewlineInFormatSpec,
),
location: 13..14,
},
LexicalError {
error: UnclosedStringError,
location: 18..19,
},
LexicalError {
error: FStringError(
NewlineInFormatSpec,
),
location: 32..33,
},
LexicalError {
error: UnclosedStringError,
location: 47..48,
},
]
```

View file

@ -139,157 +139,5 @@ expression: lex_source(source)
Newline,
67..68,
),
(
FStringStart,
68..70,
TokenFlags(
F_STRING,
),
),
(
InterpolatedStringMiddle(
"__",
),
70..72,
TokenFlags(
F_STRING,
),
),
(
Lbrace,
72..73,
),
(
NonLogicalNewline,
73..74,
),
(
Name(
Name("x"),
),
78..79,
),
(
Colon,
79..80,
),
(
InterpolatedStringMiddle(
"d",
),
80..81,
TokenFlags(
F_STRING,
),
),
(
NonLogicalNewline,
81..82,
),
(
Rbrace,
82..83,
),
(
InterpolatedStringMiddle(
"__",
),
83..85,
TokenFlags(
F_STRING,
),
),
(
FStringEnd,
85..86,
TokenFlags(
F_STRING,
),
),
(
Newline,
86..87,
),
(
FStringStart,
87..89,
TokenFlags(
F_STRING,
),
),
(
InterpolatedStringMiddle(
"__",
),
89..91,
TokenFlags(
F_STRING,
),
),
(
Lbrace,
91..92,
),
(
NonLogicalNewline,
92..93,
),
(
Name(
Name("x"),
),
97..98,
),
(
Colon,
98..99,
),
(
InterpolatedStringMiddle(
"a",
),
99..100,
TokenFlags(
F_STRING,
),
),
(
NonLogicalNewline,
100..101,
),
(
Name(
Name("b"),
),
109..110,
),
(
NonLogicalNewline,
110..111,
),
(
Rbrace,
111..112,
),
(
InterpolatedStringMiddle(
"__",
),
112..114,
TokenFlags(
F_STRING,
),
),
(
FStringEnd,
114..115,
TokenFlags(
F_STRING,
),
),
(
Newline,
115..116,
),
]
```

View file

@ -0,0 +1,168 @@
---
source: crates/ruff_python_parser/src/lexer.rs
expression: "lex_invalid(source, Mode::Module)"
---
## Tokens
```
[
(
NonLogicalNewline,
0..1,
),
(
TStringStart,
1..3,
TokenFlags(
T_STRING,
),
),
(
InterpolatedStringMiddle(
"__",
),
3..5,
TokenFlags(
T_STRING,
),
),
(
Lbrace,
5..6,
),
(
NonLogicalNewline,
6..7,
),
(
Name(
Name("x"),
),
11..12,
),
(
Colon,
12..13,
),
(
Unknown,
13..14,
),
(
NonLogicalNewline,
14..15,
),
(
Rbrace,
15..16,
),
(
Name(
Name("__"),
),
16..18,
),
(
Unknown,
18..19,
),
(
Newline,
19..20,
),
(
TStringStart,
20..22,
TokenFlags(
T_STRING,
),
),
(
InterpolatedStringMiddle(
"__",
),
22..24,
TokenFlags(
T_STRING,
),
),
(
Lbrace,
24..25,
),
(
NonLogicalNewline,
25..26,
),
(
Name(
Name("x"),
),
30..31,
),
(
Colon,
31..32,
),
(
Unknown,
32..33,
),
(
NonLogicalNewline,
33..34,
),
(
Name(
Name("b"),
),
42..43,
),
(
NonLogicalNewline,
43..44,
),
(
Rbrace,
44..45,
),
(
Name(
Name("__"),
),
45..47,
),
(
Unknown,
47..48,
),
(
Newline,
48..49,
),
]
```
## Errors
```
[
LexicalError {
error: TStringError(
NewlineInFormatSpec,
),
location: 13..14,
},
LexicalError {
error: UnclosedStringError,
location: 18..19,
},
LexicalError {
error: TStringError(
NewlineInFormatSpec,
),
location: 32..33,
},
LexicalError {
error: UnclosedStringError,
location: 47..48,
},
]
```

View file

@ -139,157 +139,5 @@ expression: lex_source(source)
Newline,
67..68,
),
(
TStringStart,
68..70,
TokenFlags(
T_STRING,
),
),
(
InterpolatedStringMiddle(
"__",
),
70..72,
TokenFlags(
T_STRING,
),
),
(
Lbrace,
72..73,
),
(
NonLogicalNewline,
73..74,
),
(
Name(
Name("x"),
),
78..79,
),
(
Colon,
79..80,
),
(
InterpolatedStringMiddle(
"d",
),
80..81,
TokenFlags(
T_STRING,
),
),
(
NonLogicalNewline,
81..82,
),
(
Rbrace,
82..83,
),
(
InterpolatedStringMiddle(
"__",
),
83..85,
TokenFlags(
T_STRING,
),
),
(
TStringEnd,
85..86,
TokenFlags(
T_STRING,
),
),
(
Newline,
86..87,
),
(
TStringStart,
87..89,
TokenFlags(
T_STRING,
),
),
(
InterpolatedStringMiddle(
"__",
),
89..91,
TokenFlags(
T_STRING,
),
),
(
Lbrace,
91..92,
),
(
NonLogicalNewline,
92..93,
),
(
Name(
Name("x"),
),
97..98,
),
(
Colon,
98..99,
),
(
InterpolatedStringMiddle(
"a",
),
99..100,
TokenFlags(
T_STRING,
),
),
(
NonLogicalNewline,
100..101,
),
(
Name(
Name("b"),
),
109..110,
),
(
NonLogicalNewline,
110..111,
),
(
Rbrace,
111..112,
),
(
InterpolatedStringMiddle(
"__",
),
112..114,
TokenFlags(
T_STRING,
),
),
(
TStringEnd,
114..115,
TokenFlags(
T_STRING,
),
),
(
Newline,
115..116,
),
]
```

View file

@ -169,15 +169,7 @@ Module(
InterpolatedStringFormatSpec {
range: 226..228,
node_index: AtomicNodeIndex(..),
elements: [
Literal(
InterpolatedStringLiteralElement {
range: 226..228,
node_index: AtomicNodeIndex(..),
value: "\\",
},
),
],
elements: [],
},
),
},
@ -385,11 +377,22 @@ Module(
6 | 'format spec'}
7 |
8 | f'middle {'string':\\
| ^ Syntax Error: f-string: unterminated string
| ^^ Syntax Error: f-string: newlines are not allowed in format specifiers when using single quotes
9 | 'format spec'}
|
|
6 | 'format spec'}
7 |
8 | f'middle {'string':\\
| ^ Syntax Error: f-string: expecting '}'
9 | 'format spec'}
10 |
11 | f'middle {'string':\\\
|
|
8 | f'middle {'string':\\
9 | 'format spec'}

View file

@ -384,7 +384,7 @@ Module(
3 | f"hello {x
4 | 2 + 2
5 | f"hello {x:
| ^ Syntax Error: f-string: unterminated string
| ^ Syntax Error: f-string: newlines are not allowed in format specifiers when using single quotes
6 | 3 + 3
7 | f"hello {x}
|