Fix lexing single-quoted f-string with multi-line format spec (#7787)

## Summary

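When the lexer encounters a newline inside the format spec of a single-quoted (non-triple-quoted) f-string, it now stops the current f-string middle token and lets the lexer emit the newline token, instead of raising an unterminated-string error.

Reported at https://github.com/python/cpython/issues/110259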

## Test Plan

Add lexer and parser test cases for the fix and update the snapshots.
This commit is contained in:
Dhruv Manilawala 2023-10-05 23:12:09 +05:30 committed by GitHub
parent 27def479bd
commit 709abd534a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 339 additions and 4 deletions

View file

@ -566,6 +566,9 @@ impl<'source> Lexer<'source> {
// Tracks the last offset of token value that has been written to `normalized`.
let mut last_offset = self.offset();
// This isn't going to change for the duration of the loop.
let in_format_spec = fstring.is_in_format_spec(self.nesting);
let mut in_named_unicode = false;
loop {
@ -585,6 +588,13 @@ impl<'source> Lexer<'source> {
});
}
'\n' | '\r' if !fstring.is_triple_quoted() => {
// If we encounter a newline while we're in a format spec, then
// we stop here and let the lexer emit the newline token.
//
// Relevant discussion: https://github.com/python/cpython/issues/110259
if in_format_spec {
break;
}
return Err(LexicalError {
error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString),
location: self.offset(),
@ -620,7 +630,7 @@ impl<'source> Lexer<'source> {
}
}
'{' => {
if self.cursor.second() == '{' && !fstring.is_in_format_spec(self.nesting) {
if self.cursor.second() == '{' && !in_format_spec {
self.cursor.bump();
normalized
.push_str(&self.source[TextRange::new(last_offset, self.offset())]);
@ -634,9 +644,7 @@ impl<'source> Lexer<'source> {
if in_named_unicode {
in_named_unicode = false;
self.cursor.bump();
} else if self.cursor.second() == '}'
&& !fstring.is_in_format_spec(self.nesting)
{
} else if self.cursor.second() == '}' && !in_format_spec {
self.cursor.bump();
normalized
.push_str(&self.source[TextRange::new(last_offset, self.offset())]);
@ -1194,6 +1202,9 @@ impl<'source> Lexer<'source> {
self.state = State::AfterNewline;
Tok::Newline
} else {
if let Some(fstring) = self.fstrings.current_mut() {
fstring.try_end_format_spec(self.nesting);
}
Tok::NonLogicalNewline
},
self.token_range(),
@ -1207,6 +1218,9 @@ impl<'source> Lexer<'source> {
self.state = State::AfterNewline;
Tok::Newline
} else {
if let Some(fstring) = self.fstrings.current_mut() {
fstring.try_end_format_spec(self.nesting);
}
Tok::NonLogicalNewline
},
self.token_range(),
@ -2051,6 +2065,29 @@ def f(arg=%timeit a = b):
assert_debug_snapshot!(lex_source(source));
}
#[test]
fn test_fstring_with_multiline_format_spec() {
// Snapshot test for f-strings whose format spec contains newlines. The
// source holds four f-strings: two triple-quoted, then two single-quoted.
//
// In the expected snapshot, the triple-quoted cases keep the newlines
// inside the format spec's `FStringMiddle` token, while in the
// single-quoted cases the `FStringMiddle` token stops at the newline,
// which is emitted as a separate `NonLogicalNewline` token.
//
// The last f-string is syntactically invalid, but we should still lex it.
// Note that the `b` is a `Name` token and not a `FStringMiddle` token.
let source = r"f'''__{
x:d
}__'''
f'''__{
x:a
b
c
}__'''
f'__{
x:d
}__'
f'__{
x:a
b
}__'
";
// Compare the debug representation of the token stream with the stored snapshot.
assert_debug_snapshot!(lex_source(source));
}
#[test]
fn test_fstring_conversion() {
let source = r#"f"{x!s} {x=!r} {x:.3f!r} {{x!r}}""#;

View file

@ -1290,6 +1290,11 @@ match foo:
f"\{foo}\{bar:\}"
f"\\{{foo\\}}"
f"""{
foo:x
y
z
}"""
"#
.trim(),
"<test>",

View file

@ -0,0 +1,244 @@
---
source: crates/ruff_python_parser/src/lexer.rs
expression: lex_source(source)
---
[
(
FStringStart,
0..4,
),
(
FStringMiddle {
value: "__",
is_raw: false,
},
4..6,
),
(
Lbrace,
6..7,
),
(
NonLogicalNewline,
7..8,
),
(
Name {
name: "x",
},
12..13,
),
(
Colon,
13..14,
),
(
FStringMiddle {
value: "d\n",
is_raw: false,
},
14..16,
),
(
Rbrace,
16..17,
),
(
FStringMiddle {
value: "__",
is_raw: false,
},
17..19,
),
(
FStringEnd,
19..22,
),
(
Newline,
22..23,
),
(
FStringStart,
23..27,
),
(
FStringMiddle {
value: "__",
is_raw: false,
},
27..29,
),
(
Lbrace,
29..30,
),
(
NonLogicalNewline,
30..31,
),
(
Name {
name: "x",
},
35..36,
),
(
Colon,
36..37,
),
(
FStringMiddle {
value: "a\n b\n c\n",
is_raw: false,
},
37..61,
),
(
Rbrace,
61..62,
),
(
FStringMiddle {
value: "__",
is_raw: false,
},
62..64,
),
(
FStringEnd,
64..67,
),
(
Newline,
67..68,
),
(
FStringStart,
68..70,
),
(
FStringMiddle {
value: "__",
is_raw: false,
},
70..72,
),
(
Lbrace,
72..73,
),
(
NonLogicalNewline,
73..74,
),
(
Name {
name: "x",
},
78..79,
),
(
Colon,
79..80,
),
(
FStringMiddle {
value: "d",
is_raw: false,
},
80..81,
),
(
NonLogicalNewline,
81..82,
),
(
Rbrace,
82..83,
),
(
FStringMiddle {
value: "__",
is_raw: false,
},
83..85,
),
(
FStringEnd,
85..86,
),
(
Newline,
86..87,
),
(
FStringStart,
87..89,
),
(
FStringMiddle {
value: "__",
is_raw: false,
},
89..91,
),
(
Lbrace,
91..92,
),
(
NonLogicalNewline,
92..93,
),
(
Name {
name: "x",
},
97..98,
),
(
Colon,
98..99,
),
(
FStringMiddle {
value: "a",
is_raw: false,
},
99..100,
),
(
NonLogicalNewline,
100..101,
),
(
Name {
name: "b",
},
109..110,
),
(
NonLogicalNewline,
110..111,
),
(
Rbrace,
111..112,
),
(
FStringMiddle {
value: "__",
is_raw: false,
},
112..114,
),
(
FStringEnd,
114..115,
),
(
Newline,
115..116,
),
]

View file

@ -845,4 +845,53 @@ expression: parse_ast
),
},
),
Expr(
StmtExpr {
range: 304..344,
value: FString(
ExprFString {
range: 304..344,
values: [
FormattedValue(
ExprFormattedValue {
range: 308..341,
value: Name(
ExprName {
range: 314..317,
id: "foo",
ctx: Load,
},
),
debug_text: None,
conversion: None,
format_spec: Some(
FString(
ExprFString {
range: 318..340,
values: [
Constant(
ExprConstant {
range: 318..340,
value: Str(
StringConstant {
value: "x\n y\n z\n",
unicode: false,
implicit_concatenated: false,
},
),
},
),
],
implicit_concatenated: false,
},
),
),
},
),
],
implicit_concatenated: false,
},
),
},
),
]