Fix lexing single-quoted f-string with multi-line format spec (#7787)

## Summary

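Fix lexing of single-quoted (non-triple-quoted) f-strings whose format spec spans multiple lines. When the lexer encounters a newline while inside a format spec, it now stops the current f-string middle token and lets the newline token be emitted instead of immediately raising an unterminated-string error.

Reported at https://github.com/python/cpython/issues/110259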

## Test Plan

Add test cases for the fix (`test_fstring_with_multiline_format_spec`) and update the snapshots.
This commit is contained in:
Dhruv Manilawala 2023-10-05 23:12:09 +05:30 committed by GitHub
parent 27def479bd
commit 709abd534a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 339 additions and 4 deletions

View file

@ -566,6 +566,9 @@ impl<'source> Lexer<'source> {
// Tracks the last offset of token value that has been written to `normalized`.
let mut last_offset = self.offset();
// This isn't going to change for the duration of the loop.
let in_format_spec = fstring.is_in_format_spec(self.nesting);
let mut in_named_unicode = false;
loop {
@ -585,6 +588,13 @@ impl<'source> Lexer<'source> {
});
}
'\n' | '\r' if !fstring.is_triple_quoted() => {
// If we encounter a newline while we're in a format spec, then
// we stop here and let the lexer emit the newline token.
//
// Relevant discussion: https://github.com/python/cpython/issues/110259
if in_format_spec {
break;
}
return Err(LexicalError {
error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString),
location: self.offset(),
@ -620,7 +630,7 @@ impl<'source> Lexer<'source> {
}
}
'{' => {
if self.cursor.second() == '{' && !fstring.is_in_format_spec(self.nesting) {
if self.cursor.second() == '{' && !in_format_spec {
self.cursor.bump();
normalized
.push_str(&self.source[TextRange::new(last_offset, self.offset())]);
@ -634,9 +644,7 @@ impl<'source> Lexer<'source> {
if in_named_unicode {
in_named_unicode = false;
self.cursor.bump();
} else if self.cursor.second() == '}'
&& !fstring.is_in_format_spec(self.nesting)
{
} else if self.cursor.second() == '}' && !in_format_spec {
self.cursor.bump();
normalized
.push_str(&self.source[TextRange::new(last_offset, self.offset())]);
@ -1194,6 +1202,9 @@ impl<'source> Lexer<'source> {
self.state = State::AfterNewline;
Tok::Newline
} else {
if let Some(fstring) = self.fstrings.current_mut() {
fstring.try_end_format_spec(self.nesting);
}
Tok::NonLogicalNewline
},
self.token_range(),
@ -1207,6 +1218,9 @@ impl<'source> Lexer<'source> {
self.state = State::AfterNewline;
Tok::Newline
} else {
if let Some(fstring) = self.fstrings.current_mut() {
fstring.try_end_format_spec(self.nesting);
}
Tok::NonLogicalNewline
},
self.token_range(),
@ -2051,6 +2065,29 @@ def f(arg=%timeit a = b):
assert_debug_snapshot!(lex_source(source));
}
#[test]
fn test_fstring_with_multiline_format_spec() {
// Snapshot test for f-strings whose replacement field (expression plus
// format spec) is spread over several lines. The source contains two
// triple-quoted f-strings followed by two single-quoted ones.
//
// The last f-string is invalid syntactically but we should still lex it.
// Note that in that last f-string the `b` is a `Name` token and not a
// `FStringMiddle` token.
// NOTE(review): presumably this is because the newline after `x:a` ends the
// format spec in a single-quoted f-string — confirm against the snapshot.
let source = r"f'''__{
x:d
}__'''
f'''__{
x:a
b
c
}__'''
f'__{
x:d
}__'
f'__{
x:a
b
}__'
";
// Compare the lexer output for `source` against the stored debug snapshot.
assert_debug_snapshot!(lex_source(source));
}
#[test]
fn test_fstring_conversion() {
let source = r#"f"{x!s} {x=!r} {x:.3f!r} {{x!r}}""#;