Fix syntax error false positives for escapes and quotes in f-strings (#20867)

Summary
--

Fixes #20844 by refining the unsupported syntax error check for [PEP 701]
f-strings before Python 3.12 to allow backslash escapes and escaped outer
quotes in the format spec part of f-strings. On earlier versions, these are
only disallowed within the expression part of an f-string. Using the examples
from the PR:

```pycon
>>> f"{1:\x64}"
'1'
>>> f"{1:\"d\"}"
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: Invalid format specifier '"d"' for object of type 'int'
```

Note that the second case is a runtime error, but the error is avoidable if
you override `__format__`, so despite being pretty weird, this could be a
valid use case.

```pycon
>>> class C:
...     def __format__(*args, **kwargs): return "<C>"
...
>>> f"{C():\"d\"}"
'<C>'
```

At first I thought narrowing the range we check to exclude the format spec
would only work for escapes, but it turns out that cases like `f"{1:""}"` are
already covered by an existing `ParseError`, so we can just narrow the range
of both our escape and quote checks.

Our comment check also seems to be working correctly because it's based on the
actual tokens. A case like
[this](https://play.ruff.rs/9f1c2ff2-cd8e-4ad7-9f40-56c0a524209f):

```python
f"""{1:# }"""
```

doesn't include a comment token; instead, the `#` is part of an
`InterpolatedStringLiteralElement`.

Test Plan
--

New inline parser tests

[PEP 701]: https://peps.python.org/pep-0701/
This commit is contained in:
Brent Westbrook 2025-10-15 09:23:16 -04:00 committed by GitHub
parent 8817ea5c84
commit 8b9ab48ac6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 317 additions and 62 deletions

View file

@ -1571,6 +1571,8 @@ impl<'src> Parser<'src> {
// f"""{f'''# before expression {f'# aro{f"#{1+1}#"}und #'}'''} # after expression"""
// f"escape outside of \t {expr}\n"
// f"test\"abcd"
// f"{1:\x64}" # escapes are valid in the format spec
// f"{1:\"d\"}" # this also means that escaped outer quotes are valid
// test_err pep701_f_string_py311
// # parse_options: {"target-version": "3.11"}
@ -1586,6 +1588,13 @@ impl<'src> Parser<'src> {
// f"""{f"""{x}"""}""" # mark the whole triple quote
// f"{'\n'.join(['\t', '\v', '\r'])}" # multiple escape sequences, multiple errors
// test_err nested_quote_in_format_spec_py312
// # parse_options: {"target-version": "3.12"}
// f"{1:""}" # this is a ParseError on all versions
// test_ok non_nested_quote_in_format_spec_py311
// # parse_options: {"target-version": "3.11"}
// f"{1:''}" # but this is okay on all versions
let range = self.node_range(start);
if !self.options.target_version.supports_pep_701()
@ -1594,22 +1603,29 @@ impl<'src> Parser<'src> {
let quote_bytes = flags.quote_str().as_bytes();
let quote_len = flags.quote_len();
for expr in elements.interpolations() {
for slash_position in memchr::memchr_iter(b'\\', self.source[expr.range].as_bytes())
{
// We need to check the whole expression range, including any leading or trailing
// debug text, but exclude the format spec, where escapes and escaped, reused quotes
// are allowed.
let range = expr
.format_spec
.as_ref()
.map(|format_spec| TextRange::new(expr.start(), format_spec.start()))
.unwrap_or(expr.range);
for slash_position in memchr::memchr_iter(b'\\', self.source[range].as_bytes()) {
let slash_position = TextSize::try_from(slash_position).unwrap();
self.add_unsupported_syntax_error(
UnsupportedSyntaxErrorKind::Pep701FString(FStringKind::Backslash),
TextRange::at(expr.range.start() + slash_position, '\\'.text_len()),
TextRange::at(range.start() + slash_position, '\\'.text_len()),
);
}
if let Some(quote_position) =
memchr::memmem::find(self.source[expr.range].as_bytes(), quote_bytes)
memchr::memmem::find(self.source[range].as_bytes(), quote_bytes)
{
let quote_position = TextSize::try_from(quote_position).unwrap();
self.add_unsupported_syntax_error(
UnsupportedSyntaxErrorKind::Pep701FString(FStringKind::NestedQuote),
TextRange::at(expr.range.start() + quote_position, quote_len),
TextRange::at(range.start() + quote_position, quote_len),
);
}
}