Parse triple quoted string annotations as if parenthesized (#15387)

## Summary

Resolves #9467 

Parse quoted annotations as if the string content is inside parentheses.
With this logic, `y` and `z` in this example are equivalent:

```python
y: """
   int |
   str
"""

z: """(
    int |
    str
)
"""
```

Note that this rule only applies to triple-quoted
strings ([link](https://github.com/python/typing-council/issues/9#issuecomment-1890808610)).

This PR is based on the
[comments](https://github.com/astral-sh/ruff/issues/9467#issuecomment-2579180991)
on the issue.

I made one extra change: since we don't want any indentation tokens, I am
setting `State::Other` as the initial state of the Lexer in this mode.

Remaining work:

- [x] Add a test case for red-knot.
- [x] Add more tests.

## Test Plan

Added a test which previously failed because quoted annotation contained
indentation.
Added an mdtest for red-knot.
Updated previous test.

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
Shaygan Hooshyari 2025-01-16 07:08:15 +01:00 committed by GitHub
parent d2656e88a3
commit cf4ab7cba1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 295 additions and 31 deletions

View file

@ -173,3 +173,40 @@ p: "call()"
r: "[1, 2]" r: "[1, 2]"
s: "(1, 2)" s: "(1, 2)"
``` ```
## Multi line annotation
Quoted type annotations should be parsed as if surrounded by parentheses.
```py
def valid(
a1: """(
int |
str
)
""",
a2: """
int |
str
""",
):
reveal_type(a1) # revealed: int | str
reveal_type(a2) # revealed: int | str
def invalid(
# error: [invalid-syntax-in-forward-annotation]
a1: """
int |
str)
""",
# error: [invalid-syntax-in-forward-annotation]
a2: """
int) |
str
""",
# error: [invalid-syntax-in-forward-annotation]
a3: """
(int)) """,
):
pass
```

View file

@ -1,7 +1,7 @@
use ruff_db::source::source_text; use ruff_db::source::source_text;
use ruff_python_ast::str::raw_contents; use ruff_python_ast::str::raw_contents;
use ruff_python_ast::{self as ast, ModExpression, StringFlags}; use ruff_python_ast::{self as ast, ModExpression};
use ruff_python_parser::{parse_expression_range, Parsed}; use ruff_python_parser::Parsed;
use ruff_text_size::Ranged; use ruff_text_size::Ranged;
use crate::declare_lint; use crate::declare_lint;
@ -153,19 +153,9 @@ pub(crate) fn parse_string_annotation(
} else if raw_contents(node_text) } else if raw_contents(node_text)
.is_some_and(|raw_contents| raw_contents == string_literal.as_str()) .is_some_and(|raw_contents| raw_contents == string_literal.as_str())
{ {
let range_excluding_quotes = string_literal let parsed =
.range() ruff_python_parser::parse_string_annotation(source.as_str(), string_literal);
.add_start(string_literal.flags.opener_len()) match parsed {
.sub_end(string_literal.flags.closer_len());
// TODO: Support multiline strings like:
// ```py
// x: """
// int
// | float
// """ = 1
// ```
match parse_expression_range(source.as_str(), range_excluding_quotes) {
Ok(parsed) => return Some(parsed), Ok(parsed) => return Some(parsed),
Err(parse_error) => context.report_lint( Err(parse_error) => context.report_lint(
&INVALID_SYNTAX_IN_FORWARD_ANNOTATION, &INVALID_SYNTAX_IN_FORWARD_ANNOTATION,

View file

@ -11,3 +11,34 @@ def g() -> "///":
X: """List[int]"""'' = [] X: """List[int]"""'' = []
# Type annotations with triple quotes can contain newlines and indentation
# https://github.com/python/typing-council/issues/9
y: """
int |
str
"""
z: """(
int |
str
)
"""
invalid1: """
int |
str)
"""
invalid2: """
int) |
str
"""
invalid3: """
((int)
"""
invalid4: """
(int
"""

View file

@ -33,9 +33,6 @@ a: '''\\
list[int]''' = [42] list[int]''' = [42]
# TODO: These are valid too. String annotations are assumed to be enclosed in parentheses.
# https://github.com/astral-sh/ruff/issues/9467
def f(a: ''' def f(a: '''
list[int] list[int]
''' = []): ... ''' = []): ...

View file

@ -13,4 +13,68 @@ F722.py:13:4: F722 Syntax error in forward annotation: `List[int]☃`
| |
13 | X: """List[int]"""'☃' = [] 13 | X: """List[int]"""'☃' = []
| ^^^^^^^^^^^^^^^^^^ F722 | ^^^^^^^^^^^^^^^^^^ F722
14 |
15 | # Type annotations with triple quotes can contain newlines and indentation
|
F722.py:30:11: F722 Syntax error in forward annotation: `
int |
str)
`
|
28 | """
29 |
30 | invalid1: """
| ___________^
31 | | int |
32 | | str)
33 | | """
| |___^ F722
34 |
35 | invalid2: """
|
F722.py:35:11: F722 Syntax error in forward annotation: `
int) |
str
`
|
33 | """
34 |
35 | invalid2: """
| ___________^
36 | | int) |
37 | | str
38 | | """
| |___^ F722
39 | invalid3: """
40 | ((int)
|
F722.py:39:11: F722 Syntax error in forward annotation: `
((int)
`
|
37 | str
38 | """
39 | invalid3: """
| ___________^
40 | | ((int)
41 | | """
| |___^ F722
42 | invalid4: """
43 | (int
|
F722.py:42:11: F722 Syntax error in forward annotation: `
(int
`
|
40 | ((int)
41 | """
42 | invalid4: """
| ___________^
43 | | (int
44 | | """
| |___^ F722
| |

View file

@ -158,4 +158,75 @@ UP037_2.pyi:32:4: UP037 [*] Remove quotes from type annotation
33 |+list[int]) = [42] 33 |+list[int]) = [42]
34 34 | 34 34 |
35 35 | 35 35 |
36 36 | # TODO: These are valid too. String annotations are assumed to be enclosed in parentheses. 36 36 | def f(a: '''
UP037_2.pyi:36:10: UP037 [*] Remove quotes from type annotation
|
36 | def f(a: '''
| __________^
37 | | list[int]
38 | | ''' = []): ...
| |_______^ UP037
|
= help: Remove quotes
Safe fix
33 33 | list[int]''' = [42]
34 34 |
35 35 |
36 |-def f(a: '''
36 |+def f(a:
37 37 | list[int]
38 |- ''' = []): ...
38 |+ = []): ...
39 39 |
40 40 |
41 41 | def f(a: Foo['''
UP037_2.pyi:41:14: UP037 [*] Remove quotes from type annotation
|
41 | def f(a: Foo['''
| ______________^
42 | | Bar
43 | | [
44 | | Multi |
45 | | Line
46 | | ] # Comment''']): ...
| |___________________^ UP037
|
= help: Remove quotes
Safe fix
38 38 | ''' = []): ...
39 39 |
40 40 |
41 |-def f(a: Foo['''
41 |+def f(a: Foo[(
42 42 | Bar
43 43 | [
44 44 | Multi |
45 45 | Line
46 |- ] # Comment''']): ...
46 |+ ] # Comment
47 |+)]): ...
47 48 |
48 49 |
49 50 | a: '''list
UP037_2.pyi:49:4: UP037 [*] Remove quotes from type annotation
|
49 | a: '''list
| ____^
50 | | [int]''' = [42]
| |________^ UP037
|
= help: Remove quotes
Safe fix
46 46 | ] # Comment''']): ...
47 47 |
48 48 |
49 |-a: '''list
50 |-[int]''' = [42]
49 |+a: (list
50 |+[int]) = [42]

View file

@ -84,15 +84,21 @@ impl<'src> Lexer<'src> {
"Lexer only supports files with a size up to 4GB" "Lexer only supports files with a size up to 4GB"
); );
let (state, nesting) = if mode == Mode::ParenthesizedExpression {
(State::Other, 1)
} else {
(State::AfterNewline, 0)
};
let mut lexer = Lexer { let mut lexer = Lexer {
source, source,
cursor: Cursor::new(source), cursor: Cursor::new(source),
state: State::AfterNewline, state,
current_kind: TokenKind::EndOfFile, current_kind: TokenKind::EndOfFile,
current_range: TextRange::empty(start_offset), current_range: TextRange::empty(start_offset),
current_value: TokenValue::None, current_value: TokenValue::None,
current_flags: TokenFlags::empty(), current_flags: TokenFlags::empty(),
nesting: 0, nesting,
indentations: Indentations::default(), indentations: Indentations::default(),
pending_indentation: None, pending_indentation: None,
mode, mode,
@ -1309,7 +1315,11 @@ impl<'src> Lexer<'src> {
fn consume_end(&mut self) -> TokenKind { fn consume_end(&mut self) -> TokenKind {
// We reached end of file. // We reached end of file.
// First of all, we need all nestings to be finished. // First of all, we need all nestings to be finished.
if self.nesting > 0 { // For Mode::ParenthesizedExpression we start with nesting level 1.
// So we check if we end with that level.
let init_nesting = u32::from(self.mode == Mode::ParenthesizedExpression);
if self.nesting > init_nesting {
// Reset the nesting to avoid going into infinite loop. // Reset the nesting to avoid going into infinite loop.
self.nesting = 0; self.nesting = 0;
return self.push_error(LexicalError::new(LexicalErrorType::Eof, self.token_range())); return self.push_error(LexicalError::new(LexicalErrorType::Eof, self.token_range()));

View file

@ -72,7 +72,9 @@ pub use crate::token::{Token, TokenKind};
use crate::parser::Parser; use crate::parser::Parser;
use ruff_python_ast::{Expr, Mod, ModExpression, ModModule, PySourceType, Suite}; use ruff_python_ast::{
Expr, Mod, ModExpression, ModModule, PySourceType, StringFlags, StringLiteral, Suite,
};
use ruff_python_trivia::CommentRanges; use ruff_python_trivia::CommentRanges;
use ruff_text_size::{Ranged, TextRange, TextSize}; use ruff_text_size::{Ranged, TextRange, TextSize};
@ -166,6 +168,65 @@ pub fn parse_expression_range(
.into_result() .into_result()
} }
/// Parses a Python expression as if it is parenthesized.
///
/// It behaves similarly to [`parse_expression_range`] but also accepts what would only be valid
/// within parentheses, such as an expression that spans multiple lines. The parentheses
/// themselves do not have to be present in the source.
///
/// Only the part of `source` up to the end of `range` is handed to the parser, and parsing
/// starts at the start of `range`.
///
/// # Panics
///
/// Panics if the parsed module cannot be converted into an expression. This is not expected to
/// happen because [`Mode::ParenthesizedExpression`] is parsed as a single expression.
///
/// # Example
///
/// Parsing an expression that would be valid within parentheses:
///
/// ```
/// use ruff_python_parser::parse_parenthesized_expression_range;
/// # use ruff_text_size::{TextRange, TextSize};
///
/// let parsed = parse_parenthesized_expression_range("'''\n int | str'''", TextRange::new(TextSize::new(3), TextSize::new(14)));
/// assert!(parsed.is_ok());
/// ```
pub fn parse_parenthesized_expression_range(
    source: &str,
    range: TextRange,
) -> Result<Parsed<ModExpression>, ParseError> {
    // Cut the source off at the end of the range so nothing after it is parsed.
    let source = &source[..range.end().to_usize()];
    let parsed =
        Parser::new_starts_at(source, Mode::ParenthesizedExpression, range.start()).parse();
    parsed
        .try_into_expression()
        .expect("`Mode::ParenthesizedExpression` always produces an expression module")
        .into_result()
}
/// Parses the contents of a string annotation as a Python expression.
///
/// The range that is parsed is the range of `string` with the opening and closing quotes
/// (as reported by the literal's flags) removed. Triple-quoted literals are parsed with
/// [`parse_parenthesized_expression_range`]; every other literal is parsed with
/// [`parse_expression_range`].
///
/// # Example
///
/// Parsing a string annotation. The literal below is built with
/// `StringLiteralFlags::default()`, and the parse is expected to fail
/// (NOTE(review): presumably because the default flags do not mark the literal as
/// triple-quoted, so the stripped range does not match the real quotes; confirm).
///
/// ```
/// use ruff_python_parser::parse_string_annotation;
/// use ruff_python_ast::{StringLiteral, StringLiteralFlags};
/// use ruff_text_size::{TextRange, TextSize};
///
/// let string = StringLiteral {
///     value: "'''\n int | str'''".to_string().into_boxed_str(),
///     flags: StringLiteralFlags::default(),
///     range: TextRange::new(TextSize::new(0), TextSize::new(16)),
/// };
/// let parsed = parse_string_annotation("'''\n int | str'''", &string);
/// assert!(!parsed.is_ok());
/// ```
pub fn parse_string_annotation(
    source: &str,
    string: &StringLiteral,
) -> Result<Parsed<ModExpression>, ParseError> {
    // Range of the annotation text itself, without the surrounding quotes.
    let opener = string.flags.opener_len();
    let closer = string.flags.closer_len();
    let content_range = string.range().add_start(opener).sub_end(closer);

    // Drop everything after the annotation text before handing the source to the parser.
    let truncated = &source[..content_range.end().to_usize()];

    if !string.flags.is_triple_quoted() {
        return parse_expression_range(truncated, content_range);
    }

    // Triple-quoted annotations are parsed as if they were wrapped in parentheses.
    parse_parenthesized_expression_range(truncated, content_range)
}
/// Parse the given Python source code using the specified [`Mode`]. /// Parse the given Python source code using the specified [`Mode`].
/// ///
/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied, /// This function is the most general function to parse Python code. Based on the [`Mode`] supplied,
@ -582,6 +643,11 @@ pub enum Mode {
/// The code consists of a single expression. /// The code consists of a single expression.
Expression, Expression,
/// The code consists of a single expression and is parsed as if it is parenthesized. The parentheses themselves aren't required.
/// This allows for having valid multiline expression without the need of parentheses
/// and is specifically useful for parsing string annotations.
ParenthesizedExpression,
/// The code consists of a sequence of statements which can include the /// The code consists of a sequence of statements which can include the
/// escape commands that are part of IPython syntax. /// escape commands that are part of IPython syntax.
/// ///

View file

@ -74,7 +74,9 @@ impl<'src> Parser<'src> {
/// Consumes the [`Parser`] and returns the parsed [`Parsed`]. /// Consumes the [`Parser`] and returns the parsed [`Parsed`].
pub(crate) fn parse(mut self) -> Parsed<Mod> { pub(crate) fn parse(mut self) -> Parsed<Mod> {
let syntax = match self.mode { let syntax = match self.mode {
Mode::Expression => Mod::Expression(self.parse_single_expression()), Mode::Expression | Mode::ParenthesizedExpression => {
Mod::Expression(self.parse_single_expression())
}
Mode::Module | Mode::Ipython => Mod::Module(self.parse_module()), Mode::Module | Mode::Ipython => Mod::Module(self.parse_module()),
}; };

View file

@ -2,10 +2,10 @@
use ruff_python_ast::relocate::relocate_expr; use ruff_python_ast::relocate::relocate_expr;
use ruff_python_ast::str::raw_contents; use ruff_python_ast::str::raw_contents;
use ruff_python_ast::{Expr, ExprStringLiteral, ModExpression, StringFlags, StringLiteral}; use ruff_python_ast::{Expr, ExprStringLiteral, ModExpression, StringLiteral};
use ruff_text_size::Ranged; use ruff_text_size::Ranged;
use crate::{parse_expression, parse_expression_range, ParseError, Parsed}; use crate::{parse_expression, parse_string_annotation, ParseError, Parsed};
type AnnotationParseResult = Result<ParsedAnnotation, ParseError>; type AnnotationParseResult = Result<ParsedAnnotation, ParseError>;
@ -81,12 +81,8 @@ fn parse_simple_type_annotation(
string_literal: &StringLiteral, string_literal: &StringLiteral,
source: &str, source: &str,
) -> AnnotationParseResult { ) -> AnnotationParseResult {
let range_excluding_quotes = string_literal
.range()
.add_start(string_literal.flags.opener_len())
.sub_end(string_literal.flags.closer_len());
Ok(ParsedAnnotation { Ok(ParsedAnnotation {
parsed: parse_expression_range(source, range_excluding_quotes)?, parsed: parse_string_annotation(source, string_literal)?,
kind: AnnotationKind::Simple, kind: AnnotationKind::Simple,
}) })
} }