Parse triple quoted string annotations as if parenthesized (#15387)

## Summary Resolves #9467 Parse quoted annotations as if the string content is inside parentheses. With this logic `y` and `z` in this example are equivalent: ```python y: """ int | str """ z: """( int | str ) """ ``` Also, this rule only applies to triple-quoted strings ([link](https://github.com/python/typing-council/issues/9#issuecomment-1890808610)). This PR is based on the [comments](https://github.com/astral-sh/ruff/issues/9467#issuecomment-2579180991) on the issue. I made one extra change: since we don't want any indentation tokens, I am setting `State::Other` as the initial state of the Lexer. Remaining work: - [x] Add a test case for red-knot. - [x] Add more tests. ## Test Plan Added a test which previously failed because the quoted annotation contained indentation. Added an mdtest for red-knot. Updated the previous test. Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com> Co-authored-by: Micha Reiser <micha@reiser.io>
2025-09-23 10:32:49 +00:00 · 2025-01-16 07:08:15 +01:00 · 2025-01-16 07:08:15 +01:00 · cf4ab7cba1
commit cf4ab7cba1
parent d2656e88a3
10 changed files with 295 additions and 31 deletions
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@ -84,15 +84,21 @@ impl<'src> Lexer<'src> {
            "Lexer only supports files with a size up to 4GB"
        );

+        let (state, nesting) = if mode == Mode::ParenthesizedExpression {
+            (State::Other, 1)
+        } else {
+            (State::AfterNewline, 0)
+        };
+
        let mut lexer = Lexer {
            source,
            cursor: Cursor::new(source),
-            state: State::AfterNewline,
+            state,
            current_kind: TokenKind::EndOfFile,
            current_range: TextRange::empty(start_offset),
            current_value: TokenValue::None,
            current_flags: TokenFlags::empty(),
-            nesting: 0,
+            nesting,
            indentations: Indentations::default(),
            pending_indentation: None,
            mode,
@ -1309,7 +1315,11 @@ impl<'src> Lexer<'src> {
    fn consume_end(&mut self) -> TokenKind {
        // We reached end of file.
        // First of all, we need all nestings to be finished.
-        if self.nesting > 0 {
+        // For Mode::ParenthesizedExpression we start with nesting level 1.
+        // So we check if we end with that level.
+        let init_nesting = u32::from(self.mode == Mode::ParenthesizedExpression);
+
+        if self.nesting > init_nesting {
            // Reset the nesting to avoid going into infinite loop.
            self.nesting = 0;
            return self.push_error(LexicalError::new(LexicalErrorType::Eof, self.token_range()));
--- a/crates/ruff_python_parser/src/lib.rs
+++ b/crates/ruff_python_parser/src/lib.rs
@ -72,7 +72,9 @@ pub use crate::token::{Token, TokenKind};

 use crate::parser::Parser;

-use ruff_python_ast::{Expr, Mod, ModExpression, ModModule, PySourceType, Suite};
+use ruff_python_ast::{
+    Expr, Mod, ModExpression, ModModule, PySourceType, StringFlags, StringLiteral, Suite,
+};
 use ruff_python_trivia::CommentRanges;
 use ruff_text_size::{Ranged, TextRange, TextSize};

@ -166,6 +168,65 @@ pub fn parse_expression_range(
        .into_result()
 }

+/// Parses a Python expression as if it is parenthesized.
+///
+/// It behaves similarly to [`parse_expression_range`] but allows what would be valid within parentheses.
+///
+/// # Example
+///
+/// Parsing an expression that would be valid within parentheses:
+///
+/// ```
+/// use ruff_python_parser::parse_parenthesized_expression_range;
+/// # use ruff_text_size::{TextRange, TextSize};
+///
+/// let parsed = parse_parenthesized_expression_range("'''\n int | str'''", TextRange::new(TextSize::new(3), TextSize::new(14)));
+/// assert!(parsed.is_ok());
+/// ```
+pub fn parse_parenthesized_expression_range(
+    source: &str,
+    range: TextRange,
+) -> Result<Parsed<ModExpression>, ParseError> {
+    let source = &source[..range.end().to_usize()];
+    let parsed =
+        Parser::new_starts_at(source, Mode::ParenthesizedExpression, range.start()).parse();
+    parsed.try_into_expression().unwrap().into_result()
+}
+
+/// Parses a Python expression from a string annotation.
+///
+/// # Example
+///
+/// Parsing a string annotation:
+///
+/// ```
+/// use ruff_python_parser::parse_string_annotation;
+/// use ruff_python_ast::{StringLiteral, StringLiteralFlags};
+/// use ruff_text_size::{TextRange, TextSize};
+///
+/// let string = StringLiteral {
+///     value: "'''\n int | str'''".to_string().into_boxed_str(),
+///     flags: StringLiteralFlags::default(),
+///     range: TextRange::new(TextSize::new(0), TextSize::new(17)),
+/// };
+/// let parsed = parse_string_annotation("'''\n int | str'''", &string);
+/// assert!(!parsed.is_ok());
+/// ```
+pub fn parse_string_annotation(
+    source: &str,
+    string: &StringLiteral,
+) -> Result<Parsed<ModExpression>, ParseError> {
+    let range = string
+        .range()
+        .add_start(string.flags.opener_len())
+        .sub_end(string.flags.closer_len());
+    let source = &source[..range.end().to_usize()];
+    if string.flags.is_triple_quoted() {
+        parse_parenthesized_expression_range(source, range)
+    } else {
+        parse_expression_range(source, range)
+    }
+}
+
 /// Parse the given Python source code using the specified [`Mode`].
 ///
 /// This function is the most general function to parse Python code. Based on the [`Mode`] supplied,
@ -582,6 +643,11 @@ pub enum Mode {
    /// The code consists of a single expression.
    Expression,

+    /// The code consists of a single expression and is parsed as if it is parenthesized. The parentheses themselves aren't required.
+    /// This allows for having valid multiline expression without the need of parentheses
+    /// and is specifically useful for parsing string annotations.
+    ParenthesizedExpression,
+
    /// The code consists of a sequence of statements which can include the
    /// escape commands that are part of IPython syntax.
    ///
--- a/crates/ruff_python_parser/src/parser/mod.rs
+++ b/crates/ruff_python_parser/src/parser/mod.rs
@ -74,7 +74,9 @@ impl<'src> Parser<'src> {
    /// Consumes the [`Parser`] and returns the parsed [`Parsed`].
    pub(crate) fn parse(mut self) -> Parsed<Mod> {
        let syntax = match self.mode {
-            Mode::Expression => Mod::Expression(self.parse_single_expression()),
+            Mode::Expression | Mode::ParenthesizedExpression => {
+                Mod::Expression(self.parse_single_expression())
+            }
            Mode::Module | Mode::Ipython => Mod::Module(self.parse_module()),
        };

--- a/crates/ruff_python_parser/src/typing.rs
+++ b/crates/ruff_python_parser/src/typing.rs
@ -2,10 +2,10 @@

 use ruff_python_ast::relocate::relocate_expr;
 use ruff_python_ast::str::raw_contents;
-use ruff_python_ast::{Expr, ExprStringLiteral, ModExpression, StringFlags, StringLiteral};
+use ruff_python_ast::{Expr, ExprStringLiteral, ModExpression, StringLiteral};
 use ruff_text_size::Ranged;

-use crate::{parse_expression, parse_expression_range, ParseError, Parsed};
+use crate::{parse_expression, parse_string_annotation, ParseError, Parsed};

 type AnnotationParseResult = Result<ParsedAnnotation, ParseError>;

@ -81,12 +81,8 @@ fn parse_simple_type_annotation(
    string_literal: &StringLiteral,
    source: &str,
 ) -> AnnotationParseResult {
-    let range_excluding_quotes = string_literal
-        .range()
-        .add_start(string_literal.flags.opener_len())
-        .sub_end(string_literal.flags.closer_len());
    Ok(ParsedAnnotation {
-        parsed: parse_expression_range(source, range_excluding_quotes)?,
+        parsed: parse_string_annotation(source, string_literal)?,
        kind: AnnotationKind::Simple,
    })
 }