diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
index 8228da57a2..5a7fbc7bdb 100644
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@@ -1319,7 +1319,8 @@ impl<'src> Lexer<'src> {
         }
     }

-    /// Re-lex the current token in the context of a logical line.
+    /// Re-lex the [`NonLogicalNewline`] token at the given position in the context of a logical
+    /// line.
     ///
     /// Returns a boolean indicating whether the lexer's position has changed. This could result
     /// into the new current token being different than the previous current token but is not
@@ -1373,7 +1374,10 @@ impl<'src> Lexer<'src> {
     ///
     /// [`Newline`]: TokenKind::Newline
     /// [`NonLogicalNewline`]: TokenKind::NonLogicalNewline
-    pub(crate) fn re_lex_logical_token(&mut self) -> bool {
+    pub(crate) fn re_lex_logical_token(
+        &mut self,
+        non_logical_newline_start: Option<TextSize>,
+    ) -> bool {
         if self.nesting == 0 {
             return false;
         }
@@ -1388,84 +1392,35 @@ impl<'src> Lexer<'src> {
             return false;
         }

-        let mut current_position = self.current_range().start();
-        let mut reverse_chars = self.source[..current_position.to_usize()]
-            .chars()
-            .rev()
-            .peekable();
-        let mut newline_position = None;
+        let Some(new_position) = non_logical_newline_start else {
+            return false;
+        };

-        while let Some(ch) = reverse_chars.next() {
-            if is_python_whitespace(ch) {
-                current_position -= ch.text_len();
-                continue;
-            }
-
-            match ch {
-                '\n' => {
-                    current_position -= ch.text_len();
-                    if let Some(carriage_return) = reverse_chars.next_if_eq(&'\r') {
-                        current_position -= carriage_return.text_len();
-                    }
-                }
-                '\r' => {
-                    current_position -= ch.text_len();
-                }
-                _ => break,
-            }
-
-            debug_assert!(matches!(ch, '\n' | '\r'));
-
-            // Count the number of backslashes before the newline character.
-            let mut backslash_count = 0;
-            while reverse_chars.next_if_eq(&'\\').is_some() {
-                backslash_count += 1;
-            }
-
-            if backslash_count == 0 {
-                // No escapes: `\n`
-                newline_position = Some(current_position);
-            } else {
-                if backslash_count % 2 == 0 {
-                    // Even number of backslashes i.e., all backslashes cancel each other out
-                    // which means the newline character is not being escaped.
-                    newline_position = Some(current_position);
-                }
-                current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count);
-            }
+        // Earlier we reduced the nesting level unconditionally. Now that we know the lexer's
+        // position is going to be moved back, the lexer needs to be put back into a
+        // parenthesized context if the current token is a closing parenthesis.
+        //
+        // ```py
+        // (a, [b,
+        // c
+        // )
+        // ```
+        //
+        // Here, the parser would request to re-lex the token when it's at `)` and can recover
+        // from an unclosed `[`. This method will move the lexer back to the newline character
+        // after `c` which means it goes back into parenthesized context.
+        if matches!(
+            self.current_kind,
+            TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace
+        ) {
+            self.nesting += 1;
         }

-        // The lexer should only be moved if there's a newline character which needs to be
-        // re-lexed.
-        if let Some(newline_position) = newline_position {
-            // Earlier we reduced the nesting level unconditionally. Now that we know the lexer's
-            // position is going to be moved back, the lexer needs to be put back into a
-            // parenthesized context if the current token is a closing parenthesis.
-            //
-            // ```py
-            // (a, [b,
-            // c
-            // )
-            // ```
-            //
-            // Here, the parser would request to re-lex the token when it's at `)` and can recover
-            // from an unclosed `[`. This method will move the lexer back to the newline character
-            // after `c` which means it goes back into parenthesized context.
-            if matches!(
-                self.current_kind,
-                TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace
-            ) {
-                self.nesting += 1;
-            }
-
-            self.cursor = Cursor::new(self.source);
-            self.cursor.skip_bytes(newline_position.to_usize());
-            self.state = State::Other;
-            self.next_token();
-            true
-        } else {
-            false
-        }
+        self.cursor = Cursor::new(self.source);
+        self.cursor.skip_bytes(new_position.to_usize());
+        self.state = State::Other;
+        self.next_token();
+        true
     }

     #[inline]
diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs
index c9c9fa3ce6..4851879c89 100644
--- a/crates/ruff_python_parser/src/token_source.rs
+++ b/crates/ruff_python_parser/src/token_source.rs
@@ -60,12 +60,23 @@ impl<'src> TokenSource<'src> {
         self.lexer.take_value()
     }

-    /// Calls the underlying [`re_lex_logical_token`] method on the lexer and updates the token
-    /// vector accordingly.
+    /// Calls the underlying [`re_lex_logical_token`] method on the lexer with the new lexer
+    /// position and updates the token vector accordingly.
     ///
     /// [`re_lex_logical_token`]: Lexer::re_lex_logical_token
     pub(crate) fn re_lex_logical_token(&mut self) {
-        if self.lexer.re_lex_logical_token() {
+        let mut non_logical_newline_start = None;
+        for token in self.tokens.iter().rev() {
+            match token.kind() {
+                TokenKind::NonLogicalNewline => {
+                    non_logical_newline_start = Some(token.start());
+                }
+                TokenKind::Comment => continue,
+                _ => break,
+            }
+        }
+
+        if self.lexer.re_lex_logical_token(non_logical_newline_start) {
             let current_start = self.current_range().start();
             while self
                 .tokens
diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap
index 9e22a93f78..3b106ee408 100644
--- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap
+++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap
@@ -82,8 +82,9 @@ Module(
   |
 1 |   call(a, b, # comment \
-2 | /
-3 | | def bar():
+  |  _______________________^
+2 | |
+  | |_^ Syntax Error: Expected ')', found newline
+3 |   def bar():
 4 |       pass
   |
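
For context, the lookup that `TokenSource::re_lex_logical_token` now performs can be illustrated in isolation: walk the already-emitted token vector from the end, skip any trailing `Comment` tokens, and hand the lexer the start offset of the most recent run of `NonLogicalNewline` tokens. The sketch below is a minimal, self-contained model of that scan and is not part of the patch: the `Token` and `TokenKind` definitions, the `u32` offsets (standing in for `TextSize`), and the free function `non_logical_newline_start` are all simplified stand-ins rather than the actual ruff types.

```rust
/// Simplified stand-in for ruff's `TokenKind`; only the variants relevant to
/// the scan are modeled here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenKind {
    NonLogicalNewline,
    Comment,
    Name,
    Comma,
    Lpar,
}

/// Simplified stand-in for a lexed token: its kind and the byte offset at
/// which it starts (ruff stores a `TextSize` instead of a `u32`).
#[derive(Debug, Clone, Copy)]
struct Token {
    kind: TokenKind,
    start: u32,
}

/// Mirrors the loop added to `TokenSource::re_lex_logical_token`: scan the
/// token vector backwards, skip comments, and remember the start of the
/// earliest `NonLogicalNewline` in that trailing run. Returns `None` if the
/// last non-comment token is anything else, in which case re-lexing bails out.
fn non_logical_newline_start(tokens: &[Token]) -> Option<u32> {
    let mut start = None;
    for token in tokens.iter().rev() {
        match token.kind {
            // Don't break here: a newline further back, separated only by
            // comments or more newlines, should win, matching the diff.
            TokenKind::NonLogicalNewline => start = Some(token.start),
            TokenKind::Comment => continue,
            _ => break,
        }
    }
    start
}

fn main() {
    // Roughly models `call(a, b, # comment` followed by a newline inside an
    // unclosed call: the trailing tokens are a comment and a non-logical
    // newline, so the scan reports the newline's start offset.
    let tokens = [
        Token { kind: TokenKind::Name, start: 0 },
        Token { kind: TokenKind::Lpar, start: 4 },
        Token { kind: TokenKind::Name, start: 5 },
        Token { kind: TokenKind::Comma, start: 6 },
        Token { kind: TokenKind::Name, start: 8 },
        Token { kind: TokenKind::Comma, start: 9 },
        Token { kind: TokenKind::Comment, start: 11 },
        Token { kind: TokenKind::NonLogicalNewline, start: 22 },
    ];
    assert_eq!(non_logical_newline_start(&tokens), Some(22));

    // If the last non-comment token is not a non-logical newline, no position
    // is reported and the lexer is left where it is.
    assert_eq!(non_logical_newline_start(&tokens[..6]), None);
}
```

Compared with the removed character-based backwards scan, this keys off token positions the lexer has already computed, so carriage returns and escaped backslashes no longer need special handling during recovery, which is what the updated `line_continuation_windows_eol` snapshot exercises.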