Mirror of https://github.com/astral-sh/ruff.git

## Summary

This PR detects the use of PEP 701 f-strings before 3.12. This one sounded difficult and ended up being pretty easy, so I think there's a good chance I've over-simplified things. However, from experimenting in the Python REPL and checking with [pyright], I think this is correct. pyright actually doesn't even flag the comment case, but Python does.

I also checked pyright's implementation for [quotes](98dc4469cc/packages/pyright-internal/src/analyzer/checker.ts (L1379-L1398)) and [escapes](98dc4469cc/packages/pyright-internal/src/analyzer/checker.ts (L1365-L1377)) and think I've approximated how they do it.

Python's error messages also point to the simple approach of these characters simply not being allowed:

```pycon
Python 3.11.11 (main, Feb 12 2025, 14:51:05) [Clang 19.1.6 ] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> f'''multiline {
... expression # comment
... }'''
  File "<stdin>", line 3
    }'''
        ^
SyntaxError: f-string expression part cannot include '#'
>>> f'''{not a line \
... continuation}'''
  File "<stdin>", line 2
    continuation}'''
                    ^
SyntaxError: f-string expression part cannot include a backslash
>>> f'hello {'world'}'
  File "<stdin>", line 1
    f'hello {'world'}'
              ^^^^^
SyntaxError: f-string: expecting '}'
```

And since escapes aren't allowed, I don't think there are any tricky cases where nested quotes or comments can sneak in.

It's also slightly annoying that the error is repeated for every nested quote character, but that also mirrors pyright, although they highlight the whole nested string, which is a little nicer. However, their check is in the analysis phase, so I don't think we have such easy access to the quoted range, at least without adding another mini visitor.

## Test Plan

New inline tests.

[pyright]: https://pyright-play.net/?pythonVersion=3.11&strict=true&code=EYQw5gBAvBAmCWBjALgCgO4gHaygRgEoAoEaCAIgBpyiiBiCLAUwGdknYIBHAVwHt2LIgDMA5AFlwSCJhwAuCAG8IoMAG1Rs2KIC6EAL6iIxosbPmLlq5foRWiEAAcmERAAsQAJxAomnltY2wuSKogA6WKIAdABWfPBYqCAE%2BuSBVqbpWVm2iHwAtvlMWMgB2ekiolUAgq4FjgA2TAAeEMieSADWCsoV5qoaqrrGDJ5MiDz%2B8ABuLqosAIREhlXlaybrmyYMXsDw7V4AnoysyAmQ5SIhwYo3d9cheADUeKlv5O%2BpQA
use ruff_text_size::{Ranged, TextRange, TextSize};

use crate::error::LexicalError;
use crate::lexer::{Lexer, LexerCheckpoint};
use crate::token::{Token, TokenFlags, TokenKind, TokenValue};
use crate::Mode;

/// Token source for the parser that skips over any trivia tokens.
#[derive(Debug)]
pub(crate) struct TokenSource<'src> {
    /// The underlying source for the tokens.
    lexer: Lexer<'src>,

    /// A vector containing all the tokens emitted by the lexer. This is returned when the parser
    /// is finished consuming all the tokens. Note that unlike the emitted tokens, this vector
    /// holds both the trivia and non-trivia tokens.
    tokens: Vec<Token>,
}

impl<'src> TokenSource<'src> {
    /// Create a new token source for the given lexer.
    pub(crate) fn new(lexer: Lexer<'src>) -> Self {
        // TODO(dhruvmanila): Use `allocate_tokens_vec`
        TokenSource {
            lexer,
            tokens: vec![],
        }
    }

    /// Create a new token source from the given source code which starts at the given offset.
    pub(crate) fn from_source(source: &'src str, mode: Mode, start_offset: TextSize) -> Self {
        let lexer = Lexer::new(source, mode, start_offset);
        let mut source = TokenSource::new(lexer);

        // Initialize the token source so that the current token is set correctly.
        source.do_bump();

        source
    }

    /// Returns the kind of the current token.
    pub(crate) fn current_kind(&self) -> TokenKind {
        self.lexer.current_kind()
    }

    /// Returns the range of the current token.
    pub(crate) fn current_range(&self) -> TextRange {
        self.lexer.current_range()
    }

    /// Returns the flags for the current token.
    pub(crate) fn current_flags(&self) -> TokenFlags {
        self.lexer.current_flags()
    }

    /// Calls the underlying [`take_value`] method on the lexer. Refer to its documentation
    /// for more info.
    ///
    /// [`take_value`]: Lexer::take_value
    pub(crate) fn take_value(&mut self) -> TokenValue {
        self.lexer.take_value()
    }

    /// Calls the underlying [`re_lex_logical_token`] method on the lexer with the new lexer
    /// position and updates the token vector accordingly.
    ///
    /// [`re_lex_logical_token`]: Lexer::re_lex_logical_token
    pub(crate) fn re_lex_logical_token(&mut self) {
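        // Scan the emitted tokens backwards to find the start of the earliest
        // `NonLogicalNewline` in the trailing run of trivia (comments and
        // non-logical newlines); this position is passed to the lexer below.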
        let mut non_logical_newline_start = None;
        for token in self.tokens.iter().rev() {
            match token.kind() {
                TokenKind::NonLogicalNewline => {
                    non_logical_newline_start = Some(token.start());
                }
                TokenKind::Comment => continue,
                _ => break,
            }
        }

        if self.lexer.re_lex_logical_token(non_logical_newline_start) {
            let current_start = self.current_range().start();
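            // The lexer has re-lexed: drop any already-collected tokens that
            // start at or after the new current token, as they are now stale.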
            while self
                .tokens
                .last()
                .is_some_and(|last| last.start() >= current_start)
            {
                self.tokens.pop();
            }
        }
    }

    /// Returns the next non-trivia token without consuming it.
    ///
    /// Use [`peek2`] to get the next two tokens.
    ///
    /// [`peek2`]: TokenSource::peek2
    pub(crate) fn peek(&mut self) -> TokenKind {
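        // Save the lexer state, lex ahead to the next non-trivia token, then
        // rewind so that peeking has no side effects.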
        let checkpoint = self.lexer.checkpoint();
        let next = self.next_non_trivia_token();
        self.lexer.rewind(checkpoint);
        next
    }

    /// Returns the next two non-trivia tokens without consuming them.
    ///
    /// Use [`peek`] to only get the next token.
    ///
    /// [`peek`]: TokenSource::peek
    pub(crate) fn peek2(&mut self) -> (TokenKind, TokenKind) {
        let checkpoint = self.lexer.checkpoint();
        let first = self.next_non_trivia_token();
        let second = self.next_non_trivia_token();
        self.lexer.rewind(checkpoint);
        (first, second)
    }

    /// Bumps the token source to the next non-trivia token.
    ///
    /// It pushes the given kind to the token vector with the current token range.
    pub(crate) fn bump(&mut self, kind: TokenKind) {
        self.tokens
            .push(Token::new(kind, self.current_range(), self.current_flags()));
        self.do_bump();
    }

    /// Bumps the token source to the next non-trivia token without adding the current token to the
    /// token vector. It does add the trivia tokens to the token vector.
    fn do_bump(&mut self) {
        loop {
            let kind = self.lexer.next_token();
            if kind.is_trivia() {
                self.tokens
                    .push(Token::new(kind, self.current_range(), self.current_flags()));
                continue;
            }
            break;
        }
    }

    /// Returns the next non-trivia token without adding it to the token vector.
    fn next_non_trivia_token(&mut self) -> TokenKind {
        loop {
            let kind = self.lexer.next_token();
            if kind.is_trivia() {
                continue;
            }
            break kind;
        }
    }

    /// Creates a checkpoint to which the token source can later return using [`Self::rewind`].
    pub(crate) fn checkpoint(&self) -> TokenSourceCheckpoint {
        TokenSourceCheckpoint {
            lexer_checkpoint: self.lexer.checkpoint(),
            tokens_position: self.tokens.len(),
        }
    }

    /// Restore the token source to the given checkpoint.
    pub(crate) fn rewind(&mut self, checkpoint: TokenSourceCheckpoint) {
        let TokenSourceCheckpoint {
            lexer_checkpoint,
            tokens_position,
        } = checkpoint;

        self.lexer.rewind(lexer_checkpoint);
        self.tokens.truncate(tokens_position);
    }

    /// Returns a slice of [`Token`] that are within the given `range`.
    pub(crate) fn in_range(&self, range: TextRange) -> &[Token] {
        let start = self
            .tokens
            .iter()
            .rposition(|tok| tok.start() == range.start());
        let end = self.tokens.iter().rposition(|tok| tok.end() == range.end());

        let (Some(start), Some(end)) = (start, end) else {
            return &self.tokens;
        };

        &self.tokens[start..=end]
    }

    /// Consumes the token source, returning the collected tokens and any errors encountered
    /// during lexing. The token collection includes both the trivia and non-trivia tokens.
    pub(crate) fn finish(mut self) -> (Vec<Token>, Vec<LexicalError>) {
        assert_eq!(
            self.current_kind(),
            TokenKind::EndOfFile,
            "TokenSource was not fully consumed"
        );

        // The `EndOfFile` token shouldn't be included in the token stream, it's mainly to signal
        // the parser to stop. This isn't in `do_bump` because it only needs to be done once.
        if let Some(last) = self.tokens.pop() {
            assert_eq!(last.kind(), TokenKind::EndOfFile);
        }

        (self.tokens, self.lexer.finish())
    }
}

pub(crate) struct TokenSourceCheckpoint {
    lexer_checkpoint: LexerCheckpoint,
    tokens_position: usize,
}

/// Allocates a [`Vec`] with an approximated capacity to fit all tokens
/// of `contents`.
///
/// See [#9546](https://github.com/astral-sh/ruff/pull/9546) for a more detailed explanation.
#[allow(dead_code)]
fn allocate_tokens_vec(contents: &str) -> Vec<Token> {
    let lower_bound = contents.len().saturating_mul(15) / 100;
    Vec::with_capacity(lower_bound)
}
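As a rough orientation (not taken from the repository): a sketch of how a parser might drive `TokenSource`, using only the crate-internal methods defined above. `Mode::Module` is assumed here, and the snippet only compiles inside the parser crate because `TokenSource` and its methods are `pub(crate)`.

```rust
// Illustrative only: collect the kinds of all non-trivia tokens in `source`.
fn collect_token_kinds(source: &str) -> Vec<TokenKind> {
    let mut tokens = TokenSource::from_source(source, Mode::Module, TextSize::default());
    let mut kinds = Vec::new();

    // Consume non-trivia tokens until the lexer reports the end of the file.
    while tokens.current_kind() != TokenKind::EndOfFile {
        let kind = tokens.current_kind();
        kinds.push(kind);
        tokens.bump(kind);
    }

    // `finish` hands back every collected token (trivia included) plus any
    // lexical errors encountered along the way.
    let (_all_tokens, _errors) = tokens.finish();
    kinds
}
```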