Mirror of https://github.com/astral-sh/ruff.git, synced 2025-08-24 04:14:22 +00:00
Add support for PEP 701 (#7376)
## Summary

This PR adds support for PEP 701 in Ruff. It is a rollup PR of all the other individual PRs, which were kept separate for logical isolation and code review. Refer to each pull request for a detailed description of its change, and to the PR description for the list of pull requests included in this one.

## Test Plan

### Formatter ecosystem checks

Explanation for the change in the ecosystem check: https://github.com/astral-sh/ruff/pull/7597#issue-1908878183

#### `main`

```
| project      | similarity index | total files | changed files |
|--------------|-----------------:|------------:|--------------:|
| cpython      |          0.76083 |        1789 |          1631 |
| django       |          0.99983 |        2760 |            36 |
| transformers |          0.99963 |        2587 |           319 |
| twine        |          1.00000 |          33 |             0 |
| typeshed     |          0.99983 |        3496 |            18 |
| warehouse    |          0.99967 |         648 |            15 |
| zulip        |          0.99972 |        1437 |            21 |
```

#### `dhruv/pep-701`

```
| project      | similarity index | total files | changed files |
|--------------|-----------------:|------------:|--------------:|
| cpython      |          0.76051 |        1789 |          1632 |
| django       |          0.99983 |        2760 |            36 |
| transformers |          0.99963 |        2587 |           319 |
| twine        |          1.00000 |          33 |             0 |
| typeshed     |          0.99983 |        3496 |            18 |
| warehouse    |          0.99967 |         648 |            15 |
| zulip        |          0.99972 |        1437 |            21 |
```
parent 78b8741352, commit e62e245c61

115 changed files with 44780 additions and 31370 deletions
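PEP 701 moves f-strings into the grammar proper: instead of emitting a single opaque string token, the lexer now produces dedicated `FStringStart`, `FStringMiddle`, and `FStringEnd` tokens with ordinary expression tokens in between. A minimal, illustrative sketch of how that surfaces through the crate's public `lex` entry point (the same one the tests below use; the crate path and exact token output are assumptions, not a verified snapshot):

```rust
use ruff_python_parser::{lex, Mode};

fn main() {
    // Roughly: FStringStart, FStringMiddle("hello "), Lbrace, Name("name"),
    // Rbrace, FStringMiddle("!"), FStringEnd, Newline.
    for result in lex(r#"f"hello {name}!""#, Mode::Module) {
        match result {
            Ok((tok, range)) => println!("{tok:?} @ {range:?}"),
            Err(err) => eprintln!("lex error: {err:?}"),
        }
    }
}
```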
```diff
@@ -37,6 +37,7 @@ use ruff_python_ast::{Int, IpyEscapeKind};
 use ruff_text_size::{TextLen, TextRange, TextSize};
 
 use crate::lexer::cursor::{Cursor, EOF_CHAR};
+use crate::lexer::fstring::{FStringContext, FStringContextFlags, FStrings};
 use crate::lexer::indentation::{Indentation, Indentations};
 use crate::{
     soft_keywords::SoftKeywordTransformer,
```
```diff
@@ -46,6 +47,7 @@ use crate::{
 };
 
 mod cursor;
+mod fstring;
 mod indentation;
 
 /// A lexer for Python source code.
```
```diff
@@ -62,6 +64,8 @@ pub struct Lexer<'source> {
     pending_indentation: Option<Indentation>,
     // Lexer mode.
     mode: Mode,
+    // F-string contexts.
+    fstrings: FStrings,
 }
 
 /// Contains a Token along with its `range`.
```
```diff
@@ -154,6 +158,7 @@ impl<'source> Lexer<'source> {
             source: input,
             cursor: Cursor::new(input),
             mode,
+            fstrings: FStrings::default(),
         };
         // TODO: Handle possible mismatch between BOM and explicit encoding declaration.
         // spell-checker:ignore feff
```
```diff
@@ -165,16 +170,24 @@ impl<'source> Lexer<'source> {
     /// Lex an identifier. Also used for keywords and string/bytes literals with a prefix.
     fn lex_identifier(&mut self, first: char) -> Result<Tok, LexicalError> {
         // Detect potential string like rb'' b'' f'' u'' r''
-        match self.cursor.first() {
-            quote @ ('\'' | '"') => {
+        match (first, self.cursor.first()) {
+            ('f' | 'F', quote @ ('\'' | '"')) => {
+                self.cursor.bump();
+                return Ok(self.lex_fstring_start(quote, false));
+            }
+            ('r' | 'R', 'f' | 'F') | ('f' | 'F', 'r' | 'R') if is_quote(self.cursor.second()) => {
+                self.cursor.bump();
+                let quote = self.cursor.bump().unwrap();
+                return Ok(self.lex_fstring_start(quote, true));
+            }
+            (_, quote @ ('\'' | '"')) => {
                 if let Ok(string_kind) = StringKind::try_from(first) {
                     self.cursor.bump();
                     return self.lex_string(string_kind, quote);
                 }
             }
-            second @ ('f' | 'F' | 'r' | 'R' | 'b' | 'B') if is_quote(self.cursor.second()) => {
+            (_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => {
                 self.cursor.bump();
 
                 if let Ok(string_kind) = StringKind::try_from([first, second]) {
                     let quote = self.cursor.bump().unwrap();
                     return self.lex_string(string_kind, quote);
```
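The new `match (first, self.cursor.first())` dispatch above recognizes `f`/`F` prefixes, optionally combined with `r`/`R` in either order, before falling back to the existing string-prefix handling. A hypothetical, self-contained sketch of that classification (names and the simplified final arm are illustrative, not the crate's API):

```rust
// Returns Some((is_fstring, is_raw)) when the one- or two-character prefix
// plus the upcoming quote selects a string-like token.
fn classify(first: char, next: char, after_next_is_quote: bool) -> Option<(bool, bool)> {
    match (first, next) {
        // f"..." / F'...'
        ('f' | 'F', '\'' | '"') => Some((true, false)),
        // rf"..." / fR'...' etc., in either order.
        ('r' | 'R', 'f' | 'F') | ('f' | 'F', 'r' | 'R') if after_next_is_quote => {
            Some((true, true))
        }
        // Plain prefixed strings such as r"..." or b"..." (simplified: the
        // real code defers to `StringKind::try_from`).
        (_, '\'' | '"') => Some((false, matches!(first, 'r' | 'R'))),
        _ => None,
    }
}

fn main() {
    assert_eq!(classify('f', '"', false), Some((true, false)));
    assert_eq!(classify('R', 'f', true), Some((true, true)));
    assert_eq!(classify('b', '\'', false), Some((false, false)));
}
```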
```diff
@@ -509,6 +522,148 @@ impl<'source> Lexer<'source> {
         }
     }
 
+    /// Lex an f-string start token.
+    fn lex_fstring_start(&mut self, quote: char, is_raw_string: bool) -> Tok {
+        #[cfg(debug_assertions)]
+        debug_assert_eq!(self.cursor.previous(), quote);
+
+        let mut flags = FStringContextFlags::empty();
+        if quote == '"' {
+            flags |= FStringContextFlags::DOUBLE;
+        }
+        if is_raw_string {
+            flags |= FStringContextFlags::RAW;
+        }
+        if self.cursor.eat_char2(quote, quote) {
+            flags |= FStringContextFlags::TRIPLE;
+        }
+
+        self.fstrings.push(FStringContext::new(flags, self.nesting));
+        Tok::FStringStart
+    }
+
+    /// Lex an f-string middle or end token.
+    fn lex_fstring_middle_or_end(&mut self) -> Result<Option<Tok>, LexicalError> {
+        // SAFETY: Safe because the function is only called when `self.fstrings` is not empty.
+        let fstring = self.fstrings.current().unwrap();
+        self.cursor.start_token();
+
+        // Check if we're at the end of the f-string.
+        if fstring.is_triple_quoted() {
+            let quote_char = fstring.quote_char();
+            if self.cursor.eat_char3(quote_char, quote_char, quote_char) {
+                return Ok(Some(Tok::FStringEnd));
+            }
+        } else if self.cursor.eat_char(fstring.quote_char()) {
+            return Ok(Some(Tok::FStringEnd));
+        }
+
+        // We have to decode `{{` and `}}` into `{` and `}` respectively. As an
+        // optimization, we only allocate a new string if we find any escaped curly braces;
+        // otherwise, this string will remain empty and we'll use a source slice instead.
+        let mut normalized = String::new();
+
+        // Tracks the last offset of the token value that has been written to `normalized`.
+        let mut last_offset = self.offset();
+
+        let mut in_named_unicode = false;
+
+        loop {
+            match self.cursor.first() {
+                // The condition is to differentiate between the `NUL` (`\0`) character
+                // in the source code and the one returned by `self.cursor.first()` when
+                // we reach the end of the source code.
+                EOF_CHAR if self.cursor.is_eof() => {
+                    let error = if fstring.is_triple_quoted() {
+                        FStringErrorType::UnterminatedTripleQuotedString
+                    } else {
+                        FStringErrorType::UnterminatedString
+                    };
+                    return Err(LexicalError {
+                        error: LexicalErrorType::FStringError(error),
+                        location: self.offset(),
+                    });
+                }
+                '\n' if !fstring.is_triple_quoted() => {
+                    return Err(LexicalError {
+                        error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString),
+                        location: self.offset(),
+                    });
+                }
+                '\\' => {
+                    self.cursor.bump(); // '\'
+                    if matches!(self.cursor.first(), '{' | '}') {
+                        // Don't consume `{` or `}` as we want them to be emitted as tokens.
+                        // They will be handled in the next iteration.
+                        continue;
+                    } else if !fstring.is_raw_string() {
+                        if self.cursor.eat_char2('N', '{') {
+                            in_named_unicode = true;
+                            continue;
+                        }
+                    }
+                    // Consume the escaped character.
+                    self.cursor.bump();
+                }
+                quote @ ('\'' | '"') if quote == fstring.quote_char() => {
+                    if let Some(triple_quotes) = fstring.triple_quotes() {
+                        if self.cursor.rest().starts_with(triple_quotes) {
+                            break;
+                        }
+                        self.cursor.bump();
+                    } else {
+                        break;
+                    }
+                }
+                '{' => {
+                    if self.cursor.second() == '{' {
+                        self.cursor.bump();
+                        normalized
+                            .push_str(&self.source[TextRange::new(last_offset, self.offset())]);
+                        self.cursor.bump(); // Skip the second `{`
+                        last_offset = self.offset();
+                    } else {
+                        break;
+                    }
+                }
+                '}' => {
+                    if in_named_unicode {
+                        in_named_unicode = false;
+                        self.cursor.bump();
+                    } else if self.cursor.second() == '}'
+                        && !fstring.is_in_format_spec(self.nesting)
+                    {
+                        self.cursor.bump();
+                        normalized
+                            .push_str(&self.source[TextRange::new(last_offset, self.offset())]);
+                        self.cursor.bump(); // Skip the second `}`
+                        last_offset = self.offset();
+                    } else {
+                        break;
+                    }
+                }
+                _ => {
+                    self.cursor.bump();
+                }
+            }
+        }
+        let range = self.token_range();
+        if range.is_empty() {
+            return Ok(None);
+        }
+
+        let value = if normalized.is_empty() {
+            self.source[range].to_string()
+        } else {
+            normalized.push_str(&self.source[TextRange::new(last_offset, self.offset())]);
+            normalized
+        };
+        Ok(Some(Tok::FStringMiddle {
+            value,
+            is_raw: fstring.is_raw_string(),
+        }))
+    }
+
     /// Lex a string literal.
     fn lex_string(&mut self, kind: StringKind, quote: char) -> Result<Tok, LexicalError> {
         #[cfg(debug_assertions)]
```
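`lex_fstring_middle_or_end` decodes `{{`/`}}` lazily: the `normalized` buffer is only populated once an escaped brace is actually seen; otherwise the token value is sliced straight from the source. A standalone sketch of the same copy-on-write idea (a hypothetical helper, not the lexer's actual code path):

```rust
use std::borrow::Cow;

// Collapse escaped `{{`/`}}` pairs, allocating only when one is present.
fn normalize_fstring_middle(raw: &str) -> Cow<'_, str> {
    if !raw.contains("{{") && !raw.contains("}}") {
        return Cow::Borrowed(raw);
    }
    let mut normalized = String::with_capacity(raw.len());
    let mut chars = raw.chars().peekable();
    while let Some(c) = chars.next() {
        normalized.push(c);
        if (c == '{' || c == '}') && chars.peek() == Some(&c) {
            chars.next(); // Drop the second brace of the escaped pair.
        }
    }
    Cow::Owned(normalized)
}

fn main() {
    assert_eq!(normalize_fstring_middle("no braces here"), "no braces here");
    assert_eq!(normalize_fstring_middle("a {{b}} c"), "a {b} c");
}
```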
```diff
@@ -530,6 +685,19 @@ impl<'source> Lexer<'source> {
                     }
                 }
                 Some('\r' | '\n') if !triple_quoted => {
+                    if let Some(fstring) = self.fstrings.current() {
+                        // When we are in an f-string, check whether the initial quote
+                        // matches the f-string's quote; if it does, then this must be a
+                        // missing `}` token, so raise the proper error.
+                        if fstring.quote_char() == quote && !fstring.is_triple_quoted() {
+                            return Err(LexicalError {
+                                error: LexicalErrorType::FStringError(
+                                    FStringErrorType::UnclosedLbrace,
+                                ),
+                                location: self.offset() - fstring.quote_size(),
+                            });
+                        }
+                    }
                     return Err(LexicalError {
                         error: LexicalErrorType::OtherError(
                             "EOL while scanning string literal".to_owned(),
```
```diff
@@ -549,6 +717,21 @@ impl<'source> Lexer<'source> {
 
                 Some(_) => {}
                 None => {
+                    if let Some(fstring) = self.fstrings.current() {
+                        // When we are in an f-string, check whether the initial quote
+                        // matches the f-string's quote; if it does, then this must be a
+                        // missing `}` token, so raise the proper error.
+                        if fstring.quote_char() == quote
+                            && fstring.is_triple_quoted() == triple_quoted
+                        {
+                            return Err(LexicalError {
+                                error: LexicalErrorType::FStringError(
+                                    FStringErrorType::UnclosedLbrace,
+                                ),
+                                location: self.offset() - fstring.quote_size(),
+                            });
+                        }
+                    }
                     return Err(LexicalError {
                         error: if triple_quoted {
                             LexicalErrorType::Eof
```
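Usage sketch for the two error paths added above: when a quote or EOF terminates the string while a replacement field is still open, the failure surfaces as `FStringError(UnclosedLbrace)`, the same behavior the `lex_fstring_error` helper in the tests below asserts:

```rust
use ruff_python_parser::{lex, Mode};

fn main() {
    // `f'{'` ends while the `{` is still open -> FStringError(UnclosedLbrace).
    let err = lex("f'{'", Mode::Module)
        .find_map(std::result::Result::err)
        .expect("expected a lexical error");
    println!("{err:?}");
}
```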
```diff
@@ -572,8 +755,28 @@ impl<'source> Lexer<'source> {
     // This is the main entry point. Call this function to retrieve the next token.
     // This function is used by the iterator implementation.
     pub fn next_token(&mut self) -> LexResult {
+        if let Some(fstring) = self.fstrings.current() {
+            if !fstring.is_in_expression(self.nesting) {
+                match self.lex_fstring_middle_or_end() {
+                    Ok(Some(tok)) => {
+                        if tok == Tok::FStringEnd {
+                            self.fstrings.pop();
+                        }
+                        return Ok((tok, self.token_range()));
+                    }
+                    Err(e) => {
+                        // This is to prevent an infinite loop in which the lexer
+                        // continuously returns an error token because the f-string
+                        // remains on the stack.
+                        self.fstrings.pop();
+                        return Err(e);
+                    }
+                    _ => {}
+                }
+            }
+        }
         // Return dedent tokens until the current indentation level matches the indentation of the next token.
-        if let Some(indentation) = self.pending_indentation.take() {
+        else if let Some(indentation) = self.pending_indentation.take() {
             match self.indentations.current().try_compare(indentation) {
                 Ok(Ordering::Greater) => {
                     self.pending_indentation = Some(indentation);
```
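`next_token` consults a stack of f-string contexts: if the innermost f-string is not currently inside a `{...}` expression, the middle/end lexer runs before normal tokenization. A simplified stand-in for that stack (the real types live in `crate::lexer::fstring`; field names here are assumptions):

```rust
// Simplified stand-in; not the crate's actual definitions.
struct FStringContext {
    nesting: u32, // bracket nesting level recorded at `FStringStart`
}

impl FStringContext {
    // Deeper nesting than at the start means the lexer is inside a `{...}`
    // expression part, where normal tokenization applies.
    fn is_in_expression(&self, current_nesting: u32) -> bool {
        current_nesting > self.nesting
    }
}

#[derive(Default)]
struct FStrings(Vec<FStringContext>);

impl FStrings {
    fn push(&mut self, ctx: FStringContext) {
        self.0.push(ctx);
    }
    fn pop(&mut self) -> Option<FStringContext> {
        self.0.pop()
    }
    fn current(&self) -> Option<&FStringContext> {
        self.0.last()
    }
}

fn main() {
    let mut stack = FStrings::default();
    stack.push(FStringContext { nesting: 0 });
    // Outside any `{...}`: lex f-string middle/end first.
    assert!(!stack.current().unwrap().is_in_expression(0));
    // Inside `{...}` (nesting bumped by the `{`): lex normal tokens.
    assert!(stack.current().unwrap().is_in_expression(1));
    stack.pop();
}
```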
```diff
@@ -894,10 +1097,7 @@ impl<'source> Lexer<'source> {
                 if self.cursor.eat_char('=') {
                     Tok::NotEqual
                 } else {
-                    return Err(LexicalError {
-                        error: LexicalErrorType::UnrecognizedToken { tok: '!' },
-                        location: self.token_start(),
-                    });
+                    Tok::Exclamation
                 }
             }
             '~' => Tok::Tilde,
```
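With `!` now lexed as `Tok::Exclamation` rather than rejected outright, a conversion such as `!r` inside a replacement field tokenizes as ordinary tokens (`Exclamation` followed by a name) and is left for the parser to validate. An illustrative check via `lex` (crate path assumed, as above):

```rust
use ruff_python_parser::{lex, Mode};

fn main() {
    // Expect no lexical error: `!r` is now Exclamation + Name("r").
    let had_error = lex(r#"f"{value!r}""#, Mode::Module).any(|result| result.is_err());
    assert!(!had_error);
}
```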
```diff
@@ -922,11 +1122,26 @@ impl<'source> Lexer<'source> {
                 Tok::Lbrace
             }
             '}' => {
+                if let Some(fstring) = self.fstrings.current_mut() {
+                    if fstring.nesting() == self.nesting {
+                        return Err(LexicalError {
+                            error: LexicalErrorType::FStringError(FStringErrorType::SingleRbrace),
+                            location: self.token_start(),
+                        });
+                    }
+                    fstring.try_end_format_spec(self.nesting);
+                }
                 self.nesting = self.nesting.saturating_sub(1);
                 Tok::Rbrace
             }
             ':' => {
-                if self.cursor.eat_char('=') {
+                if self
+                    .fstrings
+                    .current_mut()
+                    .is_some_and(|fstring| fstring.try_start_format_spec(self.nesting))
+                {
+                    Tok::Colon
+                } else if self.cursor.eat_char('=') {
                     Tok::ColonEqual
                 } else {
                     Tok::Colon
```
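The `:` arm only enters format-spec mode when the colon sits at the replacement field's own nesting level, so `f"{x:>10}"` starts a spec while the slice in `f"{d[1:2]}"` does not. A heavily simplified sketch of that rule (hypothetical types; the real logic is `try_start_format_spec` in `crate::lexer::fstring`):

```rust
// Hypothetical stand-in for the nesting rule behind `try_start_format_spec`.
struct ReplacementField {
    // Bracket nesting level recorded when the field's `{` was lexed.
    nesting: u32,
    in_format_spec: bool,
}

impl ReplacementField {
    fn try_start_format_spec(&mut self, current_nesting: u32) -> bool {
        // A `:` starts a format spec only at the field's own level, i.e. the
        // only open bracket beyond the f-string is the field's `{` itself.
        if current_nesting == self.nesting + 1 && !self.in_format_spec {
            self.in_format_spec = true;
            true
        } else {
            false
        }
    }
}

fn main() {
    // In `f"{x:>10}"`: the field's `{` takes nesting from 0 to 1, and the `:`
    // is seen at level 1 -> format spec starts.
    let mut field = ReplacementField { nesting: 0, in_format_spec: false };
    assert!(field.try_start_format_spec(1));
    // In `f"{d[1:2]}"`: the `:` is seen at level 2 (inside `[`) -> no spec.
    let mut field = ReplacementField { nesting: 0, in_format_spec: false };
    assert!(!field.try_start_format_spec(2));
}
```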
```diff
@@ -1743,4 +1958,191 @@ def f(arg=%timeit a = b):
         .collect();
         assert_debug_snapshot!(tokens);
     }
+
+    #[test]
+    fn test_empty_fstrings() {
+        let source = r#"f"" "" F"" f'' '' f"""""" f''''''"#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_prefix() {
+        let source = r#"f"" F"" rf"" rF"" Rf"" RF"" fr"" Fr"" fR"" FR"""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring() {
+        let source = r#"f"normal {foo} {{another}} {bar} {{{three}}}""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_parentheses() {
+        let source = r#"f"{}" f"{{}}" f" {}" f"{{{}}}" f"{{{{}}}}" f" {} {{}} {{{}}} {{{{}}}} ""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_escape() {
+        let source = r#"f"\{x:\"\{x}} \"\"\
+end""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_escape_braces() {
+        let source = r"f'\{foo}' f'\\{foo}' f'\{{foo}}' f'\\{{foo}}'";
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_escape_raw() {
+        let source = r#"rf"\{x:\"\{x}} \"\"\
+end""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_named_unicode() {
+        let source = r#"f"\N{BULLET} normal \Nope \N""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_named_unicode_raw() {
+        let source = r#"rf"\N{BULLET} normal""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_with_named_expression() {
+        let source = r#"f"{x:=10} {(x:=10)} {x,{y:=10}} {[x:=10]}""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_with_format_spec() {
+        let source = r#"f"{foo:} {x=!s:.3f} {x:.{y}f} {'':*^{1:{1}}}""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_conversion() {
+        let source = r#"f"{x!s} {x=!r} {x:.3f!r} {{x!r}}""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_nested() {
+        let source = r#"f"foo {f"bar {x + f"{wow}"}"} baz" f'foo {f'bar'} some {f"another"}'"#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_expression_multiline() {
+        let source = r#"f"first {
+    x
+        *
+            y
+} second""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_multiline() {
+        let source = r#"f"""
+hello
+    world
+""" f'''
+    world
+hello
+''' f"some {f"""multiline
+allowed {x}"""} string""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_comments() {
+        let source = r#"f"""
+# not a comment { # comment {
+x
+} # not a comment
+""""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_with_ipy_escape_command() {
+        let source = r#"f"foo {!pwd} bar""#;
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_with_lambda_expression() {
+        let source = r#"
+f"{lambda x:{x}}"
+f"{(lambda x:{x})}"
+"#
+        .trim();
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    #[test]
+    fn test_fstring_with_nul_char() {
+        let source = r"f'\0'";
+        assert_debug_snapshot!(lex_source(source));
+    }
+
+    fn lex_fstring_error(source: &str) -> FStringErrorType {
+        match lex(source, Mode::Module).find_map(std::result::Result::err) {
+            Some(err) => match err.error {
+                LexicalErrorType::FStringError(error) => error,
+                _ => panic!("Expected FStringError: {err:?}"),
+            },
+            _ => panic!("Expected at least one FStringError"),
+        }
+    }
+
+    #[test]
+    fn test_fstring_error() {
+        use FStringErrorType::{
+            SingleRbrace, UnclosedLbrace, UnterminatedString, UnterminatedTripleQuotedString,
+        };
+
+        assert_eq!(lex_fstring_error("f'}'"), SingleRbrace);
+        assert_eq!(lex_fstring_error("f'{{}'"), SingleRbrace);
+        assert_eq!(lex_fstring_error("f'{{}}}'"), SingleRbrace);
+        assert_eq!(lex_fstring_error("f'foo}'"), SingleRbrace);
+        assert_eq!(lex_fstring_error(r"f'\u007b}'"), SingleRbrace);
+        assert_eq!(lex_fstring_error("f'{a:b}}'"), SingleRbrace);
+        assert_eq!(lex_fstring_error("f'{3:}}>10}'"), SingleRbrace);
+        assert_eq!(lex_fstring_error(r"f'\{foo}\}'"), SingleRbrace);
+
+        assert_eq!(lex_fstring_error("f'{'"), UnclosedLbrace);
+        assert_eq!(lex_fstring_error("f'{foo!r'"), UnclosedLbrace);
+        assert_eq!(lex_fstring_error("f'{foo='"), UnclosedLbrace);
+        assert_eq!(
+            lex_fstring_error(
+                r#"f"{"
+"#
+            ),
+            UnclosedLbrace
+        );
+        assert_eq!(lex_fstring_error(r#"f"""{""""#), UnclosedLbrace);
+
+        assert_eq!(lex_fstring_error(r#"f""#), UnterminatedString);
+        assert_eq!(lex_fstring_error(r#"f'"#), UnterminatedString);
+
+        assert_eq!(lex_fstring_error(r#"f""""#), UnterminatedTripleQuotedString);
+        assert_eq!(lex_fstring_error(r#"f'''"#), UnterminatedTripleQuotedString);
+        assert_eq!(
+            lex_fstring_error(r#"f"""""#),
+            UnterminatedTripleQuotedString
+        );
+        assert_eq!(
+            lex_fstring_error(r#"f""""""#),
+            UnterminatedTripleQuotedString
+        );
+    }
 }
```