mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-28 04:45:01 +00:00

## Summary This PR adds support for PEP 701 in Ruff. This is a rollup PR of all the other individual PRs. The separate PRs were created for logic separation and code reviews. Refer to each pull request for a detail description on the change. Refer to the PR description for the list of pull requests within this PR. ## Test Plan ### Formatter ecosystem checks Explanation for the change in ecosystem check: https://github.com/astral-sh/ruff/pull/7597#issue-1908878183 #### `main` ``` | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99963 | 2587 | 319 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99983 | 3496 | 18 | | warehouse | 0.99967 | 648 | 15 | | zulip | 0.99972 | 1437 | 21 | ``` #### `dhruv/pep-701` ``` | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76051 | 1789 | 1632 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99963 | 2587 | 319 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99983 | 3496 | 18 | | warehouse | 0.99967 | 648 | 15 | | zulip | 0.99972 | 1437 | 21 | ```
410 lines
11 KiB
Rust
410 lines
11 KiB
Rust
//! Detect code style from Python source code.
|
||
|
||
use std::fmt;
|
||
use std::ops::Deref;
|
||
|
||
use once_cell::unsync::OnceCell;
|
||
use ruff_python_literal::escape::Quote as StrQuote;
|
||
use ruff_python_parser::lexer::LexResult;
|
||
use ruff_python_parser::Tok;
|
||
use ruff_source_file::{find_newline, LineEnding};
|
||
|
||
use ruff_python_ast::str::leading_quote;
|
||
use ruff_source_file::Locator;
|
||
|
||
pub struct Stylist<'a> {
|
||
locator: &'a Locator<'a>,
|
||
indentation: Indentation,
|
||
quote: Quote,
|
||
line_ending: OnceCell<LineEnding>,
|
||
}
|
||
|
||
impl<'a> Stylist<'a> {
|
||
pub fn indentation(&'a self) -> &'a Indentation {
|
||
&self.indentation
|
||
}
|
||
|
||
pub fn quote(&'a self) -> Quote {
|
||
self.quote
|
||
}
|
||
|
||
pub fn line_ending(&'a self) -> LineEnding {
|
||
*self.line_ending.get_or_init(|| {
|
||
let contents = self.locator.contents();
|
||
find_newline(contents)
|
||
.map(|(_, ending)| ending)
|
||
.unwrap_or_default()
|
||
})
|
||
}
|
||
|
||
pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
|
||
let indentation = detect_indention(tokens, locator);
|
||
|
||
Self {
|
||
locator,
|
||
indentation,
|
||
quote: detect_quote(tokens, locator),
|
||
line_ending: OnceCell::default(),
|
||
}
|
||
}
|
||
}
|
||
|
||
fn detect_quote(tokens: &[LexResult], locator: &Locator) -> Quote {
|
||
let quote_range = tokens.iter().flatten().find_map(|(t, range)| match t {
|
||
Tok::String {
|
||
triple_quoted: false,
|
||
..
|
||
} => Some(*range),
|
||
// No need to check if it's triple-quoted as f-strings cannot be used
|
||
// as docstrings.
|
||
Tok::FStringStart => Some(*range),
|
||
_ => None,
|
||
});
|
||
|
||
if let Some(quote_range) = quote_range {
|
||
let content = &locator.slice(quote_range);
|
||
if let Some(quotes) = leading_quote(content) {
|
||
return if quotes.contains('\'') {
|
||
Quote::Single
|
||
} else if quotes.contains('"') {
|
||
Quote::Double
|
||
} else {
|
||
unreachable!("Expected string to start with a valid quote prefix")
|
||
};
|
||
}
|
||
}
|
||
|
||
Quote::default()
|
||
}
|
||
|
||
fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation {
|
||
let indent_range = tokens.iter().flatten().find_map(|(t, range)| {
|
||
if matches!(t, Tok::Indent) {
|
||
Some(range)
|
||
} else {
|
||
None
|
||
}
|
||
});
|
||
|
||
if let Some(indent_range) = indent_range {
|
||
let mut whitespace = locator.slice(*indent_range);
|
||
// https://docs.python.org/3/reference/lexical_analysis.html#indentation
|
||
// > A formfeed character may be present at the start of the line; it will be ignored for
|
||
// > the indentation calculations above. Formfeed characters occurring elsewhere in the
|
||
// > leading whitespace have an undefined effect (for instance, they may reset the space
|
||
// > count to zero).
|
||
// So there's UB in python lexer -.-
|
||
// In practice, they just reset the indentation:
|
||
// https://github.com/python/cpython/blob/df8b3a46a7aa369f246a09ffd11ceedf1d34e921/Parser/tokenizer.c#L1819-L1821
|
||
// https://github.com/astral-sh/ruff/blob/a41bb2733fe75a71f4cf6d4bb21e659fc4630b30/crates/ruff_python_parser/src/lexer.rs#L664-L667
|
||
// We also reset the indentation when we see a formfeed character.
|
||
// See also https://github.com/astral-sh/ruff/issues/7455#issuecomment-1722458825
|
||
if let Some((_before, after)) = whitespace.rsplit_once('\x0C') {
|
||
whitespace = after;
|
||
}
|
||
|
||
Indentation(whitespace.to_string())
|
||
} else {
|
||
Indentation::default()
|
||
}
|
||
}
|
||
|
||
/// The quotation style used in Python source code.
|
||
#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)]
|
||
pub enum Quote {
|
||
Single,
|
||
#[default]
|
||
Double,
|
||
}
|
||
|
||
impl From<Quote> for char {
|
||
fn from(val: Quote) -> Self {
|
||
match val {
|
||
Quote::Single => '\'',
|
||
Quote::Double => '"',
|
||
}
|
||
}
|
||
}
|
||
|
||
impl From<Quote> for StrQuote {
|
||
fn from(val: Quote) -> Self {
|
||
match val {
|
||
Quote::Single => StrQuote::Single,
|
||
Quote::Double => StrQuote::Double,
|
||
}
|
||
}
|
||
}
|
||
|
||
impl fmt::Display for Quote {
|
||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||
match self {
|
||
Quote::Single => write!(f, "\'"),
|
||
Quote::Double => write!(f, "\""),
|
||
}
|
||
}
|
||
}
|
||
|
||
/// The indentation style used in Python source code.
|
||
#[derive(Debug, PartialEq, Eq)]
|
||
pub struct Indentation(String);
|
||
|
||
impl Indentation {
|
||
pub const fn new(indentation: String) -> Self {
|
||
Self(indentation)
|
||
}
|
||
}
|
||
|
||
impl Default for Indentation {
|
||
fn default() -> Self {
|
||
Indentation(" ".to_string())
|
||
}
|
||
}
|
||
|
||
impl Indentation {
|
||
pub fn as_str(&self) -> &str {
|
||
self.0.as_str()
|
||
}
|
||
|
||
pub fn as_char(&self) -> char {
|
||
self.0.chars().next().unwrap()
|
||
}
|
||
}
|
||
|
||
impl Deref for Indentation {
|
||
type Target = str;
|
||
|
||
fn deref(&self) -> &Self::Target {
|
||
self.as_str()
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use ruff_python_parser::lexer::lex;
|
||
use ruff_python_parser::Mode;
|
||
|
||
use ruff_source_file::{find_newline, LineEnding};
|
||
|
||
use super::{Indentation, Quote, Stylist};
|
||
use ruff_source_file::Locator;
|
||
|
||
#[test]
|
||
fn indentation() {
|
||
let contents = r#"x = 1"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||
&Indentation::default()
|
||
);
|
||
|
||
let contents = r#"
|
||
if True:
|
||
pass
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||
&Indentation(" ".to_string())
|
||
);
|
||
|
||
let contents = r#"
|
||
if True:
|
||
pass
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||
&Indentation(" ".to_string())
|
||
);
|
||
|
||
let contents = r#"
|
||
if True:
|
||
pass
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||
&Indentation("\t".to_string())
|
||
);
|
||
|
||
// TODO(charlie): Should non-significant whitespace be detected?
|
||
let contents = r#"
|
||
x = (
|
||
1,
|
||
2,
|
||
3,
|
||
)
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||
&Indentation::default()
|
||
);
|
||
|
||
// formfeed indent, see `detect_indention` comment.
|
||
let contents = r#"
|
||
class FormFeedIndent:
|
||
def __init__(self, a=[]):
|
||
print(a)
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||
&Indentation(" ".to_string())
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn quote() {
|
||
let contents = r#"x = 1"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::default()
|
||
);
|
||
|
||
let contents = r#"x = '1'"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::Single
|
||
);
|
||
|
||
let contents = r#"x = f'1'"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::Single
|
||
);
|
||
|
||
let contents = r#"x = "1""#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::Double
|
||
);
|
||
|
||
let contents = r#"x = f"1""#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::Double
|
||
);
|
||
|
||
let contents = r#"s = "It's done.""#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::Double
|
||
);
|
||
|
||
// No style if only double quoted docstring (will take default Double)
|
||
let contents = r#"
|
||
def f():
|
||
"""Docstring."""
|
||
pass
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::default()
|
||
);
|
||
|
||
// Detect from string literal appearing after docstring
|
||
let contents = r#"
|
||
"""Module docstring."""
|
||
|
||
a = 'v'
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::Single
|
||
);
|
||
|
||
let contents = r#"
|
||
'''Module docstring.'''
|
||
|
||
a = "v"
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::Double
|
||
);
|
||
|
||
// Detect from f-string appearing after docstring
|
||
let contents = r#"
|
||
"""Module docstring."""
|
||
|
||
a = f'v'
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::Single
|
||
);
|
||
|
||
let contents = r#"
|
||
'''Module docstring.'''
|
||
|
||
a = f"v"
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::Double
|
||
);
|
||
|
||
let contents = r#"
|
||
f'''Module docstring.'''
|
||
"#;
|
||
let locator = Locator::new(contents);
|
||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||
assert_eq!(
|
||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||
Quote::Single
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn line_ending() {
|
||
let contents = "x = 1";
|
||
assert_eq!(find_newline(contents).map(|(_, ending)| ending), None);
|
||
|
||
let contents = "x = 1\n";
|
||
assert_eq!(
|
||
find_newline(contents).map(|(_, ending)| ending),
|
||
Some(LineEnding::Lf)
|
||
);
|
||
|
||
let contents = "x = 1\r";
|
||
assert_eq!(
|
||
find_newline(contents).map(|(_, ending)| ending),
|
||
Some(LineEnding::Cr)
|
||
);
|
||
|
||
let contents = "x = 1\r\n";
|
||
assert_eq!(
|
||
find_newline(contents).map(|(_, ending)| ending),
|
||
Some(LineEnding::CrLf)
|
||
);
|
||
}
|
||
}
|