# Add support for PEP 701 (#7376)
## Summary

This PR adds support for PEP 701 in Ruff. It is a rollup PR of all the individual PRs, which were split out for logical separation and easier code review. Refer to each pull request for a detailed description of its change, and to the PR description for the list of pull requests contained in this one.

## Test Plan

### Formatter ecosystem checks

Explanation for the change in the ecosystem check: https://github.com/astral-sh/ruff/pull/7597#issue-1908878183

#### `main`

```
| project      | similarity index  | total files       | changed files     |
|--------------|------------------:|------------------:|------------------:|
| cpython      | 0.76083           | 1789              | 1631              |
| django       | 0.99983           | 2760              | 36                |
| transformers | 0.99963           | 2587              | 319               |
| twine        | 1.00000           | 33                | 0                 |
| typeshed     | 0.99983           | 3496              | 18                |
| warehouse    | 0.99967           | 648               | 15                |
| zulip        | 0.99972           | 1437              | 21                |
```

#### `dhruv/pep-701`

```
| project      | similarity index  | total files       | changed files     |
|--------------|------------------:|------------------:|------------------:|
| cpython      | 0.76051           | 1789              | 1632              |
| django       | 0.99983           | 2760              | 36                |
| transformers | 0.99963           | 2587              | 319               |
| twine        | 1.00000           | 33                | 0                 |
| typeshed     | 0.99983           | 3496              | 18                |
| warehouse    | 0.99967           | 648               | 15                |
| zulip        | 0.99972           | 1437              | 21                |
```
Parent: 78b8741352
Commit: e62e245c61
115 changed files with 44780 additions and 31370 deletions
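The hunks below are the interesting parts of the diff. For orientation: PEP 701 moves f-strings into the grammar itself, so the lexer now emits dedicated `FStringStart` and `FStringEnd` tokens around the tokens of each replacement field instead of a single opaque `String` token; that is what makes quote reuse, nesting, and comments inside f-strings representable. Here is a minimal, purely illustrative model of the resulting token stream (plain Rust, no ruff APIs; `FStringStart`/`FStringEnd` appear in the diff below, while the `FStringMiddle` variant is an assumption from the wider PR series):

```rust
// Illustrative stand-ins for ruff's `Tok` variants. The shape of the stream
// is the point here; names other than FStringStart/FStringEnd are assumed.
#[derive(Debug)]
enum PepTok<'a> {
    FStringStart,           // `f"`
    FStringMiddle(&'a str), // literal text between replacement fields
    Lbrace,                 // `{`
    String(&'a str),        // a nested string token inside `{...}`
    Rbrace,                 // `}`
    FStringEnd,             // closing `"`
}

fn main() {
    // Simulated token stream for: f"foo {'bar'} baz"
    let tokens = [
        PepTok::FStringStart,
        PepTok::FStringMiddle("foo "),
        PepTok::Lbrace,
        PepTok::String("'bar'"),
        PepTok::Rbrace,
        PepTok::FStringMiddle(" baz"),
        PepTok::FStringEnd,
    ];
    for tok in &tokens {
        println!("{tok:?}");
    }
}
```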
```diff
@@ -43,8 +43,8 @@ pub fn format_and_debug_print(source: &str, cli: &Cli, source_type: &Path) -> Re
         .map_err(|err| format_err!("Source contains syntax errors {err:?}"))?;
 
     // Parse the AST.
-    let module =
-        parse_ok_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?;
+    let module = parse_ok_tokens(tokens, source, Mode::Module, "<filename>")
+        .context("Syntax error in input")?;
 
     let options = PyFormatOptions::from_extension(source_type);
```
```diff
@@ -567,7 +567,7 @@ mod tests {
         let source_code = SourceCode::new(source);
         let (tokens, comment_ranges) =
             tokens_and_ranges(source).expect("Expect source to be valid Python");
-        let parsed = parse_ok_tokens(tokens, Mode::Module, "test.py")
+        let parsed = parse_ok_tokens(tokens, source, Mode::Module, "test.py")
             .expect("Expect source to be valid Python");
 
         CommentsTestCase {
```
```diff
@@ -139,7 +139,7 @@ impl<'a> FormatString<'a> {
 impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
     fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
         let locator = f.context().locator();
-        match self.layout {
+        let result = match self.layout {
             StringLayout::Default => {
                 if self.string.is_implicit_concatenated() {
                     in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
```
```diff
@@ -162,7 +162,73 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
             StringLayout::ImplicitConcatenatedStringInBinaryLike => {
                 FormatStringContinuation::new(self.string).fmt(f)
             }
-        }
+        };
+        // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
+        // This is to mark all of those comments as formatted, but we need to
+        // figure out how to handle them. Note that this needs to be done only
+        // after the f-string is formatted, so only for all the non-formatted
+        // comments.
+        if let AnyString::FString(fstring) = self.string {
+            let comments = f.context().comments();
+            fstring.values.iter().for_each(|value| {
+                comments.mark_verbatim_node_comments_formatted(value.into());
+            });
+        }
+        result
     }
 }
 
+/// A builder for the f-string range.
+///
+/// For now, this is limited to the outermost f-string and doesn't support
+/// nested f-strings.
+#[derive(Debug, Default)]
+struct FStringRangeBuilder {
+    start_location: TextSize,
+    end_location: TextSize,
+    nesting: u32,
+}
+
+impl FStringRangeBuilder {
+    fn visit_token(&mut self, token: &Tok, range: TextRange) {
+        match token {
+            Tok::FStringStart => {
+                if self.nesting == 0 {
+                    self.start_location = range.start();
+                }
+                self.nesting += 1;
+            }
+            Tok::FStringEnd => {
+                // We can assume that this will never overflow because we know
+                // that the program once parsed to a valid AST, which means that
+                // the start and end tokens for f-strings are balanced.
+                self.nesting -= 1;
+                if self.nesting == 0 {
+                    self.end_location = range.end();
+                }
+            }
+            _ => {}
+        }
+    }
+
+    /// Returns `true` if the lexer is currently inside an f-string.
+    ///
+    /// It'll return `false` once the `FStringEnd` token for the outermost
+    /// f-string is visited.
+    const fn in_fstring(&self) -> bool {
+        self.nesting > 0
+    }
+
+    /// Returns the complete range of the previously visited f-string.
+    ///
+    /// This method should only be called once the lexer is outside of any
+    /// f-string; otherwise it might return an invalid range.
+    ///
+    /// It doesn't consume the builder because there can be multiple f-strings
+    /// throughout the source code.
+    fn finish(&self) -> TextRange {
+        debug_assert!(!self.in_fstring());
+        TextRange::new(self.start_location, self.end_location)
+    }
+}
```
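The builder added above is a plain nesting counter over the token stream. Here is a self-contained sketch of the same technique, with `u32` offsets standing in for ruff's `TextSize`/`TextRange` and a hand-written token stream (all offsets are hypothetical):

```rust
// Standalone model of the FStringRangeBuilder technique: record the range of
// the outermost f-string by counting balanced start/end tokens.
#[derive(Clone, Copy)]
enum SimTok {
    FStringStart,
    FStringEnd,
    Other,
}

#[derive(Default)]
struct RangeBuilder {
    start: u32,
    end: u32,
    nesting: u32,
}

impl RangeBuilder {
    fn visit_token(&mut self, tok: SimTok, start: u32, end: u32) {
        match tok {
            SimTok::FStringStart => {
                if self.nesting == 0 {
                    self.start = start;
                }
                self.nesting += 1;
            }
            SimTok::FStringEnd => {
                // Balanced because the source parsed to a valid AST.
                self.nesting -= 1;
                if self.nesting == 0 {
                    self.end = end;
                }
            }
            SimTok::Other => {}
        }
    }

    const fn in_fstring(&self) -> bool {
        self.nesting > 0
    }
}

fn main() {
    // Simulated tokens for the nested (PEP 701) f-string: f"a {f"b"} c"
    let tokens = [
        (SimTok::FStringStart, 0, 2),  // outer `f"`
        (SimTok::Other, 2, 5),         // `a {`
        (SimTok::FStringStart, 5, 7),  // inner `f"`
        (SimTok::FStringEnd, 8, 9),    // inner closing `"`
        (SimTok::Other, 9, 12),        // `} c`
        (SimTok::FStringEnd, 12, 13),  // outer closing `"`
    ];
    let mut builder = RangeBuilder::default();
    for &(tok, start, end) in &tokens {
        builder.visit_token(tok, start, end);
        // While in_fstring() is true, per-token handling is skipped.
    }
    assert!(!builder.in_fstring());
    assert_eq!((builder.start, builder.end), (0, 13));
}
```

Because `finish()` is only meaningful once `in_fstring()` is `false`, the formatter (see the `-226,8` hunk below) skips all per-token handling while inside the f-string and only acts on the combined range at `FStringEnd`.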
```diff
@@ -195,6 +261,10 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
         // because this is a black preview style.
         let lexer = lex_starts_at(string_content, Mode::Expression, string_range.start());
 
+        // The lexer emits multiple tokens for a single f-string literal. Each token
+        // will have its own range, but we require the complete range of the f-string.
+        let mut fstring_range_builder = FStringRangeBuilder::default();
+
         let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
 
         for token in lexer {
```
```diff
@@ -226,8 +296,31 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
             }
         };
 
+        fstring_range_builder.visit_token(&token, token_range);
+
+        // We need to ignore all the tokens within the f-string as there can
+        // be `String` tokens inside it as well. For example,
+        //
+        // ```python
+        // f"foo {'bar'} foo"
+        // #      ^^^^^
+        // # Ignore any logic for this `String` token
+        // ```
+        //
+        // Here, we're interested in the complete f-string, not the individual
+        // tokens inside it.
+        if fstring_range_builder.in_fstring() {
+            continue;
+        }
+
         match token {
-            Tok::String { .. } => {
+            Tok::String { .. } | Tok::FStringEnd => {
+                let token_range = if token.is_f_string_end() {
+                    fstring_range_builder.finish()
+                } else {
+                    token_range
+                };
+
                 // ```python
                 // (
                 //     "a"
```
```diff
@@ -346,11 +439,7 @@ impl StringPart {
             }
         };
 
-        let normalized = normalize_string(
-            locator.slice(self.content_range),
-            quotes,
-            self.prefix.is_raw_string(),
-        );
+        let normalized = normalize_string(locator.slice(self.content_range), quotes, self.prefix);
 
         NormalizedString {
             prefix: self.prefix,
```
```diff
@@ -442,6 +531,10 @@ impl StringPrefix {
     pub(super) const fn is_raw_string(self) -> bool {
         self.contains(StringPrefix::RAW) || self.contains(StringPrefix::RAW_UPPER)
     }
+
+    pub(super) const fn is_fstring(self) -> bool {
+        self.contains(StringPrefix::F_STRING)
+    }
 }
 
 impl Format<PyFormatContext<'_>> for StringPrefix {
```
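As an aside, the `contains` calls suggest `StringPrefix` is a bitflags-style set, which fits Python's combinable prefixes (`rf"..."` is both raw and an f-string). A dependency-free sketch of that shape (the names mirror the diff; the representation is assumed):

```rust
// Hand-rolled stand-in for a bitflags-style `StringPrefix`.
#[derive(Clone, Copy)]
struct StringPrefix(u8);

impl StringPrefix {
    const RAW: Self = Self(1 << 0);
    const F_STRING: Self = Self(1 << 1);

    const fn contains(self, other: Self) -> bool {
        self.0 & other.0 == other.0
    }

    const fn is_raw_string(self) -> bool {
        self.contains(Self::RAW)
    }

    const fn is_fstring(self) -> bool {
        self.contains(Self::F_STRING)
    }
}

fn main() {
    // rf"..." carries both flags, so both predicates hold.
    let prefix = StringPrefix(StringPrefix::RAW.0 | StringPrefix::F_STRING.0);
    assert!(prefix.is_raw_string() && prefix.is_fstring());
}
```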
```diff
@@ -681,7 +774,7 @@ impl Format<PyFormatContext<'_>> for StringQuotes {
 /// with the provided [`StringQuotes`] style.
 ///
 /// Returns the normalized string and whether it contains new lines.
-fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str> {
+fn normalize_string(input: &str, quotes: StringQuotes, prefix: StringPrefix) -> Cow<str> {
     // The normalized string if `input` is not yet normalized.
     // `output` must remain empty if `input` is already normalized.
     let mut output = String::new();
```
```diff
@@ -693,14 +786,30 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str>
     let preferred_quote = style.as_char();
     let opposite_quote = style.invert().as_char();
 
-    let mut chars = input.char_indices();
+    let mut chars = input.char_indices().peekable();
+
+    let is_raw = prefix.is_raw_string();
+    let is_fstring = prefix.is_fstring();
+    let mut formatted_value_nesting = 0u32;
 
     while let Some((index, c)) = chars.next() {
+        if is_fstring && matches!(c, '{' | '}') {
+            if chars.peek().copied().is_some_and(|(_, next)| next == c) {
+                // Skip over the second character of the double braces
+                chars.next();
+            } else if c == '{' {
+                formatted_value_nesting += 1;
+            } else {
+                // Safe to assume that `c == '}'` here because of the matched pattern above
+                formatted_value_nesting = formatted_value_nesting.saturating_sub(1);
+            }
+            continue;
+        }
         if c == '\r' {
             output.push_str(&input[last_index..index]);
 
             // Skip over the '\r' character, keep the `\n`
-            if input.as_bytes().get(index + 1).copied() == Some(b'\n') {
+            if chars.peek().copied().is_some_and(|(_, next)| next == '\n') {
                 chars.next();
             }
             // Replace the `\r` with a `\n`
```
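The new scanning step above is the core of the f-string-aware normalization: doubled braces (`{{`, `}}`) are literal-text escapes and are consumed as a pair, while single braces move `formatted_value_nesting` up or down. A standalone sketch of just that step (simplified; no quote or `\r` handling):

```rust
/// Counts how many replacement-field openings remain unclosed at the end of
/// `input`, skipping doubled braces the way the normalizer does.
fn final_nesting(input: &str) -> u32 {
    let mut nesting = 0u32;
    let mut chars = input.chars().peekable();
    while let Some(c) = chars.next() {
        if matches!(c, '{' | '}') {
            if chars.peek() == Some(&c) {
                // A doubled brace is an escaped literal brace: consume both.
                chars.next();
            } else if c == '{' {
                nesting += 1;
            } else {
                nesting = nesting.saturating_sub(1);
            }
        }
    }
    nesting
}

fn main() {
    assert_eq!(final_nesting("foo {bar} baz"), 0); // balanced field
    assert_eq!(final_nesting("{{literal}}"), 0);   // escaped braces, no field
    assert_eq!(final_nesting("open {only"), 1);    // unclosed field
}
```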
```diff
@@ -711,9 +820,9 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str>
             last_index = index + '\r'.len_utf8();
         } else if !quotes.triple && !is_raw {
             if c == '\\' {
-                if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) {
+                if let Some((_, next)) = chars.peek().copied() {
                     #[allow(clippy::if_same_then_else)]
-                    if next == opposite_quote {
+                    if next == opposite_quote && formatted_value_nesting == 0 {
                         // Remove the escape by ending before the backslash and starting again with the quote
                         chars.next();
                         output.push_str(&input[last_index..index]);
```
```diff
@@ -726,7 +835,7 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str>
                     chars.next();
                 }
             }
-        } else if c == preferred_quote {
+        } else if c == preferred_quote && formatted_value_nesting == 0 {
             // Escape the quote
             output.push_str(&input[last_index..index]);
             output.push('\\');
```
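And this is why the escape branches now check `formatted_value_nesting == 0`: with PEP 701's quote reuse, the preferred quote can legitimately occur inside a replacement field, where it is expression code, and inserting a backslash there would change (and, before Python 3.12, break) the program. A toy version of the guarded escape, reusing the nesting idea from the sketch above (again simplified: no `{{`/`}}` handling):

```rust
/// Escapes `quote` only in the literal parts of an f-string body, leaving
/// quotes inside `{...}` untouched.
fn escape_outside_replacement(input: &str, quote: char) -> String {
    let mut nesting = 0u32;
    let mut out = String::new();
    for c in input.chars() {
        match c {
            '{' => nesting += 1,
            '}' => nesting = nesting.saturating_sub(1),
            // A literal-part quote: prepend the escape.
            _ if c == quote && nesting == 0 => out.push('\\'),
            _ => {}
        }
        out.push(c);
    }
    out
}

fn main() {
    // f-string body under PEP 701 quote reuse: the quotes around `hi` are
    // expression code and survive; only the trailing literal quote is escaped.
    let body = r#"say {"hi"} ""#;
    assert_eq!(escape_outside_replacement(body, '"'), r#"say {"hi"} \""#);
}
```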
```diff
@@ -127,7 +127,7 @@ pub fn format_module_source(
     options: PyFormatOptions,
 ) -> Result<Printed, FormatModuleError> {
     let (tokens, comment_ranges) = tokens_and_ranges(source)?;
-    let module = parse_ok_tokens(tokens, Mode::Module, "<filename>")?;
+    let module = parse_ok_tokens(tokens, source, Mode::Module, "<filename>")?;
     let formatted = format_module_ast(&module, &comment_ranges, source, options)?;
     Ok(formatted.print()?)
 }
```
```diff
@@ -213,7 +213,7 @@ def main() -> None:
 
     // Parse the AST.
     let source_path = "code_inline.py";
-    let module = parse_ok_tokens(tokens, Mode::Module, source_path).unwrap();
+    let module = parse_ok_tokens(tokens, source, Mode::Module, source_path).unwrap();
     let options = PyFormatOptions::from_extension(Path::new(source_path));
     let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap();
```