Add support for PEP 701 (#7376)

## Summary

This PR adds support for PEP 701 in Ruff. It is a rollup of several individual
PRs that were split out to keep the logic separate and easier to review.

The PR description lists the individual pull requests included in this rollup;
refer to each one for a detailed description of its changes.
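
For context, PEP 701 (Python 3.12) formalizes f-string syntax and lifts the old
restrictions on quote reuse, nesting, multi-line expressions, and comments inside
replacement fields. The snippet below is purely illustrative (it is not taken from
this PR or its test suite) and shows the kind of code the lexer and formatter
changes in this rollup have to handle:

```python
songs = ["Take me back to Eden", "Alkaline", "Ascensionism"]

# Reusing the outer quote inside a replacement field (a syntax error before 3.12)
print(f"Songs: {", ".join(songs)}")

# Multi-line expressions and comments inside replacement fields
print(f"Total: {
    len(songs)  # comments are allowed here under PEP 701
}")

# Arbitrary nesting of f-strings using the same quote character
print(f"{f"{f"{songs[0]}"}"}")
```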

## Test Plan

### Formatter ecosystem checks

The change in the ecosystem check results is explained in
https://github.com/astral-sh/ruff/pull/7597#issue-1908878183

#### `main`

```
| project      | similarity index  | total files       | changed files     |
|--------------|------------------:|------------------:|------------------:|
| cpython      |           0.76083 |              1789 |              1631 |
| django       |           0.99983 |              2760 |                36 |
| transformers |           0.99963 |              2587 |               319 |
| twine        |           1.00000 |                33 |                 0 |
| typeshed     |           0.99983 |              3496 |                18 |
| warehouse    |           0.99967 |               648 |                15 |
| zulip        |           0.99972 |              1437 |                21 |
```

#### `dhruv/pep-701`

```
| project      | similarity index  | total files       | changed files     |
|--------------|------------------:|------------------:|------------------:|
| cpython      |           0.76051 |              1789 |              1632 |
| django       |           0.99983 |              2760 |                36 |
| transformers |           0.99963 |              2587 |               319 |
| twine        |           1.00000 |                33 |                 0 |
| typeshed     |           0.99983 |              3496 |                18 |
| warehouse    |           0.99967 |               648 |                15 |
| zulip        |           0.99972 |              1437 |                21 |
```

Commit e62e245c61 (parent 78b8741352), authored by Dhruv Manilawala on
2023-09-29 08:25:39 +05:30 and committed via GitHub.

115 changed files with 44780 additions and 31370 deletions


```diff
@@ -43,8 +43,8 @@ pub fn format_and_debug_print(source: &str, cli: &Cli, source_type: &Path) -> Re
         .map_err(|err| format_err!("Source contains syntax errors {err:?}"))?;

     // Parse the AST.
-    let module =
-        parse_ok_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?;
+    let module = parse_ok_tokens(tokens, source, Mode::Module, "<filename>")
+        .context("Syntax error in input")?;
     let options = PyFormatOptions::from_extension(source_type);
```


```diff
@@ -567,7 +567,7 @@ mod tests {
         let source_code = SourceCode::new(source);
         let (tokens, comment_ranges) =
             tokens_and_ranges(source).expect("Expect source to be valid Python");
-        let parsed = parse_ok_tokens(tokens, Mode::Module, "test.py")
+        let parsed = parse_ok_tokens(tokens, source, Mode::Module, "test.py")
             .expect("Expect source to be valid Python");
         CommentsTestCase {
```


```diff
@@ -139,7 +139,7 @@ impl<'a> FormatString<'a> {
 impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
     fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
         let locator = f.context().locator();
-        match self.layout {
+        let result = match self.layout {
             StringLayout::Default => {
                 if self.string.is_implicit_concatenated() {
                     in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
@@ -162,7 +162,73 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
             StringLayout::ImplicitConcatenatedStringInBinaryLike => {
                 FormatStringContinuation::new(self.string).fmt(f)
             }
-        }
+        };
+
+        // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
+        // This is to mark all of those comments as formatted but we need to
+        // figure out how to handle them. Note that this needs to be done only
+        // after the f-string is formatted, so only for all the non-formatted
+        // comments.
+        if let AnyString::FString(fstring) = self.string {
+            let comments = f.context().comments();
+            fstring.values.iter().for_each(|value| {
+                comments.mark_verbatim_node_comments_formatted(value.into());
+            });
+        }
+
+        result
     }
 }
+
+/// A builder for the f-string range.
+///
+/// For now, this is limited to the outermost f-string and doesn't support
+/// nested f-strings.
+#[derive(Debug, Default)]
+struct FStringRangeBuilder {
+    start_location: TextSize,
+    end_location: TextSize,
+    nesting: u32,
+}
+
+impl FStringRangeBuilder {
+    fn visit_token(&mut self, token: &Tok, range: TextRange) {
+        match token {
+            Tok::FStringStart => {
+                if self.nesting == 0 {
+                    self.start_location = range.start();
+                }
+                self.nesting += 1;
+            }
+            Tok::FStringEnd => {
+                // We can assume that this will never overflow because we know
+                // that the program once parsed to a valid AST which means that
+                // the start and end tokens for f-strings are balanced.
+                self.nesting -= 1;
+                if self.nesting == 0 {
+                    self.end_location = range.end();
+                }
+            }
+            _ => {}
+        }
+    }
+
+    /// Returns `true` if the lexer is currently inside of a f-string.
+    ///
+    /// It'll return `false` once the `FStringEnd` token for the outermost
+    /// f-string is visited.
+    const fn in_fstring(&self) -> bool {
+        self.nesting > 0
+    }
+
+    /// Returns the complete range of the previously visited f-string.
+    ///
+    /// This method should only be called once the lexer is outside of any
+    /// f-string otherwise it might return an invalid range.
+    ///
+    /// It doesn't consume the builder because there can be multiple f-strings
+    /// throughout the source code.
+    fn finish(&self) -> TextRange {
+        debug_assert!(!self.in_fstring());
+        TextRange::new(self.start_location, self.end_location)
+    }
+}
```
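
To make the builder concrete, here is a rough sketch (mine, not part of the diff)
of the token stream it visits for a single f-string. The token names follow the
diff above; the literal "middle" parts of the f-string are elided:

```python
# f"foo {'bar'} baz"
# ^                    FStringStart: nesting goes 0 -> 1, start location recorded
#        ^^^^^         String: an ordinary String token *inside* the f-string,
#                      which is skipped while in_fstring() is true
#                  ^   FStringEnd: nesting goes 1 -> 0, end location recorded
#
# finish() then returns the range of the entire literal, so the formatter can
# treat the f-string as a single unit within an implicit concatenation.
print(f"foo {'bar'} baz")
```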

````diff
@@ -195,6 +261,10 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
         // because this is a black preview style.
         let lexer = lex_starts_at(string_content, Mode::Expression, string_range.start());

+        // The lexer emits multiple tokens for a single f-string literal. Each token
+        // will have it's own range but we require the complete range of the f-string.
+        let mut fstring_range_builder = FStringRangeBuilder::default();
+
         let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());

         for token in lexer {
@@ -226,8 +296,31 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
                 }
             };

+            fstring_range_builder.visit_token(&token, token_range);
+
+            // We need to ignore all the tokens within the f-string as there can
+            // be `String` tokens inside it as well. For example,
+            //
+            // ```python
+            // f"foo {'bar'} foo"
+            // #      ^^^^^
+            // # Ignore any logic for this `String` token
+            // ```
+            //
+            // Here, we're interested in the complete f-string, not the individual
+            // tokens inside it.
+            if fstring_range_builder.in_fstring() {
+                continue;
+            }
+
             match token {
-                Tok::String { .. } => {
+                Tok::String { .. } | Tok::FStringEnd => {
+                    let token_range = if token.is_f_string_end() {
+                        fstring_range_builder.finish()
+                    } else {
+                        token_range
+                    };
+
                     // ```python
                     // (
                     //     "a"
````

```diff
@@ -346,11 +439,7 @@ impl StringPart {
             }
         };

-        let normalized = normalize_string(
-            locator.slice(self.content_range),
-            quotes,
-            self.prefix.is_raw_string(),
-        );
+        let normalized = normalize_string(locator.slice(self.content_range), quotes, self.prefix);

         NormalizedString {
             prefix: self.prefix,
@@ -442,6 +531,10 @@ impl StringPrefix {
     pub(super) const fn is_raw_string(self) -> bool {
         self.contains(StringPrefix::RAW) || self.contains(StringPrefix::RAW_UPPER)
     }
+
+    pub(super) const fn is_fstring(self) -> bool {
+        self.contains(StringPrefix::F_STRING)
+    }
 }

 impl Format<PyFormatContext<'_>> for StringPrefix {
```

```diff
@@ -681,7 +774,7 @@ impl Format<PyFormatContext<'_>> for StringQuotes {
 /// with the provided [`StringQuotes`] style.
 ///
 /// Returns the normalized string and whether it contains new lines.
-fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str> {
+fn normalize_string(input: &str, quotes: StringQuotes, prefix: StringPrefix) -> Cow<str> {
     // The normalized string if `input` is not yet normalized.
     // `output` must remain empty if `input` is already normalized.
     let mut output = String::new();
@@ -693,14 +786,30 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str>
     let preferred_quote = style.as_char();
     let opposite_quote = style.invert().as_char();

-    let mut chars = input.char_indices();
+    let mut chars = input.char_indices().peekable();
+
+    let is_raw = prefix.is_raw_string();
+    let is_fstring = prefix.is_fstring();
+    let mut formatted_value_nesting = 0u32;

     while let Some((index, c)) = chars.next() {
+        if is_fstring && matches!(c, '{' | '}') {
+            if chars.peek().copied().is_some_and(|(_, next)| next == c) {
+                // Skip over the second character of the double braces
+                chars.next();
+            } else if c == '{' {
+                formatted_value_nesting += 1;
+            } else {
+                // Safe to assume that `c == '}'` here because of the matched pattern above
+                formatted_value_nesting = formatted_value_nesting.saturating_sub(1);
+            }
+            continue;
+        }
+
         if c == '\r' {
             output.push_str(&input[last_index..index]);

             // Skip over the '\r' character, keep the `\n`
-            if input.as_bytes().get(index + 1).copied() == Some(b'\n') {
+            if chars.peek().copied().is_some_and(|(_, next)| next == '\n') {
                 chars.next();
             }

             // Replace the `\r` with a `\n`
@@ -711,9 +820,9 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str>
             last_index = index + '\r'.len_utf8();
         } else if !quotes.triple && !is_raw {
             if c == '\\' {
-                if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) {
+                if let Some((_, next)) = chars.peek().copied() {
                     #[allow(clippy::if_same_then_else)]
-                    if next == opposite_quote {
+                    if next == opposite_quote && formatted_value_nesting == 0 {
                         // Remove the escape by ending before the backslash and starting again with the quote
                         chars.next();
                         output.push_str(&input[last_index..index]);
@@ -726,7 +835,7 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str>
                         chars.next();
                     }
                 }
-            } else if c == preferred_quote {
+            } else if c == preferred_quote && formatted_value_nesting == 0 {
                 // Escape the quote
                 output.push_str(&input[last_index..index]);
                 output.push('\\');
```
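
As a rough illustration (mine, not from the PR) of why the new
`formatted_value_nesting` guard leaves quotes inside replacement fields untouched
during quote normalization:

```python
# Quotes inside a replacement field belong to a nested string expression, so
# "normalizing" them would require backslash escapes, and backslashes are a
# syntax error inside f-string expressions before Python 3.12:
print(f'hello {"world"}')  # must not be rewritten to f"hello {\"world\"}"

# Doubled braces are literal braces, not a replacement field, so the diff skips
# them without changing the nesting counter:
count = 3
print(f"{{count}} is literal text, {count} is a replacement field")
```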


```diff
@@ -127,7 +127,7 @@ pub fn format_module_source(
     options: PyFormatOptions,
 ) -> Result<Printed, FormatModuleError> {
     let (tokens, comment_ranges) = tokens_and_ranges(source)?;
-    let module = parse_ok_tokens(tokens, Mode::Module, "<filename>")?;
+    let module = parse_ok_tokens(tokens, source, Mode::Module, "<filename>")?;
     let formatted = format_module_ast(&module, &comment_ranges, source, options)?;
     Ok(formatted.print()?)
 }
@@ -213,7 +213,7 @@ def main() -> None:
         // Parse the AST.
         let source_path = "code_inline.py";
-        let module = parse_ok_tokens(tokens, Mode::Module, source_path).unwrap();
+        let module = parse_ok_tokens(tokens, source, Mode::Module, source_path).unwrap();
         let options = PyFormatOptions::from_extension(Path::new(source_path));
         let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap();
```