mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-12 13:26:33 +00:00
Add support for PEP 701 (#7376)
## Summary

This PR adds support for PEP 701 in Ruff. This is a rollup PR of all the other individual PRs. The separate PRs were created for logical separation and code reviews. Refer to each pull request for a detailed description of the change.

Refer to the PR description for the list of pull requests within this PR.

## Test Plan

### Formatter ecosystem checks

Explanation for the change in ecosystem check: https://github.com/astral-sh/ruff/pull/7597#issue-1908878183

#### `main`

```
| project      | similarity index  | total files       | changed files     |
|--------------|------------------:|------------------:|------------------:|
| cpython      | 0.76083           | 1789              | 1631              |
| django       | 0.99983           | 2760              | 36                |
| transformers | 0.99963           | 2587              | 319               |
| twine        | 1.00000           | 33                | 0                 |
| typeshed     | 0.99983           | 3496              | 18                |
| warehouse    | 0.99967           | 648               | 15                |
| zulip        | 0.99972           | 1437              | 21                |
```

#### `dhruv/pep-701`

```
| project      | similarity index  | total files       | changed files     |
|--------------|------------------:|------------------:|------------------:|
| cpython      | 0.76051           | 1789              | 1632              |
| django       | 0.99983           | 2760              | 36                |
| transformers | 0.99963           | 2587              | 319               |
| twine        | 1.00000           | 33                | 0                 |
| typeshed     | 0.99983           | 3496              | 18                |
| warehouse    | 0.99967           | 648               | 15                |
| zulip        | 0.99972           | 1437              | 21                |
```
This commit is contained in:
parent
78b8741352
commit
e62e245c61
115 changed files with 44780 additions and 31370 deletions
|
@ -1,25 +1,26 @@
|
|||
//! Struct used to index source code, to enable efficient lookup of tokens that
|
||||
//! are omitted from the AST (e.g., commented lines).
|
||||
|
||||
use crate::CommentRangesBuilder;
|
||||
use ruff_python_ast::Stmt;
|
||||
use ruff_python_parser::lexer::LexResult;
|
||||
use ruff_python_parser::{StringKind, Tok};
|
||||
use ruff_python_parser::Tok;
|
||||
use ruff_python_trivia::{
|
||||
has_leading_content, has_trailing_content, is_python_whitespace, CommentRanges,
|
||||
};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder};
|
||||
use crate::CommentRangesBuilder;
|
||||
|
||||
pub struct Indexer {
|
||||
comment_ranges: CommentRanges,
|
||||
|
||||
/// Stores the start offset of continuation lines.
|
||||
continuation_lines: Vec<TextSize>,
|
||||
|
||||
/// The range of all f-string in the source document. The ranges are sorted by their
|
||||
/// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
|
||||
f_string_ranges: Vec<TextRange>,
|
||||
/// The range of all f-string in the source document.
|
||||
fstring_ranges: FStringRanges,
|
||||
}
|
||||
|
||||
impl Indexer {
|
||||
|
@ -27,8 +28,8 @@ impl Indexer {
|
|||
assert!(TextSize::try_from(locator.contents().len()).is_ok());
|
||||
|
||||
let mut comment_ranges_builder = CommentRangesBuilder::default();
|
||||
let mut fstring_ranges_builder = FStringRangesBuilder::default();
|
||||
let mut continuation_lines = Vec::new();
|
||||
let mut f_string_ranges = Vec::new();
|
||||
// Token, end
|
||||
let mut prev_end = TextSize::default();
|
||||
let mut prev_token: Option<&Tok> = None;
|
||||
|
@ -59,18 +60,10 @@ impl Indexer {
|
|||
}
|
||||
|
||||
comment_ranges_builder.visit_token(tok, *range);
|
||||
fstring_ranges_builder.visit_token(tok, *range);
|
||||
|
||||
match tok {
|
||||
Tok::Newline | Tok::NonLogicalNewline => {
|
||||
line_start = range.end();
|
||||
}
|
||||
Tok::String {
|
||||
kind: StringKind::FString | StringKind::RawFString,
|
||||
..
|
||||
} => {
|
||||
f_string_ranges.push(*range);
|
||||
}
|
||||
_ => {}
|
||||
if matches!(tok, Tok::Newline | Tok::NonLogicalNewline) {
|
||||
line_start = range.end();
|
||||
}
|
||||
|
||||
prev_token = Some(tok);
|
||||
|
@ -79,7 +72,7 @@ impl Indexer {
|
|||
Self {
|
||||
comment_ranges: comment_ranges_builder.finish(),
|
||||
continuation_lines,
|
||||
f_string_ranges,
|
||||
fstring_ranges: fstring_ranges_builder.finish(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -88,6 +81,11 @@ impl Indexer {
|
|||
&self.comment_ranges
|
||||
}
|
||||
|
||||
/// Returns the byte offset ranges of f-strings.
|
||||
pub const fn fstring_ranges(&self) -> &FStringRanges {
|
||||
&self.fstring_ranges
|
||||
}
|
||||
|
||||
/// Returns the line start positions of continuations (backslash).
|
||||
pub fn continuation_line_starts(&self) -> &[TextSize] {
|
||||
&self.continuation_lines
|
||||
|
@ -99,22 +97,6 @@ impl Indexer {
|
|||
self.continuation_lines.binary_search(&line_start).is_ok()
|
||||
}
|
||||
|
||||
/// Return the [`TextRange`] of the f-string containing a given offset.
|
||||
pub fn f_string_range(&self, offset: TextSize) -> Option<TextRange> {
|
||||
let Ok(string_range_index) = self.f_string_ranges.binary_search_by(|range| {
|
||||
if offset < range.start() {
|
||||
std::cmp::Ordering::Greater
|
||||
} else if range.contains(offset) {
|
||||
std::cmp::Ordering::Equal
|
||||
} else {
|
||||
std::cmp::Ordering::Less
|
||||
}
|
||||
}) else {
|
||||
return None;
|
||||
};
|
||||
Some(self.f_string_ranges[string_range_index])
|
||||
}
|
||||
|
||||
/// Returns `true` if a statement or expression includes at least one comment.
|
||||
pub fn has_comments<T>(&self, node: &T, locator: &Locator) -> bool
|
||||
where
|
||||
|
@ -250,7 +232,7 @@ mod tests {
|
|||
use ruff_python_parser::lexer::LexResult;
|
||||
use ruff_python_parser::{lexer, Mode};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::TextSize;
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
|
||||
use crate::Indexer;
|
||||
|
||||
|
@ -333,5 +315,203 @@ import os
|
|||
TextSize::from(116)
|
||||
]
|
||||
);
|
||||
|
||||
let contents = r"
|
||||
f'foo { 'str1' \
|
||||
'str2' \
|
||||
'str3'
|
||||
f'nested { 'str4'
|
||||
'str5' \
|
||||
'str6'
|
||||
}'
|
||||
}'
|
||||
"
|
||||
.trim();
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
|
||||
assert_eq!(
|
||||
indexer.continuation_line_starts(),
|
||||
[
|
||||
// row 1
|
||||
TextSize::new(0),
|
||||
// row 2
|
||||
TextSize::new(17),
|
||||
// row 5
|
||||
TextSize::new(63),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_f_string_ranges() {
|
||||
let contents = r#"
|
||||
f"normal f-string"
|
||||
f"start {f"inner {f"another"}"} end"
|
||||
f"implicit " f"concatenation"
|
||||
"#
|
||||
.trim();
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
|
||||
assert_eq!(
|
||||
indexer
|
||||
.fstring_ranges()
|
||||
.values()
|
||||
.copied()
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
TextRange::new(TextSize::from(0), TextSize::from(18)),
|
||||
TextRange::new(TextSize::from(19), TextSize::from(55)),
|
||||
TextRange::new(TextSize::from(28), TextSize::from(49)),
|
||||
TextRange::new(TextSize::from(37), TextSize::from(47)),
|
||||
TextRange::new(TextSize::from(56), TextSize::from(68)),
|
||||
TextRange::new(TextSize::from(69), TextSize::from(85)),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_triple_quoted_f_string_ranges() {
|
||||
let contents = r#"
|
||||
f"""
|
||||
this is one
|
||||
multiline f-string
|
||||
"""
|
||||
f'''
|
||||
and this is
|
||||
another
|
||||
'''
|
||||
f"""
|
||||
this is a {f"""nested multiline
|
||||
f-string"""}
|
||||
"""
|
||||
"#
|
||||
.trim();
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
|
||||
assert_eq!(
|
||||
indexer
|
||||
.fstring_ranges()
|
||||
.values()
|
||||
.copied()
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
TextRange::new(TextSize::from(0), TextSize::from(39)),
|
||||
TextRange::new(TextSize::from(40), TextSize::from(68)),
|
||||
TextRange::new(TextSize::from(69), TextSize::from(122)),
|
||||
TextRange::new(TextSize::from(85), TextSize::from(117)),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_innermost_outermost() {
|
||||
let contents = r#"
|
||||
f"no nested f-string"
|
||||
|
||||
if True:
|
||||
f"first {f"second {f"third"} second"} first"
|
||||
foo = "normal string"
|
||||
|
||||
f"implicit " f"concatenation"
|
||||
|
||||
f"first line {
|
||||
foo + f"second line {bar}"
|
||||
} third line"
|
||||
|
||||
f"""this is a
|
||||
multi-line {f"""nested
|
||||
f-string"""}
|
||||
the end"""
|
||||
"#
|
||||
.trim();
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
|
||||
|
||||
// For reference, the ranges of the f-strings in the above code are as
|
||||
// follows where the ones inside parentheses are nested f-strings:
|
||||
//
|
||||
// [0..21, (36..80, 45..72, 55..63), 108..120, 121..137, (139..198, 164..184), (200..260, 226..248)]
|
||||
|
||||
for (offset, innermost_range, outermost_range) in [
|
||||
// Inside a normal f-string
|
||||
(
|
||||
TextSize::new(130),
|
||||
TextRange::new(TextSize::new(121), TextSize::new(137)),
|
||||
TextRange::new(TextSize::new(121), TextSize::new(137)),
|
||||
),
|
||||
// Left boundary
|
||||
(
|
||||
TextSize::new(121),
|
||||
TextRange::new(TextSize::new(121), TextSize::new(137)),
|
||||
TextRange::new(TextSize::new(121), TextSize::new(137)),
|
||||
),
|
||||
// Right boundary
|
||||
(
|
||||
TextSize::new(136), // End offsets are exclusive
|
||||
TextRange::new(TextSize::new(121), TextSize::new(137)),
|
||||
TextRange::new(TextSize::new(121), TextSize::new(137)),
|
||||
),
|
||||
// "first" left
|
||||
(
|
||||
TextSize::new(40),
|
||||
TextRange::new(TextSize::new(36), TextSize::new(80)),
|
||||
TextRange::new(TextSize::new(36), TextSize::new(80)),
|
||||
),
|
||||
// "second" left
|
||||
(
|
||||
TextSize::new(50),
|
||||
TextRange::new(TextSize::new(45), TextSize::new(72)),
|
||||
TextRange::new(TextSize::new(36), TextSize::new(80)),
|
||||
),
|
||||
// "third"
|
||||
(
|
||||
TextSize::new(60),
|
||||
TextRange::new(TextSize::new(55), TextSize::new(63)),
|
||||
TextRange::new(TextSize::new(36), TextSize::new(80)),
|
||||
),
|
||||
// "second" right
|
||||
(
|
||||
TextSize::new(70),
|
||||
TextRange::new(TextSize::new(45), TextSize::new(72)),
|
||||
TextRange::new(TextSize::new(36), TextSize::new(80)),
|
||||
),
|
||||
// "first" right
|
||||
(
|
||||
TextSize::new(75),
|
||||
TextRange::new(TextSize::new(36), TextSize::new(80)),
|
||||
TextRange::new(TextSize::new(36), TextSize::new(80)),
|
||||
),
|
||||
// Single-quoted f-strings spanning across multiple lines
|
||||
(
|
||||
TextSize::new(160),
|
||||
TextRange::new(TextSize::new(139), TextSize::new(198)),
|
||||
TextRange::new(TextSize::new(139), TextSize::new(198)),
|
||||
),
|
||||
(
|
||||
TextSize::new(170),
|
||||
TextRange::new(TextSize::new(164), TextSize::new(184)),
|
||||
TextRange::new(TextSize::new(139), TextSize::new(198)),
|
||||
),
|
||||
// Multi-line f-strings
|
||||
(
|
||||
TextSize::new(220),
|
||||
TextRange::new(TextSize::new(200), TextSize::new(260)),
|
||||
TextRange::new(TextSize::new(200), TextSize::new(260)),
|
||||
),
|
||||
(
|
||||
TextSize::new(240),
|
||||
TextRange::new(TextSize::new(226), TextSize::new(248)),
|
||||
TextRange::new(TextSize::new(200), TextSize::new(260)),
|
||||
),
|
||||
] {
|
||||
assert_eq!(
|
||||
indexer.fstring_ranges().innermost(offset).unwrap(),
|
||||
innermost_range
|
||||
);
|
||||
assert_eq!(
|
||||
indexer.fstring_ranges().outermost(offset).unwrap(),
|
||||
outermost_range
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue