Mirror of https://github.com/astral-sh/ruff.git, synced 2025-08-04 18:58:04 +00:00
Remove cyclic dev dependency with the parser crate (#11261)
## Summary

This PR removes the cyclic dev dependency some of the crates had with the parser crate. The cyclic dependencies are:

* `ruff_python_ast` has a **dev dependency** on `ruff_python_parser`, and `ruff_python_parser` directly depends on `ruff_python_ast`
* `ruff_python_trivia` has a **dev dependency** on `ruff_python_parser`, and `ruff_python_parser` indirectly depends on `ruff_python_trivia` (`ruff_python_parser` -> `ruff_python_ast` -> `ruff_python_trivia`)

Specifically, this PR introduces two new crates:

* `ruff_python_ast_integration_tests`, which receives the tests from the `ruff_python_ast` crate that use the parser
* `ruff_python_trivia_integration_tests`, which receives the tests from the `ruff_python_trivia` crate that use the parser

### Motivation

The main motivation for this PR is to help development. Before this PR, `rust-analyzer` wouldn't provide any intellisense in the `ruff_python_parser` crate for symbols from the `ruff_python_ast` crate, and instead logged a cyclic-dependency error:

```
[ERROR][2024-05-03 13:47:06] .../vim/lsp/rpc.lua:770 "rpc" "/Users/dhruv/.cargo/bin/rust-analyzer" "stderr" "[ERROR project_model::workspace] cyclic deps: ruff_python_parser(Idx::<CrateData>(50)) -> ruff_python_ast(Idx::<CrateData>(37)), alternative path: ruff_python_ast(Idx::<CrateData>(37)) -> ruff_python_parser(Idx::<CrateData>(50))\n"
```

## Test Plan

Check the `rust-analyzer` logs and confirm that the cyclic-dependency error no longer appears.
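For context, the fix works because the new crates are leaves of the workspace graph: Cargo permits dev-dependency cycles, but `rust-analyzer` rejects them (see the log above), and nothing depends back on a dedicated integration-test crate. Below is a minimal sketch of what such a manifest could look like; the exact contents in this PR may differ, and the dependency list is inferred from the moved tests:

```toml
# Hypothetical sketch of crates/ruff_python_trivia_integration_tests/Cargo.toml
[package]
name = "ruff_python_trivia_integration_tests"
version = "0.0.0"
publish = false

# Nothing in the workspace depends on this crate, so depending on both the
# parser and the crates under test cannot form a cycle in the crate graph.
[dev-dependencies]
ruff_python_trivia = { path = "../ruff_python_trivia" }
ruff_python_parser = { path = "../ruff_python_parser" }
ruff_python_index = { path = "../ruff_python_index" }
ruff_source_file = { path = "../ruff_source_file" }
ruff_text_size = { path = "../ruff_text_size" }
insta = { workspace = true }
```

The moved tests then run as ordinary integration tests, e.g. `cargo test -p ruff_python_trivia_integration_tests`.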
parent 12b5c3a54c
commit 28cc71fb6b

78 changed files with 774 additions and 728 deletions
@@ -21,8 +21,6 @@ unicode-ident = { workspace = true }
 
 [dev-dependencies]
 insta = { workspace = true }
-ruff_python_parser = { path = "../ruff_python_parser" }
-ruff_python_index = { path = "../ruff_python_index" }
 
 [lints]
 workspace = true
@@ -203,158 +203,3 @@ impl<'a> IntoIterator for &'a CommentRanges
         self.raw.iter()
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use ruff_python_index::Indexer;
-    use ruff_python_parser::lexer::LexResult;
-    use ruff_python_parser::{tokenize, Mode};
-    use ruff_source_file::Locator;
-    use ruff_text_size::TextSize;
-
-    #[test]
-    fn block_comments_two_line_block_at_start() {
-        // arrange
-        let source = "# line 1\n# line 2\n";
-        let tokens = tokenize(source, Mode::Module);
-        let locator = Locator::new(source);
-        let indexer = Indexer::from_tokens(&tokens, &locator);
-
-        // act
-        let block_comments = indexer.comment_ranges().block_comments(&locator);
-
-        // assert
-        assert_eq!(block_comments, vec![TextSize::new(0), TextSize::new(9)]);
-    }
-
-    #[test]
-    fn block_comments_indented_block() {
-        // arrange
-        let source = "    # line 1\n    # line 2\n";
-        let tokens = tokenize(source, Mode::Module);
-        let locator = Locator::new(source);
-        let indexer = Indexer::from_tokens(&tokens, &locator);
-
-        // act
-        let block_comments = indexer.comment_ranges().block_comments(&locator);
-
-        // assert
-        assert_eq!(block_comments, vec![TextSize::new(4), TextSize::new(17)]);
-    }
-
-    #[test]
-    fn block_comments_single_line_is_not_a_block() {
-        // arrange
-        let source = "\n";
-        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
-        let locator = Locator::new(source);
-        let indexer = Indexer::from_tokens(&tokens, &locator);
-
-        // act
-        let block_comments = indexer.comment_ranges().block_comments(&locator);
-
-        // assert
-        assert_eq!(block_comments, Vec::<TextSize>::new());
-    }
-
-    #[test]
-    fn block_comments_lines_with_code_not_a_block() {
-        // arrange
-        let source = "x = 1 # line 1\ny = 2 # line 2\n";
-        let tokens = tokenize(source, Mode::Module);
-        let locator = Locator::new(source);
-        let indexer = Indexer::from_tokens(&tokens, &locator);
-
-        // act
-        let block_comments = indexer.comment_ranges().block_comments(&locator);
-
-        // assert
-        assert_eq!(block_comments, Vec::<TextSize>::new());
-    }
-
-    #[test]
-    fn block_comments_sequential_lines_not_in_block() {
-        // arrange
-        let source = "    # line 1\n  # line 2\n";
-        let tokens = tokenize(source, Mode::Module);
-        let locator = Locator::new(source);
-        let indexer = Indexer::from_tokens(&tokens, &locator);
-
-        // act
-        let block_comments = indexer.comment_ranges().block_comments(&locator);
-
-        // assert
-        assert_eq!(block_comments, Vec::<TextSize>::new());
-    }
-
-    #[test]
-    fn block_comments_lines_in_triple_quotes_not_a_block() {
-        // arrange
-        let source = r#"
-"""
-# line 1
-# line 2
-"""
-"#;
-        let tokens = tokenize(source, Mode::Module);
-        let locator = Locator::new(source);
-        let indexer = Indexer::from_tokens(&tokens, &locator);
-
-        // act
-        let block_comments = indexer.comment_ranges().block_comments(&locator);
-
-        // assert
-        assert_eq!(block_comments, Vec::<TextSize>::new());
-    }
-
-    #[test]
-    fn block_comments_stress_test() {
-        // arrange
-        let source = r#"
-# block comment 1 line 1
-# block comment 2 line 2
-
-# these lines
-  # do not form
-    # a block comment
-
-x = 1 # these lines also do not
-y = 2 # do not form a block comment
-
-# these lines do form a block comment
-#
-
-    #
-    # and so do these
-    #
-
-"""
-# these lines are in triple quotes and
-# therefore do not form a block comment
-"""
-"#;
-        let tokens = tokenize(source, Mode::Module);
-        let locator = Locator::new(source);
-        let indexer = Indexer::from_tokens(&tokens, &locator);
-
-        // act
-        let block_comments = indexer.comment_ranges().block_comments(&locator);
-
-        // assert
-        assert_eq!(
-            block_comments,
-            vec![
-                // Block #1
-                TextSize::new(1),
-                TextSize::new(26),
-                // Block #2
-                TextSize::new(174),
-                TextSize::new(212),
-                // Block #3
-                TextSize::new(219),
-                TextSize::new(225),
-                TextSize::new(247)
-            ]
-        );
-    }
-}
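After the move, tests like the ones deleted above become ordinary integration tests in the new crate, with no `#[cfg(test)]` module wrapper. A sketch of the resulting shape, using the first deleted test; the file path is an assumption:

```rust
// Assumed location: crates/ruff_python_trivia_integration_tests/tests/block_comments.rs
use ruff_python_index::Indexer;
use ruff_python_parser::{tokenize, Mode};
use ruff_source_file::Locator;
use ruff_text_size::TextSize;

// Integration tests compile as their own crate, so the test function sits at
// the top level instead of inside a `#[cfg(test)] mod tests` block.
#[test]
fn block_comments_two_line_block_at_start() {
    // arrange
    let source = "# line 1\n# line 2\n";
    let tokens = tokenize(source, Mode::Module);
    let locator = Locator::new(source);
    let indexer = Indexer::from_tokens(&tokens, &locator);

    // act
    let block_comments = indexer.comment_ranges().block_comments(&locator);

    // assert
    assert_eq!(block_comments, vec![TextSize::new(0), TextSize::new(9)]);
}
```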
@@ -1,18 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokenize_reverse()
----
-[
-    SimpleToken {
-        kind: RParen,
-        range: 52..53,
-    },
-    SimpleToken {
-        kind: Other,
-        range: 51..52,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 0..51,
-    },
-]
@@ -1,22 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokenize_reverse()
----
-[
-    SimpleToken {
-        kind: Comment,
-        range: 17..43,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 16..17,
-    },
-    SimpleToken {
-        kind: Other,
-        range: 15..16,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 0..15,
-    },
-]
@@ -1,22 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokenize_reverse()
----
-[
-    SimpleToken {
-        kind: Comment,
-        range: 21..51,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 20..21,
-    },
-    SimpleToken {
-        kind: Other,
-        range: 19..20,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 0..19,
-    },
-]
@@ -1,22 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokenize_reverse()
----
-[
-    SimpleToken {
-        kind: Comment,
-        range: 3..16,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 2..3,
-    },
-    SimpleToken {
-        kind: Other,
-        range: 1..2,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 0..1,
-    },
-]
@@ -1,10 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Name,
-        range: 0..2,
-    },
-]
@@ -1,18 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Name,
-        range: 0..3,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 3..4,
-    },
-    SimpleToken {
-        kind: Name,
-        range: 4..7,
-    },
-]
@@ -1,14 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Other,
-        range: 0..1,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 1..3,
-    },
-]
@@ -1,14 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokenize_reverse()
----
-[
-    SimpleToken {
-        kind: Other,
-        range: 76..77,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 0..76,
-    },
-]
@@ -1,14 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokenize_reverse()
----
-[
-    SimpleToken {
-        kind: Other,
-        range: 78..79,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 0..78,
-    },
-]
@@ -1,22 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokenize_reverse()
----
-[
-    SimpleToken {
-        kind: Comment,
-        range: 53..72,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 52..53,
-    },
-    SimpleToken {
-        kind: Other,
-        range: 51..52,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 0..51,
-    },
-]
@@ -1,14 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Other,
-        range: 0..2,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 2..7,
-    },
-]
@@ -1,22 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokenize_reverse()
----
-[
-    SimpleToken {
-        kind: Comment,
-        range: 14..27,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 13..14,
-    },
-    SimpleToken {
-        kind: Other,
-        range: 12..13,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 0..12,
-    },
-]
@@ -1,22 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokenize_reverse()
----
-[
-    SimpleToken {
-        kind: Comment,
-        range: 35..54,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 34..35,
-    },
-    SimpleToken {
-        kind: Other,
-        range: 33..34,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 0..33,
-    },
-]
@@ -1,18 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Name,
-        range: 0..3,
-    },
-    SimpleToken {
-        kind: Other,
-        range: 3..4,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 4..8,
-    },
-]
@@ -1,14 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Other,
-        range: 0..1,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 1..6,
-    },
-]
@@ -1,26 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Comment,
-        range: 0..17,
-    },
-    SimpleToken {
-        kind: Newline,
-        range: 17..18,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 18..26,
-    },
-    SimpleToken {
-        kind: Other,
-        range: 26..27,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 27..53,
-    },
-]
@@ -1,22 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Comma,
-        range: 0..1,
-    },
-    SimpleToken {
-        kind: Comma,
-        range: 1..2,
-    },
-    SimpleToken {
-        kind: Comma,
-        range: 2..3,
-    },
-    SimpleToken {
-        kind: Comma,
-        range: 3..4,
-    },
-]
@@ -1,30 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: LParen,
-        range: 0..1,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 1..2,
-    },
-    SimpleToken {
-        kind: Continuation,
-        range: 2..3,
-    },
-    SimpleToken {
-        kind: Newline,
-        range: 3..4,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 4..5,
-    },
-    SimpleToken {
-        kind: RParen,
-        range: 5..6,
-    },
-]
@@ -1,14 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: EqEqual,
-        range: 0..2,
-    },
-    SimpleToken {
-        kind: Equals,
-        range: 2..3,
-    },
-]
@@ -1,22 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: RArrow,
-        range: 0..2,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 2..3,
-    },
-    SimpleToken {
-        kind: Other,
-        range: 3..4,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 4..5,
-    },
-]
@@ -1,34 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: If,
-        range: 0..2,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 2..3,
-    },
-    SimpleToken {
-        kind: In,
-        range: 3..5,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 5..6,
-    },
-    SimpleToken {
-        kind: Else,
-        range: 6..10,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 10..11,
-    },
-    SimpleToken {
-        kind: Match,
-        range: 11..16,
-    },
-]
@@ -1,14 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: NotEqual,
-        range: 0..2,
-    },
-    SimpleToken {
-        kind: Equals,
-        range: 2..3,
-    },
-]
@@ -1,106 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: RArrow,
-        range: 0..2,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 2..3,
-    },
-    SimpleToken {
-        kind: StarEqual,
-        range: 3..5,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 5..6,
-    },
-    SimpleToken {
-        kind: LParen,
-        range: 6..7,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 7..8,
-    },
-    SimpleToken {
-        kind: MinusEqual,
-        range: 8..10,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 10..11,
-    },
-    SimpleToken {
-        kind: RParen,
-        range: 11..12,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 12..13,
-    },
-    SimpleToken {
-        kind: Tilde,
-        range: 13..14,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 14..15,
-    },
-    SimpleToken {
-        kind: DoubleSlash,
-        range: 15..17,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 17..18,
-    },
-    SimpleToken {
-        kind: DoubleStar,
-        range: 18..20,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 20..21,
-    },
-    SimpleToken {
-        kind: DoubleStarEqual,
-        range: 21..24,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 24..25,
-    },
-    SimpleToken {
-        kind: Circumflex,
-        range: 25..26,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 26..27,
-    },
-    SimpleToken {
-        kind: CircumflexEqual,
-        range: 27..29,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 29..30,
-    },
-    SimpleToken {
-        kind: Vbar,
-        range: 30..31,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 31..32,
-    },
-    SimpleToken {
-        kind: VbarEqual,
-        range: 32..34,
-    },
-]
@@ -1,30 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: LParen,
-        range: 0..1,
-    },
-    SimpleToken {
-        kind: LBracket,
-        range: 1..2,
-    },
-    SimpleToken {
-        kind: LBrace,
-        range: 2..3,
-    },
-    SimpleToken {
-        kind: RBrace,
-        range: 3..4,
-    },
-    SimpleToken {
-        kind: RBracket,
-        range: 4..5,
-    },
-    SimpleToken {
-        kind: RParen,
-        range: 5..6,
-    },
-]
@@ -1,42 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Whitespace,
-        range: 0..1,
-    },
-    SimpleToken {
-        kind: Comment,
-        range: 1..30,
-    },
-    SimpleToken {
-        kind: Newline,
-        range: 30..31,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 31..39,
-    },
-    SimpleToken {
-        kind: Comment,
-        range: 39..77,
-    },
-    SimpleToken {
-        kind: Newline,
-        range: 77..78,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 78..86,
-    },
-    SimpleToken {
-        kind: Comma,
-        range: 86..87,
-    },
-    SimpleToken {
-        kind: Slash,
-        range: 87..88,
-    },
-]
@@ -1,18 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: RParen,
-        range: 14..15,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 15..16,
-    },
-    SimpleToken {
-        kind: Comment,
-        range: 16..25,
-    },
-]
@@ -1,22 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Comment,
-        range: 0..9,
-    },
-    SimpleToken {
-        kind: Newline,
-        range: 9..10,
-    },
-    SimpleToken {
-        kind: Whitespace,
-        range: 10..14,
-    },
-    SimpleToken {
-        kind: Comment,
-        range: 14..23,
-    },
-]
@@ -1,10 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokens()
----
-[
-    SimpleToken {
-        kind: Name,
-        range: 0..6,
-    },
-]
@@ -1,14 +0,0 @@
----
-source: crates/ruff_python_trivia/src/tokenizer.rs
-expression: test_case.tokenize_reverse()
----
-[
-    SimpleToken {
-        kind: Other,
-        range: 79..80,
-    },
-    SimpleToken {
-        kind: Bogus,
-        range: 0..79,
-    },
-]
@@ -1024,426 +1024,3 @@ impl Iterator for BackwardsTokenizer<'_> {
         }
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use insta::assert_debug_snapshot;
-
-    use ruff_python_parser::lexer::lex;
-    use ruff_python_parser::{Mode, Tok};
-    use ruff_text_size::{TextLen, TextRange, TextSize};
-
-    use crate::tokenizer::{lines_after, lines_before, SimpleToken, SimpleTokenizer};
-    use crate::{BackwardsTokenizer, SimpleTokenKind};
-
-    struct TokenizationTestCase {
-        source: &'static str,
-        range: TextRange,
-        tokens: Vec<SimpleToken>,
-    }
-
-    impl TokenizationTestCase {
-        fn assert_reverse_tokenization(&self) {
-            let mut backwards = self.tokenize_reverse();
-
-            // Re-reverse to get the tokens in forward order.
-            backwards.reverse();
-
-            assert_eq!(&backwards, &self.tokens);
-        }
-
-        fn tokenize_reverse(&self) -> Vec<SimpleToken> {
-            let comment_ranges: Vec<_> = lex(self.source, Mode::Module)
-                .filter_map(|result| {
-                    let (token, range) = result.expect("Input to be a valid python program.");
-                    if matches!(token, Tok::Comment(_)) {
-                        Some(range)
-                    } else {
-                        None
-                    }
-                })
-                .collect();
-            BackwardsTokenizer::new(self.source, self.range, &comment_ranges).collect()
-        }
-
-        fn tokens(&self) -> &[SimpleToken] {
-            &self.tokens
-        }
-    }
-
-    fn tokenize_range(source: &'static str, range: TextRange) -> TokenizationTestCase {
-        let tokens: Vec<_> = SimpleTokenizer::new(source, range).collect();
-
-        TokenizationTestCase {
-            source,
-            range,
-            tokens,
-        }
-    }
-
-    fn tokenize(source: &'static str) -> TokenizationTestCase {
-        tokenize_range(source, TextRange::new(TextSize::new(0), source.text_len()))
-    }
-
-    #[test]
-    fn tokenize_trivia() {
-        let source = "# comment\n    # comment";
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn tokenize_parentheses() {
-        let source = "([{}])";
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn tokenize_comma() {
-        let source = ",,,,";
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn tokenize_eq() {
-        // Should tokenize as `==`, then `=`, regardless of whether we're lexing forwards or
-        // backwards.
-        let source = "===";
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn tokenize_not_eq() {
-        // Should tokenize as `!=`, then `=`, regardless of whether we're lexing forwards or
-        // backwards.
-        let source = "!==";
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn tokenize_continuation() {
-        let source = "( \\\n )";
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn tokenize_operators() {
-        let source = "-> *= ( -= ) ~ // ** **= ^ ^= | |=";
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn tokenize_invalid_operators() {
-        let source = "-> $=";
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-
-        // note: not reversible: [other, bogus, bogus] vs [bogus, bogus, other]
-    }
-
-    #[test]
-    fn tricky_unicode() {
-        let source = "មុ";
-
-        let test_case = tokenize(source);
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn identifier_ending_in_non_start_char() {
-        let source = "i5";
-
-        let test_case = tokenize(source);
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn string_with_kind() {
-        let source = "f'foo'";
-
-        let test_case = tokenize(source);
-        assert_debug_snapshot!(test_case.tokens());
-
-        // note: not reversible: [other, bogus] vs [bogus, other]
-    }
-
-    #[test]
-    fn string_with_byte_kind() {
-        let source = "BR'foo'";
-
-        let test_case = tokenize(source);
-        assert_debug_snapshot!(test_case.tokens());
-
-        // note: not reversible: [other, bogus] vs [bogus, other]
-    }
-
-    #[test]
-    fn string_with_invalid_kind() {
-        let source = "abc'foo'";
-
-        let test_case = tokenize(source);
-        assert_debug_snapshot!(test_case.tokens());
-
-        // note: not reversible: [other, bogus] vs [bogus, other]
-    }
-
-    #[test]
-    fn identifier_starting_with_string_kind() {
-        let source = "foo bar";
-
-        let test_case = tokenize(source);
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn ignore_word_with_only_id_continuing_chars() {
-        let source = "555";
-
-        let test_case = tokenize(source);
-        assert_debug_snapshot!(test_case.tokens());
-
-        // note: not reversible: [other, bogus, bogus] vs [bogus, bogus, other]
-    }
-
-    #[test]
-    fn tokenize_multichar() {
-        let source = "if in else match";
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn tokenize_substring() {
-        let source = "('some string') # comment";
-
-        let test_case =
-            tokenize_range(source, TextRange::new(TextSize::new(14), source.text_len()));
-
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn tokenize_slash() {
-        let source = r" # trailing positional comment
-        # Positional arguments only after here
-        ,/";
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-        test_case.assert_reverse_tokenization();
-    }
-
-    #[test]
-    fn tokenize_bogus() {
-        let source = r#"# leading comment
-        "a string"
-        a = (10)"#;
-
-        let test_case = tokenize(source);
-
-        assert_debug_snapshot!(test_case.tokens());
-        assert_debug_snapshot!("Reverse", test_case.tokenize_reverse());
-    }
-
-    #[test]
-    fn single_quoted_multiline_string_containing_comment() {
-        let test_case = tokenize(
-            r"'This string contains a hash looking like a comment\
-# This is not a comment'",
-        );
-
-        assert_debug_snapshot!(test_case.tokenize_reverse());
-    }
-
-    #[test]
-    fn single_quoted_multiline_string_implicit_concatenation() {
-        let test_case = tokenize(
-            r#"'This string contains a hash looking like a comment\
-# This is' "not_a_comment""#,
-        );
-
-        assert_debug_snapshot!(test_case.tokenize_reverse());
-    }
-
-    #[test]
-    fn triple_quoted_multiline_string_containing_comment() {
-        let test_case = tokenize(
-            r"'''This string contains a hash looking like a comment
-# This is not a comment'''",
-        );
-
-        assert_debug_snapshot!(test_case.tokenize_reverse());
-    }
-
-    #[test]
-    fn comment_containing_triple_quoted_string() {
-        let test_case = tokenize("'''leading string''' # a comment '''not a string'''");
-
-        assert_debug_snapshot!(test_case.tokenize_reverse());
-    }
-
-    #[test]
-    fn comment_containing_single_quoted_string() {
-        let test_case = tokenize("'leading string' # a comment 'not a string'");
-
-        assert_debug_snapshot!(test_case.tokenize_reverse());
-    }
-
-    #[test]
-    fn string_followed_by_multiple_comments() {
-        let test_case =
-            tokenize(r#"'a string # containing a hash " # and another hash ' # finally a comment"#);
-
-        assert_debug_snapshot!(test_case.tokenize_reverse());
-    }
-
-    #[test]
-    fn string_with_escaped_quote() {
-        let test_case = tokenize(r"'a string \' # containing a hash ' # finally a comment");
-
-        assert_debug_snapshot!(test_case.tokenize_reverse());
-    }
-
-    #[test]
-    fn string_with_double_escaped_backslash() {
-        let test_case = tokenize(r"'a string \\' # a comment '");
-
-        assert_debug_snapshot!(test_case.tokenize_reverse());
-    }
-
-    #[test]
-    fn empty_string_literal() {
-        let test_case = tokenize(r"'' # a comment '");
-
-        assert_debug_snapshot!(test_case.tokenize_reverse());
-    }
-
-    #[test]
-    fn lines_before_empty_string() {
-        assert_eq!(lines_before(TextSize::new(0), ""), 0);
-    }
-
-    #[test]
-    fn lines_before_in_the_middle_of_a_line() {
-        assert_eq!(lines_before(TextSize::new(4), "a = 20"), 0);
-    }
-
-    #[test]
-    fn lines_before_on_a_new_line() {
-        assert_eq!(lines_before(TextSize::new(7), "a = 20\nb = 10"), 1);
-    }
-
-    #[test]
-    fn lines_before_multiple_leading_newlines() {
-        assert_eq!(lines_before(TextSize::new(9), "a = 20\n\r\nb = 10"), 2);
-    }
-
-    #[test]
-    fn lines_before_with_comment_offset() {
-        assert_eq!(lines_before(TextSize::new(8), "a = 20\n# a comment"), 0);
-    }
-
-    #[test]
-    fn lines_before_with_trailing_comment() {
-        assert_eq!(
-            lines_before(TextSize::new(22), "a = 20 # some comment\nb = 10"),
-            1
-        );
-    }
-
-    #[test]
-    fn lines_before_with_comment_only_line() {
-        assert_eq!(
-            lines_before(TextSize::new(22), "a = 20\n# some comment\nb = 10"),
-            1
-        );
-    }
-
-    #[test]
-    fn lines_after_empty_string() {
-        assert_eq!(lines_after(TextSize::new(0), ""), 0);
-    }
-
-    #[test]
-    fn lines_after_in_the_middle_of_a_line() {
-        assert_eq!(lines_after(TextSize::new(4), "a = 20"), 0);
-    }
-
-    #[test]
-    fn lines_after_before_a_new_line() {
-        assert_eq!(lines_after(TextSize::new(6), "a = 20\nb = 10"), 1);
-    }
-
-    #[test]
-    fn lines_after_multiple_newlines() {
-        assert_eq!(lines_after(TextSize::new(6), "a = 20\n\r\nb = 10"), 2);
-    }
-
-    #[test]
-    fn lines_after_before_comment_offset() {
-        assert_eq!(lines_after(TextSize::new(7), "a = 20 # a comment\n"), 0);
-    }
-
-    #[test]
-    fn lines_after_with_comment_only_line() {
-        assert_eq!(
-            lines_after(TextSize::new(6), "a = 20\n# some comment\nb = 10"),
-            1
-        );
-    }
-
-    #[test]
-    fn test_previous_token_simple() {
-        let cases = &["x = (", "x = ( ", "x = (\n"];
-        for source in cases {
-            let token = BackwardsTokenizer::up_to(source.text_len(), source, &[])
-                .skip_trivia()
-                .next()
-                .unwrap();
-            assert_eq!(
-                token,
-                SimpleToken {
-                    kind: SimpleTokenKind::LParen,
-                    range: TextRange::new(TextSize::new(4), TextSize::new(5)),
-                }
-            );
-        }
-    }
-}
@@ -79,51 +79,3 @@ impl PythonWhitespace for str
         self.trim_end_matches(is_python_whitespace)
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use ruff_python_parser::{parse_suite, ParseError};
-    use ruff_source_file::Locator;
-    use ruff_text_size::Ranged;
-
-    use crate::has_trailing_content;
-
-    #[test]
-    fn trailing_content() -> Result<(), ParseError> {
-        let contents = "x = 1";
-        let program = parse_suite(contents)?;
-        let stmt = program.first().unwrap();
-        let locator = Locator::new(contents);
-        assert!(!has_trailing_content(stmt.end(), &locator));
-
-        let contents = "x = 1; y = 2";
-        let program = parse_suite(contents)?;
-        let stmt = program.first().unwrap();
-        let locator = Locator::new(contents);
-        assert!(has_trailing_content(stmt.end(), &locator));
-
-        let contents = "x = 1 ";
-        let program = parse_suite(contents)?;
-        let stmt = program.first().unwrap();
-        let locator = Locator::new(contents);
-        assert!(!has_trailing_content(stmt.end(), &locator));
-
-        let contents = "x = 1 # Comment";
-        let program = parse_suite(contents)?;
-        let stmt = program.first().unwrap();
-        let locator = Locator::new(contents);
-        assert!(!has_trailing_content(stmt.end(), &locator));
-
-        let contents = r"
-x = 1
-y = 2
-"
-        .trim();
-        let program = parse_suite(contents)?;
-        let stmt = program.first().unwrap();
-        let locator = Locator::new(contents);
-        assert!(!has_trailing_content(stmt.end(), &locator));
-
-        Ok(())
-    }
-}