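//! Tests for the `SimpleTokenizer` and `BackwardsTokenizer` from
//! `ruff_python_trivia`, plus the `lines_before`/`lines_after` helpers.
//! Most tests snapshot the forward token stream with `insta` and then check
//! that tokenizing the same range backwards yields the reversed stream.
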
use insta::assert_debug_snapshot;

use ruff_python_parser::{Mode, ParseOptions, parse_unchecked};
use ruff_python_trivia::{BackwardsTokenizer, SimpleTokenKind};
use ruff_python_trivia::{CommentRanges, SimpleToken, SimpleTokenizer, lines_after, lines_before};
use ruff_text_size::{TextLen, TextRange, TextSize};

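/// A source string, the range to tokenize, and the tokens that the forward
/// `SimpleTokenizer` produced for that range.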
struct TokenizationTestCase {
    source: &'static str,
    range: TextRange,
    tokens: Vec<SimpleToken>,
}

impl TokenizationTestCase {
    fn assert_reverse_tokenization(&self) {
        let mut backwards = self.tokenize_reverse();

        // Re-reverse to get the tokens in forward order.
        backwards.reverse();

        assert_eq!(&backwards, &self.tokens);
    }

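    /// Tokenizes the range from back to front. The source is parsed first
    /// because the `BackwardsTokenizer` needs to know where the comments are.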
    fn tokenize_reverse(&self) -> Vec<SimpleToken> {
        let parsed = parse_unchecked(self.source, ParseOptions::from(Mode::Module));
        let comment_ranges = CommentRanges::from(parsed.tokens());
        BackwardsTokenizer::new(self.source, self.range, &comment_ranges).collect()
    }

    fn tokens(&self) -> &[SimpleToken] {
        &self.tokens
    }
}

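/// Creates a test case that tokenizes only `range` of `source`.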
fn tokenize_range(source: &'static str, range: TextRange) -> TokenizationTestCase {
    let tokens: Vec<_> = SimpleTokenizer::new(source, range).collect();

    TokenizationTestCase {
        source,
        range,
        tokens,
    }
}

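/// Creates a test case that tokenizes all of `source`.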
fn tokenize(source: &'static str) -> TokenizationTestCase {
    tokenize_range(source, TextRange::new(TextSize::new(0), source.text_len()))
}

#[test]
fn tokenize_trivia() {
    let source = "# comment\n    # comment";

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn tokenize_parentheses() {
    let source = "([{}])";

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn tokenize_comma() {
    let source = ",,,,";

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn tokenize_eq() {
    // Should tokenize as `==`, then `=`, regardless of whether we're lexing forwards or
    // backwards.
    let source = "===";

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn tokenize_not_eq() {
    // Should tokenize as `!=`, then `=`, regardless of whether we're lexing forwards or
    // backwards.
    let source = "!==";

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn tokenize_continuation() {
    let source = "( \\\n )";

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn tokenize_operators() {
    let source = "-> *= ( -= ) ~ // ** **= ^ ^= | |=";

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn tokenize_invalid_operators() {
    let source = "-> $=";

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());

    // note: not reversible: [other, bogus, bogus] vs [bogus, bogus, other]
}

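// The second code point is a combining vowel sign: valid in the middle of an
// identifier, but not at the start of one.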
#[test]
fn tricky_unicode() {
    let source = "មុ";

    let test_case = tokenize(source);
    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn identifier_ending_in_non_start_char() {
    let source = "i5";

    let test_case = tokenize(source);
    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn string_with_kind() {
    let source = "f'foo'";

    let test_case = tokenize(source);
    assert_debug_snapshot!(test_case.tokens());

    // note: not reversible: [other, bogus] vs [bogus, other]
}

#[test]
fn string_with_byte_kind() {
    let source = "BR'foo'";

    let test_case = tokenize(source);
    assert_debug_snapshot!(test_case.tokens());

    // note: not reversible: [other, bogus] vs [bogus, other]
}

#[test]
fn string_with_invalid_kind() {
    let source = "abc'foo'";

    let test_case = tokenize(source);
    assert_debug_snapshot!(test_case.tokens());

    // note: not reversible: [other, bogus] vs [bogus, other]
}

#[test]
fn identifier_starting_with_string_kind() {
    let source = "foo bar";

    let test_case = tokenize(source);
    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn ignore_word_with_only_id_continuing_chars() {
    let source = "555";

    let test_case = tokenize(source);
    assert_debug_snapshot!(test_case.tokens());

    // note: not reversible: [other, bogus, bogus] vs [bogus, bogus, other]
}

#[test]
fn tokenize_multichar() {
    let source = "if in else match";

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

#[test]
fn tokenize_substring() {
    let source = "('some string') # comment";

    let test_case = tokenize_range(source, TextRange::new(TextSize::new(14), source.text_len()));

    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

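// Exercises the `/` token as it appears after a trailing comma in a
// parameter list (the positional-only marker), preceded by comments.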
#[test]
fn tokenize_slash() {
    let source = r" # trailing positional comment
        # Positional arguments only after here
        ,/";

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());
    test_case.assert_reverse_tokenization();
}

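// Strings, names, and numbers aren't simple tokens; forward and backward
// lexing disagree on them, so each direction gets its own snapshot.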
#[test]
fn tokenize_bogus() {
    let source = r#"# leading comment
        "a string"
        a = (10)"#;

    let test_case = tokenize(source);

    assert_debug_snapshot!(test_case.tokens());
    assert_debug_snapshot!("Reverse", test_case.tokenize_reverse());
}

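// The tests below verify that the `BackwardsTokenizer` doesn't confuse a `#`
// inside a string literal with the start of a comment, and vice versa.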
#[test]
fn single_quoted_multiline_string_containing_comment() {
    let test_case = tokenize(
        r"'This string contains a hash looking like a comment\
# This is not a comment'",
    );

    assert_debug_snapshot!(test_case.tokenize_reverse());
}

#[test]
fn single_quoted_multiline_string_implicit_concatenation() {
    let test_case = tokenize(
        r#"'This string contains a hash looking like a comment\
# This is' "not_a_comment""#,
    );

    assert_debug_snapshot!(test_case.tokenize_reverse());
}

#[test]
fn triple_quoted_multiline_string_containing_comment() {
    let test_case = tokenize(
        r"'''This string contains a hash looking like a comment
# This is not a comment'''",
    );

    assert_debug_snapshot!(test_case.tokenize_reverse());
}

#[test]
fn comment_containing_triple_quoted_string() {
    let test_case = tokenize("'''leading string''' # a comment '''not a string'''");

    assert_debug_snapshot!(test_case.tokenize_reverse());
}

#[test]
fn comment_containing_single_quoted_string() {
    let test_case = tokenize("'leading string' # a comment 'not a string'");

    assert_debug_snapshot!(test_case.tokenize_reverse());
}

#[test]
fn string_followed_by_multiple_comments() {
    let test_case =
        tokenize(r#"'a string # containing a hash " # and another hash ' # finally a comment"#);

    assert_debug_snapshot!(test_case.tokenize_reverse());
}

#[test]
fn string_with_escaped_quote() {
    let test_case = tokenize(r"'a string \' # containing a hash ' # finally a comment");

    assert_debug_snapshot!(test_case.tokenize_reverse());
}

#[test]
fn string_with_double_escaped_backslash() {
    let test_case = tokenize(r"'a string \\' # a comment '");

    assert_debug_snapshot!(test_case.tokenize_reverse());
}

#[test]
fn empty_string_literal() {
    let test_case = tokenize(r"'' # a comment '");

    assert_debug_snapshot!(test_case.tokenize_reverse());
}

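// `lines_before` counts the line breaks between `offset` and the preceding
// non-whitespace character.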
#[test]
fn lines_before_empty_string() {
    assert_eq!(lines_before(TextSize::new(0), ""), 0);
}

#[test]
fn lines_before_in_the_middle_of_a_line() {
    assert_eq!(lines_before(TextSize::new(4), "a = 20"), 0);
}

#[test]
fn lines_before_on_a_new_line() {
    assert_eq!(lines_before(TextSize::new(7), "a = 20\nb = 10"), 1);
}

#[test]
fn lines_before_multiple_leading_newlines() {
    assert_eq!(lines_before(TextSize::new(9), "a = 20\n\r\nb = 10"), 2);
}

#[test]
fn lines_before_with_comment_offset() {
    assert_eq!(lines_before(TextSize::new(8), "a = 20\n# a comment"), 0);
}

#[test]
fn lines_before_with_trailing_comment() {
    assert_eq!(
        lines_before(TextSize::new(22), "a = 20 # some comment\nb = 10"),
        1
    );
}

#[test]
fn lines_before_with_comment_only_line() {
    assert_eq!(
        lines_before(TextSize::new(22), "a = 20\n# some comment\nb = 10"),
        1
    );
}

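// `lines_after` is the mirror image: the line breaks between `offset` and
// the following non-whitespace character.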
#[test]
fn lines_after_empty_string() {
    assert_eq!(lines_after(TextSize::new(0), ""), 0);
}

#[test]
fn lines_after_in_the_middle_of_a_line() {
    assert_eq!(lines_after(TextSize::new(4), "a = 20"), 0);
}

#[test]
fn lines_after_before_a_new_line() {
    assert_eq!(lines_after(TextSize::new(6), "a = 20\nb = 10"), 1);
}

#[test]
fn lines_after_multiple_newlines() {
    assert_eq!(lines_after(TextSize::new(6), "a = 20\n\r\nb = 10"), 2);
}

#[test]
fn lines_after_before_comment_offset() {
    assert_eq!(lines_after(TextSize::new(7), "a = 20 # a comment\n"), 0);
}

#[test]
fn lines_after_with_comment_only_line() {
    assert_eq!(
        lines_after(TextSize::new(6), "a = 20\n# some comment\nb = 10"),
        1
    );
}

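// Regardless of the trailing trivia, skipping backwards from the end should
// land on the `(` at offset 4.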
#[test]
fn test_previous_token_simple() {
    let cases = &["x = (", "x = ( ", "x = (\n"];
    for source in cases {
        let token = BackwardsTokenizer::up_to(source.text_len(), source, &[])
            .skip_trivia()
            .next()
            .unwrap();
        assert_eq!(
            token,
            SimpleToken {
                kind: SimpleTokenKind::LParen,
                range: TextRange::new(TextSize::new(4), TextSize::new(5)),
            }
        );
    }
}