SimpleTokenizer: Fix infinite loop when lexing empty quotes (#5917)

This commit is contained in:
Micha Reiser 2023-07-20 15:18:35 +02:00 committed by GitHub
parent ccc6bd5df0
commit d351761f5d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 3 deletions

View file

@ -0,0 +1,22 @@
---
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokenize_reverse()
---
[
SimpleToken {
kind: Comment,
range: 3..16,
},
SimpleToken {
kind: Whitespace,
range: 2..3,
},
SimpleToken {
kind: Other,
range: 1..2,
},
SimpleToken {
kind: Bogus,
range: 0..1,
},
]

View file

@ -577,15 +577,15 @@ fn find_unterminated_string_kind(input: &str) -> Option<StringKind> {
let mut rest = input;
while let Some(comment_or_string_start) = memchr3(b'#', b'\'', b'\"', rest.as_bytes()) {
let c = rest.as_bytes()[comment_or_string_start];
let c = rest.as_bytes()[comment_or_string_start] as char;
let after = &rest[comment_or_string_start + 1..];
if c == b'#' {
if c == '#' {
let comment_end = memchr2(b'\n', b'\r', after.as_bytes()).unwrap_or(after.len());
rest = &after[comment_end..];
} else {
let mut cursor = Cursor::new(after);
let quote_kind = if c == b'\'' {
let quote_kind = if c == '\'' {
QuoteKind::Single
} else {
QuoteKind::Double
@ -598,6 +598,7 @@ fn find_unterminated_string_kind(input: &str) -> Option<StringKind> {
StringKind::Triple(quote_kind)
} else {
// empty string literal, nothing more to lex
rest = cursor.chars().as_str();
continue;
}
} else {
@ -941,6 +942,13 @@ mod tests {
assert_debug_snapshot!(test_case.tokenize_reverse());
}
// Regression test for lexing an empty string literal: the input is `''`
// followed by a `#` comment whose text ends with a lone quote character.
#[test]
fn empty_string_literal() {
// Raw string so the embedded single quotes need no escaping.
let test_case = tokenize(r#"'' # a comment '"#);
// Compare the reverse tokenization against the stored debug snapshot.
assert_debug_snapshot!(test_case.tokenize_reverse());
}
#[test]
fn lines_before_empty_string() {
assert_eq!(lines_before(TextSize::new(0), ""), 0);