fix: handle nested comments (#726)

This commit is contained in:
Han YANG 2022-12-01 02:22:29 +08:00 committed by GitHub
parent 316359760d
commit e22aa78315
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -832,22 +832,23 @@ impl<'a> Tokenizer<'a> {
chars: &mut Peekable<Chars<'_>>,
) -> Result<Option<Token>, TokenizerError> {
let mut s = String::new();
let mut maybe_closing_comment = false;
// TODO: deal with nested comments
let mut nested = 1;
let mut last_ch = ' ';
loop {
match chars.next() {
Some(ch) => {
if maybe_closing_comment {
if ch == '/' {
if last_ch == '/' && ch == '*' {
nested += 1;
} else if last_ch == '*' && ch == '/' {
nested -= 1;
if nested == 0 {
s.pop();
break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
} else {
s.push('*');
}
}
maybe_closing_comment = ch == '*';
if !maybe_closing_comment {
s.push(ch);
}
s.push(ch);
last_ch = ch;
}
None => break self.tokenizer_error("Unexpected EOF while in a multi-line comment"),
}
@ -1355,6 +1356,23 @@ mod tests {
compare(expected, tokens);
}
#[test]
fn tokenize_nested_multiline_comment() {
let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1");
let dialect = GenericDialect {};
let mut tokenizer = Tokenizer::new(&dialect, &sql);
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::Number("0".to_string(), false),
Token::Whitespace(Whitespace::MultiLineComment(
"multi-line\n* \n/* comment \n /*comment*/*/ */ /comment".to_string(),
)),
Token::Number("1".to_string(), false),
];
compare(expected, tokens);
}
#[test]
fn tokenize_multiline_comment_with_even_asterisks() {
let sql = String::from("\n/** Comment **/\n");