mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-25 16:34:04 +00:00
Correctly tokenize nested comments (#1629)
This commit is contained in:
parent
94ea20628f
commit
8bc63f0e4a
4 changed files with 109 additions and 18 deletions
|
@ -131,4 +131,8 @@ impl Dialect for GenericDialect {
|
||||||
fn supports_empty_projections(&self) -> bool {
|
fn supports_empty_projections(&self) -> bool {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn supports_nested_comments(&self) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -682,6 +682,12 @@ pub trait Dialect: Debug + Any {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if the dialect supports nested comments
|
||||||
|
/// e.g. `/* /* nested */ */`
|
||||||
|
fn supports_nested_comments(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
|
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
|
||||||
/// as an alias assignment operator, rather than a boolean expression.
|
/// as an alias assignment operator, rather than a boolean expression.
|
||||||
/// For example: the following statements are equivalent for such a dialect:
|
/// For example: the following statements are equivalent for such a dialect:
|
||||||
|
|
|
@ -241,6 +241,10 @@ impl Dialect for PostgreSqlDialect {
|
||||||
fn supports_empty_projections(&self) -> bool {
|
fn supports_empty_projections(&self) -> bool {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn supports_nested_comments(&self) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_create(parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
|
pub fn parse_create(parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
|
||||||
|
|
101
src/tokenizer.rs
101
src/tokenizer.rs
|
@ -1855,28 +1855,33 @@ impl<'a> Tokenizer<'a> {
|
||||||
) -> Result<Option<Token>, TokenizerError> {
|
) -> Result<Option<Token>, TokenizerError> {
|
||||||
let mut s = String::new();
|
let mut s = String::new();
|
||||||
let mut nested = 1;
|
let mut nested = 1;
|
||||||
let mut last_ch = ' ';
|
let supports_nested_comments = self.dialect.supports_nested_comments();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
match chars.next() {
|
match chars.next() {
|
||||||
Some(ch) => {
|
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
|
||||||
if last_ch == '/' && ch == '*' {
|
chars.next(); // consume the '*'
|
||||||
|
s.push('/');
|
||||||
|
s.push('*');
|
||||||
nested += 1;
|
nested += 1;
|
||||||
} else if last_ch == '*' && ch == '/' {
|
}
|
||||||
|
Some('*') if matches!(chars.peek(), Some('/')) => {
|
||||||
|
chars.next(); // consume the '/'
|
||||||
nested -= 1;
|
nested -= 1;
|
||||||
if nested == 0 {
|
if nested == 0 {
|
||||||
s.pop();
|
|
||||||
break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
|
break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
|
||||||
}
|
}
|
||||||
|
s.push('*');
|
||||||
|
s.push('/');
|
||||||
}
|
}
|
||||||
|
Some(ch) => {
|
||||||
s.push(ch);
|
s.push(ch);
|
||||||
last_ch = ch;
|
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
break self.tokenizer_error(
|
break self.tokenizer_error(
|
||||||
chars.location(),
|
chars.location(),
|
||||||
"Unexpected EOF while in a multi-line comment",
|
"Unexpected EOF while in a multi-line comment",
|
||||||
)
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2718,19 +2723,91 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenize_nested_multiline_comment() {
|
fn tokenize_nested_multiline_comment() {
|
||||||
let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1");
|
|
||||||
|
|
||||||
let dialect = GenericDialect {};
|
let dialect = GenericDialect {};
|
||||||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
|
let test_cases = vec![
|
||||||
let expected = vec![
|
(
|
||||||
|
"0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1",
|
||||||
|
vec![
|
||||||
Token::Number("0".to_string(), false),
|
Token::Number("0".to_string(), false),
|
||||||
Token::Whitespace(Whitespace::MultiLineComment(
|
Token::Whitespace(Whitespace::MultiLineComment(
|
||||||
"multi-line\n* \n/* comment \n /*comment*/*/ */ /comment".to_string(),
|
"multi-line\n* \n/* comment \n /*comment*/*/ ".into(),
|
||||||
|
)),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Div,
|
||||||
|
Token::Word(Word {
|
||||||
|
value: "comment".to_string(),
|
||||||
|
quote_style: None,
|
||||||
|
keyword: Keyword::COMMENT,
|
||||||
|
}),
|
||||||
|
Token::Mul,
|
||||||
|
Token::Div,
|
||||||
|
Token::Number("1".to_string(), false),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1",
|
||||||
|
vec![
|
||||||
|
Token::Number("0".to_string(), false),
|
||||||
|
Token::Whitespace(Whitespace::MultiLineComment(
|
||||||
|
"multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(),
|
||||||
)),
|
)),
|
||||||
Token::Number("1".to_string(), false),
|
Token::Number("1".to_string(), false),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"SELECT 1/* a /* b */ c */0",
|
||||||
|
vec![
|
||||||
|
Token::make_keyword("SELECT"),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Number("1".to_string(), false),
|
||||||
|
Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())),
|
||||||
|
Token::Number("0".to_string(), false),
|
||||||
|
],
|
||||||
|
),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
for (sql, expected) in test_cases {
|
||||||
|
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
|
||||||
compare(expected, tokens);
|
compare(expected, tokens);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokenize_nested_multiline_comment_empty() {
|
||||||
|
let sql = "select 1/*/**/*/0";
|
||||||
|
|
||||||
|
let dialect = GenericDialect {};
|
||||||
|
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
|
||||||
|
let expected = vec![
|
||||||
|
Token::make_keyword("select"),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Number("1".to_string(), false),
|
||||||
|
Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())),
|
||||||
|
Token::Number("0".to_string(), false),
|
||||||
|
];
|
||||||
|
|
||||||
|
compare(expected, tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokenize_nested_comments_if_not_supported() {
|
||||||
|
let dialect = SQLiteDialect {};
|
||||||
|
let sql = "SELECT 1/*/* nested comment */*/0";
|
||||||
|
let tokens = Tokenizer::new(&dialect, sql).tokenize();
|
||||||
|
let expected = vec![
|
||||||
|
Token::make_keyword("SELECT"),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Number("1".to_string(), false),
|
||||||
|
Token::Whitespace(Whitespace::MultiLineComment(
|
||||||
|
"/* nested comment ".to_string(),
|
||||||
|
)),
|
||||||
|
Token::Mul,
|
||||||
|
Token::Div,
|
||||||
|
Token::Number("0".to_string(), false),
|
||||||
|
];
|
||||||
|
|
||||||
|
compare(expected, tokens.unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenize_multiline_comment_with_even_asterisks() {
|
fn tokenize_multiline_comment_with_even_asterisks() {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue