mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-12-23 11:12:51 +00:00
Correctly tokenize nested comments in Databricks, ClickHouse, and ANSI (#2044)
This commit is contained in:
parent
a430838974
commit
c0998832a2
4 changed files with 81 additions and 76 deletions
|
|
@ -33,4 +33,9 @@ impl Dialect for AnsiDialect {
|
|||
fn require_interval_qualifier(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// The SQL standard explicitly states that block comments nest.
|
||||
fn supports_nested_comments(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,4 +94,10 @@ impl Dialect for ClickHouseDialect {
|
|||
fn supports_group_by_with_modifier(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
    /// Nested block comments are supported since 2020.
|
||||
/// See <https://clickhouse.com/docs/whats-new/changelog/2020#backward-incompatible-change-2>
|
||||
fn supports_nested_comments(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,4 +64,9 @@ impl Dialect for DatabricksDialect {
|
|||
fn supports_struct_literal(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// See <https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-comment>
|
||||
fn supports_nested_comments(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
|
|
|||
141
src/tokenizer.rs
141
src/tokenizer.rs
|
|
@ -2419,7 +2419,7 @@ mod tests {
|
|||
use crate::dialect::{
|
||||
BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect,
|
||||
};
|
||||
use crate::test_utils::all_dialects_where;
|
||||
use crate::test_utils::{all_dialects_except, all_dialects_where};
|
||||
use core::fmt::Debug;
|
||||
|
||||
#[test]
|
||||
|
|
@ -3169,90 +3169,79 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn tokenize_nested_multiline_comment() {
|
||||
let dialect = GenericDialect {};
|
||||
let test_cases = vec![
|
||||
(
|
||||
"0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1",
|
||||
vec![
|
||||
Token::Number("0".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(
|
||||
"multi-line\n* \n/* comment \n /*comment*/*/ ".into(),
|
||||
)),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Div,
|
||||
Token::Word(Word {
|
||||
value: "comment".to_string(),
|
||||
quote_style: None,
|
||||
keyword: Keyword::COMMENT,
|
||||
}),
|
||||
Token::Mul,
|
||||
Token::Div,
|
||||
Token::Number("1".to_string(), false),
|
||||
],
|
||||
),
|
||||
(
|
||||
"0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1",
|
||||
vec![
|
||||
Token::Number("0".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(
|
||||
"multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(),
|
||||
)),
|
||||
Token::Number("1".to_string(), false),
|
||||
],
|
||||
),
|
||||
(
|
||||
"SELECT 1/* a /* b */ c */0",
|
||||
vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Number("1".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())),
|
||||
Token::Number("0".to_string(), false),
|
||||
],
|
||||
),
|
||||
];
|
||||
all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to(
|
||||
"0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1",
|
||||
vec![
|
||||
Token::Number("0".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(
|
||||
"multi-line\n* \n/* comment \n /*comment*/*/ ".into(),
|
||||
)),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Div,
|
||||
Token::Word(Word {
|
||||
value: "comment".to_string(),
|
||||
quote_style: None,
|
||||
keyword: Keyword::COMMENT,
|
||||
}),
|
||||
Token::Mul,
|
||||
Token::Div,
|
||||
Token::Number("1".to_string(), false),
|
||||
],
|
||||
);
|
||||
|
||||
for (sql, expected) in test_cases {
|
||||
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
|
||||
compare(expected, tokens);
|
||||
}
|
||||
all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to(
|
||||
"0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1",
|
||||
vec![
|
||||
Token::Number("0".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(
|
||||
"multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(),
|
||||
)),
|
||||
Token::Number("1".to_string(), false),
|
||||
],
|
||||
);
|
||||
|
||||
all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to(
|
||||
"SELECT 1/* a /* b */ c */0",
|
||||
vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Number("1".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())),
|
||||
Token::Number("0".to_string(), false),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_nested_multiline_comment_empty() {
|
||||
let sql = "select 1/*/**/*/0";
|
||||
|
||||
let dialect = GenericDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
|
||||
let expected = vec![
|
||||
Token::make_keyword("select"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Number("1".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())),
|
||||
Token::Number("0".to_string(), false),
|
||||
];
|
||||
|
||||
compare(expected, tokens);
|
||||
all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to(
|
||||
"select 1/*/**/*/0",
|
||||
vec![
|
||||
Token::make_keyword("select"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Number("1".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())),
|
||||
Token::Number("0".to_string(), false),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_nested_comments_if_not_supported() {
|
||||
let dialect = SQLiteDialect {};
|
||||
let sql = "SELECT 1/*/* nested comment */*/0";
|
||||
let tokens = Tokenizer::new(&dialect, sql).tokenize();
|
||||
let expected = vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Number("1".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(
|
||||
"/* nested comment ".to_string(),
|
||||
)),
|
||||
Token::Mul,
|
||||
Token::Div,
|
||||
Token::Number("0".to_string(), false),
|
||||
];
|
||||
|
||||
compare(expected, tokens.unwrap());
|
||||
all_dialects_except(|d| d.supports_nested_comments()).tokenizes_to(
|
||||
"SELECT 1/*/* nested comment */*/0",
|
||||
vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Number("1".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(
|
||||
"/* nested comment ".to_string(),
|
||||
)),
|
||||
Token::Mul,
|
||||
Token::Div,
|
||||
Token::Number("0".to_string(), false),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue