mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-18 05:00:16 +00:00
[snowflake] Support single line comments starting with '#' or '//' (#264)
Co-authored-by: Eyal Leshem <eyal@satoricyber.com>
This commit is contained in:
parent
e9aa87fa2c
commit
1c6077c0db
2 changed files with 81 additions and 14 deletions
|
@@ -21,6 +21,7 @@ use std::str::Chars;
|
|||
|
||||
use super::dialect::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
|
||||
use super::dialect::Dialect;
|
||||
use super::dialect::SnowflakeDialect;
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
|
@@ -209,7 +210,7 @@ pub enum Whitespace {
|
|||
Space,
|
||||
Newline,
|
||||
Tab,
|
||||
SingleLineComment(String),
|
||||
SingleLineComment { comment: String, prefix: String },
|
||||
MultiLineComment(String),
|
||||
}
|
||||
|
||||
|
@@ -219,7 +220,7 @@ impl fmt::Display for Whitespace {
|
|||
Whitespace::Space => f.write_str(" "),
|
||||
Whitespace::Newline => f.write_str("\n"),
|
||||
Whitespace::Tab => f.write_str("\t"),
|
||||
Whitespace::SingleLineComment(s) => write!(f, "--{}", s),
|
||||
Whitespace::SingleLineComment { prefix, comment } => write!(f, "{}{}", prefix, comment),
|
||||
Whitespace::MultiLineComment(s) => write!(f, "/*{}*/", s),
|
||||
}
|
||||
}
|
||||
|
@@ -370,12 +371,11 @@ impl<'a> Tokenizer<'a> {
|
|||
match chars.peek() {
|
||||
Some('-') => {
|
||||
chars.next(); // consume the second '-', starting a single-line comment
|
||||
let mut s = peeking_take_while(chars, |ch| ch != '\n');
|
||||
if let Some(ch) = chars.next() {
|
||||
assert_eq!(ch, '\n');
|
||||
s.push(ch);
|
||||
}
|
||||
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment(s))))
|
||||
let comment = self.tokenize_single_line_comment(chars);
|
||||
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_owned(),
|
||||
comment,
|
||||
})))
|
||||
}
|
||||
// a regular '-' operator
|
||||
_ => Ok(Some(Token::Minus)),
|
||||
|
@@ -388,6 +388,14 @@ impl<'a> Tokenizer<'a> {
|
|||
chars.next(); // consume the '*', starting a multi-line comment
|
||||
self.tokenize_multiline_comment(chars)
|
||||
}
|
||||
Some('/') if dialect_of!(self is SnowflakeDialect) => {
|
||||
chars.next(); // consume the second '/', starting a snowflake single-line comment
|
||||
let comment = self.tokenize_single_line_comment(chars);
|
||||
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "//".to_owned(),
|
||||
comment,
|
||||
})))
|
||||
}
|
||||
// a regular '/' operator
|
||||
_ => Ok(Some(Token::Div)),
|
||||
}
|
||||
|
@@ -448,6 +456,14 @@ impl<'a> Tokenizer<'a> {
|
|||
'^' => self.consume_and_return(chars, Token::Caret),
|
||||
'{' => self.consume_and_return(chars, Token::LBrace),
|
||||
'}' => self.consume_and_return(chars, Token::RBrace),
|
||||
'#' if dialect_of!(self is SnowflakeDialect) => {
|
||||
chars.next(); // consume the '#', starting a snowflake single-line comment
|
||||
let comment = self.tokenize_single_line_comment(chars);
|
||||
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "#".to_owned(),
|
||||
comment,
|
||||
})))
|
||||
}
|
||||
other => self.consume_and_return(chars, Token::Char(other)),
|
||||
},
|
||||
None => Ok(None),
|
||||
|
@@ -462,6 +478,16 @@ impl<'a> Tokenizer<'a> {
|
|||
})
|
||||
}
|
||||
|
||||
// Consume characters until newline
|
||||
fn tokenize_single_line_comment(&self, chars: &mut Peekable<Chars<'_>>) -> String {
|
||||
let mut comment = peeking_take_while(chars, |ch| ch != '\n');
|
||||
if let Some(ch) = chars.next() {
|
||||
assert_eq!(ch, '\n');
|
||||
comment.push(ch);
|
||||
}
|
||||
comment
|
||||
}
|
||||
|
||||
/// Tokenize an identifier or keyword, after the first char is already consumed.
|
||||
fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
|
||||
let mut s = first_char.to_string();
|
||||
|
@@ -819,9 +845,10 @@ mod tests {
|
|||
let tokens = tokenizer.tokenize().unwrap();
|
||||
let expected = vec![
|
||||
Token::Number("0".to_string()),
|
||||
Token::Whitespace(Whitespace::SingleLineComment(
|
||||
"this is a comment\n".to_string(),
|
||||
)),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: "this is a comment\n".to_string(),
|
||||
}),
|
||||
Token::Number("1".to_string()),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
|
@@ -834,9 +861,10 @@ mod tests {
|
|||
let dialect = GenericDialect {};
|
||||
let mut tokenizer = Tokenizer::new(&dialect, &sql);
|
||||
let tokens = tokenizer.tokenize().unwrap();
|
||||
let expected = vec![Token::Whitespace(Whitespace::SingleLineComment(
|
||||
"this is a comment".to_string(),
|
||||
))];
|
||||
let expected = vec![Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: "this is a comment".to_string(),
|
||||
})];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue