diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 64406698..f4587f32 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -21,6 +21,7 @@ use std::str::Chars;
 
 use super::dialect::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
 use super::dialect::Dialect;
+use super::dialect::SnowflakeDialect;
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
 use std::fmt;
@@ -209,7 +210,7 @@ pub enum Whitespace {
     Space,
     Newline,
     Tab,
-    SingleLineComment(String),
+    SingleLineComment { comment: String, prefix: String },
     MultiLineComment(String),
 }
 
@@ -219,7 +220,7 @@ impl fmt::Display for Whitespace {
             Whitespace::Space => f.write_str(" "),
             Whitespace::Newline => f.write_str("\n"),
             Whitespace::Tab => f.write_str("\t"),
-            Whitespace::SingleLineComment(s) => write!(f, "--{}", s),
+            Whitespace::SingleLineComment { prefix, comment } => write!(f, "{}{}", prefix, comment),
             Whitespace::MultiLineComment(s) => write!(f, "/*{}*/", s),
         }
     }
@@ -370,12 +371,11 @@ impl<'a> Tokenizer<'a> {
                     match chars.peek() {
                         Some('-') => {
                             chars.next(); // consume the second '-', starting a single-line comment
-                            let mut s = peeking_take_while(chars, |ch| ch != '\n');
-                            if let Some(ch) = chars.next() {
-                                assert_eq!(ch, '\n');
-                                s.push(ch);
-                            }
-                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment(s))))
+                            let comment = self.tokenize_single_line_comment(chars);
+                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
+                                prefix: "--".to_owned(),
+                                comment,
+                            })))
                         }
                         // a regular '-' operator
                         _ => Ok(Some(Token::Minus)),
@@ -388,6 +388,14 @@ impl<'a> Tokenizer<'a> {
                             chars.next(); // consume the '*', starting a multi-line comment
                             self.tokenize_multiline_comment(chars)
                         }
+                        Some('/') if dialect_of!(self is SnowflakeDialect) => {
+                            chars.next(); // consume the second '/', starting a snowflake single-line comment
+                            let comment = self.tokenize_single_line_comment(chars);
+                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
+                                prefix: "//".to_owned(),
+                                comment,
+                            })))
+                        }
                         // a regular '/' operator
                         _ => Ok(Some(Token::Div)),
                     }
@@ -448,6 +456,14 @@ impl<'a> Tokenizer<'a> {
                 '^' => self.consume_and_return(chars, Token::Caret),
                 '{' => self.consume_and_return(chars, Token::LBrace),
                 '}' => self.consume_and_return(chars, Token::RBrace),
+                '#' if dialect_of!(self is SnowflakeDialect) => {
+                    chars.next(); // consume the '#', starting a snowflake single-line comment
+                    let comment = self.tokenize_single_line_comment(chars);
+                    Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "#".to_owned(),
+                        comment,
+                    })))
+                }
                 other => self.consume_and_return(chars, Token::Char(other)),
             },
             None => Ok(None),
@@ -462,6 +478,16 @@ impl<'a> Tokenizer<'a> {
         })
     }
 
+    // Consume characters until newline
+    fn tokenize_single_line_comment(&self, chars: &mut Peekable<Chars<'_>>) -> String {
+        let mut comment = peeking_take_while(chars, |ch| ch != '\n');
+        if let Some(ch) = chars.next() {
+            assert_eq!(ch, '\n');
+            comment.push(ch);
+        }
+        comment
+    }
+
     /// Tokenize an identifier or keyword, after the first char is already consumed.
     fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
         let mut s = first_char.to_string();
@@ -819,9 +845,10 @@ mod tests {
         let tokens = tokenizer.tokenize().unwrap();
         let expected = vec![
             Token::Number("0".to_string()),
-            Token::Whitespace(Whitespace::SingleLineComment(
-                "this is a comment\n".to_string(),
-            )),
+            Token::Whitespace(Whitespace::SingleLineComment {
+                prefix: "--".to_string(),
+                comment: "this is a comment\n".to_string(),
+            }),
             Token::Number("1".to_string()),
         ];
         compare(expected, tokens);
@@ -834,9 +861,10 @@ mod tests {
         let dialect = GenericDialect {};
         let mut tokenizer = Tokenizer::new(&dialect, &sql);
         let tokens = tokenizer.tokenize().unwrap();
-        let expected = vec![Token::Whitespace(Whitespace::SingleLineComment(
-            "this is a comment".to_string(),
-        ))];
+        let expected = vec![Token::Whitespace(Whitespace::SingleLineComment {
+            prefix: "--".to_string(),
+            comment: "this is a comment".to_string(),
+        })];
         compare(expected, tokens);
     }
 
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs
index 086d5726..c1aa41db 100644
--- a/tests/sqlparser_snowflake.rs
+++ b/tests/sqlparser_snowflake.rs
@@ -12,6 +12,7 @@
 use sqlparser::ast::*;
 use sqlparser::dialect::{GenericDialect, SnowflakeDialect};
 use sqlparser::test_utils::*;
+use sqlparser::tokenizer::*;
 
 #[test]
 fn test_snowflake_create_table() {
@@ -24,6 +25,44 @@ fn test_snowflake_create_table() {
     }
 }
 
+#[test]
+fn test_snowflake_single_line_tokenize() {
+    let sql = "CREATE TABLE# this is a comment \ntable_1";
+    let dialect = SnowflakeDialect {};
+    let mut tokenizer = Tokenizer::new(&dialect, &sql);
+    let tokens = tokenizer.tokenize().unwrap();
+
+    let expected = vec![
+        Token::make_keyword("CREATE"),
+        Token::Whitespace(Whitespace::Space),
+        Token::make_keyword("TABLE"),
+        Token::Whitespace(Whitespace::SingleLineComment {
+            prefix: "#".to_string(),
+            comment: " this is a comment \n".to_string(),
+        }),
+        Token::make_word("table_1", None),
+    ];
+
+    assert_eq!(expected, tokens);
+
+    let sql = "CREATE TABLE// this is a comment \ntable_1";
+    let mut tokenizer = Tokenizer::new(&dialect, &sql);
+    let tokens = tokenizer.tokenize().unwrap();
+
+    let expected = vec![
+        Token::make_keyword("CREATE"),
+        Token::Whitespace(Whitespace::Space),
+        Token::make_keyword("TABLE"),
+        Token::Whitespace(Whitespace::SingleLineComment {
+            prefix: "//".to_string(),
+            comment: " this is a comment \n".to_string(),
+        }),
+        Token::make_word("table_1", None),
+    ];
+
+    assert_eq!(expected, tokens);
+}
+
 fn snowflake_and_generic() -> TestedDialects {
     TestedDialects {
         dialects: vec![Box::new(SnowflakeDialect {}), Box::new(GenericDialect {})],
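
A minimal consumer-side sketch (not part of the patch, using only the public paths exercised in the test file above): under SnowflakeDialect the tokenizer now emits the struct-style SingleLineComment variant, and Display round-trips the comment with whichever prefix introduced it instead of hard-coding "--".

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::tokenizer::{Token, Tokenizer, Whitespace};

    fn main() {
        // `//` opens a single-line comment only under SnowflakeDialect;
        // other dialects fall through to the `_ => Token::Div` arm and
        // tokenize the two slashes as division operators.
        let sql = "SELECT 1 // trailing comment\n";
        let dialect = SnowflakeDialect {};
        let mut tokenizer = Tokenizer::new(&dialect, &sql);
        let tokens = tokenizer.tokenize().unwrap();

        for token in &tokens {
            if let Token::Whitespace(Whitespace::SingleLineComment { prefix, comment }) = token {
                assert_eq!(prefix, "//");
                assert_eq!(comment, " trailing comment\n");
                // Display re-emits the comment with its original prefix.
                assert_eq!(token.to_string(), "// trailing comment\n");
            }
        }
    }

Keeping the prefix on the variant (rather than a separate variant per comment style) is what lets the single Display impl reproduce the input verbatim for `--`, `//`, and `#` comments alike.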