diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs
index 6eb7d836..9daa2620 100644
--- a/src/sqltokenizer.rs
+++ b/src/sqltokenizer.rs
@@ -260,16 +260,15 @@ impl<'a> Tokenizer<'a> {
         //println!("next_token: {:?}", chars.peek());
         match chars.peek() {
             Some(&ch) => match ch {
-                ' ' => {
-                    chars.next();
-                    Ok(Some(Token::Whitespace(Whitespace::Space)))
-                }
-                '\t' => {
-                    chars.next();
-                    Ok(Some(Token::Whitespace(Whitespace::Tab)))
-                }
-                '\n' => {
+                ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
+                '\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)),
+                '\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)),
+                '\r' => {
+                    // Emit a single Whitespace::Newline token for \r and \r\n
                     chars.next();
+                    if let Some('\n') = chars.peek() {
+                        chars.next();
+                    }
                     Ok(Some(Token::Whitespace(Whitespace::Newline)))
                 }
                 'N' => {
@@ -749,6 +748,26 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_newlines() {
+        let sql = String::from("line1\nline2\rline3\r\nline4\r");
+
+        let dialect = GenericSqlDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::make_word("line1", None),
+            Token::Whitespace(Whitespace::Newline),
+            Token::make_word("line2", None),
+            Token::Whitespace(Whitespace::Newline),
+            Token::make_word("line3", None),
+            Token::Whitespace(Whitespace::Newline),
+            Token::make_word("line4", None),
+            Token::Whitespace(Whitespace::Newline),
+        ];
+        compare(expected, tokens);
+    }
+
     fn compare(expected: Vec<Token>, actual: Vec<Token>) {
         //println!("------------------------------");
         //println!("tokens = {:?}", actual);
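
Two details of the diff are worth calling out. First, the refactored arms delegate to an existing `consume_and_return` helper whose definition is not shown here; judging by the call sites, it advances the iterator one character and returns the given token wrapped in `Ok(Some(...))`. Second, the new `'\r'` arm folds an immediately following `'\n'` into the same `Whitespace::Newline` token, so `\r\n` does not produce two newlines. The snippet below is a minimal standalone sketch of that carriage-return handling using only the standard library; the function and variable names are illustrative and are not part of the actual tokenizer.

```rust
use std::iter::Peekable;
use std::str::Chars;

/// Consume one '\r' and, if it is immediately followed by '\n',
/// consume that as well, so "\r\n" counts as a single newline.
/// Sketch only: the real logic lives inline in Tokenizer::next_token.
fn consume_carriage_return(chars: &mut Peekable<Chars<'_>>) {
    chars.next(); // consume the '\r'
    if let Some('\n') = chars.peek() {
        chars.next(); // fold the '\n' of a "\r\n" pair into the same newline
    }
}

fn main() {
    let input = "line1\r\nline2\rline3";
    let mut chars = input.chars().peekable();
    let mut newlines = 0;
    while let Some(&c) = chars.peek() {
        match c {
            '\r' => {
                consume_carriage_return(&mut chars);
                newlines += 1;
            }
            '\n' => {
                chars.next();
                newlines += 1;
            }
            _ => {
                chars.next();
            }
        }
    }
    assert_eq!(newlines, 2); // "\r\n" counted once, bare "\r" counted once
}
```

Because the lookahead uses `peek()` before the second `next()`, a bare `'\r'` that is not followed by `'\n'` still yields exactly one newline token, which is what the new `tokenize_newlines` test asserts for the trailing `"line4\r"`.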