Merge pull request #54 from nickolay/windows-newlines

Support \r and \r\n line breaks in tokenizer
Commit: 64b1ea7a25
Author: Andy Grove
Date: 2019-04-27 08:54:20 -06:00 (committed via GitHub)


@@ -260,16 +260,15 @@ impl<'a> Tokenizer<'a> {
         //println!("next_token: {:?}", chars.peek());
         match chars.peek() {
             Some(&ch) => match ch {
-                ' ' => {
-                    chars.next();
-                    Ok(Some(Token::Whitespace(Whitespace::Space)))
-                }
-                '\t' => {
-                    chars.next();
-                    Ok(Some(Token::Whitespace(Whitespace::Tab)))
-                }
-                '\n' => {
+                ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
+                '\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)),
+                '\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)),
+                '\r' => {
+                    // Emit a single Whitespace::Newline token for \r and \r\n
                     chars.next();
+                    if let Some('\n') = chars.peek() {
+                        chars.next();
+                    }
                     Ok(Some(Token::Whitespace(Whitespace::Newline)))
                 }
                 'N' => {
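The ' ', '\t', and '\n' arms now delegate to a consume_and_return helper whose definition falls outside this hunk. Judging from the call sites, it presumably just advances past the peeked character and wraps the supplied token; the sketch below is an inference, and the exact signature (including the TokenizerError name) is an assumption rather than part of this diff.

```rust
// Sketch inferred from the call sites above, not shown in this hunk:
// consume the single character the caller matched on and return the token.
fn consume_and_return(
    &self,
    chars: &mut Peekable<Chars<'_>>,
    t: Token, // e.g. Token::Whitespace(Whitespace::Newline)
) -> Result<Option<Token>, TokenizerError> {
    chars.next(); // skip the character that was peeked
    Ok(Some(t))   // error type name assumed here
}
```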
@@ -749,6 +748,26 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_newlines() {
+        let sql = String::from("line1\nline2\rline3\r\nline4\r");
+
+        let dialect = GenericSqlDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::make_word("line1", None),
+            Token::Whitespace(Whitespace::Newline),
+            Token::make_word("line2", None),
+            Token::Whitespace(Whitespace::Newline),
+            Token::make_word("line3", None),
+            Token::Whitespace(Whitespace::Newline),
+            Token::make_word("line4", None),
+            Token::Whitespace(Whitespace::Newline),
+        ];
+        compare(expected, tokens);
+    }
+
     fn compare(expected: Vec<Token>, actual: Vec<Token>) {
         //println!("------------------------------");
         //println!("tokens = {:?}", actual);