mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-11-29 01:40:53 +00:00
Merge pull request #54 from nickolay/windows-newlines
Support \r and \r\n line breaks in tokenizer
This commit is contained in:
commit
64b1ea7a25
1 changed file with 28 additions and 9 deletions
|
|
@ -260,16 +260,15 @@ impl<'a> Tokenizer<'a> {
|
||||||
//println!("next_token: {:?}", chars.peek());
|
//println!("next_token: {:?}", chars.peek());
|
||||||
match chars.peek() {
|
match chars.peek() {
|
||||||
Some(&ch) => match ch {
|
Some(&ch) => match ch {
|
||||||
' ' => {
|
' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
|
||||||
chars.next();
|
'\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)),
|
||||||
Ok(Some(Token::Whitespace(Whitespace::Space)))
|
'\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)),
|
||||||
}
|
'\r' => {
|
||||||
'\t' => {
|
// Emit a single Whitespace::Newline token for \r and \r\n
|
||||||
chars.next();
|
|
||||||
Ok(Some(Token::Whitespace(Whitespace::Tab)))
|
|
||||||
}
|
|
||||||
'\n' => {
|
|
||||||
chars.next();
|
chars.next();
|
||||||
|
if let Some('\n') = chars.peek() {
|
||||||
|
chars.next();
|
||||||
|
}
|
||||||
Ok(Some(Token::Whitespace(Whitespace::Newline)))
|
Ok(Some(Token::Whitespace(Whitespace::Newline)))
|
||||||
}
|
}
|
||||||
'N' => {
|
'N' => {
|
||||||
|
|
@ -749,6 +748,26 @@ mod tests {
|
||||||
compare(expected, tokens);
|
compare(expected, tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokenize_newlines() {
|
||||||
|
let sql = String::from("line1\nline2\rline3\r\nline4\r");
|
||||||
|
|
||||||
|
let dialect = GenericSqlDialect {};
|
||||||
|
let mut tokenizer = Tokenizer::new(&dialect, &sql);
|
||||||
|
let tokens = tokenizer.tokenize().unwrap();
|
||||||
|
let expected = vec![
|
||||||
|
Token::make_word("line1", None),
|
||||||
|
Token::Whitespace(Whitespace::Newline),
|
||||||
|
Token::make_word("line2", None),
|
||||||
|
Token::Whitespace(Whitespace::Newline),
|
||||||
|
Token::make_word("line3", None),
|
||||||
|
Token::Whitespace(Whitespace::Newline),
|
||||||
|
Token::make_word("line4", None),
|
||||||
|
Token::Whitespace(Whitespace::Newline),
|
||||||
|
];
|
||||||
|
compare(expected, tokens);
|
||||||
|
}
|
||||||
|
|
||||||
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
|
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
|
||||||
//println!("------------------------------");
|
//println!("------------------------------");
|
||||||
//println!("tokens = {:?}", actual);
|
//println!("tokens = {:?}", actual);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue