Support unicode whitespace (#482)

* Support unicode whitespace

* Add test
This commit is contained in:
Alex Yaroslavsky 2022-05-15 21:58:56 +03:00 committed by GitHub
parent 97a148aee4
commit dd805e9a6b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -653,6 +653,10 @@ impl<'a> Tokenizer<'a> {
);
Ok(Some(Token::Placeholder(String::from("$") + &s)))
}
// Whitespace check (including Unicode characters) should come last, as it also covers some of the characters matched above.
ch if ch.is_whitespace() => {
self.consume_and_return(chars, Token::Whitespace(Whitespace::Space))
}
other => self.consume_and_return(chars, Token::Char(other)),
},
None => Ok(None),
@ -1254,6 +1258,21 @@ mod tests {
compare(expected, tokens);
}
/// Checks that a non-ASCII whitespace character (U+2003) placed between an
/// ASCII space and a line feed is tokenized as a plain space token, while the
/// line feed is still reported as a newline token.
#[test]
fn tokenize_unicode_whitespace() {
    // Input is: ASCII space, U+2003, line feed.
    let input = String::from(" \u{2003}\n");
    let generic = GenericDialect {};
    let mut lexer = Tokenizer::new(&generic, &input);
    let actual = lexer.tokenize().unwrap();

    let space = Token::Whitespace(Whitespace::Space);
    let expected = vec![
        space.clone(),
        space,
        Token::Whitespace(Whitespace::Newline),
    ];

    compare(expected, actual);
}
#[test]
fn tokenize_mismatched_quotes() {
let sql = String::from("\"foo");