Non-Latin characters support (#840)

* Non-Latin characters

---------

Co-authored-by: Maciej Skrzypkowski <maciej.skrzypkowski@satoricyber.com>

* Test for mysql

---------

Co-authored-by: Maciej Skrzypkowski <maciej.skrzypkowski@satoricyber.com>
This commit is contained in:
Maciej Skrzypkowski 2023-03-23 12:07:17 +01:00 committed by GitHub
parent eb67d489bb
commit a1b7341b87
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 34 additions and 30 deletions

View file

@@ -1519,7 +1519,7 @@ mod tests {
#[test]
fn tokenize_invalid_string() {
let sql = String::from("\nمصطفىh");
let sql = String::from("\n💝مصطفىh");
let dialect = GenericDialect {};
let mut tokenizer = Tokenizer::new(&dialect, &sql);
@@ -1527,12 +1527,8 @@ mod tests {
// println!("tokens: {:#?}", tokens);
let expected = vec![
Token::Whitespace(Whitespace::Newline),
Token::Char('م'),
Token::Char('ص'),
Token::Char('ط'),
Token::Char('ف'),
Token::Char('ى'),
Token::make_word("h", None),
Token::Char('💝'),
Token::make_word("مصطفىh", None),
];
compare(expected, tokens);
}
@@ -1582,7 +1578,7 @@ mod tests {
#[test]
fn tokenize_invalid_string_cols() {
let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");
let sql = String::from("\n\nSELECT * FROM table\t💝مصطفىh");
let dialect = GenericDialect {};
let mut tokenizer = Tokenizer::new(&dialect, &sql);
@@ -1599,12 +1595,8 @@ mod tests {
Token::Whitespace(Whitespace::Space),
Token::make_keyword("table"),
Token::Whitespace(Whitespace::Tab),
Token::Char('م'),
Token::Char('ص'),
Token::Char('ط'),
Token::Char('ف'),
Token::Char('ى'),
Token::make_word("h", None),
Token::Char('💝'),
Token::make_word("مصطفىh", None),
];
compare(expected, tokens);
}