mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-10 17:28:01 +00:00
Also tokenize non-alphanumeric characters into a Char token, since they can be tab-separated values in a COPY payload
This commit is contained in:
parent
7c7b67b0bc
commit
74b34faaf1
2 changed files with 38 additions and 24 deletions
|
@ -1582,6 +1582,8 @@ A Fateful Reflection of a Waitress And a Boat who must Discover a Sumo Wrestler
|
|||
Kwara & Kogi
|
||||
{"Deleted Scenes","Behind the Scenes"}
|
||||
'awe':5 'awe-inspir':4 'barbarella':1 'cat':13 'conquer':16 'dog':18 'feminist':10 'inspir':6 'monasteri':21 'must':15 'stori':7 'streetcar':2
|
||||
PHP ₱ USD $
|
||||
|
||||
\\.
|
||||
"#);
|
||||
let mut parser = parser(&sql);
|
||||
|
|
|
@ -34,6 +34,7 @@ pub enum Token {
|
|||
Number(String),
|
||||
/// String literal
|
||||
String(String),
|
||||
Char(char),
|
||||
/// Single quoted string: i.e: 'string'
|
||||
SingleQuotedString(String),
|
||||
/// Double quoted string: i.e: "string"
|
||||
|
@ -97,6 +98,7 @@ impl ToString for Token{
|
|||
Token::Keyword(ref k) =>k.to_string(),
|
||||
Token::Number(ref n) => n.to_string(),
|
||||
Token::String(ref s) => s.to_string(),
|
||||
Token::Char(ref c) => c.to_string(),
|
||||
Token::SingleQuotedString(ref s) => format!("'{}'",s),
|
||||
Token::DoubleQuotedString(ref s) => format!("\"{}\"",s),
|
||||
Token::Comma => ",".to_string(),
|
||||
|
@ -371,10 +373,7 @@ impl<'a> Tokenizer<'a> {
|
|||
'&' => self.consume_and_return(chars, Token::Ampersand),
|
||||
'{' => self.consume_and_return(chars, Token::LBrace),
|
||||
'}' => self.consume_and_return(chars, Token::RBrace),
|
||||
_ => Err(TokenizerError(format!(
|
||||
"Tokenizer Error at Line: {}, Column: {}, unhandled char '{}'",
|
||||
self.line, self.col, ch
|
||||
))),
|
||||
other => self.consume_and_return(chars, Token::Char(other))
|
||||
},
|
||||
None => Ok(None),
|
||||
}
|
||||
|
@ -492,17 +491,19 @@ mod tests {
|
|||
|
||||
let dialect = GenericSqlDialect {};
|
||||
let mut tokenizer = Tokenizer::new(&dialect, &sql);
|
||||
let tokens = tokenizer.tokenize();
|
||||
let tokens = tokenizer.tokenize().unwrap();
|
||||
println!("tokens: {:#?}", tokens);
|
||||
let expected = vec![
|
||||
Token::Whitespace(Whitespace::Newline),
|
||||
Token::Char('م'),
|
||||
Token::Char('ص'),
|
||||
Token::Char('ط'),
|
||||
Token::Char('ف'),
|
||||
Token::Char('ى'),
|
||||
Token::Identifier("h".to_string())
|
||||
];
|
||||
compare(expected, tokens);
|
||||
|
||||
match tokens {
|
||||
Err(e) => assert_eq!(
|
||||
TokenizerError(
|
||||
"Tokenizer Error at Line: 2, Column: 1, unhandled char \'م\'".to_string()
|
||||
),
|
||||
e
|
||||
),
|
||||
_ => panic!("Test Failure in tokenize_invalid_string"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -511,16 +512,27 @@ mod tests {
|
|||
|
||||
let dialect = GenericSqlDialect {};
|
||||
let mut tokenizer = Tokenizer::new(&dialect, &sql);
|
||||
let tokens = tokenizer.tokenize();
|
||||
match tokens {
|
||||
Err(e) => assert_eq!(
|
||||
TokenizerError(
|
||||
"Tokenizer Error at Line: 3, Column: 24, unhandled char \'م\'".to_string()
|
||||
),
|
||||
e
|
||||
),
|
||||
_ => panic!("Test Failure in tokenize_invalid_string_cols"),
|
||||
}
|
||||
let tokens = tokenizer.tokenize().unwrap();
|
||||
println!("tokens: {:#?}", tokens);
|
||||
let expected = vec![
|
||||
Token::Whitespace(Whitespace::Newline),
|
||||
Token::Whitespace(Whitespace::Newline),
|
||||
Token::Keyword("SELECT".into()),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Mult,
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Keyword("FROM".into()),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Keyword("TABLE".into()),
|
||||
Token::Whitespace(Whitespace::Tab),
|
||||
Token::Char('م'),
|
||||
Token::Char('ص'),
|
||||
Token::Char('ط'),
|
||||
Token::Char('ف'),
|
||||
Token::Char('ى'),
|
||||
Token::Identifier("h".to_string()),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue