Also tokenize non-alphanumeric characters into Token::Char, since they can appear as tab-separated values in a COPY payload

Jovansonlee Cesar 2018-09-26 23:59:52 +08:00
parent 7c7b67b0bc
commit 74b34faaf1
2 changed files with 38 additions and 24 deletions
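
To illustrate the behavior the commit message describes: a COPY payload row is a line of tab-separated fields, and a field may contain characters that have no dedicated token, such as '$' or an embedded space. With this change such characters become a catch-all Char token rather than aborting tokenization. The following is a minimal standalone sketch of that idea, not the crate's actual tokenizer; the names Tok and toy_tokenize are invented for illustration.

    #[derive(Debug, PartialEq)]
    enum Tok {
        Identifier(String),
        Tab,
        // Catch-all for any character without a dedicated token,
        // mirroring the Token::Char variant added in this commit.
        Char(char),
    }

    fn toy_tokenize(input: &str) -> Vec<Tok> {
        let mut out = Vec::new();
        let mut chars = input.chars().peekable();
        while let Some(&c) = chars.peek() {
            if c.is_alphanumeric() || c == '_' {
                // Accumulate an identifier-like word.
                let mut word = String::new();
                while let Some(&nc) = chars.peek() {
                    if nc.is_alphanumeric() || nc == '_' {
                        word.push(nc);
                        chars.next();
                    } else {
                        break;
                    }
                }
                out.push(Tok::Identifier(word));
            } else if c == '\t' {
                chars.next();
                out.push(Tok::Tab);
            } else {
                // Previously an "unhandled char" error; now it becomes Char(c).
                chars.next();
                out.push(Tok::Char(c));
            }
        }
        out
    }

    fn main() {
        // A COPY-style row: tab-separated fields, one of which contains a space and '$'.
        let row = "PHP\tUSD $";
        let tokens = toy_tokenize(row);
        assert_eq!(
            tokens,
            vec![
                Tok::Identifier("PHP".to_string()),
                Tok::Tab,
                Tok::Identifier("USD".to_string()),
                Tok::Char(' '),
                Tok::Char('$'),
            ]
        );
        println!("{:?}", tokens);
    }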


@@ -1582,6 +1582,8 @@ A Fateful Reflection of a Waitress And a Boat who must Discover a Sumo Wrestler
 Kwara & Kogi
 {"Deleted Scenes","Behind the Scenes"}
 'awe':5 'awe-inspir':4 'barbarella':1 'cat':13 'conquer':16 'dog':18 'feminist':10 'inspir':6 'monasteri':21 'must':15 'stori':7 'streetcar':2
+PHP USD $
 \\.
 "#);
     let mut parser = parser(&sql);


@@ -34,6 +34,7 @@ pub enum Token {
     Number(String),
     /// String literal
     String(String),
+    Char(char),
     /// Single quoted string: i.e: 'string'
     SingleQuotedString(String),
     /// Double quoted string: i.e: "string"
@@ -97,6 +98,7 @@ impl ToString for Token{
             Token::Keyword(ref k) =>k.to_string(),
             Token::Number(ref n) => n.to_string(),
             Token::String(ref s) => s.to_string(),
+            Token::Char(ref c) => c.to_string(),
             Token::SingleQuotedString(ref s) => format!("'{}'",s),
             Token::DoubleQuotedString(ref s) => format!("\"{}\"",s),
             Token::Comma => ",".to_string(),
@@ -371,10 +373,7 @@ impl<'a> Tokenizer<'a> {
                '&' => self.consume_and_return(chars, Token::Ampersand),
                '{' => self.consume_and_return(chars, Token::LBrace),
                '}' => self.consume_and_return(chars, Token::RBrace),
-               _ => Err(TokenizerError(format!(
-                   "Tokenizer Error at Line: {}, Column: {}, unhandled char '{}'",
-                   self.line, self.col, ch
-               ))),
+               other => self.consume_and_return(chars, Token::Char(other))
            },
            None => Ok(None),
        }
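
An aside on the catch-all arm above: consume_and_return itself is not part of this diff, and its exact signature here is an assumption. The pattern it enables is "peek, then consume and wrap": the character is only peeked at in the match, so the helper has to advance the iterator before returning the token. A standalone sketch of that pattern follows (not the crate's code; Tok and next_token are invented names).

    use std::iter::Peekable;
    use std::str::Chars;

    #[derive(Debug, PartialEq)]
    enum Tok {
        Ampersand,
        Char(char),
    }

    // Assumed shape of the helper: advance past the peeked character, wrap the token.
    fn consume_and_return(chars: &mut Peekable<Chars<'_>>, t: Tok) -> Option<Tok> {
        chars.next();
        Some(t)
    }

    fn next_token(chars: &mut Peekable<Chars<'_>>) -> Option<Tok> {
        match chars.peek() {
            Some(&ch) => match ch {
                '&' => consume_and_return(chars, Tok::Ampersand),
                // Any other character becomes a catch-all Char token instead of an error.
                other => consume_and_return(chars, Tok::Char(other)),
            },
            None => None,
        }
    }

    fn main() {
        let mut chars = "&$".chars().peekable();
        assert_eq!(next_token(&mut chars), Some(Tok::Ampersand));
        assert_eq!(next_token(&mut chars), Some(Tok::Char('$')));
        assert_eq!(next_token(&mut chars), None);
    }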
@@ -492,17 +491,19 @@ mod tests {
        let dialect = GenericSqlDialect {};
        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-       let tokens = tokenizer.tokenize();
-       match tokens {
-           Err(e) => assert_eq!(
-               TokenizerError(
-                   "Tokenizer Error at Line: 2, Column: 1, unhandled char \'م\'".to_string()
-               ),
-               e
-           ),
-           _ => panic!("Test Failure in tokenize_invalid_string"),
-       }
+       let tokens = tokenizer.tokenize().unwrap();
+       println!("tokens: {:#?}", tokens);
+       let expected = vec![
+           Token::Whitespace(Whitespace::Newline),
+           Token::Char('م'),
+           Token::Char('ص'),
+           Token::Char('ط'),
+           Token::Char('ف'),
+           Token::Char('ى'),
+           Token::Identifier("h".to_string())
+       ];
+       compare(expected, tokens);
    }
    #[test]
@@ -511,16 +512,27 @@ mod tests {
        let dialect = GenericSqlDialect {};
        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-       let tokens = tokenizer.tokenize();
-       match tokens {
-           Err(e) => assert_eq!(
-               TokenizerError(
-                   "Tokenizer Error at Line: 3, Column: 24, unhandled char \'م\'".to_string()
-               ),
-               e
-           ),
-           _ => panic!("Test Failure in tokenize_invalid_string_cols"),
-       }
+       let tokens = tokenizer.tokenize().unwrap();
+       println!("tokens: {:#?}", tokens);
+       let expected = vec![
+           Token::Whitespace(Whitespace::Newline),
+           Token::Whitespace(Whitespace::Newline),
+           Token::Keyword("SELECT".into()),
+           Token::Whitespace(Whitespace::Space),
+           Token::Mult,
+           Token::Whitespace(Whitespace::Space),
+           Token::Keyword("FROM".into()),
+           Token::Whitespace(Whitespace::Space),
+           Token::Keyword("TABLE".into()),
+           Token::Whitespace(Whitespace::Tab),
+           Token::Char('م'),
+           Token::Char('ص'),
+           Token::Char('ط'),
+           Token::Char('ف'),
+           Token::Char('ى'),
+           Token::Identifier("h".to_string()),
+       ];
+       compare(expected, tokens);
    }
    #[test]