Merge pull request #14 from crw5996/add-line-columns

Include row and column in tokenizer error messages
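
With this change, tokenizer errors report the (1-based) line and column at which tokenization failed, instead of the previous "TBD" placeholder. A minimal sketch of the caller-facing behavior, assuming the Tokenizer and TokenizerError types shown in the diff below (the input and exact message are illustrative, not taken from the commit):

    // '~' is not a character this tokenizer handles, so tokenize()
    // should return a positional error rather than the old "TBD" message.
    let mut tokenizer = Tokenizer::new("SELECT ~");
    match tokenizer.tokenize() {
        // Expected along the lines of:
        // "Tokenizer Error at Line: 1, Column: 8, unhandled char '~'"
        Err(TokenizerError(msg)) => println!("{}", msg),
        Ok(tokens) => println!("{:?}", tokens),
    }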
Andy Grove 2018-09-08 06:19:24 -06:00 committed by GitHub
commit 09f6a43fc9


@@ -33,7 +33,7 @@ pub enum Token {
     /// Comma
     Comma,
     /// Whitespace (space, tab, etc)
-    Whitespace,
+    Whitespace(char),
     /// Equality operator `=`
     Eq,
     /// Not Equals operator `!=` or `<>`
@@ -65,7 +65,7 @@ pub enum Token {
 }

 /// Tokenizer error
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub struct TokenizerError(String);

 lazy_static! {
@@ -141,6 +141,8 @@ lazy_static! {
 /// SQL Tokenizer
 pub struct Tokenizer {
     pub query: String,
+    pub line: u64,
+    pub col: u64,
 }

 impl Tokenizer {
@@ -148,6 +150,8 @@ impl Tokenizer {
     pub fn new(query: &str) -> Self {
         Self {
             query: query.to_string(),
+            line: 1,
+            col: 1,
         }
     }
@@ -158,13 +162,30 @@ impl Tokenizer {
         let mut tokens: Vec<Token> = vec![];

         while let Some(token) = self.next_token(&mut peekable)? {
+            match &token {
+                Token::Whitespace('\n') => {
+                    self.line += 1;
+                    self.col = 1;
+                },
+                Token::Whitespace('\t') => self.col += 4,
+                Token::Identifier(s) => self.col += s.len() as u64,
+                Token::Keyword(s) => self.col += s.len() as u64,
+                Token::Number(s) => self.col += s.len() as u64,
+                Token::String(s) => self.col += s.len() as u64,
+                _ => self.col += 1,
+            }
+
             tokens.push(token);
         }

         Ok(tokens
             .into_iter()
             .filter(|t| match t {
-                Token::Whitespace => false,
+                Token::Whitespace(..) => false,
                 _ => true,
             }).collect())
     }
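
The loop above derives the position from the tokens themselves: a newline token resets the column and advances the line, a tab counts as a fixed width of four columns, and multi-character tokens advance the column by their length (str::len, i.e. bytes, which matches the expected columns in the tests below since everything before the failing character there is ASCII). A self-contained sketch of the same bookkeeping, using a hypothetical track_position helper that is not part of the crate:

    // Minimal position tracker mirroring the diff's rules.
    fn track_position(line: &mut u64, col: &mut u64, lexeme: &str) {
        match lexeme {
            "\n" => {
                *line += 1; // newline: move to the next line,
                *col = 1;   // and reset to column 1
            }
            "\t" => *col += 4,           // tab: fixed width of 4 columns
            s => *col += s.len() as u64, // otherwise: advance by byte length
        }
    }

    fn main() {
        let (mut line, mut col) = (1u64, 1u64);
        for lexeme in ["SELECT", " ", "1", "\n", "FROM"] {
            track_position(&mut line, &mut col, lexeme);
        }
        println!("line {}, col {}", line, col); // prints "line 2, col 5"
    }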
@@ -177,7 +198,7 @@ impl Tokenizer {
             // whitespace
             ' ' | '\t' | '\n' => {
                 chars.next(); // consume
-                Ok(Some(Token::Whitespace))
+                Ok(Some(Token::Whitespace(ch)))
             }
             // identifier or keyword
             'a'...'z' | 'A'...'Z' | '_' | '@' => {
@@ -282,9 +303,9 @@ impl Tokenizer {
                             chars.next();
                             Ok(Some(Token::Neq))
                         }
-                        _ => Err(TokenizerError(format!("TBD"))),
+                        _ => Err(TokenizerError(format!("Tokenizer Error at Line: {}, Col: {}", self.line, self.col))),
                     },
-                    None => Err(TokenizerError(format!("TBD"))),
+                    None => Err(TokenizerError(format!("Tokenizer Error at Line: {}, Col: {}", self.line, self.col))),
                 }
             }
             '<' => {
@@ -318,7 +339,9 @@ impl Tokenizer {
                 }
             }
             _ => Err(TokenizerError(format!(
-                "unhandled char '{}' in tokenizer",
+                "Tokenizer Error at Line: {}, Column: {}, unhandled char '{}'",
+                self.line,
+                self.col,
                 ch
             ))),
         },
@@ -404,6 +427,34 @@ mod tests {
         compare(expected, tokens);
     }

+    #[test]
+    fn tokenize_invalid_string() {
+        let sql = String::from("\nمصطفىh");
+
+        let mut tokenizer = Tokenizer::new(&sql);
+        let tokens = tokenizer.tokenize();
+
+        match tokens {
+            Err(e) => assert_eq!(TokenizerError("Tokenizer Error at Line: 2, Column: 1, unhandled char \'م\'".to_string()), e),
+            _ => panic!("Test Failure in tokenize_invalid_string"),
+        }
+    }
+
+    #[test]
+    fn tokenize_invalid_string_cols() {
+        let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");
+
+        let mut tokenizer = Tokenizer::new(&sql);
+        let tokens = tokenizer.tokenize();
+
+        match tokens {
+            Err(e) => assert_eq!(TokenizerError("Tokenizer Error at Line: 3, Column: 24, unhandled char \'م\'".to_string()), e),
+            _ => panic!("Test Failure in tokenize_invalid_string_cols"),
+        }
+    }
+
     #[test]
     fn tokenize_is_null() {
         let sql = String::from("a IS NULL");