mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-27 09:24:04 +00:00
Merge pull request #86 from nickolay/pr/token-refactor
Internal improvements to Parser::next_token/prev_token. This reduces the number of helper functions used by next_token()/prev_token() while slightly improving performance and reducing the chances of coding errors when using prev_token() after hitting end-of-file.
This commit is contained in:
commit
2308c1c6f7
1 changed files with 46 additions and 73 deletions
119
src/sqlparser.rs
119
src/sqlparser.rs
|
@ -66,6 +66,7 @@ impl Error for ParserError {}
|
||||||
/// SQL Parser
|
/// SQL Parser
|
||||||
pub struct Parser {
|
pub struct Parser {
|
||||||
tokens: Vec<Token>,
|
tokens: Vec<Token>,
|
||||||
|
/// The index of the first unprocessed token in `self.tokens`
|
||||||
index: usize,
|
index: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -558,7 +559,8 @@ impl Parser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return first non-whitespace token that has not yet been processed
|
/// Return the first non-whitespace token that has not yet been processed
|
||||||
|
/// (or None if reached end-of-file)
|
||||||
pub fn peek_token(&self) -> Option<Token> {
|
pub fn peek_token(&self) -> Option<Token> {
|
||||||
self.peek_nth_token(0)
|
self.peek_nth_token(0)
|
||||||
}
|
}
|
||||||
|
@ -567,78 +569,49 @@ impl Parser {
|
||||||
pub fn peek_nth_token(&self, mut n: usize) -> Option<Token> {
|
pub fn peek_nth_token(&self, mut n: usize) -> Option<Token> {
|
||||||
let mut index = self.index;
|
let mut index = self.index;
|
||||||
loop {
|
loop {
|
||||||
match self.token_at(index) {
|
index += 1;
|
||||||
Some(Token::Whitespace(_)) => {
|
match self.tokens.get(index - 1) {
|
||||||
index += 1;
|
Some(Token::Whitespace(_)) => continue,
|
||||||
}
|
non_whitespace => {
|
||||||
Some(token) => {
|
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
return Some(token);
|
return non_whitespace.cloned();
|
||||||
}
|
}
|
||||||
index += 1;
|
|
||||||
n -= 1;
|
n -= 1;
|
||||||
}
|
}
|
||||||
None => {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the next token skipping whitespace and increment the token index
|
/// Return the first non-whitespace token that has not yet been processed
|
||||||
|
/// (or None if reached end-of-file) and mark it as processed. OK to call
|
||||||
|
/// repeatedly after reaching EOF.
|
||||||
pub fn next_token(&mut self) -> Option<Token> {
|
pub fn next_token(&mut self) -> Option<Token> {
|
||||||
loop {
|
loop {
|
||||||
match self.next_token_no_skip() {
|
|
||||||
Some(Token::Whitespace(_)) => {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
token => {
|
|
||||||
return token;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// see the token at this index
|
|
||||||
fn token_at(&self, n: usize) -> Option<Token> {
|
|
||||||
if let Some(token) = self.tokens.get(n) {
|
|
||||||
Some(token.clone())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn next_token_no_skip(&mut self) -> Option<Token> {
|
|
||||||
if self.index < self.tokens.len() {
|
|
||||||
self.index += 1;
|
self.index += 1;
|
||||||
Some(self.tokens[self.index - 1].clone())
|
match self.tokens.get(self.index - 1) {
|
||||||
} else {
|
Some(Token::Whitespace(_)) => continue,
|
||||||
None
|
token => return token.cloned(),
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Push back the last one non-whitespace token
|
|
||||||
pub fn prev_token(&mut self) -> Option<Token> {
|
|
||||||
// TODO: returned value is unused (available via peek_token)
|
|
||||||
loop {
|
|
||||||
match self.prev_token_no_skip() {
|
|
||||||
Some(Token::Whitespace(_)) => {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
token => {
|
|
||||||
return token;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the previous token and decrement the token index
|
/// Return the first unprocessed token, possibly whitespace.
|
||||||
fn prev_token_no_skip(&mut self) -> Option<Token> {
|
pub fn next_token_no_skip(&mut self) -> Option<&Token> {
|
||||||
if self.index > 0 {
|
self.index += 1;
|
||||||
|
self.tokens.get(self.index - 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Push back the last one non-whitespace token. Must be called after
|
||||||
|
/// `next_token()`, otherwise might panic. OK to call after
|
||||||
|
/// `next_token()` indicates an EOF.
|
||||||
|
pub fn prev_token(&mut self) {
|
||||||
|
loop {
|
||||||
|
assert!(self.index > 0);
|
||||||
self.index -= 1;
|
self.index -= 1;
|
||||||
Some(self.tokens[self.index].clone())
|
if let Some(Token::Whitespace(_)) = self.tokens.get(self.index) {
|
||||||
} else {
|
continue;
|
||||||
None
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -953,9 +926,7 @@ impl Parser {
|
||||||
if name.is_some() {
|
if name.is_some() {
|
||||||
self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", unexpected)
|
self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", unexpected)
|
||||||
} else {
|
} else {
|
||||||
if unexpected.is_some() {
|
self.prev_token();
|
||||||
self.prev_token();
|
|
||||||
}
|
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1173,8 +1144,7 @@ impl Parser {
|
||||||
reserved_kwds: &[&str],
|
reserved_kwds: &[&str],
|
||||||
) -> Result<Option<SQLIdent>, ParserError> {
|
) -> Result<Option<SQLIdent>, ParserError> {
|
||||||
let after_as = self.parse_keyword("AS");
|
let after_as = self.parse_keyword("AS");
|
||||||
let maybe_alias = self.next_token();
|
match self.next_token() {
|
||||||
match maybe_alias {
|
|
||||||
// Accept any identifier after `AS` (though many dialects have restrictions on
|
// Accept any identifier after `AS` (though many dialects have restrictions on
|
||||||
// keywords that may appear here). If there's no `AS`: don't parse keywords,
|
// keywords that may appear here). If there's no `AS`: don't parse keywords,
|
||||||
// which may start a construct allowed in this position, to be parsed as aliases.
|
// which may start a construct allowed in this position, to be parsed as aliases.
|
||||||
|
@ -1192,9 +1162,7 @@ impl Parser {
|
||||||
if after_as {
|
if after_as {
|
||||||
return self.expected("an identifier after AS", not_an_ident);
|
return self.expected("an identifier after AS", not_an_ident);
|
||||||
}
|
}
|
||||||
if not_an_ident.is_some() {
|
self.prev_token();
|
||||||
self.prev_token();
|
|
||||||
}
|
|
||||||
Ok(None) // no alias found
|
Ok(None) // no alias found
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1216,9 +1184,7 @@ impl Parser {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
if token.is_some() {
|
self.prev_token();
|
||||||
self.prev_token();
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1774,15 +1740,22 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_prev_index() {
|
fn test_prev_index() {
|
||||||
let sql = "SELECT version()";
|
let sql = "SELECT version";
|
||||||
all_dialects().run_parser_method(sql, |parser| {
|
all_dialects().run_parser_method(sql, |parser| {
|
||||||
assert_eq!(parser.prev_token(), None);
|
assert_eq!(parser.peek_token(), Some(Token::make_keyword("SELECT")));
|
||||||
|
assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT")));
|
||||||
|
parser.prev_token();
|
||||||
assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT")));
|
assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT")));
|
||||||
assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
|
assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
|
||||||
assert_eq!(parser.prev_token(), Some(Token::make_word("version", None)));
|
parser.prev_token();
|
||||||
assert_eq!(parser.peek_token(), Some(Token::make_word("version", None)));
|
assert_eq!(parser.peek_token(), Some(Token::make_word("version", None)));
|
||||||
assert_eq!(parser.prev_token(), Some(Token::make_keyword("SELECT")));
|
assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
|
||||||
assert_eq!(parser.prev_token(), None);
|
assert_eq!(parser.peek_token(), None);
|
||||||
|
parser.prev_token();
|
||||||
|
assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
|
||||||
|
assert_eq!(parser.next_token(), None);
|
||||||
|
assert_eq!(parser.next_token(), None);
|
||||||
|
parser.prev_token();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue