Introduce peeking_take_while to simplify tokenizer

I could probably look into using an existing crate like
https://github.com/fitzgen/peeking_take_while, but since a small helper
works just as well, I didn't see a reason to.
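
As a sketch (not part of this change), the quoted-identifier arm could use
that crate's PeekableExt adaptor instead of the local helper; the
read_quoted_ident function below is hypothetical and only illustrates the
equivalent call:

    use std::iter::Peekable;
    use std::str::Chars;
    use peeking_take_while::PeekableExt;

    // Collect characters up to (but not including) the closing quote; the
    // closing quote itself stays in the iterator for the caller to consume,
    // matching the behaviour of the local helper introduced below.
    fn read_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> String {
        chars.peeking_take_while(|&ch| ch != quote_end).collect()
    }
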
Nickolay Ponomarev 2019-06-08 04:37:46 +03:00
parent ebc5efda98
commit 20637f0327

@@ -319,29 +319,19 @@ impl<'a> Tokenizer<'a> {
                 }
                 // delimited (quoted) identifier
                 quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
-                    let mut s = String::new();
                     chars.next(); // consume the opening quote
                     let quote_end = SQLWord::matching_end_quote(quote_start);
-                    while let Some(ch) = chars.next() {
-                        match ch {
-                            c if c == quote_end => break,
-                            _ => s.push(ch),
-                        }
-                    }
+                    let s = peeking_take_while(chars, |ch| ch != quote_end);
+                    chars.next(); // TODO: raise error on EOF
                     Ok(Some(Token::make_word(&s, Some(quote_start))))
                 }
                 // numbers
                 '0'..='9' => {
-                    let mut s = String::new();
-                    while let Some(&ch) = chars.peek() {
-                        match ch {
-                            '0'..='9' | '.' => {
-                                chars.next(); // consume
-                                s.push(ch);
-                            }
-                            _ => break,
-                        }
-                    }
+                    // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal
+                    let s = peeking_take_while(chars, |ch| match ch {
+                        '0'..='9' | '.' => true,
+                        _ => false,
+                    });
                     Ok(Some(Token::Number(s)))
                 }
                 // punctuation
@@ -354,22 +344,12 @@ impl<'a> Tokenizer<'a> {
                     match chars.peek() {
                         Some('-') => {
                             chars.next(); // consume the second '-', starting a single-line comment
-                            let mut s = String::new();
-                            loop {
-                                match chars.next() {
-                                    Some(ch) if ch != '\n' => {
-                                        s.push(ch);
-                                    }
-                                    other => {
-                                        if other.is_some() {
-                                            s.push('\n');
-                                        }
-                                        break Ok(Some(Token::Whitespace(
-                                            Whitespace::SingleLineComment(s),
-                                        )));
-                                    }
-                                }
-                            }
+                            let mut s = peeking_take_while(chars, |ch| ch != '\n');
+                            if let Some(ch) = chars.next() {
+                                assert_eq!(ch, '\n');
+                                s.push(ch);
+                            }
+                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment(s))))
                         }
                         // a regular '-' operator
                         _ => Ok(Some(Token::Minus)),
@@ -438,16 +418,10 @@ impl<'a> Tokenizer<'a> {
     /// Tokenize an identifier or keyword, after the first char is already consumed.
     fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
-        let mut s = String::new();
-        s.push(first_char);
-        while let Some(&ch) = chars.peek() {
-            if self.dialect.is_identifier_part(ch) {
-                chars.next(); // consume
-                s.push(ch);
-            } else {
-                break;
-            }
-        }
+        let mut s = first_char.to_string();
+        s.push_str(&peeking_take_while(chars, |ch| {
+            self.dialect.is_identifier_part(ch)
+        }));
         s
     }
@@ -521,6 +495,25 @@ impl<'a> Tokenizer<'a> {
     }
 }
 
+/// Read from `chars` until `predicate` returns `false` or EOF is hit.
+/// Return the characters read as String, and keep the first non-matching
+/// char available as `chars.next()`.
+fn peeking_take_while(
+    chars: &mut Peekable<Chars<'_>>,
+    mut predicate: impl FnMut(char) -> bool,
+) -> String {
+    let mut s = String::new();
+    while let Some(&ch) = chars.peek() {
+        if predicate(ch) {
+            chars.next(); // consume
+            s.push(ch);
+        } else {
+            break;
+        }
+    }
+    s
+}
+
 #[cfg(test)]
 mod tests {
     use super::super::dialect::GenericSqlDialect;
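
For reference, a minimal usage sketch of the new helper (not part of the
commit): it stops before the first character that fails the predicate and
leaves that character available to the caller.

    // Assumes the `peeking_take_while` helper added above is in scope.
    let mut chars = "123abc".chars().peekable();
    let digits = peeking_take_while(&mut chars, |ch| ch.is_ascii_digit());
    assert_eq!(digits, "123");
    assert_eq!(chars.next(), Some('a')); // 'a' was peeked but not consumed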