Mirror of https://github.com/apache/datafusion-sqlparser-rs.git, synced 2025-09-01 19:57:30 +00:00
Introduce peeking_take_while to simplify tokenizer
I could probably look into using an existing crate like https://github.com/fitzgen/peeking_take_while, but since a small helper works just as well, I didn't see a reason to.
This commit is contained in:
parent ebc5efda98
commit 20637f0327
1 changed file with 35 additions and 42 deletions
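
A note on why the standard `Iterator::take_while` is not a drop-in replacement here: as the doc comment on the new helper puts it (last hunk below), the first non-matching character must stay available as `chars.next()`, and the standard adaptor consumes it. A minimal standalone illustration, not part of the commit (the literal input is made up):

    fn main() {
        // `Iterator::take_while` swallows the first character that fails the
        // predicate, so a tokenizer built on it would lose the delimiter:
        let mut chars = "123,456".chars().peekable();
        let num: String = chars.by_ref().take_while(|ch| ch.is_ascii_digit()).collect();
        assert_eq!(num, "123");
        assert_eq!(chars.next(), Some('4')); // the ',' is gone

        // The peek-based helper added in this commit stops *before* consuming
        // the failing character, so `chars.next()` would still return ','.
    }

The quoted-identifier and single-line-comment branches in the diff rely on that leftover character when they call `chars.next()` right after the helper returns.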
@@ -319,29 +319,19 @@ impl<'a> Tokenizer<'a> {
                 }
                 // delimited (quoted) identifier
                 quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
-                    let mut s = String::new();
                     chars.next(); // consume the opening quote
                     let quote_end = SQLWord::matching_end_quote(quote_start);
-                    while let Some(ch) = chars.next() {
-                        match ch {
-                            c if c == quote_end => break,
-                            _ => s.push(ch),
-                        }
-                    }
+                    let s = peeking_take_while(chars, |ch| ch != quote_end);
+                    chars.next(); // TODO: raise error on EOF
                     Ok(Some(Token::make_word(&s, Some(quote_start))))
                 }
                 // numbers
                 '0'..='9' => {
-                    let mut s = String::new();
-                    while let Some(&ch) = chars.peek() {
-                        match ch {
-                            '0'..='9' | '.' => {
-                                chars.next(); // consume
-                                s.push(ch);
-                            }
-                            _ => break,
-                        }
-                    }
+                    // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal
+                    let s = peeking_take_while(chars, |ch| match ch {
+                        '0'..='9' | '.' => true,
+                        _ => false,
+                    });
                     Ok(Some(Token::Number(s)))
                 }
                 // punctuation
@@ -354,22 +344,12 @@ impl<'a> Tokenizer<'a> {
                     match chars.peek() {
                         Some('-') => {
                             chars.next(); // consume the second '-', starting a single-line comment
-                            let mut s = String::new();
-                            loop {
-                                match chars.next() {
-                                    Some(ch) if ch != '\n' => {
-                                        s.push(ch);
-                                    }
-                                    other => {
-                                        if other.is_some() {
-                                            s.push('\n');
-                                        }
-                                        break Ok(Some(Token::Whitespace(
-                                            Whitespace::SingleLineComment(s),
-                                        )));
-                                    }
-                                }
-                            }
+                            let mut s = peeking_take_while(chars, |ch| ch != '\n');
+                            if let Some(ch) = chars.next() {
+                                assert_eq!(ch, '\n');
+                                s.push(ch);
+                            }
+                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment(s))))
                         }
                         // a regular '-' operator
                         _ => Ok(Some(Token::Minus)),
@@ -438,16 +418,10 @@ impl<'a> Tokenizer<'a> {
 
     /// Tokenize an identifier or keyword, after the first char is already consumed.
    fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
-        let mut s = String::new();
-        s.push(first_char);
-        while let Some(&ch) = chars.peek() {
-            if self.dialect.is_identifier_part(ch) {
-                chars.next(); // consume
-                s.push(ch);
-            } else {
-                break;
-            }
-        }
+        let mut s = first_char.to_string();
+        s.push_str(&peeking_take_while(chars, |ch| {
+            self.dialect.is_identifier_part(ch)
+        }));
         s
     }
 
@@ -521,6 +495,25 @@ impl<'a> Tokenizer<'a> {
         }
     }
 }
 
+/// Read from `chars` until `predicate` returns `false` or EOF is hit.
+/// Return the characters read as String, and keep the first non-matching
+/// char available as `chars.next()`.
+fn peeking_take_while(
+    chars: &mut Peekable<Chars<'_>>,
+    mut predicate: impl FnMut(char) -> bool,
+) -> String {
+    let mut s = String::new();
+    while let Some(&ch) = chars.peek() {
+        if predicate(ch) {
+            chars.next(); // consume
+            s.push(ch);
+        } else {
+            break;
+        }
+    }
+    s
+}
+
 #[cfg(test)]
 mod tests {
     use super::super::dialect::GenericSqlDialect;
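
For reference, a self-contained sketch of how the refactored branches use the helper. The helper body is copied from the hunk above so the example compiles on its own; the inputs and the hard-coded `'"'` and `'\n'` delimiters are illustrative (the real code derives the closing quote via `SQLWord::matching_end_quote`):

    use std::iter::Peekable;
    use std::str::Chars;

    // Copied from the diff above, only so this sketch is runnable by itself.
    fn peeking_take_while(
        chars: &mut Peekable<Chars<'_>>,
        mut predicate: impl FnMut(char) -> bool,
    ) -> String {
        let mut s = String::new();
        while let Some(&ch) = chars.peek() {
            if predicate(ch) {
                chars.next(); // consume
                s.push(ch);
            } else {
                break;
            }
        }
        s
    }

    fn main() {
        // Delimited identifier: read up to the closing quote, then consume it,
        // mirroring the `quote_start` arm (opening quote already consumed).
        let mut chars = r#"foo bar" rest"#.chars().peekable();
        let ident = peeking_take_while(&mut chars, |ch| ch != '"');
        assert_eq!(ident, "foo bar");
        assert_eq!(chars.next(), Some('"')); // closing quote still available

        // Single-line comment: read up to '\n', then pull the newline in,
        // mirroring the `Some('-')` arm.
        let mut chars = " a comment\nSELECT".chars().peekable();
        let mut comment = peeking_take_while(&mut chars, |ch| ch != '\n');
        if let Some(ch) = chars.next() {
            assert_eq!(ch, '\n');
            comment.push(ch);
        }
        assert_eq!(comment, " a comment\n");
        assert_eq!(chars.next(), Some('S'));
    }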