diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index 7753a7ee..5725f2d5 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -319,29 +319,19 @@ impl<'a> Tokenizer<'a> { } // delimited (quoted) identifier quote_start if self.dialect.is_delimited_identifier_start(quote_start) => { - let mut s = String::new(); chars.next(); // consume the opening quote let quote_end = SQLWord::matching_end_quote(quote_start); - while let Some(ch) = chars.next() { - match ch { - c if c == quote_end => break, - _ => s.push(ch), - } - } + let s = peeking_take_while(chars, |ch| ch != quote_end); + chars.next(); // TODO: raise error on EOF Ok(Some(Token::make_word(&s, Some(quote_start)))) } // numbers '0'..='9' => { - let mut s = String::new(); - while let Some(&ch) = chars.peek() { - match ch { - '0'..='9' | '.' => { - chars.next(); // consume - s.push(ch); - } - _ => break, - } - } + // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal + let s = peeking_take_while(chars, |ch| match ch { + '0'..='9' | '.' => true, + _ => false, + }); Ok(Some(Token::Number(s))) } // punctuation @@ -354,22 +344,12 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('-') => { chars.next(); // consume the second '-', starting a single-line comment - let mut s = String::new(); - loop { - match chars.next() { - Some(ch) if ch != '\n' => { - s.push(ch); - } - other => { - if other.is_some() { - s.push('\n'); - } - break Ok(Some(Token::Whitespace( - Whitespace::SingleLineComment(s), - ))); - } - } + let mut s = peeking_take_while(chars, |ch| ch != '\n'); + if let Some(ch) = chars.next() { + assert_eq!(ch, '\n'); + s.push(ch); } + Ok(Some(Token::Whitespace(Whitespace::SingleLineComment(s)))) } // a regular '-' operator _ => Ok(Some(Token::Minus)), @@ -438,16 +418,10 @@ impl<'a> Tokenizer<'a> { /// Tokenize an identifier or keyword, after the first char is already consumed. 
fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String { - let mut s = String::new(); - s.push(first_char); - while let Some(&ch) = chars.peek() { - if self.dialect.is_identifier_part(ch) { - chars.next(); // consume - s.push(ch); - } else { - break; - } - } + let mut s = first_char.to_string(); + s.push_str(&peeking_take_while(chars, |ch| { + self.dialect.is_identifier_part(ch) + })); s } @@ -521,6 +495,25 @@ impl<'a> Tokenizer<'a> { } } +/// Read from `chars` until `predicate` returns `false` or EOF is hit. +/// Return the characters read as String, and keep the first non-matching +/// char available as `chars.next()`. +fn peeking_take_while( + chars: &mut Peekable<Chars<'_>>, + mut predicate: impl FnMut(char) -> bool, +) -> String { + let mut s = String::new(); + while let Some(&ch) = chars.peek() { + if predicate(ch) { + chars.next(); // consume + s.push(ch); + } else { + break; + } + } + s +} + #[cfg(test)] mod tests { use super::super::dialect::GenericSqlDialect;