Clean up JSON operator tokenizing code (#923)

This commit is contained in:
Andrew Lamb 2023-07-17 16:09:55 -04:00 committed by GitHub
parent df45db1375
commit c45451850c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -497,12 +497,14 @@ impl<'a> Tokenizer<'a> {
Ok(tokens) Ok(tokens)
} }
// Tokenize the identifier or keyword in `ch`
fn tokenize_identifier_or_keyword( fn tokenize_identifier_or_keyword(
&self, &self,
ch: String, ch: impl IntoIterator<Item = char>,
chars: &mut State, chars: &mut State,
) -> Result<Option<Token>, TokenizerError> { ) -> Result<Option<Token>, TokenizerError> {
chars.next(); // consume the first char chars.next(); // consume the first char
let ch: String = ch.into_iter().collect();
let word = self.tokenize_word(ch, chars); let word = self.tokenize_word(ch, chars);
// TODO: implement parsing of exponent here // TODO: implement parsing of exponent here
@ -550,7 +552,7 @@ impl<'a> Tokenizer<'a> {
} }
_ => { _ => {
// regular identifier starting with a "b" or "B" // regular identifier starting with a "b" or "B"
let s = self.tokenize_word(b.to_string(), chars); let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None))) Ok(Some(Token::make_word(&s, None)))
} }
} }
@ -569,7 +571,7 @@ impl<'a> Tokenizer<'a> {
} }
_ => { _ => {
// regular identifier starting with an "r" or "R" // regular identifier starting with an "r" or "R"
let s = self.tokenize_word(b.to_string(), chars); let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None))) Ok(Some(Token::make_word(&s, None)))
} }
} }
@ -585,7 +587,7 @@ impl<'a> Tokenizer<'a> {
} }
_ => { _ => {
// regular identifier starting with an "N" // regular identifier starting with an "N"
let s = self.tokenize_word(n.to_string(), chars); let s = self.tokenize_word(n, chars);
Ok(Some(Token::make_word(&s, None))) Ok(Some(Token::make_word(&s, None)))
} }
} }
@ -602,7 +604,7 @@ impl<'a> Tokenizer<'a> {
} }
_ => { _ => {
// regular identifier starting with an "E" or "e" // regular identifier starting with an "E" or "e"
let s = self.tokenize_word(x.to_string(), chars); let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None))) Ok(Some(Token::make_word(&s, None)))
} }
} }
@ -619,7 +621,7 @@ impl<'a> Tokenizer<'a> {
} }
_ => { _ => {
// regular identifier starting with an "X" // regular identifier starting with an "X"
let s = self.tokenize_word(x.to_string(), chars); let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None))) Ok(Some(Token::make_word(&s, None)))
} }
} }
@ -794,9 +796,7 @@ impl<'a> Tokenizer<'a> {
match chars.peek() { match chars.peek() {
Some(' ') => self.consume_and_return(chars, Token::Mod), Some(' ') => self.consume_and_return(chars, Token::Mod),
Some(sch) if self.dialect.is_identifier_start('%') => { Some(sch) if self.dialect.is_identifier_start('%') => {
let mut s = ch.to_string(); self.tokenize_identifier_or_keyword([ch, *sch], chars)
s.push_str(&sch.to_string());
self.tokenize_identifier_or_keyword(s, chars)
} }
_ => self.consume_and_return(chars, Token::Mod), _ => self.consume_and_return(chars, Token::Mod),
} }
@ -917,9 +917,7 @@ impl<'a> Tokenizer<'a> {
} }
Some(' ') => Ok(Some(Token::Sharp)), Some(' ') => Ok(Some(Token::Sharp)),
Some(sch) if self.dialect.is_identifier_start('#') => { Some(sch) if self.dialect.is_identifier_start('#') => {
let mut s = ch.to_string(); self.tokenize_identifier_or_keyword([ch, *sch], chars)
s.push_str(&sch.to_string());
self.tokenize_identifier_or_keyword(s, chars)
} }
_ => Ok(Some(Token::Sharp)), _ => Ok(Some(Token::Sharp)),
} }
@ -934,19 +932,14 @@ impl<'a> Tokenizer<'a> {
match chars.peek() { match chars.peek() {
Some(' ') => Ok(Some(Token::AtAt)), Some(' ') => Ok(Some(Token::AtAt)),
Some(tch) if self.dialect.is_identifier_start('@') => { Some(tch) if self.dialect.is_identifier_start('@') => {
let mut s = ch.to_string(); self.tokenize_identifier_or_keyword([ch, '@', *tch], chars)
s.push('@');
s.push_str(&tch.to_string());
self.tokenize_identifier_or_keyword(s, chars)
} }
_ => Ok(Some(Token::AtAt)), _ => Ok(Some(Token::AtAt)),
} }
} }
Some(' ') => Ok(Some(Token::AtSign)), Some(' ') => Ok(Some(Token::AtSign)),
Some(sch) if self.dialect.is_identifier_start('@') => { Some(sch) if self.dialect.is_identifier_start('@') => {
let mut s = ch.to_string(); self.tokenize_identifier_or_keyword([ch, *sch], chars)
s.push_str(&sch.to_string());
self.tokenize_identifier_or_keyword(s, chars)
} }
_ => Ok(Some(Token::AtSign)), _ => Ok(Some(Token::AtSign)),
} }
@ -959,7 +952,7 @@ impl<'a> Tokenizer<'a> {
// identifier or keyword // identifier or keyword
ch if self.dialect.is_identifier_start(ch) => { ch if self.dialect.is_identifier_start(ch) => {
self.tokenize_identifier_or_keyword(ch.to_string(), chars) self.tokenize_identifier_or_keyword([ch], chars)
} }
'$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)), '$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)),
@ -1086,8 +1079,8 @@ impl<'a> Tokenizer<'a> {
} }
/// Tokenize an identifier or keyword, after the first char is already consumed. /// Tokenize an identifier or keyword, after the first char is already consumed.
fn tokenize_word(&self, first_chars: String, chars: &mut State) -> String { fn tokenize_word(&self, first_chars: impl Into<String>, chars: &mut State) -> String {
let mut s = first_chars; let mut s = first_chars.into();
s.push_str(&peeking_take_while(chars, |ch| { s.push_str(&peeking_take_while(chars, |ch| {
self.dialect.is_identifier_part(ch) self.dialect.is_identifier_part(ch)
})); }));