Merge pull request #4379 from harupy/refactor-FStringParser

Refactor `FStringParser`
This commit is contained in:
Jeong YunWon 2022-12-31 10:56:52 +09:00 committed by GitHub
commit aa32a73c5b

View file

@ -7,14 +7,27 @@ use crate::{
}; };
use std::{iter, mem, str}; use std::{iter, mem, str};
struct FStringParser { struct FStringParser<'a> {
chars: iter::Peekable<str::Chars<'a>>,
str_start: Location, str_start: Location,
str_end: Location, str_end: Location,
} }
impl FStringParser { impl<'a> FStringParser<'a> {
fn new(str_start: Location, str_end: Location) -> Self { fn new(source: &'a str, str_start: Location, str_end: Location) -> Self {
Self { str_start, str_end } Self {
chars: source.chars().peekable(),
str_start,
str_end,
}
}
fn next_char(&mut self) -> Option<char> {
self.chars.next()
}
fn peek(&mut self) -> Option<&char> {
self.chars.peek()
} }
#[inline] #[inline]
@ -22,11 +35,7 @@ impl FStringParser {
Expr::new(self.str_start, self.str_end, node) Expr::new(self.str_start, self.str_end, node)
} }
fn parse_formatted_value<'a>( fn parse_formatted_value(&mut self, nested: u8) -> Result<Vec<Expr>, FStringErrorType> {
&mut self,
mut chars: iter::Peekable<str::Chars<'a>>,
nested: u8,
) -> Result<(Vec<Expr>, iter::Peekable<str::Chars<'a>>), FStringErrorType> {
let mut expression = String::new(); let mut expression = String::new();
let mut spec = None; let mut spec = None;
let mut delims = Vec::new(); let mut delims = Vec::new();
@ -34,36 +43,36 @@ impl FStringParser {
let mut self_documenting = false; let mut self_documenting = false;
let mut trailing_seq = String::new(); let mut trailing_seq = String::new();
while let Some(ch) = chars.next() { while let Some(ch) = self.next_char() {
match ch { match ch {
// can be integrated better with the remaining code, but as a starting point ok // can be integrated better with the remaining code, but as a starting point ok
// in general I would do here a tokenizing of the fstrings to omit this peeking. // in general I would do here a tokenizing of the fstrings to omit this peeking.
'!' if chars.peek() == Some(&'=') => { '!' if self.peek() == Some(&'=') => {
expression.push_str("!="); expression.push_str("!=");
chars.next(); self.next_char();
} }
'=' if chars.peek() == Some(&'=') => { '=' if self.peek() == Some(&'=') => {
expression.push_str("=="); expression.push_str("==");
chars.next(); self.next_char();
} }
'>' if chars.peek() == Some(&'=') => { '>' if self.peek() == Some(&'=') => {
expression.push_str(">="); expression.push_str(">=");
chars.next(); self.next_char();
} }
'<' if chars.peek() == Some(&'=') => { '<' if self.peek() == Some(&'=') => {
expression.push_str("<="); expression.push_str("<=");
chars.next(); self.next_char();
} }
'!' if delims.is_empty() && chars.peek() != Some(&'=') => { '!' if delims.is_empty() && self.peek() != Some(&'=') => {
if expression.trim().is_empty() { if expression.trim().is_empty() {
return Err(EmptyExpression); return Err(EmptyExpression);
} }
conversion = match chars.next() { conversion = match self.next_char() {
Some('s') => ConversionFlag::Str, Some('s') => ConversionFlag::Str,
Some('a') => ConversionFlag::Ascii, Some('a') => ConversionFlag::Ascii,
Some('r') => ConversionFlag::Repr, Some('r') => ConversionFlag::Repr,
@ -83,7 +92,7 @@ impl FStringParser {
} }
}; };
if let Some(&peek) = chars.peek() { if let Some(&peek) = self.peek() {
if peek != '}' && peek != ':' { if peek != '}' && peek != ':' {
return Err(if expression.trim().is_empty() { return Err(if expression.trim().is_empty() {
EmptyExpression EmptyExpression
@ -102,17 +111,16 @@ impl FStringParser {
// match a python 3.8 self documenting expression // match a python 3.8 self documenting expression
// format '{' PYTHON_EXPRESSION '=' FORMAT_SPECIFIER? '}' // format '{' PYTHON_EXPRESSION '=' FORMAT_SPECIFIER? '}'
'=' if chars.peek() != Some(&'=') && delims.is_empty() => { '=' if self.peek() != Some(&'=') && delims.is_empty() => {
self_documenting = true; self_documenting = true;
} }
':' if delims.is_empty() => { ':' if delims.is_empty() => {
let (parsed_spec, remaining_chars) = self.parse_spec(chars, nested)?; let parsed_spec = self.parse_spec(nested)?;
spec = Some(Box::new(self.expr(ExprKind::JoinedStr { spec = Some(Box::new(self.expr(ExprKind::JoinedStr {
values: parsed_spec, values: parsed_spec,
}))); })));
chars = remaining_chars;
} }
'(' | '{' | '[' => { '(' | '{' | '[' => {
expression.push(ch); expression.push(ch);
@ -195,21 +203,19 @@ impl FStringParser {
}), }),
] ]
}; };
return Ok((ret, chars)); return Ok(ret);
} }
'"' | '\'' => { '"' | '\'' => {
expression.push(ch); expression.push(ch);
let mut string_ended = false; loop {
for next in &mut chars { let Some(c) = self.next_char() else {
expression.push(next); return Err(UnterminatedString);
if next == ch { };
string_ended = true; expression.push(c);
if c == ch {
break; break;
} }
} }
if !string_ended {
return Err(UnterminatedString);
}
} }
' ' if self_documenting => { ' ' if self_documenting => {
trailing_seq.push(ch); trailing_seq.push(ch);
@ -231,14 +237,10 @@ impl FStringParser {
}) })
} }
fn parse_spec<'a>( fn parse_spec(&mut self, nested: u8) -> Result<Vec<Expr>, FStringErrorType> {
&mut self,
mut chars: iter::Peekable<str::Chars<'a>>,
nested: u8,
) -> Result<(Vec<Expr>, iter::Peekable<str::Chars<'a>>), FStringErrorType> {
let mut spec_constructor = Vec::new(); let mut spec_constructor = Vec::new();
let mut constant_piece = String::new(); let mut constant_piece = String::new();
while let Some(&next) = chars.peek() { while let Some(&next) = self.peek() {
match next { match next {
'{' => { '{' => {
if !constant_piece.is_empty() { if !constant_piece.is_empty() {
@ -248,9 +250,8 @@ impl FStringParser {
})); }));
constant_piece.clear(); constant_piece.clear();
} }
let (parsed_expr, remaining_chars) = self.parse(chars, nested + 1)?; let parsed_expr = self.parse(nested + 1)?;
spec_constructor.extend(parsed_expr); spec_constructor.extend(parsed_expr);
chars = remaining_chars;
continue; continue;
} }
'}' => { '}' => {
@ -260,7 +261,7 @@ impl FStringParser {
constant_piece.push(next); constant_piece.push(next);
} }
} }
chars.next(); self.next_char();
} }
if !constant_piece.is_empty() { if !constant_piece.is_empty() {
spec_constructor.push(self.expr(ExprKind::Constant { spec_constructor.push(self.expr(ExprKind::Constant {
@ -269,14 +270,10 @@ impl FStringParser {
})); }));
constant_piece.clear(); constant_piece.clear();
} }
Ok((spec_constructor, chars)) Ok(spec_constructor)
} }
fn parse<'a>( fn parse(&mut self, nested: u8) -> Result<Vec<Expr>, FStringErrorType> {
&mut self,
mut chars: iter::Peekable<str::Chars<'a>>,
nested: u8,
) -> Result<(Vec<Expr>, iter::Peekable<str::Chars<'a>>), FStringErrorType> {
if nested >= 2 { if nested >= 2 {
return Err(ExpressionNestedTooDeeply); return Err(ExpressionNestedTooDeeply);
} }
@ -284,14 +281,14 @@ impl FStringParser {
let mut content = String::new(); let mut content = String::new();
let mut values = vec![]; let mut values = vec![];
while let Some(&ch) = chars.peek() { while let Some(&ch) = self.peek() {
match ch { match ch {
'{' => { '{' => {
chars.next(); self.next_char();
if nested == 0 { if nested == 0 {
match chars.peek() { match self.peek() {
Some('{') => { Some('{') => {
chars.next(); self.next_char();
content.push('{'); content.push('{');
continue; continue;
} }
@ -306,18 +303,16 @@ impl FStringParser {
})); }));
} }
let (parsed_values, remaining_chars) = let parsed_values = self.parse_formatted_value(nested)?;
self.parse_formatted_value(chars, nested)?;
values.extend(parsed_values); values.extend(parsed_values);
chars = remaining_chars;
} }
'}' => { '}' => {
if nested > 0 { if nested > 0 {
break; break;
} }
chars.next(); self.next_char();
if let Some('}') = chars.peek() { if let Some('}') = self.peek() {
chars.next(); self.next_char();
content.push('}'); content.push('}');
} else { } else {
return Err(SingleRbrace); return Err(SingleRbrace);
@ -325,7 +320,7 @@ impl FStringParser {
} }
_ => { _ => {
content.push(ch); content.push(ch);
chars.next(); self.next_char();
} }
} }
} }
@ -337,7 +332,7 @@ impl FStringParser {
})) }))
} }
Ok((values, chars)) Ok(values)
} }
} }
@ -353,9 +348,8 @@ pub fn parse_located_fstring(
start: Location, start: Location,
end: Location, end: Location,
) -> Result<Vec<Expr>, FStringError> { ) -> Result<Vec<Expr>, FStringError> {
FStringParser::new(start, end) FStringParser::new(source, start, end)
.parse(source.chars().peekable(), 0) .parse(0)
.map(|(e, _)| e)
.map_err(|error| FStringError { .map_err(|error| FStringError {
error, error,
location: start, location: start,
@ -367,9 +361,7 @@ mod tests {
use super::*; use super::*;
fn parse_fstring(source: &str) -> Result<Vec<Expr>, FStringErrorType> { fn parse_fstring(source: &str) -> Result<Vec<Expr>, FStringErrorType> {
FStringParser::new(Location::default(), Location::default()) FStringParser::new(source, Location::default(), Location::default()).parse(0)
.parse(source.chars().peekable(), 0)
.map(|(e, _)| e)
} }
#[test] #[test]