Merge pull request #4409 from harupy/improve-error-conversion-in-string-parser

Improve error conversion in `string_parsers.rs`
This commit is contained in:
Jeong YunWon 2023-01-03 14:39:01 +09:00 committed by GitHub
commit 2858c315bf
2 changed files with 116 additions and 89 deletions

View file

@ -12,6 +12,12 @@ pub struct LexicalError {
pub location: Location, pub location: Location,
} }
impl LexicalError {
pub fn new(error: LexicalErrorType, location: Location) -> Self {
Self { error, location }
}
}
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum LexicalErrorType { pub enum LexicalErrorType {
StringError, StringError,
@ -85,6 +91,21 @@ pub struct FStringError {
pub location: Location, pub location: Location,
} }
impl FStringError {
pub fn new(error: FStringErrorType, location: Location) -> Self {
Self { error, location }
}
}
impl From<FStringError> for LexicalError {
fn from(err: FStringError) -> Self {
LexicalError {
error: LexicalErrorType::FStringError(err.error),
location: err.location,
}
}
}
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum FStringErrorType { pub enum FStringErrorType {
UnclosedLbrace, UnclosedLbrace,
@ -101,15 +122,6 @@ pub enum FStringErrorType {
UnterminatedString, UnterminatedString,
} }
impl FStringErrorType {
pub fn to_lexical_error(self, location: Location) -> LexicalError {
LexicalError {
error: LexicalErrorType::FStringError(self),
location,
}
}
}
impl fmt::Display for FStringErrorType { impl fmt::Display for FStringErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self { match self {

View file

@ -1,7 +1,7 @@
use self::FStringErrorType::*; use self::FStringErrorType::*;
use crate::{ use crate::{
ast::{Constant, ConversionFlag, Expr, ExprKind, Location}, ast::{Constant, ConversionFlag, Expr, ExprKind, Location},
error::{FStringErrorType, LexicalError, LexicalErrorType, ParseError}, error::{FStringError, FStringErrorType, LexicalError, LexicalErrorType, ParseError},
parser::parse_expression_located, parser::parse_expression_located,
token::StringKind, token::StringKind,
}; };
@ -66,10 +66,7 @@ impl<'a> StringParser<'a> {
fn parse_unicode_literal(&mut self, literal_number: usize) -> Result<char, LexicalError> { fn parse_unicode_literal(&mut self, literal_number: usize) -> Result<char, LexicalError> {
let mut p: u32 = 0u32; let mut p: u32 = 0u32;
let unicode_error = LexicalError { let unicode_error = LexicalError::new(LexicalErrorType::UnicodeError, self.get_pos());
error: LexicalErrorType::UnicodeError,
location: self.get_pos(),
};
for i in 1..=literal_number { for i in 1..=literal_number {
match self.next_char() { match self.next_char() {
Some(c) => match c.to_digit(16) { Some(c) => match c.to_digit(16) {
@ -103,12 +100,7 @@ impl<'a> StringParser<'a> {
let start_pos = self.get_pos(); let start_pos = self.get_pos();
match self.next_char() { match self.next_char() {
Some('{') => {} Some('{') => {}
_ => { _ => return Err(LexicalError::new(LexicalErrorType::StringError, start_pos)),
return Err(LexicalError {
error: LexicalErrorType::StringError,
location: start_pos,
})
}
} }
let start_pos = self.get_pos(); let start_pos = self.get_pos();
let mut name = String::new(); let mut name = String::new();
@ -117,25 +109,23 @@ impl<'a> StringParser<'a> {
Some('}') => break, Some('}') => break,
Some(c) => name.push(c), Some(c) => name.push(c),
None => { None => {
return Err(LexicalError { return Err(LexicalError::new(
error: LexicalErrorType::StringError, LexicalErrorType::StringError,
location: self.get_pos(), self.get_pos(),
}) ))
} }
} }
} }
if name.len() > MAX_UNICODE_NAME { if name.len() > MAX_UNICODE_NAME {
return Err(LexicalError { return Err(LexicalError::new(
error: LexicalErrorType::UnicodeError, LexicalErrorType::UnicodeError,
location: self.get_pos(), self.get_pos(),
}); ));
} }
unicode_names2::character(&name).ok_or(LexicalError { unicode_names2::character(&name)
error: LexicalErrorType::UnicodeError, .ok_or_else(|| LexicalError::new(LexicalErrorType::UnicodeError, start_pos))
location: start_pos,
})
} }
fn parse_escaped_char(&mut self) -> Result<String, LexicalError> { fn parse_escaped_char(&mut self) -> Result<String, LexicalError> {
@ -159,20 +149,20 @@ impl<'a> StringParser<'a> {
'N' if !self.kind.is_bytes() => self.parse_unicode_name()?.to_string(), 'N' if !self.kind.is_bytes() => self.parse_unicode_name()?.to_string(),
c => { c => {
if self.kind.is_bytes() && !c.is_ascii() { if self.kind.is_bytes() && !c.is_ascii() {
return Err(LexicalError { return Err(LexicalError::new(
error: LexicalErrorType::OtherError( LexicalErrorType::OtherError(
"bytes can only contain ASCII literal characters".to_owned(), "bytes can only contain ASCII literal characters".to_owned(),
), ),
location: self.get_pos(), self.get_pos(),
}); ));
} }
format!("\\{c}") format!("\\{c}")
} }
}), }),
None => Err(LexicalError { None => Err(LexicalError::new(
error: LexicalErrorType::StringError, LexicalErrorType::StringError,
location: self.get_pos(), self.get_pos(),
}), )),
} }
} }
@ -196,7 +186,7 @@ impl<'a> StringParser<'a> {
} }
'!' if delims.is_empty() && self.peek() != Some(&'=') => { '!' if delims.is_empty() && self.peek() != Some(&'=') => {
if expression.trim().is_empty() { if expression.trim().is_empty() {
return Err(EmptyExpression.to_lexical_error(self.get_pos())); return Err(FStringError::new(EmptyExpression, self.get_pos()).into());
} }
conversion = match self.next_char() { conversion = match self.next_char() {
@ -204,17 +194,19 @@ impl<'a> StringParser<'a> {
Some('a') => ConversionFlag::Ascii, Some('a') => ConversionFlag::Ascii,
Some('r') => ConversionFlag::Repr, Some('r') => ConversionFlag::Repr,
Some(_) => { Some(_) => {
return Err(InvalidConversionFlag.to_lexical_error(self.get_pos())); return Err(
FStringError::new(InvalidConversionFlag, self.get_pos()).into()
);
} }
None => { None => {
return Err(UnclosedLbrace.to_lexical_error(self.get_pos())); return Err(FStringError::new(UnclosedLbrace, self.get_pos()).into());
} }
}; };
match self.peek() { match self.peek() {
Some('}' | ':') => {} Some('}' | ':') => {}
Some(_) | None => { Some(_) | None => {
return Err(UnclosedLbrace.to_lexical_error(self.get_pos())) return Err(FStringError::new(UnclosedLbrace, self.get_pos()).into());
} }
} }
} }
@ -243,12 +235,14 @@ impl<'a> StringParser<'a> {
expression.push(ch); expression.push(ch);
} }
Some(c) => { Some(c) => {
return Err( return Err(FStringError::new(
MismatchedDelimiter(c, ')').to_lexical_error(self.get_pos()) MismatchedDelimiter(c, ')'),
); self.get_pos(),
)
.into());
} }
None => { None => {
return Err(Unmatched(')').to_lexical_error(self.get_pos())); return Err(FStringError::new(Unmatched(')'), self.get_pos()).into());
} }
} }
} }
@ -259,12 +253,14 @@ impl<'a> StringParser<'a> {
expression.push(ch); expression.push(ch);
} }
Some(c) => { Some(c) => {
return Err( return Err(FStringError::new(
MismatchedDelimiter(c, ']').to_lexical_error(self.get_pos()) MismatchedDelimiter(c, ']'),
); self.get_pos(),
)
.into());
} }
None => { None => {
return Err(Unmatched(']').to_lexical_error(self.get_pos())); return Err(FStringError::new(Unmatched(']'), self.get_pos()).into());
} }
} }
} }
@ -275,22 +271,28 @@ impl<'a> StringParser<'a> {
expression.push(ch); expression.push(ch);
} }
Some(c) => { Some(c) => {
return Err(MismatchedDelimiter(c, '}').to_lexical_error(self.get_pos())) return Err(FStringError::new(
MismatchedDelimiter(c, '}'),
self.get_pos(),
)
.into());
} }
None => {} None => {}
} }
} }
'}' => { '}' => {
if expression.trim().is_empty() { if expression.trim().is_empty() {
return Err(EmptyExpression.to_lexical_error(self.get_pos())); return Err(FStringError::new(EmptyExpression, self.get_pos()).into());
} }
let ret = if !self_documenting { let ret = if !self_documenting {
vec![self.expr(ExprKind::FormattedValue { vec![self.expr(ExprKind::FormattedValue {
value: Box::new(parse_fstring_expr(&expression, location).map_err( value: Box::new(parse_fstring_expr(&expression, location).map_err(
|e| { |e| {
InvalidExpression(Box::new(e.error)) FStringError::new(
.to_lexical_error(self.get_pos()) InvalidExpression(Box::new(e.error)),
location,
)
}, },
)?), )?),
conversion: conversion as _, conversion: conversion as _,
@ -309,8 +311,10 @@ impl<'a> StringParser<'a> {
self.expr(ExprKind::FormattedValue { self.expr(ExprKind::FormattedValue {
value: Box::new( value: Box::new(
parse_fstring_expr(&expression, location).map_err(|e| { parse_fstring_expr(&expression, location).map_err(|e| {
InvalidExpression(Box::new(e.error)) FStringError::new(
.to_lexical_error(self.get_pos()) InvalidExpression(Box::new(e.error)),
location,
)
})?, })?,
), ),
conversion: (if conversion == ConversionFlag::None && spec.is_none() conversion: (if conversion == ConversionFlag::None && spec.is_none()
@ -329,7 +333,7 @@ impl<'a> StringParser<'a> {
expression.push(ch); expression.push(ch);
loop { loop {
let Some(c) = self.next_char() else { let Some(c) = self.next_char() else {
return Err(UnterminatedString.to_lexical_error(self.get_pos())); return Err(FStringError::new(UnterminatedString, self.get_pos()).into());
}; };
expression.push(c); expression.push(c);
if c == ch { if c == ch {
@ -340,10 +344,10 @@ impl<'a> StringParser<'a> {
' ' if self_documenting => { ' ' if self_documenting => {
trailing_seq.push(ch); trailing_seq.push(ch);
} }
'\\' => return Err(ExpressionCannotInclude('\\').to_lexical_error(self.get_pos())), '\\' => return Err(FStringError::new(ExpressionCannotInclude('\\'), self.get_pos()).into()),
_ => { _ => {
if self_documenting { if self_documenting {
return Err(UnclosedLbrace.to_lexical_error(self.get_pos())); return Err(FStringError::new(UnclosedLbrace, self.get_pos()).into());
} }
expression.push(ch); expression.push(ch);
@ -351,9 +355,9 @@ impl<'a> StringParser<'a> {
} }
} }
Err(if expression.trim().is_empty() { Err(if expression.trim().is_empty() {
EmptyExpression.to_lexical_error(self.get_pos()) FStringError::new(EmptyExpression, self.get_pos()).into()
} else { } else {
UnclosedLbrace.to_lexical_error(self.get_pos()) FStringError::new(UnclosedLbrace, self.get_pos()).into()
}) })
} }
@ -393,7 +397,7 @@ impl<'a> StringParser<'a> {
fn parse_fstring(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> { fn parse_fstring(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
if nested >= 2 { if nested >= 2 {
return Err(ExpressionNestedTooDeeply.to_lexical_error(self.get_pos())); return Err(FStringError::new(ExpressionNestedTooDeeply, self.get_pos()).into());
} }
let mut content = String::new(); let mut content = String::new();
@ -410,7 +414,9 @@ impl<'a> StringParser<'a> {
content.push('{'); content.push('{');
continue; continue;
} }
None => return Err(UnclosedLbrace.to_lexical_error(self.get_pos())), None => {
return Err(FStringError::new(UnclosedLbrace, self.get_pos()).into())
}
_ => {} _ => {}
} }
} }
@ -433,7 +439,7 @@ impl<'a> StringParser<'a> {
self.next_char(); self.next_char();
content.push('}'); content.push('}');
} else { } else {
return Err(SingleRbrace.to_lexical_error(self.get_pos())); return Err(FStringError::new(SingleRbrace, self.get_pos()).into());
} }
} }
'\\' if !self.kind.is_raw() => { '\\' if !self.kind.is_raw() => {
@ -466,12 +472,12 @@ impl<'a> StringParser<'a> {
} }
ch => { ch => {
if !ch.is_ascii() { if !ch.is_ascii() {
return Err(LexicalError { return Err(LexicalError::new(
error: LexicalErrorType::OtherError( LexicalErrorType::OtherError(
"bytes can only contain ASCII literal characters".to_string(), "bytes can only contain ASCII literal characters".to_string(),
), ),
location: self.get_pos(), self.get_pos(),
}); ));
} }
content.push(ch); content.push(ch);
} }
@ -534,7 +540,7 @@ pub fn parse_string(
mod tests { mod tests {
use super::*; use super::*;
fn parse_fstring(source: &str) -> Result<Vec<Expr>, FStringErrorType> { fn parse_fstring(source: &str) -> Result<Vec<Expr>, LexicalError> {
StringParser::new( StringParser::new(
source, source,
StringKind::FString, StringKind::FString,
@ -543,10 +549,6 @@ mod tests {
Location::new(1, source.len() + 3), // 3 for prefix and quotes Location::new(1, source.len() + 3), // 3 for prefix and quotes
) )
.parse() .parse()
.map_err(|e| match e.error {
LexicalErrorType::FStringError(e) => e,
e => unreachable!("Unexpected error type {:?}", e),
})
} }
#[test] #[test]
@ -602,26 +604,39 @@ mod tests {
insta::assert_debug_snapshot!(parse_ast); insta::assert_debug_snapshot!(parse_ast);
} }
/// Parses `source` with `parse_fstring` and returns the f-string error kind it fails with.
///
/// # Panics
///
/// Panics with "Expected error" if parsing succeeds, and hits `unreachable!`
/// if the failure is a lexical error other than `LexicalErrorType::FStringError`.
fn parse_fstring_error(source: &str) -> FStringErrorType {
    match parse_fstring(source) {
        Ok(_) => panic!("Expected error"),
        Err(lexical) => match lexical.error {
            LexicalErrorType::FStringError(kind) => kind,
            other => unreachable!("Expected FStringError: {:?}", other),
        },
    }
}
#[test] #[test]
fn test_parse_invalid_fstring() { fn test_parse_invalid_fstring() {
assert_eq!(parse_fstring("{5!a"), Err(UnclosedLbrace)); assert_eq!(parse_fstring_error("{5!a"), UnclosedLbrace);
assert_eq!(parse_fstring("{5!a1}"), Err(UnclosedLbrace)); assert_eq!(parse_fstring_error("{5!a1}"), UnclosedLbrace);
assert_eq!(parse_fstring("{5!"), Err(UnclosedLbrace)); assert_eq!(parse_fstring_error("{5!"), UnclosedLbrace);
assert_eq!(parse_fstring("abc{!a 'cat'}"), Err(EmptyExpression)); assert_eq!(parse_fstring_error("abc{!a 'cat'}"), EmptyExpression);
assert_eq!(parse_fstring("{!a"), Err(EmptyExpression)); assert_eq!(parse_fstring_error("{!a"), EmptyExpression);
assert_eq!(parse_fstring("{ !a}"), Err(EmptyExpression)); assert_eq!(parse_fstring_error("{ !a}"), EmptyExpression);
assert_eq!(parse_fstring("{5!}"), Err(InvalidConversionFlag)); assert_eq!(parse_fstring_error("{5!}"), InvalidConversionFlag);
assert_eq!(parse_fstring("{5!x}"), Err(InvalidConversionFlag)); assert_eq!(parse_fstring_error("{5!x}"), InvalidConversionFlag);
assert_eq!(parse_fstring("{a:{a:{b}}}"), Err(ExpressionNestedTooDeeply)); assert_eq!(
parse_fstring_error("{a:{a:{b}}}"),
ExpressionNestedTooDeeply
);
assert_eq!(parse_fstring("{a:b}}"), Err(SingleRbrace)); assert_eq!(parse_fstring_error("{a:b}}"), SingleRbrace);
assert_eq!(parse_fstring("}"), Err(SingleRbrace)); assert_eq!(parse_fstring_error("}"), SingleRbrace);
assert_eq!(parse_fstring("{a:{b}"), Err(UnclosedLbrace)); assert_eq!(parse_fstring_error("{a:{b}"), UnclosedLbrace);
assert_eq!(parse_fstring("{"), Err(UnclosedLbrace)); assert_eq!(parse_fstring_error("{"), UnclosedLbrace);
assert_eq!(parse_fstring("{}"), Err(EmptyExpression)); assert_eq!(parse_fstring_error("{}"), EmptyExpression);
// TODO: check for InvalidExpression enum? // TODO: check for InvalidExpression enum?
assert!(parse_fstring("{class}").is_err()); assert!(parse_fstring("{class}").is_err());