Reduce Result<Tok, LexicalError> size by using Box<str> instead of String (#9885)

This commit is contained in:
Micha Reiser 2024-02-08 21:36:22 +01:00 committed by GitHub
parent 9027169125
commit fe7d965334
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 454 additions and 425 deletions

View file

@ -107,10 +107,10 @@ where
fn next(&mut self) -> Option<Self::Item> {
let result = match self.inner.next()? {
Ok((tok, range)) => Ok((tok, range + self.start_offset)),
Err(error) => Err(LexicalError {
location: error.location + self.start_offset,
..error
}),
Err(error) => {
let location = error.location() + self.start_offset;
Err(LexicalError::new(error.into_error(), location))
}
};
Some(result)
@ -241,7 +241,7 @@ impl<'source> Lexer<'source> {
"yield" => Tok::Yield,
_ => {
return Ok(Tok::Name {
name: text.to_string(),
name: text.to_string().into_boxed_str(),
})
}
};
@ -284,10 +284,10 @@ impl<'source> Lexer<'source> {
let value = match Int::from_str_radix(number.as_str(), radix.as_u32(), token) {
Ok(int) => int,
Err(err) => {
return Err(LexicalError {
error: LexicalErrorType::OtherError(format!("{err:?}")),
location: self.token_range().start(),
});
return Err(LexicalError::new(
LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()),
self.token_range().start(),
));
}
};
Ok(Tok::Int { value })
@ -309,10 +309,10 @@ impl<'source> Lexer<'source> {
number.push('.');
if self.cursor.eat_char('_') {
return Err(LexicalError {
error: LexicalErrorType::OtherError("Invalid Syntax".to_owned()),
location: self.offset() - TextSize::new(1),
});
return Err(LexicalError::new(
LexicalErrorType::OtherError("Invalid Syntax".to_string().into_boxed_str()),
self.offset() - TextSize::new(1),
));
}
self.radix_run(&mut number, Radix::Decimal);
@ -340,9 +340,13 @@ impl<'source> Lexer<'source> {
if is_float {
// Improvement: Use `Cow` instead of pushing to value text
let value = f64::from_str(number.as_str()).map_err(|_| LexicalError {
error: LexicalErrorType::OtherError("Invalid decimal literal".to_owned()),
location: self.token_start(),
let value = f64::from_str(number.as_str()).map_err(|_| {
LexicalError::new(
LexicalErrorType::OtherError(
"Invalid decimal literal".to_string().into_boxed_str(),
),
self.token_start(),
)
})?;
// Parse trailing 'j':
@ -364,18 +368,20 @@ impl<'source> Lexer<'source> {
Ok(value) => {
if start_is_zero && value.as_u8() != Some(0) {
// Leading zeros in decimal integer literals are not permitted.
return Err(LexicalError {
error: LexicalErrorType::OtherError("Invalid Token".to_owned()),
location: self.token_range().start(),
});
return Err(LexicalError::new(
LexicalErrorType::OtherError(
"Invalid Token".to_string().into_boxed_str(),
),
self.token_range().start(),
));
}
value
}
Err(err) => {
return Err(LexicalError {
error: LexicalErrorType::OtherError(format!("{err:?}")),
location: self.token_range().start(),
})
return Err(LexicalError::new(
LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()),
self.token_range().start(),
))
}
};
Ok(Tok::Int { value })
@ -411,7 +417,7 @@ impl<'source> Lexer<'source> {
let offset = memchr::memchr2(b'\n', b'\r', bytes).unwrap_or(bytes.len());
self.cursor.skip_bytes(offset);
Tok::Comment(self.token_text().to_string())
Tok::Comment(self.token_text().to_string().into_boxed_str())
}
/// Lex a single IPython escape command.
@ -508,12 +514,15 @@ impl<'source> Lexer<'source> {
2 => IpyEscapeKind::Help2,
_ => unreachable!("`question_count` is always 1 or 2"),
};
return Tok::IpyEscapeCommand { kind, value };
return Tok::IpyEscapeCommand {
kind,
value: value.into_boxed_str(),
};
}
'\n' | '\r' | EOF_CHAR => {
return Tok::IpyEscapeCommand {
kind: escape_kind,
value,
value: value.into_boxed_str(),
};
}
c => {
@ -584,10 +593,10 @@ impl<'source> Lexer<'source> {
} else {
FStringErrorType::UnterminatedString
};
return Err(LexicalError {
error: LexicalErrorType::FStringError(error),
location: self.offset(),
});
return Err(LexicalError::new(
LexicalErrorType::FStringError(error),
self.offset(),
));
}
'\n' | '\r' if !fstring.is_triple_quoted() => {
// If we encounter a newline while we're in a format spec, then
@ -597,10 +606,10 @@ impl<'source> Lexer<'source> {
if in_format_spec {
break;
}
return Err(LexicalError {
error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString),
location: self.offset(),
});
return Err(LexicalError::new(
LexicalErrorType::FStringError(FStringErrorType::UnterminatedString),
self.offset(),
));
}
'\\' => {
self.cursor.bump(); // '\'
@ -673,7 +682,7 @@ impl<'source> Lexer<'source> {
normalized
};
Ok(Some(Tok::FStringMiddle {
value,
value: value.into_boxed_str(),
is_raw: fstring.is_raw_string(),
triple_quoted: fstring.is_triple_quoted(),
}))
@ -705,18 +714,16 @@ impl<'source> Lexer<'source> {
if fstring.quote_char() == quote
&& fstring.is_triple_quoted() == triple_quoted
{
return Err(LexicalError {
error: LexicalErrorType::FStringError(
FStringErrorType::UnclosedLbrace,
),
location: self.cursor.text_len(),
});
return Err(LexicalError::new(
LexicalErrorType::FStringError(FStringErrorType::UnclosedLbrace),
self.cursor.text_len(),
));
}
}
return Err(LexicalError {
error: LexicalErrorType::Eof,
location: self.cursor.text_len(),
});
return Err(LexicalError::new(
LexicalErrorType::Eof,
self.cursor.text_len(),
));
};
// Rare case: if there are an odd number of backslashes before the quote, then
@ -756,18 +763,16 @@ impl<'source> Lexer<'source> {
if fstring.quote_char() == quote
&& fstring.is_triple_quoted() == triple_quoted
{
return Err(LexicalError {
error: LexicalErrorType::FStringError(
FStringErrorType::UnclosedLbrace,
),
location: self.offset(),
});
return Err(LexicalError::new(
LexicalErrorType::FStringError(FStringErrorType::UnclosedLbrace),
self.offset(),
));
}
}
return Err(LexicalError {
error: LexicalErrorType::StringError,
location: self.offset(),
});
return Err(LexicalError::new(
LexicalErrorType::StringError,
self.offset(),
));
};
// Rare case: if there are an odd number of backslashes before the quote, then
@ -797,20 +802,22 @@ impl<'source> Lexer<'source> {
// matches with f-strings quotes and if it is, then this must be a
// missing '}' token so raise the proper error.
if fstring.quote_char() == quote && !fstring.is_triple_quoted() {
return Err(LexicalError {
error: LexicalErrorType::FStringError(
return Err(LexicalError::new(
LexicalErrorType::FStringError(
FStringErrorType::UnclosedLbrace,
),
location: self.offset() - TextSize::new(1),
});
self.offset() - TextSize::new(1),
));
}
}
return Err(LexicalError {
error: LexicalErrorType::OtherError(
"EOL while scanning string literal".to_owned(),
return Err(LexicalError::new(
LexicalErrorType::OtherError(
"EOL while scanning string literal"
.to_string()
.into_boxed_str(),
),
location: self.offset() - TextSize::new(1),
});
self.offset() - TextSize::new(1),
));
}
Some(ch) if ch == quote => {
break self.offset() - TextSize::new(1);
@ -821,7 +828,9 @@ impl<'source> Lexer<'source> {
};
Ok(Tok::String {
value: self.source[TextRange::new(value_start, value_end)].to_string(),
value: self.source[TextRange::new(value_start, value_end)]
.to_string()
.into_boxed_str(),
kind,
triple_quoted,
})
@ -889,10 +898,10 @@ impl<'source> Lexer<'source> {
Ok((identifier, self.token_range()))
} else {
Err(LexicalError {
error: LexicalErrorType::UnrecognizedToken { tok: c },
location: self.token_start(),
})
Err(LexicalError::new(
LexicalErrorType::UnrecognizedToken { tok: c },
self.token_start(),
))
}
} else {
// Reached the end of the file. Emit a trailing newline token if not at the beginning of a logical line,
@ -915,15 +924,12 @@ impl<'source> Lexer<'source> {
if self.cursor.eat_char('\r') {
self.cursor.eat_char('\n');
} else if self.cursor.is_eof() {
return Err(LexicalError {
error: LexicalErrorType::Eof,
location: self.token_start(),
});
return Err(LexicalError::new(LexicalErrorType::Eof, self.token_start()));
} else if !self.cursor.eat_char('\n') {
return Err(LexicalError {
error: LexicalErrorType::LineContinuationError,
location: self.token_start(),
});
return Err(LexicalError::new(
LexicalErrorType::LineContinuationError,
self.token_start(),
));
}
}
// Form feed
@ -956,15 +962,12 @@ impl<'source> Lexer<'source> {
if self.cursor.eat_char('\r') {
self.cursor.eat_char('\n');
} else if self.cursor.is_eof() {
return Err(LexicalError {
error: LexicalErrorType::Eof,
location: self.token_start(),
});
return Err(LexicalError::new(LexicalErrorType::Eof, self.token_start()));
} else if !self.cursor.eat_char('\n') {
return Err(LexicalError {
error: LexicalErrorType::LineContinuationError,
location: self.token_start(),
});
return Err(LexicalError::new(
LexicalErrorType::LineContinuationError,
self.token_start(),
));
}
indentation = Indentation::root();
}
@ -1015,10 +1018,10 @@ impl<'source> Lexer<'source> {
Some((Tok::Indent, self.token_range()))
}
Err(_) => {
return Err(LexicalError {
error: LexicalErrorType::IndentationError,
location: self.offset(),
});
return Err(LexicalError::new(
LexicalErrorType::IndentationError,
self.offset(),
));
}
};
@ -1031,10 +1034,7 @@ impl<'source> Lexer<'source> {
if self.nesting > 0 {
// Reset the nesting to avoid going into infinite loop.
self.nesting = 0;
return Err(LexicalError {
error: LexicalErrorType::Eof,
location: self.offset(),
});
return Err(LexicalError::new(LexicalErrorType::Eof, self.offset()));
}
// Next, insert a trailing newline, if required.
@ -1199,10 +1199,10 @@ impl<'source> Lexer<'source> {
'}' => {
if let Some(fstring) = self.fstrings.current_mut() {
if fstring.nesting() == self.nesting {
return Err(LexicalError {
error: LexicalErrorType::FStringError(FStringErrorType::SingleRbrace),
location: self.token_start(),
});
return Err(LexicalError::new(
LexicalErrorType::FStringError(FStringErrorType::SingleRbrace),
self.token_start(),
));
}
fstring.try_end_format_spec(self.nesting);
}
@ -1293,10 +1293,10 @@ impl<'source> Lexer<'source> {
_ => {
self.state = State::Other;
return Err(LexicalError {
error: LexicalErrorType::UnrecognizedToken { tok: c },
location: self.token_start(),
});
return Err(LexicalError::new(
LexicalErrorType::UnrecognizedToken { tok: c },
self.token_start(),
));
}
};
@ -1357,9 +1357,9 @@ impl FusedIterator for Lexer<'_> {}
#[derive(Debug, Clone, PartialEq)]
pub struct LexicalError {
/// The type of error that occurred.
pub error: LexicalErrorType,
error: LexicalErrorType,
/// The location of the error.
pub location: TextSize,
location: TextSize,
}
impl LexicalError {
@ -1367,19 +1367,31 @@ impl LexicalError {
pub fn new(error: LexicalErrorType, location: TextSize) -> Self {
Self { error, location }
}
pub fn error(&self) -> &LexicalErrorType {
&self.error
}
pub fn into_error(self) -> LexicalErrorType {
self.error
}
pub fn location(&self) -> TextSize {
self.location
}
}
impl std::ops::Deref for LexicalError {
type Target = LexicalErrorType;
fn deref(&self) -> &Self::Target {
&self.error
self.error()
}
}
impl std::error::Error for LexicalError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
Some(&self.error)
Some(self.error())
}
}
@ -1388,8 +1400,8 @@ impl std::fmt::Display for LexicalError {
write!(
f,
"{} at byte offset {}",
&self.error,
u32::from(self.location)
self.error(),
u32::from(self.location())
)
}
}
@ -1397,6 +1409,9 @@ impl std::fmt::Display for LexicalError {
/// Represents the different types of errors that can occur during lexing.
#[derive(Debug, Clone, PartialEq)]
pub enum LexicalErrorType {
/// A duplicate argument was found in a function definition.
DuplicateArgumentError(Box<str>),
// TODO: Can probably be removed, the places it is used seem to be able
// to use the `UnicodeError` variant instead.
#[doc(hidden)]
@ -1414,14 +1429,13 @@ pub enum LexicalErrorType {
TabsAfterSpaces,
/// A non-default argument follows a default argument.
DefaultArgumentError,
/// A duplicate argument was found in a function definition.
DuplicateArgumentError(String),
/// A positional argument follows a keyword argument.
PositionalArgumentError,
/// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`.
UnpackedArgumentError,
/// A keyword argument was repeated.
DuplicateKeywordArgumentError(String),
DuplicateKeywordArgumentError(Box<str>),
/// An unrecognized token was encountered.
UnrecognizedToken { tok: char },
/// An f-string error containing the [`FStringErrorType`].
@ -1433,7 +1447,7 @@ pub enum LexicalErrorType {
/// Occurs when a syntactically invalid assignment was encountered.
AssignmentError,
/// An unexpected error occurred.
OtherError(String),
OtherError(Box<str>),
}
impl std::error::Error for LexicalErrorType {}
@ -2053,8 +2067,8 @@ def f(arg=%timeit a = b):
match lexed.as_slice() {
[Err(error)] => {
assert_eq!(
error.error,
LexicalErrorType::UnrecognizedToken { tok: '🐦' }
error.error(),
&LexicalErrorType::UnrecognizedToken { tok: '🐦' }
);
}
result => panic!("Expected an error token but found {result:?}"),
@ -2267,7 +2281,7 @@ f"{(lambda x:{x})}"
}
fn lex_fstring_error(source: &str) -> FStringErrorType {
match lex_error(source).error {
match lex_error(source).into_error() {
LexicalErrorType::FStringError(error) => error,
err => panic!("Expected FStringError: {err:?}"),
}