mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-23 11:54:39 +00:00
Reduce Result<Tok, LexicalError>
size by using Box<str>
instead of String
(#9885)
This commit is contained in:
parent
9027169125
commit
fe7d965334
22 changed files with 454 additions and 425 deletions
|
@ -107,10 +107,10 @@ where
|
|||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let result = match self.inner.next()? {
|
||||
Ok((tok, range)) => Ok((tok, range + self.start_offset)),
|
||||
Err(error) => Err(LexicalError {
|
||||
location: error.location + self.start_offset,
|
||||
..error
|
||||
}),
|
||||
Err(error) => {
|
||||
let location = error.location() + self.start_offset;
|
||||
Err(LexicalError::new(error.into_error(), location))
|
||||
}
|
||||
};
|
||||
|
||||
Some(result)
|
||||
|
@ -241,7 +241,7 @@ impl<'source> Lexer<'source> {
|
|||
"yield" => Tok::Yield,
|
||||
_ => {
|
||||
return Ok(Tok::Name {
|
||||
name: text.to_string(),
|
||||
name: text.to_string().into_boxed_str(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
@ -284,10 +284,10 @@ impl<'source> Lexer<'source> {
|
|||
let value = match Int::from_str_radix(number.as_str(), radix.as_u32(), token) {
|
||||
Ok(int) => int,
|
||||
Err(err) => {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError(format!("{err:?}")),
|
||||
location: self.token_range().start(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()),
|
||||
self.token_range().start(),
|
||||
));
|
||||
}
|
||||
};
|
||||
Ok(Tok::Int { value })
|
||||
|
@ -309,10 +309,10 @@ impl<'source> Lexer<'source> {
|
|||
number.push('.');
|
||||
|
||||
if self.cursor.eat_char('_') {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError("Invalid Syntax".to_owned()),
|
||||
location: self.offset() - TextSize::new(1),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError("Invalid Syntax".to_string().into_boxed_str()),
|
||||
self.offset() - TextSize::new(1),
|
||||
));
|
||||
}
|
||||
|
||||
self.radix_run(&mut number, Radix::Decimal);
|
||||
|
@ -340,9 +340,13 @@ impl<'source> Lexer<'source> {
|
|||
|
||||
if is_float {
|
||||
// Improvement: Use `Cow` instead of pushing to value text
|
||||
let value = f64::from_str(number.as_str()).map_err(|_| LexicalError {
|
||||
error: LexicalErrorType::OtherError("Invalid decimal literal".to_owned()),
|
||||
location: self.token_start(),
|
||||
let value = f64::from_str(number.as_str()).map_err(|_| {
|
||||
LexicalError::new(
|
||||
LexicalErrorType::OtherError(
|
||||
"Invalid decimal literal".to_string().into_boxed_str(),
|
||||
),
|
||||
self.token_start(),
|
||||
)
|
||||
})?;
|
||||
|
||||
// Parse trailing 'j':
|
||||
|
@ -364,18 +368,20 @@ impl<'source> Lexer<'source> {
|
|||
Ok(value) => {
|
||||
if start_is_zero && value.as_u8() != Some(0) {
|
||||
// Leading zeros in decimal integer literals are not permitted.
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError("Invalid Token".to_owned()),
|
||||
location: self.token_range().start(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(
|
||||
"Invalid Token".to_string().into_boxed_str(),
|
||||
),
|
||||
self.token_range().start(),
|
||||
));
|
||||
}
|
||||
value
|
||||
}
|
||||
Err(err) => {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError(format!("{err:?}")),
|
||||
location: self.token_range().start(),
|
||||
})
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()),
|
||||
self.token_range().start(),
|
||||
))
|
||||
}
|
||||
};
|
||||
Ok(Tok::Int { value })
|
||||
|
@ -411,7 +417,7 @@ impl<'source> Lexer<'source> {
|
|||
let offset = memchr::memchr2(b'\n', b'\r', bytes).unwrap_or(bytes.len());
|
||||
self.cursor.skip_bytes(offset);
|
||||
|
||||
Tok::Comment(self.token_text().to_string())
|
||||
Tok::Comment(self.token_text().to_string().into_boxed_str())
|
||||
}
|
||||
|
||||
/// Lex a single IPython escape command.
|
||||
|
@ -508,12 +514,15 @@ impl<'source> Lexer<'source> {
|
|||
2 => IpyEscapeKind::Help2,
|
||||
_ => unreachable!("`question_count` is always 1 or 2"),
|
||||
};
|
||||
return Tok::IpyEscapeCommand { kind, value };
|
||||
return Tok::IpyEscapeCommand {
|
||||
kind,
|
||||
value: value.into_boxed_str(),
|
||||
};
|
||||
}
|
||||
'\n' | '\r' | EOF_CHAR => {
|
||||
return Tok::IpyEscapeCommand {
|
||||
kind: escape_kind,
|
||||
value,
|
||||
value: value.into_boxed_str(),
|
||||
};
|
||||
}
|
||||
c => {
|
||||
|
@ -584,10 +593,10 @@ impl<'source> Lexer<'source> {
|
|||
} else {
|
||||
FStringErrorType::UnterminatedString
|
||||
};
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::FStringError(error),
|
||||
location: self.offset(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::FStringError(error),
|
||||
self.offset(),
|
||||
));
|
||||
}
|
||||
'\n' | '\r' if !fstring.is_triple_quoted() => {
|
||||
// If we encounter a newline while we're in a format spec, then
|
||||
|
@ -597,10 +606,10 @@ impl<'source> Lexer<'source> {
|
|||
if in_format_spec {
|
||||
break;
|
||||
}
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString),
|
||||
location: self.offset(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::FStringError(FStringErrorType::UnterminatedString),
|
||||
self.offset(),
|
||||
));
|
||||
}
|
||||
'\\' => {
|
||||
self.cursor.bump(); // '\'
|
||||
|
@ -673,7 +682,7 @@ impl<'source> Lexer<'source> {
|
|||
normalized
|
||||
};
|
||||
Ok(Some(Tok::FStringMiddle {
|
||||
value,
|
||||
value: value.into_boxed_str(),
|
||||
is_raw: fstring.is_raw_string(),
|
||||
triple_quoted: fstring.is_triple_quoted(),
|
||||
}))
|
||||
|
@ -705,18 +714,16 @@ impl<'source> Lexer<'source> {
|
|||
if fstring.quote_char() == quote
|
||||
&& fstring.is_triple_quoted() == triple_quoted
|
||||
{
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::FStringError(
|
||||
FStringErrorType::UnclosedLbrace,
|
||||
),
|
||||
location: self.cursor.text_len(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::FStringError(FStringErrorType::UnclosedLbrace),
|
||||
self.cursor.text_len(),
|
||||
));
|
||||
}
|
||||
}
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::Eof,
|
||||
location: self.cursor.text_len(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::Eof,
|
||||
self.cursor.text_len(),
|
||||
));
|
||||
};
|
||||
|
||||
// Rare case: if there are an odd number of backslashes before the quote, then
|
||||
|
@ -756,18 +763,16 @@ impl<'source> Lexer<'source> {
|
|||
if fstring.quote_char() == quote
|
||||
&& fstring.is_triple_quoted() == triple_quoted
|
||||
{
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::FStringError(
|
||||
FStringErrorType::UnclosedLbrace,
|
||||
),
|
||||
location: self.offset(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::FStringError(FStringErrorType::UnclosedLbrace),
|
||||
self.offset(),
|
||||
));
|
||||
}
|
||||
}
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::StringError,
|
||||
location: self.offset(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::StringError,
|
||||
self.offset(),
|
||||
));
|
||||
};
|
||||
|
||||
// Rare case: if there are an odd number of backslashes before the quote, then
|
||||
|
@ -797,20 +802,22 @@ impl<'source> Lexer<'source> {
|
|||
// matches with f-strings quotes and if it is, then this must be a
|
||||
// missing '}' token so raise the proper error.
|
||||
if fstring.quote_char() == quote && !fstring.is_triple_quoted() {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::FStringError(
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::FStringError(
|
||||
FStringErrorType::UnclosedLbrace,
|
||||
),
|
||||
location: self.offset() - TextSize::new(1),
|
||||
});
|
||||
self.offset() - TextSize::new(1),
|
||||
));
|
||||
}
|
||||
}
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError(
|
||||
"EOL while scanning string literal".to_owned(),
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(
|
||||
"EOL while scanning string literal"
|
||||
.to_string()
|
||||
.into_boxed_str(),
|
||||
),
|
||||
location: self.offset() - TextSize::new(1),
|
||||
});
|
||||
self.offset() - TextSize::new(1),
|
||||
));
|
||||
}
|
||||
Some(ch) if ch == quote => {
|
||||
break self.offset() - TextSize::new(1);
|
||||
|
@ -821,7 +828,9 @@ impl<'source> Lexer<'source> {
|
|||
};
|
||||
|
||||
Ok(Tok::String {
|
||||
value: self.source[TextRange::new(value_start, value_end)].to_string(),
|
||||
value: self.source[TextRange::new(value_start, value_end)]
|
||||
.to_string()
|
||||
.into_boxed_str(),
|
||||
kind,
|
||||
triple_quoted,
|
||||
})
|
||||
|
@ -889,10 +898,10 @@ impl<'source> Lexer<'source> {
|
|||
|
||||
Ok((identifier, self.token_range()))
|
||||
} else {
|
||||
Err(LexicalError {
|
||||
error: LexicalErrorType::UnrecognizedToken { tok: c },
|
||||
location: self.token_start(),
|
||||
})
|
||||
Err(LexicalError::new(
|
||||
LexicalErrorType::UnrecognizedToken { tok: c },
|
||||
self.token_start(),
|
||||
))
|
||||
}
|
||||
} else {
|
||||
// Reached the end of the file. Emit a trailing newline token if not at the beginning of a logical line,
|
||||
|
@ -915,15 +924,12 @@ impl<'source> Lexer<'source> {
|
|||
if self.cursor.eat_char('\r') {
|
||||
self.cursor.eat_char('\n');
|
||||
} else if self.cursor.is_eof() {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::Eof,
|
||||
location: self.token_start(),
|
||||
});
|
||||
return Err(LexicalError::new(LexicalErrorType::Eof, self.token_start()));
|
||||
} else if !self.cursor.eat_char('\n') {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::LineContinuationError,
|
||||
location: self.token_start(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::LineContinuationError,
|
||||
self.token_start(),
|
||||
));
|
||||
}
|
||||
}
|
||||
// Form feed
|
||||
|
@ -956,15 +962,12 @@ impl<'source> Lexer<'source> {
|
|||
if self.cursor.eat_char('\r') {
|
||||
self.cursor.eat_char('\n');
|
||||
} else if self.cursor.is_eof() {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::Eof,
|
||||
location: self.token_start(),
|
||||
});
|
||||
return Err(LexicalError::new(LexicalErrorType::Eof, self.token_start()));
|
||||
} else if !self.cursor.eat_char('\n') {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::LineContinuationError,
|
||||
location: self.token_start(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::LineContinuationError,
|
||||
self.token_start(),
|
||||
));
|
||||
}
|
||||
indentation = Indentation::root();
|
||||
}
|
||||
|
@ -1015,10 +1018,10 @@ impl<'source> Lexer<'source> {
|
|||
Some((Tok::Indent, self.token_range()))
|
||||
}
|
||||
Err(_) => {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::IndentationError,
|
||||
location: self.offset(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::IndentationError,
|
||||
self.offset(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1031,10 +1034,7 @@ impl<'source> Lexer<'source> {
|
|||
if self.nesting > 0 {
|
||||
// Reset the nesting to avoid going into infinite loop.
|
||||
self.nesting = 0;
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::Eof,
|
||||
location: self.offset(),
|
||||
});
|
||||
return Err(LexicalError::new(LexicalErrorType::Eof, self.offset()));
|
||||
}
|
||||
|
||||
// Next, insert a trailing newline, if required.
|
||||
|
@ -1199,10 +1199,10 @@ impl<'source> Lexer<'source> {
|
|||
'}' => {
|
||||
if let Some(fstring) = self.fstrings.current_mut() {
|
||||
if fstring.nesting() == self.nesting {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::FStringError(FStringErrorType::SingleRbrace),
|
||||
location: self.token_start(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::FStringError(FStringErrorType::SingleRbrace),
|
||||
self.token_start(),
|
||||
));
|
||||
}
|
||||
fstring.try_end_format_spec(self.nesting);
|
||||
}
|
||||
|
@ -1293,10 +1293,10 @@ impl<'source> Lexer<'source> {
|
|||
_ => {
|
||||
self.state = State::Other;
|
||||
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::UnrecognizedToken { tok: c },
|
||||
location: self.token_start(),
|
||||
});
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::UnrecognizedToken { tok: c },
|
||||
self.token_start(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1357,9 +1357,9 @@ impl FusedIterator for Lexer<'_> {}
|
|||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct LexicalError {
|
||||
/// The type of error that occurred.
|
||||
pub error: LexicalErrorType,
|
||||
error: LexicalErrorType,
|
||||
/// The location of the error.
|
||||
pub location: TextSize,
|
||||
location: TextSize,
|
||||
}
|
||||
|
||||
impl LexicalError {
|
||||
|
@ -1367,19 +1367,31 @@ impl LexicalError {
|
|||
pub fn new(error: LexicalErrorType, location: TextSize) -> Self {
|
||||
Self { error, location }
|
||||
}
|
||||
|
||||
pub fn error(&self) -> &LexicalErrorType {
|
||||
&self.error
|
||||
}
|
||||
|
||||
pub fn into_error(self) -> LexicalErrorType {
|
||||
self.error
|
||||
}
|
||||
|
||||
pub fn location(&self) -> TextSize {
|
||||
self.location
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for LexicalError {
|
||||
type Target = LexicalErrorType;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.error
|
||||
self.error()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for LexicalError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
Some(&self.error)
|
||||
Some(self.error())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1388,8 +1400,8 @@ impl std::fmt::Display for LexicalError {
|
|||
write!(
|
||||
f,
|
||||
"{} at byte offset {}",
|
||||
&self.error,
|
||||
u32::from(self.location)
|
||||
self.error(),
|
||||
u32::from(self.location())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -1397,6 +1409,9 @@ impl std::fmt::Display for LexicalError {
|
|||
/// Represents the different types of errors that can occur during lexing.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum LexicalErrorType {
|
||||
/// A duplicate argument was found in a function definition.
|
||||
DuplicateArgumentError(Box<str>),
|
||||
|
||||
// TODO: Can probably be removed, the places it is used seem to be able
|
||||
// to use the `UnicodeError` variant instead.
|
||||
#[doc(hidden)]
|
||||
|
@ -1414,14 +1429,13 @@ pub enum LexicalErrorType {
|
|||
TabsAfterSpaces,
|
||||
/// A non-default argument follows a default argument.
|
||||
DefaultArgumentError,
|
||||
/// A duplicate argument was found in a function definition.
|
||||
DuplicateArgumentError(String),
|
||||
|
||||
/// A positional argument follows a keyword argument.
|
||||
PositionalArgumentError,
|
||||
/// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`.
|
||||
UnpackedArgumentError,
|
||||
/// A keyword argument was repeated.
|
||||
DuplicateKeywordArgumentError(String),
|
||||
DuplicateKeywordArgumentError(Box<str>),
|
||||
/// An unrecognized token was encountered.
|
||||
UnrecognizedToken { tok: char },
|
||||
/// An f-string error containing the [`FStringErrorType`].
|
||||
|
@ -1433,7 +1447,7 @@ pub enum LexicalErrorType {
|
|||
/// Occurs when a syntactically invalid assignment was encountered.
|
||||
AssignmentError,
|
||||
/// An unexpected error occurred.
|
||||
OtherError(String),
|
||||
OtherError(Box<str>),
|
||||
}
|
||||
|
||||
impl std::error::Error for LexicalErrorType {}
|
||||
|
@ -2053,8 +2067,8 @@ def f(arg=%timeit a = b):
|
|||
match lexed.as_slice() {
|
||||
[Err(error)] => {
|
||||
assert_eq!(
|
||||
error.error,
|
||||
LexicalErrorType::UnrecognizedToken { tok: '🐦' }
|
||||
error.error(),
|
||||
&LexicalErrorType::UnrecognizedToken { tok: '🐦' }
|
||||
);
|
||||
}
|
||||
result => panic!("Expected an error token but found {result:?}"),
|
||||
|
@ -2267,7 +2281,7 @@ f"{(lambda x:{x})}"
|
|||
}
|
||||
|
||||
fn lex_fstring_error(source: &str) -> FStringErrorType {
|
||||
match lex_error(source).error {
|
||||
match lex_error(source).into_error() {
|
||||
LexicalErrorType::FStringError(error) => error,
|
||||
err => panic!("Expected FStringError: {err:?}"),
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue