mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-27 12:29:28 +00:00
Implement our own small-integer optimization (#7584)
## Summary This is a follow-up to #7469 that attempts to achieve similar gains, but without introducing malachite. Instead, this PR removes the `BigInt` type altogether, instead opting for a simple enum that allows us to store small integers directly and only allocate for values greater than `i64`: ```rust /// A Python integer literal. Represents both small (fits in an `i64`) and large integers. #[derive(Clone, PartialEq, Eq, Hash)] pub struct Int(Number); #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Number { /// A "small" number that can be represented as an `i64`. Small(i64), /// A "large" number that cannot be represented as an `i64`. Big(Box<str>), } impl std::fmt::Display for Number { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Number::Small(value) => write!(f, "{value}"), Number::Big(value) => write!(f, "{value}"), } } } ``` We typically don't care about numbers greater than `i64` -- our only uses are comparisons against small constants (like `1`, `2`, `3`, etc.), so there's no real loss of information, except in one or two rules where we're now a little more conservative (with the worst-case being that we don't flag, e.g., an `itertools.pairwise` that uses an extremely large value for the slice start constant). For simplicity, a few diagnostics now show a dedicated message when they see integers that are out of the supported range (e.g., `outdated-version-block`). An additional benefit here is that we get to remove a few dependencies, especially `num-bigint`. ## Test Plan `cargo test`
This commit is contained in:
parent
65aebf127a
commit
93b5d8a0fb
40 changed files with 707 additions and 385 deletions
|
@ -31,12 +31,11 @@
|
|||
use std::iter::FusedIterator;
|
||||
use std::{char, cmp::Ordering, str::FromStr};
|
||||
|
||||
use num_bigint::BigInt;
|
||||
use num_traits::{Num, Zero};
|
||||
use ruff_python_ast::IpyEscapeKind;
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
use unicode_ident::{is_xid_continue, is_xid_start};
|
||||
|
||||
use ruff_python_ast::{Int, IpyEscapeKind};
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
|
||||
use crate::lexer::cursor::{Cursor, EOF_CHAR};
|
||||
use crate::lexer::indentation::{Indentation, Indentations};
|
||||
use crate::{
|
||||
|
@ -264,11 +263,16 @@ impl<'source> Lexer<'source> {
|
|||
|
||||
let mut number = LexedText::new(self.offset(), self.source);
|
||||
self.radix_run(&mut number, radix);
|
||||
let value =
|
||||
BigInt::from_str_radix(number.as_str(), radix.as_u32()).map_err(|e| LexicalError {
|
||||
error: LexicalErrorType::OtherError(format!("{e:?}")),
|
||||
location: self.token_range().start(),
|
||||
})?;
|
||||
|
||||
let value = match Int::from_str_radix(number.as_str(), radix.as_u32()) {
|
||||
Ok(int) => int,
|
||||
Err(err) => {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError(format!("{err:?}")),
|
||||
location: self.token_range().start(),
|
||||
});
|
||||
}
|
||||
};
|
||||
Ok(Tok::Int { value })
|
||||
}
|
||||
|
||||
|
@ -339,14 +343,24 @@ impl<'source> Lexer<'source> {
|
|||
let imag = f64::from_str(number.as_str()).unwrap();
|
||||
Ok(Tok::Complex { real: 0.0, imag })
|
||||
} else {
|
||||
let value = number.as_str().parse::<BigInt>().unwrap();
|
||||
if start_is_zero && !value.is_zero() {
|
||||
// leading zeros in decimal integer literals are not permitted
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError("Invalid Token".to_owned()),
|
||||
location: self.token_range().start(),
|
||||
});
|
||||
}
|
||||
let value = match Int::from_str(number.as_str()) {
|
||||
Ok(value) => {
|
||||
if start_is_zero && value.as_u8() != Some(0) {
|
||||
// Leading zeros in decimal integer literals are not permitted.
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError("Invalid Token".to_owned()),
|
||||
location: self.token_range().start(),
|
||||
});
|
||||
}
|
||||
value
|
||||
}
|
||||
Err(err) => {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError(format!("{err:?}")),
|
||||
location: self.token_range().start(),
|
||||
})
|
||||
}
|
||||
};
|
||||
Ok(Tok::Int { value })
|
||||
}
|
||||
}
|
||||
|
@ -1448,10 +1462,29 @@ def f(arg=%timeit a = b):
|
|||
|
||||
#[test]
|
||||
fn test_numbers() {
|
||||
let source = "0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j";
|
||||
let source = "0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j 000";
|
||||
assert_debug_snapshot!(lex_source(source));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_leading_zero_small() {
|
||||
let source = "025";
|
||||
|
||||
let lexer = lex(source, Mode::Module);
|
||||
let tokens = lexer.collect::<Result<Vec<_>, LexicalError>>();
|
||||
assert_debug_snapshot!(tokens);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_leading_zero_big() {
|
||||
let source =
|
||||
"0252222222222222522222222222225222222222222252222222222222522222222222225222222222222";
|
||||
|
||||
let lexer = lex(source, Mode::Module);
|
||||
let tokens = lexer.collect::<Result<Vec<_>, LexicalError>>();
|
||||
assert_debug_snapshot!(tokens);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_line_comment_long() {
|
||||
let source = "99232 # foo".to_string();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue