Include radix base prefix in large number representation (#7700)

## Summary

When lexing a number like `0x995DC9BBDF1939FA` that exceeds our small
number representation, we were only storing the portion after the base
(in this case, `995DC9BBDF1939FA`). When using that representation in
code generation, this could lead to invalid syntax, since
`995DC9BBDF1939FA` on its own is not a valid integer literal.

This PR modifies the code to store the full span, including the radix
prefix.

See:
https://github.com/astral-sh/ruff/issues/7455#issuecomment-1739802958.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2023-09-28 16:38:06 -04:00 committed by GitHub
parent 316f75987d
commit f45281345d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 25 additions and 7 deletions

View file

@ -40,16 +40,23 @@ impl Int {
Self(Number::Big(value.into()))
}
/// Parse an [`Int`] from a string with a given radix.
pub fn from_str_radix(s: &str, radix: u32) -> Result<Self, std::num::ParseIntError> {
match i64::from_str_radix(s, radix) {
/// Parse an [`Int`] from a string with a given radix, like `0x95D`.
///
/// Takes, as input, the numerical portion (`95D`), the parsed base (`16`), and the entire
/// token (`0x95D`).
pub fn from_str_radix(
number: &str,
radix: u32,
token: &str,
) -> Result<Self, std::num::ParseIntError> {
match i64::from_str_radix(number, radix) {
Ok(value) => Ok(Int::small(value)),
Err(err) => {
if matches!(
err.kind(),
std::num::IntErrorKind::PosOverflow | std::num::IntErrorKind::NegOverflow
) {
Ok(Int::big(s))
Ok(Int::big(token))
} else {
Err(err)
}

View file

@ -261,10 +261,14 @@ impl<'source> Lexer<'source> {
'x' | 'o' | 'b'
));
// Lex the portion of the token after the base prefix (e.g., `9D5` in `0x9D5`).
let mut number = LexedText::new(self.offset(), self.source);
self.radix_run(&mut number, radix);
let value = match Int::from_str_radix(number.as_str(), radix.as_u32()) {
// Extract the entire number, including the base prefix (e.g., `0x9D5`).
let token = &self.source[self.token_range()];
let value = match Int::from_str_radix(number.as_str(), radix.as_u32(), token) {
Ok(int) => int,
Err(err) => {
return Err(LexicalError {
@ -1462,7 +1466,8 @@ def f(arg=%timeit a = b):
#[test]
fn test_numbers() {
let source = "0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j 000";
let source =
"0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j 000 0x995DC9BBDF1939FA";
assert_debug_snapshot!(lex_source(source));
}

View file

@ -77,8 +77,14 @@ expression: lex_source(source)
},
60..63,
),
(
Int {
value: 0x995DC9BBDF1939FA,
},
64..82,
),
(
Newline,
63..63,
82..82,
),
]