Include radix base prefix in large number representation (#7700)

## Summary

When lexing a number like `0x995DC9BBDF1939FA` that exceeds our small
number representation, we were only storing the portion after the radix
prefix (in this case, `995DC9BBDF1939FA`). When using that representation
in code generation, this could lead to invalid syntax, since
`995DC9BBDF1939FA` on its own is not a valid integer literal.

This PR modifies the code to store the full span, including the radix
prefix.

See:
https://github.com/astral-sh/ruff/issues/7455#issuecomment-1739802958.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2023-09-28 16:38:06 -04:00 committed by GitHub
parent 316f75987d
commit f45281345d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 25 additions and 7 deletions

View file

@ -261,10 +261,14 @@ impl<'source> Lexer<'source> {
'x' | 'o' | 'b'
));
// Lex the portion of the token after the base prefix (e.g., `9D5` in `0x9D5`).
let mut number = LexedText::new(self.offset(), self.source);
self.radix_run(&mut number, radix);
let value = match Int::from_str_radix(number.as_str(), radix.as_u32()) {
// Extract the entire number, including the base prefix (e.g., `0x9D5`).
let token = &self.source[self.token_range()];
let value = match Int::from_str_radix(number.as_str(), radix.as_u32(), token) {
Ok(int) => int,
Err(err) => {
return Err(LexicalError {
@ -1462,7 +1466,8 @@ def f(arg=%timeit a = b):
#[test]
fn test_numbers() {
let source = "0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j 000";
let source =
"0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j 000 0x995DC9BBDF1939FA";
assert_debug_snapshot!(lex_source(source));
}

View file

@ -77,8 +77,14 @@ expression: lex_source(source)
},
60..63,
),
(
Int {
value: 0x995DC9BBDF1939FA,
},
64..82,
),
(
Newline,
63..63,
82..82,
),
]