mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 18:58:04 +00:00
Use characters instead of u32
in confusable map (#8463)
This commit is contained in:
parent
41e538a748
commit
7c12eaf322
3 changed files with 1602 additions and 1594 deletions
|
@ -163,7 +163,7 @@ pub(crate) fn ambiguous_unicode_character(
|
|||
let candidate = Candidate::new(
|
||||
TextSize::try_from(relative_offset).unwrap() + range.start(),
|
||||
current_char,
|
||||
char::from_u32(representant).unwrap(),
|
||||
representant,
|
||||
);
|
||||
if let Some(diagnostic) = candidate.into_diagnostic(context, settings) {
|
||||
diagnostics.push(diagnostic);
|
||||
|
@ -178,7 +178,7 @@ pub(crate) fn ambiguous_unicode_character(
|
|||
word_candidates.push(Candidate::new(
|
||||
TextSize::try_from(relative_offset).unwrap() + range.start(),
|
||||
current_char,
|
||||
char::from_u32(representant).unwrap(),
|
||||
representant,
|
||||
));
|
||||
} else {
|
||||
// The current word contains at least one unambiguous unicode character.
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -13,7 +13,7 @@ prelude = """
|
|||
|
||||
/// Via: <https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json>
|
||||
/// See: <https://github.com/microsoft/vscode/blob/095ddabc52b82498ee7f718a34f9dd11d59099a8/src/vs/base/common/strings.ts#L1094>
|
||||
pub(crate) fn confusable(c: u32) -> Option<u8> {
|
||||
pub(crate) fn confusable(c: u32) -> Option<char> {
|
||||
let result = match c {
|
||||
|
||||
""".lstrip()
|
||||
|
@ -49,6 +49,14 @@ def format_number(number: int) -> str:
|
|||
return f"{number}u32"
|
||||
|
||||
|
||||
def format_char(number: int) -> str:
    """Format a Python integer as the body of a Rust character literal.

    The returned text is meant to be placed between single quotes by the
    caller, so every character that Rust does not accept unescaped inside a
    character literal (backslash, single quote, newline, carriage return and
    tab) is returned as its escape sequence. Any other code point is returned
    as the character itself.
    """
    # Characters that may not appear raw inside a Rust `'...'` literal.
    escapes = {
        "\\": "\\\\",
        "'": "\\'",
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    }
    char = chr(number)
    return escapes.get(char, char)
|
||||
|
||||
|
||||
def format_confusables_rs(raw_data: dict[str, list[int]]) -> str:
|
||||
"""Format the downloaded data into a Rust source file."""
|
||||
# The input data contains duplicate entries.
|
||||
|
@ -59,7 +67,7 @@ def format_confusables_rs(raw_data: dict[str, list[int]]) -> str:
|
|||
flattened_items.add((items[i], items[i + 1]))
|
||||
|
||||
tuples = [
|
||||
f" {format_number(left)} => {right},\n"
|
||||
f" {format_number(left)} => '{format_char(right)}',\n"
|
||||
for left, right in sorted(flattened_items)
|
||||
]
|
||||
|
||||
|
@ -67,13 +75,13 @@ def format_confusables_rs(raw_data: dict[str, list[int]]) -> str:
|
|||
# as they're unicode-to-unicode confusables, not unicode-to-ASCII confusables.
|
||||
confusable_units = [
|
||||
# ANGSTROM SIGN → LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
("0x212B", "0x00C5"),
|
||||
("0x212B", chr(0x00C5)),
|
||||
# OHM SIGN → GREEK CAPITAL LETTER OMEGA
|
||||
("0x2126", "0x03A9"),
|
||||
("0x2126", chr(0x03A9)),
|
||||
# MICRO SIGN → GREEK SMALL LETTER MU
|
||||
("0x00B5", "0x03BC"),
|
||||
("0x00B5", chr(0x03BC)),
|
||||
]
|
||||
tuples += [f" {left} => {right},\n" for left, right in confusable_units]
|
||||
tuples += [f" {left} => '{right}',\n" for left, right in confusable_units]
|
||||
|
||||
print(f"{len(tuples)} confusable tuples.")
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue