mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-27 12:29:28 +00:00
Autoformat confusable units (#4430)
I've seen errors crop up from mixing the visually identical micro sign (µ, U+00B5) and Greek small letter mu (μ, U+03BC). This change follows the recommendations in the references below on which character to prefer for micro, ohm, and angstrom.

References:
* Section 22.2 Letterlike Symbols, subsection Unit Symbols, page 877 of [The Unicode Standard, Version 15.0](https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf)
* Section 2.5 Duplicated Characters of [Unicode Technical Report 25](https://www.unicode.org/reports/tr25/)
* [SI brochure](https://www.bipm.org/documents/20126/41483022/SI-Brochure-9-EN.pdf)
* [ICU `confusables.txt`](https://github.com/unicode-org/icu/blob/main/icu4c/source/data/unidata/confusables.txt)
This commit is contained in:
parent
31286e1c95
commit
9f30ccc1f4
5 changed files with 33 additions and 4 deletions
|
@ -45,3 +45,11 @@ x = f"string { # And here's a comment with an unusual parenthesis: )
|
||||||
# And here's a comment with a greek alpha: ∗
|
# And here's a comment with a greek alpha: ∗
|
||||||
foo # And here's a comment with an unusual punctuation mark: ᜵
|
foo # And here's a comment with an unusual punctuation mark: ᜵
|
||||||
}"
|
}"
|
||||||
|
|
||||||
|
# At runtime the attribute will be stored as Greek small letter mu instead of
|
||||||
|
# micro sign because of PEP 3131's NFKC normalization
|
||||||
|
class Labware:
|
||||||
|
µL = 1.5
|
||||||
|
|
||||||
|
|
||||||
|
assert getattr(Labware(), "µL") == 1.5
|
||||||
|
|
|
@ -163,7 +163,7 @@ pub(crate) fn ambiguous_unicode_character(
|
||||||
let candidate = Candidate::new(
|
let candidate = Candidate::new(
|
||||||
TextSize::try_from(relative_offset).unwrap() + range.start(),
|
TextSize::try_from(relative_offset).unwrap() + range.start(),
|
||||||
current_char,
|
current_char,
|
||||||
representant as char,
|
char::from_u32(representant).unwrap(),
|
||||||
);
|
);
|
||||||
if let Some(diagnostic) = candidate.into_diagnostic(context, settings) {
|
if let Some(diagnostic) = candidate.into_diagnostic(context, settings) {
|
||||||
diagnostics.push(diagnostic);
|
diagnostics.push(diagnostic);
|
||||||
|
@ -178,7 +178,7 @@ pub(crate) fn ambiguous_unicode_character(
|
||||||
word_candidates.push(Candidate::new(
|
word_candidates.push(Candidate::new(
|
||||||
TextSize::try_from(relative_offset).unwrap() + range.start(),
|
TextSize::try_from(relative_offset).unwrap() + range.start(),
|
||||||
current_char,
|
current_char,
|
||||||
representant as char,
|
char::from_u32(representant).unwrap(),
|
||||||
));
|
));
|
||||||
} else {
|
} else {
|
||||||
// The current word contains at least one unambiguous unicode character.
|
// The current word contains at least one unambiguous unicode character.
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
/// Via: <https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json>
|
/// Via: <https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json>
|
||||||
/// See: <https://github.com/microsoft/vscode/blob/095ddabc52b82498ee7f718a34f9dd11d59099a8/src/vs/base/common/strings.ts#L1094>
|
/// See: <https://github.com/microsoft/vscode/blob/095ddabc52b82498ee7f718a34f9dd11d59099a8/src/vs/base/common/strings.ts#L1094>
|
||||||
pub(crate) fn confusable(c: u32) -> Option<u8> {
|
pub(crate) fn confusable(c: u32) -> Option<u32> {
|
||||||
let result = match c {
|
let result = match c {
|
||||||
160u32 => 32,
|
160u32 => 32,
|
||||||
180u32 => 96,
|
180u32 => 96,
|
||||||
|
@ -1586,6 +1586,9 @@ pub(crate) fn confusable(c: u32) -> Option<u8> {
|
||||||
130_039_u32 => 55,
|
130_039_u32 => 55,
|
||||||
130_040_u32 => 56,
|
130_040_u32 => 56,
|
||||||
130_041_u32 => 57,
|
130_041_u32 => 57,
|
||||||
|
0x212B => 0x00C5,
|
||||||
|
0x2126 => 0x03A9,
|
||||||
|
0x00B5 => 0x03BC,
|
||||||
_ => return None,
|
_ => return None,
|
||||||
};
|
};
|
||||||
Some(result)
|
Some(result)
|
||||||
|
|
|
@ -155,4 +155,10 @@ confusables.py:46:62: RUF003 Comment contains ambiguous `᜵` (PHILIPPINE SINGLE
|
||||||
47 | }"
|
47 | }"
|
||||||
|
|
|
|
||||||
|
|
||||||
|
confusables.py:55:28: RUF001 String contains ambiguous `µ` (MICRO SIGN). Did you mean `μ` (GREEK SMALL LETTER MU)?
|
||||||
|
|
|
||||||
|
55 | assert getattr(Labware(), "µL") == 1.5
|
||||||
|
| ^ RUF001
|
||||||
|
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -51,7 +51,7 @@ def format_number(number: int) -> str:
|
||||||
|
|
||||||
def format_confusables_rs(raw_data: dict[str, list[int]]) -> str:
|
def format_confusables_rs(raw_data: dict[str, list[int]]) -> str:
|
||||||
"""Format the downloaded data into a Rust source file."""
|
"""Format the downloaded data into a Rust source file."""
|
||||||
# The input data contains duplicate entries
|
# The input data contains duplicate entries.
|
||||||
flattened_items: set[tuple[int, int]] = set()
|
flattened_items: set[tuple[int, int]] = set()
|
||||||
for _category, items in raw_data.items():
|
for _category, items in raw_data.items():
|
||||||
assert len(items) % 2 == 0, "Expected pairs of items"
|
assert len(items) % 2 == 0, "Expected pairs of items"
|
||||||
|
@ -63,6 +63,18 @@ def format_confusables_rs(raw_data: dict[str, list[int]]) -> str:
|
||||||
for left, right in sorted(flattened_items)
|
for left, right in sorted(flattened_items)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Add some additional confusable pairs that are not included in the VS Code data,
|
||||||
|
# as they're unicode-to-unicode confusables, not unicode-to-ASCII confusables.
|
||||||
|
confusable_units = [
|
||||||
|
# ANGSTROM SIGN → LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
|
("0x212B", "0x00C5"),
|
||||||
|
# OHM SIGN → GREEK CAPITAL LETTER OMEGA
|
||||||
|
("0x2126", "0x03A9"),
|
||||||
|
# MICRO SIGN → GREEK SMALL LETTER MU
|
||||||
|
("0x00B5", "0x03BC"),
|
||||||
|
]
|
||||||
|
tuples += [f" {left} => {right},\n" for left, right in confusable_units]
|
||||||
|
|
||||||
print(f"{len(tuples)} confusable tuples.")
|
print(f"{len(tuples)} confusable tuples.")
|
||||||
|
|
||||||
return prelude + "".join(tuples) + postlude
|
return prelude + "".join(tuples) + postlude
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue