mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 10:49:50 +00:00
Autoformat confusable units (#4430)
I've seen errors crop up from mixing up the visually identical micro sign and Greek mu characters. This change follows the recommendations in the references below on which character to prefer for micro, ohm, and angstrom.

References:

* Section 22.2 Letterlike Symbols, subsection Unit Symbols, page 877 of [The Unicode Standard, Version 15.0](https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf)
* Section 2.5 Duplicated Characters of [Unicode Technical Report 25](https://www.unicode.org/reports/tr25/)
* [SI brochure](https://www.bipm.org/documents/20126/41483022/SI-Brochure-9-EN.pdf)
* https://github.com/unicode-org/icu/blob/main/icu4c/source/data/unidata/confusables.txt
This commit is contained in:
parent
31286e1c95
commit
9f30ccc1f4
5 changed files with 33 additions and 4 deletions
|
@ -51,7 +51,7 @@ def format_number(number: int) -> str:
|
|||
|
||||
def format_confusables_rs(raw_data: dict[str, list[int]]) -> str:
|
||||
"""Format the downloaded data into a Rust source file."""
|
||||
# The input data contains duplicate entries
|
||||
# The input data contains duplicate entries.
|
||||
flattened_items: set[tuple[int, int]] = set()
|
||||
for _category, items in raw_data.items():
|
||||
assert len(items) % 2 == 0, "Expected pairs of items"
|
||||
|
@ -63,6 +63,18 @@ def format_confusables_rs(raw_data: dict[str, list[int]]) -> str:
|
|||
for left, right in sorted(flattened_items)
|
||||
]
|
||||
|
||||
# Add some additional confusable pairs that are not included in the VS Code data,
|
||||
# as they're unicode-to-unicode confusables, not unicode-to-ASCII confusables.
|
||||
confusable_units = [
|
||||
# ANGSTROM SIGN → LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
("0x212B", "0x00C5"),
|
||||
# OHM SIGN → GREEK CAPITAL LETTER OMEGA
|
||||
("0x2126", "0x03A9"),
|
||||
# MICRO SIGN → GREEK SMALL LETTER MU
|
||||
("0x00B5", "0x03BC"),
|
||||
]
|
||||
tuples += [f" {left} => {right},\n" for left, right in confusable_units]
|
||||
|
||||
print(f"{len(tuples)} confusable tuples.")
|
||||
|
||||
return prelude + "".join(tuples) + postlude
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue