ruff/scripts/update_ambiguous_characters.py
qdegraaf 93b2bd7184
[perflint] Add PERF401 and PERF402 rules (#5298)
## Summary

Adds `PERF401` and `PERF402` mirroring `W8401` and `W8402` from
https://github.com/tonybaloney/perflint

The implementation is not especially sophisticated, but it should be at parity with the upstream
implementation, judging by:
c07391c176/perflint/comprehension_checker.py (L42-L73)

It essentially checks (see the sketch below):

- Whether the body of a for-loop is just one statement
- If that statement is an `if` whose body contains a call to `append()`, we flag
`PERF401` and suggest a list comprehension
- If that statement is a plain call to `append()` or `insert()`, we flag
`PERF402` and suggest `list()` or `list.copy()`
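
For illustration, a rough sketch of the loop shapes each rule targets (the variable names are made up for this example, not taken from the fixtures):

```python
items = range(10)

# PERF401: the loop body is a single `if` whose body appends to a list.
even = []
for i in items:
    if i % 2 == 0:
        even.append(i)
# Suggested rewrite: even = [i for i in items if i % 2 == 0]

# PERF402: the loop body is a single bare `append()` (or `insert()`).
copied = []
for i in items:
    copied.append(i)
# Suggested rewrite: copied = list(items)
```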

For `PERF401`, I've set the violation to flag only the first `append()` call in a long
`if-else` statement. Happy to change this to some other location, or to emit multiple
violations, if that makes more sense.
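
As a hypothetical illustration of that choice (again not taken from the fixtures):

```python
values = range(10)
results = []
for value in values:
    if value > 5:
        results.append(value)      # PERF401 is reported here...
    else:
        results.append(value * 2)  # ...but not on this second `append()`.
```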

## Test Plan

Fixtures were added with the relevant scenarios for both rules.

## Issue Links

Refers: https://github.com/astral-sh/ruff/issues/4789
2023-07-03 04:03:09 +00:00

67 lines
2.4 KiB
Python

"""Generate the confusables.rs file from the VS Code ambiguous.json file."""
from __future__ import annotations
import json
import subprocess
from pathlib import Path
CONFUSABLES_RS_PATH = "crates/ruff/src/rules/ruff/rules/confusables.rs"
AMBIGUOUS_JSON_URL = "https://raw.githubusercontent.com/hediet/vscode-unicode-data/main/out/ambiguous.json"
prelude = """
/// This file is auto-generated by `scripts/update_ambiguous_characters.py`.
use phf::phf_map;
/// Via: <https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json>
/// See: <https://github.com/microsoft/vscode/blob/095ddabc52b82498ee7f718a34f9dd11d59099a8/src/vs/base/common/strings.ts#L1094>
#[allow(clippy::unreadable_literal)]
pub(crate) static CONFUSABLES: phf::Map<u32, u8> = phf_map! {
""".lstrip()
postlude = """};"""


def get_mapping_data() -> dict:
    """
    Get the ambiguous character mapping data from the vscode-unicode-data repository.

    Uses the system's `curl` command to download the data,
    instead of adding a dependency to a Python-native HTTP client.
    """
    content = subprocess.check_output(
        ["curl", "-sSL", AMBIGUOUS_JSON_URL],
        encoding="utf-8",
    )
    # The content is a JSON object literal wrapped in a JSON string, so double decode:
    return json.loads(json.loads(content))


def format_confusables_rs(raw_data: dict[str, list[int]]) -> str:
    """Format the downloaded data into a Rust source file."""
    # The input data contains duplicate entries
    flattened_items: set[tuple[int, int]] = set()
    for _category, items in raw_data.items():
        assert len(items) % 2 == 0, "Expected pairs of items"
        for i in range(0, len(items), 2):
            flattened_items.add((items[i], items[i + 1]))

    tuples = [f" {left}u32 => {right},\n" for left, right in sorted(flattened_items)]
    print(f"{len(tuples)} confusable tuples.")
    return prelude + "".join(tuples) + postlude


def main() -> None:
    print("Retrieving data...")
    mapping_data = get_mapping_data()
    formatted_data = format_confusables_rs(mapping_data)
    confusables_path = Path(__file__).parent.parent / CONFUSABLES_RS_PATH
    confusables_path.write_text(formatted_data, encoding="utf-8")
    print("Formatting Rust file with cargo fmt...")
    subprocess.check_call(["cargo", "fmt", "--", confusables_path])
    print("Done.")


if __name__ == "__main__":
    main()