mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-01 09:22:19 +00:00
Use phf for confusables to reduce llvm lines (#4926)
* Use phf for confusables to reduce llvm lines ## Summary This replaces FxHashMap for the confusables with a perfect hash map from the [phf crate](https://github.com/rust-phf/rust-phf) to reduce the generated llvm instructions. A perfect hash function is one that doesn't have any collisions. We can build one because we know all keys at compile time. This improves hashmap efficiency, even though this is likely not noticeable in our case (except someone has a large non-english crate to test on). The original hashmap contained a lot of duplicates, which i had to remove when phf_map complained, i did so by sorting the keys. The important part that it reduces the llvm instructions generated (#3808, `RUSTFLAGS="-Csymbol-mangling-version=v0" cargo llvm-lines -p ruff --lib | head -20`): ``` Lines Copies Function name ----- ------ ------------- 1740502 38973 (TOTAL) 27423 (1.6%, 1.6%) 1 (0.0%, 0.0%) ruff[cef4c65d96248843]::rules::ruff::rules::confusables::CONFUSABLES::{closure#0} 10193 (0.6%, 2.2%) 1 (0.0%, 0.0%) <ruff[cef4c65d96248843]::codes::RuleCodePrefix>::iter 8107 (0.5%, 2.6%) 1 (0.0%, 0.0%) <ruff[cef4c65d96248843]::codes::Rule>::noqa_code 7345 (0.4%, 3.0%) 1 (0.0%, 0.0%) <ruff[cef4c65d96248843]::checkers::ast::Checker as ruff_python_ast[3778b140caf21545]::visitor::Visitor>::visit_stmt 6412 (0.4%, 3.4%) 1 (0.0%, 0.0%) <<ruff[cef4c65d96248843]::settings::options::Options as serde[d89b1b632568f5a3]:🇩🇪:Deserialize>::deserialize::__Visitor as serde[d89b1b632568f5a3]:🇩🇪:Visitor>::visit_map::<toml_edit[7e3a6c5e67260672]:🇩🇪:spanned::SpannedDeserializer<toml_edit[7e3a6c5e67260672]:🇩🇪:value::ValueDeserializer>> 6412 (0.4%, 3.8%) 1 (0.0%, 0.0%) <<ruff[cef4c65d96248843]::settings::options::Options as serde[d89b1b632568f5a3]:🇩🇪:Deserialize>::deserialize::__Visitor as serde[d89b1b632568f5a3]:🇩🇪:Visitor>::visit_map::<toml_edit[7e3a6c5e67260672]:🇩🇪:table::TableMapAccess> 6409 (0.4%, 4.2%) 1 (0.0%, 0.0%) <<ruff[cef4c65d96248843]::settings::options::Options as serde[d89b1b632568f5a3]:🇩🇪:Deserialize>::deserialize::__Visitor as serde[d89b1b632568f5a3]:🇩🇪:Visitor>::visit_map::<toml_edit[7e3a6c5e67260672]:🇩🇪:datetime::DatetimeDeserializer> 5696 (0.3%, 4.5%) 1 (0.0%, 0.0%) <ruff[cef4c65d96248843]::checkers::ast::Checker as ruff_python_ast[3778b140caf21545]::visitor::Visitor>::visit_expr 4448 (0.3%, 4.7%) 1 (0.0%, 0.0%) ruff[cef4c65d96248843]::flake8_to_ruff::converter::convert 3702 (0.2%, 4.9%) 1 (0.0%, 0.0%) <&ruff[cef4c65d96248843]::registry::Linter as core[da82827a87f140f9]::iter::traits::collect::IntoIterator>::into_iter 3349 (0.2%, 5.1%) 1 (0.0%, 0.0%) <ruff[cef4c65d96248843]::registry::Linter>::code_for_rule 3132 (0.2%, 5.3%) 1 (0.0%, 0.0%) <ruff[cef4c65d96248843]::codes::Rule as core[da82827a87f140f9]::fmt::Debug>::fmt 3130 (0.2%, 5.5%) 1 (0.0%, 0.0%) <&str as core[da82827a87f140f9]::convert::From<&ruff[cef4c65d96248843]::codes::Rule>>::from 3130 (0.2%, 5.7%) 1 (0.0%, 0.0%) <&str as core[da82827a87f140f9]::convert::From<ruff[cef4c65d96248843]::codes::Rule>>::from 3130 (0.2%, 5.9%) 1 (0.0%, 0.0%) <ruff[cef4c65d96248843]::codes::Rule as core[da82827a87f140f9]::convert::AsRef<str>>::as_ref 3128 (0.2%, 6.0%) 1 (0.0%, 0.0%) <ruff[cef4c65d96248843]::codes::RuleIter>::get 2669 (0.2%, 6.2%) 1 (0.0%, 0.0%) <<ruff[cef4c65d96248843]::settings::options::Options as serde[d89b1b632568f5a3]:🇩🇪:Deserialize>::deserialize::__Visitor as serde[d89b1b632568f5a3]:🇩🇪:Visitor>::visit_seq::<toml_edit[7e3a6c5e67260672]:🇩🇪:array::ArraySeqAccess> ``` After: ``` Lines Copies Function name ----- ------ ------------- 1710487 38900 (TOTAL) 10193 (0.6%, 0.6%) 1 (0.0%, 0.0%) <ruff[52408f46d2058296]::codes::RuleCodePrefix>::iter 8107 (0.5%, 1.1%) 1 (0.0%, 0.0%) <ruff[52408f46d2058296]::codes::Rule>::noqa_code 7345 (0.4%, 1.5%) 1 (0.0%, 0.0%) <ruff[52408f46d2058296]::checkers::ast::Checker as ruff_python_ast[5588cd60041c8605]::visitor::Visitor>::visit_stmt 6412 (0.4%, 1.9%) 1 (0.0%, 0.0%) <<ruff[52408f46d2058296]::settings::options::Options as serde[d89b1b632568f5a3]:🇩🇪:Deserialize>::deserialize::__Visitor as serde[d89b1b632568f5a3]:🇩🇪:Visitor>::visit_map::<toml_edit[7e3a6c5e67260672]:🇩🇪:spanned::SpannedDeserializer<toml_edit[7e3a6c5e67260672]:🇩🇪:value::ValueDeserializer>> 6412 (0.4%, 2.2%) 1 (0.0%, 0.0%) <<ruff[52408f46d2058296]::settings::options::Options as serde[d89b1b632568f5a3]:🇩🇪:Deserialize>::deserialize::__Visitor as serde[d89b1b632568f5a3]:🇩🇪:Visitor>::visit_map::<toml_edit[7e3a6c5e67260672]:🇩🇪:table::TableMapAccess> 6409 (0.4%, 2.6%) 1 (0.0%, 0.0%) <<ruff[52408f46d2058296]::settings::options::Options as serde[d89b1b632568f5a3]:🇩🇪:Deserialize>::deserialize::__Visitor as serde[d89b1b632568f5a3]:🇩🇪:Visitor>::visit_map::<toml_edit[7e3a6c5e67260672]:🇩🇪:datetime::DatetimeDeserializer> 5696 (0.3%, 3.0%) 1 (0.0%, 0.0%) <ruff[52408f46d2058296]::checkers::ast::Checker as ruff_python_ast[5588cd60041c8605]::visitor::Visitor>::visit_expr 4448 (0.3%, 3.2%) 1 (0.0%, 0.0%) ruff[52408f46d2058296]::flake8_to_ruff::converter::convert 3702 (0.2%, 3.4%) 1 (0.0%, 0.0%) <&ruff[52408f46d2058296]::registry::Linter as core[da82827a87f140f9]::iter::traits::collect::IntoIterator>::into_iter 3349 (0.2%, 3.6%) 1 (0.0%, 0.0%) <ruff[52408f46d2058296]::registry::Linter>::code_for_rule 3132 (0.2%, 3.8%) 1 (0.0%, 0.0%) <ruff[52408f46d2058296]::codes::Rule as core[da82827a87f140f9]::fmt::Debug>::fmt 3130 (0.2%, 4.0%) 1 (0.0%, 0.0%) <&str as core[da82827a87f140f9]::convert::From<&ruff[52408f46d2058296]::codes::Rule>>::from 3130 (0.2%, 4.2%) 1 (0.0%, 0.0%) <&str as core[da82827a87f140f9]::convert::From<ruff[52408f46d2058296]::codes::Rule>>::from 3130 (0.2%, 4.4%) 1 (0.0%, 0.0%) <ruff[52408f46d2058296]::codes::Rule as core[da82827a87f140f9]::convert::AsRef<str>>::as_ref 3128 (0.2%, 4.5%) 1 (0.0%, 0.0%) <ruff[52408f46d2058296]::codes::RuleIter>::get 2669 (0.2%, 4.7%) 1 (0.0%, 0.0%) <<ruff[52408f46d2058296]::settings::options::Options as serde[d89b1b632568f5a3]:🇩🇪:Deserialize>::deserialize::__Visitor as serde[d89b1b632568f5a3]:🇩🇪:Visitor>::visit_seq::<toml_edit[7e3a6c5e67260672]:🇩🇪:array::ArraySeqAccess> 2659 (0.2%, 4.9%) 1 (0.0%, 0.0%) <&ruff[52408f46d2058296]::codes::Pylint as core[da82827a87f140f9]::iter::traits::collect::IntoIterator>::into_iter ``` I'd assume this has a positive effect both on compile time and on runtime, but i don't know the actual effect on compile times and can't really measure. ## Test plan Check CI for any performance regressions. This should fix #3808 if we merge it. * clippy * Update update_ambiguous_characters.py
This commit is contained in:
parent
39a1f3980f
commit
651d89794c
4 changed files with 1624 additions and 2133 deletions
|
@ -6,6 +6,18 @@ from pathlib import Path
|
|||
CONFUSABLES_RS_PATH = "crates/ruff/src/rules/ruff/rules/confusables.rs"
|
||||
AMBIGUOUS_JSON_URL = "https://raw.githubusercontent.com/hediet/vscode-unicode-data/main/out/ambiguous.json"
|
||||
|
||||
prelude = """
|
||||
/// This file is auto-generated by `scripts/update_ambiguous_characters.py`.
|
||||
use phf::phf_map;
|
||||
|
||||
/// Via: <https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json>
|
||||
/// See: <https://github.com/microsoft/vscode/blob/095ddabc52b82498ee7f718a34f9dd11d59099a8/src/vs/base/common/strings.ts#L1094>
|
||||
#[allow(clippy::unreadable_literal)]
|
||||
pub(crate) static CONFUSABLES: phf::Map<u32, u8> = phf_map! {
|
||||
""".lstrip()
|
||||
|
||||
postlude = """};"""
|
||||
|
||||
|
||||
def get_mapping_data() -> dict:
|
||||
"""
|
||||
|
@ -22,29 +34,22 @@ def get_mapping_data() -> dict:
|
|||
return json.loads(json.loads(content))
|
||||
|
||||
|
||||
def format_confusables_rs(raw_data: dict) -> str:
|
||||
def format_confusables_rs(raw_data: dict[str, list[int]]) -> str:
|
||||
"""Format the downloaded data into a Rust source file."""
|
||||
prelude = """
|
||||
/// This file is auto-generated by scripts/update_ambiguous_characters.py.
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
/// Via: <https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json>
|
||||
/// See: <https://github.com/microsoft/vscode/blob/095ddabc52b82498ee7f718a34f9dd11d59099a8/src/vs/base/common/strings.ts#L1094>
|
||||
pub(crate) static CONFUSABLES: Lazy<FxHashMap<u32, u8>> = Lazy::new(|| {
|
||||
#[allow(clippy::unreadable_literal)]
|
||||
FxHashMap::from_iter([
|
||||
""".lstrip()
|
||||
tuples = []
|
||||
# The input data contains duplicate entries
|
||||
flattened_items: set[tuple[int, int]] = set()
|
||||
for _category, items in raw_data.items():
|
||||
assert len(items) % 2 == 0, "Expected pairs of items"
|
||||
for i in range(0, len(items), 2):
|
||||
tuples.append(f"({items[i]}, {items[i + 1]}),")
|
||||
postlude = """])});"""
|
||||
flattened_items.add((items[i], items[i + 1]))
|
||||
|
||||
tuples = []
|
||||
for left, right in sorted(flattened_items):
|
||||
tuples.append(f" {left}u32 => {right},\n")
|
||||
|
||||
print(f"{len(tuples)} confusable tuples.")
|
||||
|
||||
return prelude + "\n".join(tuples) + postlude
|
||||
return prelude + "".join(tuples) + postlude
|
||||
|
||||
|
||||
def main() -> None:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue