[red-knot] gather type prevalence statistics (#15834)
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / ecosystem (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / benchmarks (push) Blocked by required conditions

Something Alex and I threw together during our 1:1 this morning. It allows
us to collect statistics on the prevalence of various types in a file,
most usefully TODO types or other dynamic types.
This commit is contained in:
Carl Meyer 2025-01-31 07:10:00 -08:00 committed by GitHub
parent 44ac17b3ba
commit ce769f6ae2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 131 additions and 0 deletions

View file

@ -53,6 +53,7 @@ mod mro;
mod narrow;
mod signatures;
mod slots;
mod statistics;
mod string_annotation;
mod subclass_of;
mod type_ordering;

View file

@ -61,6 +61,7 @@ use crate::types::diagnostic::{
UNDEFINED_REVEAL, UNRESOLVED_ATTRIBUTE, UNRESOLVED_IMPORT, UNSUPPORTED_OPERATOR,
};
use crate::types::mro::MroErrorKind;
use crate::types::statistics::TypeStatistics;
use crate::types::unpacker::{UnpackResult, Unpacker};
use crate::types::{
builtins_symbol, global_symbol, symbol, symbol_from_bindings, symbol_from_declarations,
@ -299,6 +300,14 @@ impl<'db> TypeInference<'db> {
self.diagnostics.shrink_to_fit();
self.deferred.shrink_to_fit();
}
/// Count how many expressions in this inference result have each type.
///
/// Every value stored in `self.expressions` contributes exactly one
/// increment to the returned [`TypeStatistics`].
pub(super) fn statistics(&self) -> TypeStatistics {
    self.expressions
        .values()
        .fold(TypeStatistics::default(), |mut tally, ty| {
            tally.increment(*ty);
            tally
        })
}
}
impl WithDiagnostics for TypeInference<'_> {

View file

@ -0,0 +1,121 @@
use crate::types::{infer_scope_types, semantic_index, Type};
use crate::Db;
use ruff_db::files::File;
use rustc_hash::FxHashMap;
/// Compute type-prevalence statistics for every expression in `file`.
///
/// Each scope listed by the file's semantic index is inferred on its own,
/// and the per-scope counts are merged into a single [`TypeStatistics`].
#[salsa::tracked(return_ref)]
pub fn type_statistics<'db>(db: &'db dyn Db, file: File) -> TypeStatistics<'db> {
    let _span = tracing::trace_span!("type_statistics", file=?file.path(db)).entered();

    tracing::debug!(
        "Gathering statistics for file '{path}'",
        path = file.path(db)
    );

    // Accumulate the counts of every scope into one running total.
    let mut totals = TypeStatistics::default();
    for scope in semantic_index(db, file).scope_ids() {
        totals.extend(&infer_scope_types(db, scope).statistics());
    }
    totals
}
/// Maps each type to the number of expressions that were counted with that type.
///
/// A type that has never been counted has no entry in the map (rather than
/// an entry with a count of zero).
#[derive(Debug, Default, Eq, PartialEq)]
pub(super) struct TypeStatistics<'db>(FxHashMap<Type<'db>, u32>);
impl<'db> TypeStatistics<'db> {
    /// Merge the counts from `other` into `self`.
    ///
    /// Counts for a type present in both maps are added together; types only
    /// present in `other` are inserted with `other`'s count.
    fn extend(&mut self, other: &TypeStatistics<'db>) {
        for (&ty, &count) in &other.0 {
            *self.0.entry(ty).or_default() += count;
        }
    }

    /// Record one more expression of type `ty`.
    pub(super) fn increment(&mut self, ty: Type<'db>) {
        *self.0.entry(ty).or_default() += 1;
    }

    /// Total number of expressions counted, summed over all types.
    // In the code visible in this file, only the tests call this.
    #[allow(unused)]
    fn expression_count(&self) -> u32 {
        self.0.values().copied().sum()
    }

    /// Number of counted expressions whose type reports `is_todo()`.
    // In the code visible in this file, only the tests call this.
    #[allow(unused)]
    fn todo_count(&self) -> u32 {
        self.0
            .iter()
            .filter_map(|(ty, count)| ty.is_todo().then_some(*count))
            .sum()
    }
}
// Unit tests for `type_statistics` and the `TypeStatistics` counting helpers.
#[cfg(test)]
mod tests {
use super::*;
use crate::db::tests::{setup_db, TestDb};
use ruff_db::files::system_path_to_file;
use ruff_db::system::DbWithTestSystem;
/// Write `source` to `filename` in the test database (via `write_dedented`)
/// and return the type statistics computed for that file.
///
/// Panics if the write fails or the path cannot be resolved to a file.
fn get_stats<'db>(
db: &'db mut TestDb,
filename: &str,
source: &str,
) -> &'db TypeStatistics<'db> {
db.write_dedented(filename, source).unwrap();
type_statistics(db, system_path_to_file(db, filename).unwrap())
}
/// A file containing only the expression `1` yields exactly one entry:
/// `IntLiteral(1)` counted once.
#[test]
fn all_static() {
let mut db = setup_db();
let stats = get_stats(&mut db, "src/foo.py", "1");
assert_eq!(stats.0, FxHashMap::from_iter([(Type::IntLiteral(1), 1)]));
}
/// Checks `todo_count` and `expression_count` on a list comprehension.
// NOTE(review): the expected numbers presumably reflect which constructs
// inference currently reports as TODO types, so they may need updating as
// inference changes — confirm when this test fails.
#[test]
fn todo_and_expression_count() {
let mut db = setup_db();
let stats = get_stats(
&mut db,
"src/foo.py",
r#"
x = [x for x in [1]]
"#,
);
assert_eq!(stats.todo_count(), 4);
assert_eq!(stats.expression_count(), 6);
}
/// The literal `1` appears twice in the source (once before the `def` and
/// once after it); the per-type count is expected to add up to 2.
#[test]
fn sum() {
let mut db = setup_db();
let stats = get_stats(
&mut db,
"src/foo.py",
r#"
1
def f():
1
"#,
);
assert_eq!(stats.0[&Type::IntLiteral(1)], 2);
}
}