ruff/crates/ruff_python_semantic/src/globals.rs
Ibraheem Ahmed c9dff5c7d5
[ty] AST garbage collection (#18482)
## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every
AST node, that is assigned by the parser. `ParsedModule` can use this to
create a flat index of AST nodes any time the file is parsed (or
reparsed). This allows `AstNodeRef` to simply index into the current
instance of the `ParsedModule`, instead of storing a pointer directly.

The indices are somewhat hackily (using an atomic integer) assigned by
the `parsed_module` query instead of by the parser directly. Assigning
the indices in source-order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible as `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which the pointers in the flat AST are tied to. This
means that we have to do an extra AST traversal to assign and collect
the nodes into a flat index, but the small performance impact (~3% on
cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
2025-06-13 08:40:11 -04:00

92 lines
2.7 KiB
Rust

//! When building a semantic model, we often need to know which names in a given scope are declared
//! as `global`. This module provides data structures for storing and querying the set of `global`
//! names in a given scope.
use std::ops::Index;
use ruff_python_ast::{self as ast, Stmt};
use ruff_text_size::{Ranged, TextRange};
use rustc_hash::FxHashMap;
use ruff_index::{IndexVec, newtype_index};
use ruff_python_ast::statement_visitor::{StatementVisitor, walk_stmt};
/// Id uniquely identifying the set of global names for a given scope.
///
/// This is the index type of `GlobalsArena`: an id is handed out by `GlobalsArena::push` and can
/// later be used to index the arena to get the corresponding `Globals` back.
#[newtype_index]
pub struct GlobalsId;
/// Arena of `Globals` sets, addressed by `GlobalsId`.
///
/// Entries are added with `push`, which returns the id of the new entry, and are read back by
/// indexing the arena with that id.
#[derive(Debug, Default)]
pub(crate) struct GlobalsArena<'a>(IndexVec<GlobalsId, Globals<'a>>);
impl<'a> GlobalsArena<'a> {
/// Inserts a new set of global names into the global names arena and returns its unique id.
pub(crate) fn push(&mut self, globals: Globals<'a>) -> GlobalsId {
self.0.push(globals)
}
}
impl<'a> Index<GlobalsId> for GlobalsArena<'a> {
type Output = Globals<'a>;
#[inline]
fn index(&self, index: GlobalsId) -> &Self::Output {
&self.0[index]
}
}
/// The set of global names for a given scope, represented as a map from the name of the global to
/// the range of the declaration in the source code.
///
/// The keys are string slices borrowed for `'a` from the statements passed to
/// `Globals::from_body`. Each name appears at most once; if a name is declared `global` more than
/// once in the visited statements, the range of the last declaration visited is the one stored.
#[derive(Debug)]
pub struct Globals<'a>(FxHashMap<&'a str, TextRange>);
impl<'a> Globals<'a> {
/// Extracts the set of global names from a given scope, or return `None` if the scope does not
/// contain any `global` declarations.
pub fn from_body(body: &'a [Stmt]) -> Option<Self> {
let mut builder = GlobalsVisitor::new();
builder.visit_body(body);
builder.finish()
}
pub(crate) fn get(&self, name: &str) -> Option<TextRange> {
self.0.get(name).copied()
}
pub(crate) fn iter(&self) -> impl Iterator<Item = (&&'a str, &TextRange)> + '_ {
self.0.iter()
}
}
/// Extracts the set of global names from a given scope.
///
/// The visitor accumulates a map from each name found in a `global` statement to the range of
/// that name, skipping function and class definitions. Call `finish` to turn the accumulated map
/// into a `Globals`.
#[derive(Debug)]
struct GlobalsVisitor<'a>(FxHashMap<&'a str, TextRange>);
impl<'a> GlobalsVisitor<'a> {
fn new() -> Self {
Self(FxHashMap::default())
}
fn finish(self) -> Option<Globals<'a>> {
(!self.0.is_empty()).then_some(Globals(self.0))
}
}
impl<'a> StatementVisitor<'a> for GlobalsVisitor<'a> {
    /// Records the names of a `global` statement, skips function and class definitions, and
    /// walks into every other kind of statement.
    fn visit_stmt(&mut self, stmt: &'a Stmt) {
        match stmt {
            Stmt::Global(ast::StmtGlobal {
                names,
                range: _,
                node_index: _,
            }) => {
                // Map each declared name to its own range; a name seen again replaces the
                // previously stored range.
                let declared = names.iter().map(|name| (name.as_str(), name.range()));
                self.0.extend(declared);
            }
            // Function and class bodies are deliberately not walked, so `global` statements
            // nested inside them are not attributed to the scope being visited.
            Stmt::FunctionDef(_) | Stmt::ClassDef(_) => {}
            _ => walk_stmt(self, stmt),
        }
    }
}