[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every
AST node, which is assigned each time the file is parsed. `ParsedModule`
can use this to create a flat index of AST nodes any time the file is
parsed (or reparsed). This allows `AstNodeRef` to simply index into the
current instance of the `ParsedModule`, instead of storing a pointer
directly.

The indices are assigned, somewhat hackily (using an atomic integer), by
the `parsed_module` query rather than by the parser directly. Assigning
the indices in source order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible because `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which is what the pointers in the flat AST are tied
to. This means that we have to do an extra AST traversal to assign the
indices and collect the nodes into a flat index, but the small
performance impact (~3% on cold runs) seems worth it for the memory
savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
Ibraheem Ahmed 2025-06-13 08:40:11 -04:00 committed by GitHub
parent 76d9009a6e
commit c9dff5c7d5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
824 changed files with 25243 additions and 804 deletions

View file

@ -136,7 +136,11 @@ fn is_class_method(
pub fn is_stub(function_def: &StmtFunctionDef, semantic: &SemanticModel) -> bool {
function_def.body.iter().all(|stmt| match stmt {
Stmt::Pass(_) => true,
Stmt::Expr(StmtExpr { value, range: _ }) => {
Stmt::Expr(StmtExpr {
value,
range: _,
node_index: _,
}) => {
matches!(
value.as_ref(),
Expr::StringLiteral(_) | Expr::EllipsisLiteral(_)
@ -144,6 +148,7 @@ pub fn is_stub(function_def: &StmtFunctionDef, semantic: &SemanticModel) -> bool
}
Stmt::Raise(StmtRaise {
range: _,
node_index: _,
exc: exception,
cause: _,
}) => exception.as_ref().is_some_and(|exc| {

View file

@ -12,7 +12,11 @@ use crate::SemanticModel;
/// ```
pub fn is_sys_path_modification(stmt: &Stmt, semantic: &SemanticModel) -> bool {
match stmt {
Stmt::Expr(ast::StmtExpr { value, range: _ }) => match value.as_ref() {
Stmt::Expr(ast::StmtExpr {
value,
range: _,
node_index: _,
}) => match value.as_ref() {
Expr::Call(ast::ExprCall { func, .. }) => semantic
.resolve_qualified_name(func.as_ref())
.is_some_and(|qualified_name| {
@ -96,7 +100,12 @@ pub fn is_os_environ_modification(stmt: &Stmt, semantic: &SemanticModel) -> bool
/// matplotlib.use("Agg")
/// ```
pub fn is_matplotlib_activation(stmt: &Stmt, semantic: &SemanticModel) -> bool {
let Stmt::Expr(ast::StmtExpr { value, range: _ }) = stmt else {
let Stmt::Expr(ast::StmtExpr {
value,
range: _,
node_index: _,
}) = stmt
else {
return false;
};
let Expr::Call(ast::ExprCall { func, .. }) = value.as_ref() else {

View file

@ -287,6 +287,7 @@ pub fn is_immutable_annotation(
op: Operator::BitOr,
right,
range: _,
node_index: _,
}) => {
is_immutable_annotation(left, semantic, extend_immutable_calls)
&& is_immutable_annotation(right, semantic, extend_immutable_calls)
@ -442,6 +443,7 @@ where
left,
right,
range: _,
node_index: _,
}) = expr
{
// The union data structure usually looks like this:

View file

@ -74,7 +74,11 @@ impl<'a> GlobalsVisitor<'a> {
impl<'a> StatementVisitor<'a> for GlobalsVisitor<'a> {
fn visit_stmt(&mut self, stmt: &'a Stmt) {
match stmt {
Stmt::Global(ast::StmtGlobal { names, range: _ }) => {
Stmt::Global(ast::StmtGlobal {
names,
range: _,
node_index: _,
}) => {
for name in names {
self.0.insert(name.as_str(), name.range());
}

View file

@ -230,6 +230,7 @@ impl<'de> serde::de::Deserialize<'de> for NameImports {
names,
level,
range: _,
node_index: _,
}) => names
.iter()
.map(|name| {
@ -243,7 +244,11 @@ impl<'de> serde::de::Deserialize<'de> for NameImports {
})
})
.collect(),
Stmt::Import(ast::StmtImport { names, range: _ }) => names
Stmt::Import(ast::StmtImport {
names,
range: _,
node_index: _,
}) => names
.iter()
.map(|name| {
NameImport::Import(ModuleNameImport {

View file

@ -82,7 +82,12 @@ impl SemanticModel<'_> {
flags: &mut DunderAllFlags,
) {
for elt in elts {
if let Expr::StringLiteral(ast::ExprStringLiteral { value, range }) = elt {
if let Expr::StringLiteral(ast::ExprStringLiteral {
value,
range,
node_index: _,
}) = elt
{
names.push(DunderAllName {
name: value.to_str(),
range: *range,