[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every
AST node, that is assigned by the parser. `ParsedModule` can use this to
create a flat index of AST nodes any time the file is parsed (or
reparsed). This allows `AstNodeRef` to simply index into the current
instance of the `ParsedModule`, instead of storing a pointer directly.

The indices are somewhat hackily (using an atomic integer) assigned by
the `parsed_module` query instead of by the parser directly. Assigning
the indices in source-order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible as `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which the pointers in the flat AST are tied to. This
means that we have to do an extra AST traversal to assign and collect
the nodes into a flat index, but the small performance impact (~3% on
cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
Ibraheem Ahmed 2025-06-13 08:40:11 -04:00 committed by GitHub
parent 76d9009a6e
commit c9dff5c7d5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
824 changed files with 25243 additions and 804 deletions

View file

@ -12,8 +12,8 @@ use crate::parenthesize::parenthesized_range;
use crate::statement_visitor::StatementVisitor;
use crate::visitor::Visitor;
use crate::{
self as ast, Arguments, CmpOp, DictItem, ExceptHandler, Expr, InterpolatedStringElement,
MatchCase, Operator, Pattern, Stmt, TypeParam,
self as ast, Arguments, AtomicNodeIndex, CmpOp, DictItem, ExceptHandler, Expr,
InterpolatedStringElement, MatchCase, Operator, Pattern, Stmt, TypeParam,
};
use crate::{AnyNodeRef, ExprContext};
@ -53,6 +53,7 @@ where
func,
arguments,
range: _,
node_index: _,
}) = expr
{
// Ex) `list()`
@ -146,6 +147,7 @@ pub fn any_over_expr(expr: &Expr, func: &dyn Fn(&Expr) -> bool) -> bool {
target,
value,
range: _,
node_index: _,
}) => any_over_expr(target, func) || any_over_expr(value, func),
Expr::BinOp(ast::ExprBinOp { left, right, .. }) => {
any_over_expr(left, func) || any_over_expr(right, func)
@ -157,32 +159,49 @@ pub fn any_over_expr(expr: &Expr, func: &dyn Fn(&Expr) -> bool) -> bool {
body,
orelse,
range: _,
node_index: _,
}) => any_over_expr(test, func) || any_over_expr(body, func) || any_over_expr(orelse, func),
Expr::Dict(ast::ExprDict { items, range: _ }) => {
items.iter().any(|ast::DictItem { key, value }| {
any_over_expr(value, func)
|| key.as_ref().is_some_and(|key| any_over_expr(key, func))
})
}
Expr::Set(ast::ExprSet { elts, range: _ })
| Expr::List(ast::ExprList { elts, range: _, .. })
| Expr::Tuple(ast::ExprTuple { elts, range: _, .. }) => {
elts.iter().any(|expr| any_over_expr(expr, func))
}
Expr::Dict(ast::ExprDict {
items,
range: _,
node_index: _,
}) => items.iter().any(|ast::DictItem { key, value }| {
any_over_expr(value, func) || key.as_ref().is_some_and(|key| any_over_expr(key, func))
}),
Expr::Set(ast::ExprSet {
elts,
range: _,
node_index: _,
})
| Expr::List(ast::ExprList {
elts,
range: _,
node_index: _,
..
})
| Expr::Tuple(ast::ExprTuple {
elts,
range: _,
node_index: _,
..
}) => elts.iter().any(|expr| any_over_expr(expr, func)),
Expr::ListComp(ast::ExprListComp {
elt,
generators,
range: _,
node_index: _,
})
| Expr::SetComp(ast::ExprSetComp {
elt,
generators,
range: _,
node_index: _,
})
| Expr::Generator(ast::ExprGenerator {
elt,
generators,
range: _,
node_index: _,
parenthesized: _,
}) => {
any_over_expr(elt, func)
@ -197,6 +216,7 @@ pub fn any_over_expr(expr: &Expr, func: &dyn Fn(&Expr) -> bool) -> bool {
value,
generators,
range: _,
node_index: _,
}) => {
any_over_expr(key, func)
|| any_over_expr(value, func)
@ -206,15 +226,33 @@ pub fn any_over_expr(expr: &Expr, func: &dyn Fn(&Expr) -> bool) -> bool {
|| generator.ifs.iter().any(|expr| any_over_expr(expr, func))
})
}
Expr::Await(ast::ExprAwait { value, range: _ })
| Expr::YieldFrom(ast::ExprYieldFrom { value, range: _ })
Expr::Await(ast::ExprAwait {
value,
range: _,
node_index: _,
})
| Expr::YieldFrom(ast::ExprYieldFrom {
value,
range: _,
node_index: _,
})
| Expr::Attribute(ast::ExprAttribute {
value, range: _, ..
value,
range: _,
node_index: _,
..
})
| Expr::Starred(ast::ExprStarred {
value, range: _, ..
value,
range: _,
node_index: _,
..
}) => any_over_expr(value, func),
Expr::Yield(ast::ExprYield { value, range: _ }) => value
Expr::Yield(ast::ExprYield {
value,
range: _,
node_index: _,
}) => value
.as_ref()
.is_some_and(|value| any_over_expr(value, func)),
Expr::Compare(ast::ExprCompare {
@ -224,6 +262,7 @@ pub fn any_over_expr(expr: &Expr, func: &dyn Fn(&Expr) -> bool) -> bool {
func: call_func,
arguments,
range: _,
node_index: _,
}) => {
any_over_expr(call_func, func)
// Note that this is the evaluation order but not necessarily the declaration order
@ -241,6 +280,7 @@ pub fn any_over_expr(expr: &Expr, func: &dyn Fn(&Expr) -> bool) -> bool {
upper,
step,
range: _,
node_index: _,
}) => {
lower
.as_ref()
@ -284,11 +324,17 @@ pub fn any_over_type_param(type_param: &TypeParam, func: &dyn Fn(&Expr) -> bool)
pub fn any_over_pattern(pattern: &Pattern, func: &dyn Fn(&Expr) -> bool) -> bool {
match pattern {
Pattern::MatchValue(ast::PatternMatchValue { value, range: _ }) => {
any_over_expr(value, func)
}
Pattern::MatchValue(ast::PatternMatchValue {
value,
range: _,
node_index: _,
}) => any_over_expr(value, func),
Pattern::MatchSingleton(_) => false,
Pattern::MatchSequence(ast::PatternMatchSequence { patterns, range: _ }) => patterns
Pattern::MatchSequence(ast::PatternMatchSequence {
patterns,
range: _,
node_index: _,
}) => patterns
.iter()
.any(|pattern| any_over_pattern(pattern, func)),
Pattern::MatchMapping(ast::PatternMatchMapping { keys, patterns, .. }) => {
@ -312,7 +358,11 @@ pub fn any_over_pattern(pattern: &Pattern, func: &dyn Fn(&Expr) -> bool) -> bool
Pattern::MatchAs(ast::PatternMatchAs { pattern, .. }) => pattern
.as_ref()
.is_some_and(|pattern| any_over_pattern(pattern, func)),
Pattern::MatchOr(ast::PatternMatchOr { patterns, range: _ }) => patterns
Pattern::MatchOr(ast::PatternMatchOr {
patterns,
range: _,
node_index: _,
}) => patterns
.iter()
.any(|pattern| any_over_pattern(pattern, func)),
}
@ -395,12 +445,18 @@ pub fn any_over_stmt(stmt: &Stmt, func: &dyn Fn(&Expr) -> bool) -> bool {
.iter()
.any(|decorator| any_over_expr(&decorator.expression, func))
}
Stmt::Return(ast::StmtReturn { value, range: _ }) => value
Stmt::Return(ast::StmtReturn {
value,
range: _,
node_index: _,
}) => value
.as_ref()
.is_some_and(|value| any_over_expr(value, func)),
Stmt::Delete(ast::StmtDelete { targets, range: _ }) => {
targets.iter().any(|expr| any_over_expr(expr, func))
}
Stmt::Delete(ast::StmtDelete {
targets,
range: _,
node_index: _,
}) => targets.iter().any(|expr| any_over_expr(expr, func)),
Stmt::TypeAlias(ast::StmtTypeAlias {
name,
type_params,
@ -450,12 +506,14 @@ pub fn any_over_stmt(stmt: &Stmt, func: &dyn Fn(&Expr) -> bool) -> bool {
body,
orelse,
range: _,
node_index: _,
}) => any_over_expr(test, func) || any_over_body(body, func) || any_over_body(orelse, func),
Stmt::If(ast::StmtIf {
test,
body,
elif_else_clauses,
range: _,
node_index: _,
}) => {
any_over_expr(test, func)
|| any_over_body(body, func)
@ -480,6 +538,7 @@ pub fn any_over_stmt(stmt: &Stmt, func: &dyn Fn(&Expr) -> bool) -> bool {
exc,
cause,
range: _,
node_index: _,
}) => {
exc.as_ref().is_some_and(|value| any_over_expr(value, func))
|| cause
@ -493,6 +552,7 @@ pub fn any_over_stmt(stmt: &Stmt, func: &dyn Fn(&Expr) -> bool) -> bool {
finalbody,
is_star: _,
range: _,
node_index: _,
}) => {
any_over_body(body, func)
|| handlers.iter().any(|handler| {
@ -511,6 +571,7 @@ pub fn any_over_stmt(stmt: &Stmt, func: &dyn Fn(&Expr) -> bool) -> bool {
test,
msg,
range: _,
node_index: _,
}) => {
any_over_expr(test, func)
|| msg.as_ref().is_some_and(|value| any_over_expr(value, func))
@ -519,6 +580,7 @@ pub fn any_over_stmt(stmt: &Stmt, func: &dyn Fn(&Expr) -> bool) -> bool {
subject,
cases,
range: _,
node_index: _,
}) => {
any_over_expr(subject, func)
|| cases.iter().any(|case| {
@ -527,6 +589,7 @@ pub fn any_over_stmt(stmt: &Stmt, func: &dyn Fn(&Expr) -> bool) -> bool {
guard,
body,
range: _,
node_index: _,
} = case;
any_over_pattern(pattern, func)
|| guard.as_ref().is_some_and(|expr| any_over_expr(expr, func))
@ -537,7 +600,11 @@ pub fn any_over_stmt(stmt: &Stmt, func: &dyn Fn(&Expr) -> bool) -> bool {
Stmt::ImportFrom(_) => false,
Stmt::Global(_) => false,
Stmt::Nonlocal(_) => false,
Stmt::Expr(ast::StmtExpr { value, range: _ }) => any_over_expr(value, func),
Stmt::Expr(ast::StmtExpr {
value,
range: _,
node_index: _,
}) => any_over_expr(value, func),
Stmt::Pass(_) | Stmt::Break(_) | Stmt::Continue(_) => false,
Stmt::IpyEscapeCommand(_) => false,
}
@ -957,6 +1024,7 @@ impl<'a> StatementVisitor<'a> for RaiseStatementVisitor<'a> {
exc,
cause,
range: _,
node_index: _,
}) => {
self.raises
.push((stmt.range(), exc.as_deref(), cause.as_deref()));
@ -1026,7 +1094,12 @@ impl Visitor<'_> for AwaitVisitor {
/// Return `true` if a `Stmt` is a docstring.
pub fn is_docstring_stmt(stmt: &Stmt) -> bool {
if let Stmt::Expr(ast::StmtExpr { value, range: _ }) = stmt {
if let Stmt::Expr(ast::StmtExpr {
value,
range: _,
node_index: _,
}) = stmt
{
value.is_string_literal_expr()
} else {
false
@ -1039,7 +1112,12 @@ pub fn on_conditional_branch<'a>(parents: &mut impl Iterator<Item = &'a Stmt>) -
if matches!(parent, Stmt::If(_) | Stmt::While(_) | Stmt::Match(_)) {
return true;
}
if let Stmt::Expr(ast::StmtExpr { value, range: _ }) = parent {
if let Stmt::Expr(ast::StmtExpr {
value,
range: _,
node_index: _,
}) = parent
{
if value.is_if_expr() {
return true;
}
@ -1480,6 +1558,7 @@ pub fn pep_604_optional(expr: &Expr) -> Expr {
op: Operator::BitOr,
right: Box::new(Expr::NoneLiteral(ast::ExprNoneLiteral::default())),
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
}
.into()
}
@ -1491,6 +1570,7 @@ pub fn pep_604_union(elts: &[Expr]) -> Expr {
elts: vec![],
ctx: ExprContext::Load,
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
parenthesized: true,
}),
[Expr::Tuple(ast::ExprTuple { elts, .. })] => pep_604_union(elts),
@ -1500,6 +1580,7 @@ pub fn pep_604_union(elts: &[Expr]) -> Expr {
op: Operator::BitOr,
right: Box::new(pep_604_union(&[elt.clone()])),
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
}),
}
}
@ -1510,11 +1591,13 @@ pub fn typing_optional(elt: Expr, binding: Name) -> Expr {
value: Box::new(Expr::Name(ast::ExprName {
id: binding,
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
ctx: ExprContext::Load,
})),
slice: Box::new(elt),
ctx: ExprContext::Load,
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
})
}
@ -1527,16 +1610,19 @@ pub fn typing_union(elts: &[Expr], binding: Name) -> Expr {
value: Box::new(Expr::Name(ast::ExprName {
id: binding,
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
ctx: ExprContext::Load,
})),
slice: Box::new(Expr::Tuple(ast::ExprTuple {
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
elts: elts.to_vec(),
ctx: ExprContext::Load,
parenthesized: false,
})),
ctx: ExprContext::Load,
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
})
}
@ -1624,9 +1710,9 @@ mod tests {
use crate::helpers::{any_over_stmt, any_over_type_param, resolve_imported_module_path};
use crate::{
Expr, ExprContext, ExprName, ExprNumberLiteral, Identifier, Int, Number, Stmt,
StmtTypeAlias, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple,
TypeParams,
AtomicNodeIndex, Expr, ExprContext, ExprName, ExprNumberLiteral, Identifier, Int, Number,
Stmt, StmtTypeAlias, TypeParam, TypeParamParamSpec, TypeParamTypeVar,
TypeParamTypeVarTuple, TypeParams,
};
#[test]
@ -1669,28 +1755,34 @@ mod tests {
let name = Expr::Name(ExprName {
id: "x".into(),
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
ctx: ExprContext::Load,
});
let constant_one = Expr::NumberLiteral(ExprNumberLiteral {
value: Number::Int(Int::from(1u8)),
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
});
let constant_two = Expr::NumberLiteral(ExprNumberLiteral {
value: Number::Int(Int::from(2u8)),
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
});
let constant_three = Expr::NumberLiteral(ExprNumberLiteral {
value: Number::Int(Int::from(3u8)),
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
});
let type_var_one = TypeParam::TypeVar(TypeParamTypeVar {
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
bound: Some(Box::new(constant_one.clone())),
default: None,
name: Identifier::new("x", TextRange::default()),
});
let type_var_two = TypeParam::TypeVar(TypeParamTypeVar {
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
bound: None,
default: Some(Box::new(constant_two.clone())),
name: Identifier::new("x", TextRange::default()),
@ -1700,9 +1792,11 @@ mod tests {
type_params: Some(Box::new(TypeParams {
type_params: vec![type_var_one, type_var_two],
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
})),
value: Box::new(constant_three.clone()),
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
});
assert!(!any_over_stmt(&type_alias, &|expr| {
seen.borrow_mut().push(expr.clone());
@ -1718,6 +1812,7 @@ mod tests {
fn any_over_type_param_type_var() {
let type_var_no_bound = TypeParam::TypeVar(TypeParamTypeVar {
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
bound: None,
default: None,
name: Identifier::new("x", TextRange::default()),
@ -1727,10 +1822,12 @@ mod tests {
let constant = Expr::NumberLiteral(ExprNumberLiteral {
value: Number::Int(Int::ONE),
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
});
let type_var_with_bound = TypeParam::TypeVar(TypeParamTypeVar {
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
bound: Some(Box::new(constant.clone())),
default: None,
name: Identifier::new("x", TextRange::default()),
@ -1748,6 +1845,7 @@ mod tests {
let type_var_with_default = TypeParam::TypeVar(TypeParamTypeVar {
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
default: Some(Box::new(constant.clone())),
bound: None,
name: Identifier::new("x", TextRange::default()),
@ -1768,6 +1866,7 @@ mod tests {
fn any_over_type_param_type_var_tuple() {
let type_var_tuple = TypeParam::TypeVarTuple(TypeParamTypeVarTuple {
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
name: Identifier::new("x", TextRange::default()),
default: None,
});
@ -1779,10 +1878,12 @@ mod tests {
let constant = Expr::NumberLiteral(ExprNumberLiteral {
value: Number::Int(Int::ONE),
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
});
let type_var_tuple_with_default = TypeParam::TypeVarTuple(TypeParamTypeVarTuple {
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
default: Some(Box::new(constant.clone())),
name: Identifier::new("x", TextRange::default()),
});
@ -1802,6 +1903,7 @@ mod tests {
fn any_over_type_param_param_spec() {
let type_param_spec = TypeParam::ParamSpec(TypeParamParamSpec {
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
name: Identifier::new("x", TextRange::default()),
default: None,
});
@ -1813,10 +1915,12 @@ mod tests {
let constant = Expr::NumberLiteral(ExprNumberLiteral {
value: Number::Int(Int::ONE),
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
});
let param_spec_with_default = TypeParam::TypeVarTuple(TypeParamTypeVarTuple {
range: TextRange::default(),
node_index: AtomicNodeIndex::dummy(),
default: Some(Box::new(constant.clone())),
name: Identifier::new("x", TextRange::default()),
});