[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change in this PR is the addition of a `node_index` field to
every AST node, which is assigned whenever the file is parsed. `ParsedModule`
uses these indices to build a flat index of AST nodes each time the file is
parsed (or reparsed). This allows `AstNodeRef` to simply index into the
current instance of the `ParsedModule` instead of storing a pointer directly.

The indices are assigned, somewhat hackily (using an atomic integer), by
the `parsed_module` query rather than by the parser directly. Assigning
the indices in source order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible because `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which the pointers in the flat AST are tied to. This
means that we have to do an extra AST traversal to assign the indices and
collect the nodes into a flat index, but the small performance impact
(~3% on cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
Ibraheem Ahmed 2025-06-13 08:40:11 -04:00 committed by GitHub
parent 76d9009a6e
commit c9dff5c7d5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
824 changed files with 25243 additions and 804 deletions

View file

@ -264,6 +264,7 @@ impl<'a> Generator<'a> {
decorator_list,
type_params,
range: _,
node_index: _,
}) => {
self.newlines(if self.indent_depth == 0 { 2 } else { 1 });
for decorator in decorator_list {
@ -308,7 +309,11 @@ impl<'a> Generator<'a> {
self.newlines(2);
}
}
Stmt::Return(ast::StmtReturn { value, range: _ }) => {
Stmt::Return(ast::StmtReturn {
value,
range: _,
node_index: _,
}) => {
statement!({
if let Some(expr) = value {
self.p("return ");
@ -318,7 +323,11 @@ impl<'a> Generator<'a> {
}
});
}
Stmt::Delete(ast::StmtDelete { targets, range: _ }) => {
Stmt::Delete(ast::StmtDelete {
targets,
range: _,
node_index: _,
}) => {
statement!({
self.p("del ");
let mut first = true;
@ -342,6 +351,7 @@ impl<'a> Generator<'a> {
op,
value,
range: _,
node_index: _,
}) => {
statement!({
self.unparse_expr(target, precedence::AUG_ASSIGN);
@ -371,6 +381,7 @@ impl<'a> Generator<'a> {
value,
simple,
range: _,
node_index: _,
}) => {
statement!({
let need_parens = matches!(target.as_ref(), Expr::Name(_)) && !simple;
@ -416,6 +427,7 @@ impl<'a> Generator<'a> {
body,
orelse,
range: _,
node_index: _,
}) => {
statement!({
self.p("while ");
@ -435,6 +447,7 @@ impl<'a> Generator<'a> {
body,
elif_else_clauses,
range: _,
node_index: _,
}) => {
statement!({
self.p("if ");
@ -482,6 +495,7 @@ impl<'a> Generator<'a> {
subject,
cases,
range: _,
node_index: _,
}) => {
statement!({
self.p("match ");
@ -499,6 +513,7 @@ impl<'a> Generator<'a> {
Stmt::TypeAlias(ast::StmtTypeAlias {
name,
range: _,
node_index: _,
type_params,
value,
}) => {
@ -516,6 +531,7 @@ impl<'a> Generator<'a> {
exc,
cause,
range: _,
node_index: _,
}) => {
statement!({
self.p("raise");
@ -536,6 +552,7 @@ impl<'a> Generator<'a> {
finalbody,
is_star,
range: _,
node_index: _,
}) => {
statement!({
self.p("try:");
@ -565,6 +582,7 @@ impl<'a> Generator<'a> {
test,
msg,
range: _,
node_index: _,
}) => {
statement!({
self.p("assert ");
@ -575,7 +593,11 @@ impl<'a> Generator<'a> {
}
});
}
Stmt::Import(ast::StmtImport { names, range: _ }) => {
Stmt::Import(ast::StmtImport {
names,
range: _,
node_index: _,
}) => {
statement!({
self.p("import ");
let mut first = true;
@ -590,6 +612,7 @@ impl<'a> Generator<'a> {
names,
level,
range: _,
node_index: _,
}) => {
statement!({
self.p("from ");
@ -609,7 +632,11 @@ impl<'a> Generator<'a> {
}
});
}
Stmt::Global(ast::StmtGlobal { names, range: _ }) => {
Stmt::Global(ast::StmtGlobal {
names,
range: _,
node_index: _,
}) => {
statement!({
self.p("global ");
let mut first = true;
@ -619,7 +646,11 @@ impl<'a> Generator<'a> {
}
});
}
Stmt::Nonlocal(ast::StmtNonlocal { names, range: _ }) => {
Stmt::Nonlocal(ast::StmtNonlocal {
names,
range: _,
node_index: _,
}) => {
statement!({
self.p("nonlocal ");
let mut first = true;
@ -629,7 +660,11 @@ impl<'a> Generator<'a> {
}
});
}
Stmt::Expr(ast::StmtExpr { value, range: _ }) => {
Stmt::Expr(ast::StmtExpr {
value,
range: _,
node_index: _,
}) => {
statement!({
self.unparse_expr(value, precedence::EXPR);
});
@ -664,6 +699,7 @@ impl<'a> Generator<'a> {
name,
body,
range: _,
node_index: _,
}) => {
self.p("except");
if star {
@ -685,13 +721,25 @@ impl<'a> Generator<'a> {
fn unparse_pattern(&mut self, ast: &Pattern) {
match ast {
Pattern::MatchValue(ast::PatternMatchValue { value, range: _ }) => {
Pattern::MatchValue(ast::PatternMatchValue {
value,
range: _,
node_index: _,
}) => {
self.unparse_expr(value, precedence::MAX);
}
Pattern::MatchSingleton(ast::PatternMatchSingleton { value, range: _ }) => {
Pattern::MatchSingleton(ast::PatternMatchSingleton {
value,
range: _,
node_index: _,
}) => {
self.unparse_singleton(*value);
}
Pattern::MatchSequence(ast::PatternMatchSequence { patterns, range: _ }) => {
Pattern::MatchSequence(ast::PatternMatchSequence {
patterns,
range: _,
node_index: _,
}) => {
self.p("[");
let mut first = true;
for pattern in patterns {
@ -705,6 +753,7 @@ impl<'a> Generator<'a> {
patterns,
rest,
range: _,
node_index: _,
}) => {
self.p("{");
let mut first = true;
@ -722,7 +771,11 @@ impl<'a> Generator<'a> {
self.p("}");
}
Pattern::MatchClass(_) => {}
Pattern::MatchStar(ast::PatternMatchStar { name, range: _ }) => {
Pattern::MatchStar(ast::PatternMatchStar {
name,
range: _,
node_index: _,
}) => {
self.p("*");
if let Some(name) = name {
self.p_id(name);
@ -734,6 +787,7 @@ impl<'a> Generator<'a> {
pattern,
name,
range: _,
node_index: _,
}) => {
if let Some(pattern) = pattern {
self.unparse_pattern(pattern);
@ -745,7 +799,11 @@ impl<'a> Generator<'a> {
self.p("_");
}
}
Pattern::MatchOr(ast::PatternMatchOr { patterns, range: _ }) => {
Pattern::MatchOr(ast::PatternMatchOr {
patterns,
range: _,
node_index: _,
}) => {
let mut first = true;
for pattern in patterns {
self.p_delim(&mut first, " | ");
@ -841,6 +899,7 @@ impl<'a> Generator<'a> {
op,
values,
range: _,
node_index: _,
}) => {
let (op, prec) = opprec!(bin, op, BoolOp, And("and", AND), Or("or", OR));
group_if!(prec, {
@ -855,6 +914,7 @@ impl<'a> Generator<'a> {
target,
value,
range: _,
node_index: _,
}) => {
group_if!(precedence::NAMED_EXPR, {
self.unparse_expr(target, precedence::NAMED_EXPR);
@ -867,6 +927,7 @@ impl<'a> Generator<'a> {
op,
right,
range: _,
node_index: _,
}) => {
let rassoc = matches!(op, Operator::Pow);
let (op, prec) = opprec!(
@ -897,6 +958,7 @@ impl<'a> Generator<'a> {
op,
operand,
range: _,
node_index: _,
}) => {
let (op, prec) = opprec!(
un,
@ -916,6 +978,7 @@ impl<'a> Generator<'a> {
parameters,
body,
range: _,
node_index: _,
}) => {
group_if!(precedence::LAMBDA, {
self.p("lambda");
@ -932,6 +995,7 @@ impl<'a> Generator<'a> {
body,
orelse,
range: _,
node_index: _,
}) => {
group_if!(precedence::IF_EXP, {
self.unparse_expr(body, precedence::IF_EXP + 1);
@ -974,6 +1038,7 @@ impl<'a> Generator<'a> {
elt,
generators,
range: _,
node_index: _,
}) => {
self.p("[");
self.unparse_expr(elt, precedence::COMPREHENSION_ELEMENT);
@ -984,6 +1049,7 @@ impl<'a> Generator<'a> {
elt,
generators,
range: _,
node_index: _,
}) => {
self.p("{");
self.unparse_expr(elt, precedence::COMPREHENSION_ELEMENT);
@ -995,6 +1061,7 @@ impl<'a> Generator<'a> {
value,
generators,
range: _,
node_index: _,
}) => {
self.p("{");
self.unparse_expr(key, precedence::COMPREHENSION_ELEMENT);
@ -1008,19 +1075,28 @@ impl<'a> Generator<'a> {
generators,
parenthesized: _,
range: _,
node_index: _,
}) => {
self.p("(");
self.unparse_expr(elt, precedence::COMPREHENSION_ELEMENT);
self.unparse_comp(generators);
self.p(")");
}
Expr::Await(ast::ExprAwait { value, range: _ }) => {
Expr::Await(ast::ExprAwait {
value,
range: _,
node_index: _,
}) => {
group_if!(precedence::AWAIT, {
self.p("await ");
self.unparse_expr(value, precedence::MAX);
});
}
Expr::Yield(ast::ExprYield { value, range: _ }) => {
Expr::Yield(ast::ExprYield {
value,
range: _,
node_index: _,
}) => {
group_if!(precedence::YIELD, {
self.p("yield");
if let Some(value) = value {
@ -1029,7 +1105,11 @@ impl<'a> Generator<'a> {
}
});
}
Expr::YieldFrom(ast::ExprYieldFrom { value, range: _ }) => {
Expr::YieldFrom(ast::ExprYieldFrom {
value,
range: _,
node_index: _,
}) => {
group_if!(precedence::YIELD_FROM, {
self.p("yield from ");
self.unparse_expr(value, precedence::MAX);
@ -1040,6 +1120,7 @@ impl<'a> Generator<'a> {
ops,
comparators,
range: _,
node_index: _,
}) => {
group_if!(precedence::CMP, {
let new_lvl = precedence::CMP + 1;
@ -1066,6 +1147,7 @@ impl<'a> Generator<'a> {
func,
arguments,
range: _,
node_index: _,
}) => {
self.unparse_expr(func, precedence::MAX);
self.p("(");
@ -1075,6 +1157,7 @@ impl<'a> Generator<'a> {
elt,
generators,
range: _,
node_index: _,
parenthesized: _,
}),
],
@ -1217,6 +1300,7 @@ impl<'a> Generator<'a> {
upper,
step,
range: _,
node_index: _,
}) => {
if let Some(lower) = lower {
self.unparse_expr(lower, precedence::SLICE);
@ -1396,6 +1480,7 @@ impl<'a> Generator<'a> {
conversion,
format_spec,
range: _,
node_index: _,
}) => self.unparse_interpolated_element(
expression,
debug_text.as_ref(),