[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every
AST node. The parser only fills it with a placeholder
(`AtomicNodeIndex::dummy()`); the real index is assigned after parsing.
`ParsedModule` can use this to
create a flat index of AST nodes any time the file is parsed (or
reparsed). This allows `AstNodeRef` to simply index into the current
instance of the `ParsedModule`, instead of storing a pointer directly.

The indices are assigned, somewhat hackily (using an atomic integer), by
the `parsed_module` query instead of by the parser directly. Assigning
the indices in source-order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible as `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which the pointers in the flat AST are tied to. This
means that we have to do an extra AST traversal to assign and collect
the nodes into a flat index, but the small performance impact (~3% on
cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
Ibraheem Ahmed 2025-06-13 08:40:11 -04:00 committed by GitHub
parent 76d9009a6e
commit c9dff5c7d5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
824 changed files with 25243 additions and 804 deletions

View file

@ -3,8 +3,8 @@ use std::fmt::{Display, Write};
use ruff_python_ast::name::Name;
use ruff_python_ast::{
self as ast, ExceptHandler, Expr, ExprContext, IpyEscapeKind, Operator, PythonVersion, Stmt,
WithItem,
self as ast, AtomicNodeIndex, ExceptHandler, Expr, ExprContext, IpyEscapeKind, Operator,
PythonVersion, Stmt, WithItem,
};
use ruff_text_size::{Ranged, TextRange, TextSize};
@ -312,6 +312,7 @@ impl<'src> Parser<'src> {
Stmt::Expr(ast::StmtExpr {
range: self.node_range(start),
value: Box::new(parsed_expr.expr),
node_index: AtomicNodeIndex::dummy(),
})
}
}
@ -367,6 +368,7 @@ impl<'src> Parser<'src> {
ast::StmtDelete {
targets,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -415,6 +417,7 @@ impl<'src> Parser<'src> {
ast::StmtReturn {
range: self.node_range(start),
value,
node_index: AtomicNodeIndex::dummy(),
}
}
@ -520,6 +523,7 @@ impl<'src> Parser<'src> {
range: self.node_range(start),
exc,
cause,
node_index: AtomicNodeIndex::dummy(),
}
}
@ -560,6 +564,7 @@ impl<'src> Parser<'src> {
ast::StmtImport {
range: self.node_range(start),
names,
node_index: AtomicNodeIndex::dummy(),
}
}
@ -671,6 +676,7 @@ impl<'src> Parser<'src> {
names,
level: leading_dots,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -687,9 +693,11 @@ impl<'src> Parser<'src> {
name: ast::Identifier {
id: Name::new_static("*"),
range,
node_index: AtomicNodeIndex::dummy(),
},
asname: None,
range,
node_index: AtomicNodeIndex::dummy(),
};
}
@ -722,6 +730,7 @@ impl<'src> Parser<'src> {
range: self.node_range(start),
name,
asname,
node_index: AtomicNodeIndex::dummy(),
}
}
@ -750,6 +759,7 @@ impl<'src> Parser<'src> {
ast::Identifier {
id: Name::from(dotted_name),
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -765,6 +775,7 @@ impl<'src> Parser<'src> {
self.bump(TokenKind::Pass);
ast::StmtPass {
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -780,6 +791,7 @@ impl<'src> Parser<'src> {
self.bump(TokenKind::Continue);
ast::StmtContinue {
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -795,6 +807,7 @@ impl<'src> Parser<'src> {
self.bump(TokenKind::Break);
ast::StmtBreak {
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -844,6 +857,7 @@ impl<'src> Parser<'src> {
test: Box::new(test.expr),
msg,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -882,6 +896,7 @@ impl<'src> Parser<'src> {
ast::StmtGlobal {
range: self.node_range(start),
names,
node_index: AtomicNodeIndex::dummy(),
}
}
@ -927,6 +942,7 @@ impl<'src> Parser<'src> {
ast::StmtNonlocal {
range: self.node_range(start),
names,
node_index: AtomicNodeIndex::dummy(),
}
}
@ -979,6 +995,7 @@ impl<'src> Parser<'src> {
type_params: type_params.map(Box::new),
value: Box::new(value.expr),
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -1001,7 +1018,12 @@ impl<'src> Parser<'src> {
self.add_error(ParseErrorType::UnexpectedIpythonEscapeCommand, range);
}
ast::StmtIpyEscapeCommand { range, kind, value }
ast::StmtIpyEscapeCommand {
range,
kind,
value,
node_index: AtomicNodeIndex::dummy(),
}
}
/// Parses an IPython help end escape command at the statement level.
@ -1097,6 +1119,7 @@ impl<'src> Parser<'src> {
value: value.into_boxed_str(),
kind,
range: self.node_range(parsed_expr.start()),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -1164,6 +1187,7 @@ impl<'src> Parser<'src> {
targets,
value: Box::new(value.expr),
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -1243,6 +1267,7 @@ impl<'src> Parser<'src> {
value,
simple,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -1297,6 +1322,7 @@ impl<'src> Parser<'src> {
op,
value: Box::new(value.expr),
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -1352,6 +1378,7 @@ impl<'src> Parser<'src> {
body,
elif_else_clauses,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -1395,6 +1422,7 @@ impl<'src> Parser<'src> {
test,
body,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -1544,6 +1572,7 @@ impl<'src> Parser<'src> {
finalbody,
is_star,
range: self.node_range(try_start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -1693,6 +1722,7 @@ impl<'src> Parser<'src> {
name,
body: except_body,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}),
block_kind,
)
@ -1804,6 +1834,7 @@ impl<'src> Parser<'src> {
body,
orelse,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -1851,6 +1882,7 @@ impl<'src> Parser<'src> {
body,
orelse,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -1980,6 +2012,7 @@ impl<'src> Parser<'src> {
is_async: false,
returns,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -2049,6 +2082,7 @@ impl<'src> Parser<'src> {
type_params: type_params.map(Box::new),
arguments,
body,
node_index: AtomicNodeIndex::dummy(),
}
}
@ -2075,6 +2109,7 @@ impl<'src> Parser<'src> {
body,
is_async: false,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -2343,6 +2378,7 @@ impl<'src> Parser<'src> {
range: self.node_range(start),
context_expr: context_expr.expr,
optional_vars,
node_index: AtomicNodeIndex::dummy(),
},
}
}
@ -2411,6 +2447,7 @@ impl<'src> Parser<'src> {
subject: Box::new(subject),
cases,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
})
}
TokenKind::Newline if matches!(self.peek2(), (TokenKind::Indent, TokenKind::Case)) => {
@ -2433,6 +2470,7 @@ impl<'src> Parser<'src> {
subject: Box::new(subject),
cases,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
})
}
_ => {
@ -2480,6 +2518,7 @@ impl<'src> Parser<'src> {
subject: Box::new(subject),
cases,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -2658,6 +2697,7 @@ impl<'src> Parser<'src> {
guard,
body,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
@ -2826,6 +2866,7 @@ impl<'src> Parser<'src> {
decorators.push(ast::Decorator {
expression: parsed_expr.expr,
range: self.node_range(decorator_start),
node_index: AtomicNodeIndex::dummy(),
});
// test_err decorator_missing_newline
@ -3039,6 +3080,7 @@ impl<'src> Parser<'src> {
range: self.node_range(start),
name,
annotation,
node_index: AtomicNodeIndex::dummy(),
}
}
@ -3088,6 +3130,7 @@ impl<'src> Parser<'src> {
range: self.node_range(start),
parameter,
default,
node_index: AtomicNodeIndex::dummy(),
}
}
@ -3405,6 +3448,7 @@ impl<'src> Parser<'src> {
ast::TypeParams {
range: self.node_range(start),
type_params,
node_index: AtomicNodeIndex::dummy(),
}
}
@ -3467,6 +3511,7 @@ impl<'src> Parser<'src> {
range: self.node_range(start),
name,
default,
node_index: AtomicNodeIndex::dummy(),
})
// test_ok type_param_param_spec
@ -3506,6 +3551,7 @@ impl<'src> Parser<'src> {
range: self.node_range(start),
name,
default,
node_index: AtomicNodeIndex::dummy(),
})
// test_ok type_param_type_var
// type X[T] = int
@ -3589,6 +3635,7 @@ impl<'src> Parser<'src> {
name,
bound,
default,
node_index: AtomicNodeIndex::dummy(),
})
}
}