[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every
AST node, which is assigned each time the file is parsed. `ParsedModule`
can use this to create a flat index of AST nodes any time the file is
parsed (or reparsed). This allows `AstNodeRef` to simply index into the
current instance of the `ParsedModule`, instead of storing a pointer
directly.

The indices are assigned, somewhat hackily (using an atomic integer), by
the `parsed_module` query rather than by the parser directly. Assigning
the indices in source-order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible as `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which the pointers in the flat AST are tied to. This
means that we have to do an extra AST traversal to assign and collect
the nodes into a flat index, but the small performance impact (~3% on
cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
Ibraheem Ahmed 2025-06-13 08:40:11 -04:00 committed by GitHub
parent 76d9009a6e
commit c9dff5c7d5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
824 changed files with 25243 additions and 804 deletions

View file

@ -87,6 +87,7 @@ impl ClauseHeader<'_> {
type_params,
arguments,
range: _,
node_index: _,
decorator_list: _,
name: _,
body: _,
@ -103,6 +104,7 @@ impl ClauseHeader<'_> {
type_params,
parameters,
range: _,
node_index: _,
is_async: _,
decorator_list: _,
name: _,
@ -121,6 +123,7 @@ impl ClauseHeader<'_> {
ClauseHeader::If(StmtIf {
test,
range: _,
node_index: _,
body: _,
elif_else_clauses: _,
}) => {
@ -129,6 +132,7 @@ impl ClauseHeader<'_> {
ClauseHeader::ElifElse(ElifElseClause {
test,
range: _,
node_index: _,
body: _,
}) => {
if let Some(test) = test.as_ref() {
@ -139,6 +143,7 @@ impl ClauseHeader<'_> {
ClauseHeader::ExceptHandler(ExceptHandlerExceptHandler {
type_: type_expr,
range: _,
node_index: _,
name: _,
body: _,
}) => {
@ -149,6 +154,7 @@ impl ClauseHeader<'_> {
ClauseHeader::Match(StmtMatch {
subject,
range: _,
node_index: _,
cases: _,
}) => {
visit(subject.as_ref(), visitor);
@ -157,6 +163,7 @@ impl ClauseHeader<'_> {
guard,
pattern,
range: _,
node_index: _,
body: _,
}) => {
visit(pattern, visitor);
@ -169,6 +176,7 @@ impl ClauseHeader<'_> {
target,
iter,
range: _,
node_index: _,
is_async: _,
body: _,
orelse: _,
@ -179,6 +187,7 @@ impl ClauseHeader<'_> {
ClauseHeader::While(StmtWhile {
test,
range: _,
node_index: _,
body: _,
orelse: _,
}) => {
@ -187,6 +196,7 @@ impl ClauseHeader<'_> {
ClauseHeader::With(StmtWith {
items,
range: _,
node_index: _,
is_async: _,
body: _,
}) => {

View file

@ -17,6 +17,7 @@ impl FormatNodeRule<StmtAnnAssign> for FormatStmtAnnAssign {
fn fmt_fields(&self, item: &StmtAnnAssign, f: &mut PyFormatter) -> FormatResult<()> {
let StmtAnnAssign {
range: _,
node_index: _,
target,
annotation,
value,

View file

@ -14,6 +14,7 @@ impl FormatNodeRule<StmtAssert> for FormatStmtAssert {
fn fmt_fields(&self, item: &StmtAssert, f: &mut PyFormatter) -> FormatResult<()> {
let StmtAssert {
range: _,
node_index: _,
test,
msg,
} = item;

View file

@ -33,6 +33,7 @@ impl FormatNodeRule<StmtAssign> for FormatStmtAssign {
fn fmt_fields(&self, item: &StmtAssign, f: &mut PyFormatter) -> FormatResult<()> {
let StmtAssign {
range: _,
node_index: _,
targets,
value,
} = item;

View file

@ -21,6 +21,7 @@ impl FormatNodeRule<StmtAugAssign> for FormatStmtAugAssign {
op,
value,
range: _,
node_index: _,
} = item;
if has_target_own_parentheses(target, f.context())

View file

@ -18,6 +18,7 @@ impl FormatNodeRule<StmtClassDef> for FormatStmtClassDef {
fn fmt_fields(&self, item: &StmtClassDef, f: &mut PyFormatter) -> FormatResult<()> {
let StmtClassDef {
range: _,
node_index: _,
name,
arguments,
body,

View file

@ -13,7 +13,11 @@ pub struct FormatStmtDelete;
impl FormatNodeRule<StmtDelete> for FormatStmtDelete {
fn fmt_fields(&self, item: &StmtDelete, f: &mut PyFormatter) -> FormatResult<()> {
let StmtDelete { range: _, targets } = item;
let StmtDelete {
range: _,
node_index: _,
targets,
} = item;
write!(f, [token("del"), space()])?;

View file

@ -36,6 +36,7 @@ impl FormatNodeRule<StmtFor> for FormatStmtFor {
body,
orelse,
range: _,
node_index: _,
} = item;
let comments = f.context().comments().clone();

View file

@ -93,6 +93,7 @@ impl FormatNodeRule<StmtFunctionDef> for FormatStmtFunctionDef {
fn format_function_header(f: &mut PyFormatter, item: &StmtFunctionDef) -> FormatResult<()> {
let StmtFunctionDef {
range: _,
node_index: _,
is_async,
decorator_list: _,
name,

View file

@ -15,6 +15,7 @@ impl FormatNodeRule<StmtIf> for FormatStmtIf {
fn fmt_fields(&self, item: &StmtIf, f: &mut PyFormatter) -> FormatResult<()> {
let StmtIf {
range: _,
node_index: _,
test,
body,
elif_else_clauses,
@ -68,6 +69,7 @@ pub(crate) fn format_elif_else_clause(
) -> FormatResult<()> {
let ElifElseClause {
range: _,
node_index: _,
test,
body,
} = item;

View file

@ -9,7 +9,11 @@ pub struct FormatStmtImport;
impl FormatNodeRule<StmtImport> for FormatStmtImport {
fn fmt_fields(&self, item: &StmtImport, f: &mut PyFormatter) -> FormatResult<()> {
let StmtImport { names, range: _ } = item;
let StmtImport {
names,
range: _,
node_index: _,
} = item;
let names = format_with(|f| {
f.join_with(&format_args![token(","), space()])
.entries(names.iter().formatted())

View file

@ -19,6 +19,7 @@ impl FormatNodeRule<StmtImportFrom> for FormatStmtImportFrom {
names,
level,
range: _,
node_index: _,
} = item;
write!(

View file

@ -15,6 +15,7 @@ impl FormatNodeRule<StmtMatch> for FormatStmtMatch {
fn fmt_fields(&self, item: &StmtMatch, f: &mut PyFormatter) -> FormatResult<()> {
let StmtMatch {
range: _,
node_index: _,
subject,
cases,
} = item;

View file

@ -13,6 +13,7 @@ impl FormatNodeRule<StmtRaise> for FormatStmtRaise {
fn fmt_fields(&self, item: &StmtRaise, f: &mut PyFormatter) -> FormatResult<()> {
let StmtRaise {
range: _,
node_index: _,
exc,
cause,
} = item;

View file

@ -11,7 +11,11 @@ pub struct FormatStmtReturn;
impl FormatNodeRule<StmtReturn> for FormatStmtReturn {
fn fmt_fields(&self, item: &StmtReturn, f: &mut PyFormatter) -> FormatResult<()> {
let StmtReturn { range: _, value } = item;
let StmtReturn {
range: _,
node_index: _,
value,
} = item;
token("return").fmt(f)?;

View file

@ -66,6 +66,7 @@ impl FormatNodeRule<StmtTry> for FormatStmtTry {
finalbody,
is_star,
range: _,
node_index: _,
} = item;
let comments_info = f.context().comments().clone();

View file

@ -17,6 +17,7 @@ impl FormatNodeRule<StmtTypeAlias> for FormatStmtTypeAlias {
type_params,
value,
range: _,
node_index: _,
} = item;
write!(f, [token("type"), space(), name.as_ref().format()])?;

View file

@ -15,6 +15,7 @@ impl FormatNodeRule<StmtWhile> for FormatStmtWhile {
fn fmt_fields(&self, item: &StmtWhile, f: &mut PyFormatter) -> FormatResult<()> {
let StmtWhile {
range: _,
node_index: _,
test,
body,
orelse,